/[projects]/android/TrainInfoService/src/dk/thoerup/traininfoservice/banedk/DepartureFetcher.java
ViewVC logotype

Contents of /android/TrainInfoService/src/dk/thoerup/traininfoservice/banedk/DepartureFetcher.java

Parent Directory | Revision Log


Revision 1366 - (show annotations) (download)
Wed Apr 20 20:04:05 2011 UTC (13 years ago) by torben
File size: 17514 byte(s)
More complete mobile site parser
1 package dk.thoerup.traininfoservice.banedk;
2
3
4 import java.net.URL;
5 import java.net.URLEncoder;
6 import java.util.Collections;
7 import java.util.Map;
8 import java.util.logging.Logger;
9
10 import org.jsoup.nodes.Document;
11 import org.jsoup.nodes.Element;
12 import org.jsoup.select.Elements;
13
14 import dk.thoerup.android.traininfo.common.DepartureBean;
15 import dk.thoerup.android.traininfo.common.DepartureEntry;
16 import dk.thoerup.android.traininfo.common.StationBean.StationEntry;
17 import dk.thoerup.circuitbreaker.CircuitBreaker;
18 import dk.thoerup.circuitbreaker.CircuitBreakerManager;
19 import dk.thoerup.genericjavautils.HttpUtil;
20 import dk.thoerup.genericjavautils.TimeoutMap;
21 import dk.thoerup.traininfoservice.Statistics;
22 import dk.thoerup.traininfoservice.TraininfoSettings;
23 import dk.thoerup.traininfoservice.db.StationDAO;
24
25 public class DepartureFetcher {
26
/** Train category that a single station-code lookup is made for. */
enum TrainType { STOG, REGIONAL }
31
/** Caller-facing selection of which train categories to include in a lookup. */
enum FetchTrainType { STOG, REGIONAL, BOTH }
37
// Logger named after this class.
Logger logger = Logger.getLogger(DepartureFetcher.class.getName());

// Cache of assembled departure beans; keys are built in cachedLookupDepartures.
Map<String, DepartureBean> cache;

// DAO used to resolve a numeric station id to its station entry.
StationDAO stationDao = new StationDAO();


// Service configuration handed in through the constructor.
private TraininfoSettings settings;
46
/**
 * Creates a fetcher bound to the given settings.
 *
 * The departure cache is a TimeoutMap constructed with
 * {@code settings.getCacheTimeout()}.
 *
 * @param settings service configuration used for the cache timeout and,
 *                 later, for site selection and reply timeouts
 */
public DepartureFetcher(TraininfoSettings settings) {
    this.cache = new TimeoutMap<String, DepartureBean>(settings.getCacheTimeout());
    this.settings = settings;
}
51
52
53
54
55 public DepartureBean cachedLookupDepartures(int stationID, boolean arrival, FetchTrainType type) throws Exception {
56
57 final String key = "" + stationID + ":" + arrival + ":" + type.toString();
58
59 DepartureBean departureBean = cache.get(key);
60
61
62 if (departureBean == null) {
63 departureBean = lookupDepartures(stationID, arrival, type);
64 cache.put(key, departureBean);
65 } else {
66 Statistics.getInstance().incrementDepartureCacheHits();
67 logger.info("Departure: Cache hit " + key); //remove before production
68 }
69 return departureBean;
70 }
71
72
73 public DepartureBean lookupDepartures(int stationID, boolean arrival, FetchTrainType type) throws Exception {
74
75 DepartureBean departureBean = new DepartureBean();
76
77 StationEntry station = stationDao.getById(stationID);
78
79 departureBean.stationName = station.getName();
80
81 if (station.getRegional() != null && (type == FetchTrainType.REGIONAL||type == FetchTrainType.BOTH) ) {
82 DepartureBean tempBean = lookupDepartures(station.getRegional(), TrainType.REGIONAL, arrival);
83 departureBean.entries.addAll( tempBean.entries );
84 departureBean.notifications.addAll(tempBean.notifications);
85 }
86
87 if (station.getStrain() != null && (type == FetchTrainType.STOG||type == FetchTrainType.BOTH)) {
88 DepartureBean tempBean = lookupDepartures(station.getStrain(), TrainType.STOG, arrival);
89 departureBean.entries.addAll( tempBean.entries );
90 departureBean.notifications.addAll(tempBean.notifications);
91 }
92
93 if (departureBean.entries.size() == 0) {
94 logger.info("No departures found for station " + stationID);
95 }
96
97 if (type == FetchTrainType.BOTH) { //if we have both S-tog and regional order by departure/arrival time
98 Collections.sort( departureBean.entries );
99 }
100
101
102 return departureBean;
103 }
104
105 public DepartureBean lookupDepartures(String stationcode, TrainType type, boolean arrival) throws Exception {
106 if ( settings.getUseAzureSite() == true) {
107 return lookupDeparturesAzureSite(stationcode, type, arrival);
108 } else {
109 return lookupDeparturesMobileSite(stationcode, type, arrival);
110 }
111 }
112
113 private String getTypeStringAzure(TrainType type) {
114 switch (type) {
115 case STOG:
116 return "S-Tog";
117 case REGIONAL:
118 return "Fjerntog";
119 default:
120 return ""; //Can not happen
121 }
122 }
123
124 private String getTypeStringWww(TrainType type) {
125 switch (type) {
126 case STOG:
127 return "S2";
128 case REGIONAL:
129 return "FJRN";
130 default:
131 return ""; //Can not happen
132 }
133 }
134
135 public DepartureBean lookupDeparturesAzureSite(String stationcode, TrainType type, boolean arrival) throws Exception {
136
137 DepartureBean departureBean = new DepartureBean();
138
139
140 String typeString = getTypeStringAzure(type);
141 String arrivalDeparture = (arrival==false) ? "Afgang" : "Ankomst";
142
143 stationcode = URLEncoder.encode(stationcode,"ISO-8859-1");
144
145 String uri = "http://trafikinfo.bane.dk/Trafikinformation/AfgangAnkomst/" + arrivalDeparture + "/" + stationcode + "/" + typeString + "/UdvidetVisning";
146
147 logger.fine("URI: " + uri);
148 JsoupInvocation wrapper = new JsoupInvocation( new URL(uri), settings.getReplyTimeout() );
149 CircuitBreaker breaker = CircuitBreakerManager.getManager().getCircuitBreaker("banedk");
150
151 Document page = (Document) breaker.invoke(wrapper);
152
153 String tableName = arrival == false ? "afgangtabel" : "ankomsttabel";
154 Element table = page.getElementById(tableName);
155
156 if (table != null) {
157 Elements tableRows = table.getElementsByTag("tr");
158
159 //boolean tidsstregExists = (table.getElementsByAttributeValue("class", "Tidsstreg").size() > 0);
160 //boolean passedTidsstreg = false;
161
162 for (Element currentRow : tableRows) {
163 String rowClass = currentRow.attr("class");
164 /*
165 if (tidsstregExists == true && passedTidsstreg == false) {
166 if (currentRow.getElementsByAttributeValue("class", "Tidsstreg").size() > 0) {
167 passedTidsstreg = true;
168 } else {
169 continue;
170 }
171 }*/
172
173 if (rowClass != null && rowClass.toLowerCase().contains("station") ) {
174
175 Elements fields = currentRow.getElementsByTag("td");
176
177 DepartureEntry departure = new DepartureEntry();
178
179 String time = fields.get(0).text();
180 if (time.equals(""))
181 time = "0:00"; //Bane.dk bug work-around
182 departure.setTime(time);
183
184 int updated = extractUpdated( fields.get(1) );
185 departure.setUpdated(updated);
186
187 String trainNumber = fields.get(2).text();
188 if (type == TrainType.STOG) //If it is S-train we need to extract the trainNumber
189 trainNumber = trainNumber + " " + extractTrainNumberAzure(fields.get(2));
190 departure.setTrainNumber(trainNumber);
191
192 String destination = fields.get(3).text();
193 departure.setDestination(destination);
194
195 String origin = fields.get(4).text();
196 departure.setOrigin(origin);
197
198 String location = fields.get(5).text();
199 departure.setLocation(location);
200
201 String status = fields.get(6).text().trim();
202 departure.setStatus(status);
203
204 String note = extractNote( fields.get(7) );
205 departure.setNote(note);
206
207 departure.setType(typeString);
208
209 departureBean.entries.add( departure );
210 }
211 }
212 } else {
213 logger.warning("No departures found for station=" + stationcode + ", type=" + type);
214 }
215
216 Element notifDiv = page.getElementById("station_planlagte_text");
217 if (notifDiv != null) {
218
219 Elements tables = notifDiv.getElementsByTag("table");
220 for (Element tab : tables) {
221
222 Elements anchors = tab.getElementsByTag("a");
223 if (anchors.size() == 2) {
224 departureBean.notifications.add( anchors.get(1).text() );
225 }
226 }
227
228 }
229
230
231 return departureBean;
232 }
233
234 public DepartureBean lookupDeparturesMobileSite(String stationcode, TrainType traintype, boolean arrival) throws Exception {
235
236 DepartureBean departureBean = new DepartureBean();
237
238
239 String typeString = getTypeStringWww(traintype);
240 String arrivalDeparture = (arrival==false) ? "afgang" : "ankomst";
241
242 stationcode = URLEncoder.encode(stationcode,"ISO-8859-1");
243
244 //String uri = "http://trafikinfo.bane.dk/Trafikinformation/AfgangAnkomst/" + arrivalDeparture + "/" + stationcode + "/" + typeString + "/UdvidetVisning";
245 String uri = "http://mobil.bane.dk/mobilStation.asp?artikelID=5332&stat_kode=" + stationcode + "&webprofil=" + typeString +"&beskrivelse=&mode=ankomstafgang&ankomstafgang=" + arrivalDeparture + "&gemstation=&fuldvisning=1";
246 logger.fine("URI: " + uri);
247 JsoupInvocation wrapper = new JsoupInvocation( new URL(uri), settings.getReplyTimeout() );
248 CircuitBreaker breaker = CircuitBreakerManager.getManager().getCircuitBreaker("banedk");
249
250 Document page = (Document) breaker.invoke(wrapper);
251
252
253 Element content = page.getElementsByClass("contentDiv").get(0);
254
255
256 if (content != null) {
257 Elements tableRows = content.child(0).children();
258
259
260
261 for (Element currentRow : tableRows) {
262 if (currentRow.tagName().equals("br") ) {
263 break;
264 }
265
266
267 String link = currentRow.child(0).attr("href");
268
269 logger.fine( currentRow.text() );
270 logger.fine("Href: " + link);
271
272
273 String parts[] = currentRow.text().split(",");
274
275
276 DepartureEntry departure = new DepartureEntry();
277
278 //if we do these things upfront, then we are allowed to use continue statement when row contains no more data
279 departure.setType(typeString);
280 departureBean.entries.add( departure );
281
282 /*
283 http://mobil.bane.dk/mobilStation.asp?artikelID=5332&tognummer=111&webprofil=FJRN&mode=rute&strBemaerkning=Afg%E5r+fra+%C5rhus+H+kl%2E07%3A21++&strRefURL=%2FmobilStation%2Easp%3FartikelID%3D5332%26stat%5Fkode%3DAR%26webprofil%3DFJRN%26beskrivelse%3D%25C5rhus%2BH%26mode%3Dankomstafgang%26ankomstafgang%3Dafgang%26gemstation%3D
284 */
285 int offset = 0;
286
287 String time = parts[offset++];
288 if (time.equals(""))
289 time = "0:00"; //Bane.dk bug work-around
290 departure.setTime(time);
291
292 int updated = 4; //does not exist on mobile
293 departure.setUpdated(updated);
294
295 String trainNumber = extractTrainNumberMobile(link);
296 /*if (traintype == TrainType.STOG) //If it is S-train we need to extract the trainNumber
297 trainNumber = trainNumber + " " + extractTrainNumberAzure(fields.get(2));*/
298 departure.setTrainNumber(trainNumber);
299
300 if (traintype == TrainType.STOG) { //if it is stog the next vield is the "Line" code - this should be used somewhere, but skippint ahead for now
301 String stogLine = parts[offset++].trim();
302 departure.setTrainNumber(stogLine + " " + trainNumber);
303 }
304
305 String destination = parts[offset++].trim();;
306 departure.setDestination(destination);
307
308 String origin = "-"; // fields.get(4).text(); does not exist on mobile
309 departure.setOrigin(origin);
310
311 String location = ""; // fields.get(5).text(); does not exist on mobile
312 departure.setLocation(location);
313
314 if (offset == parts.length) {
315 continue;
316 }
317
318 if (parts[offset].trim().equalsIgnoreCase("NB!")) {
319 offset++;
320 }
321
322 if (offset == parts.length) {
323 continue;
324 }
325
326 String status = parts[offset++].trim();; //fields.get(6).text().trim(); - extract from url
327 departure.setStatus(status);
328
329 String note = ""; //extractNote( fields.get(7) ); - extract from url
330 departure.setNote(note);
331
332 }
333 } else {
334 logger.warning("No departures found for station=" + stationcode + ", type=" + traintype);
335 }
336
337 return departureBean;
338 }
339
340
341
342 public static String cleanText(String input) {
343 //apparently JSoup translates &nbsp; characters on www.bane.dk to 0xA0
344 return input.replace((char) 0xA0, (char)0x20).trim();
345 }
346
347
348 // old www site is not available any more
349 @Deprecated
350 public DepartureBean lookupDeparturesWwwSite(String stationcode, TrainType trainType, boolean arrival) throws Exception {
351
352 DepartureBean departureBean = new DepartureBean();
353
354 String type = getTypeStringWww(trainType);
355
356 stationcode = URLEncoder.encode(stationcode, "ISO-8859-1");
357
358
359 String uri = "http://www.bane.dk/visStation.asp?ArtikelID=4275&W=" + type + "&S=" + stationcode;
360 logger.fine("URI:" + uri);
361
362
363 JsoupInvocation wrapper = new JsoupInvocation( new URL(uri), settings.getReplyTimeout() );
364 CircuitBreaker breaker = CircuitBreakerManager.getManager().getCircuitBreaker("banedk");
365
366 Element page = (Element) breaker.invoke(wrapper);
367
368 String tableName = arrival == false ? "afgangtabel" : "ankomsttabel";
369 Element table = page.getElementById(tableName);
370
371
372
373 if (table != null) {
374 Elements tableRows = table.getElementsByTag("tr");
375
376 //boolean passedTidsstreg = false;
377 //boolean tidsstregExists = (table.getElementsByAttributeValue("class", "Tidsstreg").size() > 0);
378
379 for (Element currentRow : tableRows) {
380 String rowClass = currentRow.attr("class");
381 /*
382 if (tidsstregExists == true && passedTidsstreg == false) {
383 if (currentRow.getElementsByAttributeValue("class", "Tidsstreg").size() > 0) {
384 passedTidsstreg = true;
385 } else {
386 continue;
387 }
388 }*/
389
390
391 if (rowClass != null && rowClass.toLowerCase().contains("station") ) {
392 Elements fields = currentRow.getElementsByTag("td");
393
394 DepartureEntry departure = new DepartureEntry();
395
396
397
398 String time = cleanText( fields.get(0).getAllElements().get(2).text() );
399 if (time.equals(""))
400 time = "0:00"; //Bane.dk bug work-around
401 departure.setTime(time);
402
403 int updated = extractUpdated( fields.get(1) );
404 departure.setUpdated(updated);
405
406 String trainNumber = cleanText( fields.get(2).text() );
407 if (type.equalsIgnoreCase("S2")) //If it is S-train we need to extract the trainNumber
408 trainNumber = trainNumber + " " + extractTrainNumberWww(fields.get(2));
409 departure.setTrainNumber(trainNumber);
410
411 String destination = cleanText( fields.get(3).text() );
412 departure.setDestination(destination);
413
414 String origin = cleanText( fields.get(4).text() );
415 departure.setOrigin(origin);
416
417 String location = cleanText( fields.get(5).text() );
418 departure.setLocation(location);
419
420 String status = cleanText( fields.get(6).text() );
421 departure.setStatus(status);
422
423 String note = cleanText( extractNote( fields.get(7) ) );
424 departure.setNote(note);
425
426 departure.setType(type);
427
428 departureBean.entries.add(departure);
429
430
431 }
432 }
433 } else {
434 logger.warning("No departures found for station=" + stationcode + ", type=" + type);
435 }
436
437
438 return departureBean;
439 }
440
441
442 private int extractUpdated(Element updatedTd) { //extract the digit (in this case: 4) from "media/trafikinfo/opdater4.gif"
443 int updated = -1;
444
445 Elements updatedImgs = updatedTd.getElementsByTag("img");
446 String updatedStr = updatedImgs.get(0).attr("src");
447
448 if (updatedStr != null) {
449 for (int i=0; i<updatedStr.length(); i++) {
450 char c = updatedStr.charAt(i);
451 if ( Character.isDigit(c)) {
452 updated = Character.digit(c, 10);
453 break;
454 }
455 }
456 }
457 return updated;
458 }
459
460 private String extractNote(Element noteTd) {
461 String note = noteTd.text().trim();
462
463
464 Elements elems = noteTd.getElementsByClass("bemtype");
465 if (elems.size() > 0 && note.charAt(note.length()-1) == 'i')
466 note = note.substring(0,note.length() -1 );
467
468 return note.trim();
469 }
470
471 private String extractTrainNumberAzure(Element trainTd) {
472 Element anchorElement = trainTd.getElementsByTag("a").get(0);
473 String href = anchorElement.attr("href");
474
475 int pos = href.lastIndexOf('/');
476 String number = href.substring(pos+1);
477
478 return number;
479 }
480
481 private String extractTrainNumberMobile(String link) {
482 Map<String,String> elements = HttpUtil.decodeParams(link);
483
484 return elements.get("tognummer");
485 }
486
487 private String extractTrainNumberWww(Element trainTd) {
488 String number = "";
489 Element anchorElement = trainTd.getElementsByTag("a").get(0);
490 String href = anchorElement.attr("href");
491
492 String argstring = href.split("?")[1];
493 Map<String,String> elements = HttpUtil.decodeParams(argstring);
494 number = elements.get("TogNr");
495
496
497 /*String argstring = href.substring( href.indexOf('?') + 1);
498 String args[] = argstring.split("&");
499 for (String arg : args) {
500 String pair[] = arg.split("="); // Key=pair[0], Value=pair[1]
501
502 if (pair[0].equalsIgnoreCase("TogNr"))
503 number = pair[1];
504 }*/
505
506
507 return number;
508 }
509
510
511 //test
512 /*
513 public static void main(String args[]) throws Exception {
514 DepartureFetcher f = new DepartureFetcher();
515 List<DepartureBean> deps = f.lookupDepartures("AR", "FJRN");
516 for(DepartureBean d : deps) {
517 System.out.println( d.getTime() + ";" + d.getUpdated() + ";" + d.getTrainNumber() + ";" +
518 d.getDestination() + ";" + d.getOrigin() + ";" + d.getLocation() + ";" + d.getStatus() + ";" + d.getNote() );
519 }
520
521 System.out.println("--------------------------");
522 }*/
523 }

  ViewVC Help
Powered by ViewVC 1.1.20