/[projects]/android/TrainInfoService/src/dk/thoerup/traininfoservice/banedk/DepartureFetcher.java
ViewVC logotype

Contents of /android/TrainInfoService/src/dk/thoerup/traininfoservice/banedk/DepartureFetcher.java

Parent Directory | Revision Log


Revision 580 - (show annotations) (download)
Tue Feb 2 18:42:38 2010 UTC (14 years, 3 months ago) by torben
File size: 9238 byte(s)
Added a screenscraper for bane.dk's temporary site
1 package dk.thoerup.traininfoservice.banedk;
2
3 import java.sql.Connection;
4 import java.sql.ResultSet;
5 import java.sql.Statement;
6 import java.util.ArrayList;
7 import java.util.Collections;
8 import java.util.List;
9 import java.util.Map;
10 import java.util.logging.Logger;
11
12 import com.gargoylesoftware.htmlunit.WebClient;
13 import com.gargoylesoftware.htmlunit.html.DomNodeList;
14 import com.gargoylesoftware.htmlunit.html.HtmlElement;
15 import com.gargoylesoftware.htmlunit.html.HtmlPage;
16
17 import dk.thoerup.circuitbreaker.CircuitBreaker;
18 import dk.thoerup.circuitbreaker.CircuitBreakerManager;
19 import dk.thoerup.traininfoservice.DBConnection;
20
21 public class DepartureFetcher {
22
23 Logger logger = Logger.getLogger(DepartureFetcher.class.getName());
24
25 Map<Integer, List<DepartureBean>> cache = new TimeoutMap<Integer,List<DepartureBean>>(120 * 1000);
26
27 private boolean useTempSite;
28
29 public DepartureFetcher(boolean tempSite) {
30 useTempSite = tempSite;
31 }
32
33
34
35
36 public List<DepartureBean> cachedLookupDepartures(int stationID) throws Exception {
37
38 List<DepartureBean> list = cache.get(stationID);
39
40 if (list == null) {
41 list = lookupDepartures(stationID);
42 cache.put(stationID, list);
43 } else {
44 logger.info("Departure: Cache hit " + stationID); //remove before production
45 }
46 return list;
47 }
48
49
50 public List<DepartureBean> lookupDepartures(int stationID) throws Exception {
51 List<DepartureBean> departureList = new ArrayList<DepartureBean>();
52
53 Connection conn = null;
54 try
55 {
56 conn = DBConnection.getConnection();
57
58 String SQL = "SELECT stationcode_fjrn, stationcode_stog FROM trainstations WHERE id=" + stationID;
59 Statement stmt = conn.createStatement();
60 ResultSet rs = stmt.executeQuery(SQL);
61
62 if (rs.next()) {
63 String code = rs.getString( 1 );
64 if (! rs.wasNull() ) {
65 List<DepartureBean> list = lookupDepartures(code, "FJRN");
66 departureList.addAll(list);
67 }
68
69 code = rs.getString(2);
70 if (! rs.wasNull() ) {
71 List<DepartureBean> list = lookupDepartures(code, "S2");
72 departureList.addAll(list);
73 }
74 Collections.sort( departureList );
75
76 }
77
78 } finally {
79 if (conn != null && !conn.isClosed() ) {
80 conn.close();
81 }
82 }
83
84 return departureList;
85 }
86
87 public List<DepartureBean> lookupDepartures(String stationcode, String type) throws Exception {
88 if (useTempSite == false) {
89 return lookupDeparturesNormalSite(stationcode, type);
90 } else {
91 return lookupDeparturesFromTemporarySite(stationcode, type);
92 }
93 }
94
95 public List<DepartureBean> lookupDeparturesNormalSite(String stationcode, String type) throws Exception {
96
97 List<DepartureBean> departureList = new ArrayList<DepartureBean>();
98
99 final WebClient webClient = new WebClient();
100 webClient.setTimeout(2500);
101 webClient.setJavaScriptEnabled(false);
102
103 String uri = "http://www.bane.dk/visStation.asp?ArtikelID=4275&W=" + type + "&S=" + stationcode;
104 BanedkInvocation wrapper = new BanedkInvocation(webClient, uri);
105 CircuitBreaker breaker = CircuitBreakerManager.getManager().getCircuitBreaker("banedk");
106
107 HtmlPage page = (HtmlPage) breaker.invoke(wrapper);
108
109 HtmlElement table = page.getElementById("afgangtabel");
110
111 if (table != null) {
112 DomNodeList<HtmlElement> tableRows = table.getElementsByTagName("tr");
113
114 for (HtmlElement currentRow : tableRows) {
115 String rowClass = currentRow.getAttribute("class");
116 if (rowClass != null && rowClass.toLowerCase().contains("station") ) {
117 DomNodeList<HtmlElement> fields = currentRow.getElementsByTagName("td");
118
119 DepartureBean departure = new DepartureBean();
120
121 String time = fields.get(0).asText();
122 if (time.equals(""))
123 time = "0:00"; //Bane.dk bug work-around
124 departure.setTime(time);
125
126 int updated = extractUpdated( fields.get(1) );
127 departure.setUpdated(updated);
128
129 String trainNumber = fields.get(2).asText();
130 if (type.equalsIgnoreCase("S2")) //If it is S-train we need to extract the trainNumber
131 trainNumber = trainNumber + " " + extractTrainNumber(fields.get(2));
132 departure.setTrainNumber(trainNumber);
133
134 String destination = fields.get(3).asText();
135 departure.setDestination(destination);
136
137 String origin = fields.get(4).asText();
138 departure.setOrigin(origin);
139
140 String location = fields.get(5).asText();
141 departure.setLocation(location);
142
143 String status = fields.get(6).asText();
144 departure.setStatus(status);
145
146 String note = extractNote( fields.get(7) );
147 departure.setNote(note);
148
149 departureList.add(departure);
150 }
151 }
152 } else {
153 logger.warning("No departures found for station=" + stationcode + ", type=" + type);
154 }
155
156 return departureList;
157 }
158
159 public List<DepartureBean> lookupDeparturesFromTemporarySite(String stationcode, String type) throws Exception {
160
161 List<DepartureBean> departureList = new ArrayList<DepartureBean>();
162
163 final WebClient webClient = new WebClient();
164 webClient.setTimeout(2500);
165 webClient.setJavaScriptEnabled(false);
166
167
168 String uri = "http://bane.dk/lite/station.asp?w=" + type + "&s=" + stationcode;
169
170 BanedkInvocation wrapper = new BanedkInvocation(webClient, uri);
171 CircuitBreaker breaker = CircuitBreakerManager.getManager().getCircuitBreaker("banedk");
172
173 HtmlPage page = (HtmlPage) breaker.invoke(wrapper);
174
175 HtmlElement table = page.getElementById("traf_afgang");
176
177 if (table != null) {
178 DomNodeList<HtmlElement> tableRows = table.getElementsByTagName("tr");
179
180 boolean isFirst = true;
181
182 for (HtmlElement currentRow : tableRows) {
183 if (isFirst == true) { //skip table headers
184 isFirst = false;
185 continue;
186 }
187
188 DomNodeList<HtmlElement> fields = currentRow.getElementsByTagName("td");
189
190 DepartureBean departure = new DepartureBean();
191
192 String time = fields.get(0).asText().trim();
193 logger.info("time:" + time);
194 if (time.equals(""))
195 time = "0:00"; //Bane.dk bug work-around
196 departure.setTime(time);
197
198
199 String trainNumber = fields.get(1).asText();
200 if (type.equalsIgnoreCase("S2")) //If it is S-train we need to extract the trainNumber
201 trainNumber = trainNumber + " " + extractTrainNumber(fields.get(2));
202 departure.setTrainNumber(trainNumber);
203
204 String destination = fields.get(2).asText();
205 departure.setDestination(destination);
206
207 String origin = fields.get(3).asText();
208 departure.setOrigin(origin);
209
210 String status = fields.get(4).asText();
211 departure.setStatus(status);
212
213 String note = extractNote( fields.get(5) );
214 departure.setNote(note);
215
216 departureList.add(departure);
217 }
218 } else {
219 logger.warning("No departures found for station=" + stationcode + ", type=" + type);
220 }
221
222 return departureList;
223 }
224
225
226 private int extractUpdated(HtmlElement updatedTd) { //extract the digit (in this case: 4) from "media/trafikinfo/opdater4.gif"
227 int updated = -1;
228
229 DomNodeList<HtmlElement> updatedImgs = updatedTd.getElementsByTagName("img");
230 String updatedStr = updatedImgs.get(0).getAttribute("src");
231
232 if (updatedStr != null) {
233 for (int i=0; i<updatedStr.length(); i++) {
234 char c = updatedStr.charAt(i);
235 if ( Character.isDigit(c)) {
236 updated = Character.digit(c, 10);
237 break;
238 }
239 }
240 }
241 return updated;
242 }
243
244 private String extractNote(HtmlElement noteTd) {
245 String note = noteTd.asText().trim();
246
247 List<HtmlElement> elems = noteTd.getElementsByAttribute("span", "class", "bemtype");
248 if (elems.size() > 0 && note.charAt(note.length()-1) == 'i')
249 note = note.substring(0,note.length() -1 );
250
251 return note;
252 }
253
254 private String extractTrainNumber(HtmlElement trainTd) {
255 String number = "";
256 HtmlElement anchorElement = trainTd.getElementsByTagName("a").get(0);
257 String href = anchorElement.getAttribute("href");
258 String argstring = href.substring( href.indexOf('?') + 1);
259
260 String args[] = argstring.split("&");
261 for (String arg : args) {
262 String pair[] = arg.split("="); // Key=pair[0], Value=pair[1]
263
264 if (pair[0].equalsIgnoreCase("TogNr"))
265 number = pair[1];
266 }
267
268
269
270 return number;
271 }
272
273 //test
274 /*
275 public static void main(String args[]) throws Exception {
276 DepartureFetcher f = new DepartureFetcher();
277 List<DepartureBean> deps = f.lookupDepartures("AR", "FJRN");
278 for(DepartureBean d : deps) {
279 System.out.println( d.getTime() + ";" + d.getUpdated() + ";" + d.getTrainNumber() + ";" +
280 d.getDestination() + ";" + d.getOrigin() + ";" + d.getLocation() + ";" + d.getStatus() + ";" + d.getNote() );
281 }
282
283 System.out.println("--------------------------");
284 }*/
285 }

  ViewVC Help
Powered by ViewVC 1.1.20