--- android/TrainInfoService/src/dk/thoerup/traininfoservice/banedk/DepartureFetcher.java 2010/07/09 22:30:18 974 +++ android/TrainInfoService/src/dk/thoerup/traininfoservice/banedk/DepartureFetcher.java 2012/08/21 06:45:41 1833 @@ -1,22 +1,27 @@ package dk.thoerup.traininfoservice.banedk; -import java.util.ArrayList; + +import java.net.URL; +import java.net.URLEncoder; import java.util.Collections; -import java.util.List; +import java.util.Comparator; import java.util.Map; import java.util.logging.Logger; -import com.gargoylesoftware.htmlunit.BrowserVersion; -import com.gargoylesoftware.htmlunit.WebClient; -import com.gargoylesoftware.htmlunit.html.DomNodeList; -import com.gargoylesoftware.htmlunit.html.HtmlElement; -import com.gargoylesoftware.htmlunit.html.HtmlPage; - +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import dk.thoerup.android.traininfo.common.DepartureBean; +import dk.thoerup.android.traininfo.common.DepartureEntry; +import dk.thoerup.android.traininfo.common.StationEntry; import dk.thoerup.circuitbreaker.CircuitBreaker; import dk.thoerup.circuitbreaker.CircuitBreakerManager; -import dk.thoerup.traininfoservice.StationBean; -import dk.thoerup.traininfoservice.StationDAO; +import dk.thoerup.genericjavautils.HttpUtil; +import dk.thoerup.genericjavautils.TimeoutMap; import dk.thoerup.traininfoservice.Statistics; +import dk.thoerup.traininfoservice.TraininfoSettings; +import dk.thoerup.traininfoservice.db.StationDAO; public class DepartureFetcher { @@ -25,71 +30,133 @@ REGIONAL } + enum FetchTrainType { + STOG, + REGIONAL, + BOTH + } + Logger logger = Logger.getLogger(DepartureFetcher.class.getName()); - Map> cache; + Map cache; StationDAO stationDao = new StationDAO(); - private boolean useTempSite; + + private TraininfoSettings settings; + + Comparator departureTimeComparator = new Comparator() { + + @Override + public int compare(DepartureEntry arg0, DepartureEntry arg1) { + String timeStr1 = arg0.getTime().replace(":","").trim(); + String timeStr2 = arg1.getTime().replace(":","").trim(); + + int time1 = 0; + int time2 = 0; + + if (timeStr1.length() > 0) + time1 = Integer.parseInt(timeStr1); + + if (timeStr2.length() > 0) + time2 = Integer.parseInt(timeStr2); + + //work correctly when clock wraps around at midnight + if (Math.abs(time1-time2) < 1200) { + if (time1 > time2) + return 1; + else + return -1; + } else { + if (time1 < time2) + return 1; + else + return -1; + + } + + } + + }; - public DepartureFetcher(boolean tempSite, int cacheTimeout) { - useTempSite = tempSite; - cache = new TimeoutMap>(cacheTimeout); + public DepartureFetcher(TraininfoSettings settings) { + this.settings = settings; + cache = new TimeoutMap( settings.getCacheTimeout() ); } - public List cachedLookupDepartures(int stationID, boolean arrival) throws Exception { - final String key = "" + stationID + ":" + arrival; + public DepartureBean cachedLookupDepartures(int stationID, boolean arrival, FetchTrainType type) throws Exception { - List list = cache.get(key); + final String key = "" + stationID + ":" + arrival + ":" + type.toString(); + + DepartureBean departureBean = cache.get(key); - if (list == null) { - list = lookupDepartures(stationID,arrival); - cache.put(key, list); + if (departureBean == null) { + departureBean = lookupDepartures(stationID, arrival, type); + cache.put(key, departureBean); } else { Statistics.getInstance().incrementDepartureCacheHits(); logger.info("Departure: Cache hit " + key); //remove before production } - return list; + return departureBean; } - public List lookupDepartures(int stationID, boolean arrival) throws Exception { - List departureList = new ArrayList(); + public DepartureBean lookupDepartures(int stationID, boolean arrival, FetchTrainType type) throws Exception { + + DepartureBean departureBean = new DepartureBean(); - StationBean station = stationDao.getById(stationID); + StationEntry station = stationDao.getById(stationID); - if (station.getRegional() != null) { - List list = lookupDepartures(station.getRegional(), TrainType.REGIONAL, arrival); - departureList.addAll(list); + departureBean.stationName = station.getName(); + + //TODO: FetchTraintype.Both should be removed some time after 0.9.5 release + if (station.getRegional() != null && (type == FetchTrainType.REGIONAL||type == FetchTrainType.BOTH) ) { + DepartureBean tempBean = lookupDepartures(station.getRegional(), TrainType.REGIONAL, arrival); + departureBean.entries.addAll( tempBean.entries ); + departureBean.notifications.addAll(tempBean.notifications); } - if (station.getStrain() != null) { - List list = lookupDepartures(station.getStrain(), TrainType.STOG, arrival); - departureList.addAll(list); + if (station.getStrain() != null && (type == FetchTrainType.STOG||type == FetchTrainType.BOTH)) { + DepartureBean tempBean = lookupDepartures(station.getStrain(), TrainType.STOG, arrival); + departureBean.entries.addAll( tempBean.entries ); + departureBean.notifications.addAll(tempBean.notifications); } - Collections.sort( departureList ); + if (departureBean.entries.size() == 0) { + logger.info("No departures found for station " + stationID); + } + + //TODO: FetchTraintype.Both should be removed some time after 0.9.5 release + if (type == FetchTrainType.BOTH) { //if we have both S-tog and regional order by departure/arrival time + Collections.sort( departureBean.entries, departureTimeComparator); + } + + System.out.println("Trit: " + settings.isTritinfoEnabled() + " " + station.getTritStation() ); + if ( settings.isTritinfoEnabled() && station.getTritStation() != -1) { + try { + injectTritinfoData(departureBean, station); + } catch (Exception ex) { //det er ikke kritisk at vi får perron numre med + ex.printStackTrace(); + } + } - return departureList; + return departureBean; } - public List lookupDepartures(String stationcode, TrainType type, boolean arrival) throws Exception { - if (useTempSite == false) { - return lookupDeparturesNormalSite(stationcode, type, arrival); - } else { - //return lookupDeparturesFromTemporarySite(stationcode, type); - //TODO: find out what to to if they ever put a temp site up on trafikinfo.bane.dk - return null; + public DepartureBean lookupDepartures(String stationcode, TrainType type, boolean arrival) throws Exception { + if ( settings.getBackend() == TraininfoSettings.Backend.Azure) { + return lookupDeparturesAzureSite(stationcode, type, arrival); + } else { + return lookupDeparturesMobileSite(stationcode, type, arrival); } } - private String getTypeString(TrainType type) { + private String getTypeStringAzure(TrainType type) { switch (type) { case STOG: return "S-Tog"; @@ -100,41 +167,62 @@ } } - public List lookupDeparturesNormalSite(String stationcode, TrainType type, boolean arrival) throws Exception { - - List departureList = new ArrayList(); + private String getTypeStringWww(TrainType type) { + switch (type) { + case STOG: + return "S2"; + case REGIONAL: + return "FJRN"; + default: + return ""; //Can not happen + } + } + + public DepartureBean lookupDeparturesAzureSite(String stationcode, TrainType type, boolean arrival) throws Exception { - final WebClient webClient = new WebClient( BrowserVersion.FIREFOX_3 ); - webClient.setTimeout(2500); - webClient.setJavaScriptEnabled(false); - + DepartureBean departureBean = new DepartureBean(); + - String typeString = getTypeString(type); + String typeString = getTypeStringAzure(type); String arrivalDeparture = (arrival==false) ? "Afgang" : "Ankomst"; - - //String uri = "http://www.bane.dk/visStation.asp?ArtikelID=4275&W=" + type + "&S=" + stationcode; - String uri = "http://trafikinfo.bane.dk/Trafikinformation/AfgangAnkomst/" + arrivalDeparture + "/" + stationcode + "/" + typeString + "/UdvidetVisning"; + + stationcode = URLEncoder.encode(stationcode,"ISO-8859-1"); - //logger.info("URI: " + uri); - HtmlunitInvocation wrapper = new HtmlunitInvocation(webClient, uri); + String uri = "http://trafikinfo.bane.dk/Trafikinformation/AfgangAnkomst/" + arrivalDeparture + "/" + stationcode + "/" + typeString + "/UdvidetVisning"; + + logger.fine("URI: " + uri); + JsoupInvocation wrapper = new JsoupInvocation( new URL(uri), settings.getReplyTimeout() ); CircuitBreaker breaker = CircuitBreakerManager.getManager().getCircuitBreaker("banedk"); - HtmlPage page = (HtmlPage) breaker.invoke(wrapper); + Document page = (Document) breaker.invoke(wrapper); String tableName = arrival == false ? "afgangtabel" : "ankomsttabel"; - HtmlElement table = page.getElementById(tableName); + Element table = page.getElementById(tableName); if (table != null) { - DomNodeList tableRows = table.getElementsByTagName("tr"); + Elements tableRows = table.getElementsByTag("tr"); - for (HtmlElement currentRow : tableRows) { - String rowClass = currentRow.getAttribute("class"); + //boolean tidsstregExists = (table.getElementsByAttributeValue("class", "Tidsstreg").size() > 0); + //boolean passedTidsstreg = false; + + for (Element currentRow : tableRows) { + String rowClass = currentRow.attr("class"); + /* + if (tidsstregExists == true && passedTidsstreg == false) { + if (currentRow.getElementsByAttributeValue("class", "Tidsstreg").size() > 0) { + passedTidsstreg = true; + } else { + continue; + } + }*/ + if (rowClass != null && rowClass.toLowerCase().contains("station") ) { - DomNodeList fields = currentRow.getElementsByTagName("td"); + + Elements fields = currentRow.getElementsByTag("td"); - DepartureBean departure = new DepartureBean(); + DepartureEntry departure = new DepartureEntry(); - String time = fields.get(0).asText(); + String time = fields.get(0).text(); if (time.equals("")) time = "0:00"; //Bane.dk bug work-around departure.setTime(time); @@ -142,21 +230,21 @@ int updated = extractUpdated( fields.get(1) ); departure.setUpdated(updated); - String trainNumber = fields.get(2).asText(); + String trainNumber = fields.get(2).text(); if (type == TrainType.STOG) //If it is S-train we need to extract the trainNumber - trainNumber = trainNumber + " " + extractTrainNumber(fields.get(2)); + trainNumber = trainNumber + " " + extractTrainNumberAzure(fields.get(2)); departure.setTrainNumber(trainNumber); - String destination = fields.get(3).asText(); + String destination = fields.get(3).text(); departure.setDestination(destination); - String origin = fields.get(4).asText(); + String origin = fields.get(4).text(); departure.setOrigin(origin); - String location = fields.get(5).asText(); + String location = fields.get(5).text(); departure.setLocation(location); - String status = fields.get(6).asText().trim(); + String status = fields.get(6).text().trim(); departure.setStatus(status); String note = extractNote( fields.get(7) ); @@ -164,91 +252,244 @@ departure.setType(typeString); - departureList.add(departure); + departureBean.entries.add( departure ); } } } else { logger.warning("No departures found for station=" + stationcode + ", type=" + type); } - webClient.closeAllWindows(); - return departureList; + Element notifDiv = page.getElementById("station_planlagte_text"); + if (notifDiv != null) { + + Elements tables = notifDiv.getElementsByTag("table"); + for (Element tab : tables) { + + Elements anchors = tab.getElementsByTag("a"); + if (anchors.size() == 2) { + departureBean.notifications.add( anchors.get(1).text() ); + } + } + + } + + + return departureBean; } - /* - @Deprecated - public List lookupDeparturesFromTemporarySite(String stationcode, String type) throws Exception { - - List departureList = new ArrayList(); + public DepartureBean lookupDeparturesMobileSite(String stationcode, TrainType traintype, boolean arrival) throws Exception { - final WebClient webClient = new WebClient(BrowserVersion.FIREFOX_3); - webClient.setTimeout(2500); - webClient.setJavaScriptEnabled(false); + DepartureBean departureBean = new DepartureBean(); + + String typeString = getTypeStringWww(traintype); + String arrivalDeparture = (arrival==false) ? "afgang" : "ankomst"; + + stationcode = URLEncoder.encode(stationcode,"ISO-8859-1"); - String uri = "http://bane.dk/lite/station.asp?w=" + type + "&s=" + stationcode; - HtmlunitInvocation wrapper = new HtmlunitInvocation(webClient, uri); + String uri = "http://mobil.bane.dk/mobilStation.asp?artikelID=5332&stat_kode=" + stationcode + "&webprofil=" + typeString +"&beskrivelse=&mode=ankomstafgang&ankomstafgang=" + arrivalDeparture + "&gemstation=&fuldvisning=1"; + logger.fine("URI: " + uri); + JsoupInvocation wrapper = new JsoupInvocation( new URL(uri), settings.getReplyTimeout() ); CircuitBreaker breaker = CircuitBreakerManager.getManager().getCircuitBreaker("banedk"); - HtmlPage page = (HtmlPage) breaker.invoke(wrapper); + Document page = (Document) breaker.invoke(wrapper); + + + Element content = page.getElementsByClass("contentDiv").get(0); - HtmlElement table = page.getElementById("traf_afgang"); - if (table != null) { - DomNodeList tableRows = table.getElementsByTagName("tr"); + if (content != null) { + Elements tableRows = content.child(0).children(); - boolean isFirst = true; + - for (HtmlElement currentRow : tableRows) { - if (isFirst == true) { //skip table headers - isFirst = false; - continue; + for (Element currentRow : tableRows) { + if (currentRow.tagName().equals("br") ) { + break; } - DomNodeList fields = currentRow.getElementsByTagName("td"); - - DepartureBean departure = new DepartureBean(); + + String link = currentRow.child(0).attr("href"); + + logger.fine( currentRow.text() ); + logger.fine("Href: " + link); + - String time = fields.get(0).asText().trim(); + String parts[] = currentRow.text().split(","); + + DepartureEntry departure = new DepartureEntry(); + + //if we do these things upfront, then we are allowed to use continue statement when row contains no more data + departure.setType(typeString); + departureBean.entries.add( departure ); + +/* +http://mobil.bane.dk/mobilStation.asp?artikelID=5332&tognummer=111&webprofil=FJRN&mode=rute&strBemaerkning=Afg%E5r+fra+%C5rhus+H+kl%2E07%3A21++&strRefURL=%2FmobilStation%2Easp%3FartikelID%3D5332%26stat%5Fkode%3DAR%26webprofil%3DFJRN%26beskrivelse%3D%25C5rhus%2BH%26mode%3Dankomstafgang%26ankomstafgang%3Dafgang%26gemstation%3D +*/ + int offset = 0; + + String time = parts[offset++]; if (time.equals("")) time = "0:00"; //Bane.dk bug work-around departure.setTime(time); + int updated = 4; //does not exist on mobile + departure.setUpdated(updated); - String trainNumber = fields.get(1).asText(); + String trainNumber = extractTrainNumberMobile(link); + /*if (traintype == TrainType.STOG) //If it is S-train we need to extract the trainNumber + trainNumber = trainNumber + " " + extractTrainNumberAzure(fields.get(2));*/ departure.setTrainNumber(trainNumber); - String destination = fields.get(2).asText(); + if (traintype == TrainType.STOG) { //if it is stog the next vield is the "Line" code - this should be used somewhere, but skippint ahead for now + String stogLine = parts[offset++].trim(); + departure.setTrainNumber(stogLine + " " + trainNumber); + } + + String destination = parts[offset++].trim();; departure.setDestination(destination); - String origin = fields.get(3).asText(); + String origin = "-"; // fields.get(4).text(); does not exist on mobile departure.setOrigin(origin); - String status = fields.get(4).asText(); + String location = ""; // fields.get(5).text(); does not exist on mobile + departure.setLocation(location); + + if (offset == parts.length) { + continue; + } + + if (parts[offset].trim().equalsIgnoreCase("NB!")) { + offset++; + } + + if (offset == parts.length) { + continue; + } + + String status = parts[offset++].trim();; //fields.get(6).text().trim(); - extract from url departure.setStatus(status); - String note = fields.get(5).asText(); + String note = ""; //extractNote( fields.get(7) ); - extract from url departure.setNote(note); - departureList.add(departure); } } else { - logger.warning("No departures found for station=" + stationcode + ", type=" + type); + logger.warning("No departures found for station=" + stationcode + ", type=" + traintype); } - webClient.closeAllWindows(); + return departureBean; + } + + + + public static String cleanText(String input) { + //apparently JSoup translates   characters on www.bane.dk to 0xA0 + return input.replace((char) 0xA0, (char)0x20).trim(); + } + + + // old www site is not available any more + @Deprecated + public DepartureBean lookupDeparturesWwwSite(String stationcode, TrainType trainType, boolean arrival) throws Exception { + + DepartureBean departureBean = new DepartureBean(); + + String type = getTypeStringWww(trainType); + + stationcode = URLEncoder.encode(stationcode, "ISO-8859-1"); + + + String uri = "http://www.bane.dk/visStation.asp?ArtikelID=4275&W=" + type + "&S=" + stationcode; + logger.fine("URI:" + uri); + + + JsoupInvocation wrapper = new JsoupInvocation( new URL(uri), settings.getReplyTimeout() ); + CircuitBreaker breaker = CircuitBreakerManager.getManager().getCircuitBreaker("banedk"); + + Element page = (Element) breaker.invoke(wrapper); + + String tableName = arrival == false ? "afgangtabel" : "ankomsttabel"; + Element table = page.getElementById(tableName); - return departureList; - }*/ + + if (table != null) { + Elements tableRows = table.getElementsByTag("tr"); + + //boolean passedTidsstreg = false; + //boolean tidsstregExists = (table.getElementsByAttributeValue("class", "Tidsstreg").size() > 0); + + for (Element currentRow : tableRows) { + String rowClass = currentRow.attr("class"); + /* + if (tidsstregExists == true && passedTidsstreg == false) { + if (currentRow.getElementsByAttributeValue("class", "Tidsstreg").size() > 0) { + passedTidsstreg = true; + } else { + continue; + } + }*/ + + + if (rowClass != null && rowClass.toLowerCase().contains("station") ) { + Elements fields = currentRow.getElementsByTag("td"); - private int extractUpdated(HtmlElement updatedTd) { //extract the digit (in this case: 4) from "media/trafikinfo/opdater4.gif" + DepartureEntry departure = new DepartureEntry(); + + + + String time = cleanText( fields.get(0).getAllElements().get(2).text() ); + if (time.equals("")) + time = "0:00"; //Bane.dk bug work-around + departure.setTime(time); + + int updated = extractUpdated( fields.get(1) ); + departure.setUpdated(updated); + + String trainNumber = cleanText( fields.get(2).text() ); + if (type.equalsIgnoreCase("S2")) //If it is S-train we need to extract the trainNumber + trainNumber = trainNumber + " " + extractTrainNumberWww(fields.get(2)); + departure.setTrainNumber(trainNumber); + + String destination = cleanText( fields.get(3).text() ); + departure.setDestination(destination); + + String origin = cleanText( fields.get(4).text() ); + departure.setOrigin(origin); + + String location = cleanText( fields.get(5).text() ); + departure.setLocation(location); + + String status = cleanText( fields.get(6).text() ); + departure.setStatus(status); + + String note = cleanText( extractNote( fields.get(7) ) ); + departure.setNote(note); + + departure.setType(type); + + departureBean.entries.add(departure); + + + } + } + } else { + logger.warning("No departures found for station=" + stationcode + ", type=" + type); + } + + + return departureBean; + } + + + private int extractUpdated(Element updatedTd) { //extract the digit (in this case: 4) from "media/trafikinfo/opdater4.gif" int updated = -1; - DomNodeList updatedImgs = updatedTd.getElementsByTagName("img"); - String updatedStr = updatedImgs.get(0).getAttribute("src"); + Elements updatedImgs = updatedTd.getElementsByTag("img"); + String updatedStr = updatedImgs.get(0).attr("src"); if (updatedStr != null) { for (int i=0; i elems = noteTd.getElementsByAttribute("span", "class", "bemtype"); + + Elements elems = noteTd.getElementsByClass("bemtype"); if (elems.size() > 0 && note.charAt(note.length()-1) == 'i') note = note.substring(0,note.length() -1 ); - return note; + return note.trim(); } - private String extractTrainNumber(HtmlElement trainTd) { - HtmlElement anchorElement = trainTd.getElementsByTagName("a").get(0); - String href = anchorElement.getAttribute("href"); + private String extractTrainNumberAzure(Element trainTd) { + Element anchorElement = trainTd.getElementsByTag("a").get(0); + String href = anchorElement.attr("href"); int pos = href.lastIndexOf('/'); String number = href.substring(pos+1); @@ -282,6 +524,82 @@ return number; } + private String extractTrainNumberMobile(String link) { + Map elements = HttpUtil.decodeParams(link); + + return elements.get("tognummer"); + } + + private String extractTrainNumberWww(Element trainTd) { + String number = ""; + Element anchorElement = trainTd.getElementsByTag("a").get(0); + String href = anchorElement.attr("href"); + + String argstring = href.split("?")[1]; + Map elements = HttpUtil.decodeParams(argstring); + number = elements.get("TogNr"); + + + /*String argstring = href.substring( href.indexOf('?') + 1); + String args[] = argstring.split("&"); + for (String arg : args) { + String pair[] = arg.split("="); // Key=pair[0], Value=pair[1] + + if (pair[0].equalsIgnoreCase("TogNr")) + number = pair[1]; + }*/ + + + return number; + } + + + private void injectTritinfoData(DepartureBean departureBean, StationEntry station) throws Exception { + String uri = "http://tritinfo.pallas.dk/webtavle?page=stationcontent&staid=" + station.getTritStation(); + logger.fine("URI:" + uri); + System.out.println("URI:" + uri); + + + JsoupInvocation wrapper = new JsoupInvocation( new URL(uri), settings.getReplyTimeout() ); + CircuitBreaker breaker = CircuitBreakerManager.getManager().getCircuitBreaker("tritinfo"); + + Element page = (Element) breaker.invoke(wrapper); + + Element table = page.getElementsByClass("passages").get(0); + + Elements trains = table.getElementsByClass("train"); + + for (int i=0; i