/*
 * AddressSearch: in-memory lookup from (postal code, free-text address) to an Address,
 * built from the database by buildSearchStructures().
 *
 * NOTE(review): this copy of the file looks damaged by a text-extraction step. The code
 * below is left exactly as found; do not repair it by guessing, restore it from version control.
 *   - Generic type arguments are missing (for example "Map>" and bare "List" / "ArrayList"),
 *     and each physical line break falls where an element type seems to have been removed.
 *   - Many statements share one physical line, so every end-of-line comment swallows the
 *     code that follows it on that line.
 *   - In search(String, String, boolean) the text between "i<=MAX_TRIES && i" and "> gade ="
 *     is missing: the rest of the retry loop and whatever came after it.
 *
 * Index structures (fields):
 *   searchPostnrVejnavnGadeid  postal code -> (washed street name or alias -> street id "gadeident")
 *   searchGadeidentAdresser    street id -> house number -> list of addresses, one per letter ("litra")
 *   helperCache                "postnr/washedStreetName" -> street id, filled when an external helper hits
 *   ikkeDaekkedeAdresser, hundredePctAdresser
 *                              addresses collected by coverage type in the statistics pass
 *
 * Methods, as far as the visible code shows:
 *   getAddressSafe(gadeid, husnr, litra)
 *       Returns the address whose husnrbogstav equals litra. Throws Exception when the street id
 *       is unknown or no letter matches.
 *       NOTE(review): the list for husnr is not null-checked before the loop.
 *   search(postnr, adresse[, naermesteHusnr])
 *       Trims the input and parses the postal code as a short. Returns ERROR_UNKNOWN_POSTAL,
 *       ERROR_MISSING_HOUSENUMBER or ERROR_POSTBOX (washed name contains "pakkebo", "pakkepost",
 *       "postbo", "døgnpost" or "døgnbo", or equals "id") before any street lookup.
 *       After the street is resolved: ERROR_INTERNAL when the street id has no address map.
 *       A missing house number gives ERROR_UNKNOWN_ADDRESSPOINT, unless naermesteHusnr is set;
 *       then the key from NearestShortTreeMap.getNearestKey is used and nearestHusnr is flagged.
 *       A missing letter falls back to the first entry when config.nearestLitra or
 *       naermesteHusnr is set, otherwise ERROR_UNKNOWN_ADDRESSPOINT.
 *       Distributor "LUKKET" or coverage DAEKNING_IKKEDAEKKET gives STATUS_NOT_COVERED, else STATUS_OK.
 *       NOTE(review): Short.parseShort on the split house number is not guarded; whether
 *       splitAdresse can return a non-numeric value is not visible here.
 *       NOTE(review): the nearest-house-number branch casts the street map to TreeMap; it is
 *       created as a TreeMap in buildSearchStructures().
 *   lookupStretname(request, result, useExternal)
 *       Tries in order: exact washed name, helperCache, Levenshtein helper, double-metaphone
 *       helper, then (only when useExternal) the OSM helper and finally the Google helper.
 *   helperWrapper(vej, postnrVeje, helperSearchKey)
 *       Washes the proposed name, looks it up, and caches the street id under helperSearchKey on a hit.
 *   buildSearchStructures()
 *       Stage 1: loads all addresses and builds both index maps (street names via AliasGenerator).
 *       Stage 2: adds database aliases for streets already known, only where the alias is not yet mapped.
 *       Stage 3: extended coverage. Copies Monday routing from a target address for distributors
 *                "DAO" and "BK" and marks the address DAEKNING_UDVIDET. Entries are skipped when
 *                either address is missing or has distributor "LUKKET".
 *                NOTE(review): targetAddress.distributor.equals(...) is not null-safe, while the
 *                "LUKKET" checks just above use the null-safe StringUtils.equals.
 *       Stage 4: per postal code, fills every weekday routing whose rute is null from the
 *                100-percent bean. An address that was DAEKNING_IKKEDAEKKET and got at least
 *                one day filled becomes DAEKNING_100PCT.
 *       Finally counts addresses per coverage type, fills and sorts the two result lists,
 *       and prints timings.
 *       NOTE(review): stats and the two result lists are never reset here or in clear(), so a
 *       second build appears to append to them. Confirm whether rebuilds happen on the same instance.
 *   update100PctRouting(bean, routing)
 *       Copies rute and koreliste from bean when routing.rute is null. Returns 1 if it did, else 0.
 *   calculateExtendedDaoRoute(eb, targetRoute)
 *       Returns the target rute prefixed with one to four dots, chosen by eb.afstand.
 *       Upper bounds: "cykel" 0.151 / 0.501 / 0.701 / 1.001, "scooter" 0.151 / 0.801 / 1.201 / 2.101,
 *       "bil" 0.151 / 1.001 / 1.601 / 2.601. Returns null when the target has no rute, the
 *       distance is beyond the last bound, or the transport type is unknown (which is printed).
 *       The unit of afstand is not visible here.
 *   calculateExtendedBkKoreliste(eb, targetRoute)
 *       Returns null when the target has no rute. Otherwise injects "." (afstand <= 0.500) or ".."
 *       into the target koreliste via AddressUtils.injectIntoBk.
 *   getPostnumre / getVejnavne / getHusnumre
 *       Return sorted copies. NOTE(review): getVejnavne and getHusnumre do not null-check the map lookup.
 *   getAdresse(gadeid, husnrStr)
 *       Digits of husnrStr form the house number and the remaining characters form the letter.
 *       Returns the match, or null when no letter matches.
 *       NOTE(review): the lookups are not null-checked, and a husnrStr without digits makes
 *       Short.parseShort throw.
 *   getStretsByPostal / getNonCoveredAddresses / get100PctAddresses
 *       Return the internal collections directly, not copies.
 *
 * English for the main Danish comments below:
 *   "LookupStreetname er en indkapsling ..."  = lookupStretname wraps plain + Levenshtein + Google + OSM lookup
 *   "Hvis der ikke er direkte hits ..."       = with no direct hit, try cutting words off the street name;
 *                                               VERY experimental, it greatly increases Google/OSM calls
 *   "Denne søgning må ikke fejle"             = this lookup must not fail
 *   "Husnr ikke fundet" / "litra ikke fundet" = house number not found / letter not found
 *   "Brug OpenStreetMap før vi prøver google" = use OSM before Google; Google has a request limit, OSM does not
 *   "Mapper mellem db Row ID og adresse noden" = maps database row id to the address node, needed in stage 3
 *   "frigiv hukommelse inden stage2"          = free memory before stage 2
 *   "Kender ikke den oprindelige vej"         = the original street is unknown
 *   "Kopier resten af felterne"               = copy the remaining fields
 *   "hvis targetAdress.rute er null ..."      = if the target has no route, the address is not covered anyway
 */
package dk.daoas.daoadresseservice; import java.sql.SQLException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.TreeMap; import java.util.TreeSet; import java.util.concurrent.ConcurrentHashMap; import org.apache.commons.lang3.StringUtils; import dk.daoas.daoadresseservice.admin.ServiceConfig; import dk.daoas.daoadresseservice.beans.Address; import dk.daoas.daoadresseservice.beans.Address.RoutingInfo; import dk.daoas.daoadresseservice.beans.AliasBean; import dk.daoas.daoadresseservice.beans.DataStatisticsBean; import dk.daoas.daoadresseservice.beans.ExtendedBean; import dk.daoas.daoadresseservice.beans.HundredePctBean; import dk.daoas.daoadresseservice.beans.SearchRequest; import dk.daoas.daoadresseservice.beans.SearchResult; import dk.daoas.daoadresseservice.beans.SearchResult.Status; import dk.daoas.daoadresseservice.db.DatabaseLayer; import dk.daoas.daoadresseservice.util.DaoUtils; import dk.daoas.daoadresseservice.util.DeduplicateHelper; import dk.daoas.daoadresseservice.util.NaturalOrderComparator; import dk.daoas.daoadresseservice.util.NearestShortTreeMap; public class AddressSearch { private Map> searchPostnrVejnavnGadeid; private Map>> searchGadeidentAdresser; // Gadeident -> husnrumre -> litra private ArrayList
ikkeDaekkedeAdresser = new ArrayList
(60000); private ArrayList
hundredePctAdresser = new ArrayList
(120000); private Map helperCache; private DataStatisticsBean stats = new DataStatisticsBean(); private DatabaseLayer db; private ServiceConfig config; StreetnameHelper levenshteinHelper; StreetnameHelper metaphoneHelper; StreetnameHelper googleHelper; StreetnameHelper osmHelper; public AddressSearch(DatabaseLayer db, ServiceConfig config) { this.db = db; this.config = config; levenshteinHelper = new LevenshteinStreetnameHelper(); metaphoneHelper = new DoubleMetaphoneStreetnameHelper(config); googleHelper = new GoogleStreetnameHelper( config ); osmHelper = new OSMStreetnameHelper( config ); } public Address getAddressSafe(int gadeid, short husnr, String litra) throws Exception { Map> gade = searchGadeidentAdresser.get(gadeid); if (gade == null) { throw new Exception("GadeID ikke fundet"); } List
litraList = gade.get(husnr); Address addr = null; for(Address a : litraList) { if (a.husnrbogstav.equals(litra) ) { addr = a; break; } } if (addr == null) { throw new Exception("Adresse ikke ikke fundet"); } return addr; } public SearchResult search(String postnrStr, String adresse) { return search(postnrStr, adresse, false); } public SearchResult search(String postnrStr, String adresse, boolean naermesteHusnr) { postnrStr = postnrStr.trim(); adresse = adresse.trim(); SearchRequest request = new SearchRequest(); SearchResult result = new SearchResult(); try { request.postnr = Short.parseShort(postnrStr); } catch (Exception E) { result.status = Status.ERROR_UNKNOWN_POSTAL; return result; } request.streetNames = searchPostnrVejnavnGadeid.get(request.postnr); if (request.streetNames == null) { result.status = Status.ERROR_UNKNOWN_POSTAL; return result; } result.splitResult = AddressUtils.splitAdresse(adresse); request.vejnavn = result.splitResult.vej; if (result.splitResult.husnr.length() == 0) { result.status = Status.ERROR_MISSING_HOUSENUMBER; return result; } result.vasketVejnavn = AddressUtils.vaskVejnavn( request.vejnavn ); if (result.vasketVejnavn.indexOf("pakkebo") > -1 || result.vasketVejnavn.indexOf("pakkepost") > -1 || result.vasketVejnavn.indexOf("postbo") > -1 // postbox/postboks || result.vasketVejnavn.indexOf("døgnpost") > -1 || result.vasketVejnavn.indexOf("døgnbo") > -1 // døgnbox // døgnboks || result.vasketVejnavn.equals("id") ) { result.status = Status.ERROR_POSTBOX; return result; } // LookupStreetname er en indkapsling af alm + levenstein + google + osm Integer gadeident = lookupStretname(request, result, true); /* * Hvis der ikke er direkte hits, prøver vi at skære ord af vejnavn * * Denne sektion er MEGET experimental da den øger antallet af kald til google/OSM væsentligt*/ if (gadeident == null) { final int MAX_TRIES = 4; String vejnavnParts[] = request.vejnavn.split(" "); for (int i=1; i<=MAX_TRIES && i> gade = 
searchGadeidentAdresser.get(gadeident); if (gade == null) { //Denne søgning må ikke fejle result.status = Status.ERROR_INTERNAL; return result; } //Kunne evt klares med Iterables.get fra Guava/Collections Address firstAddress = gade.values().iterator().next().get(0); //Første husnr -> første litra result.vej = firstAddress.vejnavn; short husnrSearch = Short.parseShort(result.splitResult.husnr ); List
litraList = gade.get(husnrSearch); if (litraList == null) { //Husnr ikke fundet if (naermesteHusnr) { TreeMap> gadeTreeMap = (TreeMap>) gade; result.anvendtHusnr = NearestShortTreeMap.getNearestKey(husnrSearch, gadeTreeMap); result.nearestHusnr = true; litraList = gade.get(result.anvendtHusnr); } else { result.status = Status.ERROR_UNKNOWN_ADDRESSPOINT; return result; } } Address addr = null; for(Address a : litraList) { if ( a.husnrbogstav.equals( result.splitResult.litra ) ) { addr = a; break; } } if (addr == null) { //litra ikke fundet if (config.nearestLitra == true || naermesteHusnr == true) { result.nearestLitra = true; addr = litraList.get(0); } else { //husnr fundet men litra blev ikke fundet result.status = Status.ERROR_UNKNOWN_ADDRESSPOINT; return result; } } result.address = addr; if ( StringUtils.equals(addr.distributor, "LUKKET") ) { result.status = Status.STATUS_NOT_COVERED; //Skal vi have en special status til Lukkede adresser ? return result; } if (addr.daekningsType == DaekningsType.DAEKNING_IKKEDAEKKET) { result.status = Status.STATUS_NOT_COVERED; return result; } result.status = Status.STATUS_OK; return result; } private Integer lookupStretname(SearchRequest request, SearchResult result, boolean useExternal) { String helperSearchKey = ""; Integer gadeident = request.streetNames.get(result.vasketVejnavn); if ( gadeident == null) { helperSearchKey = "" + request.postnr + "/" + result.vasketVejnavn; gadeident = helperCache.get(helperSearchKey); } if (gadeident == null) { String vej = levenshteinHelper.proposeStreetName(request, result); if (vej != null) { gadeident = request.streetNames.get(vej); } } if (gadeident == null) { String vej = metaphoneHelper.proposeStreetName(request, result); if (vej != null) { gadeident = request.streetNames.get(vej); } } // Brug OpenStreetMap før vi prøver google // For google har en request limit, det har OSM ikke! 
if ( gadeident == null && useExternal) { String vej = osmHelper.proposeStreetName(request, result); gadeident = helperWrapper(vej, request.streetNames, helperSearchKey); } if ( gadeident == null && useExternal) { String vej = googleHelper.proposeStreetName(request, result); gadeident = helperWrapper(vej, request.streetNames, helperSearchKey); } return gadeident; } private Integer helperWrapper(String vej, Map postnrVeje, String helperSearchKey) { if (vej == null) return null; String vejVasket = AddressUtils.vaskVejnavn( vej ); Integer gadeident = postnrVeje.get(vejVasket); if (gadeident != null) { helperCache.put(helperSearchKey, gadeident); } return gadeident; } public Map getStretsByPostal(short zip) { return searchPostnrVejnavnGadeid.get(zip); } public void buildSearchStructures() throws SQLException{ searchPostnrVejnavnGadeid = new HashMap>(); searchGadeidentAdresser = new HashMap>>(); helperCache = new ConcurrentHashMap(); // ////////////////////// // Start stage1 long start1 = System.currentTimeMillis(); System.out.println("Build -- stage 1"); List
alleAdresser = db.getAllAdresses(); /* Mapper mellem db Row ID og adresse noden - skal bruges i stage3*/ Map idAddressMap = new HashMap( alleAdresser.size() ); DeduplicateHelper shortHelper = new DeduplicateHelper(); DeduplicateHelper intHelper = new DeduplicateHelper(); AliasGenerator aliasGenerator = new AliasGenerator(); ArrayList> litraListCache = new ArrayList>(); for (Address a : alleAdresser) { idAddressMap.put(a.id, a); Short postnr = shortHelper.getInstance(a.postnr); Map postnrVeje = searchPostnrVejnavnGadeid.get(postnr); if (postnrVeje == null) { postnrVeje = new HashMap(10240); //lige nu indeholder den største 9500 entries searchPostnrVejnavnGadeid.put(postnr, postnrVeje); } String vasketVejnavn = AddressUtils.vaskVejnavn(a.vejnavn); Integer gadeident = postnrVeje.get(vasketVejnavn); if (gadeident == null) { //postnrVeje.put(vasketVejnavn, a.gadeid); gadeident = intHelper.getInstance( a.gadeid ); Set aliaser = aliasGenerator.findVejAliaser(a.vejnavn); for(String alias : aliaser) { postnrVeje.put(alias, gadeident); } } Map> gade = searchGadeidentAdresser.get(gadeident); if (gade == null) { gade = new TreeMap >(); searchGadeidentAdresser.put(gadeident, gade); } List
litraList = gade.get(a.husnr); if (litraList == null) { litraList = new ArrayList
(); gade.put(a.husnr, litraList); litraListCache.add( (ArrayList
) litraList); } litraList.add(a); } for (ArrayList
list: litraListCache) { list.trimToSize(); } litraListCache.clear(); //frigiv hukommelse inden stage2 aliasGenerator.clear(); shortHelper.clear(); intHelper.clear(); //////////////////////////////////////////////////////////////////////////////////////// long start2 = System.currentTimeMillis(); System.out.println("Build, stage1 elapsed: " + (start2-start1) ); System.out.println("Build -- stage 2 alias tabel"); int vaskCount = 0; List aliasList = db.getAliasList(); for (AliasBean alias : aliasList) { Map postnrVeje = searchPostnrVejnavnGadeid.get(alias.postnr); if (postnrVeje == null) { //Burde ikke kunne ske - men better safe than sorry continue; } String vasketVej = AddressUtils.vaskVejnavn(alias.vejnavn); String vasketAlias = AddressUtils.vaskVejnavn(alias.aliasVejnavn); Integer gadeident = postnrVeje.get(vasketVej); if (gadeident == null) { //Kender ikke den oprindelige vej continue; } Integer aliasIdent = postnrVeje.get(vasketAlias); if (aliasIdent == null) { //Vi kender ikke denne variant af vejnavnet postnrVeje.put(vasketAlias, gadeident); vaskCount++; } } System.out.println("Anvendte " + vaskCount + " aliaser fra databasen"); aliasList.clear();//cleanup inden stage3 //////////////////////////////////////////////////////////////////////////////////////// long start3 = System.currentTimeMillis(); System.out.println("Build, stage2 elapsed: " + (start3-start2) ); System.out.println("Build -- stage 3 udvidet dækning"); DeduplicateHelper ruteHelper = new DeduplicateHelper(); DeduplicateHelper korelisteHelper = new DeduplicateHelper(); List extDao = db.getExtendedAdresslist(); for (ExtendedBean eb : extDao) { Address orgAddress = idAddressMap.get(eb.orgId); if (orgAddress == null) continue; /*if (orgAddress.daekningsType != DaekningsType.DAEKNING_IKKEDAEKKET) { //kan være at adressen er blevet markeret dækket og - find-nærmeste data ikke er opdateret i mellemtiden continue; }*/ Address targetAddress = idAddressMap.get(eb.targetId); if (targetAddress == null) { 
continue; } if ( StringUtils.equals(orgAddress.distributor, "LUKKET") ) { continue; } if ( StringUtils.equals(targetAddress.distributor, "LUKKET") ) { continue; } orgAddress.extTarget = targetAddress; orgAddress.extAfstand = eb.afstand; boolean covered = false; if (targetAddress.distributor.equals("DAO")) { orgAddress.routingMandag.rute = calculateExtendedDaoRoute(eb, targetAddress.routingMandag); orgAddress.routingMandag.rute = ruteHelper.getInstance(orgAddress.routingMandag.rute); if (orgAddress.routingMandag.rute != null) { orgAddress.routingMandag.koreliste = targetAddress.routingMandag.koreliste; covered = true; } } if (targetAddress.distributor.equals("BK")) { orgAddress.routingMandag.koreliste = calculateExtendedBkKoreliste(eb, targetAddress.routingMandag); orgAddress.routingMandag.koreliste = korelisteHelper.getInstance(orgAddress.routingMandag.koreliste); if (orgAddress.routingMandag.koreliste != null) { orgAddress.routingMandag.rute = targetAddress.routingMandag.rute; covered = true; } } if (covered) { //Kopier resten af felterne orgAddress.daekningsType = DaekningsType.DAEKNING_UDVIDET; orgAddress.dbkBane = targetAddress.dbkBane; /* Sådan gør den gamle service */ /* 20150520 THN- hvis vi bibeholder de originale kommune+vejkoder kan vi bruge DAVID til join i databasen*/ //orgAddress.kommunekode = targetAddress.kommunekode; //orgAddress.vejkode = targetAddress.vejkode; orgAddress.distributor = targetAddress.distributor; } } // cleanup inden stage 4 idAddressMap = null; ruteHelper = null; korelisteHelper = null; ////////////////////////////////////////////////////////////////////////////////////// long start4 = System.currentTimeMillis(); System.out.println("Build, stage3 elapsed: " + (start4-start3) ); System.out.println("Build -- stage 4 - 100pct"); Map hundredePct = db.get100PctList(); for (Address addr : alleAdresser) { if (addr.distributor != null && addr.distributor.equals("LUKKET")) { continue; } HundredePctBean bean = hundredePct.get(addr.postnr); 
if (bean == null) { continue; } short modifyCounter = 0; modifyCounter += update100PctRouting(bean, addr.routingMandag); modifyCounter += update100PctRouting(bean, addr.routingTirsdag); modifyCounter += update100PctRouting(bean, addr.routingOnsdag); modifyCounter += update100PctRouting(bean, addr.routingTorsdag); modifyCounter += update100PctRouting(bean, addr.routingFredag); modifyCounter += update100PctRouting(bean, addr.routingLordag); modifyCounter += update100PctRouting(bean, addr.routingSondag); if (addr.daekningsType == DaekningsType.DAEKNING_IKKEDAEKKET && modifyCounter>0) { addr.daekningsType = DaekningsType.DAEKNING_100PCT; addr.dbkBane = bean.dbkBane; addr.distributor = bean.distributor; } } //////////////////////////////////////////////////////////////////////////////////// long stop = System.currentTimeMillis(); System.out.println("Build, stage4 elapsed: " + (stop-start4) ); System.out.println("Build -- Gathering statistics"); for (Address addr : alleAdresser) { switch (addr.daekningsType) { case DAEKNING_DIREKTE: stats.direkteCount++; break; case DAEKNING_UDVIDET: stats.extendedCount++; break; case DAEKNING_100PCT: hundredePctAdresser.add(addr); stats.hundredePctCount++; break; default: ikkeDaekkedeAdresser.add(addr); stats.ikkeDaekketCount++; } } stats.totalCount = alleAdresser.size(); ikkeDaekkedeAdresser.trimToSize(); hundredePctAdresser.trimToSize(); Collections.sort( ikkeDaekkedeAdresser ); //Bruger Comparable interfacet Collections.sort( hundredePctAdresser ); stats.elapsed = stop-start1; stats.buildTime = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format( new Date() ); System.out.println("Build: totalCount: " + stats.totalCount); System.out.println("Build: direkteCount: " + stats.direkteCount); System.out.println("Build: extendedCount: " + stats.extendedCount); System.out.println("Build: hundredePctCount: " + stats.hundredePctCount); System.out.println("Build: ikkeDaekketCount: " + stats.ikkeDaekketCount); System.out.println("Build: Total 
Elapsed: " + (stop-start1) ); System.out.println("Build Completed"); } public short update100PctRouting(HundredePctBean bean, RoutingInfo routing) { if (routing.rute == null) { routing.rute = bean.rute; routing.koreliste = bean.koreliste; return 1; } else { return 0; } } public DataStatisticsBean getStatistics() { return stats; } public DatabaseLayer getDatabaseLayer() { return db; } public void clear() { searchPostnrVejnavnGadeid.clear(); searchGadeidentAdresser.clear(); helperCache.clear(); } private String calculateExtendedDaoRoute(ExtendedBean eb, RoutingInfo targetRoute) { if (targetRoute.rute == null) { //hvis targetAdress.rute er null så er adressen ikke dækket alligevel return null; } // /////////////////////////////////////////////////////////////////// switch( eb.transport) { case "cykel": if (eb.afstand < 0.151) { return "." + targetRoute.rute; } else if (eb.afstand < 0.501) { return ".." + targetRoute.rute; } else if (eb.afstand < 0.701) { return "..." + targetRoute.rute; } else if (eb.afstand < 1.001) { return "...." + targetRoute.rute; } break; case "scooter": if (eb.afstand < 0.151) { return "." + targetRoute.rute; } else if (eb.afstand < 0.801) { return ".." + targetRoute.rute; } else if (eb.afstand < 1.201) { return "..." + targetRoute.rute; } else if (eb.afstand < 2.101) { return "...." + targetRoute.rute; } break; case "bil": if (eb.afstand < 0.151) { return "." + targetRoute.rute; } else if (eb.afstand < 1.001) { return ".." + targetRoute.rute; } else if (eb.afstand < 1.601) { return "..." + targetRoute.rute; } else if (eb.afstand < 2.601) { return "...." + targetRoute.rute; } break; default: System.out.println("Ukendt transport type: " + eb ); } return null; } public List
getNonCoveredAddresses() { return ikkeDaekkedeAdresser; } public List
get100PctAddresses() { return hundredePctAdresser; } /////////////////////////////////////////////////////////// private String calculateExtendedBkKoreliste(ExtendedBean eb, RoutingInfo targetRoute) { if (targetRoute.rute == null) { //hvis targetAdress.rute er null så er adressen ikke dækket alligevel return null; } String inject; if (eb.afstand <= 0.500) { inject = "."; } else { inject = ".."; } return AddressUtils.injectIntoBk(targetRoute.koreliste, inject); } public Set getPostnumre() { return new TreeSet( searchPostnrVejnavnGadeid.keySet() ); } public Set> getVejnavne(short postnr) { Map map = searchPostnrVejnavnGadeid.get(postnr); TreeMap newMap = new TreeMap( map );//Lav et nyt TreeMap for at sikre sortering return newMap.entrySet(); } public Set getHusnumre(int gadeid) { Map> gade = searchGadeidentAdresser.get(gadeid); Set set = new TreeSet( new NaturalOrderComparator() ); for(List
litraList : gade.values()) { for (Address a: litraList) { set.add( a.husnr + a.husnrbogstav ); } } return set; } public Address getAdresse(int gadeid, String husnrStr) { short husnr = Short.parseShort( husnrStr.replaceAll("[^\\d]","") ); String litra = husnrStr.replaceAll("\\d", ""); Map> gade = searchGadeidentAdresser.get(gadeid); List
litraList = gade.get(husnr); for(Address addr : litraList) { if (addr.husnrbogstav.equals(litra)) { return addr; } } return null; } }