package dk.daoas.daoadresseservice; import java.sql.SQLException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.TreeMap; import java.util.TreeSet; import java.util.concurrent.ConcurrentHashMap; import org.apache.commons.lang3.StringUtils; import dk.daoas.daoadresseservice.admin.ServiceConfig; import dk.daoas.daoadresseservice.beans.Address; import dk.daoas.daoadresseservice.beans.AliasBean; import dk.daoas.daoadresseservice.beans.DataStatisticsBean; import dk.daoas.daoadresseservice.beans.ExtendedBean; import dk.daoas.daoadresseservice.beans.HundredePctBean; import dk.daoas.daoadresseservice.beans.SearchRequest; import dk.daoas.daoadresseservice.beans.SearchResult; import dk.daoas.daoadresseservice.beans.SearchResult.Status; import dk.daoas.daoadresseservice.db.DatabaseLayer; import dk.daoas.daoadresseservice.util.DaoUtils; import dk.daoas.daoadresseservice.util.DeduplicateHelper; import dk.daoas.daoadresseservice.util.NaturalOrderComparator; import dk.daoas.daoadresseservice.util.NearestShortTreeMap; public class AddressSearch { private Map> searchPostnrVejnavnGadeid; private Map>> searchGadeidentAdresser; // Gadeident -> husnrumre -> litra private List
alleAdresser; private Map helperCache; private DataStatisticsBean stats = new DataStatisticsBean(); private DatabaseLayer db; private ServiceConfig config; StreetnameHelper levenshteinHelper; StreetnameHelper metaphoneHelper; StreetnameHelper googleHelper; StreetnameHelper osmHelper; public AddressSearch(DatabaseLayer db, ServiceConfig config) { this.db = db; this.config = config; levenshteinHelper = new LevenshteinStreetnameHelper( this ); metaphoneHelper = new DoubleMetaphoneStreetnameHelper(config); googleHelper = new GoogleStreetnameHelper( config ); osmHelper = new OSMStreetnameHelper( config ); } public Address getAddressSafe(int gadeid, short husnr, String litra) throws Exception { Map> gade = searchGadeidentAdresser.get(gadeid); if (gade == null) { throw new Exception("GadeID ikke fundet"); } Map litraList = gade.get(husnr); Address addr = litraList.get(litra); if (addr == null) { throw new Exception("Adresse ikke ikke fundet"); } return addr; } public SearchResult search(String postnrStr, String adresse) { return search(postnrStr, adresse, false); } public SearchResult search(String postnrStr, String adresse, boolean naermesteHusnr) { postnrStr = postnrStr.trim(); adresse = adresse.trim(); SearchRequest request = new SearchRequest(); SearchResult result = new SearchResult(); try { request.postnr = Short.parseShort(postnrStr); } catch (Exception E) { result.status = Status.ERROR_UNKNOWN_POSTAL; return result; } request.streetNames = searchPostnrVejnavnGadeid.get(request.postnr); if (request.streetNames == null) { result.status = Status.ERROR_UNKNOWN_POSTAL; return result; } result.splitResult = AddressUtils.splitAdresse(adresse); request.vejnavn = result.splitResult.vej; if (result.splitResult.husnr.length() == 0) { result.status = Status.ERROR_MISSING_HOUSENUMBER; return result; } result.vasketVejnavn = AddressUtils.vaskVejnavn( request.vejnavn ); if (result.vasketVejnavn.indexOf("pakkebo") > -1 || result.vasketVejnavn.indexOf("pakkepost") > -1 || result.vasketVejnavn.indexOf("postbo") > -1 // postbox/postboks || result.vasketVejnavn.indexOf("døgnpost") > -1 || result.vasketVejnavn.indexOf("døgnbo") > -1 // døgnbox // døgnboks || result.vasketVejnavn.equals("id") ) { result.status = Status.ERROR_POSTBOX; return result; } // LookupStreetname er en indkapsling af alm + levenstein + google + osm Integer gadeident = lookupStretname(request, result, true); /* * Hvis der ikke er direkte hits, prøver vi at skære ord af vejnavn * * Denne sektion er MEGET experimental da den øger antallet af kald til google/OSM væsentligt*/ if (gadeident == null) { final int MAX_TRIES = 4; String vejnavnParts[] = request.vejnavn.split(" "); for (int i=1; i<=MAX_TRIES && i> gade = searchGadeidentAdresser.get(gadeident); if (gade == null) { //Denne søgning må ikke fejle result.status = Status.ERROR_INTERNAL; return result; } //Kunne evt klares med Iterables.get fra Guava/Collections Address firstAddress = gade.values().iterator().next().values().iterator().next(); //Første husnr -> første litra result.vej = firstAddress.vejnavn; short husnrSearch = Short.parseShort(result.splitResult.husnr ); Map litraList = gade.get(husnrSearch); if (litraList == null) { //Husnr ikke fundet if (naermesteHusnr) { TreeMap> gadeTreeMap = (TreeMap>) gade; result.anvendtHusnr = NearestShortTreeMap.getNearestKey(husnrSearch, gadeTreeMap); result.nearestHusnr = true; litraList = gade.get(result.anvendtHusnr); } else { result.status = Status.ERROR_UNKNOWN_ADDRESSPOINT; return result; } } Address addr = litraList.get( result.splitResult.litra ); if (addr == null) { //litra ikke fundet if (config.nearestLitra == true || naermesteHusnr == true) { result.nearestLitra = true; addr = litraList.values().iterator().next(); } else { //husnr fundet men litra blev ikke fundet result.status = Status.ERROR_UNKNOWN_ADDRESSPOINT; return result; } } result.address = addr; if ( StringUtils.equals(addr.distributor, "LUKKET") ) { result.status = Status.STATUS_NOT_COVERED; //Skal vi have en special status til Lukkede adresser ? return result; } if (addr.daekningsType == DaekningsType.DAEKNING_IKKEDAEKKET) { result.status = Status.STATUS_NOT_COVERED; return result; } result.status = Status.STATUS_OK; return result; } private Integer lookupStretname(SearchRequest request, SearchResult result, boolean useExternal) { String helperSearchKey = ""; Integer gadeident = request.streetNames.get(result.vasketVejnavn); if ( gadeident == null) { helperSearchKey = "" + request.postnr + "/" + result.vasketVejnavn; gadeident = helperCache.get(helperSearchKey); } if (gadeident == null) { String vej = levenshteinHelper.proposeStreetName(request, result); if (vej != null) { gadeident = request.streetNames.get(vej); } } if (gadeident == null) { String vej = metaphoneHelper.proposeStreetName(request, result); if (vej != null) { gadeident = request.streetNames.get(vej); } } // Brug OpenStreetMap før vi prøver google // For google har en request limit, det har OSM ikke! if ( gadeident == null && useExternal) { String vej = osmHelper.proposeStreetName(request, result); gadeident = helperWrapper(vej, request.streetNames, helperSearchKey); } if ( gadeident == null && useExternal) { String vej = googleHelper.proposeStreetName(request, result); gadeident = helperWrapper(vej, request.streetNames, helperSearchKey); } return gadeident; } private Integer helperWrapper(String vej, Map postnrVeje, String helperSearchKey) { if (vej == null) return null; String vejVasket = AddressUtils.vaskVejnavn( vej ); Integer gadeident = postnrVeje.get(vejVasket); if (gadeident != null) { helperCache.put(helperSearchKey, gadeident); } return gadeident; } public Map getStretsByPostal(short zip) { return searchPostnrVejnavnGadeid.get(zip); } public void buildSearchStructures() throws SQLException{ searchPostnrVejnavnGadeid = new HashMap>(); searchGadeidentAdresser = new HashMap>>(); helperCache = new ConcurrentHashMap(); long start1 = System.currentTimeMillis(); System.out.println("Build -- stage 1"); alleAdresser = db.getAllAdresses(); /* Mapper mellem db Row ID og adresse noden */ Map idAddressMap = new HashMap( alleAdresser.size() ); DeduplicateHelper aliasHelper = new DeduplicateHelper(400000); // Indeholder pt 379664 entries DeduplicateHelper shortHelper = new DeduplicateHelper(); DeduplicateHelper intHelper = new DeduplicateHelper(); AliasGenerator aliasGenerator = new AliasGenerator(); for (Address a : alleAdresser) { idAddressMap.put(a.id, a); Short postnr = shortHelper.getInstance(a.postnr); Map postnrVeje = searchPostnrVejnavnGadeid.get(postnr); if (postnrVeje == null) { postnrVeje = new HashMap(10240); //lige nu indeholder den største 9500 entries searchPostnrVejnavnGadeid.put(postnr, postnrVeje); } String vasketVejnavn = AddressUtils.vaskVejnavn(a.vejnavn); Integer gadeident = postnrVeje.get(vasketVejnavn); if (gadeident == null) { //postnrVeje.put(vasketVejnavn, a.gadeid); gadeident = intHelper.getInstance( a.gadeid ); Set aliaser = aliasGenerator.findVejAliaser(a.vejnavn); for(String alias : aliaser) { String vasketAlias = AddressUtils.vaskVejnavn(alias); vasketAlias = aliasHelper.getInstance(vasketAlias); postnrVeje.put(vasketAlias, gadeident); } } Map> gade = searchGadeidentAdresser.get(gadeident); if (gade == null) { gade = new TreeMap>(); searchGadeidentAdresser.put(gadeident, gade); } Map litraList = gade.get(a.husnr); if (litraList == null) { litraList = new HashMap(); gade.put(a.husnr, litraList); } litraList.put(a.husnrbogstav, a); } //////////////////////////////////////////////////////////////////////////////////////// long start2 = System.currentTimeMillis(); System.out.println("Build, stage1 elapsed: " + (start2-start1) ); System.out.println("Build -- stage 2 alias tabel"); int vaskCount = 0; List aliasList = db.getAliasList(); for (AliasBean alias : aliasList) { Map postnrVeje = searchPostnrVejnavnGadeid.get(alias.postnr); if (postnrVeje == null) { //Burde ikke kunne ske - men better safe than sorry continue; } String vasketVej = AddressUtils.vaskVejnavn(alias.vejnavn); String vasketAlias = AddressUtils.vaskVejnavn(alias.aliasVejnavn); Integer gadeident = postnrVeje.get(vasketVej); if (gadeident == null) { //Kender ikke den oprindelige vej continue; } Integer aliasIdent = postnrVeje.get(vasketAlias); if (aliasIdent == null) { //Vi kender ikke denne variant af vejnavnet postnrVeje.put(vasketAlias, gadeident); vaskCount++; } } System.out.println("Anvendte " + vaskCount + " aliaser fra databasen"); //////////////////////////////////////////////////////////////////////////////////////// long start3 = System.currentTimeMillis(); System.out.println("Build, stage2 elapsed: " + (start3-start2) ); System.out.println("Build -- stage 3 udvidet dækning"); DeduplicateHelper ruteHelper = new DeduplicateHelper(); DeduplicateHelper korelisteHelper = new DeduplicateHelper(); List extDao = db.getExtendedAdresslist(); for (ExtendedBean eb : extDao) { Address orgAddress = idAddressMap.get(eb.orgId); if (orgAddress == null) continue; if (orgAddress.daekningsType != DaekningsType.DAEKNING_IKKEDAEKKET) { //kan være at adressen er blevet markeret dækket og - find-nærmeste data ikke er opdateret i mellemtiden continue; } Address targetAddress = idAddressMap.get(eb.targetId); if (targetAddress == null) { continue; } if ( StringUtils.equals(orgAddress.distributor, "LUKKET") ) { continue; } if ( StringUtils.equals(targetAddress.distributor, "LUKKET") ) { continue; } orgAddress.extTarget = targetAddress; orgAddress.extAfstand = eb.afstand; boolean covered = false; if (targetAddress.distributor.equals("DAO")) { orgAddress.rute = calculateExtendedDaoRoute(eb,orgAddress,targetAddress); orgAddress.rute = ruteHelper.getInstance(orgAddress.rute); if (orgAddress.rute != null) { orgAddress.koreliste = targetAddress.koreliste; covered = true; } } if (targetAddress.distributor.equals("BK")) { orgAddress.koreliste = calculateExtendedBkKoreliste(eb,orgAddress,targetAddress); orgAddress.koreliste = korelisteHelper.getInstance(orgAddress.koreliste); if (orgAddress.koreliste != null) { orgAddress.rute = targetAddress.rute; covered = true; } } if (covered) { //Kopier resten af felterne orgAddress.daekningsType = DaekningsType.DAEKNING_UDVIDET; orgAddress.dbkBane = targetAddress.dbkBane; /* Sådan gør den gamle service */ /* 20150520 THN- hvis vi bibeholder de originale kommune+vejkoder kan vi bruge DAVID til join i databasen*/ //orgAddress.kommunekode = targetAddress.kommunekode; //orgAddress.vejkode = targetAddress.vejkode; orgAddress.distributor = targetAddress.distributor; } } // nu skal vi ikke bruge idAddressMap længere idAddressMap = null; ////////////////////////////////////////////////////////////////////////////////////// long start4 = System.currentTimeMillis(); System.out.println("Build, stage3 elapsed: " + (start4-start3) ); System.out.println("Build -- stage 4 - 100pct"); Map hundredePct = db.get100PctList(); for (Address addr : alleAdresser) { if (addr.daekningsType != DaekningsType.DAEKNING_IKKEDAEKKET) { continue; } if (addr.distributor != null && addr.distributor.equals("LUKKET")) { continue; } HundredePctBean bean = hundredePct.get(addr.postnr); if (bean == null) { continue; } addr.daekningsType = DaekningsType.DAEKNING_100PCT; addr.rute = bean.rute; addr.koreliste = bean.koreliste; addr.dbkBane = bean.dbkBane; addr.distributor = bean.distributor; } //////////////////////////////////////////////////////////////////////////////////// long stop = System.currentTimeMillis(); System.out.println("Build, stage4 elapsed: " + (stop-start4) ); System.out.println("Build -- Gathering statistics"); for (Address addr : alleAdresser) { switch (addr.daekningsType) { case DAEKNING_DIREKTE: stats.direkteCount++; break; case DAEKNING_UDVIDET: stats.extendedCount++; break; case DAEKNING_100PCT: stats.hundredePctCount++; break; default: stats.ikkeDaekketCount++; } } stats.totalCount = alleAdresser.size(); stats.elapsed = stop-start1; stats.buildTime = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format( new Date() ); System.out.println("Build: totalCount: " + stats.totalCount); System.out.println("Build: direkteCount: " + stats.direkteCount); System.out.println("Build: extendedCount: " + stats.extendedCount); System.out.println("Build: hundredePctCount: " + stats.hundredePctCount); System.out.println("Build: ikkeDaekketCount: " + stats.ikkeDaekketCount); System.out.println("Build: Total Elapsed: " + (stop-start1) ); System.out.println("Build Completed"); } public DataStatisticsBean getStatistics() { return stats; } public DatabaseLayer getDatabaseLayer() { return db; } public void clear() { searchPostnrVejnavnGadeid.clear(); searchGadeidentAdresser.clear(); alleAdresser.clear(); helperCache.clear(); } private String calculateExtendedDaoRoute(ExtendedBean eb, Address orgAddress, Address targetAddress) { if (targetAddress.rute == null) { //hvis targetAdress.rute er null så er adressen ikke dækket alligevel return null; } // /////////////////////////////////////////////////////////////////// switch( eb.transport) { case "cykel": if (eb.afstand < 0.151) { return "." + targetAddress.rute; } else if (eb.afstand < 0.501) { return ".." + targetAddress.rute; } else if (eb.afstand < 0.701) { return "..." + targetAddress.rute; } else if (eb.afstand < 1.001) { return "...." + targetAddress.rute; } break; case "scooter": if (eb.afstand < 0.151) { return "." + targetAddress.rute; } else if (eb.afstand < 0.801) { return ".." + targetAddress.rute; } else if (eb.afstand < 1.201) { return "..." + targetAddress.rute; } else if (eb.afstand < 2.101) { return "...." + targetAddress.rute; } break; case "bil": if (eb.afstand < 0.151) { return "." + targetAddress.rute; } else if (eb.afstand < 1.001) { return ".." + targetAddress.rute; } else if (eb.afstand < 1.601) { return "..." + targetAddress.rute; } else if (eb.afstand < 2.601) { return "...." + targetAddress.rute; } break; default: System.out.println("Ukendt transport type: " + eb ); } return null; } public List
getNonCoveredAddresses() { List
result = new ArrayList
(60000); for (Address a : alleAdresser) { if ( a.daekningsType == DaekningsType.DAEKNING_IKKEDAEKKET) { result.add(a); } } Collections.sort(result ); return result; } public List
get100PctAddresses() { List
result = new ArrayList
(120000); for (Address a : alleAdresser) { if ( a.daekningsType == DaekningsType.DAEKNING_100PCT) { result.add(a); } } Collections.sort(result ); return result; } /////////////////////////////////////////////////////////// private String calculateExtendedBkKoreliste(ExtendedBean eb, Address orgAddress, Address targetAddress) { String inject; if (eb.afstand <= 0.500) { inject = "."; } else { inject = ".."; } return AddressUtils.injectIntoBk(targetAddress.koreliste, inject); } public Set getPostnumre() { return new TreeSet( searchPostnrVejnavnGadeid.keySet() ); } public Set> getVejnavne(short postnr) { Map map = searchPostnrVejnavnGadeid.get(postnr); TreeMap newMap = new TreeMap( map );//Lav et nyt TreeMap for at sikre sortering return newMap.entrySet(); } public Set getHusnumre(int gadeid) { Map> gade = searchGadeidentAdresser.get(gadeid); Set set = new TreeSet( new NaturalOrderComparator() ); for(Map litraList : gade.values()) { for (Address a: litraList.values()) { set.add( a.husnr + a.husnrbogstav ); } } return set; } public Address getAdresse(int gadeid, String husnrStr) { short husnr = Short.parseShort( husnrStr.replaceAll("[^\\d]","") ); String litra = husnrStr.replaceAll("\\d", ""); Map> gade = searchGadeidentAdresser.get(gadeid); Map litraList = gade.get(husnr); return litraList.get(litra); } }