--- dao/DaoAdresseService/src/dk/daoas/daoadresseservice/AdressSearch.java 2015/02/14 08:43:45 2289 +++ dao/DaoAdresseService/src/main/java/dk/daoas/daoadresseservice/AddressSearch.java 2016/01/14 17:13:55 2814 @@ -1,155 +1,449 @@ package dk.daoas.daoadresseservice; import java.sql.SQLException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.TreeMap; +import java.util.TreeSet; import java.util.concurrent.ConcurrentHashMap; -import dk.daoas.daoadresseservice.AddressUtils.SplitResult; +import org.apache.commons.lang3.StringUtils; + +import dk.daoas.daoadresseservice.admin.ServiceConfig; import dk.daoas.daoadresseservice.beans.Address; +import dk.daoas.daoadresseservice.beans.AliasBean; +import dk.daoas.daoadresseservice.beans.DataStatisticsBean; import dk.daoas.daoadresseservice.beans.ExtendedBean; import dk.daoas.daoadresseservice.beans.HundredePctBean; +import dk.daoas.daoadresseservice.beans.SearchRequest; import dk.daoas.daoadresseservice.beans.SearchResult; import dk.daoas.daoadresseservice.beans.SearchResult.Status; import dk.daoas.daoadresseservice.db.DatabaseLayer; +import dk.daoas.daoadresseservice.util.DaoUtils; +import dk.daoas.daoadresseservice.util.DeduplicateHelper; +import dk.daoas.daoadresseservice.util.NaturalOrderComparator; +import dk.daoas.daoadresseservice.util.NearestShortTreeMap; -public class AdressSearch { +public class AddressSearch { - Map> searchPostnrVejnavnGadeid; - Map> searchGadeidentAdresser; + private Map> searchPostnrVejnavnGadeid; + private Map>> searchGadeidentAdresser; // Gadeident -> husnrumre -> litra + - List
alleAdresser; + private List
alleAdresser; + + private Map helperCache; + + private DataStatisticsBean stats = new DataStatisticsBean(); + + private DatabaseLayer db; + private ServiceConfig config; + + StreetnameHelper levenshteinHelper; + StreetnameHelper metaphoneHelper; + StreetnameHelper googleHelper; + StreetnameHelper osmHelper; - Map hundredePct; + public AddressSearch(DatabaseLayer db, ServiceConfig config) { + + this.db = db; + this.config = config; + + levenshteinHelper = new LevenshteinStreetnameHelper( this ); + metaphoneHelper = new DoubleMetaphoneStreetnameHelper(config); + googleHelper = new GoogleStreetnameHelper( config ); + osmHelper = new OSMStreetnameHelper( config ); + } + public Address getAddressSafe(int gadeid, short husnr, String litra) throws Exception { + + Map> gade = searchGadeidentAdresser.get(gadeid); + + if (gade == null) { + throw new Exception("GadeID ikke fundet"); + } + + Map litraList = gade.get(husnr); + + Address addr = litraList.get(litra); + + if (addr == null) { + throw new Exception("Adresse ikke ikke fundet"); + } + + return addr; + } + + public SearchResult search(String postnrStr, String adresse) { -//long start1 = System.currentTimeMillis(); + return search(postnrStr, adresse, false); + } + + + public SearchResult search(String postnrStr, String adresse, boolean naermesteHusnr) { + + postnrStr = postnrStr.trim(); + adresse = adresse.trim(); + + + SearchRequest request = new SearchRequest(); + SearchResult result = new SearchResult(); - int postnr=0; try { - postnr = Integer.parseInt(postnrStr); + request.postnr = Short.parseShort(postnrStr); } catch (Exception E) { - return new SearchResult(Status.ERROR_UNKNOWN_POSTAL); + result.status = Status.ERROR_UNKNOWN_POSTAL; + return result; } -//long start2 = System.currentTimeMillis(); - Map postnrVeje = searchPostnrVejnavnGadeid.get(postnr); - if (postnrVeje == null) { - return new SearchResult(Status.ERROR_UNKNOWN_POSTAL); + request.streetNames = searchPostnrVejnavnGadeid.get(request.postnr); + + if (request.streetNames == null) { + result.status = Status.ERROR_UNKNOWN_POSTAL; + return result; } -//long start3 = System.currentTimeMillis(); - SplitResult split = AddressUtils.splitAdresse(adresse); - String vasketVejnavn = AddressUtils.vaskVejnavn( split.vej ); + result.splitResult = AddressUtils.splitAdresse(adresse); + request.vejnavn = result.splitResult.vej; + + - if (split.husnr.length() == 0) { - return new SearchResult(Status.ERROR_MISSING_HOUSENUMBER); + if (result.splitResult.husnr.length() == 0) { + result.status = Status.ERROR_MISSING_HOUSENUMBER; + return result; } -//long start4 = System.currentTimeMillis(); - Long gadeident = postnrVeje.get(vasketVejnavn); + result.vasketVejnavn = AddressUtils.vaskVejnavn( request.vejnavn ); + + + if (result.vasketVejnavn.indexOf("pakkebo") > -1 + || result.vasketVejnavn.indexOf("pakkepost") > -1 + || result.vasketVejnavn.indexOf("postbo") > -1 // postbox/postboks + || result.vasketVejnavn.indexOf("døgnpost") > -1 + || result.vasketVejnavn.indexOf("døgnbo") > -1 // døgnbox // døgnboks + || result.vasketVejnavn.equals("id") + ) { + result.status = Status.ERROR_POSTBOX; + return result; + } + + // LookupStreetname er en indkapsling af alm + levenstein + google + osm + Integer gadeident = lookupStretname(request, result, true); + + + /* + * Hvis der ikke er direkte hits, prøver vi at skære ord af vejnavn + * + * Denne sektion er MEGET experimental da den øger antallet af kald til google/OSM væsentligt*/ if (gadeident == null) { - return new SearchResult(Status.ERROR_UNKNOWN_STREETNAME); + final int MAX_TRIES = 4; + + String vejnavnParts[] = request.vejnavn.split(" "); + + for (int i=1; i<=MAX_TRIES && i gade = searchGadeidentAdresser.get(gadeident); //Denne søgning må ikke fejle + -//long start6 = System.currentTimeMillis(); + if (gadeident == null) { + result.status = Status.ERROR_UNKNOWN_STREETNAME; + return result; + } - String husnrSearch = "" + split.husnr + split.litra; - Address addr = gade.get(husnrSearch); -//long start7 = System.currentTimeMillis(); - if (addr == null) { - return new SearchResult(Status.ERROR_UNKNOWN_ADDRESSPOINT); + result.gadeident = gadeident; + + + Map> gade = searchGadeidentAdresser.get(gadeident); + if (gade == null) { //Denne søgning må ikke fejle + result.status = Status.ERROR_INTERNAL; + return result; } - if (addr.daekningsType == DaekningsType.DAEKNING_IKKEDAEKKET) { - return new SearchResult(Status.STATUS_NOT_COVERED); + + //Kunne evt klares med Iterables.get fra Guava/Collections + Address firstAddress = gade.values().iterator().next().values().iterator().next(); //Første husnr -> første litra + result.vej = firstAddress.vejnavn; + + short husnrSearch = Short.parseShort(result.splitResult.husnr ); + Map litraList = gade.get(husnrSearch); + + + if (litraList == null) { //Husnr ikke fundet + + if (naermesteHusnr) { + TreeMap> gadeTreeMap = (TreeMap>) gade; + result.anvendtHusnr = NearestShortTreeMap.getNearestKey(husnrSearch, gadeTreeMap); + + result.nearestHusnr = true; + litraList = gade.get(result.anvendtHusnr); + + + } else { + result.status = Status.ERROR_UNKNOWN_ADDRESSPOINT; + return result; + } + } + + Address addr = litraList.get( result.splitResult.litra ); + + if (addr == null) { //litra ikke fundet + if (config.nearestLitra == true || naermesteHusnr == true) { + result.nearestLitra = true; + addr = litraList.values().iterator().next(); + } else { + //husnr fundet men litra blev ikke fundet + result.status = Status.ERROR_UNKNOWN_ADDRESSPOINT; + return result; + } } - if (addr.distributor.equals("LUKKET")) { - return new SearchResult(Status.STATUS_NOT_COVERED); //Skal vi have en special status til Lukkede adresser ? + result.address = addr; + + + if ( StringUtils.equals(addr.distributor, "LUKKET") ) { + result.status = Status.STATUS_NOT_COVERED; //Skal vi have en special status til Lukkede adresser ? + return result; + } + + if (addr.daekningsType == DaekningsType.DAEKNING_IKKEDAEKKET) { + result.status = Status.STATUS_NOT_COVERED; + return result; } + -/* -System.out.println("Search2: " + (start2-start1) ); -System.out.println("Search3: " + (start3-start1) ); -System.out.println("Search4: " + (start4-start1) ); -System.out.println("Search5: " + (start5-start1) ); -System.out.println("Search6: " + (start6-start1) ); -System.out.println("Search7: " + (start7-start1) ); -*/ + result.status = Status.STATUS_OK; - + return result; + } + + + private Integer lookupStretname(SearchRequest request, SearchResult result, boolean useExternal) { + String helperSearchKey = ""; + + Integer gadeident = request.streetNames.get(result.vasketVejnavn); + - return new SearchResult(addr); + if ( gadeident == null) { + helperSearchKey = "" + request.postnr + "/" + result.vasketVejnavn; + gadeident = helperCache.get(helperSearchKey); + } + + if (gadeident == null) { + String vej = levenshteinHelper.proposeStreetName(request, result); + if (vej != null) { + gadeident = request.streetNames.get(vej); + } + } + + if (gadeident == null) { + String vej = metaphoneHelper.proposeStreetName(request, result); + if (vej != null) { + gadeident = request.streetNames.get(vej); + } + } + + + // Brug OpenStreetMap før vi prøver google + // For google har en request limit, det har OSM ikke! + if ( gadeident == null && useExternal) { + String vej = osmHelper.proposeStreetName(request, result); + gadeident = helperWrapper(vej, request.streetNames, helperSearchKey); + } + + if ( gadeident == null && useExternal) { + String vej = googleHelper.proposeStreetName(request, result); + gadeident = helperWrapper(vej, request.streetNames, helperSearchKey); + } + return gadeident; } + private Integer helperWrapper(String vej, Map postnrVeje, String helperSearchKey) { + if (vej == null) + return null; + + String vejVasket = AddressUtils.vaskVejnavn( vej ); + Integer gadeident = postnrVeje.get(vejVasket); + + if (gadeident != null) { + helperCache.put(helperSearchKey, gadeident); + } + return gadeident; + + } + + public Map getStretsByPostal(short zip) { + return searchPostnrVejnavnGadeid.get(zip); + } + + public void buildSearchStructures() throws SQLException{ - searchPostnrVejnavnGadeid = new HashMap>(); - searchGadeidentAdresser = new HashMap>(); - + searchPostnrVejnavnGadeid = new HashMap>(); + searchGadeidentAdresser = new HashMap>>(); + helperCache = new ConcurrentHashMap(); + + long start1 = System.currentTimeMillis(); System.out.println("Build -- stage 1"); - alleAdresser = DatabaseLayer.getAllAdresses(); + alleAdresser = db.getAllAdresses(); /* Mapper mellem db Row ID og adresse noden */ Map idAddressMap = new HashMap( alleAdresser.size() ); + DeduplicateHelper aliasHelper = new DeduplicateHelper(400000); // Indeholder pt 379664 entries + + DeduplicateHelper shortHelper = new DeduplicateHelper(); + DeduplicateHelper intHelper = new DeduplicateHelper(); + + AliasGenerator aliasGenerator = new AliasGenerator(); + for (Address a : alleAdresser) { idAddressMap.put(a.id, a); - Map postnrVeje = searchPostnrVejnavnGadeid.get(a.postnr); + Short postnr = shortHelper.getInstance(a.postnr); + + Map postnrVeje = searchPostnrVejnavnGadeid.get(postnr); if (postnrVeje == null) { - postnrVeje = new ConcurrentHashMap(); - searchPostnrVejnavnGadeid.put(a.postnr, postnrVeje); + postnrVeje = new HashMap(10240); //lige nu indeholder den største 9500 entries + searchPostnrVejnavnGadeid.put(postnr, postnrVeje); } + String vasketVejnavn = AddressUtils.vaskVejnavn(a.vejnavn); - Long gadeident = postnrVeje.get(vasketVejnavn); - if (gadeident == null) { - postnrVeje.put(vasketVejnavn, a.gadeid); + Integer gadeident = postnrVeje.get(vasketVejnavn); + if (gadeident == null) { + //postnrVeje.put(vasketVejnavn, a.gadeid); + + gadeident = intHelper.getInstance( a.gadeid ); + + Set aliaser = aliasGenerator.findVejAliaser(a.vejnavn); + for(String alias : aliaser) { + String vasketAlias = AddressUtils.vaskVejnavn(alias); + vasketAlias = aliasHelper.getInstance(vasketAlias); + + postnrVeje.put(vasketAlias, gadeident); + } + } - Map gade = searchGadeidentAdresser.get(a.gadeid); + + Map> gade = searchGadeidentAdresser.get(gadeident); if (gade == null) { - gade = new HashMap(); - searchGadeidentAdresser.put(a.gadeid, gade); + gade = new TreeMap>(); + searchGadeidentAdresser.put(gadeident, gade); + } + + Map litraList = gade.get(a.husnr); + if (litraList == null) { + litraList = new HashMap(); + gade.put(a.husnr, litraList); + } + + litraList.put(a.husnrbogstav, a); + } + + aliasHelper = null; //frigiv hukommelse inden stage2 + aliasGenerator = null; + + + //////////////////////////////////////////////////////////////////////////////////////// + long start2 = System.currentTimeMillis(); + System.out.println("Build, stage1 elapsed: " + (start2-start1) ); + System.out.println("Build -- stage 2 alias tabel"); + + int vaskCount = 0; + List aliasList = db.getAliasList(); + for (AliasBean alias : aliasList) { + Map postnrVeje = searchPostnrVejnavnGadeid.get(alias.postnr); + + if (postnrVeje == null) { + //Burde ikke kunne ske - men better safe than sorry + continue; + } + + String vasketVej = AddressUtils.vaskVejnavn(alias.vejnavn); + String vasketAlias = AddressUtils.vaskVejnavn(alias.aliasVejnavn); + + Integer gadeident = postnrVeje.get(vasketVej); + if (gadeident == null) { + //Kender ikke den oprindelige vej + continue; + } + + Integer aliasIdent = postnrVeje.get(vasketAlias); + + if (aliasIdent == null) { //Vi kender ikke denne variant af vejnavnet + postnrVeje.put(vasketAlias, gadeident); + vaskCount++; } - String husnrSearch = "" + a.husnr + a.husnrbogstav; - gade.put(husnrSearch, a); + } + System.out.println("Anvendte " + vaskCount + " aliaser fra databasen"); //////////////////////////////////////////////////////////////////////////////////////// - System.out.println("Build -- stage 2 udvidet dækning"); + long start3 = System.currentTimeMillis(); + System.out.println("Build, stage2 elapsed: " + (start3-start2) ); + System.out.println("Build -- stage 3 udvidet dækning"); + + DeduplicateHelper ruteHelper = new DeduplicateHelper(); + DeduplicateHelper korelisteHelper = new DeduplicateHelper(); - List extDao = DatabaseLayer.getExtendedAdresslist(); + List extDao = db.getExtendedAdresslist(); for (ExtendedBean eb : extDao) { Address orgAddress = idAddressMap.get(eb.orgId); if (orgAddress == null) continue; + if (orgAddress.daekningsType != DaekningsType.DAEKNING_IKKEDAEKKET) { + //kan være at adressen er blevet markeret dækket og - find-nærmeste data ikke er opdateret i mellemtiden + continue; + } + Address targetAddress = idAddressMap.get(eb.targetId); - if (targetAddress == null) + if (targetAddress == null) { continue; + } - if (orgAddress.distributor != null && orgAddress.distributor.equals("LUKKET")) { + + if ( StringUtils.equals(orgAddress.distributor, "LUKKET") ) { continue; } - if (targetAddress.distributor.equals("LUKKET")) { + + + if ( StringUtils.equals(targetAddress.distributor, "LUKKET") ) { continue; } @@ -158,7 +452,9 @@ boolean covered = false; if (targetAddress.distributor.equals("DAO")) { - orgAddress.rute = calculateExtendedDaoRoute(eb,orgAddress,targetAddress); + orgAddress.rute = calculateExtendedDaoRoute(eb,orgAddress,targetAddress); + orgAddress.rute = ruteHelper.getInstance(orgAddress.rute); + if (orgAddress.rute != null) { orgAddress.koreliste = targetAddress.koreliste; covered = true; @@ -166,7 +462,9 @@ } if (targetAddress.distributor.equals("BK")) { - orgAddress.koreliste = calculateExtendedBkKoreliste(eb,orgAddress,targetAddress); + orgAddress.koreliste = calculateExtendedBkKoreliste(eb,orgAddress,targetAddress); + orgAddress.koreliste = korelisteHelper.getInstance(orgAddress.koreliste); + if (orgAddress.koreliste != null) { orgAddress.rute = targetAddress.rute; covered = true; @@ -178,8 +476,10 @@ orgAddress.dbkBane = targetAddress.dbkBane; /* Sådan gør den gamle service */ - orgAddress.kommunekode = targetAddress.kommunekode; - orgAddress.vejkode = targetAddress.vejkode; + /* 20150520 THN- hvis vi bibeholder de originale kommune+vejkoder kan vi bruge DAVID til join i databasen*/ + //orgAddress.kommunekode = targetAddress.kommunekode; + //orgAddress.vejkode = targetAddress.vejkode; + orgAddress.distributor = targetAddress.distributor; } @@ -189,9 +489,11 @@ idAddressMap = null; ////////////////////////////////////////////////////////////////////////////////////// - System.out.println("Build -- stage 3 - 100pct"); + long start4 = System.currentTimeMillis(); + System.out.println("Build, stage3 elapsed: " + (start4-start3) ); + System.out.println("Build -- stage 4 - 100pct"); - hundredePct = DatabaseLayer.get100PctList(); + Map hundredePct = db.get100PctList(); for (Address addr : alleAdresser) { if (addr.daekningsType != DaekningsType.DAEKNING_IKKEDAEKKET) { continue; @@ -215,40 +517,62 @@ } //////////////////////////////////////////////////////////////////////////////////// + long stop = System.currentTimeMillis(); + System.out.println("Build, stage4 elapsed: " + (stop-start4) ); System.out.println("Build -- Gathering statistics"); - - int direkteCount = 0; - int extendedCount = 0; - int hundredePctCount = 0; - int ikkeDaekketCount = 0; - + for (Address addr : alleAdresser) { switch (addr.daekningsType) { case DAEKNING_DIREKTE: - direkteCount++; + stats.direkteCount++; break; case DAEKNING_UDVIDET: - extendedCount++; + stats.extendedCount++; break; case DAEKNING_100PCT: - hundredePctCount++; + stats.hundredePctCount++; break; default: - ikkeDaekketCount++; + stats.ikkeDaekketCount++; } - } + } + stats.totalCount = alleAdresser.size(); - System.out.println("Build: direkteCount: " + direkteCount); - System.out.println("Build: extendedCount: " + extendedCount); - System.out.println("Build: hundredePctCount: " + hundredePctCount); - System.out.println("Build: ikkeDaekketCount: " + ikkeDaekketCount); + stats.elapsed = stop-start1; + stats.buildTime = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format( new Date() ); + + System.out.println("Build: totalCount: " + stats.totalCount); + System.out.println("Build: direkteCount: " + stats.direkteCount); + System.out.println("Build: extendedCount: " + stats.extendedCount); + System.out.println("Build: hundredePctCount: " + stats.hundredePctCount); + System.out.println("Build: ikkeDaekketCount: " + stats.ikkeDaekketCount); + + System.out.println("Build: Total Elapsed: " + (stop-start1) ); System.out.println("Build Completed"); } + public DataStatisticsBean getStatistics() { + return stats; + } + + public DatabaseLayer getDatabaseLayer() { + return db; + } + + public void clear() { + searchPostnrVejnavnGadeid.clear(); + searchGadeidentAdresser.clear(); + alleAdresser.clear(); + helperCache.clear(); + } + private String calculateExtendedDaoRoute(ExtendedBean eb, Address orgAddress, Address targetAddress) { + if (targetAddress.rute == null) { //hvis targetAdress.rute er null så er adressen ikke dækket alligevel + return null; + } // /////////////////////////////////////////////////////////////////// switch( eb.transport) { @@ -259,7 +583,7 @@ return ".." + targetAddress.rute; } else if (eb.afstand < 0.701) { return "..." + targetAddress.rute; - } else if (eb.afstand < 0.501) { + } else if (eb.afstand < 1.001) { return "...." + targetAddress.rute; } break; @@ -285,9 +609,33 @@ return "...." + targetAddress.rute; } break; + default: + System.out.println("Ukendt transport type: " + eb ); } return null; - } + } + + public List
getNonCoveredAddresses() { + List
result = new ArrayList
(60000); + for (Address a : alleAdresser) { + if ( a.daekningsType == DaekningsType.DAEKNING_IKKEDAEKKET) { + result.add(a); + } + } + Collections.sort(result ); + return result; + } + + public List
get100PctAddresses() { + List
result = new ArrayList
(120000); + for (Address a : alleAdresser) { + if ( a.daekningsType == DaekningsType.DAEKNING_100PCT) { + result.add(a); + } + } + Collections.sort(result ); + return result; + } /////////////////////////////////////////////////////////// private String calculateExtendedBkKoreliste(ExtendedBean eb, Address orgAddress, Address targetAddress) { @@ -301,4 +649,40 @@ } + public Set getPostnumre() { + return new TreeSet( searchPostnrVejnavnGadeid.keySet() ); + } + + public Set> getVejnavne(short postnr) { + Map map = searchPostnrVejnavnGadeid.get(postnr); + + TreeMap newMap = new TreeMap( map );//Lav et nyt TreeMap for at sikre sortering + + return newMap.entrySet(); + } + + public Set getHusnumre(int gadeid) { + + Map> gade = searchGadeidentAdresser.get(gadeid); + + Set set = new TreeSet( new NaturalOrderComparator() ); + for(Map litraList : gade.values()) { + for (Address a: litraList.values()) { + set.add( a.husnr + a.husnrbogstav ); + } + } + + return set; + } + + public Address getAdresse(int gadeid, String husnrStr) { + short husnr = Short.parseShort( husnrStr.replaceAll("[^\\d]","") ); + String litra = husnrStr.replaceAll("\\d", ""); + + Map> gade = searchGadeidentAdresser.get(gadeid); + Map litraList = gade.get(husnr); + return litraList.get(litra); + + } + }