package dk.daoas.daoadresseservice;

import java.sql.SQLException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.commons.lang3.StringUtils;

import dk.daoas.daoadresseservice.admin.ServiceConfig;
import dk.daoas.daoadresseservice.beans.Address;
import dk.daoas.daoadresseservice.beans.AliasBean;
import dk.daoas.daoadresseservice.beans.DataStatisticsBean;
import dk.daoas.daoadresseservice.beans.ExtendedBean;
import dk.daoas.daoadresseservice.beans.HundredePctBean;
import dk.daoas.daoadresseservice.beans.SearchRequest;
import dk.daoas.daoadresseservice.beans.SearchResult;
import dk.daoas.daoadresseservice.beans.SearchResult.Status;
import dk.daoas.daoadresseservice.db.DatabaseLayer;
import dk.daoas.daoadresseservice.util.DaoUtils;
import dk.daoas.daoadresseservice.util.DeduplicateHelper;
import dk.daoas.daoadresseservice.util.NaturalOrderComparator;
import dk.daoas.daoadresseservice.util.NearestShortTreeMap;

/**
 * In-memory address index: resolves a postal code plus a free-text address
 * line to an {@link Address} and its coverage status.
 *
 * <p>The index is populated by {@link #buildSearchStructures()} from the
 * {@link DatabaseLayer}; the search and getter methods read the maps that the
 * build creates, so the build must have run before they are used.
 *
 * <p>NOTE(review): this file was reconstructed from a collapsed diff in which
 * generic type arguments had been stripped. The type arguments below are
 * inferred from how the collections are used and should be confirmed against
 * the bean classes.
 */
public class AddressSearch {

	/** Distributor value that marks an address as closed ("LUKKET"). */
	private static final String DISTRIBUTOR_CLOSED = "LUKKET";

	/** Maximum number of trailing words dropped from a street name when retrying a lookup. */
	private static final int MAX_TRIES = 4;

	/** Distance limits per transport type; the n-th limit maps to a prefix of n dots. */
	private static final double[] LIMITS_CYKEL = { 0.151, 0.501, 0.701, 1.001 };
	private static final double[] LIMITS_SCOOTER = { 0.151, 0.801, 1.201, 2.101 };
	private static final double[] LIMITS_BIL = { 0.151, 1.001, 1.601, 2.601 };

	/** Postal code -> (washed street name or alias -> gadeident). */
	private Map<Short, Map<String, Integer>> searchPostnrVejnavnGadeid;

	/** Gadeident -> house number -> litra -> address. */
	private Map<Integer, Map<Short, Map<String, Address>>> searchGadeidentAdresser;

	private final List<Address> ikkeDaekkedeAdresser = new ArrayList<Address>(60000);
	private final List<Address> hundredePctAdresser = new ArrayList<Address>(120000);

	/** Cache of street names resolved by the external helpers, keyed by "postnr/washed street name". */
	private Map<String, Integer> helperCache;

	private DataStatisticsBean stats = new DataStatisticsBean();

	private final DatabaseLayer db;
	private final ServiceConfig config;

	StreetnameHelper levenshteinHelper;
	StreetnameHelper metaphoneHelper;
	StreetnameHelper googleHelper;
	StreetnameHelper osmHelper;

	/**
	 * Creates the search service and its street-name helpers. The index itself
	 * is not loaded until {@link #buildSearchStructures()} is called.
	 *
	 * @param db     source of addresses, aliases and coverage data
	 * @param config service configuration, also handed to the helpers
	 */
	public AddressSearch(DatabaseLayer db, ServiceConfig config) {
		this.db = db;
		this.config = config;

		levenshteinHelper = new LevenshteinStreetnameHelper(this);
		metaphoneHelper = new DoubleMetaphoneStreetnameHelper(config);
		googleHelper = new GoogleStreetnameHelper(config);
		osmHelper = new OSMStreetnameHelper(config);
	}

	/**
	 * Looks up an exact address and fails loudly when any part is unknown.
	 *
	 * @param gadeid street identifier
	 * @param husnr  house number
	 * @param litra  house letter, as stored in the index
	 * @return the matching address, never {@code null}
	 * @throws Exception if the street, the house number or the litra is not in the index
	 */
	public Address getAddressSafe(int gadeid, short husnr, String litra) throws Exception {
		Map<Short, Map<String, Address>> gade = searchGadeidentAdresser.get(gadeid);
		if (gade == null) {
			throw new Exception("GadeID ikke fundet");
		}

		// Previously an unknown house number surfaced as a NullPointerException.
		Map<String, Address> litraList = gade.get(husnr);
		if (litraList == null) {
			throw new Exception("Husnr ikke fundet");
		}

		Address addr = litraList.get(litra);
		if (addr == null) {
			throw new Exception("Adresse ikke fundet");
		}
		return addr;
	}

	/**
	 * Searches for an address without falling back to the nearest house number.
	 *
	 * @param postnrStr postal code as text
	 * @param adresse   street name, house number and optional litra as free text
	 * @return the search result; its {@code status} tells whether the lookup succeeded
	 */
	public SearchResult search(String postnrStr, String adresse) {
		return search(postnrStr, adresse, false);
	}

	/**
	 * Searches for an address.
	 *
	 * @param postnrStr      postal code as text
	 * @param adresse        street name, house number and optional litra as free text
	 * @param naermesteHusnr when {@code true}, an unknown house number is replaced by
	 *                       the nearest known one on the street, and an unknown litra
	 *                       by the first litra of that house number
	 * @return the search result; its {@code status} tells whether the lookup succeeded
	 */
	public SearchResult search(String postnrStr, String adresse, boolean naermesteHusnr) {
		postnrStr = postnrStr.trim();
		adresse = adresse.trim();

		SearchRequest request = new SearchRequest();
		SearchResult result = new SearchResult();

		try {
			request.postnr = Short.parseShort(postnrStr);
		} catch (NumberFormatException e) {
			result.status = Status.ERROR_UNKNOWN_POSTAL;
			return result;
		}

		request.streetNames = searchPostnrVejnavnGadeid.get(request.postnr);
		if (request.streetNames == null) {
			result.status = Status.ERROR_UNKNOWN_POSTAL;
			return result;
		}

		result.splitResult = AddressUtils.splitAdresse(adresse);
		request.vejnavn = result.splitResult.vej;

		if (result.splitResult.husnr.length() == 0) {
			result.status = Status.ERROR_MISSING_HOUSENUMBER;
			return result;
		}

		result.vasketVejnavn = AddressUtils.vaskVejnavn(request.vejnavn);

		if (isPostboxLike(result.vasketVejnavn)) {
			result.status = Status.ERROR_POSTBOX;
			return result;
		}

		// lookupStretname wraps the plain lookup + Levenshtein + metaphone + OSM + Google.
		Integer gadeident = lookupStretname(request, result, true);

		/*
		 * No direct hit: retry with words cut off the end of the street name.
		 * This section is VERY experimental, since it increases the number of
		 * calls to Google/OSM considerably.
		 *
		 * NOTE(review): the body of this loop and the handling of an
		 * unresolved street were lost when the source was extracted. What
		 * follows is a reconstruction from the surviving loop header and
		 * comment; verify it against version control before relying on it.
		 */
		if (gadeident == null) {
			String[] vejnavnParts = request.vejnavn.split(" ");

			for (int i = 1; i <= MAX_TRIES && i < vejnavnParts.length && gadeident == null; i++) {
				String shortened = StringUtils.join(vejnavnParts, " ", 0, vejnavnParts.length - i);
				request.vejnavn = shortened;
				result.vasketVejnavn = AddressUtils.vaskVejnavn(shortened);
				gadeident = lookupStretname(request, result, true);
			}
		}

		if (gadeident == null) {
			// NOTE(review): the original status for an unresolved street name is
			// unknown; ERROR_UNKNOWN_ADDRESSPOINT is the closest status visible in
			// this file. Replace with the dedicated street status if one exists.
			result.status = Status.ERROR_UNKNOWN_ADDRESSPOINT;
			return result;
		}

		Map<Short, Map<String, Address>> gade = searchGadeidentAdresser.get(gadeident);
		if (gade == null) { // this lookup must not fail for a known gadeident
			result.status = Status.ERROR_INTERNAL;
			return result;
		}

		// First house number -> first litra; only used to report the street's real name.
		Address firstAddress = gade.values().iterator().next().values().iterator().next();
		result.vej = firstAddress.vejnavn;

		short husnrSearch;
		try {
			husnrSearch = Short.parseShort(result.splitResult.husnr);
		} catch (NumberFormatException e) {
			// A house number that does not fit a short cannot be in the index.
			result.status = Status.ERROR_UNKNOWN_ADDRESSPOINT;
			return result;
		}

		Map<String, Address> litraList = gade.get(husnrSearch);
		if (litraList == null) { // house number not found
			if (!naermesteHusnr) {
				result.status = Status.ERROR_UNKNOWN_ADDRESSPOINT;
				return result;
			}
			// The per-street maps are created as TreeMaps in buildSearchStructures().
			TreeMap<Short, Map<String, Address>> gadeTreeMap = (TreeMap<Short, Map<String, Address>>) gade;
			result.anvendtHusnr = NearestShortTreeMap.getNearestKey(husnrSearch, gadeTreeMap);
			result.nearestHusnr = true;
			litraList = gade.get(result.anvendtHusnr);
		}

		Address addr = litraList.get(result.splitResult.litra);
		if (addr == null) { // house number found, litra not found
			if (config.nearestLitra || naermesteHusnr) {
				result.nearestLitra = true;
				addr = litraList.values().iterator().next();
			} else {
				result.status = Status.ERROR_UNKNOWN_ADDRESSPOINT;
				return result;
			}
		}

		result.address = addr;

		// Closed addresses share the "not covered" status (open question in the
		// original: should they get a status of their own?).
		if (StringUtils.equals(addr.distributor, DISTRIBUTOR_CLOSED)
				|| addr.daekningsType == DaekningsType.DAEKNING_IKKEDAEKKET) {
			result.status = Status.STATUS_NOT_COVERED;
			return result;
		}

		result.status = Status.STATUS_OK;
		return result;
	}

	/** Returns whether a washed street name is a parcel box / post box rather than a street. */
	private static boolean isPostboxLike(String vasketVejnavn) {
		return vasketVejnavn.contains("pakkebo")
				|| vasketVejnavn.contains("pakkepost")
				|| vasketVejnavn.contains("postbo") // postbox/postboks
				|| vasketVejnavn.contains("døgnpost")
				|| vasketVejnavn.contains("døgnbo") // døgnbox/døgnboks
				|| vasketVejnavn.equals("id");
	}

	/**
	 * Resolves {@code result.vasketVejnavn} to a gadeident, trying in order: the
	 * exact name, the helper cache, the Levenshtein helper, the metaphone helper
	 * and - only when {@code useExternal} is set - OpenStreetMap and then Google.
	 *
	 * @return the gadeident, or {@code null} when no step produced a known street
	 */
	private Integer lookupStretname(SearchRequest request, SearchResult result, boolean useExternal) {
		String helperSearchKey = "";

		Integer gadeident = request.streetNames.get(result.vasketVejnavn);

		if (gadeident == null) {
			helperSearchKey = "" + request.postnr + "/" + result.vasketVejnavn;
			gadeident = helperCache.get(helperSearchKey);
		}

		if (gadeident == null) {
			String vej = levenshteinHelper.proposeStreetName(request, result);
			if (vej != null) {
				gadeident = request.streetNames.get(vej);
			}
		}

		if (gadeident == null) {
			String vej = metaphoneHelper.proposeStreetName(request, result);
			if (vej != null) {
				gadeident = request.streetNames.get(vej);
			}
		}

		// Use OpenStreetMap before Google: Google has a request limit, OSM does not.
		if (gadeident == null && useExternal) {
			String vej = osmHelper.proposeStreetName(request, result);
			gadeident = helperWrapper(vej, request.streetNames, helperSearchKey);
		}

		if (gadeident == null && useExternal) {
			String vej = googleHelper.proposeStreetName(request, result);
			gadeident = helperWrapper(vej, request.streetNames, helperSearchKey);
		}
		return gadeident;
	}

	/**
	 * Washes a street name proposed by an external helper, looks it up among the
	 * postal code's streets and caches a hit under {@code helperSearchKey}.
	 *
	 * @return the gadeident, or {@code null} if {@code vej} is {@code null} or unknown
	 */
	private Integer helperWrapper(String vej, Map<String, Integer> postnrVeje, String helperSearchKey) {
		if (vej == null) {
			return null;
		}

		Integer gadeident = postnrVeje.get(AddressUtils.vaskVejnavn(vej));
		if (gadeident != null) {
			helperCache.put(helperSearchKey, gadeident);
		}
		return gadeident;
	}

	/**
	 * @param zip postal code
	 * @return the internal street name -> gadeident map of the postal code, or
	 *         {@code null} if the postal code is unknown
	 */
	public Map<String, Integer> getStretsByPostal(short zip) {
		return searchPostnrVejnavnGadeid.get(zip);
	}

	/**
	 * Loads all data from the database and (re)builds the search index in four
	 * stages: address index, alias table, extended coverage and 100% coverage;
	 * finally the statistics are gathered. Progress is printed to standard out.
	 *
	 * @throws SQLException if reading from the database fails
	 */
	public void buildSearchStructures() throws SQLException {
		searchPostnrVejnavnGadeid = new HashMap<Short, Map<String, Integer>>();
		searchGadeidentAdresser = new HashMap<Integer, Map<Short, Map<String, Address>>>();
		helperCache = new ConcurrentHashMap<String, Integer>();

		// Start from a clean slate so a rebuild does not double lists and counters.
		ikkeDaekkedeAdresser.clear();
		hundredePctAdresser.clear();
		stats = new DataStatisticsBean();

		long start1 = System.currentTimeMillis();
		System.out.println("Build -- stage 1");

		List<Address> alleAdresser = db.getAllAdresses();
		Map<Integer, Address> idAddressMap = indexAddresses(alleAdresser);

		long start2 = System.currentTimeMillis();
		System.out.println("Build, stage1 elapsed: " + (start2 - start1));
		System.out.println("Build -- stage 2 alias tabel");

		applyAliasTable();

		long start3 = System.currentTimeMillis();
		System.out.println("Build, stage2 elapsed: " + (start3 - start2));
		System.out.println("Build -- stage 3 udvidet dækning");

		applyExtendedCoverage(idAddressMap);
		idAddressMap = null; // release before stage 4

		long start4 = System.currentTimeMillis();
		System.out.println("Build, stage3 elapsed: " + (start4 - start3));
		System.out.println("Build -- stage 4 - 100pct");

		applyHundredPctCoverage(alleAdresser);

		long stop = System.currentTimeMillis();
		System.out.println("Build, stage4 elapsed: " + (stop - start4));
		System.out.println("Build -- Gathering statistics");

		gatherStatistics(alleAdresser);

		stats.elapsed = stop - start1;
		stats.buildTime = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date());

		System.out.println("Build: totalCount: " + stats.totalCount);
		System.out.println("Build: direkteCount: " + stats.direkteCount);
		System.out.println("Build: extendedCount: " + stats.extendedCount);
		System.out.println("Build: hundredePctCount: " + stats.hundredePctCount);
		System.out.println("Build: ikkeDaekketCount: " + stats.ikkeDaekketCount);

		System.out.println("Build: Total Elapsed: " + (stop - start1));
		System.out.println("Build Completed");
	}

	/**
	 * Stage 1: fills both search maps from the address list.
	 *
	 * @return database row id -> address, needed by stage 3
	 */
	private Map<Integer, Address> indexAddresses(List<Address> alleAdresser) {
		// NOTE(review): assumes Address.id and the ExtendedBean ids are int - confirm.
		Map<Integer, Address> idAddressMap = new HashMap<Integer, Address>(alleAdresser.size());

		DeduplicateHelper<Short> shortHelper = new DeduplicateHelper<Short>();
		DeduplicateHelper<Integer> intHelper = new DeduplicateHelper<Integer>();
		AliasGenerator aliasGenerator = new AliasGenerator();

		for (Address a : alleAdresser) {
			idAddressMap.put(a.id, a);

			Short postnr = shortHelper.getInstance(a.postnr);

			Map<String, Integer> postnrVeje = searchPostnrVejnavnGadeid.get(postnr);
			if (postnrVeje == null) {
				// The largest postal code currently holds about 9500 entries.
				postnrVeje = new HashMap<String, Integer>(10240);
				searchPostnrVejnavnGadeid.put(postnr, postnrVeje);
			}

			String vasketVejnavn = AddressUtils.vaskVejnavn(a.vejnavn);
			Integer gadeident = postnrVeje.get(vasketVejnavn);
			if (gadeident == null) {
				gadeident = intHelper.getInstance(a.gadeid);

				// The washed name itself is not put explicitly; presumably the
				// generated alias set contains it - verify in AliasGenerator.
				Set<String> aliaser = aliasGenerator.findVejAliaser(a.vejnavn);
				for (String alias : aliaser) {
					postnrVeje.put(alias, gadeident);
				}
			}

			Map<Short, Map<String, Address>> gade = searchGadeidentAdresser.get(gadeident);
			if (gade == null) {
				// TreeMap: search() relies on it for the nearest-house-number lookup.
				gade = new TreeMap<Short, Map<String, Address>>();
				searchGadeidentAdresser.put(gadeident, gade);
			}

			Map<String, Address> litraList = gade.get(a.husnr);
			if (litraList == null) {
				litraList = new HashMap<String, Address>();
				gade.put(a.husnr, litraList);
			}

			litraList.put(a.husnrbogstav, a);
		}

		// Release helper memory before stage 2.
		aliasGenerator.clear();
		shortHelper.clear();
		intHelper.clear();

		return idAddressMap;
	}

	/** Stage 2: adds street-name aliases from the database for streets that are already known. */
	private void applyAliasTable() throws SQLException {
		int vaskCount = 0;
		List<AliasBean> aliasList = db.getAliasList();

		for (AliasBean alias : aliasList) {
			Map<String, Integer> postnrVeje = searchPostnrVejnavnGadeid.get(alias.postnr);
			if (postnrVeje == null) {
				continue; // should not happen - but better safe than sorry
			}

			String vasketVej = AddressUtils.vaskVejnavn(alias.vejnavn);
			String vasketAlias = AddressUtils.vaskVejnavn(alias.aliasVejnavn);

			Integer gadeident = postnrVeje.get(vasketVej);
			if (gadeident == null) {
				continue; // the original street is unknown
			}

			if (postnrVeje.get(vasketAlias) == null) { // this variant of the name is new
				postnrVeje.put(vasketAlias, gadeident);
				vaskCount++;
			}
		}
		System.out.println("Anvendte " + vaskCount + " aliaser fra databasen");

		aliasList.clear(); // cleanup before stage 3
	}

	/**
	 * Stage 3: gives non-covered addresses extended coverage through a nearby
	 * covered target address, when the distance rules allow it.
	 */
	private void applyExtendedCoverage(Map<Integer, Address> idAddressMap) throws SQLException {
		DeduplicateHelper<String> ruteHelper = new DeduplicateHelper<String>();
		DeduplicateHelper<String> korelisteHelper = new DeduplicateHelper<String>();

		List<ExtendedBean> extDao = db.getExtendedAdresslist();
		for (ExtendedBean eb : extDao) {
			Address orgAddress = idAddressMap.get(eb.orgId);
			if (orgAddress == null) {
				continue;
			}

			if (orgAddress.daekningsType != DaekningsType.DAEKNING_IKKEDAEKKET) {
				// The address may have become covered after the find-nearest data was produced.
				continue;
			}

			Address targetAddress = idAddressMap.get(eb.targetId);
			if (targetAddress == null) {
				continue;
			}

			if (StringUtils.equals(orgAddress.distributor, DISTRIBUTOR_CLOSED)
					|| StringUtils.equals(targetAddress.distributor, DISTRIBUTOR_CLOSED)) {
				continue;
			}

			orgAddress.extTarget = targetAddress;
			orgAddress.extAfstand = eb.afstand;

			boolean covered = false;

			// Constant-first equals: distributor is treated as nullable elsewhere in this class.
			if ("DAO".equals(targetAddress.distributor)) {
				orgAddress.ruteMandag = calculateExtendedDaoRoute(eb, orgAddress, targetAddress);
				orgAddress.ruteMandag = ruteHelper.getInstance(orgAddress.ruteMandag);

				if (orgAddress.ruteMandag != null) {
					orgAddress.korelisteMandag = targetAddress.korelisteMandag;
					covered = true;
				}
			}

			if ("BK".equals(targetAddress.distributor)) {
				orgAddress.korelisteMandag = calculateExtendedBkKoreliste(eb, orgAddress, targetAddress);
				orgAddress.korelisteMandag = korelisteHelper.getInstance(orgAddress.korelisteMandag);

				if (orgAddress.korelisteMandag != null) {
					orgAddress.ruteMandag = targetAddress.ruteMandag;
					covered = true;
				}
			}

			if (covered) { // copy the remaining fields
				orgAddress.daekningsType = DaekningsType.DAEKNING_UDVIDET;
				orgAddress.dbkBane = targetAddress.dbkBane;

				// 20150520 THN: kommunekode and vejkode are deliberately NOT copied from
				// the target (the old service did); keeping the originals allows joining
				// against DAVID in the database.
				orgAddress.distributor = targetAddress.distributor;
			}
		}
	}

	/** Stage 4: applies the per-postal-code 100% coverage data to addresses that are still not covered. */
	private void applyHundredPctCoverage(List<Address> alleAdresser) throws SQLException {
		Map<Short, HundredePctBean> hundredePct = db.get100PctList();

		for (Address addr : alleAdresser) {
			if (addr.daekningsType != DaekningsType.DAEKNING_IKKEDAEKKET) {
				continue;
			}
			if (DISTRIBUTOR_CLOSED.equals(addr.distributor)) {
				continue;
			}

			HundredePctBean bean = hundredePct.get(addr.postnr);
			if (bean == null) {
				continue;
			}

			addr.daekningsType = DaekningsType.DAEKNING_100PCT;
			addr.ruteMandag = bean.rute;
			addr.korelisteMandag = bean.koreliste;
			addr.dbkBane = bean.dbkBane;
			addr.distributor = bean.distributor;
		}
	}

	/** Counts addresses per coverage type and fills the sorted non-covered and 100% lists. */
	private void gatherStatistics(List<Address> alleAdresser) {
		for (Address addr : alleAdresser) {
			switch (addr.daekningsType) {
			case DAEKNING_DIREKTE:
				stats.direkteCount++;
				break;
			case DAEKNING_UDVIDET:
				stats.extendedCount++;
				break;
			case DAEKNING_100PCT:
				hundredePctAdresser.add(addr);
				stats.hundredePctCount++;
				break;
			default:
				ikkeDaekkedeAdresser.add(addr);
				stats.ikkeDaekketCount++;
			}
		}
		stats.totalCount = alleAdresser.size();

		// A null comparator sorts by the elements' Comparable ordering.
		// The original sorted ikkeDaekkedeAdresser twice and never sorted the 100% list.
		ikkeDaekkedeAdresser.sort(null);
		hundredePctAdresser.sort(null);
	}

	/** @return the statistics gathered by the latest {@link #buildSearchStructures()} run */
	public DataStatisticsBean getStatistics() {
		return stats;
	}

	/** @return the database layer this service was constructed with */
	public DatabaseLayer getDatabaseLayer() {
		return db;
	}

	/** Empties the search maps and the helper cache; a no-op for maps that were never built. */
	public void clear() {
		if (searchPostnrVejnavnGadeid != null) {
			searchPostnrVejnavnGadeid.clear();
		}
		if (searchGadeidentAdresser != null) {
			searchGadeidentAdresser.clear();
		}
		if (helperCache != null) {
			helperCache.clear();
		}
	}

	/**
	 * Derives the extended-coverage route for a DAO target: the target's Monday
	 * route prefixed with one to four dots, more dots meaning a longer distance.
	 * The distance limits depend on the transport type.
	 *
	 * @return the prefixed route, or {@code null} if the target has no route, the
	 *         distance exceeds the last limit or the transport type is unknown
	 */
	private String calculateExtendedDaoRoute(ExtendedBean eb, Address orgAddress, Address targetAddress) {
		if (targetAddress.ruteMandag == null) {
			// Without a route on the target the address is not covered after all.
			return null;
		}

		switch (eb.transport) {
		case "cykel":
			return prefixRouteByDistance(eb.afstand, LIMITS_CYKEL, targetAddress.ruteMandag);
		case "scooter":
			return prefixRouteByDistance(eb.afstand, LIMITS_SCOOTER, targetAddress.ruteMandag);
		case "bil":
			return prefixRouteByDistance(eb.afstand, LIMITS_BIL, targetAddress.ruteMandag);
		default:
			System.out.println("Ukendt transport type: " + eb);
			return null;
		}
	}

	/**
	 * Prefixes {@code rute} with n dots, where n is the 1-based position of the
	 * first limit that {@code afstand} is strictly below.
	 *
	 * @return the prefixed route, or {@code null} if no limit is above {@code afstand}
	 */
	private static String prefixRouteByDistance(double afstand, double[] limits, String rute) {
		StringBuilder dots = new StringBuilder();
		for (double limit : limits) {
			dots.append('.');
			if (afstand < limit) {
				return dots.append(rute).toString();
			}
		}
		return null;
	}

	/** @return the addresses that ended up without coverage in the latest build (live internal list) */
	public List<Address> getNonCoveredAddresses() {
		return ikkeDaekkedeAdresser;
	}

	/** @return the addresses that got 100% coverage in the latest build (live internal list) */
	public List<Address> get100PctAddresses() {
		return hundredePctAdresser;
	}

	/**
	 * Derives the extended-coverage koreliste for a BK target by injecting "."
	 * (distance up to 0.500) or ".." (longer) into the target's Monday koreliste.
	 */
	private String calculateExtendedBkKoreliste(ExtendedBean eb, Address orgAddress, Address targetAddress) {
		String inject = (eb.afstand <= 0.500) ? "." : "..";
		return AddressUtils.injectIntoBk(targetAddress.korelisteMandag, inject);
	}

	/** @return a sorted copy of all postal codes in the index */
	public Set<Short> getPostnumre() {
		return new TreeSet<Short>(searchPostnrVejnavnGadeid.keySet());
	}

	/**
	 * @param postnr postal code
	 * @return the street name -> gadeident entries of the postal code, sorted by
	 *         street name; empty if the postal code is unknown
	 */
	public Set<Entry<String, Integer>> getVejnavne(short postnr) {
		Map<String, Integer> map = searchPostnrVejnavnGadeid.get(postnr);
		if (map == null) {
			return Collections.emptySet();
		}

		// Copy into a TreeMap to guarantee sorted iteration.
		return new TreeMap<String, Integer>(map).entrySet();
	}

	/**
	 * @param gadeid street identifier
	 * @return all "house number + litra" strings of the street in natural order;
	 *         empty if the street is unknown
	 */
	public Set<String> getHusnumre(int gadeid) {
		Set<String> set = new TreeSet<String>(new NaturalOrderComparator());

		Map<Short, Map<String, Address>> gade = searchGadeidentAdresser.get(gadeid);
		if (gade == null) {
			return set;
		}

		for (Map<String, Address> litraList : gade.values()) {
			for (Address a : litraList.values()) {
				set.add(a.husnr + a.husnrbogstav);
			}
		}
		return set;
	}

	/**
	 * Looks up an address from a combined "house number + litra" string such as
	 * those returned by {@link #getHusnumre(int)}.
	 *
	 * @param gadeid   street identifier
	 * @param husnrStr house number digits plus optional litra
	 * @return the address, or {@code null} if the street, house number or litra is unknown
	 * @throws NumberFormatException if {@code husnrStr} contains no digits or the
	 *                               number does not fit a {@code short}
	 */
	public Address getAdresse(int gadeid, String husnrStr) {
		short husnr = Short.parseShort(husnrStr.replaceAll("[^\\d]", ""));
		String litra = husnrStr.replaceAll("\\d", "");

		Map<Short, Map<String, Address>> gade = searchGadeidentAdresser.get(gadeid);
		if (gade == null) {
			return null;
		}

		Map<String, Address> litraList = gade.get(husnr);
		if (litraList == null) {
			return null;
		}
		return litraList.get(litra);
	}
}