--- miscJava/SpejdernetScraper/src/main/java/dk/thoerup/spejdernetscraper/IMDBSeriesScraper.java 2018/05/16 10:38:32 3230
+++ miscJava/SpejdernetScraper/src/main/java/dk/thoerup/spejdernetscraper/IMDBSeriesScraper.java 2018/05/16 11:39:19 3231
@@ -1,12 +1,37 @@
 package dk.thoerup.spejdernetscraper;
 
+import java.util.concurrent.Callable;
+import java.util.concurrent.TimeUnit;
+
 import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 import org.jsoup.select.Elements;
 
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
+
+import dk.thoerup.genericjavautils.HttpUtil;
+
 public class IMDBSeriesScraper {
 
+    private static Cache<String, String> webCache = CacheBuilder.newBuilder()
+            .expireAfterWrite(5, TimeUnit.MINUTES)
+            .maximumSize(1000)
+            .build();
+
+    private String getDoc(final String url) throws Exception {
+
+        return webCache.get(url, new Callable<String>() {
+
+            @Override
+            public String call() throws Exception {
+                return HttpUtil.getContentString(url, 10000);
+            }
+        });
+
+    }
+
     public String fechTitle(String sid, int season, int episode) throws Exception {
 
         final String epMatch = "ep" + episode;
@@ -14,8 +39,9 @@
 
         String url = "http://www.imdb.com/title/" + sid + "/episodes?season=" + season;
 
-        Document page = Jsoup.connect(url)
-                .get();
+        String rawHtml = getDoc(url);
+
+        Document page = Jsoup.parse(rawHtml);
 
         Element episodesContent = page.getElementById("episodes_content");
 