--- miscJava/SpejdernetScraper/src/dk/thoerup/spejdernetscraper/IMDBSeriesScraper.java	2014/02/11 20:42:35	2119
+++ miscJava/SpejdernetScraper/src/main/java/dk/thoerup/spejdernetscraper/IMDBSeriesScraper.java	2018/05/16 12:09:23	3234
@@ -1,21 +1,48 @@
 package dk.thoerup.spejdernetscraper;
 
+import java.util.concurrent.Callable;
+import java.util.concurrent.TimeUnit;
+
 import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 import org.jsoup.select.Elements;
 
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
+
+import dk.thoerup.genericjavautils.HttpUtil;
+
 public class IMDBSeriesScraper {
 	
+	private static Cache<String,String> webCache = CacheBuilder.newBuilder() // static: one cache shared by all instances; getDoc() keys it by URL and stores the fetched content string
+			.expireAfterWrite(5, TimeUnit.MINUTES) // an entry expires 5 minutes after it was written
+			.maximumSize(1000) // size bound: 1000 entries
+			.build();
+	
+	private String getDoc(final String url) throws Exception {
+		// Returns the content string for url via webCache; the Callable below is the loader handed to Cache.get.
+		return webCache.get(url, new Callable<String>() {
+			// Loader: fetches the content for url through HttpUtil.
+			@Override
+			public String call() throws Exception {
+				//TODO: implement something that can follow redirects
+				return HttpUtil.getContentString(url, 10000); // NOTE(review): 10000 is presumably a timeout in ms - confirm against HttpUtil
+			}
+		});
+		
+	}
+	
 	public String fechTitle(String sid, int season, int episode) throws Exception {
 		
-		final String epMatch = "Ep" + episode;		
+		final String epMatch = "ep" + episode;		
 		final String noTitle = "Episode #" + season + "." + episode;
 		
-		String url = "http://www.imdb.com/title/" + sid + "/episodes?season=" + season;
+		String url = "https://www.imdb.com/title/" + sid + "/episodes?season=" + season;
+		
+		String rawHtml = getDoc(url);	
 		
-		Document page = Jsoup.connect(url)
-				.get();
+		Document page = Jsoup.parse(rawHtml);
 		
 		Element episodesContent = page.getElementById("episodes_content");
 		
@@ -29,9 +56,10 @@
 			Element anchor = image.child(0);
 			
 			String title = anchor.attr("title");
-			String content = anchor.text().trim();
+			//String content = anchor.text().trim();
+			String href = anchor.attr("href");
 			
-			if ( content.endsWith(epMatch)) {
+			if ( href.endsWith(epMatch)) {
 				if (title.equalsIgnoreCase(noTitle)) {
 					return "!Title not found!";			
 				}