/[projects]/miscJava/SpejdernetScraper/src/dk/thoerup/spejdernetscraper/IMDBSeriesScraper.java
ViewVC logotype

Annotation of /miscJava/SpejdernetScraper/src/dk/thoerup/spejdernetscraper/IMDBSeriesScraper.java

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2067 - (hide annotations) (download)
Thu Nov 7 08:11:17 2013 UTC (10 years, 6 months ago) by torben
File size: 1057 byte(s)
Added IMDB episode title scraper
1 torben 2067 package dk.thoerup.spejdernetscraper;
2    
3     import org.jsoup.Jsoup;
4     import org.jsoup.nodes.Document;
5     import org.jsoup.nodes.Element;
6     import org.jsoup.select.Elements;
7    
8     public class IMDBSeriesScraper {
9    
10     public String fechTitle(String sid, int season, int episode) throws Exception {
11    
12     String epMatch = "Ep" + episode;
13    
14     String url = "http://www.imdb.com/title/" + sid + "/episodes?season=" + season;
15    
16     Document page = Jsoup.connect(url)
17     .get();
18    
19     Element episodesContent = page.getElementById("episodes_content");
20    
21     Elements episodesList = episodesContent.getElementsByClass("list_item");
22    
23     for (int i=0; i<episodesList.size(); i++) {
24     Element curEp = episodesList.get(i);
25    
26     Element image = curEp.getElementsByClass("image").first();
27    
28     Element anchor = image.child(0);
29    
30     String title = anchor.attr("title");
31     String content = anchor.text().trim();
32    
33     if ( content.endsWith(epMatch)) {
34     return title;
35     }
36     }
37    
38    
39     throw new Exception("Episode not found !");
40     }
41    
42     }

  ViewVC Help
Powered by ViewVC 1.1.20