/[projects]/miscJava/SpejdernetScraper/src/dk/thoerup/spejdernetscraper/IMDBSeriesScraper.java
ViewVC logotype

Contents of /miscJava/SpejdernetScraper/src/dk/thoerup/spejdernetscraper/IMDBSeriesScraper.java

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2068 - (show annotations) (download)
Thu Nov 7 08:14:34 2013 UTC (10 years, 6 months ago) by torben
File size: 1238 byte(s)
Minor corrections
1 package dk.thoerup.spejdernetscraper;
2
3 import org.jsoup.Jsoup;
4 import org.jsoup.nodes.Document;
5 import org.jsoup.nodes.Element;
6 import org.jsoup.select.Elements;
7
8 public class IMDBSeriesScraper {
9
10 public String fechTitle(String sid, int season, int episode) throws Exception {
11
12 final String epMatch = "Ep" + episode;
13 final String noTitle = "Episode #" + season + "." + episode;
14
15 String url = "http://www.imdb.com/title/" + sid + "/episodes?season=" + season;
16
17 Document page = Jsoup.connect(url)
18 .get();
19
20 Element episodesContent = page.getElementById("episodes_content");
21
22 Elements episodesList = episodesContent.getElementsByClass("list_item");
23
24 for (int i=0; i<episodesList.size(); i++) {
25 Element curEp = episodesList.get(i);
26
27 Element image = curEp.getElementsByClass("image").first();
28
29 Element anchor = image.child(0);
30
31 String title = anchor.attr("title");
32 String content = anchor.text().trim();
33
34 if ( content.endsWith(epMatch)) {
35 if (title.equalsIgnoreCase(noTitle))
36 throw new Exception("Episode found without title");
37
38 return title;
39 }
40 }
41
42
43 throw new Exception("Episode not found !");
44 }
45
46 }

  ViewVC Help
Powered by ViewVC 1.1.20