/[projects]/miscJava/SpejdernetScraper/src/main/java/dk/thoerup/spejdernetscraper/IMDBSeriesScraper.java
ViewVC logotype

Contents of /miscJava/SpejdernetScraper/src/main/java/dk/thoerup/spejdernetscraper/IMDBSeriesScraper.java

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3230 - (show annotations) (download)
Wed May 16 10:38:32 2018 UTC (6 years ago) by torben
File size: 1261 byte(s)
Improve title parsing
1 package dk.thoerup.spejdernetscraper;
2
3 import org.jsoup.Jsoup;
4 import org.jsoup.nodes.Document;
5 import org.jsoup.nodes.Element;
6 import org.jsoup.select.Elements;
7
8 public class IMDBSeriesScraper {
9
10 public String fechTitle(String sid, int season, int episode) throws Exception {
11
12 final String epMatch = "ep" + episode;
13 final String noTitle = "Episode #" + season + "." + episode;
14
15 String url = "http://www.imdb.com/title/" + sid + "/episodes?season=" + season;
16
17 Document page = Jsoup.connect(url)
18 .get();
19
20 Element episodesContent = page.getElementById("episodes_content");
21
22 Elements episodesList = episodesContent.getElementsByClass("list_item");
23
24 for (int i=0; i<episodesList.size(); i++) {
25 Element curEp = episodesList.get(i);
26
27 Element image = curEp.getElementsByClass("image").first();
28
29 Element anchor = image.child(0);
30
31 String title = anchor.attr("title");
32 //String content = anchor.text().trim();
33 String href = anchor.attr("href");
34
35 if ( href.endsWith(epMatch)) {
36 if (title.equalsIgnoreCase(noTitle)) {
37 return "!Title not found!";
38 }
39
40 return title;
41 }
42 }
43
44
45 throw new Exception("Episode not found !");
46 }
47
48 }

  ViewVC Help
Powered by ViewVC 1.1.20