/[projects]/miscJava/SpejdernetScraper/src/main/java/dk/thoerup/spejdernetscraper/IMDBSeriesScraper.java
ViewVC logotype

Annotation of /miscJava/SpejdernetScraper/src/main/java/dk/thoerup/spejdernetscraper/IMDBSeriesScraper.java

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3230 - (hide annotations) (download)
Wed May 16 10:38:32 2018 UTC (6 years ago) by torben
File size: 1261 byte(s)
Improve title parsing
1 torben 2067 package dk.thoerup.spejdernetscraper;
2    
3     import org.jsoup.Jsoup;
4     import org.jsoup.nodes.Document;
5     import org.jsoup.nodes.Element;
6     import org.jsoup.select.Elements;
7    
8     public class IMDBSeriesScraper {
9    
10     public String fechTitle(String sid, int season, int episode) throws Exception {
11    
12 torben 3230 final String epMatch = "ep" + episode;
13 torben 2068 final String noTitle = "Episode #" + season + "." + episode;
14 torben 2067
15     String url = "http://www.imdb.com/title/" + sid + "/episodes?season=" + season;
16    
17     Document page = Jsoup.connect(url)
18     .get();
19    
20     Element episodesContent = page.getElementById("episodes_content");
21    
22     Elements episodesList = episodesContent.getElementsByClass("list_item");
23    
24     for (int i=0; i<episodesList.size(); i++) {
25     Element curEp = episodesList.get(i);
26    
27     Element image = curEp.getElementsByClass("image").first();
28    
29     Element anchor = image.child(0);
30    
31     String title = anchor.attr("title");
32 torben 3230 //String content = anchor.text().trim();
33     String href = anchor.attr("href");
34 torben 2067
35 torben 3230 if ( href.endsWith(epMatch)) {
36 torben 2119 if (title.equalsIgnoreCase(noTitle)) {
37     return "!Title not found!";
38     }
39 torben 2068
40 torben 2067 return title;
41     }
42     }
43    
44    
45     throw new Exception("Episode not found !");
46     }
47    
48     }

  ViewVC Help
Powered by ViewVC 1.1.20