1 |
package dk.thoerup.spejdernetscraper;
|
2 |
|
3 |
import org.jsoup.Jsoup;
|
4 |
import org.jsoup.nodes.Document;
|
5 |
import org.jsoup.nodes.Element;
|
6 |
import org.jsoup.select.Elements;
|
7 |
|
8 |
public class IMDBSeriesScraper {
|
9 |
|
10 |
public String fechTitle(String sid, int season, int episode) throws Exception {
|
11 |
|
12 |
String epMatch = "Ep" + episode;
|
13 |
|
14 |
String url = "http://www.imdb.com/title/" + sid + "/episodes?season=" + season;
|
15 |
|
16 |
Document page = Jsoup.connect(url)
|
17 |
.get();
|
18 |
|
19 |
Element episodesContent = page.getElementById("episodes_content");
|
20 |
|
21 |
Elements episodesList = episodesContent.getElementsByClass("list_item");
|
22 |
|
23 |
for (int i=0; i<episodesList.size(); i++) {
|
24 |
Element curEp = episodesList.get(i);
|
25 |
|
26 |
Element image = curEp.getElementsByClass("image").first();
|
27 |
|
28 |
Element anchor = image.child(0);
|
29 |
|
30 |
String title = anchor.attr("title");
|
31 |
String content = anchor.text().trim();
|
32 |
|
33 |
if ( content.endsWith(epMatch)) {
|
34 |
return title;
|
35 |
}
|
36 |
}
|
37 |
|
38 |
|
39 |
throw new Exception("Episode not found !");
|
40 |
}
|
41 |
|
42 |
}
|