1 |
package dk.thoerup.spejdernetscraper; |
2 |
|
3 |
import java.util.Iterator; |
4 |
|
5 |
import org.jsoup.Jsoup; |
6 |
import org.jsoup.nodes.Document; |
7 |
import org.jsoup.nodes.Element; |
8 |
import org.jsoup.select.Elements; |
9 |
|
10 |
public class IMDBGenreScraper { |
11 |
|
12 |
public static String fetchGenres(String movieid) throws Exception { |
13 |
|
14 |
String url = "http://www.imdb.com/title/" + movieid + "/"; |
15 |
|
16 |
Document page = Jsoup.connect(url).get(); |
17 |
|
18 |
StringBuffer sb = new StringBuffer(); |
19 |
|
20 |
Element infoBar = page.getElementsByClass("infobar").get(0); |
21 |
System.out.println(infoBar.html()); |
22 |
|
23 |
Elements genres = infoBar.getElementsByAttributeValue("itemprop", "genre"); |
24 |
|
25 |
Iterator<Element> it = genres.iterator(); |
26 |
while (it.hasNext()) { |
27 |
Element el = it.next(); |
28 |
if (sb.length() > 0) |
29 |
sb.append("|"); |
30 |
|
31 |
sb.append( el.text().trim().toLowerCase() ); |
32 |
} |
33 |
|
34 |
return sb.toString(); |
35 |
} |
36 |
|
37 |
} |