/[projects]/miscJava/SpejdernetScraper/src/dk/thoerup/spejdernetscraper/IMDBGenreScraper.java
ViewVC logotype

Contents of /miscJava/SpejdernetScraper/src/dk/thoerup/spejdernetscraper/IMDBGenreScraper.java

Parent Directory | Revision Log


Revision 2122 - (show annotations) (download)
Mon Mar 3 17:32:14 2014 UTC (10 years, 2 months ago) by torben
File size: 866 byte(s)
Add imdb genre scraper
1 package dk.thoerup.spejdernetscraper;
2
3 import java.util.Iterator;
4
5 import org.jsoup.Jsoup;
6 import org.jsoup.nodes.Document;
7 import org.jsoup.nodes.Element;
8 import org.jsoup.select.Elements;
9
10 public class IMDBGenreScraper {
11
12 public static String fetchGenres(String movieid) throws Exception {
13
14 String url = "http://www.imdb.com/title/" + movieid + "/";
15
16 Document page = Jsoup.connect(url).get();
17
18 StringBuffer sb = new StringBuffer();
19
20 Element infoBar = page.getElementsByClass("infobar").get(0);
21 System.out.println(infoBar.html());
22
23 Elements genres = infoBar.getElementsByAttributeValue("itemprop", "genre");
24
25 Iterator<Element> it = genres.iterator();
26 while (it.hasNext()) {
27 Element el = it.next();
28 if (sb.length() > 0)
29 sb.append("|");
30
31 sb.append( el.text().trim().toLowerCase() );
32 }
33
34 return sb.toString();
35 }
36
37 }

  ViewVC Help
Powered by ViewVC 1.1.20