/[projects]/miscJava/SpejdernetScraper/src/dk/thoerup/spejdernetscraper/IMDBGenreScraper.java
ViewVC logotype

Annotation of /miscJava/SpejdernetScraper/src/dk/thoerup/spejdernetscraper/IMDBGenreScraper.java

Parent Directory | Revision Log


Revision 2122 - (hide annotations) (download)
Mon Mar 3 17:32:14 2014 UTC (10 years, 3 months ago) by torben
File size: 866 byte(s)
Add imdb genre scraper
1 torben 2122 package dk.thoerup.spejdernetscraper;
2    
3     import java.util.Iterator;
4    
5     import org.jsoup.Jsoup;
6     import org.jsoup.nodes.Document;
7     import org.jsoup.nodes.Element;
8     import org.jsoup.select.Elements;
9    
10     public class IMDBGenreScraper {
11    
12     public static String fetchGenres(String movieid) throws Exception {
13    
14     String url = "http://www.imdb.com/title/" + movieid + "/";
15    
16     Document page = Jsoup.connect(url).get();
17    
18     StringBuffer sb = new StringBuffer();
19    
20     Element infoBar = page.getElementsByClass("infobar").get(0);
21     System.out.println(infoBar.html());
22    
23     Elements genres = infoBar.getElementsByAttributeValue("itemprop", "genre");
24    
25     Iterator<Element> it = genres.iterator();
26     while (it.hasNext()) {
27     Element el = it.next();
28     if (sb.length() > 0)
29     sb.append("|");
30    
31     sb.append( el.text().trim().toLowerCase() );
32     }
33    
34     return sb.toString();
35     }
36    
37     }

  ViewVC Help
Powered by ViewVC 1.1.20