thoerup/spejdernetscraper/IMDBSeriesScraper.java

package dk.thoerup.spejdernetscraper;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Looks up episode titles on IMDB's per-season episode listing page using jsoup.
 */
public class IMDBSeriesScraper {

        /**
         * Downloads the episode listing for one season of a title and returns the
         * title of the requested episode.
         *
         * @param sid     the IMDB title id, inserted verbatim into the URL path
         * @param season  the season number, used as the {@code season} query parameter
         * @param episode the episode number within the season
         * @return the value of the episode link's {@code title} attribute, or the
         *         literal {@code "!Title not found!"} when that attribute equals
         *         (ignoring case) {@code "Episode #<season>.<episode>"}
         * @throws IllegalStateException if the page has no element with id
         *                               {@code episodes_content}
         * @throws Exception             if the page cannot be fetched, or if no
         *                               listed episode link ends with
         *                               {@code "ep<episode>"}
         */
        public String fechTitle(String sid, int season, int episode) throws Exception {
                String url = "http://www.imdb.com/title/" + sid + "/episodes?season=" + season;

                Document page = Jsoup.connect(url).get();

                return findTitle(page, season, episode);
        }

        /** Searches an already-fetched season page for the episode and returns its title. */
        private String findTitle(Document page, int season, int episode) throws Exception {
                final String epMatch = "ep" + episode;
                final String noTitle = "Episode #" + season + "." + episode;

                Element episodesContent = page.getElementById("episodes_content");
                if (episodesContent == null) {
                        // getElementById returns null when the id is absent; fail with
                        // context instead of a bare NullPointerException further down.
                        throw new IllegalStateException(
                                        "No 'episodes_content' element found for season " + season);
                }

                for (Element curEp : episodesContent.getElementsByClass("list_item")) {
                        Element image = curEp.getElementsByClass("image").first();
                        if (image == null) {
                                // List item without an image wrapper: skip it, keep searching.
                                continue;
                        }

                        // The episode link is expected to be the wrapper's first child element.
                        Element anchor = image.children().first();
                        if (anchor == null) {
                                continue;
                        }

                        String href = anchor.attr("href");
                        if (!href.endsWith(epMatch)) {
                                continue;
                        }

                        String title = anchor.attr("title");
                        if (title.equalsIgnoreCase(noTitle)) {
                                // The title is only the generic "Episode #s.e" form, so
                                // report that no real title is available.
                                return "!Title not found!";
                        }
                        return title;
                }

                throw new Exception("Episode not found !");
        }

}
1	package dk.thoerup.spejdernetscraper;
2
3	import org.jsoup.Jsoup;
4	import org.jsoup.nodes.Document;
5	import org.jsoup.nodes.Element;
6	import org.jsoup.select.Elements;
7
8	public class IMDBSeriesScraper {
9
10	public String fechTitle(String sid, int season, int episode) throws Exception {
11
12	final String epMatch = "ep" + episode;
13	final String noTitle = "Episode #" + season + "." + episode;
14
15	String url = "http://www.imdb.com/title/" + sid + "/episodes?season=" + season;
16
17	Document page = Jsoup.connect(url)
18	.get();
19
20	Element episodesContent = page.getElementById("episodes_content");
21
22	Elements episodesList = episodesContent.getElementsByClass("list_item");
23
24	for (int i=0; i<episodesList.size(); i++) {
25	Element curEp = episodesList.get(i);
26
27	Element image = curEp.getElementsByClass("image").first();
28
29	Element anchor = image.child(0);
30
31	String title = anchor.attr("title");
32	//String content = anchor.text().trim();
33	String href = anchor.attr("href");
34
35	if ( href.endsWith(epMatch)) {
36	if (title.equalsIgnoreCase(noTitle)) {
37	return "!Title not found!";
38	}
39
40	return title;
41	}
42	}
43
44
45	throw new Exception("Episode not found !");
46	}
47
48	}