package dk.thoerup.spejdernetscraper;

import java.util.Map;
import java.util.TreeMap;

import javax.servlet.http.HttpUtils;

import org.jsoup.Connection.Method;
import org.jsoup.Connection.Response;
import org.jsoup.Jsoup;

import dk.thoerup.genericjavautils.HttpUtil;

/**
 * Logs in to medlemssystem.spejdernet.dk, downloads two CSV exports using the
 * session cookies obtained during login, and posts both CSV bodies to
 * horsensspejder.t-hoerup.dk.
 *
 * <p>Progress and error messages are appended to the {@code ResultBuffer}
 * returned by {@code ResultBuffer.getResultBuffer()}; {@link #run()} itself
 * never throws, it reports failures as an "Error occurred: ..." message.
 */
public class ScraperWorker implements Runnable {

    // NOTE(review): credentials are hard-coded in source. They should be moved
    // to external configuration (and rotated); left in place here so that the
    // runtime behaviour of this class is unchanged.
    private static final String USERNAME = "torbenhoerupnielsen";
    private static final String PASSWORD = "Pwspejder2013";

    private static final String LOGIN_PAGE_URL =
            "http://medlemssystem.spejdernet.dk/login.aspx?ReturnUrl=%2f";
    private static final String LOGIN_POST_URL =
            "http://medlemssystem.spejdernet.dk/Login.aspx?changeuser=1";

    // Fixed __VIEWSTATE value submitted with the login form.
    private static final String LOGIN_VIEWSTATE =
            "/wEPDwUJNjg2MjA4NzU2D2QWAmYPZBYCAgMPZBYGAgEPDxYCHgdWaXNpYmxlaGRkAgIPZBYIAgkPDxYCHwBoZGQCDQ8PFgIfAGhkZAIPDw8WAh8AaGRkAhMPDxYCHwBoZGQCAw9kFgICAQ9kFgRmDw8WAh8AaGRkAgYPDxYCHwBoZGRk8Vg6q/CGyZv+RzwBkdn5NiLJnoGAlWgM6iWe62LJuHM=";

    private static final String DATA_CSV_URL =
            "http://medlemssystem.spejdernet.dk/csvx.ashx?q=JQBOZXRtZXN0ZXIuS0ZVTS5NZW1iZXIsIE5ldG1lc3Rlci5LRlVNPgB4LlVuaXQuR3JvdXAuSWQgPT0gImd1aWQ6NWMwY2Y2MzgtZGM4Ny00MTNmLTkxYjYtOWM3MzAwYzcwMTM3Ig==&type=Member&view=csv";
    private static final String ROLLER_CSV_URL =
            "http://medlemssystem.spejdernet.dk/DataExport.aspx?Id=5c0cf638-dc87-413f-91b6-9c7300c70137&listid=e8c5ae9d-5ea7-4a00-bea4-a0ce00ea891e&execute=true";

    private static final String POST_DATA_URL =
            "http://horsensspejder.t-hoerup.dk/data/postdata.php";

    // Content type both CSV downloads must report (compared case-insensitively).
    private static final String EXPECTED_CSV_CONTENT_TYPE = "text/x-csv; charset=iso-8859-1";

    // Timeout values passed to Jsoup for the CSV downloads and to HttpUtil for the upload.
    private static final int DOWNLOAD_TIMEOUT = 10000;
    private static final int UPLOAD_TIMEOUT = 3000;

    /**
     * Runs the whole scrape-and-upload sequence once.
     *
     * <p>Steps, in order: clear the result buffer, fetch the login page, post
     * the login form, download data.csv, download roller.csv, post both to
     * {@link #POST_DATA_URL}. If either download reports an unexpected content
     * type, a message is recorded and the method returns without uploading.
     * Any exception is caught and recorded as "Error occurred: ...".
     */
    @Override
    public void run() {
        ResultBuffer rb = ResultBuffer.getResultBuffer();

        try {
            rb.clear();
            rb.addString("Starting");

            // Fetch the login page first to pick up the initial cookies.
            Response res = Jsoup.connect(LOGIN_PAGE_URL).execute();
            Map<String, String> cookies = res.cookies();
            rb.addString("Got login page");

            res = Jsoup
                    .connect(LOGIN_POST_URL)
                    .data("ctl00$main$loginForm$_txtUserName", USERNAME,
                          "ctl00$main$loginForm$_txtPassword", PASSWORD,
                          "ctl00$main$loginForm$_buttonLogin", "Login",
                          "__EVENTTARGET", "",
                          "__EVENTARGUMENT", "",
                          "__VIEWSTATE", LOGIN_VIEWSTATE,
                          "ctl00$ctl04$hiddenTab", "")
                    .method(Method.POST)
                    .cookies(cookies)
                    .execute();

            // The login response body is not inspected: "Login OK" is recorded
            // whenever execute() returns without throwing.
            rb.addString("Login OK");

            // Merge the cookies set by the login response into the session cookies.
            cookies.putAll(res.cookies());
            // printMap(cookies); // debugging aid: dumps the session cookies

            // Re-sending the collected cookies on each request keeps us in the session.
            Response data = Jsoup.connect(DATA_CSV_URL)
                    .cookies(cookies)
                    .timeout(DOWNLOAD_TIMEOUT)
                    .execute();
            if (!hasCsvContentType(data)) {
                rb.addString("Data.csv - has the wrong content type: " + data.contentType());
                return;
            }
            String dataCsv = data.body();
            rb.addString("Got data.csv");

            Response roller = Jsoup.connect(ROLLER_CSV_URL)
                    .cookies(cookies)
                    .timeout(DOWNLOAD_TIMEOUT)
                    .execute();
            if (!hasCsvContentType(roller)) {
                rb.addString("Roller.csv - has the wrong content type: " + roller.contentType());
                return;
            }
            String rollerCsv = roller.body();
            rb.addString("Got roller.csv");

            Map<String, String> postData = new TreeMap<String, String>();
            postData.put("roller", rollerCsv);
            postData.put("data", dataCsv);

            String params = HttpUtil.encodeParams(postData);
            byte[] resp = HttpUtil.postContent(POST_DATA_URL, params, UPLOAD_TIMEOUT);

            // NOTE(review): decodes with the platform default charset; the
            // encoding of the upload response is not known here - confirm and
            // pass an explicit charset.
            String response = new String(resp);

            rb.addString("POST Response: " + response);
            rb.addString("Done !");
        } catch (Exception e) {
            // Top-level boundary of the worker: report instead of propagating.
            rb.addString("Error occurred: " + e.toString());
        }
    }

    /**
     * Tells whether a response reports the expected CSV content type.
     *
     * <p>The constant is the receiver of the comparison so that a response
     * without a content type (null) yields {@code false} instead of a
     * NullPointerException.
     *
     * @param response the response to inspect
     * @return true if the content type equals {@link #EXPECTED_CSV_CONTENT_TYPE}, ignoring case
     */
    private static boolean hasCsvContentType(Response response) {
        return EXPECTED_CSV_CONTENT_TYPE.equalsIgnoreCase(response.contentType());
    }

    /**
     * Prints every entry of the map to standard output as "key: value", one per line.
     *
     * @param map the map to print
     */
    static void printMap(Map<String, String> map) {
        for (Map.Entry<String, String> entry : map.entrySet()) {
            System.out.println(entry.getKey() + ": " + entry.getValue());
        }
    }

    /**
     * Runs the worker once on the calling thread and prints the collected
     * result buffer contents to standard output.
     *
     * @param args ignored
     * @throws Exception declared for convenience
     */
    public static void main(String[] args) throws Exception {
        new ScraperWorker().run();

        System.out.println(ResultBuffer.getResultBuffer().getString());
    }
}