/[projects]/misc/bashloader/bashloader.cpp
ViewVC logotype

Annotation of /misc/bashloader/bashloader.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 547 - (hide annotations) (download)
Sun Jan 24 15:38:37 2010 UTC (14 years, 4 months ago) by torben
File size: 2684 byte(s)
Found another old project laying around
1 torben 547 /*
2     *
3     * bashloader
4     *
5     * For use with the following table:
6     *
7     * CREATE TABLE bash (
8     * id int auto_increment primary key,
9     * bashid int,
10     * rating int,
11     * quote text,
12     * KEY bash_rating(rating)
13     * );
14     */
15    
16     #include <iostream>
17     #include <sstream>
18     #include <string>
19    
20     #include <stdlib.h>
21    
22     #include "readUrl.h"
23    
24     using namespace std;
25    
/**
 * Replace every occurrence of `search` inside `*str` with `replace`, in place.
 *
 * @param str     String to modify; a null pointer is ignored.
 * @param search  Text to look for. An empty needle is ignored, because it
 *                matches at every position and the scan could never finish.
 * @param replace Replacement text; defaults to "" (i.e. delete the match).
 *
 * Scanning resumes right after the inserted replacement, so text produced by
 * a replacement is never matched again (this is what makes replacing "\\"
 * with "\\\\" terminate).
 */
void cleanString(std::string* str, std::string search, std::string replace = "")
{
    if (!str || search.empty())
        return;

    // The position must be size_type: a narrower type truncates npos, so the
    // "not found" comparison below would never succeed on 64-bit platforms.
    std::string::size_type pos = 0;
    while ((pos = str->find(search, pos)) != std::string::npos)
    {
        str->replace(pos, search.size(), replace);
        pos += replace.size();
    }
}
35    
36     void parseQuote(std::string str)
37     {
38     int pos, end_pos;
39     string quote, str_number,str_rating;
40     // cout << "-----------------------------------------------"<< endl;
41     pos = str.find("#");
42     pos++;
43     end_pos = str.find("<", pos);
44     str_number = str.substr(pos, end_pos-pos);
45    
46     pos = str.find("(", end_pos);
47     pos++;
48     end_pos = str.find(")", pos);
49     str_rating = str.substr(pos, end_pos-pos);
50     pos = str.find("<p");
51    
52     pos += 14;
53     quote = str.substr(pos);
54    
55     cleanString(&quote, "\r");
56     cleanString(&quote, "&amp;", "&");
57     cleanString(&quote, "<br />");
58     cleanString(&quote, "&lt;", "<");
59     cleanString(&quote, "&gt;", ">");
60     cleanString(&quote, "&nbsp;", " ");
61     cleanString(&quote, "\\", "\\\\");
62     cleanString(&quote, "&quot;", "\"");
63     cleanString(&quote, "'", "\\'");
64    
65     cout << "INSERT INTO bash (bashid, rating, quote) values (" << str_number << "," << str_rating << ",'" << quote << "');" << endl;
66     }
67    
68     void parseDocument(int i)
69     {
70     ostringstream url;
71     url << "http://bash.org/?browse&p=" << i;
72     string document = readUrl( url.str() );
73    
74     unsigned int pos=0, end_pos;
75     while(1)
76     {
77     pos = document.find("<p class=\"quote\">",pos);
78     if (pos == string::npos)
79     break;
80     pos += 10;
81     end_pos = document.find("</p>", pos);
82    
83     end_pos = document.find("</p>", end_pos+4);
84    
85     parseQuote(document.substr(pos, end_pos-pos) );
86     }
87    
88     }
89    
90     int main()
91     {
92     unsigned pos, end_pos, max;
93     string document = readUrl("http://bash.org/?browse");
94    
95     if (document == "")
96     exit(1);
97    
98     pos = document.rfind("<option value=");
99     pos += 15;
100     end_pos = document.find("\"", pos+1);
101     max = atoi( document.substr(pos, end_pos-pos).c_str() );
102    
103     cout << "CREATE TABLE IF NOT EXISTS bash (" << endl;
104     cout << " id int auto_increment primary key," << endl;
105     cout << " bashid int," << endl;
106     cout << " rating int," << endl;
107     cout << " quote text," << endl;
108     cout << " KEY bash_rating(rating)" << endl;
109     cout << " );" << endl << endl;
110    
111     cout << "TRUNCATE TABLE bash;" << endl;
112     //parseDocument(2);
113     for (unsigned int i=1; i<= max; i++)
114     {
115     cerr << "Parsing #" << i << "/" << max << endl;
116     parseDocument(i);
117     usleep(50*1000); //be nice to the webserver
118    
119     }
120     }

  ViewVC Help
Powered by ViewVC 1.1.20