/[projects]/misc/bashloader/bashloader.cpp
ViewVC logotype

Contents of /misc/bashloader/bashloader.cpp

Parent Directory | Revision Log


Revision 547 - (show annotations) (download)
Sun Jan 24 15:38:37 2010 UTC (14 years, 3 months ago) by torben
File size: 2684 byte(s)
Found another old project lying around
1 /*
2 *
3 * bashloader
4 *
5 * for use with the following table
6 *
7 * CREATE TABLE bash (
8 * id int auto_increment primary key,
9 * bashid int,
10 * rating int,
11 * quote text,
12 * KEY bash_rating(rating)
13 * );
14 */
15
#include <stdlib.h>
#include <unistd.h>

#include <iostream>
#include <sstream>
#include <string>

#include "readUrl.h"
23
24 using namespace std;
25
/**
 * Replaces every occurrence of `search` inside `*str` with `replace`, in place.
 *
 * Scanning resumes right after the inserted replacement text, so a
 * replacement that itself contains `search` is not expanded again.
 *
 * `search` and `replace` are taken by value so that a caller may safely pass
 * text that aliases `*str`.
 *
 * @param str     string to modify; a null pointer is ignored
 * @param search  text to look for; an empty value leaves `*str` untouched
 * @param replace text substituted for each match (default: delete the match)
 */
void cleanString(std::string* str, std::string search, std::string replace = "")
{
    // An empty needle matches at every position and would never terminate.
    if (!str || search.empty())
        return;

    // Must be size_type: a narrower unsigned type truncates npos, so the
    // end-of-search comparison below would never become false.
    std::string::size_type pos = 0;
    while ((pos = str->find(search, pos)) != std::string::npos)
    {
        str->replace(pos, search.size(), replace);
        pos += replace.size();
    }
}
35
36 void parseQuote(std::string str)
37 {
38 int pos, end_pos;
39 string quote, str_number,str_rating;
40 // cout << "-----------------------------------------------"<< endl;
41 pos = str.find("#");
42 pos++;
43 end_pos = str.find("<", pos);
44 str_number = str.substr(pos, end_pos-pos);
45
46 pos = str.find("(", end_pos);
47 pos++;
48 end_pos = str.find(")", pos);
49 str_rating = str.substr(pos, end_pos-pos);
50 pos = str.find("<p");
51
52 pos += 14;
53 quote = str.substr(pos);
54
55 cleanString(&quote, "\r");
56 cleanString(&quote, "&amp;", "&");
57 cleanString(&quote, "<br />");
58 cleanString(&quote, "&lt;", "<");
59 cleanString(&quote, "&gt;", ">");
60 cleanString(&quote, "&nbsp;", " ");
61 cleanString(&quote, "\\", "\\\\");
62 cleanString(&quote, "&quot;", "\"");
63 cleanString(&quote, "'", "\\'");
64
65 cout << "INSERT INTO bash (bashid, rating, quote) values (" << str_number << "," << str_rating << ",'" << quote << "');" << endl;
66 }
67
68 void parseDocument(int i)
69 {
70 ostringstream url;
71 url << "http://bash.org/?browse&p=" << i;
72 string document = readUrl( url.str() );
73
74 unsigned int pos=0, end_pos;
75 while(1)
76 {
77 pos = document.find("<p class=\"quote\">",pos);
78 if (pos == string::npos)
79 break;
80 pos += 10;
81 end_pos = document.find("</p>", pos);
82
83 end_pos = document.find("</p>", end_pos+4);
84
85 parseQuote(document.substr(pos, end_pos-pos) );
86 }
87
88 }
89
90 int main()
91 {
92 unsigned pos, end_pos, max;
93 string document = readUrl("http://bash.org/?browse");
94
95 if (document == "")
96 exit(1);
97
98 pos = document.rfind("<option value=");
99 pos += 15;
100 end_pos = document.find("\"", pos+1);
101 max = atoi( document.substr(pos, end_pos-pos).c_str() );
102
103 cout << "CREATE TABLE IF NOT EXISTS bash (" << endl;
104 cout << " id int auto_increment primary key," << endl;
105 cout << " bashid int," << endl;
106 cout << " rating int," << endl;
107 cout << " quote text," << endl;
108 cout << " KEY bash_rating(rating)" << endl;
109 cout << " );" << endl << endl;
110
111 cout << "TRUNCATE TABLE bash;" << endl;
112 //parseDocument(2);
113 for (unsigned int i=1; i<= max; i++)
114 {
115 cerr << "Parsing #" << i << "/" << max << endl;
116 parseDocument(i);
117 usleep(50*1000); //be nice to the webserver
118
119 }
120 }

  ViewVC Help
Powered by ViewVC 1.1.20