/[projects]/misc/pythonScraper/scraper.py
ViewVC logotype

Contents of /misc/pythonScraper/scraper.py

Parent Directory Parent Directory | Revision Log Revision Log


Revision 629 - (show annotations) (download) (as text)
Tue Mar 16 10:10:55 2010 UTC (14 years, 2 months ago) by torben
File MIME type: text/x-python
File size: 1326 byte(s)
added sample python screen/web scraper
1 #!/usr/bin/python
2
3 # Required python modules:
4 # * Mechanize
5 # * BeautifulSoup
6 # apt-get install python-mechanize python-beautifulsoup beep
7
8
9
10 from mechanize import Browser
11 from mechanize import _response
12 from BeautifulSoup import BeautifulSoup
13
14 import time
15 import sys
16 import os
17
18 savedVal1 = 0
19
class CheckDevice:
    """Watch the "Key1" entry of a status page and signal when it changes.

    savedVal1 holds the value seen on the most recent check.  It starts
    at 0, so the first value read from the page is reported as a change.
    """

    def __init__(self):
        self.savedVal1 = 0

    def beep(self):
        """Run the external ``beep`` command with ``-l 200 -r 2``."""
        os.system("beep -l 200 -r 2")

    def printTime(self):
        """Run the external ``date`` command, which writes to the terminal."""
        os.system("date")

    def checkVars(self):
        """Fetch the status page once and react to a changed "Key1" value.

        Any ordinary error while fetching or parsing is printed and
        otherwise ignored, so a caller polling in a loop keeps running.
        KeyboardInterrupt and SystemExit are deliberately NOT caught here,
        so Ctrl+C can still stop the program in the middle of a fetch.
        """
        br = Browser()
        try:
            br.open("http://t-hoerup.dk/test.html")
            soup = BeautifulSoup(br.response().read())
            self._checkRows(soup.findAll('tr'))
        except Exception:
            # Two spaces after the colon reproduce the output of the
            # original two-argument Python 2 print statement.
            print("Error, probably a 404 :  %s" % (sys.exc_info(),))

    def _checkRows(self, rows):
        """Scan table rows for "Key1" and report when its value changes.

        rows -- iterable of row objects exposing a ``contents`` list whose
                items have a ``string`` attribute.

        On a change this beeps, prints the time and the old/new values,
        and stores the new value in savedVal1.
        """
        for row in rows:
            cells = row.contents
            # The key and value are read from child nodes 1 and 3
            # (presumably the nodes in between are whitespace text --
            # verify against the page markup).  Rows that are too short
            # are skipped instead of aborting the whole pass.
            if len(cells) < 4:
                continue

            key = cells[1].string
            val = cells[3].string

            if key == "Key1" and val != self.savedVal1:
                self.beep()
                self.printTime()
                print("Val1 has changed %s -> %s\n" % (self.savedVal1, val))
                self.savedVal1 = val
59
60
61
62
def main(interval=1):
    """Poll the device page in an endless loop.

    Creates one CheckDevice and calls its checkVars() repeatedly,
    sleeping between calls.

    interval -- pause between polls, in seconds (default 1).

    A KeyboardInterrupt (Ctrl+C) that reaches this function ends the loop
    and makes main() return normally instead of propagating as a traceback.
    """
    cd = CheckDevice()

    # Simple continuous main loop; stop it with Ctrl+C.
    try:
        while True:
            cd.checkVars()
            time.sleep(interval)
    except KeyboardInterrupt:
        # Ctrl+C is the intended way to stop polling; leave quietly.
        pass
71
72
# Finally, launch the main function -- but only when this file is run as a
# script, so that importing the module does not start the endless poll loop.
if __name__ == "__main__":
    main()

  ViewVC Help
Powered by ViewVC 1.1.20