-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtotoscrape.py
90 lines (78 loc) · 3.37 KB
/
totoscrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
from selenium import webdriver
from bs4 import BeautifulSoup
import sqlite3, platform, re
# Patterns for the entry description inside the parentheses of a winning-outlet
# line; compiled once because they are applied to every <li> of every draw.
_ORDINARY_ENTRY_RE = re.compile(r'1\s(\w+)\sE')
_QUICKPICK_ENTRY_RE = re.compile(r'1\s(\w+)\s(.+)\sE')


def _parse_outlet(raw_text):
    """Split the text of one <li> into its location and entry details.

    Returns a ``(location, draw, system)`` tuple, where ``location`` is the
    list produced by splitting the part before the parenthesis on ' - '
    (element 0 is stored as the place, element 1 as the address), or ``None``
    when the text does not have the expected shape.
    """
    # NOTE(review): as written, the second replace() removes every single
    # space, after which the ' )', '( ' and ' - ' separators used below can no
    # longer match. The original literal was probably a wider whitespace run
    # that was collapsed when the file was pasted -- confirm against the
    # repository before relying on this parser.
    parts = raw_text.replace('\n','').replace(' ','').replace(' )','').split('( ')
    location = parts[0].split(' - ')
    if len(parts) < 2 or len(location) < 2:
        return None
    entry = parts[1]
    if 'QuickPick' not in entry:
        match = _ORDINARY_ENTRY_RE.search(entry)
        if match is None:
            return None
        return location, 'No QuickPick', match.group(1)
    match = _QUICKPICK_ENTRY_RE.search(entry)
    if match is None:
        return None
    return location, match.group(1), match.group(2)


def _store_draw(cur, draw_no, html):
    """Parse the results page of one draw and write its rows through *cur*.

    cur     -- sqlite3 cursor used for the UPDATE/INSERT statements
    draw_no -- draw number the page belongs to
    html    -- page source (text) captured after selecting that draw

    Nothing is committed here; the caller decides when to commit.
    Raises IndexError if the draw-date header or the additional-number cell
    is not present in *html*.
    """
    soup = BeautifulSoup(html, 'html.parser')
    # get date
    date = soup.select('th[class="drawDate"]')[0].getText().split(', ')
    cur.execute("UPDATE date SET day=?,date=?,scanned=? WHERE draw_no=?", (date[0], date[1], 1, draw_no))
    print(date)
    # first 6 numbers
    for cell in soup.select('td[width="16%"]'):
        cur.execute("INSERT OR IGNORE INTO jackpot_no(draw_no,no_type,number) VALUES(?,?,?)", (draw_no, 'normal', cell.getText()))
    # additional number
    additional = soup.select('td[class="additional"]')[0].getText()
    cur.execute("INSERT OR IGNORE INTO jackpot_no(draw_no,no_type,number) VALUES(?,?,?)", (draw_no, 'additional', additional))
    # get locations
    if 'Group 1 has no winner' in html:
        return
    # cut html till group 1 winner only, ie. remove group 2 winners; when the
    # marker is missing keep the page whole instead of slicing with -1
    cut = html.find('Group 2 winning tickets')
    if cut != -1:
        html = html[:cut]
    soup = BeautifulSoup(html, 'html.parser')
    for item in soup.select('li'):
        raw = item.getText()
        parsed = _parse_outlet(raw)
        if parsed is None:
            # not a winning-outlet line; skip it and keep going
            continue
        location, draw, system = parsed
        print(location)
        print(draw)
        print(system)
        cur.execute("INSERT OR IGNORE INTO place(draw_no,raw_data,location,address,quickpick,system) VALUES(?,?,?,?,?,?)", (draw_no, raw, location[0], location[1], draw, system))


# connect to database
conn = sqlite3.connect('toto.sqlite')
try:
    cur = conn.cursor()
    # connect to windows or mac chromedriver
    if platform.system() == 'Windows':
        cdriver = r'C:\xxx\MyPythonScripts\chromedriver.exe'
    else:  # for macOS
        # cdriver = r'/xxx/MyPythonScripts/chromedriver_mac.exe'
        cdriver = r'/home/manhunt2k/Downloads/toto-master/chromedriver_mac.exe'
    url='http://www.singaporepools.com.sg/en/product/sr/Pages/toto_results.aspx?sppl=RHJhd051bWJlcj0zMjM4'
    # NOTE(review): the positional driver path here and find_element_by_xpath
    # below look like the older Selenium API -- confirm the installed version.
    driver = webdriver.Chrome(cdriver)
    try:
        driver.get(url)
        # page_source is already text; leaving it unencoded keeps the str
        # regex and the str searches usable on Python 3 as well as Python 2
        html = driver.page_source
        # get list of toto draw numbers and put in database
        for draw_no in re.findall(r'value="(\d\d\d\d)', html):
            cur.execute("INSERT OR IGNORE INTO date(draw_no) VALUES(?)", (draw_no,))
        conn.commit()
        # extract draw numbers not scanned
        cur.execute("SELECT draw_no FROM date WHERE scanned=0")
        for (draw_no,) in cur.fetchall():
            string = "//select[@class='form-control selectDrawList']/option[@value='{}']".format(draw_no)
            driver.find_element_by_xpath(string).click()
            # get new html
            _store_draw(cur, draw_no, driver.page_source)
            # commit per draw so draws already scanned survive a later failure
            conn.commit()
    finally:
        # always shut the browser down, even when scraping fails
        driver.quit()
finally:
    conn.close()