-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathscraper.py
More file actions
37 lines (27 loc) · 1 KB
/
scraper.py
File metadata and controls
37 lines (27 loc) · 1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# author: Carlos Parra (Parra Inc)
# python-version: 3.4.3 (3.x)
import re
import urllib.request
from bs4 import BeautifulSoup
# Scrape the latest lottery results from loteriasdominicanas.com and print
# them as "<lottery name> -> {'numbers': [...], 'date': ...}" lines.
url = "http://www.loteriasdominicanas.com/"

# Download the page.  The timeout keeps the script from hanging forever on an
# unresponsive server, and the ``with`` block guarantees the connection is
# closed once the body has been read.
try:
    with urllib.request.urlopen(url, timeout=30) as response:
        html = response.read()
except Exception:
    # This is the script's top-level boundary: any network/HTTP failure ends
    # the run with the same message as before.  Catching ``Exception`` rather
    # than using a bare ``except`` lets Ctrl-C (KeyboardInterrupt) and
    # SystemExit propagate normally.
    print("Error opening the url")
    # Exit with a non-zero status so callers can detect the failure;
    # ``raise SystemExit`` also avoids depending on the ``site``-provided
    # ``exit()`` helper, which is not guaranteed to exist.
    raise SystemExit(1)

soup = BeautifulSoup(html, "html.parser")

# Three parallel selections: the i-th heading, number list and status element
# are treated as belonging to the same lottery.
lotteries = soup.select('div.lottery-list div.heading')
numbers = soup.select('div.lottery-list div.content ul')
date = soup.select('div.lottery-list div.content div.status')

# The lottery name is read from the fifth child node of each heading and the
# draw date from the third child node of each status element.
# NOTE(review): assumes those children are text nodes -- confirm against the
# site's current markup.
lotteries_names = [lottery_name.contents[4].strip()
                   for lottery_name in lotteries]
dates_list = [dates.contents[2].strip() for dates in date]

# Compiled once, outside the loop.  Captures the inner text of every
# attribute-less <li> element found in the string form of a <ul>'s children.
li_pattern = re.compile(r'<li>(.+?)</li>')

# zip() pairs the three sequences positionally and stops at the shortest one,
# so a mismatch in the number of matched elements no longer raises IndexError.
numbers_dict = {}
for name, draw_date, tag in zip(lotteries_names, dates_list, numbers):
    numbers_list = li_pattern.findall(str(tag.contents))
    numbers_dict[name] = {'numbers': numbers_list,
                          'date': draw_date}

for k, v in numbers_dict.items():
    print(k, "->", v)