forked from QS03/lambda-selenium
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhandler.py
More file actions
executable file
·60 lines (45 loc) · 1.76 KB
/
handler.py
File metadata and controls
executable file
·60 lines (45 loc) · 1.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import functools
import json
import os
import urllib.parse as urlparse
import zipfile
from datetime import datetime

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
def chromium_extractor(func):
    """Decorator that unpacks the bundled headless-Chromium/chromedriver
    binaries from ./bin.zip into /tmp/bin (the only writable path on AWS
    Lambda) and marks them executable before invoking ``func``.

    Fixes over the original: the wrapper now forwards arguments and the
    wrapped function's return value (the old ``wrapper_to_func`` accepted
    no args and returned ``None``), and ``functools.wraps`` preserves the
    wrapped function's name/docstring for debugging.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Extraction is idempotent; /tmp may persist across warm invocations.
        with zipfile.ZipFile('./bin.zip', 'r') as zip_ref:
            zip_ref.extractall('/tmp/bin')
        # Binaries must be executable for chromedriver to launch.
        os.system('chmod 755 /tmp/bin/*')
        return func(*args, **kwargs)
    return wrapper
@chromium_extractor
def run_scrapper():
    """Launch headless Chromium from /tmp/bin, load the target URL, and
    save a screenshot to /tmp/screenshot.png, printing start/end times
    and the screenshot size.

    Fix: the WebDriver was never quit, leaking the chromedriver and
    chromium processes on every invocation (a real problem on warm
    Lambda containers); a try/finally now guarantees cleanup.
    """
    url = 'https://google.com'
    try:
        print(f"Started at: {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')}")
        driver_path = "/tmp/bin/chromedriver"
        chrome_options = Options()
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--no-sandbox')  # required inside Lambda's container
        chrome_options.add_argument('--single-process')
        chrome_options.add_argument('--disable-dev-shm-usage')  # /dev/shm is tiny in Lambda
        chrome_options.add_argument("--window-size=1920,1080")
        chrome_options.binary_location = "/tmp/bin/headless-chromium"
        # NOTE(review): positional executable_path / chrome_options= is the
        # Selenium 3 API; Selenium 4 needs Service(driver_path) and options=.
        # Confirm the pinned selenium version before modernizing this call.
        driver = webdriver.Chrome(driver_path, chrome_options=chrome_options)
        try:
            driver.get(url)
            driver.save_screenshot("/tmp/screenshot.png")
            size = os.path.getsize("/tmp/screenshot.png")
            print(f"Image size: {size}")
            print(f"Completed at: {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')}")
        finally:
            # Fix: always release the browser/driver processes.
            driver.quit()
    except Exception as e:
        # Deliberate best-effort: the Lambda handler must not crash on a
        # scrape failure; the error is surfaced via CloudWatch logs.
        print(e)
def handler(event, context):
    """AWS Lambda entry point.

    ``event`` and ``context`` are accepted to satisfy the Lambda calling
    convention but are not used; all work is delegated to the decorated
    scraper.
    """
    run_scrapper()
# Allow running the scraper directly (outside Lambda) for local testing.
if __name__ == "__main__":
    run_scrapper()