-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_crawler.py
42 lines (34 loc) · 953 Bytes
/
run_crawler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# -*- coding: utf-8 -*-
# import sys
# import importlib
# importlib.reload(sys)
# sys.setdefaultencoding('utf8')
# Crawler entry script: initialises the app, makes sure the image folder
# exists, runs the crawler starting at `seed`, and commits the session.

# The crawl starts from `seed`; `baseUrl` is handed to the crawler with it.
seed = 'http://m.patentati.it/quiz-patente-b/lista-domande.php'
baseUrl = 'http://m.patentati.it/'

# The app is initialised before anything else is imported from `tp`;
# the statement order below is kept exactly as in the original script.
import tp
tp.init()
print('App initialized')

# path where images are saved
from tp import app
imgPath = app.config["QUIZ_IMAGE_FOLDER"]

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.schema import DropTable
from crawler import crawler
from tp import db
from tp.game.models import Quiz, Category

# Create directory if it doesn't exist
import os
print('Creating images dir')
# exist_ok=True replaces the separate os.path.exists() check, so the script
# no longer fails if the folder appears between the check and the creation.
os.makedirs(imgPath, exist_ok=True)
print('Image dir created')

### Extraction ###
print(f"Start crawling {seed}..")
# Tables are created before crawling; the crawler receives the session.
db.create_all()
crawler.getCategories(db.session, seed, baseUrl, imgPath)

### Saving Data ###
print('Saving data..')
db.session.commit()
print('Done')