-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathparse_hearings.py
135 lines (116 loc) · 3.74 KB
/
parse_hearings.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
"""
Module to get case details given case numbers.
To perform a scraper run, use: python parse_hearings.py name_of_csv_with_case_numbers
"""
import csv
import logging
import sys
from typing import Any, Dict, Iterable, List, Optional

import click
import simplejson

import scrapers
from emailing import log_and_email
logger = logging.getLogger()
logging.basicConfig(stream=sys.stdout)
logger.setLevel(logging.INFO)
def get_ids_to_parse(infile: click.File) -> List[str]:
"""Gets a list of case numbers from the csv `infile`"""
ids_to_parse = []
reader = csv.reader(infile)
for row in reader:
ids_to_parse.append(row[0])
return ids_to_parse
def parse_all_from_parse_filings(
case_nums: List[str],
scraper: Optional[scrapers.FakeScraper] = None,
db: bool = True,
county: str = "travis",
showbrowser: bool = False,
) -> List[Dict[str, Any]]:
"""
Gets case details for each case number in `case_nums` and sends the data to PostgreSQL.
Logs any case numbers for which getting data failed.
"""
if not scraper:
# Get the scraper corresponding to the lowercase command line entry for county. Default to TravisScraper.
county = county.lower()
scraper = (
scrapers.SCRAPER_NAMES[county]()
if county in scrapers.SCRAPER_NAMES
else scrapers.TravisScraper()
)
parsed_cases = []
for tries in range(1, 6):
try:
parsed_cases = scraper.make_case_list(ids_to_parse=case_nums)
return parsed_cases
except Exception as e:
logger.error(
f"Failed to parse hearings on attempt {tries}. Error message: {e}"
)
return parsed_cases
def persist_parsed_cases(cases: List[Dict[str, Any]]) -> None:
import persist
logger.info(
f"Finished making case list, now will send all {len(cases)} cases to SQL."
)
failed_cases = []
for parsed_case in cases:
try:
persist.rest_case(parsed_case)
except:
try:
failed_cases.append(parsed_case.case_number)
except:
logger.error(
"A case failed to be parsed but it doesn't have a case number."
)
if failed_cases:
error_message = f"Failed to send the following case numbers to SQL:\n{', '.join(failed_cases)}"
log_and_email(
error_message,
"Case Numbers for Which Sending to SQL Failed",
error=True,
)
logger.info("Finished sending cases to SQL.")
@click.command()
@click.option("--infile", type=click.File(mode="r"), required=False)
@click.option("--outfile", type=click.File(mode="w"), required=False)
@click.option(
"--county",
type=click.Choice(scrapers.SCRAPER_NAMES, case_sensitive=False),
default="travis",
)
@click.option(
"--showbrowser / --headless",
default=False,
help="whether to operate in headless mode or not",
)
@click.option(
"--db / --no-db",
default=True,
help="whether to persist the data to a db",
)
@click.option(
"--db / --no-db",
default=True,
help="whether to persist the data to a db",
)
def parse_all(
infile: Optional[click.File],
outfile: Optional[click.File],
county: Optional[click.STRING],
showbrowser=False,
db=True,
):
"""Same as `parse_all_from_parse_filings()` but takes in a csv of case numbers instead of a list."""
ids_to_parse = get_ids_to_parse(infile)
parsed_cases = parse_all_from_parse_filings(
case_nums=ids_to_parse, showbrowser=showbrowser, db=db, county=county
)
if db:
persist_parsed_cases(parsed_cases)
if outfile:
simplejson.dump(parsed_cases, outfile, default=dict)
if __name__ == "__main__":
parse_all()