diff --git a/code/downloader/scrapers.py b/code/downloader/scrapers.py
index 0d2b98c..4e4d85a 100644
--- a/code/downloader/scrapers.py
+++ b/code/downloader/scrapers.py
@@ -612,7 +612,7 @@ def pull_case(self, case, new_member_list_seen):
         download_url = self.browser.current_url
         page_source = self.browser.page_source

         if self.court == 'psc':
-            cost = 0
+            cost = 0
         else:
             cost = float(ftools.parse_transaction_history(page_source)['cost'])
diff --git a/code/parsers/parse_pacer.py b/code/parsers/parse_pacer.py
index a522a75..a20f327 100644
--- a/code/parsers/parse_pacer.py
+++ b/code/parsers/parse_pacer.py
@@ -877,11 +877,9 @@ def case_runner(case, output_dir, court, debug, force_rerun, count, member_df, l
     '''
     Case parser management
     '''
-    # Get the output path (??? seems like it's trying to make sure to use the new get_expected_path)
+    # Get the output path
     case_fname = Path(case['docket_paths'][0]).stem
-    pacer_dir = Path(output_dir).resolve().parent.parent
-    outname = ftools.get_expected_path(ucid=case['ucid'], pacer_path=pacer_dir)
-    # outname = Path(output_dir) / f"{case_fname}.json"
+    outname = ftools.get_expected_path(ucid=case['ucid'], manual_subdir_path=output_dir)

     if force_rerun or not outname.exists(): # Check whether the output file exists already
         case_data = process_html_file(case, member_df, court = court)
@@ -999,7 +997,7 @@ def parse(input_dir, output_dir, summaries_dir, court=None, all_courts=False, de
 @click.command()
 @click.argument('input-dir')
 @click.option('--output-dir', '-o', default=None, type=click.Path(exists=True, file_okay=False),
-              help="Directory to place parsed output, if none provided defaults to INPUT_DIR/json ")
+              help="Directory to place parsed output, if none provided defaults to INPUT_DIR/../json ")
 @click.option('--summaries-dir', '-s', default=None, type=click.Path(exists=True, file_okay=False),
               help="Directory to place parsed output, if none provided defaults to INPUT_DIR/summaries ")
 @click.option('--court', '-c', default=None,
@@ -1023,4 +1021,5 @@ def parser(**kwargs ):
     parse(**kwargs)

 if __name__ == '__main__':
-    parser()
\ No newline at end of file
+    parser()
+
diff --git a/code/support/fhandle_tools.py b/code/support/fhandle_tools.py
index ff26c6d..e2950f7 100644
--- a/code/support/fhandle_tools.py
+++ b/code/support/fhandle_tools.py
@@ -573,13 +573,14 @@ def _get_expected_path_old_(ucid, subdir='json', pacer_path=settings.PACER_PATH,
     return pacer_path / court / subdir / fname


-def get_expected_path(ucid, subdir='json', pacer_path=settings.PACER_PATH, def_no=None, update_ind=None):
+def get_expected_path(ucid, subdir='json', manual_subdir_path=None, pacer_path=settings.PACER_PATH, def_no=None, update_ind=None):
     '''
     Find the expected path of case-level data files

     Inputs:
         - ucid (str): case ucid
         - subdir (str): the subdirectory to look in (see scrapers.PacerCourtDir), one of 'html', 'json', 'docs', 'summaries', 'members'
+        - manual_subdir_path (str): (mainly for parser) the directory in which to locate the expected path, rather than pacer_path/court/subdir
         - pacer_path (Path): path to pacer data directory
         - def_no (str or int): the defendant no., if specifying a defendant-specific docket
         - update_ind (int): update index (for html files), passed through to generate_docket_filename
@@ -591,11 +592,15 @@ def get_expected_path(ucid, subdir='json', pacer_path=settings.PACER_PATH, def_n
     court, case_no = ucid_data['court'], ucid_data['case_no']
     year_part = decompose_caseno(case_no)['year']

-    # Build the filepath
+    # Build the filename
     ext = SUBDIR_EXTENSIONS[subdir]
     fname = generate_docket_filename(case_no, ext=ext, def_no=def_no, ind=update_ind)

-    return pacer_path / court / subdir / year_part / fname
+    # Build the full filepath
+    if manual_subdir_path:
+        return Path(manual_subdir_path).resolve() / year_part / fname
+    else:
+        return pacer_path / court / subdir / year_part / fname


 def filename_to_ucid(fname, court):
     fpath = Path(fname)
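Usage sketch (not part of the patch): a minimal illustration of the two branches of the
reworked get_expected_path, mirroring the call that case_runner now makes. The ucid,
the directory argument, and the import style are hypothetical values chosen for
illustration; only the keyword arguments come from this diff.

    from support import fhandle_tools as ftools  # assumed import path within this repo

    ucid = 'ilnd;;1:16-cv-05112'  # hypothetical ucid of the form 'court;;case_no'

    # Default branch: pacer_path / court / subdir / year_part / fname
    ftools.get_expected_path(ucid=ucid)
    # e.g. <PACER_PATH>/ilnd/json/2016/<generated filename>

    # New branch: year_part / fname are appended to the caller-supplied directory,
    # bypassing pacer_path / court / subdir (this is how case_runner now calls it,
    # passing its output_dir straight through)
    ftools.get_expected_path(ucid=ucid, manual_subdir_path='/data/parsed/json')
    # e.g. /data/parsed/json/2016/<generated filename>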