From e0cb85555e52fe41e6a2cc35e5c44d824cfa39d3 Mon Sep 17 00:00:00 2001 From: Geoffrey Jost Date: Mon, 11 Sep 2023 16:28:38 -0700 Subject: [PATCH 01/10] ddrfar: Print CSV linking FAR facility with each page's DDR File ID --- ddr/DDR/cli/ddrfar.py | 55 +++++++++++++++++++++++++++++++++++++++++++ ddr/setup.py | 1 + 2 files changed, 56 insertions(+) create mode 100644 ddr/DDR/cli/ddrfar.py diff --git a/ddr/DDR/cli/ddrfar.py b/ddr/DDR/cli/ddrfar.py new file mode 100644 index 0000000..2a6637a --- /dev/null +++ b/ddr/DDR/cli/ddrfar.py @@ -0,0 +1,55 @@ +import click + +from DDR import config +from DDR import fileio +from DDR import identifier + +FAR_FACILITIES = [ + '1-topaz', + '2-poston', + '3-gilariver', + '4-amache', + '5-heartmountain', + '6-jerome', + '7-manzanar', + '8-minidoka', + '9-rohwer', + '10-tulelake', +] + +FACILITY_DDRCOLLECTION = { + '1-topaz': 'ddr-densho-305-1', + '2-poston': 'ddr-densho-305-2', + '3-gilariver': 'ddr-densho-305-3', + '4-amache': 'ddr-densho-305-4', + '5-heartmountain': 'ddr-densho-305-5', + '6-jerome': 'ddr-densho-305-6', + '7-manzanar': 'ddr-densho-305-7', + '8-minidoka': 'ddr-densho-305-8', + '9-rohwer': 'ddr-densho-305-9', + '10-tulelake': 'ddr-densho-305-10', +} + + +@click.command() +@click.option('--basedir','-b', default=config.MEDIA_BASE, help='Repository base directory') +def ddrfar(basedir): + """Print CSV linking FAR facility with each page's DDR File ID + + \b + BASEDIR - DDR repository base directory (ddrlocal.cfg [cmdln] media_base) + """ + headers = ['facility', 'page', 'file_id', 'file_label'] + click.echo(fileio.write_csv_str(headers)) + for facility in FAR_FACILITIES: + eid = FACILITY_DDRCOLLECTION[facility] + e = identifier.Identifier(eid, basedir).object() + for n,file_ in enumerate(e.children()): + if n == 0: + continue + row = [facility, n, file_.identifier.id, file_.label] + click.echo(fileio.write_csv_str(row)) + + +if __name__ == '__main__': + ddrfar() diff --git a/ddr/setup.py b/ddr/setup.py index f3a539b..3c65b85 100644 --- a/ddr/setup.py +++ b/ddr/setup.py @@ -72,6 +72,7 @@ def find_version(*file_paths): ddrconfig=DDR.cli.ddrconfig:ddrconfig ddrdesc=DDR.cli.ddrdesc:ddrdesc ddrexport=DDR.cli.ddrexport:ddrexport + ddrfar=DDR.cli.ddrfar:ddrfar ddrindex=DDR.cli.ddrindex:ddrindex ddrinfo=DDR.cli.ddrinfo:ddrinfo ddrimport=DDR.cli.ddrimport:ddrimport From aad833d42cd2cc1d6f88e9030081c2714b8c24fd Mon Sep 17 00:00:00 2001 From: Geoffrey Jost Date: Wed, 26 Apr 2023 09:03:54 -0700 Subject: [PATCH 02/10] converters: First pass at oh_id for narrators --- ddr/DDR/converters.py | 22 ++++++++++++--------- ddr/tests/test_converters.py | 38 ++++++++++++++++++------------------ 2 files changed, 32 insertions(+), 28 deletions(-) diff --git a/ddr/DDR/converters.py b/ddr/DDR/converters.py index e2cd300..31a4b0a 100644 --- a/ddr/DDR/converters.py +++ b/ddr/DDR/converters.py @@ -739,7 +739,7 @@ def text_to_bracketids(text: str, # # text = "Masuda, Kikuye [42]:narrator" # data = [ -# {'namepart': 'Masuda, Kikuye', 'role': 'narrator', 'id': 42} +# {'namepart': 'Masuda, Kikuye', 'role': 'narrator', 'oh_id': 42} # ] # # text = "Watanabe, Joe:author; Masuda, Kikuye:narrator" @@ -749,11 +749,11 @@ def text_to_bracketids(text: str, # ] # text = [ # {'namepart': 'Watanabe, Joe', 'role': 'author'} -# {'namepart': 'Masuda, Kikuye', 'role': 'narrator', 'id': 42} +# {'namepart': 'Masuda, Kikuye', 'role': 'narrator', 'oh_id': 42} # ] # data = [ # {'namepart': 'Watanabe, Joe', 'role': 'author'} -# {'namepart': 'Masuda, Kikuye', 'role': 'narrator', 'id': 42} +# 
{'namepart': 'Masuda, Kikuye', 'role': 'narrator', 'oh_id': 42} # ] # @@ -775,12 +775,10 @@ def _parse_rolepeople_text(texts, default): item = copy.deepcopy(default) if ('|' in txt) and (':' in txt): - # ex: "namepart:Sadako Kashiwagi|role:narrator|id:856" + # ex: "namepart:Sadako Kashiwagi|role:narrator|oh_id:856" for chunk in txt.split('|'): key,val = chunk.split(':') item[key.strip()] = val.strip() - if item.get('name') and not item.get('namepart'): - item['namepart'] = item.pop('name') elif ':' in txt: # ex: "Sadako Kashiwagi:narrator" @@ -798,13 +796,19 @@ def _parse_rolepeople_text(texts, default): # ex: "Sadako Kashiwagi" item['namepart'] = txt + # convert old 'id' to oral history id + if item.get('name') and not item.get('namepart'): + item['namepart'] = item.pop('name') + if item.get('id'): + item['oh_id'] = item.pop('id') + # extract person ID if present match = _is_text_bracketid(item.get('namepart','')) if match: item['namepart'] = match.groupdict()['term'].strip() - item['id'] = match.groupdict()['id'].strip() - if item.get('id') and item['id'].isdigit(): - item['id'] = int(item['id']) + #item['oh_id'] = match.groupdict()['oh_id'].strip() + if item.get('oh_id') and item['oh_id'].isdigit(): + item['oh_id'] = int(item['oh_id']) data.append(item) return data diff --git a/ddr/tests/test_converters.py b/ddr/tests/test_converters.py index 96d5fb7..ae81ea3 100644 --- a/ddr/tests/test_converters.py +++ b/ddr/tests/test_converters.py @@ -379,14 +379,14 @@ def test_text_to_bracketids(): ] TEXTROLEPEOPLE_NAME_OUT_ROLE = 'namepart: Watanabe, Joe | role: author' -TEXTROLEPEOPLE_SINGLE_TEXT = 'namepart: Masuda, Kikuye | role: narrator | id: 42' +TEXTROLEPEOPLE_SINGLE_TEXT = 'namepart: Masuda, Kikuye | role: narrator | oh_id: 42' TEXTROLEPEOPLE_SINGLE_DATA = [ - {'namepart': 'Masuda, Kikuye', 'role': 'narrator', 'id': 42}, + {'namepart': 'Masuda, Kikuye', 'role': 'narrator', 'oh_id': 42}, ] -TEXTROLEPEOPLE_SINGLE_ID_TEXT = 'namepart: Masuda, Kikuye | role: narrator | id: 42' +TEXTROLEPEOPLE_SINGLE_ID_TEXT = 'namepart: Masuda, Kikuye | role: narrator | oh_id: 42' TEXTROLEPEOPLE_SINGLE_ID_DATA = [ - {'namepart': 'Masuda, Kikuye', 'role': 'narrator', 'id': 42}, + {'namepart': 'Masuda, Kikuye', 'role': 'narrator', 'oh_id': 42}, ] TEXTROLEPEOPLE_SINGLE_NRID_TEXT = 'namepart: Masuda, Kikuye | nr_id: 88922/nr014m435 | role: narrator' @@ -394,22 +394,22 @@ def test_text_to_bracketids(): {'namepart': 'Masuda, Kikuye', 'nr_id': '88922/nr014m435', 'role': 'narrator'}, ] -TEXTROLEPEOPLE_SINGLE_NRIDID_TEXT = 'namepart: Masuda, Kikuye | nr_id: 88922/nr014m435 | role: narrator | id: 42' +TEXTROLEPEOPLE_SINGLE_NRIDID_TEXT = 'namepart: Masuda, Kikuye | nr_id: 88922/nr014m435 | role: narrator | oh_id: 42' TEXTROLEPEOPLE_SINGLE_NRIDID_DATA = [ - {'namepart': 'Masuda, Kikuye', 'nr_id': '88922/nr014m435', 'role': 'narrator', 'id': 42}, + {'namepart': 'Masuda, Kikuye', 'nr_id': '88922/nr014m435', 'role': 'narrator', 'oh_id': 42}, ] -TEXTROLEPEOPLE_MULTI_TEXT = 'namepart: Watanabe, Joe | role: author; namepart: Masuda, Kikuye | role: narrator | id: 42' +TEXTROLEPEOPLE_MULTI_TEXT = 'namepart: Watanabe, Joe | role: author; namepart: Masuda, Kikuye | role: narrator | oh_id: 42' TEXTROLEPEOPLE_MULTI_DATA = [ {'namepart': 'Watanabe, Joe', 'role': 'author'}, - {'namepart': 'Masuda, Kikuye', 'role': 'narrator', 'id': 42}, + {'namepart': 'Masuda, Kikuye', 'role': 'narrator', 'oh_id': 42}, ] -TEXTROLEPEOPLE_MULTI_NRID_TEXT = 'namepart: Ito, Jo | role: author; namepart: Ban, Shig | nr_id: 88922/nr014m437 | role: arch; 
namepart: Aso, San | role: narrator | id: 42' +TEXTROLEPEOPLE_MULTI_NRID_TEXT = 'namepart: Ito, Jo | role: author; namepart: Ban, Shig | nr_id: 88922/nr014m437 | role: arch; namepart: Aso, San | role: narrator | oh_id: 42' TEXTROLEPEOPLE_MULTI_NRID_DATA = [ {'namepart': 'Ito, Jo', 'role': 'author'}, {'namepart': 'Ban, Shig', 'nr_id': '88922/nr014m437', 'role': 'arch'}, - {'namepart': 'Aso, San', 'role': 'narrator', 'id': 42}, + {'namepart': 'Aso, San', 'role': 'narrator', 'oh_id': 42}, ] TEXTROLEPEOPLE_LISTSTRSNAME_TEXT = [ @@ -425,33 +425,33 @@ def test_text_to_bracketids(): ] TEXTROLEPEOPLE_LISTSTRS_DATA = [ {'namepart': 'Watanabe, Joe', 'role': 'author'}, - {'namepart': 'Masuda, Kikuye', 'role': 'narrator', 'id': 42}, + {'namepart': 'Masuda, Kikuye', 'role': 'narrator', 'oh_id': 42}, ] -TEXTROLEPEOPLE_MULTI_TEXT = 'namepart: Watanabe, Joe | role: author; namepart: Masuda, Kikuye | role: narrator | id: 42' +TEXTROLEPEOPLE_MULTI_TEXT = 'namepart: Watanabe, Joe | role: author; namepart: Masuda, Kikuye | role: narrator | oh_id: 42' TEXTROLEPEOPLE_MULTI_DATA = [ {'namepart': 'Watanabe, Joe', 'role': 'author'}, - {'namepart': 'Masuda, Kikuye', 'role': 'narrator', 'id': 42}, + {'namepart': 'Masuda, Kikuye', 'role': 'narrator', 'oh_id': 42}, ] -TEXTROLEPEOPLE_MULTI_NRID_TEXT = 'namepart: Ito, Jo | role: author; namepart: Ban, Shig | nr_id: 88922/nr014m437 | matching: match | role: arch; namepart: Aso, San | role: narrator | id: 42' +TEXTROLEPEOPLE_MULTI_NRID_TEXT = 'namepart: Ito, Jo | role: author; namepart: Ban, Shig | nr_id: 88922/nr014m437 | matching: match | role: arch; namepart: Aso, San | role: narrator | oh_id: 42' TEXTROLEPEOPLE_MULTI_NRID_DATA = [ {'namepart': 'Ito, Jo', 'role': 'author'}, {'namepart': 'Ban, Shig', 'nr_id': '88922/nr014m437', 'matching': 'match', 'role': 'arch'}, - {'namepart': 'Aso, San', 'role': 'narrator', 'id': 42}, + {'namepart': 'Aso, San', 'role': 'narrator', 'oh_id': 42}, ] -TEXTROLEPEOPLE_PIPES_TEXT = 'namepart: Watanabe, Joe | role: author; namepart: Masuda, Kikuye [42] | role: narrator; namepart: Joi Ito | role: techie | id:123' +TEXTROLEPEOPLE_PIPES_TEXT = 'namepart: Watanabe, Joe | role: author; namepart: Masuda, Kikuye [42] | role: narrator; namepart: Joi Ito | role: techie | oh_id:123' TEXTROLEPEOPLE_PIPES_DATA = [ {'namepart': 'Watanabe, Joe', 'role': 'author'}, - {'namepart': 'Masuda, Kikuye', 'role': 'narrator', 'id': 42}, - {'namepart': 'Joi Ito', 'role': 'techie', 'id': 123}, + {'namepart': 'Masuda, Kikuye', 'role': 'narrator', 'oh_id': 42}, + {'namepart': 'Joi Ito', 'role': 'techie', 'oh_id': 123}, ] TEXTROLEPEOPLE_NOSPACES_TEXT = 'namepart:Watanabe, Joe|role:author; namepart:Masuda, Kikuye [42]|role:narrator;' TEXTROLEPEOPLE_NOSPACES_DATA = [ {'namepart': 'Watanabe, Joe', 'role': 'author'}, - {'namepart': 'Masuda, Kikuye', 'role': 'narrator', 'id': 42}, + {'namepart': 'Masuda, Kikuye', 'role': 'narrator', 'oh_id': 42}, ] # many legacy files have this pattern From 0d04bae0d25eadadf083fd652bdd1c7b06979696 Mon Sep 17 00:00:00 2001 From: Geoffrey Jost Date: Tue, 12 Sep 2023 12:55:28 -0700 Subject: [PATCH 03/10] converters: Test for reading old oh_id format and writing new --- ddr/tests/test_converters.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/ddr/tests/test_converters.py b/ddr/tests/test_converters.py index ae81ea3..e4e8557 100644 --- a/ddr/tests/test_converters.py +++ b/ddr/tests/test_converters.py @@ -428,9 +428,11 @@ def test_text_to_bracketids(): {'namepart': 'Masuda, Kikuye', 'role': 'narrator', 'oh_id': 42}, 
] -TEXTROLEPEOPLE_MULTI_TEXT = 'namepart: Watanabe, Joe | role: author; namepart: Masuda, Kikuye | role: narrator | oh_id: 42' +TEXTROLEPEOPLE_MULTI_TEXT_OLD = 'namepart: Watanabe, Joe | role: author; namepart: Tanaka, Cherry | role: narrator | id: 41; namepart: Masuda, Kikuye | role: narrator | oh_id: 42' +TEXTROLEPEOPLE_MULTI_TEXT_NEW = 'namepart: Watanabe, Joe | role: author; namepart: Tanaka, Cherry | role: narrator | oh_id: 41; namepart: Masuda, Kikuye | role: narrator | oh_id: 42' TEXTROLEPEOPLE_MULTI_DATA = [ {'namepart': 'Watanabe, Joe', 'role': 'author'}, + {'namepart': 'Tanaka, Cherry', 'role': 'narrator', 'oh_id': 41}, {'namepart': 'Masuda, Kikuye', 'role': 'narrator', 'oh_id': 42}, ] @@ -482,7 +484,8 @@ def test_text_to_rolepeople(): assert converters.text_to_rolepeople(TEXTROLEPEOPLE_SINGLE_ID_TEXT, defaults) == TEXTROLEPEOPLE_SINGLE_ID_DATA assert converters.text_to_rolepeople(TEXTROLEPEOPLE_SINGLE_NRID_TEXT, defaults) == TEXTROLEPEOPLE_SINGLE_NRID_DATA assert converters.text_to_rolepeople(TEXTROLEPEOPLE_SINGLE_NRIDID_TEXT, defaults) == TEXTROLEPEOPLE_SINGLE_NRIDID_DATA - assert converters.text_to_rolepeople(TEXTROLEPEOPLE_MULTI_TEXT, defaults) == TEXTROLEPEOPLE_MULTI_DATA + assert converters.text_to_rolepeople(TEXTROLEPEOPLE_MULTI_TEXT_NEW, defaults) == TEXTROLEPEOPLE_MULTI_DATA + assert converters.text_to_rolepeople(TEXTROLEPEOPLE_MULTI_TEXT_OLD, defaults) == TEXTROLEPEOPLE_MULTI_DATA assert converters.text_to_rolepeople(TEXTROLEPEOPLE_MULTI_NRID_TEXT, defaults) == TEXTROLEPEOPLE_MULTI_NRID_DATA assert converters.text_to_rolepeople(TEXTROLEPEOPLE_LISTSTRSNAME_TEXT, defaults) == TEXTROLEPEOPLE_LISTSTRSNAME_DATA assert converters.text_to_rolepeople(TEXTROLEPEOPLE_LISTSTRS_TEXT, defaults) == TEXTROLEPEOPLE_LISTSTRS_DATA @@ -496,6 +499,6 @@ def test_rolepeople_to_text(): assert converters.rolepeople_to_text([]) == '' assert converters.rolepeople_to_text(TEXTROLEPEOPLE_NAME_DATA) == TEXTROLEPEOPLE_NAME_OUT assert converters.rolepeople_to_text(TEXTROLEPEOPLE_SINGLE_DATA) == TEXTROLEPEOPLE_SINGLE_TEXT - assert converters.rolepeople_to_text(TEXTROLEPEOPLE_MULTI_DATA) == TEXTROLEPEOPLE_MULTI_TEXT + assert converters.rolepeople_to_text(TEXTROLEPEOPLE_MULTI_DATA) == TEXTROLEPEOPLE_MULTI_TEXT_NEW assert converters.rolepeople_to_text(TEXTROLEPEOPLE_SINGLE_NRID_DATA) == TEXTROLEPEOPLE_SINGLE_NRID_TEXT assert converters.rolepeople_to_text(TEXTROLEPEOPLE_MULTI_NRID_DATA) == TEXTROLEPEOPLE_MULTI_NRID_TEXT From 091b43e98bd4ab685c20b907d894e61ae433c0cb Mon Sep 17 00:00:00 2001 From: Geoffrey Jost Date: Wed, 13 Sep 2023 09:59:21 -0700 Subject: [PATCH 04/10] converters: Parse original/obsolete narrator ID to oh_id --- ddr/DDR/converters.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/ddr/DDR/converters.py b/ddr/DDR/converters.py index 31a4b0a..e05bea8 100644 --- a/ddr/DDR/converters.py +++ b/ddr/DDR/converters.py @@ -796,6 +796,19 @@ def _parse_rolepeople_text(texts, default): # ex: "Sadako Kashiwagi" item['namepart'] = txt + # obsolete/original narrator ID format: 'Masuda, Kikuye [42]' + if item.get('role') and item['role'] == 'narrator' and not item.get('oh_id'): + # "namepart: Masuda, Kikuye [42] | role: narrator" + m = re.search('([\w\s,-]+) \[(\d+)]', item['namepart']) + if m and m.groups() and len(m.groups()) == 2: + item['namepart'] = m.groups()[0] + item['oh_id'] = m.groups()[1] + # "Masuda, Kikuye [42]:narrator" + m1 = re.search('([\w\s,-]+) \[(\d+)]:narrator', txt) + if m1 and m1.groups() and len(m1.groups()) == 2: + item['namepart'] = m1.groups()[0] + 
item['oh_id'] = m1.groups()[1] + # convert old 'id' to oral history id if item.get('name') and not item.get('namepart'): item['namepart'] = item.pop('name') From 9373ed9904b67e6be1c24cd30e2d654d22ed229b Mon Sep 17 00:00:00 2001 From: Geoffrey Jost Date: Wed, 13 Sep 2023 11:46:16 -0700 Subject: [PATCH 05/10] ddrfar: Reformat as a script in a Markdown document This is a one-off. It needs clear documentation but it doesn't need to be a Click command. --- bin/ddrfar.md | 58 +++++++++++++++++++++++++++++++++++++++++++ ddr/DDR/cli/ddrfar.py | 55 ---------------------------------------- ddr/setup.py | 1 - 3 files changed, 58 insertions(+), 56 deletions(-) create mode 100644 bin/ddrfar.md delete mode 100644 ddr/DDR/cli/ddrfar.py diff --git a/bin/ddrfar.md b/bin/ddrfar.md new file mode 100644 index 0000000..ea6d10c --- /dev/null +++ b/bin/ddrfar.md @@ -0,0 +1,58 @@ +# Make CSV linking FAR facility codes to FAR document scan files + +This script produces a CSV file linking FAR codes used in `namesdb-editor` and elsewhere with individual `File` IDs. + +The Final Accountability Rosters Collection (ddr-densho-305-1) contains scans of +the original FAR documents. +Each `Entity` represents a camp and a set of documents. +Each `File` represents a single scanned page. + +``` python +FAR_FACILITIES = [ + '1-topaz', + '2-poston', + '3-gilariver', + '4-amache', + '5-heartmountain', + '6-jerome', + '7-manzanar', + '8-minidoka', + '9-rohwer', + '10-tulelake', +] + +FACILITY_DDRCOLLECTION = { + '1-topaz': 'ddr-densho-305-1', + '2-poston': 'ddr-densho-305-2', + '3-gilariver': 'ddr-densho-305-3', + '4-amache': 'ddr-densho-305-4', + '5-heartmountain': 'ddr-densho-305-5', + '6-jerome': 'ddr-densho-305-6', + '7-manzanar': 'ddr-densho-305-7', + '8-minidoka': 'ddr-densho-305-8', + '9-rohwer': 'ddr-densho-305-9', + '10-tulelake': 'ddr-densho-305-10', +} + +BASEDIR = '/var/www/media/ddr' +OUTPUT_FILE = '/tmp/ddr-far.csv' + +from pathlib import Path +from DDR import config +from DDR import fileio +from DDR import identifier +headers = ['facility', 'page', 'file_id', 'file_label'] +lines = [fileio.write_csv_str(headers)] +for facility in FAR_FACILITIES: + eid = FACILITY_DDRCOLLECTION[facility] + entity = identifier.Identifier(eid, BASEDIR).object() + for n,file_ in enumerate(entity.children()): + if n == 0: + continue + row = [facility, n, file_.identifier.id, file_.label] + line = fileio.write_csv_str(row) + lines.append(line) + +with Path(OUTPUT_FILE).open('w') as f: + f.write('\n'.join(lines)) +``` diff --git a/ddr/DDR/cli/ddrfar.py b/ddr/DDR/cli/ddrfar.py deleted file mode 100644 index 2a6637a..0000000 --- a/ddr/DDR/cli/ddrfar.py +++ /dev/null @@ -1,55 +0,0 @@ -import click - -from DDR import config -from DDR import fileio -from DDR import identifier - -FAR_FACILITIES = [ - '1-topaz', - '2-poston', - '3-gilariver', - '4-amache', - '5-heartmountain', - '6-jerome', - '7-manzanar', - '8-minidoka', - '9-rohwer', - '10-tulelake', -] - -FACILITY_DDRCOLLECTION = { - '1-topaz': 'ddr-densho-305-1', - '2-poston': 'ddr-densho-305-2', - '3-gilariver': 'ddr-densho-305-3', - '4-amache': 'ddr-densho-305-4', - '5-heartmountain': 'ddr-densho-305-5', - '6-jerome': 'ddr-densho-305-6', - '7-manzanar': 'ddr-densho-305-7', - '8-minidoka': 'ddr-densho-305-8', - '9-rohwer': 'ddr-densho-305-9', - '10-tulelake': 'ddr-densho-305-10', -} - - -@click.command() -@click.option('--basedir','-b', default=config.MEDIA_BASE, help='Repository base directory') -def ddrfar(basedir): - """Print CSV linking FAR facility with each page's DDR File ID - - \b - 
BASEDIR - DDR repository base directory (ddrlocal.cfg [cmdln] media_base) - """ - headers = ['facility', 'page', 'file_id', 'file_label'] - click.echo(fileio.write_csv_str(headers)) - for facility in FAR_FACILITIES: - eid = FACILITY_DDRCOLLECTION[facility] - e = identifier.Identifier(eid, basedir).object() - for n,file_ in enumerate(e.children()): - if n == 0: - continue - row = [facility, n, file_.identifier.id, file_.label] - click.echo(fileio.write_csv_str(row)) - - -if __name__ == '__main__': - ddrfar() diff --git a/ddr/setup.py b/ddr/setup.py index 3c65b85..f3a539b 100644 --- a/ddr/setup.py +++ b/ddr/setup.py @@ -72,7 +72,6 @@ def find_version(*file_paths): ddrconfig=DDR.cli.ddrconfig:ddrconfig ddrdesc=DDR.cli.ddrdesc:ddrdesc ddrexport=DDR.cli.ddrexport:ddrexport - ddrfar=DDR.cli.ddrfar:ddrfar ddrindex=DDR.cli.ddrindex:ddrindex ddrinfo=DDR.cli.ddrinfo:ddrinfo ddrimport=DDR.cli.ddrimport:ddrimport From 4e3888943b9e486159ee783928f9bae6b72fcab6 Mon Sep 17 00:00:00 2001 From: Geoffrey Jost Date: Thu, 21 Sep 2023 10:28:09 -0700 Subject: [PATCH 06/10] converters: Upconvert narrator id to oh_id from both CSV and JSON DDR.converters.text_to_rolepeople handles creator/person structs in both CSV (the rolepeople format with colons,pipes,semicolons) and in JSON. Previous commits only updated CSV text. This commit upconverts `id` to `oh_id` in JSON as well. This commit also ensures that oh_id is an int. --- ddr/DDR/converters.py | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/ddr/DDR/converters.py b/ddr/DDR/converters.py index e05bea8..11c73ee 100644 --- a/ddr/DDR/converters.py +++ b/ddr/DDR/converters.py @@ -758,21 +758,33 @@ def text_to_bracketids(text: str, # def _filter_rolepeople(data: List[Dict[str,str]]) -> List[Dict[str,str]]: - """filters out items with empty nameparts + """Processes persons/creators data already in list-of-dicts format + filters out items with empty nameparts prevents this: [{'namepart': '', 'role': 'author'}] """ - return [ + data = [ item for item in data if item.get('namepart') # and item.get('role') ] + # convert old 'id' to oral history id + for item in data: + if item.get('id'): + item['oh_id'] = item.pop('id') + # make sure oh_id is int + if item.get('oh_id'): + item['oh_id'] = int(item['oh_id']) + return data # TODO add type hints def _parse_rolepeople_text(texts, default): + """Parses rolepeople format used to represent persons/creators in CSV + """ data = [] for text in _unroll_gloppy_list(texts): txt = text.strip() if txt: item = copy.deepcopy(default) + # TODO clean this up if ('|' in txt) and (':' in txt): # ex: "namepart:Sadako Kashiwagi|role:narrator|oh_id:856" @@ -809,18 +821,22 @@ def _parse_rolepeople_text(texts, default): item['namepart'] = m1.groups()[0] item['oh_id'] = m1.groups()[1] - # convert old 'id' to oral history id + # convert old 'name' to 'namepart' if item.get('name') and not item.get('namepart'): item['namepart'] = item.pop('name') + + # convert old 'id' to oral history id if item.get('id'): item['oh_id'] = item.pop('id') - # extract person ID if present + # old bracketid format match = _is_text_bracketid(item.get('namepart','')) if match: item['namepart'] = match.groupdict()['term'].strip() #item['oh_id'] = match.groupdict()['oh_id'].strip() - if item.get('oh_id') and item['oh_id'].isdigit(): + + # make sure oh_id is int + if item.get('oh_id'): item['oh_id'] = int(item['oh_id']) data.append(item) @@ -851,9 +867,9 @@ def text_to_rolepeople(text: str, default: dict) -> List[Dict[str,str]]: 
# might already be listofdicts or listofstrs if isinstance(text, list): - if _is_listofdicts(text): + if _is_listofdicts(text): # from JSON return _filter_rolepeople(text) - elif _is_listofstrs(text): + elif _is_listofstrs(text): # from CSV data = _parse_rolepeople_text(text, default) return _filter_rolepeople(data) From 0d397df6dcbd6fbb33f43e5db244e93563d723ec Mon Sep 17 00:00:00 2001 From: Geoffrey Jost Date: Thu, 21 Sep 2023 10:52:20 -0700 Subject: [PATCH 07/10] ddrindex: Less error-prone output for destroy confirm You never know what info will be available from Elasticsearch objects so just print the whole thing. --- ddr/DDR/cli/ddrindex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ddr/DDR/cli/ddrindex.py b/ddr/DDR/cli/ddrindex.py index 99c980c..83bf349 100644 --- a/ddr/DDR/cli/ddrindex.py +++ b/ddr/DDR/cli/ddrindex.py @@ -270,7 +270,7 @@ def destroy(confirm, host): + "will be DESTROYED!" ) for index in identifier.ELASTICSEARCH_CLASSES['all']: - click.echo(f"- {index['doc_type']}") + click.echo(f"- {index}") else: click.echo( f"Add '--confirm' to destroy the {cluster} cluster ({ds.host})." From cfc6e192b478ff254561dd428edd9418f5736ce3 Mon Sep 17 00:00:00 2001 From: Geoffrey Jost Date: Thu, 12 Oct 2023 14:47:14 -0700 Subject: [PATCH 08/10] commands: Make ddr-clone complain if collection does not exist ddr-clone should only be used to clone *existing* collections. Connect to gitolite and see if requested collection is in the list. Raise exception if not. ddr-create already does this but in reverse. --- ddr/DDR/commands.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/ddr/DDR/commands.py b/ddr/DDR/commands.py index d1a9f1e..2ede269 100644 --- a/ddr/DDR/commands.py +++ b/ddr/DDR/commands.py @@ -167,6 +167,12 @@ def clone(user_name, user_mail, identifier, dest_path): @return: message ('ok' if successful) """ git_url = '{}:{}.git'.format(config.GITOLITE, identifier.id) + # Raise exception if repository does not exist + gitolite = dvcs.Gitolite(config.GITOLITE) + gitolite.initialize() + if not identifier.id in gitolite.repos(): + raise Exception(f'{git_url} must be created before it can be cloned.') + # clone from Gitolite repo = git.Repo.clone_from(git_url, dest_path) logging.debug(' git clone {}'.format(git_url)) if repo: @@ -220,11 +226,13 @@ def create(user_name, user_mail, identifier, agent=''): @param agent: (optional) Name of software making the change. @return: message ('ok' if successful) """ + git_url = '{}:{}.git'.format(config.GITOLITE, identifier.id) + # Raise exception if repository already exists gitolite = dvcs.Gitolite(config.GITOLITE) gitolite.initialize() if identifier.id in gitolite.collections(): - raise Exception("'%s' already exists -- clone instead." % identifier.id) - git_url = '{}:{}.git'.format(config.GITOLITE, identifier.id) + raise Exception(f'{git_url} cannot be created when it already exists.') + # clone from Gitolite (Gitolite creates the repository on the server) repo = git.Repo.clone_from(git_url, identifier.path_abs()) logging.debug(' git clone {}'.format(git_url)) if repo: From 5f6016e5a1234074ae890ccb421ce41ebc14f8a7 Mon Sep 17 00:00:00 2001 From: Geoffrey Jost Date: Tue, 17 Oct 2023 13:44:06 -0700 Subject: [PATCH 09/10] ddrindex: Add organization sort field when publishing to ES This will enable sorting of organizations. 
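For context, a hypothetical sketch of how the new field might be queried once it is indexed; the index name, host, and `model` value below are assumptions, only the `sort` document field itself comes from this patch.

``` python
# Hypothetical sketch -- the index name, host, and 'organization' model value
# are assumptions; only the new 'sort' document field comes from this patch.
from elasticsearch_dsl import Search, connections

connections.create_connection(hosts=['http://localhost:9200'])

# List organization documents ordered by the new sort field.
orgs = (
    Search(index='ddr-organization')
    .query('match', model='organization')
    .sort('sort')
    .execute()
)
for hit in orgs:
    print(hit.sort, hit.meta.id)
```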
--- ddr/DDR/docstore.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ddr/DDR/docstore.py b/ddr/DDR/docstore.py index f4d621a..76a3569 100644 --- a/ddr/DDR/docstore.py +++ b/ddr/DDR/docstore.py @@ -309,6 +309,7 @@ def _repo_org(self, path, doctype, remove=False): d = ES_Class(id=oi.id) d.meta.id = oi.id d.model = oi.model + d.sort = data.get('sort') d.parent_id = oi.parent_id(stubs=1) # links d.links_html = oi.id From b2693a77d2b62b68ec5c61f7b2403d3424822662 Mon Sep 17 00:00:00 2001 From: Geoffrey Jost Date: Tue, 24 Oct 2023 10:16:50 -0700 Subject: [PATCH 10/10] tests: Add expected partner phljacl --- ddr/tests/test_identifier.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ddr/tests/test_identifier.py b/ddr/tests/test_identifier.py index 272ffba..e5fbfaa 100644 --- a/ddr/tests/test_identifier.py +++ b/ddr/tests/test_identifier.py @@ -172,7 +172,7 @@ def test_definitions_valid_components(): 'repo': 'ddr', 'org': [ 'densho', 'ajah', 'chi', 'csujad', 'fom', 'hmwf', 'jamsj', 'janm', - 'jcch', 'manz', 'njpa', 'one', 'pc', 'sbbt', 'sjacl', 'dev', + 'jcch', 'manz', 'njpa', 'one', 'pc', 'phljacl', 'sbbt', 'sjacl', 'dev', 'qumulo', 'test', 'testing' ], 'role': [
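Taken together, the converter patches (02, 03, 04 and 06) read both the legacy narrator `id`/bracket formats and the new `oh_id` format, but only ever write the new format back out. A minimal round-trip sketch, grounded in the test fixtures above; passing an empty dict for the `default` argument is an assumption here, since the test suite's `defaults` fixture is not shown in these patches.

``` python
# Illustrative sketch only -- assumes the DDR package from this repository is
# importable and that an empty default dict is acceptable (the test suite's
# `defaults` fixture is not shown in these patches).
from DDR import converters

legacy = ('namepart: Watanabe, Joe | role: author; '
          'namepart: Masuda, Kikuye [42] | role: narrator')
current = ('namepart: Watanabe, Joe | role: author; '
           'namepart: Masuda, Kikuye | role: narrator | oh_id: 42')

# The legacy bracket-ID form and the current oh_id form parse to the same
# list-of-dicts structure, with oh_id coerced to an int.
parsed_legacy = converters.text_to_rolepeople(legacy, {})
parsed_current = converters.text_to_rolepeople(current, {})
assert parsed_legacy == parsed_current == [
    {'namepart': 'Watanabe, Joe', 'role': 'author'},
    {'namepart': 'Masuda, Kikuye', 'role': 'narrator', 'oh_id': 42},
]

# Writing back always emits the new oh_id format.
assert converters.rolepeople_to_text(parsed_legacy) == current
```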