From c0bfc524a3bd757cc70cc84d343925a28ebe4f63 Mon Sep 17 00:00:00 2001 From: Emanuel Calvo <3manuek@gmail.com> Date: Tue, 13 Jun 2023 14:01:32 +0200 Subject: [PATCH] fix: ndjson file escaped Signed-off-by: Emanuel Calvo <3manuek@gmail.com> --- artifact_builder/__main__.py | 17 +++++++++++++++-- artifact_builder/indexer.py | 2 +- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/artifact_builder/__main__.py b/artifact_builder/__main__.py index 27cceb3..7a89c88 100644 --- a/artifact_builder/__main__.py +++ b/artifact_builder/__main__.py @@ -3,7 +3,8 @@ import json import ndjson import argparse - +from re import sub, escape, MULTILINE +from os import remove sqlDirectory = "../sql/" global _engine @@ -30,11 +31,23 @@ def main(): with open(args.output, 'rb') as f: data = json.load(f) - with open(args.output_ndjson, 'w+', encoding='utf-8-sig') as f: + """ + The next blocks do a nasty thing: they dump the contents of the data dictionary into a + temporary file so that the escape character (backslash) can be escaped afterwards. This + generates an ndjson file compatible with Postgres COPY. + """ + with open(args.output_ndjson + '.temp', 'w+', encoding='utf-8-sig') as f: writer = ndjson.writer(f) for key in data: writer.writerow(data[key]) + with open(args.output_ndjson, 'w+', encoding='utf-8-sig') as f: + input = open(args.output_ndjson + '.temp') + f.write(sub(r'\\',r'\\\\',input.read())) + input.close() + + remove(args.output_ndjson + '.temp') + if __name__ == "__main__": main() \ No newline at end of file diff --git a/artifact_builder/indexer.py b/artifact_builder/indexer.py index bb2ab88..047d6d0 100644 --- a/artifact_builder/indexer.py +++ b/artifact_builder/indexer.py @@ -18,7 +18,7 @@ def indexDir(sqlDirectory: str, _engine: str) -> fileMap: # For now, we ignore READMEs. But, we might furtherly include some documentation # artifact. 
- if key not in _fileMap and filename.removesuffix(".md").lower() != 'readme': + if key not in _fileMap and filename.removesuffix(".md").lower() not in ('readme', '.gitkeep'): _fileMap[key]={'engine': _engine} _fileMap[key]={'title': sub('[_-]'," ", str(key)).capitalize()}