From 3b2f8d607ecfac9641d63be754fd5241b99d6433 Mon Sep 17 00:00:00 2001 From: Andre Martins Date: Mon, 13 Oct 2014 20:19:35 +0100 Subject: [PATCH] ENH Added script to create a single gazetteer file. --- scripts_ner/create_gazetteer_file.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 scripts_ner/create_gazetteer_file.py diff --git a/scripts_ner/create_gazetteer_file.py b/scripts_ner/create_gazetteer_file.py new file mode 100644 index 0000000..1608d25 --- /dev/null +++ b/scripts_ner/create_gazetteer_file.py @@ -0,0 +1,18 @@ +import os +import sys + +if __name__ == "__main__": + path = sys.argv[1] # Path to the folder where the gazetteers are. + destination_filepath = sys.argv[2] # Path to the destination file. + f_out = open(destination_filepath, 'w') + gazetteers_list = [name for name in os.listdir(path) if + not os.path.isdir(path + os.sep + name)] + gazetteers_list.sort() + for name in gazetteers_list: + print 'Adding ', name, '...' + filepath = path + os.sep + name + f = open(filepath) + for line in f: + line = line.rstrip('\r\n') + f_out.write('%s\t%s\n' % (name, line)) + f_out.close()