From 21c16347928ab9e24f197f3b53519d81a1f287d3 Mon Sep 17 00:00:00 2001 From: luoruixuan <1500012842@pku.edu.cn> Date: Fri, 11 Jan 2019 19:12:18 +0800 Subject: [PATCH] remove dead code --- pkuseg/config.py | 4 +--- pkuseg/main.py | 18 ------------------ pkuseg/process_data.py | 24 ------------------------ 3 files changed, 1 insertion(+), 45 deletions(-) delete mode 100644 pkuseg/process_data.py diff --git a/pkuseg/config.py b/pkuseg/config.py index f084c0e..bf86b5a 100644 --- a/pkuseg/config.py +++ b/pkuseg/config.py @@ -27,9 +27,7 @@ def __init__(self): self.readFile = "data/small_test.utf8" self.outputFile = "data/small_test_output.utf8" - self.runMode = ( - "test" - ) # train (normal training), train.rich (training with rich edge features), test, tune£¬ tune.rich, cv (cross validation), cv.rich + self.runMode = "train" self.modelOptimizer = "crf.adf" self.rate0 = 0.05 # init value of decay rate in SGD and ADF training self.regs = [1] diff --git a/pkuseg/main.py b/pkuseg/main.py index d5d0aca..a58a8fe 100644 --- a/pkuseg/main.py +++ b/pkuseg/main.py @@ -8,8 +8,6 @@ from .inference import * from .config import Config import time -from .process_data import tocrfoutput - def run(config=None): if config is None: @@ -41,22 +39,6 @@ def run(config=None): richEdge.train() else: train(config) - elif config.runMode.find("test") >= 0: - config.swLog.write("\nstart testing...\n") - if config.runMode.find("rich") >= 0: - richEdge.test() - else: - test(config) - tocrfoutput( - config, - os.path.join(config.outFolder, "outputTag.txt"), - config.outputFile, - os.path.join(config.tempFile, "test.raw.txt"), - ) - elif config.rumMode.find("cv") >= 0: - print("\nstart cross validation") - config.swLog.write("\nstart cross validation\n") - crossValidation(config) else: raise Exception("unknown mode") diff --git a/pkuseg/process_data.py b/pkuseg/process_data.py deleted file mode 100644 index 66fa099..0000000 --- a/pkuseg/process_data.py +++ /dev/null @@ -1,24 +0,0 @@ -def tocrfoutput(config, readpath, writedatapath, rawdatapath): - with open(os.path.join(config.modelDir, "tagIndex.txt")) as tagfile: - lines = tagfile.readlines() - tags = {} - for line in lines: - wordTags = line.split(" ") - tags[int(wordTags[1])] = wordTags[0] - - with open(readpath, encoding="utf-8") as outputtag, open( - writedatapath, "w", encoding="utf-8" - ) as sw, open(rawdatapath, encoding="utf-8") as rawtext: - lines = outputtag.readlines() - rawlines = rawtext.readlines() - for line, raw in zip(lines, rawlines): - write_string = "" - linetag = line.split(",") - for i, word in enumerate(raw): - if linetag[i] == "\n": - continue - if tags[int(linetag[i])].find("B") >= 0: - write_string = write_string + " " + word - else: - write_string = write_string + word - sw.write(write_string.strip() + "\n")