From a1414f98210c63c72f5228497382988882b65f7b Mon Sep 17 00:00:00 2001 From: Jussi Laasonen Date: Mon, 11 Mar 2019 13:57:37 +0100 Subject: [PATCH] Do not ignore CAD --- lib/extractDocuments.js | 1 - lib/extractDocuments.test.js | 18 +++--------------- 2 files changed, 3 insertions(+), 16 deletions(-) diff --git a/lib/extractDocuments.js b/lib/extractDocuments.js index ee19934..2981bd5 100644 --- a/lib/extractDocuments.js +++ b/lib/extractDocuments.js @@ -15,7 +15,6 @@ module.exports = function countHits (catalog) { .reject(line => /^\s*$/.test(line)) .reject(line => /"?File name"?;"Title";"Page";"Search Instance"/.test(line)) .reject(line => /^"?BC .+\.pdf/.test(line)) - .reject(line => /^"?CAD[ _]/i.test(line)) .map(line => _(line).split(';').take(3)) .map(createReference()) .groupBy('document') diff --git a/lib/extractDocuments.test.js b/lib/extractDocuments.test.js index dd8584e..8208996 100644 --- a/lib/extractDocuments.test.js +++ b/lib/extractDocuments.test.js @@ -23,8 +23,6 @@ File name;"Title";"Page";"Search Instance"\t\t\t\t\t\t ABL 4-6.pdf;"";"97";"433 [412.] 48-7-20\t 115. OBVERSE. T; ^ft H ' T "\t\t\t\t\t ABL 4-6.pdf;"";"98";"434 [412.] 48-7-20\t 115 (continued). REVERSE. raTA T T e "\t\t\t\t\t ABL 4-6.pdf;"";"127";"Rm. 2\t 5 48-7-20\t 115 Bu. 91-5-9\t 12 Rm. 77 "\t\t\t -CADShoudlBeAHit.pdf;"";"208";"to the king. 48-7-20\t 115 (p. 1688). Letter to the - "\t\t\t\t\t -cadShoudlBeAHit.pdf;"";"208";"to the king. 48-7-20\t 115 (p. 1688). Letter to the - "\t\t\t\t\t \t\t\t\t\t\t File name;"Title";"Page";"Search Instance"\t\t\t\t\t\t BC Abcd 123.pdf;"";"330";"p. 372. [48-7-20\t 115] Upper half\t 2 in. by 1 in.; "\t\t\t\t @@ -32,9 +30,7 @@ BC Abcd 123.pdf;"";"330";"p. 372. [48-7-20\t 115] Upper half\t 2 in. by 1 in.; " File name;"Title";"Page";"Search Instance"\t\t\t\t\t\t BC 5.pdf;"";"194";"(p. 219). 48-7-20\t 115 (p. 1688). 67-4-2\t 1 "\t\t\t\t BC 5.pdf;"";"208";"to the king. 48-7-20\t 115 (p. 1688). Letter to the - "\t\t\t\t\t -CAD 5.pdf;"";"208";"to the king. 48-7-20\t 115 (p. 1688). Letter to the - "\t\t\t\t\t -[CAD D, E, I] Edzard 1961 Rv CAD D, E, I ZA 54.pdf;"";"1";"= I G Rm 2, 447 zu Rm 2,447 I G S. U. 51(" -cad_s_shin_2 [1992].pdf;"Untitled";"458";"BSOAS 20 263 DT 290: 8 f. (= Bil. Edubba B iv). pi-ia-"` +CAD 5.pdf;"";"208";"to the king. 48-7-20\t 115 (p. 1688). Letter to the - "\t\t\t\t\t` const alternativeCatalog = `"Search Results" @@ -47,12 +43,8 @@ const alternativeCatalog = `"Search Results" "File name";"Title";"Page";"Search Instance" "ABL 4-6.pdf";"";"97";"433 [412.] 48-7-20\t 115. OBVERSE. T; ^ft H ' T " -"[CAD D, E, I] Edzard 1961 Rv CAD D, E, I ZA 54.pdf";"";"1";"= I G Rm 2, 447 zu Rm 2,447 I G S. U. 51(" "BC 3.pdf";"";"467";"houses, etc. [K. 14021] Left-hand corner, 1ain. by 1lin.; ......... + 6 " "CAD 3.pdf";"";"467";"houses, etc. [K. 14021] Left-hand corner, 1ain. by 1lin.; ......... + 6 " -"cad_s_shin_2 [1992].pdf";"Untitled";"458";"BSOAS 20 263 DT 290: 8 f. (= Bil. Edubba B iv). pi-ia-" -"CADShoudlBeAHit.pdf";"";"208";"to the king. 48-7-20\t 115 (p. 1688). Letter to the - "\t\t\t\t\t -"cadShoudlBeAHit.pdf";"";"208";"to the king. 48-7-20\t 115 (p. 1688). Letter to the - "\t\t\t\t\t ` test('Returns documents not starting with "BC" or "CAD"', () => { @@ -60,17 +52,13 @@ test('Returns documents not starting with "BC" or "CAD"', () => { new Reference('Fincke 2003-2004 The Babylonian Texts of Nineveh AfO 50', [38]), new Reference('0(7)', [7, 10]), new Reference('ABL 4-6', [97, 98, 127]), - new Reference('CADShoudlBeAHit', [208]), - new Reference('cadShoudlBeAHit', [208]), - new Reference('[CAD D, E, I] Edzard 1961 Rv CAD D, E, I ZA 54', [1]) + new Reference('CAD 5', [208]) ]) }) test('Returns documents not starting with "BC" or "CAD" in alternative format', () => { expect(extractDocuments(alternativeCatalog)).toEqual([ new Reference('ABL 4-6', [97]), - new Reference('[CAD D, E, I] Edzard 1961 Rv CAD D, E, I ZA 54', [1]), - new Reference('CADShoudlBeAHit', [208]), - new Reference('cadShoudlBeAHit', [208]) + new Reference('CAD 3', [467]) ]) })