diff --git a/docs/plugin-basics.md b/docs/plugin-basics.md index 29709450..5715bc20 100644 --- a/docs/plugin-basics.md +++ b/docs/plugin-basics.md @@ -32,10 +32,8 @@ npm install @ezs/basics - [TARDump](#tardump) - [TARExtract](#tarextract) - [TXTConcat](#txtconcat) -- [TXTInflection](#txtinflection) - [TXTObject](#txtobject) - [TXTParse](#txtparse) -- [TXTSentences](#txtsentences) - [TXTZip](#txtzip) - [URLConnect](#urlconnect) - [URLFetch](#urlfetch) @@ -650,46 +648,6 @@ Output: Returns **[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** -### TXTInflection - -Take a `String` and inflect it with or more transformers from this list - pluralize, singularize, camelize, underscore, humanize, capitalize, - dasherize, titleize, demodulize, tableize, classify, foreign_key, ordinalize - -Input: - -```json -{ "id": 1, "value": "all job" } -``` - -Script: - -```ini -[TXTInflection] -transform = pluralize -transform = capitalize -transform = dasherize -``` - -Output: - -```json -{ "id": 1, "value": "All-jobs" } -``` - -#### Parameters - -- `path` **[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** path of the field to segment (optional, default `"value"`) -- `transform` **[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)?** name of a transformer - -Returns **[Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)<[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)>** - -**Meta** - -- **deprecated**: see - see - - ### TXTObject Take an array of values and generate an array containing objects with the @@ -735,33 +693,6 @@ Output: Returns **[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** -### TXTSentences - -Take a `String` and split it into an array of sentences. - -Input: - -```json -{ "id": 1, "value": "First sentence? Second sentence. My name is Bond, J. Bond." } -``` - -Output: - -```json -{ "id": 1, "value": ["First sentence?", "Second sentence.", "My name is Bond, J. Bond."] } -``` - -#### Parameters - -- `path` **[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** path of the field to segment (optional, default `"value"`) - -Returns **[Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)<[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)>** - -**Meta** - -- **deprecated**: see - - ### TXTZip Take a `String` and zip it. diff --git a/packages/basics/README.md b/packages/basics/README.md index 29709450..5715bc20 100644 --- a/packages/basics/README.md +++ b/packages/basics/README.md @@ -32,10 +32,8 @@ npm install @ezs/basics - [TARDump](#tardump) - [TARExtract](#tarextract) - [TXTConcat](#txtconcat) -- [TXTInflection](#txtinflection) - [TXTObject](#txtobject) - [TXTParse](#txtparse) -- [TXTSentences](#txtsentences) - [TXTZip](#txtzip) - [URLConnect](#urlconnect) - [URLFetch](#urlfetch) @@ -650,46 +648,6 @@ Output: Returns **[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** -### TXTInflection - -Take a `String` and inflect it with or more transformers from this list - pluralize, singularize, camelize, underscore, humanize, capitalize, - dasherize, titleize, demodulize, tableize, classify, foreign_key, ordinalize - -Input: - -```json -{ "id": 1, "value": "all job" } -``` - -Script: - -```ini -[TXTInflection] -transform = pluralize -transform = capitalize -transform = dasherize -``` - -Output: - -```json -{ "id": 1, "value": "All-jobs" } -``` - -#### Parameters - -- `path` **[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** path of the field to segment (optional, default `"value"`) -- `transform` **[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)?** name of a transformer - -Returns **[Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)<[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)>** - -**Meta** - -- **deprecated**: see - see - - ### TXTObject Take an array of values and generate an array containing objects with the @@ -735,33 +693,6 @@ Output: Returns **[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** -### TXTSentences - -Take a `String` and split it into an array of sentences. - -Input: - -```json -{ "id": 1, "value": "First sentence? Second sentence. My name is Bond, J. Bond." } -``` - -Output: - -```json -{ "id": 1, "value": ["First sentence?", "Second sentence.", "My name is Bond, J. Bond."] } -``` - -#### Parameters - -- `path` **[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** path of the field to segment (optional, default `"value"`) - -Returns **[Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)<[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)>** - -**Meta** - -- **deprecated**: see - - ### TXTZip Take a `String` and zip it. diff --git a/packages/basics/package.json b/packages/basics/package.json index ab73860a..0899564b 100644 --- a/packages/basics/package.json +++ b/packages/basics/package.json @@ -16,7 +16,6 @@ "from": "0.1.7", "get-stream": "6.0.1", "higher-path": "1.0.0", - "inflection": "2.0.1", "lodash": "4.17.21", "make-dir": "4.0.0", "micromatch": "4.0.8", diff --git a/packages/basics/src/index.js b/packages/basics/src/index.js index 75d867c9..142a0708 100644 --- a/packages/basics/src/index.js +++ b/packages/basics/src/index.js @@ -7,8 +7,6 @@ import BIBParse from './bib-parse'; import TXTConcat from './txt-concat'; import TXTObject from './txt-object'; import TXTParse from './txt-parse'; -import TXTSentences from './txt-sentences'; -import TXTInflection from './txt-inflection'; import XMLParse from './xml-parse'; import XMLString from './xml-string'; import XMLConvert from './xml-convert'; @@ -43,8 +41,6 @@ const funcs = { TXTParse, TXTObject, TXTConcat, - TXTSentences, - TXTInflection, XMLParse, XMLString, XMLConvert, diff --git a/packages/basics/src/txt-inflection.js b/packages/basics/src/txt-inflection.js deleted file mode 100644 index a7d2881a..00000000 --- a/packages/basics/src/txt-inflection.js +++ /dev/null @@ -1,57 +0,0 @@ -import { get } from 'lodash'; -import { transform } from 'inflection'; - - -const transformer = (transformations) => - (str) => - (str && typeof str === 'string') ? transform( str, transformations) : str; - -const TXTInflection = (data, feed, ctx) => { - if (ctx.isLast()) { - return feed.close(); - } - const transformations = [].concat(ctx.getParam('transform', [])).filter(Boolean); - const path = ctx.getParam('path', 'value'); - const value = get(data, path, ''); - const process = transformer(transformations); - const result = Array.isArray(value) ? value.map((item) => process(item)) : process(value); - - feed.write({ ...data, [path]: result }); - return feed.end(); -}; - -/** - * Take a `String` and inflect it with or more transformers from this list - * pluralize, singularize, camelize, underscore, humanize, capitalize, - * dasherize, titleize, demodulize, tableize, classify, foreign_key, ordinalize - * - * Input: - * - * ```json - * { "id": 1, "value": "all job" } - * ``` - * Script: - * ```ini - * [TXTInflection] - * transform = pluralize - * transform = capitalize - * transform = dasherize - * ``` - * - * Output: - * - * ```json - * { "id": 1, "value": "All-jobs" } - * ``` - * - * @name TXTInflection - * @param {String} [path="value"] path of the field to segment - * @param {String} [transform] name of a transformer - * @returns {String[]} - * @deprecated - * see https://inist-cnrs.github.io/ezs/#/plugin-strings?id=inflection - * see https://www.npmjs.com/package/inflection - */ -export default { - TXTInflection, -}; diff --git a/packages/basics/src/txt-sentences.js b/packages/basics/src/txt-sentences.js deleted file mode 100644 index 5b6b6531..00000000 --- a/packages/basics/src/txt-sentences.js +++ /dev/null @@ -1,98 +0,0 @@ -import { get } from 'lodash'; - -const UPPER_LETTERS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'; -const SENTENCE_INIT = ' '; -const SENTENCE_ENDING = '.?!'; - -/* - * Segment sentences from `str` into an array - * @param {string} str - * @returns {string[]} - */ -const segmentSentences = (str) => { - const characters = Array.from(str); - const sentences = characters - .reduce( - /* - * @param {string[]} prevSentences - * @param {string} character - * @return {string[]} - */ - (prevSentences, character) => { - const currentSentence = prevSentences.slice(-1)[0]; - const [char1, char2] = currentSentence.slice(-2); - if (SENTENCE_ENDING.includes(character)) { - if (character !== '.') { - return [ - ...prevSentences.slice(0, -1), - currentSentence + character, - SENTENCE_INIT, - ]; - } - if (char1 !== ' ') { - return [ - ...prevSentences.slice(0, -1), - currentSentence + character, - SENTENCE_INIT, - ]; - } - if (!UPPER_LETTERS.includes(char2)) { - return [ - ...prevSentences.slice(0, -1), - currentSentence + character, - SENTENCE_INIT, - ]; - } - } - return [ - ...prevSentences.slice(0, -1), - currentSentence + character, - ]; - }, - [SENTENCE_INIT] - ) - .filter((sentence) => sentence !== SENTENCE_INIT) - .map((sentence) => sentence.trimStart()); - return sentences; -}; - -const TXTSentences = (data, feed, ctx) => { - if (ctx.isLast()) { - return feed.close(); - } - const path = ctx.getParam('path', 'value'); - const value = get(data, path); - - const str = Array.isArray(value) - ? value.map((item) => (typeof item === 'string' ? item : '')).join(' ') - : value; - const sentences = str ? segmentSentences(str) : []; - - feed.write({ ...data, [path]: sentences }); - return feed.end(); -}; - -/** - * Take a `String` and split it into an array of sentences. - * - * Input: - * - * ```json - * { "id": 1, "value": "First sentence? Second sentence. My name is Bond, J. Bond." } - * ``` - * - * Output: - * - * ```json - * { "id": 1, "value": ["First sentence?", "Second sentence.", "My name is Bond, J. Bond."] } - * ``` - * - * @name TXTSentences - * @param {String} [path="value"] path of the field to segment - * @returns {String[]} - * @deprecated - * see https://inist-cnrs.github.io/ezs/#/plugin-strings?id=sentences - */ -export default { - TXTSentences, -}; diff --git a/packages/basics/test/txt-inflection.js b/packages/basics/test/txt-inflection.js deleted file mode 100644 index 69944317..00000000 --- a/packages/basics/test/txt-inflection.js +++ /dev/null @@ -1,111 +0,0 @@ -import from from 'from'; -// @ts-ignore -import ezs from '../../core/src'; -import ezsBasics from '../src'; - -ezs.use(ezsBasics); - -describe('TXTInflection', () => { - it('should return input #1', (done) => { - let res = []; - from([{ value: '' }]) - .pipe(ezs('TXTInflection')) - .on('data', (data) => { - res = [...res, data]; - }) - .on('end', () => { - expect(res).toStrictEqual([{ value: '' }]); - done(); - }); - }); - it('should return empty', (done) => { - let res = []; - from([{ term: 'truc' }]) - .pipe(ezs('TXTInflection')) - .on('data', (data) => { - res = [...res, data]; - }) - .on('end', () => { - expect(res).toStrictEqual([{ term: 'truc', value: '' }]); - done(); - }); - }); - - it('should return input #2', (done) => { - let res = []; - from([{ term: 'Trucs' }]) - .pipe(ezs('TXTInflection', { path: 'term', })) - .on('data', (data) => { - res = [...res, data]; - }) - .on('end', () => { - expect(res).toStrictEqual([ - { term: 'Trucs' }, - ]); - done(); - }); - }); - - - it('should transfrom #1', (done) => { - let res = []; - from([{ term: 'Trucs' }]) - .pipe(ezs('TXTInflection', { path: 'term', transform: 'singularize' })) - .on('data', (data) => { - res = [...res, data]; - }) - .on('end', () => { - expect(res).toStrictEqual([ - { term: 'Truc' }, - ]); - done(); - }); - }); - - it('should transfrom #2', (done) => { - let res = []; - from([{ term: 'all job' }]) - .pipe(ezs('TXTInflection', { path: 'term', transform: ['pluralize', 'capitalize', 'dasherize'] })) - .on('data', (data) => { - res = [...res, data]; - }) - .on('end', () => { - expect(res).toStrictEqual([ - { term: 'All-jobs' }, - ]); - done(); - }); - }); - - it('should transfrom #2', (done) => { - let res = []; - from([{ term: 'loess' }]) - .pipe(ezs('TXTInflection', { path: 'term', transform: ['singularize', 'humanize'] })) - .on('data', (data) => { - res = [...res, data]; - }) - .on('end', () => { - expect(res).toStrictEqual([ - { term: 'Loess' }, - ]); - done(); - }); - }); - - it('should transfrom #3', (done) => { - let res = []; - from([{ term: ['apples', 'sciences' ]}]) - .pipe(ezs('TXTInflection', { path: 'term', transform: ['singularize', 'humanize'] })) - .on('data', (data) => { - res = [...res, data]; - }) - .on('end', () => { - expect(res).toStrictEqual([ - { term: ['Apple', 'Science' ] }, - ]); - done(); - }); - }); - - -}); diff --git a/packages/basics/test/txt-sentences.js b/packages/basics/test/txt-sentences.js deleted file mode 100644 index b1fcae54..00000000 --- a/packages/basics/test/txt-sentences.js +++ /dev/null @@ -1,184 +0,0 @@ -import from from 'from'; -// @ts-ignore -import ezs from '../../core/src'; -import ezsBasics from '../src'; - -ezs.use(ezsBasics); - -describe('TXTSentences', () => { - it('should return an array', (done) => { - let res = []; - from([{ value: '' }]) - .pipe(ezs('TXTSentences')) - .on('data', (data) => { - res = [...res, data]; - }) - .on('end', () => { - expect(res).toStrictEqual([{ value: [] }]); - done(); - }); - }); - - it('should generate two sentences', (done) => { - let res = []; - from([{ value: 'After all. These are two sentences.' }]) - .pipe(ezs('TXTSentences')) - .on('data', (data) => { - res = [...res, data]; - }) - .on('end', () => { - expect(res).toStrictEqual([ - { value: ['After all.', 'These are two sentences.'] }, - ]); - done(); - }); - }); - - it('should take path parameter into account', (done) => { - let res = []; - from([{ other: 'After all. These are two sentences.' }]) - .pipe(ezs('TXTSentences', { path: 'other' })) - .on('data', (data) => { - res = [...res, data]; - }) - .on('end', () => { - expect(res).toStrictEqual([ - { other: ['After all.', 'These are two sentences.'] }, - ]); - done(); - }); - }); - - it('should generate three sentences', (done) => { - let res = []; - from([{ value: 'And now. Three sentences. Indeed.' }]) - .pipe(ezs('TXTSentences')) - .on('data', (data) => { - res = [...res, data]; - }) - .on('end', () => { - expect(res).toStrictEqual([ - { value: ['And now.', 'Three sentences.', 'Indeed.'] }, - ]); - done(); - }); - }); - - it('should return an empty array when input is not a string', (done) => { - let res = []; - from([{ value: {} }, { value: 1 }, { value: true }]) - .pipe(ezs('TXTSentences')) - .on('data', (data) => { - res = [...res, data]; - }) - .on('end', () => { - expect(res).toStrictEqual([ - { value: [] }, - { value: [] }, - { value: [] }, - ]); - done(); - }); - }); - - it('should generate two sentences with other endings', (done) => { - let res = []; - from([{ value: 'Is it? It is!' }]) - .pipe(ezs('TXTSentences')) - .on('data', (data) => { - res = [...res, data]; - }) - .on('end', () => { - expect(res).toStrictEqual([{ value: ['Is it?', 'It is!'] }]); - done(); - }); - }); - - it('should not split initials in the middle of a sentence', (done) => { - let res = []; - from([{ value: 'My name is Bond, J. Bond.' }]) - .pipe(ezs('TXTSentences')) - .on('data', (data) => { - res = [...res, data]; - }) - .on('end', () => { - expect(res).toStrictEqual([ - { value: ['My name is Bond, J. Bond.'] }, - ]); - done(); - }); - }); - - it('should not split initials at the beginning of a sentence', (done) => { - let res = []; - from([{ value: 'C. Norris, that means Chuck Norris.' }]) - .pipe(ezs('TXTSentences')) - .on('data', (data) => { - res = [...res, data]; - }) - .on('end', () => { - expect(res).toStrictEqual([ - { value: ['C. Norris, that means Chuck Norris.'] }, - ]); - done(); - }); - }); - - it('should return an array already segmented', (done) => { - let res = []; - from([{ value: ['Sentence 1.', 'Sentence 2.'] }]) - .pipe(ezs('TXTSentences')) - .on('data', (data) => { - res = [...res, data]; - }) - .on('end', () => { - expect(res).toStrictEqual([ - { value: ['Sentence 1.', 'Sentence 2.'] }, - ]); - done(); - }); - }); - - it('should segment again an array wrongly segmented', (done) => { - let res = []; - from([{ value: ['Sentence', '1. Sentence 2.'] }]) - .pipe(ezs('TXTSentences')) - .on('data', (data) => { - res = [...res, data]; - }) - .on('end', () => { - expect(res).toStrictEqual([ - { value: ['Sentence 1.', 'Sentence 2.'] }, - ]); - done(); - }); - }); - - it.skip('should not split abbreviations in a sentence', (done) => { - let res = []; - from([{ value: 'Born in the U.S.A.' }]) - .pipe(ezs('TXTSentences')) - .on('data', (data) => { - res = [...res, data]; - }) - .on('end', () => { - expect(res).toStrictEqual([{ value: ['Born in the U.S.A.'] }]); - done(); - }); - }); - - it.skip('should not split abbreviations at the end of a sentence', (done) => { - let res = []; - from([{ value: 'Don\'t use T.N.T. inside buildings.' }]) - .pipe(ezs('TXTSentences')) - .on('data', (data) => { - res = [...res, data]; - }) - .on('end', () => { - expect(res).toStrictEqual([ - { value: ['Don\'t use T.N.T. inside buildings.'] }, - ]); - done(); - }); - }); -});