Skip to content

Commit

Permalink
#171 - a PoC of the idea of metadata value extractors. Extended synta…
Browse files Browse the repository at this point in the history
…x, unit tests, error handling
  • Loading branch information
SebastianMC committed Nov 5, 2024
1 parent f210a41 commit 9e2e120
Show file tree
Hide file tree
Showing 4 changed files with 166 additions and 75 deletions.
74 changes: 42 additions & 32 deletions src/custom-sort/mdata-extractors.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import {
getNormalizedDate_NormalizerFn_for
} from "./matchers";
import {NormalizerFn} from "./custom-sort-types";

const DateExtractorSpecPattern1 = 'date(dd/mm/yyyy)'
const DateExtractorRegex1 = new RegExp('\\d{2}/\\d{2}/\\d{4}')
const DateExtractorNormalizer1 = getNormalizedDate_NormalizerFn_for('/', 0, 1, 2)
const DateExtractorSpecPattern2 = 'date(mm/dd/yyyy)'
const DateExtractorRegex2 = new RegExp('\\d{2}/\\d{2}/\\d{4}')
const DateExtractorNormalizer2 = getNormalizedDate_NormalizerFn_for('/', 1, 0, 2)
// Function shape shared by all value extractors: takes a raw metadata value
// and returns a string, or undefined when nothing could be extracted.
type ExtractorFn = (mdataValue: string) => string|undefined

// Associates the textual form of an extractor specification with the function implementing it.
interface DateExtractorSpec {
// How the extractor is denoted in a sorting spec, e.g. 'date(dd/mm/yyyy)'.
// NOTE(review): the type admits RegExp, yet only string patterns are matched by the parser in this file.
specPattern: string|RegExp,
// Extracts the value from the raw metadata text
extractorFn: ExtractorFn
}

export interface MDataExtractor {
(mdataValue: string): string|undefined
Expand All @@ -18,37 +19,46 @@ export interface MDataExtractorParseResult {
remainder: string
}

export const tryParseAsMDataExtractorSpec = (s: string): MDataExtractorParseResult|undefined => {
// Simplistic initial implementation of the idea with hardcoded two extractors
if (s.trim().startsWith(DateExtractorSpecPattern1)) {
return {
m: extractorForPattern1,
remainder: s.substring(DateExtractorSpecPattern1.length).trim()
function getGenericPlainRegexpExtractorFn(extractorRegexp: RegExp, extractedValueNormalizer: NormalizerFn) {
return (mdataValue: string): string | undefined => {
const hasMatch = mdataValue?.match(extractorRegexp)
if (hasMatch && hasMatch[0]) {
return extractedValueNormalizer(hasMatch[0]) ?? undefined
} else {
return undefined
}
}
if (s.trim().startsWith(DateExtractorSpecPattern2)) {
return {
m: extractorForPattern2,
remainder: s.substring(DateExtractorSpecPattern2.length).trim()
}
}
return undefined
}

export function extractorForPattern1(mdataValue: string): string|undefined {
const hasDate = mdataValue?.match(DateExtractorRegex1)
if (hasDate && hasDate[0]) {
return DateExtractorNormalizer1(hasDate[0]) ?? undefined
} else {
return undefined
const Extractors: DateExtractorSpec[] = [
{ specPattern: 'date(dd/mm/yyyy)',
extractorFn: getGenericPlainRegexpExtractorFn(
new RegExp('\\d{2}/\\d{2}/\\d{4}'),
getNormalizedDate_NormalizerFn_for('/', 0, 1, 2)
)
}, {
specPattern: 'date(mm/dd/yyyy)',
extractorFn: getGenericPlainRegexpExtractorFn(
new RegExp('\\d{2}/\\d{2}/\\d{4}'),
getNormalizedDate_NormalizerFn_for('/', 1, 0, 2)
)
}
}
]

export function extractorForPattern2(mdataValue: string): string|undefined {
const hasDate = mdataValue?.match(DateExtractorRegex2)
if (hasDate && hasDate[0]) {
return DateExtractorNormalizer2(hasDate[0]) ?? undefined
} else {
return undefined
/**
 * Attempts to recognize a metadata value extractor specification at the beginning of `s`.
 *
 * The known extractors are taken from the `Extractors` table. Only entries whose
 * `specPattern` is a plain string are considered; RegExp-based patterns are skipped.
 *
 * @param s - text expected to begin (after optional leading whitespace) with an extractor spec
 * @returns the matched extractor function together with the trimmed text following the spec,
 *          or `undefined` when `s` does not start with any known extractor spec
 */
export const tryParseAsMDataExtractorSpec = (s: string): MDataExtractorParseResult|undefined => {
    // Trim once and use the same text for both the prefix check and the remainder.
    // Cutting the remainder from the untrimmed input would shift the offset by the
    // amount of leading whitespace and leak part of the spec into the remainder.
    const trimmedSpec = s.trim()
    for (const extrSpec of Extractors) {
        if ('string' === typeof extrSpec.specPattern && trimmedSpec.startsWith(extrSpec.specPattern)) {
            return {
                m: extrSpec.extractorFn,
                remainder: trimmedSpec.substring(extrSpec.specPattern.length).trim()
            }
        }
    }
    return undefined
}

// Test-only handle on the otherwise module-private extractor functions,
// looked up in the Extractors table by their exact spec pattern.
export const _unitTests = (() => {
    const extractorFnOf = (wantedSpecPattern: string) =>
        Extractors.find((spec) => spec.specPattern === wantedSpecPattern)?.extractorFn!
    return {
        extractorFnForDate_ddmmyyyy: extractorFnOf('date(dd/mm/yyyy)'),
        extractorFnForDate_mmddyyyy: extractorFnOf('date(mm/dd/yyyy)'),
    }
})()
19 changes: 10 additions & 9 deletions src/custom-sort/sorting-spec-processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,8 @@ const OrderLiterals: { [key: string]: CustomSortOrderAscDescPair } = {

const OrderByMetadataLexeme: string = 'by-metadata:'

const ValueExtractorLexeme: string = 'using-extractor:'

const OrderLevelsSeparator: string = ','

enum Attribute {
Expand Down Expand Up @@ -1511,24 +1513,23 @@ export class SortingSpecProcessor {
applyToMetadata = true
const metadataNameAndOptionalExtractorSpec = orderSpec.substring(OrderByMetadataLexeme.length).trim() || undefined
if (metadataNameAndOptionalExtractorSpec) {
if (metadataNameAndOptionalExtractorSpec.indexOf(' ') > -1) {
const metadataSpec = metadataNameAndOptionalExtractorSpec.split(' ')
metadataName = metadataSpec.shift()
const metadataExtractorSpec = metadataSpec?.shift()
if (metadataNameAndOptionalExtractorSpec.indexOf(ValueExtractorLexeme) > -1) {
const metadataSpec = metadataNameAndOptionalExtractorSpec.split(ValueExtractorLexeme)
metadataName = metadataSpec.shift()?.trim()
const metadataExtractorSpec = metadataSpec?.shift()?.trim()
const hasMetadataExtractor = metadataExtractorSpec ? tryParseAsMDataExtractorSpec(metadataExtractorSpec) : undefined
if (hasMetadataExtractor) {
metadataExtractor = hasMetadataExtractor.m
} else {
// TODO: raise error of syntax error - metadata name followed by unrecognized text
// take into account all of the texts resulting from the split(' ') - there could be more segments
return new AttrError(`${orderNameForErrorMsg} sorting order contains unrecognized value extractor: >>> ${metadataExtractorSpec} <<<`)
}
orderSpec = '' // Intentionally ignore anything beyond the metadata name and extractor
orderSpec = '' // all consumed as metadata and extractor
} else {
metadataName = metadataNameAndOptionalExtractorSpec
orderSpec = '' // Intentionally ignore anything beyond the metadata name (and no known extractor)
orderSpec = '' // all consumed as metadata name
}
} else {
orderSpec = ''
orderSpec = '' // no metadata name found
}
}

Expand Down
37 changes: 14 additions & 23 deletions src/test/unit/mdata-extractors.spec.ts
Original file line number Diff line number Diff line change
@@ -1,38 +1,29 @@
import {
extractorForPattern1
_unitTests
} from '../../custom-sort/mdata-extractors'

describe('extractorForPattern1', () => {
describe('extractor for date(dd/mm/yyyy)', () => {
const params = [
// Positive
['03/05/2019', '2019-05-03//'],
['103/05/2019', '2019-05-03//'],
['103/05/20193232', '2019-05-03//'],
['99/99/9999', '9999-99-99//'],
['00/00/0000', '0000-00-00//'],
['Created at: 03/05/2019', '2019-05-03//'],
['03/05/2019 | 22:00', '2019-05-03//'],
['Created at: 03/05/2019 | 22:00', '2019-05-03//'],

// TODO: more positive then negative examples

['13-Jan-2012', '2012-01-13//'],
['3-Feb-2', '0002-02-03//'],
['1-Mar-1900', '1900-03-01//'],
['42-Apr-9999', '9999-04-42//'],
['0-May-0', '0000-05-00//'],
['21-Jun-2024', '2024-06-21//'],
['7-Jul-1872', '1872-07-07//'],
['15-Aug-1234', '1234-08-15//'],
['1234-Sep-7777', '7777-09-1234//'],
['3-Oct-2023', '2023-10-03//'],
['8-Nov-2022', '2022-11-08//'],
['18-Dec-2021', '2021-12-18//'],
// Negative
['88-Dec-2012', '2012-12-88//'], // Invalid case, Regexp on matcher in the caller should guard against this
['13-JANUARY-2012', '2012-00-13//'], // Invalid case, Regexp on matcher in the caller should guard against this
['1 .1', '0000-00-1 .1//'], // Invalid case, Regexp on matcher in the caller should guard against this
['', '0000-00-00//'], // Invalid case, Regexp on matcher in the caller should guard against this
['abc', '0000-00-abc//'], // Invalid case, Regexp on matcher in the caller should guard against this
['def-abc', '0000-00-def//'], // Invalid case, Regexp on matcher in the caller should guard against this
['88-Dec-2012', undefined],
['13-JANUARY-2012', undefined],
['1 .1', undefined],
['', undefined],
['abc', undefined],
['def-abc', undefined],
['3/5/2019', undefined],
];
it.each(params)('>%s< should become %s', (s: string, out: string) => {
expect(extractorForPattern1(s)).toBe(out)
expect(_unitTests.extractorFnForDate_ddmmyyyy(s)).toBe(out)
})
})
111 changes: 100 additions & 11 deletions src/test/unit/sorting-spec-processor.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ import {
CompoundDotNumberNormalizerFn,
ConsumedFolderMatchingRegexp,
consumeFolderByRegexpExpression,
convertPlainStringToRegex, Date_dd_Mmm_yyyy_NormalizerFn,
convertPlainStringToRegex,
Date_dd_Mmm_yyyy_NormalizerFn,
detectSortingSymbols,
escapeRegexUnsafeCharacters,
extractSortingSymbol,
Expand All @@ -14,8 +15,14 @@ import {
RomanNumberNormalizerFn,
SortingSpecProcessor
} from "../../custom-sort/sorting-spec-processor"
import {CustomSortGroupType, CustomSortOrder, CustomSortSpec, IdentityNormalizerFn} from "../../custom-sort/custom-sort-types";
import {
CustomSortGroupType,
CustomSortOrder,
CustomSortSpec,
IdentityNormalizerFn
} from "../../custom-sort/custom-sort-types";
import {FolderMatchingRegexp, FolderMatchingTreeNode} from "../../custom-sort/folder-matching-rules";
import {_unitTests} from "../../custom-sort/mdata-extractors";

const txtInputExampleA: string = `
order-asc: a-z
Expand Down Expand Up @@ -356,6 +363,17 @@ const expectedSortSpecsExampleA: { [key: string]: CustomSortSpec } = {
}
}

const txtInputExampleSortingSymbols: string = `
/folders Chapter \\.d+ ...
/:files ...section \\-r+.
% Appendix \\-d+ (attachments)
Plain syntax\\R+ ... works?
And this kind of... \\D+plain syntax???
Here goes ASCII word \\a+
\\A+. is for any modern language word
\\[dd-Mmm-yyyy] for the specific date format of 12-Apr-2024
`

const expectedSortSpecsExampleSortingSymbols: { [key: string]: CustomSortSpec } = {
"mock-folder": {
groups: [{
Expand Down Expand Up @@ -418,17 +436,67 @@ const expectedSortSpecsExampleSortingSymbols: { [key: string]: CustomSortSpec }
}
}

const txtInputExampleSortingSymbols: string = `
/folders Chapter \\.d+ ...
/:files ...section \\-r+.
% Appendix \\-d+ (attachments)
Plain syntax\\R+ ... works?
And this kind of... \\D+plain syntax???
Here goes ASCII word \\a+
\\A+. is for any modern language word
\\[dd-Mmm-yyyy] for the specific date format of 12-Apr-2024
// Basic extractor scenario: a by-metadata order with 'using-extractor:' given once for the
// target folder and once for a folders-only group matching the 'Chapter' name prefix.
const txtInputExampleMDataExtractors1: string = `
< a-z by-metadata: created-by using-extractor: date(dd/mm/yyyy)
/folders Chapter...
> a-z by-metadata: updated-on using-extractor: date(mm/dd/yyyy)
`

// Advanced extractor scenario: primary and secondary (comma-separated) orders, each with its own extractor.
// Tricky elements captured:
// - The 'a-z.' order combined with by-metadata is treated the same as 'a-z'
//   (there is no notion of 'file extension' in metadata values)
// - A metadata name containing a space ('created by')
// - An empty metadata name in front of 'using-extractor:' (secondary order in the first line)

const txtInputExampleMDataExtractors2: string = `
< a-z. by-metadata: created by using-extractor: date(mm/dd/yyyy), < true a-z. by-metadata: using-extractor: date(dd/mm/yyyy)
/folders ...Chapter
> a-z. by-metadata: updated-on using-extractor: date(dd/mm/yyyy), > true a-z by-metadata: md2 using-extractor: date(mm/dd/yyyy)
`

// Expected parse result for txtInputExampleMDataExtractors1.
// The extractor functions are compared against the very instances exposed via _unitTests.
const expectedSortSpecsExampleMDataExtractors1: { [key: string]: CustomSortSpec } = {
"mock-folder": {
// Folder-level order, from the '< a-z by-metadata: created-by ...' line
defaultOrder: CustomSortOrder.byMetadataFieldAlphabetical,
byMetadataField: 'created-by',
metadataFieldValueExtractor: _unitTests.extractorFnForDate_ddmmyyyy,
groups: [{
// Group from '/folders Chapter...' with its own '> a-z by-metadata: updated-on ...' order
foldersOnly: true,
type: CustomSortGroupType.ExactPrefix,
exactPrefix: 'Chapter',
order: CustomSortOrder.byMetadataFieldAlphabeticalReverse,
byMetadataField: 'updated-on',
metadataFieldValueExtractor: _unitTests.extractorFnForDate_mmddyyyy
}, {
// Not present in the input text, yet expected in the result
type: CustomSortGroupType.Outsiders
}],
targetFoldersPaths: ['mock-folder'],
outsidersGroupIdx: 1
}
}

// Expected parse result for txtInputExampleMDataExtractors2 (primary + secondary orders with extractors).
const expectedSortSpecsExampleMDataExtractors2: { [key: string]: CustomSortSpec } = {
"mock-folder": {
// 'a-z.' in the input is expected to yield the same order as plain 'a-z' would
defaultOrder: CustomSortOrder.byMetadataFieldAlphabetical,
byMetadataField: 'created by',
metadataFieldValueExtractor: _unitTests.extractorFnForDate_mmddyyyy,
defaultSecondaryOrder: CustomSortOrder.byMetadataFieldTrueAlphabetical,
// The secondary order in the input has no metadata name in front of 'using-extractor:'
byMetadataFieldSecondary: '',
metadataFieldSecondaryValueExtractor: _unitTests.extractorFnForDate_ddmmyyyy,
groups: [{
// Group from '/folders ...Chapter' (name suffix match) with primary and secondary orders
foldersOnly: true,
type: CustomSortGroupType.ExactSuffix,
exactSuffix: 'Chapter',
order: CustomSortOrder.byMetadataFieldAlphabeticalReverse,
byMetadataField: 'updated-on',
metadataFieldValueExtractor: _unitTests.extractorFnForDate_ddmmyyyy,
secondaryOrder: CustomSortOrder.byMetadataFieldTrueAlphabeticalReverse,
byMetadataFieldSecondary: 'md2',
metadataFieldSecondaryValueExtractor: _unitTests.extractorFnForDate_mmddyyyy
}, {
// Not present in the input text, yet expected in the result
type: CustomSortGroupType.Outsiders
}],
targetFoldersPaths: ['mock-folder'],
outsidersGroupIdx: 1
}
}

describe('SortingSpecProcessor', () => {
let processor: SortingSpecProcessor;
beforeEach(() => {
Expand All @@ -449,6 +517,16 @@ describe('SortingSpecProcessor', () => {
const result = processor.parseSortSpecFromText(inputTxtArr, 'mock-folder', 'custom-name-note.md')
expect(result?.sortSpecByPath).toEqual(expectedSortSpecsExampleSortingSymbols)
})
// Basic scenario: one by-metadata order with an extractor at folder level and one at group level
it('should generate correct SortSpecs (example with mdata extractors)', () => {
const inputTxtArr: Array<string> = txtInputExampleMDataExtractors1.split('\n')
const result = processor.parseSortSpecFromText(inputTxtArr, 'mock-folder', 'custom-name-note.md')
expect(result?.sortSpecByPath).toEqual(expectedSortSpecsExampleMDataExtractors1)
})
// Advanced scenario: primary and secondary orders, each with its own extractor
it('should generate correct SortSpecs (example with mdata extractors, advanced)', () => {
const inputTxtArr: Array<string> = txtInputExampleMDataExtractors2.split('\n')
const result = processor.parseSortSpecFromText(inputTxtArr, 'mock-folder', 'custom-name-note.md')
expect(result?.sortSpecByPath).toEqual(expectedSortSpecsExampleMDataExtractors2)
})
})

const txtInputNotDuplicatedSortSpec: string = `
Expand Down Expand Up @@ -2922,6 +3000,17 @@ describe('SortingSpecProcessor error detection and reporting', () => {
`${ERR_PREFIX} 7:InvalidAttributeValue Secondary sorting direction order-asc: and desc are contradicting ${ERR_SUFFIX_IN_LINE(2)}`)
expect(errorsLogger).toHaveBeenNthCalledWith(2, ERR_LINE_TXT('sorting: standard, order-asc: modified desc by-metadata: xyz // <-- and it is checked earlier than the by-metadata incompatible order'))
})
// The extractor spec below uses upper-case 'YYYY', which differs from the specs used elsewhere
// in these tests ('date(mm/dd/yyyy)'), and is expected to be reported as unrecognized.
it('should reject unknown value extractor', () => {
const inputTxtArr: Array<string> = `
< a-z. by-metadata: created by using-extractor: date(mm/dd/YYYY)
`.replace(/\t/gi, '').split('\n')
const result = processor.parseSortSpecFromText(inputTxtArr, 'mock-folder', 'custom-name-note.md')
expect(result).toBeNull()
// Two log entries are expected: the error message itself, then the offending line of the spec
expect(errorsLogger).toHaveBeenCalledTimes(2)
expect(errorsLogger).toHaveBeenNthCalledWith(1,
`${ERR_PREFIX} 7:InvalidAttributeValue Primary sorting order contains unrecognized value extractor: >>> date(mm/dd/YYYY) <<< ${ERR_SUFFIX_IN_LINE(2)}`)
expect(errorsLogger).toHaveBeenNthCalledWith(2, ERR_LINE_TXT('< a-z. by-metadata: created by using-extractor: date(mm/dd/YYYY)'))
})
})

const txtInputTargetFolderCCC: string = `
Expand Down

0 comments on commit 9e2e120

Please sign in to comment.