From 42a5f1feb27a8bbb7767bc0b8f11f89eda17215c Mon Sep 17 00:00:00 2001 From: SebastianMC <23032356+SebastianMC@users.noreply.github.com> Date: Sun, 3 Nov 2024 22:56:55 +0100 Subject: [PATCH] #171 - started creation of a PoC of the idea of metadata value extractors. At a glance a low hanging fruit turned out to be far too complex to be worth it. --- src/custom-sort/matchers.ts | 38 +++++++++++----- src/custom-sort/mdata-extractors.ts | 54 +++++++++++++++++++++++ src/custom-sort/sorting-spec-processor.ts | 37 ++++++++++++++-- src/test/unit/mdata-extractors.spec.ts | 38 ++++++++++++++++ 4 files changed, 154 insertions(+), 13 deletions(-) create mode 100644 src/custom-sort/mdata-extractors.ts create mode 100644 src/test/unit/mdata-extractors.spec.ts diff --git a/src/custom-sort/matchers.ts b/src/custom-sort/matchers.ts index b92c65c27..6fda71e7e 100644 --- a/src/custom-sort/matchers.ts +++ b/src/custom-sort/matchers.ts @@ -104,17 +104,35 @@ export function getNormalizedRomanNumber(s: string, separator?: string, places?: } } -const DAY_POSITIONS = '00'.length -const MONTH_POSITIONS = '00'.length -const YEAR_POSITIONS = '0000'.length +export const DAY_POSITIONS = '00'.length +export const MONTH_POSITIONS = '00'.length +export const YEAR_POSITIONS = '0000'.length const MONTHS = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'] -export function getNormalizedDate_dd_Mmm_yyyy_NormalizerFn(s: string): string | null { - // Assumption - the regex date matched against input s, no extensive defensive coding needed - const components = s.split('-') - const day = prependWithZeros(components[0], DAY_POSITIONS) - const month = prependWithZeros( `${1 + MONTHS.indexOf(components[1])}`, MONTH_POSITIONS) - const year = prependWithZeros(components[2], YEAR_POSITIONS) - return `${year}-${month}-${day}//` +export function getNormalizedDate_NormalizerFn_for(separator: string, dayIdx: number, monthIdx: number, yearIdx: number, months?: string[]) { + return (s: string): 
string | null => { + // Assumption - the regex date matched against input s, no extensive defensive coding needed + const components = s.split(separator) + const day = prependWithZeros(components[dayIdx], DAY_POSITIONS) + const monthValue = months ? `${1 + MONTHS.indexOf(components[monthIdx])}` : components[monthIdx] + const month = prependWithZeros(monthValue, MONTH_POSITIONS) + const year = prependWithZeros(components[yearIdx], YEAR_POSITIONS) + return `${year}-${month}-${day}//` + } } + +export const getNormalizedDate_dd_Mmm_yyyy_NormalizerFn = getNormalizedDate_NormalizerFn_for('-', 0, 1, 2, MONTHS) + +/* +// Assumption - the regex date matched against input s, no extensive defensive coding needed +const components = s.split('-') +const day = prependWithZeros(components[0], DAY_POSITIONS) +const month = prependWithZeros( `${1 + MONTHS.indexOf(components[1])}`, MONTH_POSITIONS) +const year = prependWithZeros(components[2], YEAR_POSITIONS) +return `${year}-${month}-${day}//` + + */ + + + diff --git a/src/custom-sort/mdata-extractors.ts b/src/custom-sort/mdata-extractors.ts new file mode 100644 index 000000000..6e761c0c0 --- /dev/null +++ b/src/custom-sort/mdata-extractors.ts @@ -0,0 +1,54 @@ +import { + getNormalizedDate_NormalizerFn_for +} from "./matchers"; + +const DateExtractorSpecPattern1 = 'date(dd/mm/yyyy)' +const DateExtractorRegex1 = new RegExp('\\d{2}/\\d{2}/\\d{4}') +const DateExtractorNormalizer1 = getNormalizedDate_NormalizerFn_for('/', 0, 1, 2) +const DateExtractorSpecPattern2 = 'date(mm/dd/yyyy)' +const DateExtractorRegex2 = new RegExp('\\d{2}/\\d{2}/\\d{4}') +const DateExtractorNormalizer2 = getNormalizedDate_NormalizerFn_for('/', 1, 0, 2) + +export interface MDataExtractor { + (mdataValue: string): string|undefined +} + +export interface MDataExtractorParseResult { + m: MDataExtractor + remainder: string +} + +export const tryParseAsMDataExtractorSpec = (s: string): MDataExtractorParseResult|undefined => { + // Simplistic initial implementation 
of the idea with hardcoded two extractors + if (s.trim().startsWith(DateExtractorSpecPattern1)) { + return { + m: extractorForPattern1, + remainder: s.substring(DateExtractorSpecPattern1.length).trim() + } + } + if (s.trim().startsWith(DateExtractorSpecPattern2)) { + return { + m: extractorForPattern2, + remainder: s.substring(DateExtractorSpecPattern2.length).trim() + } + } + return undefined +} + +export function extractorForPattern1(mdataValue: string): string|undefined { + const hasDate = mdataValue?.match(DateExtractorRegex1) + if (hasDate && hasDate[0]) { + return DateExtractorNormalizer1(hasDate[0]) ?? undefined + } else { + return undefined + } +} + +export function extractorForPattern2(mdataValue: string): string|undefined { + const hasDate = mdataValue?.match(DateExtractorRegex2) + if (hasDate && hasDate[0]) { + return DateExtractorNormalizer2(hasDate[0]) ?? undefined + } else { + return undefined + } +} diff --git a/src/custom-sort/sorting-spec-processor.ts b/src/custom-sort/sorting-spec-processor.ts index 537dcc170..4850b7457 100644 --- a/src/custom-sort/sorting-spec-processor.ts +++ b/src/custom-sort/sorting-spec-processor.ts @@ -33,6 +33,10 @@ import { MATCH_CHILDREN_2_SUFFIX, NO_PRIORITY } from "./folder-matching-rules" +import { + MDataExtractor, + tryParseAsMDataExtractorSpec +} from "./mdata-extractors"; interface ProcessingContext { folderPath: string @@ -1497,10 +1501,30 @@ export class SortingSpecProcessor { orderSpec = hasDirectionPostfix ? orderSpec.substring(hasDirectionPostfix.lexeme.length).trim() : orderSpec let metadataName: string|undefined + let metadataExtractor: MDataExtractor|undefined if (orderSpec.startsWith(OrderByMetadataLexeme)) { applyToMetadata = true - metadataName = orderSpec.substring(OrderByMetadataLexeme.length).trim() || undefined - orderSpec = '' // metadataName is unparsed, consumes the remainder string, even if malformed, e.g. 
with infix spaces + const metadataNameAndOptionalExtractorSpec = orderSpec.substring(OrderByMetadataLexeme.length).trim() || undefined + if (metadataNameAndOptionalExtractorSpec) { + if (metadataNameAndOptionalExtractorSpec.indexOf(' ') > -1) { + const metadataSpec = metadataNameAndOptionalExtractorSpec.split(' ') + metadataName = metadataSpec.shift() + const metadataExtractorSpec = metadataSpec?.shift() + const hasMetadataExtractor = metadataExtractorSpec ? tryParseAsMDataExtractorSpec(metadataExtractorSpec) : undefined + if (hasMetadataExtractor) { + metadataExtractor = hasMetadataExtractor.m + } else { + // TODO: raise error of syntax error - metadata name followed by unrecognized text + // take into account all of the texts resulting from the split(' ') - there could be more segments + } + orderSpec = '' // Intentionally ignore anything beyond the metadata name and extractor + } else { + metadataName = metadataNameAndOptionalExtractorSpec + orderSpec = '' // Intentionally ignore anything beyond the metadata name (and no known extractor) + } + } else { + orderSpec = '' + } } // check for any superfluous text @@ -1553,7 +1577,14 @@ export class SortingSpecProcessor { } sortOrderSpec[level] = { order: order!, - byMetadataField: metadataName + byMetadataField: metadataName, + + metadataFieldExtractor: metadataExtractor + + ... and then carry the metadataFieldExtractor attribute down the parser, handle it correctly in the 4-level mdata sorting options + and execute it at runtime + + Seems to be far too complex to be worth it. 
} } return sortOrderSpec diff --git a/src/test/unit/mdata-extractors.spec.ts b/src/test/unit/mdata-extractors.spec.ts new file mode 100644 index 000000000..1a114d2a9 --- /dev/null +++ b/src/test/unit/mdata-extractors.spec.ts @@ -0,0 +1,38 @@ +import { + extractorForPattern1 +} from '../../custom-sort/mdata-extractors' + +describe('extractorForPattern1', () => { + const params = [ + // Positive + ['03/05/2019', '2019-05-03//'], + ['Created at: 03/05/2019', '2019-05-03//'], + ['03/05/2019 | 22:00', '2019-05-03//'], + ['Created at: 03/05/2019 | 22:00', '2019-05-03//'], + + // TODO: more positive then negative examples + + ['13-Jan-2012', '2012-01-13//'], + ['3-Feb-2', '0002-02-03//'], + ['1-Mar-1900', '1900-03-01//'], + ['42-Apr-9999', '9999-04-42//'], + ['0-May-0', '0000-05-00//'], + ['21-Jun-2024', '2024-06-21//'], + ['7-Jul-1872', '1872-07-07//'], + ['15-Aug-1234', '1234-08-15//'], + ['1234-Sep-7777', '7777-09-1234//'], + ['3-Oct-2023', '2023-10-03//'], + ['8-Nov-2022', '2022-11-08//'], + ['18-Dec-2021', '2021-12-18//'], + // Negative + ['88-Dec-2012', '2012-12-88//'], // Invalid case, Regexp on matcher in the caller should guard against this + ['13-JANUARY-2012', '2012-00-13//'], // Invalid case, Regexp on matcher in the caller should guard against this + ['1 .1', '0000-00-1 .1//'], // Invalid case, Regexp on matcher in the caller should guard against this + ['', '0000-00-00//'], // Invalid case, Regexp on matcher in the caller should guard against this + ['abc', '0000-00-abc//'], // Invalid case, Regexp on matcher in the caller should guard against this + ['def-abc', '0000-00-def//'], // Invalid case, Regexp on matcher in the caller should guard against this + ]; + it.each(params)('>%s< should become %s', (s: string, out: string) => { + expect(extractorForPattern1(s)).toBe(out) + }) +})