Skip to content

Commit

Permalink
#171 - started creation of a PoC of the idea of metadata value extrac…
Browse files Browse the repository at this point in the history
…tors. At a glance a low hanging fruit turned out to be far too complex to be worth it.
  • Loading branch information
SebastianMC committed Nov 3, 2024
1 parent b096e4c commit 42a5f1f
Show file tree
Hide file tree
Showing 4 changed files with 154 additions and 13 deletions.
38 changes: 28 additions & 10 deletions src/custom-sort/matchers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -104,17 +104,35 @@ export function getNormalizedRomanNumber(s: string, separator?: string, places?:
}
}

const DAY_POSITIONS = '00'.length
const MONTH_POSITIONS = '00'.length
const YEAR_POSITIONS = '0000'.length
// Widths passed to prependWithZeros() for each date component.
// Each width is spelled out as the length of a template string, so the intended shape stays visible.
export const DAY_POSITIONS = '00'.length
export const MONTH_POSITIONS = '00'.length
export const YEAR_POSITIONS = '0000'.length

// Three-letter month abbreviations; (index in this array + 1) is used as the month number
const MONTHS = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']

export function getNormalizedDate_dd_Mmm_yyyy_NormalizerFn(s: string): string | null {
// Assumption - the regex date matched against input s, no extensive defensive coding needed
const components = s.split('-')
const day = prependWithZeros(components[0], DAY_POSITIONS)
const month = prependWithZeros( `${1 + MONTHS.indexOf(components[1])}`, MONTH_POSITIONS)
const year = prependWithZeros(components[2], YEAR_POSITIONS)
return `${year}-${month}-${day}//`
/**
 * Creates a normalizer which turns a textual date into the sortable form `yyyy-mm-dd//`.
 *
 * The returned function splits its input on `separator` and picks the day, month and year
 * components by index. Each component is passed through `prependWithZeros` with the
 * corresponding `*_POSITIONS` width.
 *
 * @param separator - text on which the date is split into components
 * @param dayIdx - index of the day component after the split
 * @param monthIdx - index of the month component after the split
 * @param yearIdx - index of the year component after the split
 * @param months - optional list of month names. When given, the month component is looked up
 *   in this list and replaced with its 1-based position (`0` when the name is not found).
 *   When omitted, the month component is used as-is.
 * @returns a function mapping the date text to its normalized form
 */
export function getNormalizedDate_NormalizerFn_for(separator: string, dayIdx: number, monthIdx: number, yearIdx: number, months?: string[]): (s: string) => string | null {
    return (s: string): string | null => {
        // Assumption - the regex date matched against input s, no extensive defensive coding needed
        const components = s.split(separator)
        const day = prependWithZeros(components[dayIdx], DAY_POSITIONS)
        // Resolve the month name against the list supplied by the caller (not a module-level default),
        // otherwise a custom list of month names would be silently ignored
        const monthValue = months ? `${1 + months.indexOf(components[monthIdx])}` : components[monthIdx]
        const month = prependWithZeros(monthValue, MONTH_POSITIONS)
        const year = prependWithZeros(components[yearIdx], YEAR_POSITIONS)
        return `${year}-${month}-${day}//`
    }
}

// Normalizer for dates written as day-Month-year separated by '-' (e.g. 13-Jan-2012),
// with the month given as one of the MONTHS abbreviations. Produces `yyyy-mm-dd//`.
export const getNormalizedDate_dd_Mmm_yyyy_NormalizerFn = getNormalizedDate_NormalizerFn_for('-', 0, 1, 2, MONTHS)

/*
// Assumption - the regex date matched against input s, no extensive defensive coding needed
const components = s.split('-')
const day = prependWithZeros(components[0], DAY_POSITIONS)
const month = prependWithZeros( `${1 + MONTHS.indexOf(components[1])}`, MONTH_POSITIONS)
const year = prependWithZeros(components[2], YEAR_POSITIONS)
return `${year}-${month}-${day}//`
*/



54 changes: 54 additions & 0 deletions src/custom-sort/mdata-extractors.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import {
getNormalizedDate_NormalizerFn_for
} from "./matchers";

// Spec texts recognized in the sorting specification, one per supported date layout
const DateExtractorSpecPattern1 = 'date(dd/mm/yyyy)'
const DateExtractorSpecPattern2 = 'date(mm/dd/yyyy)'

// Both layouts look the same textually (two digits / two digits / four digits);
// they differ only in which component is the day and which is the month
const DateExtractorRegex1 = new RegExp('\\d{2}/\\d{2}/\\d{4}')
const DateExtractorRegex2 = new RegExp('\\d{2}/\\d{2}/\\d{4}')

// Normalizers: arguments are (separator, day index, month index, year index)
const DateExtractorNormalizer1 = getNormalizedDate_NormalizerFn_for('/', 0, 1, 2)
const DateExtractorNormalizer2 = getNormalizedDate_NormalizerFn_for('/', 1, 0, 2)

/**
 * A function which derives a value from the text of a metadata field.
 * Returns `undefined` when nothing could be extracted from `mdataValue`.
 */
export interface MDataExtractor {
(mdataValue: string): string|undefined
}

/**
 * Outcome of successfully recognizing an extractor specification at the start of a text.
 */
export interface MDataExtractorParseResult {
// The extractor selected by the recognized specification
m: MDataExtractor
// The text which followed the recognized specification, trimmed
remainder: string
}

/**
 * Attempts to recognize a metadata value extractor specification at the beginning of `s`.
 *
 * Leading and trailing whitespace of `s` is ignored.
 *
 * @param s - text expected to start with an extractor specification
 * @returns the matching extractor together with the trimmed text following the specification,
 *   or `undefined` when `s` does not start with any known specification
 */
export const tryParseAsMDataExtractorSpec = (s: string): MDataExtractorParseResult|undefined => {
    // Simplistic initial implementation of the idea with hardcoded two extractors

    // Trim once and use the trimmed text for both the prefix check and the remainder.
    // Cutting the remainder out of the untrimmed text would shift the cut point by
    // the number of leading whitespace characters.
    const spec = s.trim()
    if (spec.startsWith(DateExtractorSpecPattern1)) {
        return {
            m: extractorForPattern1,
            remainder: spec.substring(DateExtractorSpecPattern1.length).trim()
        }
    }
    if (spec.startsWith(DateExtractorSpecPattern2)) {
        return {
            m: extractorForPattern2,
            remainder: spec.substring(DateExtractorSpecPattern2.length).trim()
        }
    }
    return undefined
}

/**
 * Finds the first fragment of `mdataValue` matching DateExtractorRegex1
 * and returns it normalized by DateExtractorNormalizer1.
 *
 * @param mdataValue - text of the metadata field
 * @returns the normalized date, or `undefined` when no date fragment was found
 */
export function extractorForPattern1(mdataValue: string): string|undefined {
    const found = mdataValue?.match(DateExtractorRegex1)
    const dateText = found ? found[0] : undefined
    if (!dateText) {
        return undefined
    }
    // The normalizer is typed as possibly returning null; map that to undefined
    return DateExtractorNormalizer1(dateText) ?? undefined
}

/**
 * Finds the first fragment of `mdataValue` matching DateExtractorRegex2
 * and returns it normalized by DateExtractorNormalizer2.
 *
 * @param mdataValue - text of the metadata field
 * @returns the normalized date, or `undefined` when no date fragment was found
 */
export function extractorForPattern2(mdataValue: string): string|undefined {
    const found = mdataValue?.match(DateExtractorRegex2)
    const dateText = found ? found[0] : undefined
    if (!dateText) {
        return undefined
    }
    // The normalizer is typed as possibly returning null; map that to undefined
    return DateExtractorNormalizer2(dateText) ?? undefined
}
37 changes: 34 additions & 3 deletions src/custom-sort/sorting-spec-processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ import {
MATCH_CHILDREN_2_SUFFIX,
NO_PRIORITY
} from "./folder-matching-rules"
import {
MDataExtractor,
tryParseAsMDataExtractorSpec
} from "./mdata-extractors";

interface ProcessingContext {
folderPath: string
Expand Down Expand Up @@ -1497,10 +1501,30 @@ export class SortingSpecProcessor {
orderSpec = hasDirectionPostfix ? orderSpec.substring(hasDirectionPostfix.lexeme.length).trim() : orderSpec

let metadataName: string|undefined
let metadataExtractor: MDataExtractor|undefined
if (orderSpec.startsWith(OrderByMetadataLexeme)) {
applyToMetadata = true
metadataName = orderSpec.substring(OrderByMetadataLexeme.length).trim() || undefined
orderSpec = '' // metadataName is unparsed, consumes the remainder string, even if malformed, e.g. with infix spaces
const metadataNameAndOptionalExtractorSpec = orderSpec.substring(OrderByMetadataLexeme.length).trim() || undefined
if (metadataNameAndOptionalExtractorSpec) {
if (metadataNameAndOptionalExtractorSpec.indexOf(' ') > -1) {
const metadataSpec = metadataNameAndOptionalExtractorSpec.split(' ')
metadataName = metadataSpec.shift()
const metadataExtractorSpec = metadataSpec?.shift()
const hasMetadataExtractor = metadataExtractorSpec ? tryParseAsMDataExtractorSpec(metadataExtractorSpec) : undefined
if (hasMetadataExtractor) {
metadataExtractor = hasMetadataExtractor.m
} else {
// TODO: raise error of syntax error - metadata name followed by unrecognized text
// take into account all of the texts resulting from the split(' ') - there could be more segments
}
orderSpec = '' // Intentionally ignore anything beyond the metadata name and extractor
} else {
metadataName = metadataNameAndOptionalExtractorSpec
orderSpec = '' // Intentionally ignore anything beyond the metadata name (and no known extractor)
}
} else {
orderSpec = ''
}
}

// check for any superfluous text
Expand Down Expand Up @@ -1553,7 +1577,14 @@ export class SortingSpecProcessor {
}
sortOrderSpec[level] = {
order: order!,
byMetadataField: metadataName
byMetadataField: metadataName,

metadataFieldExtractor: metadataExtractor

... and then carry the metadataFieldExtractor attribute down the parser, handle it correctly in the 4-level mdata sorting options,
and execute it at runtime.

This seems to be far too complex to be worth it.
}
}
return sortOrderSpec
Expand Down
38 changes: 38 additions & 0 deletions src/test/unit/mdata-extractors.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import {
extractorForPattern1
} from '../../custom-sort/mdata-extractors'

describe('extractorForPattern1', () => {
    // Inputs containing a dd/mm/yyyy fragment: the first such fragment is extracted and normalized
    const positive: Array<[string, string]> = [
        ['03/05/2019', '2019-05-03//'],
        ['Created at: 03/05/2019', '2019-05-03//'],
        ['03/05/2019 | 22:00', '2019-05-03//'],
        ['Created at: 03/05/2019 | 22:00', '2019-05-03//'],
        ['31/12/1999', '1999-12-31//'],
        // No range validation is done by the extractor, the digits are taken as they are
        ['99/99/9999', '9999-99-99//'],
    ];
    // Inputs without any dd/mm/yyyy fragment: the extractor has nothing to normalize
    const negative: string[] = [
        '3/5/2019',   // day and month must have two digits each
        '03/05/19',   // year must have four digits
        '13-Jan-2012',
        '3-Feb-2',
        '1-Mar-1900',
        '42-Apr-9999',
        '0-May-0',
        '21-Jun-2024',
        '7-Jul-1872',
        '15-Aug-1234',
        '1234-Sep-7777',
        '3-Oct-2023',
        '8-Nov-2022',
        '18-Dec-2021',
        '88-Dec-2012',
        '13-JANUARY-2012',
        '1 .1',
        '',
        'abc',
        'def-abc',
    ];
    it.each(positive)('>%s< should become %s', (s: string, out: string) => {
        expect(extractorForPattern1(s)).toBe(out)
    })
    it.each(negative)('>%s< should yield undefined', (s: string) => {
        expect(extractorForPattern1(s)).toBeUndefined()
    })
})

0 comments on commit 42a5f1f

Please sign in to comment.