From 542d6b0119f18d6adcaf667a697a1e53d416fc17 Mon Sep 17 00:00:00 2001 From: Dustin Popp Date: Fri, 24 Jan 2025 11:22:29 -0600 Subject: [PATCH] fix(ibm-use-date-based-format): tighten heuristic for flagging date-time values (#717) The regular expressions used to determine if a value should be considered a date-time value are too loose and allow false positives to sneak in. It's a tricky problem, but this commit makes an attempt to tighten them by dismissing the possibility of a date-time value if any lowercase letters remain in a string value after the names of months and days (full or abbreviated) have been removed. Uppercase letters are not checked, since they have valid date-time uses (T, Z, GMT, UTC, etc.). Signed-off-by: Dustin Popp --- .../ruleset/src/utils/date-based-utils.js | 40 ++++++++++++++++--- .../test/utils/date-based-utils.test.js | 6 ++- 2 files changed, 39 insertions(+), 7 deletions(-) diff --git a/packages/ruleset/src/utils/date-based-utils.js b/packages/ruleset/src/utils/date-based-utils.js index 3ac62615..68dd7891 100644 --- a/packages/ruleset/src/utils/date-based-utils.js +++ b/packages/ruleset/src/utils/date-based-utils.js @@ -1,5 +1,5 @@ /** - * Copyright 2024 IBM Corporation. + * Copyright 2024-2025 IBM Corporation. * SPDX-License-Identifier: Apache2.0 */ @@ -77,12 +77,40 @@ function isDateBasedName(name) { * @returns a boolean value indicating that the value seems to be date-based */ function isDateBasedValue(value) { - const regularExpressions = [ - // Includes abbreviated month name. - /^\b(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\b/, + // The full and abbreviated values for months will be used in the generic + // string check below, as well as in the group of date-based expressions in + // the primary check below that. 
+ const months = + /\b(Jan(uary)?|Feb(ruary)?|Mar(ch)?|Apr(il)?|May|Jun(e)?|Jul(y)?|Aug(ust)?|Sep(tember)?|Oct(ober)?|Nov(ember)?|Dec(ember)?)\b/g; + + // In an effort to avoid false positives with strings that may contain + // date-time values, but are not themselves date-time values, check for + // the presence of any lowercase letters that are not included in the + // names of months or days. + if (typeof value === 'string') { + // "Day" strings are not enough on their own to determine date-time values, + // but we need to remove them from any values before we check for any + // letters that are not relevant to a date-time value. + const days = + /\b(Mon(day)?|Tue(sday)?|Wed(nesday)?|Thu(rsday)?|Fri(day)?|Sat(urday)?|Sun(day)?)\b/g; + + // Only lowercase letters are checked for because 1) there are a number of + // valid date-time uses of uppercase letters like T, Z, GMT, UTC, etc. but + // not lowercase letters and 2) lowercase letters are more likely to + // indicate a general string over a formatted value. + const hasNonDateLetters = !!value + .replaceAll(months, '') + .replaceAll(days, '') + .match(/[a-z]/); + + if (hasNonDateLetters) { + return false; + } + } - // Includes full month name. - /^\b(January|February|March|April|May|June|July|August|September|October|November|December)\b/, + const regularExpressions = [ + // Includes full or abbreviated month name. + months, // Includes date in the format YYYY(./-)MM(./-)DD(T). /\b\d{4}[./-](0?[1-9]|1[012])[./-]([012]?[1-9]|3[01])(\b|T)/, diff --git a/packages/ruleset/test/utils/date-based-utils.test.js b/packages/ruleset/test/utils/date-based-utils.test.js index b3d50275..8f6a6113 100644 --- a/packages/ruleset/test/utils/date-based-utils.test.js +++ b/packages/ruleset/test/utils/date-based-utils.test.js @@ -1,5 +1,5 @@ /** - * Copyright 2024 IBM Corporation. + * Copyright 2024-2025 IBM Corporation. 
* SPDX-License-Identifier: Apache2.0 */ @@ -26,7 +26,11 @@ describe('Date-based utility functions', () => { expect(isDateBasedValue('This certificate is good until June 2032')).toBe( false ); + expect( + isDateBasedValue('0000-0000-0000-0000/abc/2022/2/22/12345678_data.json') + ).toBe(false); expect(isDateBasedValue('Octopus')).toBe(false); + expect(isDateBasedValue('Januaryuary 31')).toBe(false); expect(isDateBasedValue('12345678')).toBe(false); expect(isDateBasedValue('0001-01-2000')).toBe(false); expect(isDateBasedValue('10.1.24.1')).toBe(false);