diff --git a/README.md b/README.md index 6532d2b..8ce8a6a 100644 --- a/README.md +++ b/README.md @@ -79,6 +79,8 @@ Contributions are highly encouraged. Here are some ways you can contribute: **Writing code:** if you see a missing piece of functionality or bug, I welcome pull requests. It's best to open an issue first to make sure you don't waste any effort in case what you are building is already being worked on or going in a different direction. Even if you aren't up to writing an implimentation, creating a PR with a failing test goes a long way towards getting something off the ground. +There are already a good number of failing tests (wrapped in `XCTExpectFailure`). These would be a good place to start. + **Improving documentation:** if you find something that isn't clear it's likely that other people would find it unclear as well. **Submitting issues:** if you find a bug or an inconsistentcy in how patterns are matched, pleas file an issue. diff --git a/Sources/Glob/Pattern+Options.swift b/Sources/Glob/Pattern+Options.swift index 49e6882..791bd82 100644 --- a/Sources/Glob/Pattern+Options.swift +++ b/Sources/Glob/Pattern+Options.swift @@ -7,7 +7,7 @@ public extension Pattern { /// If a double star/asterisk causes the pattern to match path separators. /// /// If `pathSeparator` is `nil` this has no effect. - public var allowsPathLevelWildcards: Bool = true + public var supportsPathLevelWildcards: Bool = true /// How empty ranges (`[]`) are treated public enum EmptyRangeBehavior: Sendable { @@ -20,15 +20,15 @@ public extension Pattern { } /// How are empty ranges handled. - public var emptyRangeBehavior: EmptyRangeBehavior + public var emptyRangeBehavior: EmptyRangeBehavior = .error /// If the pattern supports escaping control characters with '\' /// /// When true, a backslash character ( '\' ) in pattern followed by any other character shall match that second character in string. In particular, "\\" shall match a backslash in string. 
Otherwise a backslash character shall be treated as an ordinary character. - public var allowEscapedCharacters: Bool = true + public var supportsEscapedCharacters: Bool = true /// Allows the `-` character to be included in a character class if it is the first or last character (ie `[-abc]` or `[abc-]`) - public var allowsRangeSeparatorInCharacterClasses: Bool = true + public var supportsRangeSeparatorAtBeginningAndEnd: Bool = true /// If a period in the name is at the beginning of a component, don't match using wildcards. /// @@ -37,14 +37,14 @@ public extension Pattern { /// Equivalent to `FNM_PERIOD`. public var requiresExplicitLeadingPeriods: Bool = true - /// If a pattern should match if it matches a parent directory, as defined by `pathSeparator` + /// If a pattern should match if it matches a parent directory, as defined by `pathSeparator`. /// - /// Ignore a trailing sequence of characters starting with a `/' in string; that is to say, test whether string starts with a directory name that pattern matches. If this flag is set, either `foo*` or `foobar` as a pattern would match the string `foobar/frobozz`. Equivalent to `FNM_LEADING_DIR`.` + /// Ignore a trailing sequence of characters starting with a `/` in string; that is to say, test whether string starts with a directory name that pattern matches. If this flag is set, either `foo*` or `foobar` as a pattern would match the string `foobar/frobozz`. Equivalent to `FNM_LEADING_DIR`. /// /// If `pathSeparator` is `nil` this has no effect. public var matchLeadingDirectories: Bool = false - /// Recognize beside the normal patterns also the extended patterns introduced in `ksh`. + /// Recognize, in addition to the normal patterns, the extended patterns introduced in `ksh`. Equivalent to `FNM_EXTMATCH`. /// /// The patterns are written in the form explained in the following table where pattern-list is a | separated list of patterns. 
/// @@ -58,10 +58,18 @@ public extension Pattern { /// The pattern matches if exactly one occurence of any of the patterns in the pattern-list allows matching the input string. /// - !(pattern-list) /// The pattern matches if the input string cannot be matched with any of the patterns in the pattern-list. - public var useExtendedMatching: Bool = false + public var supportsPatternLists: Bool = true + + /// The character used to invert a character class. + public enum RangeNegationCharacter: Equatable, Sendable { + /// Use the `!` character to denote an inverse character class. + case exclamationMark + /// Use the `^` character to denote an inverse character class. + case caret + } /// The character used to specify when a range matches characters that aren't in the range. - public var rangeNegationCharacter: Character = "!" + public var rangeNegationCharacter: RangeNegationCharacter = .exclamationMark /// The path separator to use in matching /// @@ -72,30 +80,31 @@ public extension Pattern { /// Default options for parsing and matching patterns. public static let `default`: Self = .init( - allowsPathLevelWildcards: true, + supportsPathLevelWildcards: true, emptyRangeBehavior: .error ) /// Attempts to match the behavior of [VSCode](https://code.visualstudio.com/docs/editor/glob-patterns). public static let vscode: Self = Options( - allowsPathLevelWildcards: true, + supportsPathLevelWildcards: true, emptyRangeBehavior: .error, - rangeNegationCharacter: "^" + supportsPatternLists: false, + rangeNegationCharacter: .caret ) /// Attempts to match the behavior of [`filepath.Match` in go](https://pkg.go.dev/path/filepath#Match). 
public static let go: Self = Options( - allowsPathLevelWildcards: false, + supportsPathLevelWildcards: false, emptyRangeBehavior: .error, - allowsRangeSeparatorInCharacterClasses: false, - rangeNegationCharacter: "^" + supportsRangeSeparatorAtBeginningAndEnd: false, + rangeNegationCharacter: .caret ) /// Attempts to match the behavior of [POSIX glob](https://man7.org/linux/man-pages/man7/glob.7.html). /// - Returns: Options to use to create a Pattern. public static func posix() -> Self { Options( - allowsPathLevelWildcards: false, + supportsPathLevelWildcards: false, emptyRangeBehavior: .allow, requiresExplicitLeadingPeriods: true ) @@ -112,12 +121,12 @@ public extension Pattern { useExtendedMatching: Bool = false ) -> Self { Options( - allowsPathLevelWildcards: false, + supportsPathLevelWildcards: false, emptyRangeBehavior: .treatClosingBracketAsCharacter, - allowEscapedCharacters: allowEscapedCharacters, + supportsEscapedCharacters: allowEscapedCharacters, requiresExplicitLeadingPeriods: requiresExplicitLeadingPeriods, matchLeadingDirectories: matchLeadingDirectories, - useExtendedMatching: useExtendedMatching, + supportsPatternLists: useExtendedMatching, pathSeparator: usePathnameBehavior ? "/" : nil ) } diff --git a/Sources/Glob/Pattern+Parser.swift b/Sources/Glob/Pattern+Parser.swift index e23693a..fcb0b70 100644 --- a/Sources/Glob/Pattern+Parser.swift +++ b/Sources/Glob/Pattern+Parser.swift @@ -11,7 +11,7 @@ extension Pattern { enum Token: Equatable { case character(Character) case leftSquareBracket // [ - case righSquareBracket // ] + case rightSquareBracket // ] case questionMark // ? case dash // - case asterisk // * @@ -22,11 +22,12 @@ extension Pattern { case at // @ case plus // + case exclamationMark // ! 
+ case caret // ^ init(_ character: Character) { switch character { case "]": - self = .righSquareBracket + self = .rightSquareBracket case "[": self = .leftSquareBracket case "?": @@ -49,6 +50,8 @@ extension Pattern { self = .plus case "!": self = .exclamationMark + case "^": + self = .caret default: self = .character(character) } @@ -60,7 +63,7 @@ extension Pattern { character case .leftSquareBracket: "[" - case .righSquareBracket: + case .rightSquareBracket: "]" case .questionMark: "?" @@ -82,6 +85,8 @@ extension Pattern { "+" case .exclamationMark: "!" + case .caret: + "^" } } } @@ -90,7 +95,7 @@ extension Pattern { if let next = pattern.first { let updatedPattern = pattern.dropFirst() - if options.allowEscapedCharacters, next == .escape { + if options.supportsEscapedCharacters, next == .escape { guard let escaped = updatedPattern.first else { throw PatternParsingError.invalidEscapeCharacter } guard condition(.character(escaped)) else { return nil } @@ -135,10 +140,10 @@ extension Pattern { while let next = try pop({ !delimeters.contains($0) }) { switch next { case .asterisk: - if options.useExtendedMatching, let sectionList = try parsePatternList() { + if options.supportsPatternLists, let sectionList = try parsePatternList() { sections.append(.patternList(.zeroOrMore, sectionList)) } else if sections.last == .componentWildcard { - if options.allowsPathLevelWildcards { + if options.supportsPathLevelWildcards { sections[sections.endIndex - 1] = .pathWildcard } else { break // ignore repeated wildcards @@ -149,41 +154,40 @@ extension Pattern { sections.append(.componentWildcard) } case .questionMark: - if options.useExtendedMatching, let sectionList = try parsePatternList() { + if options.supportsPatternLists, let sectionList = try parsePatternList() { sections.append(.patternList(.zeroOrOne, sectionList)) } else { sections.append(.singleCharacter) } case .at: - if options.useExtendedMatching, let sectionsList = try parsePatternList() { + if 
options.supportsPatternLists, let sectionsList = try parsePatternList() { sections.append(.patternList(.one, sectionsList)) } else { sections.append(constant: next.character) } case .plus: - if options.useExtendedMatching, let sectionsList = try parsePatternList() { + if options.supportsPatternLists, let sectionsList = try parsePatternList() { sections.append(.patternList(.oneOrMore, sectionsList)) } else { sections.append(constant: next.character) } case .exclamationMark: - if options.useExtendedMatching, let sectionsList = try parsePatternList() { + if options.supportsPatternLists, let sectionsList = try parsePatternList() { sections.append(.patternList(.negated, sectionsList)) } else { sections.append(constant: next.character) } case .leftSquareBracket: let negated: Bool - if pattern.first == options.rangeNegationCharacter { + if try pop(options.rangeNegationCharacter.token) { negated = true - pattern = pattern.dropFirst() } else { negated = false } var ranges: [CharacterClass] = [] - if options.emptyRangeBehavior == .treatClosingBracketAsCharacter, try pop(.righSquareBracket) { + if options.emptyRangeBehavior == .treatClosingBracketAsCharacter, try pop(.rightSquareBracket) { // https://man7.org/linux/man-pages/man7/glob.7.html // The string enclosed by the brackets cannot be empty; therefore ']' can be allowed between the brackets, provided that it is the first character. 
ranges.append(.character("]")) @@ -195,7 +199,7 @@ extension Pattern { } switch next { - case .righSquareBracket: + case .rightSquareBracket: break loop case .leftSquareBracket: if try pop(.colon) { @@ -208,7 +212,7 @@ extension Pattern { ranges.append(.named(name)) pattern = pattern[endIndex...].dropFirst() - if try !pop(.righSquareBracket) { + if try !pop(.rightSquareBracket) { throw PatternParsingError.rangeNotClosed } } else { @@ -218,7 +222,7 @@ extension Pattern { ranges.append(.character("[")) } case .dash: - if !options.allowsRangeSeparatorInCharacterClasses { + if !options.supportsRangeSeparatorAtBeginningAndEnd { throw PatternParsingError.rangeMissingBounds } @@ -227,8 +231,8 @@ extension Pattern { ranges.append(.character("-")) default: if try pop(.dash) { - if try pop(.righSquareBracket) { - if !options.allowsRangeSeparatorInCharacterClasses { + if try pop(.rightSquareBracket) { + if !options.supportsRangeSeparatorAtBeginningAndEnd { throw PatternParsingError.rangeNotClosed } @@ -265,7 +269,7 @@ extension Pattern { sections.append(.oneOf(ranges, isNegated: negated)) case let .character(character): sections.append(constant: character) - case .righSquareBracket, .dash, .colon, .leftParen, .rightParen, .verticalLine: + case .rightSquareBracket, .dash, .colon, .leftParen, .rightParen, .verticalLine, .caret: sections.append(constant: next.character) } } @@ -275,7 +279,7 @@ extension Pattern { /// Parses a pattern list like `(abc|xyz)` mutating func parsePatternList() throws -> [[Section]]? 
{ - if options.useExtendedMatching, try pop(.leftParen) { + if options.supportsPatternLists, try pop(.leftParen) { // start of pattern list var sectionsList: [[Section]] = [] @@ -315,3 +319,14 @@ private extension [Pattern.Section] { } } } + +extension Pattern.Options.RangeNegationCharacter { + var token: Pattern.Parser.Token { + switch self { + case .exclamationMark: + return .exclamationMark + case .caret: + return .caret + } + } +} diff --git a/Tests/GlobTests/PatternTests.swift b/Tests/GlobTests/PatternTests.swift index ee99141..91e7ca0 100644 --- a/Tests/GlobTests/PatternTests.swift +++ b/Tests/GlobTests/PatternTests.swift @@ -27,13 +27,13 @@ final class PatternTests: XCTestCase { func test_pathWildcard_pathComponentsOnly_doesNotMatchPath() throws { var options = Pattern.Options.default - options.allowsPathLevelWildcards = false + options.supportsPathLevelWildcards = false try XCTAssertDoesNotMatch("Target/Other/.build", pattern: "**/.build", options: options) } func test_componentWildcard_pathComponentsOnly_doesMatchSingleComponent() throws { var options = Pattern.Options.default - options.allowsPathLevelWildcards = false + options.supportsPathLevelWildcards = false try XCTAssertMatches("Target/.build", pattern: "*/.build", options: options) }