Skip to content

Commit

Permalink
Merge pull request #4452 from unisonweb/captureAs
Browse files Browse the repository at this point in the history
  • Loading branch information
aryairani authored Dec 1, 2023
2 parents bd52421 + cc7367a commit d611f8f
Show file tree
Hide file tree
Showing 21 changed files with 790 additions and 735 deletions.
1 change: 1 addition & 0 deletions parser-typechecker/src/Unison/Builtin.hs
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,7 @@ builtinsSrc =
B "Pattern.many" $ forall1 "a" (\a -> pat a --> pat a),
B "Pattern.replicate" $ forall1 "a" (\a -> nat --> nat --> pat a --> pat a),
B "Pattern.capture" $ forall1 "a" (\a -> pat a --> pat a),
B "Pattern.captureAs" $ forall1 "a" (\a -> a --> pat a --> pat a),
B "Pattern.join" $ forall1 "a" (\a -> list (pat a) --> pat a),
B "Pattern.or" $ forall1 "a" (\a -> pat a --> pat a --> pat a),
-- Pattern.run : Pattern a -> a -> Optional ([a], a)
Expand Down
2 changes: 2 additions & 0 deletions parser-typechecker/src/Unison/Runtime/Builtin.hs
Original file line number Diff line number Diff line change
Expand Up @@ -3078,6 +3078,8 @@ declareForeigns = do
\(TPat.CP p _) -> evaluate . TPat.cpattern $ TPat.Many p
declareForeign Untracked "Pattern.capture" boxDirect . mkForeign $
\(TPat.CP p _) -> evaluate . TPat.cpattern $ TPat.Capture p
declareForeign Untracked "Pattern.captureAs" boxBoxDirect . mkForeign $
\(t, (TPat.CP p _)) -> evaluate . TPat.cpattern $ TPat.CaptureAs t p
declareForeign Untracked "Pattern.join" boxDirect . mkForeign $ \ps ->
evaluate . TPat.cpattern . TPat.Join $ map (\(TPat.CP p _) -> p) ps
declareForeign Untracked "Pattern.or" boxBoxDirect . mkForeign $
Expand Down
10 changes: 10 additions & 0 deletions parser-typechecker/src/Unison/Util/Text/Pattern.hs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ data Pattern
= Join [Pattern] -- sequencing of patterns
| Or Pattern Pattern -- left-biased choice: tries second pattern only if first fails
| Capture Pattern -- capture all the text consumed by the inner pattern, discarding its subcaptures
| CaptureAs Text Pattern -- capture the given text, discarding its subcaptures, and name the capture
| Many Pattern -- zero or more repetitions (at least 1 can be written: Join [p, Many p])
| Replicate Int Int Pattern -- m to n occurrences of a pattern, optional = 0-1
| Eof -- succeed if given the empty text, fail otherwise
Expand Down Expand Up @@ -121,13 +122,22 @@ compile (Char Any) !err !success = go
rem
| Text.size t > Text.size rem -> success acc rem
| otherwise -> err acc rem
compile (CaptureAs t p) !err !success = go
where
err' _ _ acc0 t0 = err acc0 t0
success' _ rem acc0 _ = success (pushCapture t acc0) rem
compiled = compile p err' success'
go acc t = compiled acc t acc t


compile (Capture (Many (Char Any))) !_ !success = \acc t -> success (pushCapture t acc) Text.empty
compile (Capture c) !err !success = go
where
err' _ _ acc0 t0 = err acc0 t0
success' _ rem acc0 t0 = success (pushCapture (Text.take (Text.size t0 - Text.size rem) t0) acc0) rem
compiled = compile c err' success'
go acc t = compiled acc t acc t

compile (Or p1 p2) err success = cp1
where
cp2 = compile p2 err success
Expand Down
18 changes: 18 additions & 0 deletions scheme-libs/racket/unison/pattern.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
[join* (-> chunked-list? pattern?)]
[choice (-> pattern? pattern? ... pattern?)]
[capture (-> pattern? pattern?)]
[capture-as (-> any/c pattern? pattern?)]
[many (-> pattern? pattern?)]
[replicate (-> pattern? exact-nonnegative-integer? exact-nonnegative-integer? pattern?)]
;; Only valid pattern? in the functions below is p:char
Expand All @@ -56,6 +57,7 @@
(struct p:join (pats) #:transparent)
(struct p:or (left right) #:transparent)
(struct p:capture (pat) #:transparent)
(struct p:capture-as (cap pat) #:transparent)
(struct p:many (pat) #:transparent)
(struct p:replicate (pat min-count count))

Expand Down Expand Up @@ -124,6 +126,8 @@
(p:or (pattern-pat pat) (loop pats))])))]))

(define (capture pat) (make-pattern (p:capture (pattern-pat pat))))
(define (capture-as cap pat)
(make-pattern (p:capture-as cap (pattern-pat pat))))
(define (many pat) (make-pattern (p:many (pattern-pat pat))))
(define (replicate pat n m) (make-pattern (p:replicate (pattern-pat pat) n m)))

Expand Down Expand Up @@ -219,6 +223,20 @@
[else
(fail)]))])]

[(p:capture-as cap pat)
(cond
[in-capture?
(recur pat #t ok)]
[else
(define pat-m (recur pat #t done))
(λ (cstr captures)
(define-values [cstr* captures*] (pat-m cstr captures))
(cond
[cstr*
(ok cstr* (chunked-list-add-last captures* cap))]
[else
(fail)]))])]

[(p:many (p:char 'any))
(λ (cstr captures)
(ok empty-chunked-string captures))]
Expand Down
6 changes: 6 additions & 0 deletions scheme-libs/racket/unison/primops.ss
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,8 @@

builtin-Char.Class.is
builtin-Char.Class.is:termlink
builtin-Pattern.captureAs
builtin-Pattern.captureAs:termlink
builtin-Pattern.isMatch
builtin-Pattern.isMatch:termlink
builtin-IO.fileExists.impl.v3
Expand Down Expand Up @@ -708,6 +710,7 @@
(define-builtin-link Universal.<=)
(define-builtin-link Universal.compare)
(define-builtin-link Universal.murmurHash)
(define-builtin-link Pattern.captureAs)
(define-builtin-link Pattern.isMatch)
(define-builtin-link Char.Class.is)

Expand Down Expand Up @@ -820,6 +823,9 @@
(define-unison (builtin-Char.Class.is cc c)
(pattern-match? cc (string->chunked-string (string c))))

(define-unison (builtin-Pattern.captureAs c p)
(capture-as c p))

(define-unison (builtin-Pattern.isMatch p s)
(pattern-match? p s))

Expand Down
1 change: 1 addition & 0 deletions unison-src/builtin-tests/interpreter-tests.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ to `Tests.check` and `Tests.checkEqual`).
```

```ucm:hide
.> alias.term ##Pattern.captureAs Pattern.captureAs
.> load unison-src/builtin-tests/text-tests.u
.> add
```
Expand Down
1 change: 1 addition & 0 deletions unison-src/builtin-tests/jit-tests.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ to `Tests.check` and `Tests.checkEqual`).
```

```ucm:hide
.> alias.term ##Pattern.captureAs Pattern.captureAs
.> load unison-src/builtin-tests/text-tests.u
.> add
```
Expand Down
3 changes: 3 additions & 0 deletions unison-src/builtin-tests/text-tests.u
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,9 @@ text.matching.tests = do
checkEqual "Pattern api (22)" (isMatch (join [many letter, eof]) "aaaaabbbb") true
checkEqual "Pattern api (23)" (isMatch (join [many letter, eof]) "aaaaabbbb1") false
checkEqual "Pattern api (24)" (isMatch (join [literal "abra", many (literal "cadabra")]) "abracadabracadabra") true
checkEqual "Pattern api (25)"
(run (captureAs "goodbye" (literal "hello")) "hello")
(Some (["goodbye"], ""))

char.class.tests = do
check "Char.Class: any" do Char.Class.is Class.any ?a
Expand Down
Loading

0 comments on commit d611f8f

Please sign in to comment.