Skip to content

Commit

Permalink
Merge pull request #5282 from sellout/fix-blank-identifiers
Browse files Browse the repository at this point in the history
Change handling of “blank” identifiers
  • Loading branch information
aryairani authored Aug 18, 2024
2 parents 1c5a4e6 + eff07ae commit e388786
Show file tree
Hide file tree
Showing 5 changed files with 269 additions and 46 deletions.
6 changes: 6 additions & 0 deletions unison-core/src/Unison/Name.hs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ module Unison.Name
parent,
stripNamePrefix,
unqualified,
isUnqualified,

-- * To organize later
commonPrefix,
Expand Down Expand Up @@ -504,6 +505,11 @@ unqualified :: Name -> Name
unqualified (Name _ (s :| _)) =
Name Relative (s :| [])

isUnqualified :: Name -> Bool
isUnqualified = \case
Name Relative (_ :| []) -> True
Name _ (_ :| _) -> False

-- Tries to shorten `fqn` to the smallest suffix that still unambiguously refers to the same name. Uses an efficient
-- logarithmic lookup in the provided relation.
--
Expand Down
53 changes: 53 additions & 0 deletions unison-src/transcripts/fix2822.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Inability to reference a term or type with a name that has segments starting with an underscore

```ucm:hide
scratch/main> builtins.mergeio
```

There should be no issue having terms with an underscore-led component

```unison
_a.blah = 2
b = _a.blah + 1
```

Or even that _are_ a single “blank” component

```unison
_b = 2
x = _b + 1
```
Types can also have underscore-led components.

```unison
unique type _a.Blah = A
c : _a.Blah
c = A
```

And we should also be able to access underscore-led fields.

```unison
type Hello = {_value : Nat}
doStuff = _value.modify
```

But pattern matching shouldn’t bind to underscore-led names.

```unison:error
dontMap f = cases
None -> false
Some _used -> f _used
```

But we can use them as unbound patterns.

```unison
dontMap f = cases
None -> false
Some _unused -> f 2
```
141 changes: 141 additions & 0 deletions unison-src/transcripts/fix2822.output.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
# Inability to reference a term or type with a name that has segments starting with an underscore

There should be no issue having terms with an underscore-led component

``` unison
_a.blah = 2
b = _a.blah + 1
```

``` ucm
Loading changes detected in scratch.u.
I found and typechecked these definitions in scratch.u. If you
do an `add` or `update`, here's how your codebase would
change:
⍟ These new definitions are ok to `add`:
_a.blah : Nat
b : Nat
```
Or even that *are* a single “blank” component

``` unison
_b = 2
x = _b + 1
```

``` ucm
Loading changes detected in scratch.u.
I found and typechecked these definitions in scratch.u. If you
do an `add` or `update`, here's how your codebase would
change:
⍟ These new definitions are ok to `add`:
_b : Nat
x : Nat
```
Types can also have underscore-led components.

``` unison
unique type _a.Blah = A
c : _a.Blah
c = A
```

``` ucm
Loading changes detected in scratch.u.
I found and typechecked these definitions in scratch.u. If you
do an `add` or `update`, here's how your codebase would
change:
⍟ These new definitions are ok to `add`:
type _a.Blah
c : Blah
```
And we should also be able to access underscore-led fields.

``` unison
type Hello = {_value : Nat}
doStuff = _value.modify
```

``` ucm
Loading changes detected in scratch.u.
I found and typechecked these definitions in scratch.u. If you
do an `add` or `update`, here's how your codebase would
change:
⍟ These new definitions are ok to `add`:
type Hello
Hello._value : Hello -> Nat
Hello._value.modify : (Nat ->{g} Nat) -> Hello ->{g} Hello
Hello._value.set : Nat -> Hello -> Hello
doStuff : (Nat ->{g} Nat) -> Hello ->{g} Hello
```
But pattern matching shouldn’t bind to underscore-led names.

``` unison
dontMap f = cases
None -> false
Some _used -> f _used
```

``` ucm
Loading changes detected in scratch.u.
I couldn't figure out what _used refers to here:
3 | Some _used -> f _used
I also don't know what type it should be.
Some common causes of this error include:
* Your current namespace is too deep to contain the
definition in its subtree
* The definition is part of a library which hasn't been
added to this project
* You have a typo in the name
```
But we can use them as unbound patterns.

``` unison
dontMap f = cases
None -> false
Some _unused -> f 2
```

``` ucm
Loading changes detected in scratch.u.
I found and typechecked these definitions in scratch.u. If you
do an `add` or `update`, here's how your codebase would
change:
⍟ These new definitions are ok to `add`:
dontMap : (Nat ->{g} Boolean) -> Optional a ->{g} Boolean
```
91 changes: 53 additions & 38 deletions unison-syntax/src/Unison/Syntax/Lexer/Unison.hs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import Control.Lens qualified as Lens
import Control.Monad.State qualified as S
import Data.Char (isAlphaNum, isDigit, isSpace, ord, toLower)
import Data.Foldable qualified as Foldable
import Data.Functor.Classes (Show1 (..), showsPrec1)
import Data.List qualified as List
import Data.List.NonEmpty (NonEmpty ((:|)))
import Data.List.NonEmpty qualified as Nel
Expand All @@ -46,17 +47,15 @@ import U.Codebase.Reference (ReferenceType (..))
import Unison.HashQualifiedPrime qualified as HQ'
import Unison.Name (Name)
import Unison.Name qualified as Name
import Unison.NameSegment (NameSegment)
import Unison.NameSegment qualified as NameSegment (docSegment)
import Unison.NameSegment.Internal qualified as NameSegment
import Unison.Prelude
import Unison.ShortHash (ShortHash)
import Unison.ShortHash qualified as SH
import Unison.Syntax.HashQualifiedPrime qualified as HQ' (toText)
import Unison.Syntax.Lexer
import Unison.Syntax.Lexer.Token (posP, tokenP)
import Unison.Syntax.Name qualified as Name (isSymboly, nameP, toText, unsafeParseText)
import Unison.Syntax.NameSegment qualified as NameSegment (ParseErr (..), wordyP)
import Unison.Syntax.NameSegment qualified as NameSegment (ParseErr (..))
import Unison.Syntax.Parser.Doc qualified as Doc
import Unison.Syntax.Parser.Doc.Data qualified as Doc
import Unison.Syntax.ReservedWords (delimiters, typeModifiers, typeOrAbility)
Expand Down Expand Up @@ -105,18 +104,28 @@ data Err
-- further knowledge of spacing or indentation levels
-- any knowledge of comments
data Lexeme
= Open String -- start of a block
| Semi IsVirtual -- separator between elements of a block
| Close -- end of a block
| Reserved String -- reserved tokens such as `{`, `(`, `type`, `of`, etc
| Textual String -- text literals, `"foo bar"`
| Character Char -- character literals, `?X`
| WordyId (HQ'.HashQualified Name) -- a (non-infix) identifier. invariant: last segment is wordy
| SymbolyId (HQ'.HashQualified Name) -- an infix identifier. invariant: last segment is symboly
| Blank String -- a typed hole or placeholder
| Numeric String -- numeric literals, left unparsed
| Bytes Bytes.Bytes -- bytes literals
| Hash ShortHash -- hash literals
= -- | start of a block
Open String
| -- | separator between elements of a block
Semi IsVirtual
| -- | end of a block
Close
| -- | reserved tokens such as `{`, `(`, `type`, `of`, etc
Reserved String
| -- | text literals, `"foo bar"`
Textual String
| -- | character literals, `?X`
Character Char
| -- | a (non-infix) identifier. invariant: last segment is wordy
WordyId (HQ'.HashQualified Name)
| -- | an infix identifier. invariant: last segment is symboly
SymbolyId (HQ'.HashQualified Name)
| -- | numeric literals, left unparsed
Numeric String
| -- | bytes literals
Bytes Bytes.Bytes
| -- | hash literals
Hash ShortHash
| Err Err
| Doc (Doc.UntitledSection (Doc.Tree (ReferenceType, HQ'.HashQualified Name) [Token Lexeme]))
deriving stock (Eq, Show, Ord)
Expand Down Expand Up @@ -330,7 +339,6 @@ displayLexeme = \case
Character c -> "?" <> [c]
WordyId hq -> Text.unpack (HQ'.toTextWith Name.toText hq)
SymbolyId hq -> Text.unpack (HQ'.toTextWith Name.toText hq)
Blank b -> b
Numeric n -> n
Bytes _b -> "bytes literal"
Hash h -> Text.unpack (SH.toText h)
Expand Down Expand Up @@ -436,7 +444,6 @@ lexemes eof =
<|> token numeric
<|> token character
<|> reserved
<|> token blank
<|> token identifierLexemeP
<|> (asum . map token) [semi, textual, hash]

Expand Down Expand Up @@ -469,12 +476,6 @@ lexemes eof =
t <- tok identifierLexemeP
pure $ (fmap Reserved <$> typ) <> t

blank =
separated wordySep do
_ <- char '_'
seg <- P.optional wordyIdSegP
pure (Blank (maybe "" (Text.unpack . NameSegment.toUnescapedText) seg))

semi = char ';' $> Semi False
textual = Textual <$> quoted
quoted = quotedRaw <|> quotedSingleLine
Expand Down Expand Up @@ -758,10 +759,6 @@ identifierLexeme name =
then SymbolyId name
else WordyId name

wordyIdSegP :: P.ParsecT (Token Err) String m NameSegment
wordyIdSegP =
PI.withParsecT (fmap (ReservedWordyId . Text.unpack)) NameSegment.wordyP

shortHashP :: P.ParsecT (Token Err) String m ShortHash
shortHashP =
PI.withParsecT (fmap (InvalidShortHash . Text.unpack)) ShortHash.shortHashP
Expand Down Expand Up @@ -838,17 +835,36 @@ headToken (Block a _ _) = a
headToken (Leaf a) = a

instance (Show a) => Show (BlockTree a) where
show (Leaf a) = show a
show (Block open mid close) =
show open
++ "\n"
++ indent " " (intercalateMap "\n" (intercalateMap " " show) mid)
++ "\n"
++ maybe "" show close
showsPrec = showsPrec1

-- | This instance should be compatible with `Read`, but inserts newlines and indentation to make it more
-- /human/-readable.
instance Show1 BlockTree where
liftShowsPrec spa sla = shows ""
where
indent by s = by ++ (s >>= go by)
go by '\n' = '\n' : by
go _ c = [c]
shows by prec =
showParen (prec > appPrec) . \case
Leaf a -> showString "Leaf " . showsNext spa "" a
Block open mid close ->
showString "Block "
. showsNext spa "" open
. showString "\n"
. showIndentedList (showIndentedList (\b -> showsIndented (shows b 0) b)) (" " <> by) mid
. showString "\n"
. showsNext (liftShowsPrec spa sla) (" " <> by) close
appPrec = 10
showsNext :: (Int -> x -> ShowS) -> String -> x -> ShowS
showsNext fn = showsIndented (fn $ appPrec + 1)
showsIndented :: (x -> ShowS) -> String -> x -> ShowS
showsIndented fn by x = showString by . fn x
showIndentedList :: (String -> x -> ShowS) -> String -> [x] -> ShowS
showIndentedList fn by xs =
showString by
. showString "["
. foldr (\x acc -> showString "\n" . fn (" " <> by) x . showString "," . acc) id xs
. showString "\n"
. showString by
. showString "]"

reorderTree :: ([[BlockTree a]] -> [[BlockTree a]]) -> BlockTree a -> BlockTree a
reorderTree f (Block open mid close) = Block open (f (fmap (reorderTree f) <$> mid)) close
Expand Down Expand Up @@ -993,7 +1009,6 @@ instance P.VisualStream [Token Lexeme] where
Nothing -> '?' : [c]
pretty (WordyId n) = Text.unpack (HQ'.toText n)
pretty (SymbolyId n) = Text.unpack (HQ'.toText n)
pretty (Blank s) = "_" ++ s
pretty (Numeric n) = n
pretty (Hash sh) = show sh
pretty (Err e) = show e
Expand Down
Loading

0 comments on commit e388786

Please sign in to comment.