Merge pull request #5282 from sellout/fix-blank-identifiers

Change handling of “blank” identifiers
unisonweb · Aug 18, 2024 · e388786 · e388786
2 parents 1c5a4e6 + eff07ae
commit e388786
Show file tree

Hide file tree

Showing 5 changed files with 269 additions and 46 deletions.
diff --git a/unison-core/src/Unison/Name.hs b/unison-core/src/Unison/Name.hs
@@ -32,6 +32,7 @@ module Unison.Name
     parent,
     stripNamePrefix,
     unqualified,
+    isUnqualified,
 
     -- * To organize later
     commonPrefix,
@@ -504,6 +505,11 @@ unqualified :: Name -> Name
 unqualified (Name _ (s :| _)) =
   Name Relative (s :| [])
 
+isUnqualified :: Name -> Bool
+isUnqualified = \case
+  Name Relative (_ :| []) -> True
+  Name _ (_ :| _) -> False
+
 -- Tries to shorten `fqn` to the smallest suffix that still unambiguously refers to the same name. Uses an efficient
 -- logarithmic lookup in the provided relation.
 --

diff --git a/unison-src/transcripts/fix2822.md b/unison-src/transcripts/fix2822.md
@@ -0,0 +1,53 @@
+# Inability to reference a term or type with a name that has segments starting with an underscore
+
+```ucm:hide
+scratch/main> builtins.mergeio
+```
+
+There should be no issue having terms with an underscore-led component
+
+```unison
+_a.blah = 2
+
+b = _a.blah + 1
+```
+
+Or even that _are_ a single “blank” component
+
+```unison
+_b = 2
+
+x = _b + 1
+```
+Types can also have underscore-led components.
+
+```unison
+unique type _a.Blah = A
+
+c : _a.Blah
+c = A
+```
+
+And we should also be able to access underscore-led fields.
+
+```unison
+type Hello = {_value : Nat}
+
+doStuff = _value.modify
+```
+
+But pattern matching shouldn’t bind to underscore-led names.
+
+```unison:error
+dontMap f = cases
+  None -> false
+  Some _used -> f _used
+```
+
+But we can use them as unbound patterns.
+
+```unison
+dontMap f = cases
+  None -> false
+  Some _unused -> f 2
+```
diff --git a/unison-src/transcripts/fix2822.output.md b/unison-src/transcripts/fix2822.output.md
@@ -0,0 +1,141 @@
+# Inability to reference a term or type with a name that has segments starting with an underscore
+
+There should be no issue having terms with an underscore-led component
+
+``` unison
+_a.blah = 2
+
+b = _a.blah + 1
+```
+
+``` ucm
+
+  Loading changes detected in scratch.u.
+
+  I found and typechecked these definitions in scratch.u. If you
+  do an `add` or `update`, here's how your codebase would
+  change:
+  
+    ⍟ These new definitions are ok to `add`:
+    
+      _a.blah : Nat
+      b       : Nat
+
+```
+Or even that *are* a single “blank” component
+
+``` unison
+_b = 2
+
+x = _b + 1
+```
+
+``` ucm
+
+  Loading changes detected in scratch.u.
+
+  I found and typechecked these definitions in scratch.u. If you
+  do an `add` or `update`, here's how your codebase would
+  change:
+  
+    ⍟ These new definitions are ok to `add`:
+    
+      _b : Nat
+      x  : Nat
+
+```
+Types can also have underscore-led components.
+
+``` unison
+unique type _a.Blah = A
+
+c : _a.Blah
+c = A
+```
+
+``` ucm
+
+  Loading changes detected in scratch.u.
+
+  I found and typechecked these definitions in scratch.u. If you
+  do an `add` or `update`, here's how your codebase would
+  change:
+  
+    ⍟ These new definitions are ok to `add`:
+    
+      type _a.Blah
+      c : Blah
+
+```
+And we should also be able to access underscore-led fields.
+
+``` unison
+type Hello = {_value : Nat}
+
+doStuff = _value.modify
+```
+
+``` ucm
+
+  Loading changes detected in scratch.u.
+
+  I found and typechecked these definitions in scratch.u. If you
+  do an `add` or `update`, here's how your codebase would
+  change:
+  
+    ⍟ These new definitions are ok to `add`:
+    
+      type Hello
+      Hello._value        : Hello -> Nat
+      Hello._value.modify : (Nat ->{g} Nat) -> Hello ->{g} Hello
+      Hello._value.set    : Nat -> Hello -> Hello
+      doStuff             : (Nat ->{g} Nat) -> Hello ->{g} Hello
+
+```
+But pattern matching shouldn’t bind to underscore-led names.
+
+``` unison
+dontMap f = cases
+  None -> false
+  Some _used -> f _used
+```
+
+``` ucm
+
+  Loading changes detected in scratch.u.
+
+  I couldn't figure out what _used refers to here:
+  
+      3 |   Some _used -> f _used
+  
+  I also don't know what type it should be.
+  
+  Some common causes of this error include:
+    * Your current namespace is too deep to contain the
+      definition in its subtree
+    * The definition is part of a library which hasn't been
+      added to this project
+    * You have a typo in the name
+
+```
+But we can use them as unbound patterns.
+
+``` unison
+dontMap f = cases
+  None -> false
+  Some _unused -> f 2
+```
+
+``` ucm
+
+  Loading changes detected in scratch.u.
+
+  I found and typechecked these definitions in scratch.u. If you
+  do an `add` or `update`, here's how your codebase would
+  change:
+  
+    ⍟ These new definitions are ok to `add`:
+    
+      dontMap : (Nat ->{g} Boolean) -> Optional a ->{g} Boolean
+
+```
diff --git a/unison-syntax/src/Unison/Syntax/Lexer/Unison.hs b/unison-syntax/src/Unison/Syntax/Lexer/Unison.hs
@@ -29,6 +29,7 @@ import Control.Lens qualified as Lens
 import Control.Monad.State qualified as S
 import Data.Char (isAlphaNum, isDigit, isSpace, ord, toLower)
 import Data.Foldable qualified as Foldable
+import Data.Functor.Classes (Show1 (..), showsPrec1)
 import Data.List qualified as List
 import Data.List.NonEmpty (NonEmpty ((:|)))
 import Data.List.NonEmpty qualified as Nel
@@ -46,17 +47,15 @@ import U.Codebase.Reference (ReferenceType (..))
 import Unison.HashQualifiedPrime qualified as HQ'
 import Unison.Name (Name)
 import Unison.Name qualified as Name
-import Unison.NameSegment (NameSegment)
 import Unison.NameSegment qualified as NameSegment (docSegment)
-import Unison.NameSegment.Internal qualified as NameSegment
 import Unison.Prelude
 import Unison.ShortHash (ShortHash)
 import Unison.ShortHash qualified as SH
 import Unison.Syntax.HashQualifiedPrime qualified as HQ' (toText)
 import Unison.Syntax.Lexer
 import Unison.Syntax.Lexer.Token (posP, tokenP)
 import Unison.Syntax.Name qualified as Name (isSymboly, nameP, toText, unsafeParseText)
-import Unison.Syntax.NameSegment qualified as NameSegment (ParseErr (..), wordyP)
+import Unison.Syntax.NameSegment qualified as NameSegment (ParseErr (..))
 import Unison.Syntax.Parser.Doc qualified as Doc
 import Unison.Syntax.Parser.Doc.Data qualified as Doc
 import Unison.Syntax.ReservedWords (delimiters, typeModifiers, typeOrAbility)
@@ -105,18 +104,28 @@ data Err
 --   further knowledge of spacing or indentation levels
 --   any knowledge of comments
 data Lexeme
-  = Open String -- start of a block
-  | Semi IsVirtual -- separator between elements of a block
-  | Close -- end of a block
-  | Reserved String -- reserved tokens such as `{`, `(`, `type`, `of`, etc
-  | Textual String -- text literals, `"foo bar"`
-  | Character Char -- character literals, `?X`
-  | WordyId (HQ'.HashQualified Name) -- a (non-infix) identifier. invariant: last segment is wordy
-  | SymbolyId (HQ'.HashQualified Name) -- an infix identifier. invariant: last segment is symboly
-  | Blank String -- a typed hole or placeholder
-  | Numeric String -- numeric literals, left unparsed
-  | Bytes Bytes.Bytes -- bytes literals
-  | Hash ShortHash -- hash literals
+  = -- | start of a block
+    Open String
+  | -- | separator between elements of a block
+    Semi IsVirtual
+  | -- | end of a block
+    Close
+  | -- | reserved tokens such as `{`, `(`, `type`, `of`, etc
+    Reserved String
+  | -- | text literals, `"foo bar"`
+    Textual String
+  | -- | character literals, `?X`
+    Character Char
+  | -- | a (non-infix) identifier. invariant: last segment is wordy
+    WordyId (HQ'.HashQualified Name)
+  | -- | an infix identifier. invariant: last segment is symboly
+    SymbolyId (HQ'.HashQualified Name)
+  | -- | numeric literals, left unparsed
+    Numeric String
+  | -- | bytes literals
+    Bytes Bytes.Bytes
+  | -- | hash literals
+    Hash ShortHash
   | Err Err
   | Doc (Doc.UntitledSection (Doc.Tree (ReferenceType, HQ'.HashQualified Name) [Token Lexeme]))
   deriving stock (Eq, Show, Ord)
@@ -330,7 +339,6 @@ displayLexeme = \case
   Character c -> "?" <> [c]
   WordyId hq -> Text.unpack (HQ'.toTextWith Name.toText hq)
   SymbolyId hq -> Text.unpack (HQ'.toTextWith Name.toText hq)
-  Blank b -> b
   Numeric n -> n
   Bytes _b -> "bytes literal"
   Hash h -> Text.unpack (SH.toText h)
@@ -436,7 +444,6 @@ lexemes eof =
         <|> token numeric
         <|> token character
         <|> reserved
-        <|> token blank
         <|> token identifierLexemeP
         <|> (asum . map token) [semi, textual, hash]
 
@@ -469,12 +476,6 @@ lexemes eof =
               t <- tok identifierLexemeP
               pure $ (fmap Reserved <$> typ) <> t
 
-    blank =
-      separated wordySep do
-        _ <- char '_'
-        seg <- P.optional wordyIdSegP
-        pure (Blank (maybe "" (Text.unpack . NameSegment.toUnescapedText) seg))
-
     semi = char ';' $> Semi False
     textual = Textual <$> quoted
     quoted = quotedRaw <|> quotedSingleLine
@@ -758,10 +759,6 @@ identifierLexeme name =
     then SymbolyId name
     else WordyId name
 
-wordyIdSegP :: P.ParsecT (Token Err) String m NameSegment
-wordyIdSegP =
-  PI.withParsecT (fmap (ReservedWordyId . Text.unpack)) NameSegment.wordyP
-
 shortHashP :: P.ParsecT (Token Err) String m ShortHash
 shortHashP =
   PI.withParsecT (fmap (InvalidShortHash . Text.unpack)) ShortHash.shortHashP
@@ -838,17 +835,36 @@ headToken (Block a _ _) = a
 headToken (Leaf a) = a
 
 instance (Show a) => Show (BlockTree a) where
-  show (Leaf a) = show a
-  show (Block open mid close) =
-    show open
-      ++ "\n"
-      ++ indent "  " (intercalateMap "\n" (intercalateMap " " show) mid)
-      ++ "\n"
-      ++ maybe "" show close
+  showsPrec = showsPrec1
+
+-- | This instance should be compatible with `Read`, but inserts newlines and indentation to make it more
+--  /human/-readable.
+instance Show1 BlockTree where
+  liftShowsPrec spa sla = shows ""
     where
-      indent by s = by ++ (s >>= go by)
-      go by '\n' = '\n' : by
-      go _ c = [c]
+      shows by prec =
+        showParen (prec > appPrec) . \case
+          Leaf a -> showString "Leaf " . showsNext spa "" a
+          Block open mid close ->
+            showString "Block "
+              . showsNext spa "" open
+              . showString "\n"
+              . showIndentedList (showIndentedList (\b -> showsIndented (shows b 0) b)) ("  " <> by) mid
+              . showString "\n"
+              . showsNext (liftShowsPrec spa sla) ("  " <> by) close
+      appPrec = 10
+      showsNext :: (Int -> x -> ShowS) -> String -> x -> ShowS
+      showsNext fn = showsIndented (fn $ appPrec + 1)
+      showsIndented :: (x -> ShowS) -> String -> x -> ShowS
+      showsIndented fn by x = showString by . fn x
+      showIndentedList :: (String -> x -> ShowS) -> String -> [x] -> ShowS
+      showIndentedList fn by xs =
+        showString by
+          . showString "["
+          . foldr (\x acc -> showString "\n" . fn ("  " <> by) x . showString "," . acc) id xs
+          . showString "\n"
+          . showString by
+          . showString "]"
 
 reorderTree :: ([[BlockTree a]] -> [[BlockTree a]]) -> BlockTree a -> BlockTree a
 reorderTree f (Block open mid close) = Block open (f (fmap (reorderTree f) <$> mid)) close
@@ -993,7 +1009,6 @@ instance P.VisualStream [Token Lexeme] where
           Nothing -> '?' : [c]
       pretty (WordyId n) = Text.unpack (HQ'.toText n)
       pretty (SymbolyId n) = Text.unpack (HQ'.toText n)
-      pretty (Blank s) = "_" ++ s
       pretty (Numeric n) = n
       pretty (Hash sh) = show sh
       pretty (Err e) = show e