refactored the files

royward · Sep 30, 2024 · fd063a5 · fd063a5
1 parent 4eabfd0
commit fd063a5
Show file tree

Hide file tree

Showing 6 changed files with 389 additions and 372 deletions.
diff --git a/src/packs/lsp_server_metta/prolog/lsp_json_parser.pl b/src/packs/lsp_server_metta/prolog/lsp_json_parser.pl
@@ -0,0 +1,35 @@
+:- module(lsp_json_parser, [lsp_metta_request//1]).
+/** <module> LSP Parser
+
+Module for parsing the body & headers from an LSP client.
+
+@author James Cash
+*/
+
+:- use_module(library(assoc), [list_to_assoc/2, get_assoc/3]).
+:- use_module(library(codesio), [open_codes_stream/2]).
+:- use_module(library(dcg/basics), [string_without//2]).
+:- use_module(library(http/json), [json_read_dict/3]).
+
+header(Key-Value) -->
+    string_without(":", KeyC), ": ", string_without("\r", ValueC),
+    { string_codes(Key, KeyC), string_codes(Value, ValueC) }.
+
+headers([Header]) -->
+    header(Header), "\r\n\r\n", !.
+headers([Header|Headers]) -->
+    header(Header), "\r\n",
+    headers(Headers).
+
+json_chars(0, []) --> [].
+json_chars(N, [C|Cs]) --> [C], { succ(Nn, N) }, json_chars(Nn, Cs).
+
+lsp_metta_request(_{headers: Headers, body: Body}) -->
+    headers(HeadersList),
+    { list_to_assoc(HeadersList, Headers),
+      get_assoc("Content-Length", Headers, LengthS),
+      number_string(Length, LengthS) },
+    json_chars(Length, JsonCodes),
+    { ground(JsonCodes),
+      open_codes_stream(JsonCodes, JsonStream),
+      json_read_dict(JsonStream, Body, []) }.
diff --git a/src/packs/lsp_server_metta/prolog/lsp_metta_changes.pl b/src/packs/lsp_server_metta/prolog/lsp_metta_changes.pl
@@ -1,6 +1,12 @@
 :- module(lsp_metta_changes, [handle_doc_changes/2,
                       doc_text_fallback/2,
                        doc_text/2]).
+:- use_module(lsp_metta_split, [
+    split_text_document/2,
+    split_document_get_multiple_sections/7,
+    coalesce_text/2
+]).
+
 /** <module> LSP changes
  Module for tracking edits to the source, in order to be able to act on
  the code as it is in the editor buffer, before saving.

diff --git a/src/packs/lsp_server_metta/prolog/lsp_metta_parser.pl b/src/packs/lsp_server_metta/prolog/lsp_metta_parser.pl
@@ -1,35 +1,246 @@
-:- module(lsp_metta_parser, [lsp_metta_request//1]).
-/** <module> LSP Parser
-
-Module for parsing the body & headers from an LSP client.
-
-@author James Cash
-*/
-
-:- use_module(library(assoc), [list_to_assoc/2, get_assoc/3]).
-:- use_module(library(codesio), [open_codes_stream/2]).
-:- use_module(library(dcg/basics), [string_without//2]).
-:- use_module(library(http/json), [json_read_dict/3]).
-
-header(Key-Value) -->
-    string_without(":", KeyC), ": ", string_without("\r", ValueC),
-    { string_codes(Key, KeyC), string_codes(Value, ValueC) }.
-
-headers([Header]) -->
-    header(Header), "\r\n\r\n", !.
-headers([Header|Headers]) -->
-    header(Header), "\r\n",
-    headers(Headers).
-
-json_chars(0, []) --> [].
-json_chars(N, [C|Cs]) --> [C], { succ(Nn, N) }, json_chars(Nn, Cs).
-
-lsp_metta_request(_{headers: Headers, body: Body}) -->
-    headers(HeadersList),
-    { list_to_assoc(HeadersList, Headers),
-      get_assoc("Content-Length", Headers, LengthS),
-      number_string(Length, LengthS) },
-    json_chars(Length, JsonCodes),
-    { ground(JsonCodes),
-      open_codes_stream(JsonCodes, JsonStream),
-      json_read_dict(JsonStream, Body, []) }.
+:- module(lsp_metta_parser, [
+    annotated_read_sexpr_list/4
+]).
+
+annotated_position_inc(p(L,C0),p(L,C1),N) :- C1 is C0+N.
+
+annotated_post_newline(p(L,_),p(L1,0)) :-
+    L1 is L+1.
+
+annotated_read_sexpr_list(LC0,LC0,Stream,[]) :- at_end_of_stream(Stream),!.
+annotated_read_sexpr_list(LC0,LC2,Stream,[Item|L]) :-
+    annotated_read_sexpr(LC0,LC1,Stream,Item),
+    %debug(server,"x ~w",[Item]),
+    annotated_read_sexpr_list(LC1,LC2,Stream,L).
+
+annotated_read_sexpr(LC0,LC1,I,O):- annotated_cont_sexpr(LC0,LC1,')',I,O).
+
+%! annotated_cont_sexpr(+EndChar:atom, +Stream:stream, -Item) is det.
+%
+% Reads a single item (S-expression or comment) from the specified stream, handling different formats and encodings.
+% Throws an error with stream position if the S-expression cannot be parsed.
+% @arg EndChar Character that denotes the end of a symbol.
+% @arg Stream Stream from which to read.
+% @arg Item The item read from the stream.
+annotated_cont_sexpr(LCi,LC1,EndChar, Stream, Item) :-
+    annotated_skip_spaces(LCi,LCj,Stream),  % Ignore whitespace before reading the expression.
+    annotated_position_inc(LCj,LC0,1),
+    get_char(Stream, Char),
+    (   Char = '(' -> (annotated_read_list(LC0,LC1,')', Stream, Item))  % If '(', read an S-expression list.
+    ;   Char = '[' -> (annotated_read_list(LC0,LC1,']', Stream, It3m), Item = ['[...]',It3m])  % If '[', read an S-expression list.
+    ;   Char = '{' -> (annotated_read_list(LC0,LC1,'}', Stream, It3m), Item = ['{...}',It3m])  % If '{', read an S-expression list.
+    ;   Char = '"' -> (annotated_read_quoted_string(LC0,LC1,Stream, '"', Item))  % Read a quoted string.
+    ;   Char = '!' -> (annotated_read_sexpr(LC0,LC1,Stream, Subr), Item = exec(Subr))  % Read called directive
+    ;   Char = '\'' -> (annotated_read_quoted_symbol(LC0,LC1,Stream, '\'', Item))  % Read a quoted symbol.
+    ;   Char = '`' -> (annotated_read_quoted_symbol(LC0,LC1,Stream, '`', Item))  % Read a backquoted symbol.
+    ;   Char = end_of_file -> (LC1=LC0,Item = end_of_file)  % If EOF, set Item to 'end_of_file'.
+    ;   annotated_read_symbolic(LC0, LC1, EndChar, Stream, Char, Item)
+    ), !.
+
+%! annotated_read_quoted_string(+Stream:stream, +EndChar:atom, -String:atom) is det.
+%
+% Reads a quoted string from the stream until the corresponding ending quote is found.
+% Handles escape sequences within the string.
+% Throws an error with stream position if the quoted string cannot be parsed.
+% @arg Stream Stream from which to read.
+% @arg EndChar Character that denotes the end of the quoted string.
+% @arg String The string read from the stream.
+annotated_read_quoted_string(LC0, LC1, Stream, EndChar, String) :-
+    annotated_read_until_char(Stream, EndChar, Chars),  % Read characters until the ending quote.
+    uft8_count_to_utf16_count(Chars,Total,1),
+    string_chars(String,Chars),
+    annotated_position_inc(LC0,LC1,Total).
+
+%! annotated_read_quoted_symbol(+Stream:stream, +EndChar:atom, -Symbol:atom) is det.
+%
+% Reads a quoted symbol from the stream, handling escapes and storing the result as a symbol.
+% Throws an error with stream position if the quoted symbol cannot be parsed.
+% @arg Stream Stream from which to read.
+% @arg EndChar Character that closes the quoted symbol.
+% @arg Symbol The symbol read from the stream.
+annotated_read_quoted_symbol(LC0, LC1, Stream, EndChar, Symbol) :-
+    annotated_read_until_char(Stream, EndChar, Chars),
+    ((EndChar == '\'', Chars = [Char])
+             -> Symbol='#\\'(Char); atom_chars(Symbol, Chars)),
+    uft8_count_to_utf16_count(Chars,Total,1),
+   annotated_position_inc(LC0,LC1,Total).
+
+%! annotated_read_until_char(+Stream:stream, +EndChar:atom, -Chars:list) is det.
+%
+% Reads characters from the stream until the specified end character is encountered.
+% This function is used to help read quoted strings and symbols.
+% Throws an error with stream position if the end character is not found.
+% @arg Stream Stream from which to read.
+% @arg EndChar Character that indicates the end of the reading.
+% @arg Chars List of characters read until the end character.
+annotated_read_until_char(Stream, EndChar, Chars) :-
+    get_char(Stream, Char),
+    (   Char = end_of_file -> throw_stream_error(Stream, unexpected_end_of_file(annotated_read_until_char(EndChar)))
+    ;   Char = EndChar -> Chars = []
+    ;   Char = '\\' -> get_char(Stream, NextChar),
+                       annotated_read_until_char(Stream, EndChar, RestChars),
+                       Chars = [NextChar | RestChars]
+    ;   annotated_read_until_char(Stream, EndChar, RestChars),
+        Chars = [Char | RestChars]
+    ).
+
+%! annotated_read_list(+EndChar:atom, +Stream:stream, -List:list) is det.
+%
+% Reads a list from the stream until the closing parenthesis is encountered.
+% It skips comments while reading the list but asserts them with their positions.
+% Throws an error with stream position if the list cannot be parsed correctly.
+% @arg Stream Stream from which to read.
+% @arg List The list read from the stream.
+% @arg EndChar Character that denotes the end of the list.
+annotated_read_list(LCi,LC2,EndChar, Stream, List) :-
+    annotated_skip_spaces(LCi,LC0,Stream),  % Skip any leading spaces before reading.
+    peek_char(Stream, Char), !,
+    ( Char = EndChar ->  % Closing parenthesis signals the end of the list.
+        annotated_position_inc(LC0,LC2,1),
+        get_char(Stream, _),  % Consume the closing parenthesis.
+        List = []
+    ; Char = end_of_file ->  % Unexpected end of file inside the list.
+        LC2=LC0,
+        List = [incomplete]
+    ; annotated_cont_sexpr(LC0, LC1, EndChar, Stream, Element),  % Read the next S-expression.
+        annotated_read_list(LC1, LC2, EndChar, Stream, Rest),  % Continue reading the rest of the list.
+        List = [Element | Rest]  % Add the element to the result list.
+    ), !.
+
+%! annotated_skip_spaces(+Stream:stream) is det.
+%
+% Skips whitespace characters in the input stream.
+% If a comment is encountered, reads the comment and asserts it.
+% @arg Stream Stream from which to skip spaces.
+annotated_skip_spaces(LC0,LC1,Stream) :-
+    peek_char(Stream, Char),
+    (   Char = ';' ->
+            (annotated_read_single_line_comment(Stream),
+            annotated_post_newline(LC0,LC0a),
+            annotated_skip_spaces(LC0a,LC1,Stream))  % If the character is ';', read a single-line comment.
+    ;   char_type(Char,end_of_line) ->
+            (get_char(Stream, _),
+            annotated_post_newline(LC0,LC0a),
+            annotated_skip_spaces(LC0a,LC1,Stream))
+    ;   (char_type(Char,white);char_type(Char,space);char_type(Char,cntrl)) ->
+            (get_char(Stream, Char2),
+            uft8_count_to_utf16_count([Char2],Size,0),
+            annotated_position_inc(LC0,LC0a,Size),
+            annotated_skip_spaces(LC0a,LC1,Stream))  % Consume the space and continue.
+    ;   LC1=LC0  % Non-space character found; stop skipping.
+    ), !.
+
+%! annotated_skip_spaces_until_eol(+Stream:stream) is det.
+%
+% Skips whitespace characters in the input stream.
+% If a comment is encountered, reads the comment and asserts it.
+% @arg Stream Stream from which to skip spaces.
+annotated_skip_spaces_until_eol(LC0,LC1,Stream,EolFound) :-
+   peek_char(Stream, Char),
+    (   Char = ';' ->
+            (annotated_read_single_line_comment(LC0,LC0a,Stream),
+            annotated_post_newline(LC0a,LC1),
+            EolFound=true)
+    ;   char_type(Char,end_of_line) ->
+            (get_char(Stream, _),
+            annotated_post_newline(LC0,LC1),
+            EolFound=true)
+    ;   (char_type(Char,white);char_type(Char,space);char_type(Char,cntrl)) ->
+            (get_char(Stream, _),
+            annotated_skip_spaces_until_eol(LC0,LC1,Stream,EolFound))  % Consume the space and continue.
+    ;   Char=end_of_file -> LC1=LC0,EolFound=true
+    ;   (LC1=LC0,EolFound=false)  % Non-space character found; stop skipping.
+    ), !.
+
+annotated_get_blank_lines(LC0,LCblank,LCLeftover,StartOfIncompleteLinePos,Stream) :-
+    seek(Stream,0,current,CurrentPos),
+    annotated_skip_spaces_until_eol(LC0,LC1,Stream,EolFound),
+    (EolFound
+        -> (annotated_get_blank_lines(LC1,LCblank,LCLeftover,StartOfIncompleteLinePos,Stream))
+        ; (LCblank=LC0,LCLeftover=LC1,StartOfIncompleteLinePos=CurrentPos)).
+
+%! annotated_read_single_line_comment(+Stream:stream) is det.
+%
+% Reads a single-line comment from the stream and asserts it with the position.
+% A comment starts with ';' and continues to the end of the line.
+% @arg Stream The input stream from which to read.
+annotated_read_single_line_comment(Stream) :-
+    read_line_to_string(Stream, _Comment).
+
+uft8_count_to_utf16_count(Chars,Size,Additional) :-
+    maplist(char_code,Chars,Codes),
+    uft8_count_to_utf16_count_aux(Codes,Sum),
+    Size is Sum+Additional.
+
+uft8_count_to_utf16_count_aux([],0).
+uft8_count_to_utf16_count_aux([C|T],Sum) :- C<128,!,uft8_count_to_utf16_count_aux(T,Sum0),Sum is Sum0+1.
+uft8_count_to_utf16_count_aux([C,_|T],Sum) :- C<224,!,uft8_count_to_utf16_count_aux(T,Sum0),Sum is Sum0+1.
+uft8_count_to_utf16_count_aux([C,_,_|T],Sum) :- C<240,!,uft8_count_to_utf16_count_aux(T,Sum0),Sum is Sum0+1.
+uft8_count_to_utf16_count_aux([_,_,_,_|T],Sum) :- uft8_count_to_utf16_count_aux(T,Sum0),Sum is Sum0+2.
+
+%! annotated_read_symbolic(+EndChar:atom, +Stream:stream, +FirstChar:atom, -Symbolic:atom) is det.
+%
+% Reads a symbolic expression starting with a specific character, possibly incorporating more complex syntaxes.
+% Throws an error with stream position if the symbolic expression cannot be parsed.
+% @arg EndChar Character that indicates the end of the reading unless escaped.
+% @arg Stream Stream from which to read.
+% @arg FirstChar The first character of the symbolic expression.
+% @arg Symbolic The complete symbolic expression read.
+annotated_read_symbolic(LC0, LC1, EndChar, Stream, FirstChar, Item) :-
+    annotated_read_symbolic_cont(EndChar, Stream, RestChars),
+    annotated_classify_and_convert_charseq_([FirstChar| RestChars], Symbolic), !,
+    uft8_count_to_utf16_count([FirstChar| RestChars],Total,-1),
+    annotated_position_inc(LC0,LC1,Total),
+    LC0=p(L,C0a),LC1=p(_,C1),
+    C0 is C0a-1,
+    Item=a(L,C0,C1,Symbolic).
+
+%! annotated_classify_and_convert_charseq_(+Chars:list, -Symbolic:term) is det.
+%
+% Helper predicate that attempts to classify the character sequence.
+% Handles special cases such as Prolog variables and numbers.
+%
+% @param Chars    The input list of characters.
+% @param Symbolic The resultant Prolog term or symbol.
+
+% Case 1: If the character sequence starts with '$', treat it as a variable.
+annotated_classify_and_convert_charseq_(['$'| RestChars], var(Symbolic)) :-
+    !,atom_chars(Symbolic, RestChars).  % Convert the rest of the characters into a variable name.
+% Case 2: Attempt to interpret the characters as a Prolog term using `read_from_chars/2`.
+% This handles more complex syntaxes like numbers, dates, etc.
+annotated_classify_and_convert_charseq_(Chars, Symbolic) :-
+    notrace(catch(read_from_chars(Chars, Symbolic), _, fail)),  % Safely attempt to parse the characters.
+    atomic(Symbolic),!.  % Ensure the result is atomic.
+% Case 3: If no other case applies, convert the characters directly into an atom.
+annotated_classify_and_convert_charseq_(Chars, Symbolic) :-
+    atom_chars(Symbolic, Chars).  % Convert the character sequence into an atom.
+
+%! annotated_read_symbolic_cont(+EndChar:atom, +Stream:stream, -Chars:list) is det.
+%
+% Continues reading symbolic characters from the stream until a delimiter is encountered.
+% If a backslash is followed by a delimiter, the delimiter is added as a regular character.
+% @arg EndChar Character that indicates the end of the reading unless escaped.
+% @arg Stream Stream from which to read.
+% @arg Chars List of characters read, forming part of a symbolic expression.
+annotated_read_symbolic_cont(EndChar, Stream, Chars) :-
+    peek_char(Stream, NextChar),
+    (   annotated_is_delimiter(NextChar) -> Chars = []  % Stop when a delimiter is found.
+    ;   EndChar == NextChar -> Chars = []  % Stop when an EndChar is found.
+    ; ( get_char(Stream, NextChar),
+        (   NextChar = '\\' ->  % If it's a backslash, read the next char.
+          ( get_char(Stream, EscapedChar),
+            annotated_read_symbolic_cont(EndChar, Stream, RestChars),
+            Chars = [EscapedChar | RestChars] ) % Add the escaped char normally.
+        ; ( annotated_read_symbolic_cont(EndChar, Stream, RestChars),
+            Chars = [NextChar | RestChars] ) % Continue reading the symbolic characters.
+        ))
+    ), !.
+
+%! annotated_is_delimiter(+Char:atom) is semidet.
+%
+% Determines if a character is a delimiter for reading symbolic expressions.
+% @arg Char Character to check.
+annotated_is_delimiter(Char) :-
+    char_type(Char, space) ;  % Space is a delimiter.
+    arg(_, v('(', ')', end_of_file), Char).  % Other delimiters include parentheses and end of file.
+