diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e49b27e..19eb7cf 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -276,19 +276,19 @@ jobs: - name: Install Nix dependencies run: | - nix develop .github/nix#${{ matrix.package }} --command true + nix develop .#${{ matrix.package }} --command true - name: Build in development Shell run: | - nix develop .github/nix#${{ matrix.package }} --command make + nix develop .#${{ matrix.package }} --command make - name: Run tests in development Shell run: | - nix develop .github/nix#${{ matrix.package }} --command make check + nix develop .#${{ matrix.package }} --command make check - name: Build with Nix run: | - nix build .github/nix#${{ matrix.package }} --print-build-logs + nix build .#${{ matrix.package }} --print-build-logs nix-flake-checks: runs-on: ubuntu-latest @@ -305,4 +305,4 @@ jobs: access-tokens = github.com=${{ secrets.GITHUB_TOKEN }} - name: Run flake checks - run: nix flake check .github/nix --print-build-logs + run: nix flake check . --print-build-logs diff --git a/.github/nix/with-nixpkgs.nix b/.nix/with-nixpkgs.nix similarity index 79% rename from .github/nix/with-nixpkgs.nix rename to .nix/with-nixpkgs.nix index 5b99344..24d3f86 100644 --- a/.github/nix/with-nixpkgs.nix +++ b/.nix/with-nixpkgs.nix @@ -4,9 +4,9 @@ in { packages.with-nixpkgs = pkgs.stdenv.mkDerivation { name = "morbig"; - ## NOTE: The use of `../..` matters because the path is taken as relative to - ## the current file, and therefore to `.github/nix/`. - src = ../..; + ## NOTE: The use of `./..` matters because the path is taken as relative to + ## the current file, and therefore to `.nix/`. + src = ./..; nativeBuildInputs = with opkgs; [ ## Basic ones, always necessary diff --git a/.github/nix/with-opam-nix.nix b/.nix/with-opam-nix.nix similarity index 62% rename from .github/nix/with-opam-nix.nix rename to .nix/with-opam-nix.nix index 3ee6272..b504ae6 100644 --- a/.github/nix/with-opam-nix.nix +++ b/.nix/with-opam-nix.nix @@ -1,10 +1,10 @@ { ... }: { perSystem = { inputs', pkgs, ... }: - ## NOTE: The use of `../..` matters because the path is taken as relative to - ## the current file, and therefore to `.github/nix/`. + ## NOTE: The use of `./..` matters because the path is taken as relative to + ## the current file, and therefore to `.nix/`. let scope = - inputs'.opam-nix.lib.buildOpamProject { inherit pkgs; } "morbig" ../.. { + inputs'.opam-nix.lib.buildOpamProject { inherit pkgs; } "morbig" ./.. { ocaml-base-compiler = "*"; }; in { packages.with-opam-nix = scope.morbig // { inherit scope; }; }; diff --git a/dune-project b/dune-project index 40861b3..b989dbb 100644 --- a/dune-project +++ b/dune-project @@ -33,3 +33,4 @@ (generate_opam_files true) (using menhir 2.0) +(formatting (enabled_for dune)) diff --git a/.github/nix/flake.lock b/flake.lock similarity index 97% rename from .github/nix/flake.lock rename to flake.lock index ae43500..57253a1 100644 --- a/.github/nix/flake.lock +++ b/flake.lock @@ -257,11 +257,11 @@ "nixpkgs-stable": "nixpkgs-stable" }, "locked": { - "lastModified": 1681831107, - "narHash": "sha256-pXl3DPhhul9NztSetUJw2fcN+RI3sGOYgKu29xpgnqw=", + "lastModified": 1682596858, + "narHash": "sha256-Hf9XVpqaGqe/4oDGr30W8HlsWvJXtMsEPHDqHZA6dDg=", "owner": "cachix", "repo": "pre-commit-hooks.nix", - "rev": "b7ca8f6fff42f6af75c17f9438fed1686b7d855d", + "rev": "fb58866e20af98779017134319b5663b8215d912", "type": "github" }, "original": { diff --git a/.github/nix/flake.nix b/flake.nix similarity index 63% rename from .github/nix/flake.nix rename to flake.nix index a326104..afa6caf 100644 --- a/.github/nix/flake.nix +++ b/flake.nix @@ -16,8 +16,8 @@ systems = [ "x86_64-linux" ]; imports = [ - ./with-nixpkgs.nix - ./with-opam-nix.nix + .nix/with-nixpkgs.nix + .nix/with-opam-nix.nix inputs.pre-commit-hooks.flakeModule ]; @@ -38,6 +38,25 @@ ocp-indent.enable = true; nixfmt.enable = true; deadnix.enable = true; + + ## NOTE: The version of the `dune-fmt` hook in `pre-commit-hooks.nix` + ## forgets to bring OCaml in the environment. In the meantime, we use + ## our own; will change back to `dune-fmt.enable = true` later. + tmp-dune-fmt = { + enable = true; + name = "dune-fmt"; + description = "Runs Dune's formatters on the code tree."; + entry = let + dune-fmt = pkgs.writeShellApplication { + name = "dune-fmt"; + text = '' + export PATH=${pkgs.ocaml}/bin:$PATH + exec ${pkgs.dune_3}/bin/dune fmt "$@" + ''; + }; + in "${dune-fmt}/bin/dune-fmt"; + pass_filenames = false; + }; }; }; diff --git a/src/API.mli b/src/API.mli index 35c6172..0163c2b 100644 --- a/src/API.mli +++ b/src/API.mli @@ -14,41 +14,41 @@ (** {3 Parsing shell scripts} *) (** [parse_file filename] performs the syntactic analysis of - [filename] and returns a concrete syntax tree if [filename] content - is syntactically correct. + [filename] and returns a concrete syntax tree if [filename] content + is syntactically correct. - Raises exceptions from {!Errors}. *) + Raises exceptions from {!Errors}. *) val parse_file: string -> CST.program (** [parse_string filename content] is similar to [parse_file] except - the script source code is provided as a string. *) + the script source code is provided as a string. *) val parse_string: string -> string -> CST.program (** {3 Serialization of CST} *) (** [load_binary_cst cin] retrieves a serialized CST from - input_channel [cin]. *) + input_channel [cin]. *) val load_binary_cst: in_channel -> CST.program (** [save_binary_cst cout cst] stores a serialized [cst] in [cout]. *) val save_binary_cst: out_channel -> CST.program -> unit (** [load_json_cst cin] retrieves a CST in JSON format from - input_channel [cin]. *) + input_channel [cin]. *) val load_json_cst: in_channel -> CST.program (** [save_json_cst cout cst] stores a [cst] using JSON format in - [cout]. *) + [cout]. *) val save_json_cst: out_channel -> CST.program -> unit (** [save_dot_cst cout cst] stores a [cst] using DOT format in - [cout]. *) + [cout]. *) val save_dot_cst: out_channel -> CST.program -> unit (** {3 CST helpers} *) (** [on_located f] applies [f] on a located value, preserving its - location. *) + location. *) val on_located : ('a -> 'b) -> 'a CST.located -> 'b (** [start_of_position p] returns the beginning of a position [p]. *) @@ -58,16 +58,16 @@ val start_of_position : CST.position -> Lexing.position val end_of_position : CST.position -> Lexing.position (** [filename_of_position p] returns the filename of a position - [p]. *) + [p]. *) val filename_of_position : CST.position -> string (** [string_of_lexing_position p] returns a human-readable - representation of the lexing position [p], using a format - recognized by Emacs, and other decent editors. *) + representation of the lexing position [p], using a format + recognized by Emacs, and other decent editors. *) val string_of_lexing_position : Lexing.position -> string (** {3 POSIX related helpers} *) (** [remove_quotes s] yields a copy of string [s], with all quotes - removed as described in the POSIX specification.*) + removed as described in the POSIX specification.*) val remove_quotes : string -> string diff --git a/src/CST.mli b/src/CST.mli index 532bdcc..8fe74f8 100644 --- a/src/CST.mli +++ b/src/CST.mli @@ -280,7 +280,7 @@ and filename = (** The two IoHere constructors have two arguments. The second argument is the word holding the contents of the here document, which does not figure in the grammar. - *) +*) and io_here = | IoHere_DLess_HereEnd of here_end' * word' ref | IoHere_DLessDash_HereEnd of here_end' * word' ref diff --git a/src/CSTHelpers.ml b/src/CSTHelpers.ml index 4499432..52a8615 100644 --- a/src/CSTHelpers.ml +++ b/src/CSTHelpers.ml @@ -22,16 +22,16 @@ let with_pos p v = } let dummy_lexing_position = { - pos_fname = ""; - pos_lnum = -1; - pos_bol = -1; - pos_cnum = -1; - } + pos_fname = ""; + pos_lnum = -1; + pos_bol = -1; + pos_cnum = -1; +} let dummy_position = { - start_p = dummy_lexing_position; - end_p = dummy_lexing_position; - } + start_p = dummy_lexing_position; + end_p = dummy_lexing_position; +} let with_poss p1 p2 v = with_pos { start_p = p1; end_p = p2 } v @@ -72,10 +72,10 @@ let string_of_position p = let filename = filename_of_position p in let l = line p.start_p in let c1, c2 = characters p.start_p p.end_p in - if filename = "" then - Printf.sprintf "Line %d, characters %d-%d" l c1 c2 - else - Printf.sprintf "File \"%s\", line %d, characters %d-%d" filename l c1 c2 + if filename = "" then + Printf.sprintf "Line %d, characters %d-%d" l c1 c2 + else + Printf.sprintf "File \"%s\", line %d, characters %d-%d" filename l c1 c2 let compare_positions p1 p2 = compare p1.start_p.pos_cnum p2.start_p.pos_cnum @@ -94,42 +94,42 @@ let empty_program = let nonempty_program p = match p with | Program_LineBreak _ -> - false + false | _ -> - true + true let rec concat_complete_commands' cs1 cs2 = let pos = merge_positions cs1.position cs2.position in with_pos pos @@ - match cs2.value with - | CompleteCommands_CompleteCommand c2 -> - let nl = with_pos cs1.position NewLineList_NewLine in - CompleteCommands_CompleteCommands_NewlineList_CompleteCommand ( - cs1, - nl, - c2) - | CompleteCommands_CompleteCommands_NewlineList_CompleteCommand (cs2, - nl, - c2) -> - CompleteCommands_CompleteCommands_NewlineList_CompleteCommand ( - concat_complete_commands' cs1 cs2, - nl, - c2 - ) + match cs2.value with + | CompleteCommands_CompleteCommand c2 -> + let nl = with_pos cs1.position NewLineList_NewLine in + CompleteCommands_CompleteCommands_NewlineList_CompleteCommand ( + cs1, + nl, + c2) + | CompleteCommands_CompleteCommands_NewlineList_CompleteCommand (cs2, + nl, + c2) -> + CompleteCommands_CompleteCommands_NewlineList_CompleteCommand ( + concat_complete_commands' cs1 cs2, + nl, + c2 + ) let concat_programs p1 p2 = let pos = merge_positions p1.position p2.position in with_pos pos @@ - match p1.value, p2.value with - | Program_LineBreak _, p | p, Program_LineBreak _ -> - p - | Program_LineBreak_CompleteCommands_LineBreak (pnl1, cs1, _snl1), - Program_LineBreak_CompleteCommands_LineBreak (_pnl2, cs2, snl2) -> - Program_LineBreak_CompleteCommands_LineBreak ( - pnl1, - concat_complete_commands' cs1 cs2, - snl2 - ) + match p1.value, p2.value with + | Program_LineBreak _, p | p, Program_LineBreak _ -> + p + | Program_LineBreak_CompleteCommands_LineBreak (pnl1, cs1, _snl1), + Program_LineBreak_CompleteCommands_LineBreak (_pnl2, cs2, snl2) -> + Program_LineBreak_CompleteCommands_LineBreak ( + pnl1, + concat_complete_commands' cs1 cs2, + snl2 + ) (* Helpers about words and names *) @@ -146,14 +146,14 @@ let string_of_word (Word (s, _)) = s let word_placeholder () = ref { - value = Word ("", []); - position = dummy_position - } + value = Word ("", []); + position = dummy_position + } module NameSet = Set.Make (struct - type t = name - let compare (Name s1) (Name s2) = String.compare s1 s2 -end) + type t = name + let compare (Name s1) (Name s2) = String.compare s1 s2 + end) let special_builtins_regexp = [ "break" ; ":" ; "continue" ; "." ; "eval" ; "exec" ; @@ -176,57 +176,57 @@ let make_function_name (Name s) = (** [wordlist_of_cmd_suffix] extracts the list of all words from a cmd_sufix *) let rec wordlist_of_cmd_suffix = function | CmdSuffix_IoRedirect _io_redirect' -> - [] + [] | CmdSuffix_CmdSuffix_IoRedirect (cmd_suffix',_io_redirect') -> - wordlist_of_cmd_suffix cmd_suffix'.value + wordlist_of_cmd_suffix cmd_suffix'.value | CmdSuffix_Word word'-> - [word'] + [word'] | CmdSuffix_CmdSuffix_Word (cmd_suffix',word') -> - (wordlist_of_cmd_suffix cmd_suffix'.value) @ [word'] + (wordlist_of_cmd_suffix cmd_suffix'.value) @ [word'] let io_redirect_list_of_cmd_prefix cmd_prefix = let rec aux acc = function | CmdPrefix_IoRedirect io_redirect' -> - io_redirect' :: acc + io_redirect' :: acc | CmdPrefix_CmdPrefix_IoRedirect (cmd_prefix', io_redirect') -> - aux (io_redirect' :: acc) cmd_prefix'.value + aux (io_redirect' :: acc) cmd_prefix'.value | CmdPrefix_AssignmentWord _ -> - acc + acc | CmdPrefix_CmdPrefix_AssignmentWord (cmd_prefix', _) -> - aux acc cmd_prefix'.value + aux acc cmd_prefix'.value in aux [] cmd_prefix let io_redirect_list_of_cmd_suffix cmd_suffix = let rec aux acc = function | CmdSuffix_IoRedirect io_redirect' -> - io_redirect' :: acc + io_redirect' :: acc | CmdSuffix_CmdSuffix_IoRedirect (cmd_suffix', io_redirect') -> - aux (io_redirect' :: acc) cmd_suffix'.value + aux (io_redirect' :: acc) cmd_suffix'.value | CmdSuffix_Word _ -> - acc + acc | CmdSuffix_CmdSuffix_Word (cmd_suffix', _) -> - aux acc cmd_suffix'.value + aux acc cmd_suffix'.value in aux [] cmd_suffix let io_redirect_list_of_redirect_list redirect_list = let rec aux acc = function | RedirectList_IoRedirect io_redirect' -> - io_redirect' :: acc + io_redirect' :: acc | RedirectList_RedirectList_IoRedirect (redirect_list', io_redirect') -> - aux (io_redirect' :: acc) redirect_list'.value + aux (io_redirect' :: acc) redirect_list'.value in aux [] redirect_list let io_redirect_list_of_simple_command = function | SimpleCommand_CmdPrefix_CmdWord_CmdSuffix (cmd_prefix', _, cmd_suffix') -> - (io_redirect_list_of_cmd_prefix cmd_prefix'.value) - @ (io_redirect_list_of_cmd_suffix cmd_suffix'.value) + (io_redirect_list_of_cmd_prefix cmd_prefix'.value) + @ (io_redirect_list_of_cmd_suffix cmd_suffix'.value) | SimpleCommand_CmdPrefix_CmdWord (cmd_prefix', _) | SimpleCommand_CmdPrefix cmd_prefix' -> - io_redirect_list_of_cmd_prefix cmd_prefix'.value + io_redirect_list_of_cmd_prefix cmd_prefix'.value | SimpleCommand_CmdName_CmdSuffix (_, cmd_suffix') -> - io_redirect_list_of_cmd_suffix cmd_suffix'.value + io_redirect_list_of_cmd_suffix cmd_suffix'.value | SimpleCommand_CmdName _ -> - [] + [] diff --git a/src/ExtMenhirLib.ml b/src/ExtMenhirLib.ml index 332a47e..de773c8 100644 --- a/src/ExtMenhirLib.ml +++ b/src/ExtMenhirLib.ml @@ -14,10 +14,10 @@ open MenhirLib.General let current_items parsing_state = match Lazy.force (stack parsing_state) with - | Nil -> - [] - | Cons (Element (s, _, _, _), _) -> - items s + | Nil -> + [] + | Cons (Element (s, _, _, _), _) -> + items s type 'a status = | AcceptedNow of 'a @@ -26,15 +26,15 @@ type 'a status = let rec close checkpoint = match checkpoint with - | AboutToReduce (_, _) | Shifting _ -> close (resume checkpoint) - | Rejected | HandlingError _ -> Wrong - | Accepted x -> AcceptedNow x - | InputNeeded _ -> Fine + | AboutToReduce (_, _) | Shifting _ -> close (resume checkpoint) + | Rejected | HandlingError _ -> Wrong + | Accepted x -> AcceptedNow x + | InputNeeded _ -> Fine let accepted_token checkpoint token = match checkpoint with - | InputNeeded _ -> close (offer checkpoint token) - | _ -> Wrong + | InputNeeded _ -> close (offer checkpoint token) + | _ -> Wrong let is_accepted_token checkpoint token = accepted_token checkpoint token <> Wrong @@ -57,7 +57,7 @@ let rec finished = function our needs. Hence, we introduce an extential type for weaken this precision. *) type nonterminal = - AnyN : 'a Parser.MenhirInterpreter.nonterminal -> nonterminal + AnyN : 'a Parser.MenhirInterpreter.nonterminal -> nonterminal let nonterminal_of_production p = match lhs p with @@ -67,7 +67,7 @@ let nonterminal_of_production p = exception EmptyStack type 'b top_symbol_processor = { - perform : 'a. 'a symbol * 'a -> 'b + perform : 'a. 'a symbol * 'a -> 'b } let on_top_symbol env f = diff --git a/src/aliases.ml b/src/aliases.ml index e92683a..df5bbe0 100644 --- a/src/aliases.ml +++ b/src/aliases.ml @@ -51,16 +51,16 @@ type state = | NextWordSubstituted type t = { - state : state; - definitions : (string * string) list - } + state : state; + definitions : (string * string) list +} type aliases = t let empty = { - state = NoRecentSubstitution; - definitions = [] - } + state = NoRecentSubstitution; + definitions = [] +} (** [bind_aliases to_bind aliases] returns an alias table obtained from [aliases] by adding all entries from [to_bind]. *) @@ -86,29 +86,29 @@ let binder_from_alias (x:CST.cmd_suffix) = let open List in let wl = wordlist_of_cmd_suffix x in fold_right (fun a accu -> - let s = bounded_split (regexp "=") (on_located unWord a) 2 in - if List.length s < 2 then - accu - else - (hd s, hd (tl s)):: accu) + let s = bounded_split (regexp "=") (on_located unWord a) 2 in + if List.length s < 2 then + accu + else + (hd s, hd (tl s)):: accu) wl [] let unalias_argument (x:CST.cmd_suffix) = CSTHelpers.( - List.map (on_located unWord) (wordlist_of_cmd_suffix x) -) + List.map (on_located unWord) (wordlist_of_cmd_suffix x) + ) let as_aliasing_related_command = function | SimpleCommand_CmdName_CmdSuffix ({ value = CmdName_Word w ; _ }, suffix) -> begin match w.value with - | Word ("alias", _) -> - let l = binder_from_alias suffix.value in - Some (Alias l) - | Word ("unalias", _) -> - let l = unalias_argument suffix.value in - Some (if l = ["-a"] then Reset else Unalias l) - | _ -> - None + | Word ("alias", _) -> + let l = binder_from_alias suffix.value in + Some (Alias l) + | Word ("unalias", _) -> + let l = unalias_argument suffix.value in + Some (if l = ["-a"] then Reset else Unalias l) + | _ -> + None end | SimpleCommand_CmdName _ | SimpleCommand_CmdPrefix_CmdWord_CmdSuffix _ @@ -126,27 +126,27 @@ let interpret aliases cst = let level = ref 0 in let at_toplevel () = !level = 0 in let analyzer = object - inherit [_] CSTVisitors.iter as super - method! visit_compound_command env cmd = - incr level; - super # visit_compound_command env cmd; - decr level - - method! visit_simple_command' _ cmd' = - match as_aliasing_related_command cmd'.value with - | Some alias_command -> - if at_toplevel () then match alias_command with - | Alias x -> aliases := bind_aliases x !aliases - | Unalias x -> aliases := unbind_aliases x !aliases - | Reset -> aliases := empty - else - raise (Errors.DuringAliasing( - cmd'.position.start_p, - "(un)alias in a nested command structure" - )) - | None -> - () - end + inherit [_] CSTVisitors.iter as super + method! visit_compound_command env cmd = + incr level; + super # visit_compound_command env cmd; + decr level + + method! visit_simple_command' _ cmd' = + match as_aliasing_related_command cmd'.value with + | Some alias_command -> + if at_toplevel () then match alias_command with + | Alias x -> aliases := bind_aliases x !aliases + | Unalias x -> aliases := unbind_aliases x !aliases + | Reset -> aliases := empty + else + raise (Errors.DuringAliasing( + cmd'.position.start_p, + "(un)alias in a nested command structure" + )) + | None -> + () + end in analyzer#visit_complete_command () cst; !aliases @@ -161,10 +161,10 @@ let substitute aliases w = let rec about_to_reduce_cmd_name checkpoint = match checkpoint with | AboutToReduce (_, production) -> - if lhs production = X (N N_linebreak) || lhs production = X (N N_word) then - about_to_reduce_cmd_name (resume checkpoint) - else - lhs production = X (N N_cmd_name) + if lhs production = X (N N_linebreak) || lhs production = X (N N_word) then + about_to_reduce_cmd_name (resume checkpoint) + else + lhs production = X (N N_cmd_name) | InputNeeded _ -> let dummy = Lexing.dummy_pos in let token = NAME (Name "a_word"), dummy, dummy in @@ -192,13 +192,13 @@ let rec about_to_reduce_word checkpoint = false (** [inside_a_substitution_combo state] is true if a sequence of alias - substitution is triggered by the following cornercase rule of the - standard.*) + substitution is triggered by the following cornercase rule of the + standard.*) (*specification: - If the value of the alias replacing the word ends in a , the - shell shall check the next command word for alias substitution; this - process shall continue until a word is found that is not a valid alias - or an alias value does not end in a . + If the value of the alias replacing the word ends in a , the + shell shall check the next command word for alias substitution; this + process shall continue until a word is found that is not a valid alias + or an alias value does not end in a . *) let inside_a_substitution_combo = function | CommandNameSubstituted | NextWordSubstituted -> true @@ -232,15 +232,15 @@ let only_if_end_with_whitespace word aliases state = let alias_substitution aliases checkpoint word = perform (aliases, word) @@ fun () -> if about_to_reduce_cmd_name checkpoint - && not (Keyword.is_reserved_word word) + && not (Keyword.is_reserved_word word) then let word = substitute aliases word in only_if_end_with_whitespace word aliases CommandNameSubstituted else - if about_to_reduce_word checkpoint - && inside_a_substitution_combo aliases.state - then - let word' = substitute aliases word in - only_if_end_with_whitespace word' aliases NextWordSubstituted - else - (aliases, word) + if about_to_reduce_word checkpoint + && inside_a_substitution_combo aliases.state + then + let word' = substitute aliases word in + only_if_end_with_whitespace word' aliases NextWordSubstituted + else + (aliases, word) diff --git a/src/assignment.ml b/src/assignment.ml index f32242a..e1f0a8b 100644 --- a/src/assignment.ml +++ b/src/assignment.ml @@ -44,15 +44,15 @@ open Name let recognize_assignment checkpoint pretoken word_cst = FirstSuccessMonad.( match word_cst with | [WordAssignmentWord ((Name n) as name, w)] -> - if is_name n then - let (_, pstart, pstop) = pretoken in - let token = ASSIGNMENT_WORD (name, w) in - if accepted_token checkpoint (token, pstart, pstop) <> Wrong then - return token - else - fail - else - fail + if is_name n then + let (_, pstart, pstop) = pretoken in + let token = ASSIGNMENT_WORD (name, w) in + if accepted_token checkpoint (token, pstart, pstop) <> Wrong then + return token + else + fail + else + fail | _ -> - fail -) + fail + ) diff --git a/src/c/CAPI.ml b/src/c/CAPI.ml index 06fce6d..040510b 100644 --- a/src/c/CAPI.ml +++ b/src/c/CAPI.ml @@ -24,40 +24,40 @@ let ccst_of_json_program j = in let rec aux = function | `Assoc [ "value", v; "position", p ] -> - let start_p, end_p = location p in - Location (start_p, end_p, aux v) + let start_p, end_p = location p in + Location (start_p, end_p, aux v) | `List (`String k :: children) -> - Node (k, aux' (`List children)) + Node (k, aux' (`List children)) | `Variant (k, None) -> - Node (k, [||]) + Node (k, [||]) | `Variant (k, Some children) -> - Node (k, aux' children) + Node (k, aux' children) | `String s -> - Data s + Data s | `List l -> - Node ("Tuple", aux' (`List l)) + Node ("Tuple", aux' (`List l)) | json -> - unexpected_case json + unexpected_case json and aux' = function | `List c -> - Array.of_list (List.map aux c) + Array.of_list (List.map aux c) | `Assoc m -> - aux' (`List (snd (List.split m))) + aux' (`List (snd (List.split m))) | json -> - unexpected_case json + unexpected_case json and position = function | `Assoc [ "pos_fname", `String pos_fname; "pos_lnum", `Int pos_lnum; "pos_bol", `Int pos_bol; "pos_cnum", `Int pos_cnum ] -> - CST.({ pos_fname; pos_lnum; pos_bol; pos_cnum }) + CST.({ pos_fname; pos_lnum; pos_bol; pos_cnum }) | json -> - unexpected_case json + unexpected_case json and location = function | `Assoc [ "start_p", start_p; "end_p", end_p ] -> - (position start_p, position end_p) + (position start_p, position end_p) | json -> - unexpected_case json + unexpected_case json in aux j diff --git a/src/dune b/src/dune index b4be1a9..f8b51d5 100644 --- a/src/dune +++ b/src/dune @@ -7,7 +7,8 @@ (rule (targets version.ml) (action - (with-stdout-to version.ml + (with-stdout-to + version.ml (echo "let current=\"%{version:morbig}\"\n")))) (executable @@ -16,18 +17,27 @@ (libraries str)) (rule - (targets CSTSerializers.ml) (deps CST.mli) - (action (with-stdout-to %{targets} (run ./CST_derivings_generator.exe serializers)))) + (targets CSTSerializers.ml) + (deps CST.mli) + (action + (with-stdout-to + %{targets} + (run ./CST_derivings_generator.exe serializers)))) (rule - (targets CSTVisitors.ml) (deps CST.mli) - (action (with-stdout-to %{targets} (run ./CST_derivings_generator.exe visitors)))) + (targets CSTVisitors.ml) + (deps CST.mli) + (action + (with-stdout-to + %{targets} + (run ./CST_derivings_generator.exe visitors)))) (library (name morbig) (public_name morbig) (libraries str menhirLib ppx_deriving_yojson.runtime visitors.runtime) - (preprocess (pps ppx_deriving_yojson visitors.ppx)) + (preprocess + (pps ppx_deriving_yojson visitors.ppx)) (flags :standard -w -3) ; FIXME: remove this when Yojson and its PPX are fixed. (modules :standard \ morbigDriver CST_derivings_generator) (modules_without_implementation CST)) @@ -37,5 +47,7 @@ (public_name morbig) (ocamlopt_flags :standard) (libraries morbig) - (preprocess (pps ppx_deriving_yojson visitors.ppx)) ;; Avoid warning about incomplete merlin files. + (preprocess + (pps ppx_deriving_yojson visitors.ppx)) + ;; Avoid warning about incomplete merlin files. (modules morbigDriver)) diff --git a/src/engine.ml b/src/engine.ml index 9d59617..4eecb8e 100644 --- a/src/engine.ml +++ b/src/engine.ml @@ -22,202 +22,202 @@ open Assignment open Aliases type state = { - checkpoint : program located checkpoint; - aliases : Aliases.t; - } + checkpoint : program located checkpoint; + aliases : Aliases.t; +} module type Lexer = - sig - val initialize: prelexer_state -> Lexing.lexbuf -> unit - val next_token: state -> token * lexing_position * lexing_position * aliases - val at_eof: unit -> bool option - val shift: unit -> unit - val empty_input: unit -> bool - val current_position: unit -> lexing_position - val roll_back_to_last_parsing_state: unit -> state - end +sig + val initialize: prelexer_state -> Lexing.lexbuf -> unit + val next_token: state -> token * lexing_position * lexing_position * aliases + val at_eof: unit -> bool option + val shift: unit -> unit + val empty_input: unit -> bool + val current_position: unit -> lexing_position + val roll_back_to_last_parsing_state: unit -> state +end let parse partial (module Lexer : Lexer) = - (**--------------**) - (** Parsing loop. *) - (**--------------**) + (**--------------**) + (** Parsing loop. *) + (**--------------**) let rec parse csts { aliases; checkpoint } = match checkpoint with - (** + (** - If the parser requires some extra input to continue - the analyze, [next_token] is called with the current - parsing state as argument. + If the parser requires some extra input to continue + the analyze, [next_token] is called with the current + parsing state as argument. - *) - | InputNeeded _parsing_state -> - let (token, ps, pe, aliases) = - Lexer.next_token { aliases; checkpoint } - in - parse csts { aliases; checkpoint = offer checkpoint (token, ps, pe) } + *) + | InputNeeded _parsing_state -> + let (token, ps, pe, aliases) = + Lexer.next_token { aliases; checkpoint } + in + parse csts { aliases; checkpoint = offer checkpoint (token, ps, pe) } - (** + (** - If the parser has recognized a complete command and - we are not at the end of the input, we restart a parser - on the sequel. + If the parser has recognized a complete command and + we are not at the end of the input, we restart a parser + on the sequel. - *) - | Accepted cst -> - begin match Lexer.at_eof () with + *) + | Accepted cst -> + begin match Lexer.at_eof () with | None -> - (** The only way for a complete command to be accepted is - to have been concluded by an EOF. (See the grammar.) *) - assert false + (** The only way for a complete command to be accepted is + to have been concluded by an EOF. (See the grammar.) *) + assert false | Some true -> - (** The EOF token was a real end-of-file marker. *) - if Options.(backend () = NoSerialisation) then - [] - else cst :: csts + (** The EOF token was a real end-of-file marker. *) + if Options.(backend () = NoSerialisation) then + [] + else cst :: csts | Some false -> - (** The EOF token was a pseudo end-of-file marker. - Probably generated by a NEWLINE promoted to a EOF. *) - Lexer.shift (); - let checkpoint = entry_point (Lexer.current_position ()) in - let csts = - if Options.(backend () = NoSerialisation) then - [] - else - cst :: csts - in - parse csts { aliases; checkpoint } - end - - (** - - The parser has rejected the input. - - *) - | Rejected -> - (** - - We sometimes want to recognize a *prefix* of the input stream. - - Therefore, if a token produces a parse error, it might be - possible that the currently read prefix of the input - already is a valid shell script. To check that, we roll - back to the previous state and we inject EOF to check if - the fragment of the input already read can be recognized as - a complete command. - - This procedure leads to a longest-prefix parsing. - - *) - if partial then ( - (** 1. Rollback to the state preceding the last token insertion. *) - let state = Lexer.roll_back_to_last_parsing_state () in - (** 2. Put back EOF, see if the prefix is accepted. *) - match accepted_raw_token state.checkpoint EOF with - | AcceptedNow cst -> - (** 2.b Yes? Stop here. - Put back the token that caused the syntax error. - Return the CST *) - [cst] - | _status -> - (** 2.a No? It is a syntax error. *) - parse_error () - ) else - parse_error () - - (** + (** The EOF token was a pseudo end-of-file marker. + Probably generated by a NEWLINE promoted to a EOF. *) + Lexer.shift (); + let checkpoint = entry_point (Lexer.current_position ()) in + let csts = + if Options.(backend () = NoSerialisation) then + [] + else + cst :: csts + in + parse csts { aliases; checkpoint } + end - We have no specific treatment of parsing errors. + (** - *) - | HandlingError _env -> - parse csts { aliases; checkpoint = resume checkpoint } + The parser has rejected the input. + *) + | Rejected -> (** - The shell grammar follows a parsing-dependent lexical - analysis: they are some places where a reserved word must be - recognized as a simple word when it cannot be written at a - given place of the input (see - [recognize_reserved_word_if_relevant] defined - earlier). However, they are some other places where this - conversion from reserved words to simple words is forbidden. - - For instance, while the input - - `` echo else `` - - is syntactically correct, the input - - `` else echo `` - - is not. + We sometimes want to recognize a *prefix* of the input stream. - Instead of complicating - [recognize_reserved_word_if_relevant], we decided to detect a - posteriori when the conversion from reserved words to simple - words should not have been made. This detection is easily - feasible because there is actually only one place in the - grammar where this conversion is forbidden: a reserved word - can never be converted to a simple word where a [cmd_word] is - expected. + Therefore, if a token produces a parse error, it might be + possible that the currently read prefix of the input + already is a valid shell script. To check that, we roll + back to the previous state and we inject EOF to check if + the fragment of the input already read can be recognized as + a complete command. - Fortunately, menhir gives us the control back when it is - about to reduce a nonterminal. Therefore, it is possible to - detect when a simple word, which is also a reserved word, has - been reduced to a [cmd_word]. - - The reduction of a [cmd_word] is also the place where we - can detect if an alias command is invoked. + This procedure leads to a longest-prefix parsing. *) - | AboutToReduce (env, production) -> - let nt = nonterminal_of_production production in - - let rec reject_cmd_words_which_are_reserved_words () = - if is_cmd () && top_is_keyword () then parse_error () - and is_cmd () = - nt = AnyN N_cmd_word || nt = AnyN N_cmd_name - and top_is_keyword () = - on_top_symbol env { perform } - and perform : type a. a symbol * a -> _ = function - | N N_word, Word (w, _) -> is_reserved_word w - | _ -> false - in + if partial then ( + (** 1. Rollback to the state preceding the last token insertion. *) + let state = Lexer.roll_back_to_last_parsing_state () in + (** 2. Put back EOF, see if the prefix is accepted. *) + match accepted_raw_token state.checkpoint EOF with + | AcceptedNow cst -> + (** 2.b Yes? Stop here. + Put back the token that caused the syntax error. + Return the CST *) + [cst] + | _status -> + (** 2.a No? It is a syntax error. *) + parse_error () + ) else + parse_error () + + (** + + We have no specific treatment of parsing errors. + + *) + | HandlingError _env -> + parse csts { aliases; checkpoint = resume checkpoint } + + (** + + The shell grammar follows a parsing-dependent lexical + analysis: they are some places where a reserved word must be + recognized as a simple word when it cannot be written at a + given place of the input (see + [recognize_reserved_word_if_relevant] defined + earlier). However, they are some other places where this + conversion from reserved words to simple words is forbidden. + + For instance, while the input + + `` echo else `` + + is syntactically correct, the input + + `` else echo `` + + is not. + + Instead of complicating + [recognize_reserved_word_if_relevant], we decided to detect a + posteriori when the conversion from reserved words to simple + words should not have been made. This detection is easily + feasible because there is actually only one place in the + grammar where this conversion is forbidden: a reserved word + can never be converted to a simple word where a [cmd_word] is + expected. + + Fortunately, menhir gives us the control back when it is + about to reduce a nonterminal. Therefore, it is possible to + detect when a simple word, which is also a reserved word, has + been reduced to a [cmd_word]. + + The reduction of a [cmd_word] is also the place where we + can detect if an alias command is invoked. + + *) + | AboutToReduce (env, production) -> + let nt = nonterminal_of_production production in + + let rec reject_cmd_words_which_are_reserved_words () = + if is_cmd () && top_is_keyword () then parse_error () + and is_cmd () = + nt = AnyN N_cmd_word || nt = AnyN N_cmd_name + and top_is_keyword () = + on_top_symbol env { perform } + and perform : type a. a symbol * a -> _ = function + | N N_word, Word (w, _) -> is_reserved_word w + | _ -> false + in - let rec process_alias_command_if_present () = - if nt = AnyN N_complete_commands then - on_top_symbol env { perform = interpret_alias_command } - else - let checkpoint = resume checkpoint in - parse csts { aliases; checkpoint } - and interpret_alias_command: type a. a symbol * a -> _ = function - | N N_complete_command, cst -> - let aliases = Aliases.interpret aliases cst in - let checkpoint = resume checkpoint in - parse csts { aliases; checkpoint } - | _ -> - assert false (* By correctness of the underlying LR automaton. *) - in - reject_cmd_words_which_are_reserved_words (); - process_alias_command_if_present () + let rec process_alias_command_if_present () = + if nt = AnyN N_complete_commands then + on_top_symbol env { perform = interpret_alias_command } + else + let checkpoint = resume checkpoint in + parse csts { aliases; checkpoint } + and interpret_alias_command: type a. a symbol * a -> _ = function + | N N_complete_command, cst -> + let aliases = Aliases.interpret aliases cst in + let checkpoint = resume checkpoint in + parse csts { aliases; checkpoint } + | _ -> + assert false (* By correctness of the underlying LR automaton. *) + in + reject_cmd_words_which_are_reserved_words (); + process_alias_command_if_present () - (** + (** - The other intermediate steps of the parser are ignored. + The other intermediate steps of the parser are ignored. - *) - | Shifting (_, _, _) -> - parse csts { aliases; checkpoint = resume checkpoint } + *) + | Shifting (_, _, _) -> + parse csts { aliases; checkpoint = resume checkpoint } - and parse_error : type a. unit -> a = fun () -> - raise (Errors.DuringParsing (Lexer.current_position ())) + and parse_error : type a. unit -> a = fun () -> + raise (Errors.DuringParsing (Lexer.current_position ())) in parse [] { - aliases = Aliases.empty; - checkpoint = entry_point (Lexer.current_position ()) - } + aliases = Aliases.empty; + checkpoint = entry_point (Lexer.current_position ()) + } let recognize_word_if_relevant checkpoint word = accepted_token checkpoint word <> Wrong @@ -290,10 +290,10 @@ module Lexer (U : sig end) : Lexer = struct (token, pstart, pstop, aliases) in match pretoken with - | Pretoken.IoNumber i -> - return (IO_NUMBER (IONumber i)) + | Pretoken.IoNumber i -> + return (IO_NUMBER (IONumber i)) - | Pretoken.PreWord (w0, cst) -> + | Pretoken.PreWord (w0, cst) -> (*specification: @@ -320,92 +320,92 @@ module Lexer (U : sig end) : Lexer = struct rules, or applies globally. *) - let new_aliases, w = alias_substitution aliases checkpoint w0 in - let word = - if w == w0 then - WORD (Word (w, List.(flatten (map parse_pattern cst)))) - else - WORD (Word (w, [WordLiteral w])) - in - let well_delimited_keyword = - match previous_token () with - | Some (Semicolon | DSEMI | NEWLINE | Rbrace | Rparen | Uppersand - | Fi) -> true - | _ -> match previous_token ~n:1 () with - | Some For -> true - | _ -> false - in - let token = FirstSuccessMonad.( + let new_aliases, w = alias_substitution aliases checkpoint w0 in + let word = + if w == w0 then + WORD (Word (w, List.(flatten (map parse_pattern cst)))) + else + WORD (Word (w, [WordLiteral w])) + in + let well_delimited_keyword = + match previous_token () with + | Some (Semicolon | DSEMI | NEWLINE | Rbrace | Rparen | Uppersand + | Fi) -> true + | _ -> match previous_token ~n:1 () with + | Some For -> true + | _ -> false + in + let token = FirstSuccessMonad.( (recognize_assignment checkpoint p cst) +> (recognize_reserved_word_if_relevant well_delimited_keyword checkpoint p w) +> return word ) - in - if HDL.next_word_is_here_document_delimiter () then - (*specification: + in + if HDL.next_word_is_here_document_delimiter () then + (*specification: - 2.7.4 Here-Document + 2.7.4 Here-Document - If any part of word is quoted, the delimiter shall be - formed by performing quote removal on word, and the - here-document lines shall not be expanded. Otherwise, - the delimiter shall be the word itself. + If any part of word is quoted, the delimiter shall be + formed by performing quote removal on word, and the + here-document lines shall not be expanded. Otherwise, + the delimiter shall be the word itself. - *) - HDL.push_here_document_delimiter w cst; - return ~new_aliases (FirstSuccessMonad.should_succeed token) + *) + HDL.push_here_document_delimiter w cst; + return ~new_aliases (FirstSuccessMonad.should_succeed token) - | Pretoken.EOF -> - real_eof := true; - return EOF + | Pretoken.EOF -> + real_eof := true; + return EOF - | Pretoken.Operator ((DLESS r | DLESSDASH r) as token) -> - let dashed = match token with DLESSDASH _ -> true | _ -> false in - HDL.push_here_document_operator dashed r; - return token + | Pretoken.Operator ((DLESS r | DLESSDASH r) as token) -> + let dashed = match token with DLESSDASH _ -> true | _ -> false in + HDL.push_here_document_operator dashed r; + return token - | Pretoken.Operator token -> - return token + | Pretoken.Operator token -> + return token - | Pretoken.NEWLINE -> + | Pretoken.NEWLINE -> (** The interpretation of the pretoken [NEWLINE] depends on the parsing context: *) (** If we are to recognize a here-document, [NEWLINE] triggers the here-document lexing mode. *) - if HDL.next_line_is_here_document () then ( - HDL.start_here_document_lexing (); - next_token { aliases; checkpoint } - ) + if HDL.next_line_is_here_document () then ( + HDL.start_here_document_lexing (); + next_token { aliases; checkpoint } + ) (** If the input is completed, [NEWLINE] is interpreted as the end-of-file marker. *) - else if finished (offer checkpoint (EOF, pstart, pstop)) then ( - return EOF - ) + else if finished (offer checkpoint (EOF, pstart, pstop)) then ( + return EOF + ) (** If the input is not completed but [NEWLINE] as a meaning from the point of view of the grammar, it is promoted as a token and communicated to the parser. *) - else if is_accepted_token checkpoint (NEWLINE, pstart, pstop) then - return NEWLINE + else if is_accepted_token checkpoint (NEWLINE, pstart, pstop) then + return NEWLINE - else if is_accepted_token checkpoint (Semicolon, pstart, pstop) then - return Semicolon + else if is_accepted_token checkpoint (Semicolon, pstart, pstop) then + return Semicolon (** Otherwise, a [NEWLINE] is simply layout and is ignored. *) - else (* next_token { aliases; checkpoint }*) - raise (Errors.DuringParsing pstart) + else (* next_token { aliases; checkpoint }*) + raise (Errors.DuringParsing pstart) let last_state = ref None let copy_position p = Lexing.{ - pos_fname = p.pos_fname; - pos_lnum = p.pos_lnum; - pos_bol = p.pos_bol; - pos_cnum = p.pos_cnum + pos_fname = p.pos_fname; + pos_lnum = p.pos_lnum; + pos_bol = p.pos_bol; + pos_cnum = p.pos_cnum } let next_token ({ aliases; checkpoint } as state) = @@ -422,11 +422,11 @@ module Lexer (U : sig end) : Lexer = struct match !last_state with | None -> assert false (** By precondition. *) | Some (state, _token, _curr_p) -> - (* FIXME: Temporary adhoc treatment of rollback. *) - let pos = (lexbuf ()).lex_curr_p.pos_cnum in - (lexbuf ()).lex_curr_p <- - { (lexbuf ()).lex_curr_p with pos_cnum = pos - 1 }; - state + (* FIXME: Temporary adhoc treatment of rollback. *) + let pos = (lexbuf ()).lex_curr_p.pos_cnum in + (lexbuf ()).lex_curr_p <- + { (lexbuf ()).lex_curr_p with pos_cnum = pos - 1 }; + state let shift () = tokens := [] diff --git a/src/errors.ml b/src/errors.ml index 0f3bb12..5e42500 100644 --- a/src/errors.ml +++ b/src/errors.ml @@ -19,20 +19,20 @@ exception DuringIO of string let string_of_error = function | DuringParsing pos -> - Printf.sprintf "%s: Syntax error." - CSTHelpers.(string_of_lexing_position pos) + Printf.sprintf "%s: Syntax error." + CSTHelpers.(string_of_lexing_position pos) | DuringLexing (pos, msg) -> - Printf.sprintf "%s: Lexical error (%s)." - CSTHelpers.(string_of_lexing_position pos) - msg + Printf.sprintf "%s: Lexical error (%s)." + CSTHelpers.(string_of_lexing_position pos) + msg | DuringIO msg -> - Printf.sprintf "Input/Output error (%s)." msg + Printf.sprintf "Input/Output error (%s)." msg | DuringAliasing (pos,msg) -> - Printf.sprintf "%s: Alias handling limitation (%s)." - CSTHelpers.(string_of_lexing_position pos) - msg + Printf.sprintf "%s: Alias handling limitation (%s)." + CSTHelpers.(string_of_lexing_position pos) + msg | Failure s -> - "Failure: " ^ s ^ "." + "Failure: " ^ s ^ "." | Sys_error s -> - "Error: " ^ s ^ "." + "Error: " ^ s ^ "." | e -> raise e diff --git a/src/extPervasives.ml b/src/extPervasives.ml index d00c195..6f6b12b 100644 --- a/src/extPervasives.ml +++ b/src/extPervasives.ml @@ -14,7 +14,7 @@ let rec nat_exp k = function | 0 -> 1 | 1 -> k | n -> let l = nat_exp k (n / 2) in - l * l * (if n mod 2 = 0 then 1 else k) + l * l * (if n mod 2 = 0 then 1 else k) let comment f message = let y = f () in @@ -83,11 +83,11 @@ let option_map o f = match o with let string_cut_at k s = String.( - if length s > k then - sub s 0 k ^ "..." - else - s -) + if length s > k then + sub s 0 k ^ "..." + else + s + ) exception InvalidSuffix of string * string @@ -97,13 +97,13 @@ let string_split k s = try String.sub s 0 k, String.sub s k (n - k) with _ -> assert false let string_remove_suffix suffix s = String.( - let k = length s - length suffix in - if k < 0 then raise (InvalidSuffix (s, suffix)); - let r = try sub s 0 k with _ -> assert false in - let c = try sub s k (length suffix) with _ -> assert false in - if suffix <> c then raise (InvalidSuffix (s, suffix)); - r -) + let k = length s - length suffix in + if k < 0 then raise (InvalidSuffix (s, suffix)); + let r = try sub s 0 k with _ -> assert false in + let c = try sub s k (length suffix) with _ -> assert false in + if suffix <> c then raise (InvalidSuffix (s, suffix)); + r + ) let string_last_char s = String.(s.[length s - 1]) @@ -147,9 +147,9 @@ let string_strip s = let n = String.length s in if n > 0 then let lastchar = s.[n-1] in - if lastchar = '\n' - then try String.sub s 0 (n-1) with _ -> assert false - else s + if lastchar = '\n' + then try String.sub s 0 (n-1) with _ -> assert false + else s else s let reduce default f l = @@ -161,30 +161,30 @@ let reduce default f l = let repeat n f = let rec aux i = - if i = n then - [] - else - f i :: aux (i + 1) + if i = n then + [] + else + f i :: aux (i + 1) in aux 0 let rec take n l = if n = 0 then [], l else match l with - | [] -> - [], [] - | x :: xs -> - let ys, xs = take (n - 1) xs in - x :: ys, xs + | [] -> + [], [] + | x :: xs -> + let ys, xs = take (n - 1) xs in + x :: ys, xs let take_until pred l = let rec aux accu = function - | [] -> [], l - | x :: xs -> - if pred x then - List.rev accu, x :: xs - else - aux (x :: accu) xs + | [] -> [], l + | x :: xs -> + if pred x then + List.rev accu, x :: xs + else + aux (x :: accu) xs in aux [] l @@ -221,8 +221,8 @@ end = struct let ( >>= ) x f = match x with - | None -> fail - | Some x -> f x + | None -> fail + | Some x -> f x let rec reduce default f = function | [] -> return default @@ -230,8 +230,8 @@ end = struct let ( +> ) x y = match x with - | None -> y - | Some _ -> x + | None -> y + | Some _ -> x let run x = x @@ -259,10 +259,10 @@ let pp_to_string pp arg = Buffer.contents b let lexing_make filename contents = Lexing.( - let lexbuf = Lexing.from_string contents in - lexbuf.lex_curr_p <- { lexbuf.lex_curr_p with pos_fname = filename }; - lexbuf -) + let lexbuf = Lexing.from_string contents in + lexbuf.lex_curr_p <- { lexbuf.lex_curr_p with pos_fname = filename }; + lexbuf + ) let ( <$> ) x f = f (); x diff --git a/src/hereDocument.ml b/src/hereDocument.ml index 094666d..62f7866 100644 --- a/src/hereDocument.ml +++ b/src/hereDocument.ml @@ -30,19 +30,19 @@ end = struct here-document associated with the first operator shall be supplied first by the application and shall be read first by the shell. - *) + *) type delimiter_info = { - (** information about a delimiter of a here document: *) - word: string; - (** delimiting word, with quotes removed *) - quoted: bool; - (** parts of delimiting word quoted ? *) - dashed: bool; - (** here operator <<- ? *) - contents_placeholder: CST.word CST.located ref - (** placeholder for the contents of the here document *) - } + (** information about a delimiter of a here document: *) + word: string; + (** delimiting word, with quotes removed *) + quoted: bool; + (** parts of delimiting word quoted ? *) + dashed: bool; + (** here operator <<- ? *) + contents_placeholder: CST.word CST.located ref + (** placeholder for the contents of the here document *) + } let delimiters_queue = (Queue.create (): delimiter_info Queue.t) let dashed_tmp = ref (None: bool option) let word_ref_tmp = ref (None: word located ref option) @@ -71,7 +71,7 @@ end = struct - they have not been assigned a value (state NoHereDocuments), - or they have been assigned a value which has been used up by push_here_document_delimiter (state GotDelimiter). - *) + *) assert (!dashed_tmp = None); dashed_tmp := Some dashed; assert (!word_ref_tmp = None); @@ -81,7 +81,7 @@ end = struct let push_here_document_delimiter _w cst = (* we accept a push of a delimiting word only if we have already received information about an operator which has not yet been used. - *) + *) assert (!state = GotHereOperator); let quoted_flag = ref false in let dashed = match !dashed_tmp with @@ -95,19 +95,19 @@ end = struct let rec unquote = function | [] -> "" | WordDoubleQuoted s :: w -> - quoted_flag := true; - QuoteRemoval.on_string (unword s) ^ unquote w + quoted_flag := true; + QuoteRemoval.on_string (unword s) ^ unquote w | WordSingleQuoted s :: w -> - quoted_flag := true; - unword s ^ unquote w + quoted_flag := true; + unword s ^ unquote w | (WordLiteral s | WordName s) :: w -> - let s' = Str.(global_replace (regexp "\\") "" s) in - if s <> s' then quoted_flag := true; - s' ^ unquote w + let s' = Str.(global_replace (regexp "\\") "" s) in + if s <> s' then quoted_flag := true; + s' ^ unquote w | WordVariable (VariableAtom (s, NoAttribute)) :: w -> - "$" ^ s ^ unquote w + "$" ^ s ^ unquote w | _ -> - failwith "Unsupported expansion in here document delimiter" + failwith "Unsupported expansion in here document delimiter" in unquote cst in @@ -121,12 +121,12 @@ end = struct If any part of word is quoted, the delimiter shall be formed by performing quote removal on word, and the here-document lines shall not be expanded. Otherwise, the delimiter shall be the word itself. - *) - word = unquoted_w; - quoted; - dashed; - contents_placeholder = word_ref - } delimiters_queue; + *) + word = unquoted_w; + quoted; + dashed; + contents_placeholder = word_ref + } delimiters_queue; state := GotDelimiter let next_here_document lexbuf current = @@ -135,7 +135,7 @@ end = struct the next and continues until there is a line containing only the delimiter and a , with no characters in between. Then the next here-document starts, if there is one. - *) + *) assert (!state = InsideHereDocuments); let delimiter_info = try @@ -166,7 +166,7 @@ end = struct (*specification: If the redirection operator is "<<-", all leading characters shall be stripped from input lines ... - *) + *) if delimiter_info.dashed then QuoteRemoval.remove_tabs_at_linestart contents else @@ -192,12 +192,12 @@ end = struct in match result with | [Pretoken.NEWLINE, p1, p2] -> - (* Special case for empty here document or ended by EOF. *) - (Word ("", []), p1, p2) + (* Special case for empty here document or ended by EOF. *) + (Word ("", []), p1, p2) | [Pretoken.EOF, _, pos] -> - raise (Errors.DuringParsing pos) + raise (Errors.DuringParsing pos) | result -> - located_word_of result + located_word_of result in store_here_document delimiter_info.word cst doc doc_start line_end; if Queue.is_empty delimiters_queue then state := NoHereDocuments; @@ -205,7 +205,7 @@ end = struct Lexing.({ line_end with pos_cnum = line_end.pos_cnum - 1; pos_bol = line_end.pos_bol - 1; - }) + }) in (Pretoken.NEWLINE, before_stop, line_end) @@ -217,7 +217,7 @@ end = struct (* if we have a value in dashed_tmp this means that we have read a here operator for which we have not yet seen the corresponding delimiting word. - *) + *) !dashed_tmp <> None let next_line_is_here_document () = diff --git a/src/hereDocument.mli b/src/hereDocument.mli index f565718..e3f33c7 100644 --- a/src/hereDocument.mli +++ b/src/hereDocument.mli @@ -13,40 +13,40 @@ - internal registration of redirection operators (<< or <<-), and of the delimiter word that follows such an operator. - scannig of the contents of a series of here documents. - *) +*) module Lexer : functor (U : sig end) -> - sig - val push_here_document_operator : - bool -> CST.word CST.located ref -> unit - (** [push_here_document_operator dashed word_ref] registers a redirection - operator: - - [dashed] is [true] when the operator is <<-, and [false] if << - - [word_ref] is a reference to a located word. This reference will - later be assigned the contents of the here document. - *) - val push_here_document_delimiter : string -> CST.word_cst -> unit - (** [push_here_document_delimiter word] registers [word] as the - delimiting word pertaining to the preceding redirection operator. - *) + sig + val push_here_document_operator : + bool -> CST.word CST.located ref -> unit + (** [push_here_document_operator dashed word_ref] registers a redirection + operator: + - [dashed] is [true] when the operator is <<-, and [false] if << + - [word_ref] is a reference to a located word. This reference will + later be assigned the contents of the here document. + *) + val push_here_document_delimiter : string -> CST.word_cst -> unit + (** [push_here_document_delimiter word] registers [word] as the + delimiting word pertaining to the preceding redirection operator. + *) - val start_here_document_lexing : unit -> unit - (** start scanning the here documents that we have registered. *) - val next_here_document : - Lexing.lexbuf -> PrelexerState.t -> - Pretoken.t * Lexing.position * Lexing.position - (** scans the contents of a here document including the line containing - the delimiter. Returns the pretoken containing the contents of the - here document. As a side effect, assigns the contents of the here - document to the reference that was registered by - [push_here_document_operator]. - *) + val start_here_document_lexing : unit -> unit + (** start scanning the here documents that we have registered. *) + val next_here_document : + Lexing.lexbuf -> PrelexerState.t -> + Pretoken.t * Lexing.position * Lexing.position + (** scans the contents of a here document including the line containing + the delimiter. Returns the pretoken containing the contents of the + here document. As a side effect, assigns the contents of the here + document to the reference that was registered by + [push_here_document_operator]. + *) - val inside_here_document : unit -> bool - (** Are we currently reading a sequence of here documents? *) - val next_word_is_here_document_delimiter : unit -> bool - (** Must the next word be a here document delimiter? *) - val next_line_is_here_document : unit -> bool - (** Do we have to read here documents starting from the next line? *) - end + val inside_here_document : unit -> bool + (** Are we currently reading a sequence of here documents? *) + val next_word_is_here_document_delimiter : unit -> bool + (** Must the next word be a here document delimiter? *) + val next_line_is_here_document : unit -> bool + (** Do we have to read here documents starting from the next line? *) + end diff --git a/src/jsonHelpers.ml b/src/jsonHelpers.ml index d539deb..03886bb 100644 --- a/src/jsonHelpers.ml +++ b/src/jsonHelpers.ml @@ -11,13 +11,13 @@ let rec json_filter_positions = function | `Assoc sjl -> - if List.for_all (fun (s, _j) -> s = "value" || s = "position") sjl then - let (_, j) = List.find (fun (s, _) -> s = "value") sjl in - json_filter_positions j - else - `Assoc (List.map (fun (s, j) -> - Format.printf "%s@." s; (s, json_filter_positions j)) sjl - ) + if List.for_all (fun (s, _j) -> s = "value" || s = "position") sjl then + let (_, j) = List.find (fun (s, _) -> s = "value") sjl in + json_filter_positions j + else + `Assoc (List.map (fun (s, j) -> + Format.printf "%s@." s; (s, json_filter_positions j)) sjl + ) | `Bool b -> `Bool b | `Float f -> `Float f | `Int i -> `Int i @@ -40,9 +40,9 @@ let save_as_json simplified cout csts = let load_from_json cin = Yojson.Safe.from_channel cin |> CSTSerializers.program_of_yojson |> Ppx_deriving_yojson_runtime.Result.(function - | Ok cst -> cst - | Error msg -> raise (Errors.DuringIO msg) - ) + | Ok cst -> cst + | Error msg -> raise (Errors.DuringIO msg) + ) let json_to_dot cout json = Printf.( @@ -54,25 +54,25 @@ let json_to_dot cout json = in let rec traverse = function | `List (`String name :: children) -> - let nodeid = fresh () in - fprintf cout "%s [label=\"%s\"];\n" nodeid name; - let childrenids = List.map traverse children in - List.iter (fun c -> fprintf cout "%s -> %s;\n" nodeid c) childrenids; - nodeid + let nodeid = fresh () in + fprintf cout "%s [label=\"%s\"];\n" nodeid name; + let childrenids = List.map traverse children in + List.iter (fun c -> fprintf cout "%s -> %s;\n" nodeid c) childrenids; + nodeid | `String name -> - let nodeid = fresh () in - fprintf cout "%s [label=\"%s\"];\n" nodeid (String.escaped name); - nodeid + let nodeid = fresh () in + fprintf cout "%s [label=\"%s\"];\n" nodeid (String.escaped name); + nodeid | `List [x] -> - traverse x + traverse x | `List children -> - let nodeid = fresh () in - fprintf cout "%s [shape=point];\n" nodeid; - let childrenids = List.map traverse children in - List.iter (fun c -> fprintf cout "%s -> %s;\n" nodeid c) childrenids; - nodeid + let nodeid = fresh () in + fprintf cout "%s [shape=point];\n" nodeid; + let childrenids = List.map traverse children in + List.iter (fun c -> fprintf cout "%s -> %s;\n" nodeid c) childrenids; + nodeid | _ -> - assert false + assert false in fprintf cout "digraph {\n"; ignore (traverse json); diff --git a/src/keyword.ml b/src/keyword.ml index 78e11c2..6596d16 100644 --- a/src/keyword.ml +++ b/src/keyword.ml @@ -39,30 +39,30 @@ open Parser.MenhirInterpreter *) let keywords = [ - "if", If, X (T T_If); - "then", Then, X (T T_Then); - "else", Else, X (T T_Else); - "elif", Elif, X (T T_Elif); - "fi", Fi, X (T T_Fi); - "do", Do, X (T T_Do); - "done", Done, X (T T_Done); - "case", Case, X (T T_Case); - "esac", Esac, X (T T_Esac); - "while", While, X (T T_While); - "until", Until, X (T T_Until); - "for", For, X (T T_For); - "{", Lbrace, X (T T_Lbrace); - "}", Rbrace, X (T T_Rbrace); - "!", Bang, X (T T_Bang); - "in", In, X (T T_In); + "if", If, X (T T_If); + "then", Then, X (T T_Then); + "else", Else, X (T T_Else); + "elif", Elif, X (T T_Elif); + "fi", Fi, X (T T_Fi); + "do", Do, X (T T_Do); + "done", Done, X (T T_Done); + "case", Case, X (T T_Case); + "esac", Esac, X (T T_Esac); + "while", While, X (T T_While); + "until", Until, X (T T_Until); + "for", For, X (T T_For); + "{", Lbrace, X (T T_Lbrace); + "}", Rbrace, X (T T_Rbrace); + "!", Bang, X (T T_Bang); + "in", In, X (T T_In); ] let keyword_of_string = let t = Hashtbl.create 13 in List.iter (fun (s, kwd, _) -> Hashtbl.add t s kwd) keywords; fun w -> FirstSuccessMonad.( - try return (Hashtbl.find t w) with Not_found -> fail - ) + try return (Hashtbl.find t w) with Not_found -> fail + ) let is_reserved_word w = FirstSuccessMonad.run (keyword_of_string w) <> None diff --git a/src/morbigDriver.ml b/src/morbigDriver.ml index b4b1bb3..fd35f05 100644 --- a/src/morbigDriver.ml +++ b/src/morbigDriver.ml @@ -19,17 +19,17 @@ let save input_filename (cst : CST.program) = else let cout = open_out (output_file_of_input_file input_filename) in begin match backend () with - | Bin -> save_binary_cst cout cst - | Json -> save_json_cst cout cst - | SimpleJson -> JsonHelpers.save_as_json true cout cst - | Dot -> JsonHelpers.save_as_dot cout cst - | NoSerialisation -> assert false + | Bin -> save_binary_cst cout cst + | Json -> save_json_cst cout cst + | SimpleJson -> JsonHelpers.save_as_json true cout cst + | Dot -> JsonHelpers.save_as_dot cout cst + | NoSerialisation -> assert false end; close_out cout ) (** write the concrete syntax tree [cst] to the output file - corresponding to [input_filename]. The format and the name of the - output file are determined by the program options. *) + corresponding to [input_filename]. The format and the name of the + output file are determined by the program options. *) let save_error input_filename message = let eout = open_out (input_filename ^ ".morbigerror") in @@ -37,7 +37,7 @@ let save_error input_filename message = output_string eout "\n"; close_out eout (** write string [message] to the error file corresponding to - [input_filename]. *) + [input_filename]. *) let not_a_script input_filename = Options.skip_nosh () @@ -49,10 +49,10 @@ let nb_inputs_erroneous = ref 0 let show_stats () = if Options.display_stats () then begin - Printf.printf "Number of input files: %i\n" !nb_inputs; - Printf.printf "Number of skipped files: %i\n" !nb_inputs_skipped; - Printf.printf "Number of rejected files: %i\n" !nb_inputs_erroneous - end + Printf.printf "Number of input files: %i\n" !nb_inputs; + Printf.printf "Number of skipped files: %i\n" !nb_inputs_skipped; + Printf.printf "Number of rejected files: %i\n" !nb_inputs_erroneous + end let parse_one_file input_filename = Debug.printf "Trying to open: %s\n" input_filename; @@ -80,9 +80,9 @@ let parse_input_files_provided_via_stdin () = let parse_input_files_provided_on_command_line () = if List.length (Options.input_files ()) <= 0 then begin - Printf.eprintf "morbig: no input files.\n"; - exit 1 - end; + Printf.eprintf "morbig: no input files.\n"; + exit 1 + end; List.iter parse_one_file (Options.input_files ()) let parse_input_files () = diff --git a/src/nesting.ml b/src/nesting.ml index bce8cf8..43427c0 100644 --- a/src/nesting.ml +++ b/src/nesting.ml @@ -18,15 +18,15 @@ type t = let to_string = function | Backquotes (c, level) -> - Printf.sprintf "@%c[%d]" c level + Printf.sprintf "@%c[%d]" c level | Parentheses -> - "(" + "(" | Braces -> - "{" + "{" | DQuotes -> - "\"" + "\"" | HereDocument (dashed, delimiter) -> - Printf.sprintf "HereDoc[%B, %s]" dashed delimiter + Printf.sprintf "HereDoc[%B, %s]" dashed delimiter let rec under_backquoted_style_command_substitution = function | [] -> false diff --git a/src/patternMatchingRecognizer.ml b/src/patternMatchingRecognizer.ml index eea5e3f..2168422 100644 --- a/src/patternMatchingRecognizer.ml +++ b/src/patternMatchingRecognizer.ml @@ -39,71 +39,71 @@ open CST let recognize_re_bracket_expression s start = let module Prelexer : sig - val current_position : unit -> int - val lexing_position : unit -> Lexing.position - val next_token : unit -> token * Lexing.position * Lexing.position - val after_starting_hat : unit -> bool - val after_starting_bracket : unit -> bool - val just_before_ending_bracket : unit -> bool - val read_string : unit -> string - end = struct - - let current_position = ref start - - let next_char () = - if !current_position < String.length s then ( - let c = s.[!current_position] in - incr current_position; - Some c - ) else None - - let eof_reached () = - !current_position >= String.length s - - let lexbuf = - Lexing.from_function @@ fun b count -> - let rec aux i = - if i = count then count - else match next_char () with - | None -> i - | Some c -> Bytes.set b i c; aux (i + 1) - in - aux 0 - - let lookahead_buffer = Queue.create () - - let with_positions token = - (token, lexbuf.Lexing.lex_start_p, lexbuf.Lexing.lex_curr_p) - - let lex () = - REBracketExpressionLexer.token lexbuf |> with_positions - - let next_token () = - if Queue.is_empty lookahead_buffer then - lex () - else if eof_reached () then - with_positions REBracketExpressionParser.EOF - else - Queue.pop lookahead_buffer + val current_position : unit -> int + val lexing_position : unit -> Lexing.position + val next_token : unit -> token * Lexing.position * Lexing.position + val after_starting_hat : unit -> bool + val after_starting_bracket : unit -> bool + val just_before_ending_bracket : unit -> bool + val read_string : unit -> string + end = struct + + let current_position = ref start + + let next_char () = + if !current_position < String.length s then ( + let c = s.[!current_position] in + incr current_position; + Some c + ) else None + + let eof_reached () = + !current_position >= String.length s + + let lexbuf = + Lexing.from_function @@ fun b count -> + let rec aux i = + if i = count then count + else match next_char () with + | None -> i + | Some c -> Bytes.set b i c; aux (i + 1) + in + aux 0 + + let lookahead_buffer = Queue.create () + + let with_positions token = + (token, lexbuf.Lexing.lex_start_p, lexbuf.Lexing.lex_curr_p) + + let lex () = + REBracketExpressionLexer.token lexbuf |> with_positions + + let next_token () = + if Queue.is_empty lookahead_buffer then + lex () + else if eof_reached () then + with_positions REBracketExpressionParser.EOF + else + Queue.pop lookahead_buffer - let read_string () = - String.sub s start (!current_position - start) + let read_string () = + String.sub s start (!current_position - start) - let current_position () = - start + lexbuf.Lexing.lex_start_p.pos_cnum + let current_position () = + start + lexbuf.Lexing.lex_start_p.pos_cnum - let lexing_position () = lexbuf.Lexing.lex_start_p + let lexing_position () = lexbuf.Lexing.lex_start_p - let after_starting_bracket () = - (lexing_position ()).pos_cnum = 1 + let after_starting_bracket () = + (lexing_position ()).pos_cnum = 1 - let just_before_ending_bracket () = - (lexing_position ()).pos_cnum = String.length s - 2 - start + let just_before_ending_bracket () = + (lexing_position ()).pos_cnum = String.length s - 2 - start - let after_starting_hat () = - (lexing_position ()).pos_cnum = 2 && s.[1] = '^' + let after_starting_hat () = + (lexing_position ()).pos_cnum = 2 && s.[1] = '^' - end + end in (*specification: @@ -120,7 +120,7 @@ let recognize_re_bracket_expression s start = When found anywhere but first (after an initial '^', if any) in a bracket expression - *) + *) (** The specification phrasing is a bit ackward. We interpret it as a definition for tokens HAT, MINUS and RBRACKET as well a definition @@ -136,48 +136,48 @@ let recognize_re_bracket_expression s start = (f token, p1, p2) in rewrite_token (function - | HAT -> - if Prelexer.after_starting_bracket () then - HAT - else - (* by 2.13.1, is in the context of - shell. *) - COLL_ELEM_SINGLE '!' - | (COLL_ELEM_SINGLE '^') as token -> - if Prelexer.after_starting_bracket () - && Options.error_on_unspecified () - then - let msg = - "Unquoted at the beginning of a bracket expression \ - has an unspecified semantics." - in - raise (Errors.DuringLexing (Prelexer.lexing_position (), msg)) - else - token - | (RBRACKET | MINUS) as token -> - let final_minus = - (token = MINUS) && (Prelexer.just_before_ending_bracket ()) - in - if Prelexer.(after_starting_bracket () - || after_starting_hat () - || final_minus) - then - COLL_ELEM_SINGLE (if token = MINUS then '-' else ']') - else - token - | token -> - token) + | HAT -> + if Prelexer.after_starting_bracket () then + HAT + else + (* by 2.13.1, is in the context of + shell. *) + COLL_ELEM_SINGLE '!' + | (COLL_ELEM_SINGLE '^') as token -> + if Prelexer.after_starting_bracket () + && Options.error_on_unspecified () + then + let msg = + "Unquoted at the beginning of a bracket expression \ + has an unspecified semantics." + in + raise (Errors.DuringLexing (Prelexer.lexing_position (), msg)) + else + token + | (RBRACKET | MINUS) as token -> + let final_minus = + (token = MINUS) && (Prelexer.just_before_ending_bracket ()) + in + if Prelexer.(after_starting_bracket () + || after_starting_hat () + || final_minus) + then + COLL_ELEM_SINGLE (if token = MINUS then '-' else ']') + else + token + | token -> + token) in let rec parse checkpoint = match checkpoint with | InputNeeded _ -> - parse (offer checkpoint (next_token ())) + parse (offer checkpoint (next_token ())) | Accepted cst -> - Some (cst, Prelexer.read_string (), Prelexer.current_position ()) + Some (cst, Prelexer.read_string (), Prelexer.current_position ()) | Rejected -> - None + None | _ -> - parse (resume checkpoint) + parse (resume checkpoint) in parse (Incremental.bracket_expression (Prelexer.lexing_position ())) @@ -211,11 +211,11 @@ let process s : (string * word_component) list = | '?' -> produce "?" WordGlobAny | '*' -> produce "*" WordGlobAll | '[' -> begin match recognize_re_bracket_expression s i with - | Some (re_bracket_exp, rs, j) -> - produce rs (WordReBracketExpression re_bracket_exp) ~next:j - | None -> - push '[' - end + | Some (re_bracket_exp, rs, j) -> + produce rs (WordReBracketExpression re_bracket_exp) ~next:j + | None -> + push '[' + end | c -> push c in analyze [] 0 |> List.rev diff --git a/src/prelexerState.ml b/src/prelexerState.ml index 96bc890..60b1032 100644 --- a/src/prelexerState.ml +++ b/src/prelexerState.ml @@ -34,10 +34,10 @@ module AtomBuffer : sig val last_line : t -> string end = struct type t = { - mutable buffer : atom list; - mutable strings_len : int; - mutable strings : string list; - } + mutable buffer : atom list; + mutable strings_len : int; + mutable strings : string list; + } let too_many_strings = 1024 @@ -54,20 +54,20 @@ end = struct let push_string b s = match b with | WordComponent (s', WordLiteral l) :: csts -> - let cst = WordComponent (s' ^ s, WordLiteral (l ^ s)) in - cst :: csts + let cst = WordComponent (s' ^ s, WordLiteral (l ^ s)) in + cst :: csts | csts -> - let cst = WordComponent (s, WordLiteral (s)) in - cst :: csts + let cst = WordComponent (s, WordLiteral (s)) in + cst :: csts let normalize b = if b.strings <> [] then begin - let s = String.concat "" (List.rev b.strings) in - let buffer = push_string b.buffer s in - b.strings <- []; - b.strings_len <- 0; - b.buffer <- buffer - end + let s = String.concat "" (List.rev b.strings) in + let buffer = push_string b.buffer s in + b.strings <- []; + b.strings_len <- 0; + b.buffer <- buffer + end let get b = normalize b; b.buffer @@ -88,9 +88,9 @@ end = struct let buffer_as_strings b = let rec aux accu = function | WordComponent (s, _) :: atoms -> - aux (s :: accu) atoms + aux (s :: accu) atoms | _ -> - accu + accu in List.rev (aux [] b) @@ -98,14 +98,14 @@ end = struct let last_line_of_strings ss = let rec aux accu = function | s :: ss -> - if Str.string_match ExtPervasives.newline_regexp s 0 then - match ExtPervasives.(list_last (lines s)) with - | None -> assert false (* By the if-condition. *) - | Some s -> s :: accu - else - aux (s :: accu) ss + if Str.string_match ExtPervasives.newline_regexp s 0 then + match ExtPervasives.(list_last (lines s)) with + | None -> assert false (* By the if-condition. *) + | Some s -> s :: accu + else + aux (s :: accu) ss | [] -> - accu + accu in aux [] ss |> String.concat "" in @@ -117,8 +117,8 @@ end = struct end type prelexer_state = { - nesting_context : Nesting.t list; - buffer : AtomBuffer.t + nesting_context : Nesting.t list; + buffer : AtomBuffer.t } let buffer current = @@ -127,8 +127,8 @@ let buffer current = type t = prelexer_state let initial_state = { - nesting_context = []; - buffer = AtomBuffer.make []; + nesting_context = []; + buffer = AtomBuffer.make []; } let at_toplevel current = @@ -139,19 +139,19 @@ let at_toplevel current = let push_word_component csts w = match csts, w with | WordComponent (s', WordLiteral l') :: csts, (s, WordLiteral l) -> - WordComponent (s' ^ s, WordLiteral (l' ^ l)) :: csts + WordComponent (s' ^ s, WordLiteral (l' ^ l)) :: csts | _, (s, a) -> - WordComponent (s, a) :: csts + WordComponent (s, a) :: csts - let push_string b s = - let buffer = AtomBuffer.push_string b.buffer s in - { b with buffer } +let push_string b s = + let buffer = AtomBuffer.push_string b.buffer s in + { b with buffer } let parse_pattern : word_component -> word_component list = function | WordLiteral w -> - snd (List.split (PatternMatchingRecognizer.process w)) + snd (List.split (PatternMatchingRecognizer.process w)) | c -> - [c] + [c] let push_character b c = push_string b (String.make 1 c) @@ -163,13 +163,13 @@ let push_separated_string b s = let pop_character = function | WordComponent (s, WordLiteral _c) :: buffer -> - let sequel = try String.(sub s 0 (length s - 1)) with _ -> assert false in - if sequel = "" then - buffer - else - WordComponent (sequel, WordLiteral sequel) :: buffer + let sequel = try String.(sub s 0 (length s - 1)) with _ -> assert false in + if sequel = "" then + buffer + else + WordComponent (sequel, WordLiteral sequel) :: buffer | _ -> - assert false + assert false (** [push_word_closing_character b c] push a character [c] to mark it as part of the string representing the current word literal but @@ -202,8 +202,8 @@ let push_parameter ?(with_braces=false) ?(attribute=NoAttribute) b id = let v = VariableAtom (id, attribute) in let p = if with_braces then - (* The ParameterLength attribute is a special case. - The "#" syntax of the operator shows up _before_ the identifier it modifies. *) + (* The ParameterLength attribute is a special case. + The "#" syntax of the operator shows up _before_ the identifier it modifies. *) match attribute with | ParameterLength -> "${#" ^ id ^ "}" | _ -> "${" ^ id ^ string_of_attribute attribute ^ "}" @@ -248,19 +248,19 @@ let push_quoting_mark k b = let pop_quotation k b = let rec aux squote quote = function | [] -> - (squote, quote, []) + (squote, quote, []) | QuotingMark k' :: buffer when k = k' -> - (squote, quote, buffer) + (squote, quote, buffer) | (AssignmentMark | QuotingMark _) :: buffer -> - aux squote quote buffer (* FIXME: Check twice. *) + aux squote quote buffer (* FIXME: Check twice. *) | WordComponent (w, WordEmpty) :: buffer -> - aux (w ^ squote) quote buffer + aux (w ^ squote) quote buffer | WordComponent (w, c) :: buffer -> - aux (w ^ squote) (c :: quote) buffer + aux (w ^ squote) (c :: quote) buffer in (* The last character is removed from the quote since it is the closing character. *) -(* let buffer = pop_character b.buffer in *) + (* let buffer = pop_character b.buffer in *) let squote, quote, buffer = aux "" [] (buffer b) in let word = Word (squote, quote) in let quoted_word = @@ -296,35 +296,35 @@ let recognize_assignment current = let current' = { current with buffer } in match prefix with | AssignmentMark :: WordComponent (s, _) :: prefix -> - assert (s.[String.length s - 1] = '='); (* By after_equal unique call. *) - (* [s] is a valid name. We have an assignment here. *) - let lhs = try String.(sub s 0 (length s - 1)) with _ -> assert false in - - (* FIXME: The following check could be done directly with - ocamllex rules, right?*) - - if Name.is_name lhs then ( - let rhs_string = contents_of_atom_list rhs in - let crhs = components_of_atom_list rhs in - let cst = WordComponent ( - s ^ rhs_string, - WordAssignmentWord (Name lhs, Word (rhs_string, crhs))) - in - let buffer = AtomBuffer.make (cst :: prefix) in - { current with buffer } - ) else + assert (s.[String.length s - 1] = '='); (* By after_equal unique call. *) + (* [s] is a valid name. We have an assignment here. *) + let lhs = try String.(sub s 0 (length s - 1)) with _ -> assert false in + + (* FIXME: The following check could be done directly with + ocamllex rules, right?*) + + if Name.is_name lhs then ( + let rhs_string = contents_of_atom_list rhs in + let crhs = components_of_atom_list rhs in + let cst = WordComponent ( + s ^ rhs_string, + WordAssignmentWord (Name lhs, Word (rhs_string, crhs))) + in + let buffer = AtomBuffer.make (cst :: prefix) in + { current with buffer } + ) else (* If [lhs] is not a name, then the corresponding word literal must be merged with the preceding one, if it exists. *) ( - begin match List.rev rhs with - | WordComponent (s_rhs, WordLiteral s_rhs') :: rev_rhs -> + begin match List.rev rhs with + | WordComponent (s_rhs, WordLiteral s_rhs') :: rev_rhs -> let word = WordComponent (s ^ s_rhs, WordLiteral (s ^ s_rhs')) in let buffer = AtomBuffer.make (List.rev rev_rhs @ word :: prefix) in { current with buffer } - | _ -> + | _ -> current' - end) + end) | _ -> current' (** [(return ?with_newline lexbuf current tokens)] returns a list of @@ -346,8 +346,8 @@ let digit_regexp = Str.regexp "^[0-9]+$" let return ?(with_newline=false) lexbuf (current : prelexer_state) tokens = assert ( - not (List.exists (function (Pretoken.PreWord _)->true |_-> false) tokens) - ); + not (List.exists (function (Pretoken.PreWord _)->true |_-> false) tokens) + ); let current = recognize_assignment current in @@ -364,12 +364,12 @@ let return ?(with_newline=false) lexbuf (current : prelexer_state) tokens = Str.(string_match digit_regexp d 0) in let followed_by_redirection = Parser.(function - | Pretoken.Operator (LESSAND | GREATAND | DGREAT | DLESS _ - | CLOBBER | LESS | GREAT | LESSGREAT) :: _ -> - true - | _ -> - false - ) in + | Pretoken.Operator (LESSAND | GREATAND | DGREAT | DLESS _ + | CLOBBER | LESS | GREAT | LESSGREAT) :: _ -> + true + | _ -> + false + ) in (*specification: @@ -406,7 +406,7 @@ let return ?(with_newline=false) lexbuf (current : prelexer_state) tokens = | WordComponent (_, s) -> [s] | AssignmentMark -> [] | QuotingMark _ -> [] - ) (buffer current))) + ) (buffer current))) in let csts = TildePrefix.recognize csts in [Pretoken.PreWord (w, csts)] @@ -420,13 +420,13 @@ exception NotAWord of string let word_of b = let rec aux w cst = Pretoken.(function | [] -> - Word (w, cst) + Word (w, cst) | (p, _, _) :: ps -> - match preword_of_pretoken p with - | EOF -> assert false (* Because of [word_of] calling context. *) - | PreWord (w', cst') -> aux (w ^ w') (cst @ cst') ps - | _ -> assert false (* By preword_of_pretoken. *) - ) in + match preword_of_pretoken p with + | EOF -> assert false (* Because of [word_of] calling context. *) + | PreWord (w', cst') -> aux (w ^ w') (cst @ cst') ps + | _ -> assert false (* By preword_of_pretoken. *) + ) in aux "" [] b let located_word_of = function @@ -447,30 +447,30 @@ let provoke_error current lexbuf = let escape_analysis ?(for_backquote=false) ?(for_dquotes=false) level current = let current = AtomBuffer.last_line current.buffer in let number_of_backslashes_to_escape = Nesting.( - (* FIXME: We will be looking for the general pattern here. *) - match level with - | Backquotes ('`', _) :: Backquotes ('`', _) :: Backquotes ('`', _) :: _ -> - [3] - | Backquotes ('`', _) :: Backquotes ('`', _) :: _ -> - [2] - | DQuotes :: Backquotes ('`', _) :: [] -> - [1; 2] - | DQuotes :: Backquotes ('`', _) :: DQuotes :: _ -> - if for_backquote then [3] else [2] - | DQuotes :: Backquotes ('`', _) :: _ :: DQuotes :: _ -> - [2] - | Backquotes ('`', _) :: DQuotes :: _ -> - if for_dquotes then [2] else [1] - | Backquotes ('`', _) :: _ :: DQuotes :: _ -> - [2] - | [Backquotes ('`', _)] -> - if for_backquote then - [3] - else - [1; 2] - | _ -> - [1] - ) + (* FIXME: We will be looking for the general pattern here. *) + match level with + | Backquotes ('`', _) :: Backquotes ('`', _) :: Backquotes ('`', _) :: _ -> + [3] + | Backquotes ('`', _) :: Backquotes ('`', _) :: _ -> + [2] + | DQuotes :: Backquotes ('`', _) :: [] -> + [1; 2] + | DQuotes :: Backquotes ('`', _) :: DQuotes :: _ -> + if for_backquote then [3] else [2] + | DQuotes :: Backquotes ('`', _) :: _ :: DQuotes :: _ -> + [2] + | Backquotes ('`', _) :: DQuotes :: _ -> + if for_dquotes then [2] else [1] + | Backquotes ('`', _) :: _ :: DQuotes :: _ -> + [2] + | [Backquotes ('`', _)] -> + if for_backquote then + [3] + else + [1; 2] + | _ -> + [1] + ) in if Options.debug () then ( let current' = List.(concat (map rev (map string_to_char_list [current]))) in @@ -484,8 +484,8 @@ let escape_analysis ?(for_backquote=false) ?(for_dquotes=false) level current = let backslashes_before = ExtPervasives.count_end_character '\\' current in if List.exists (fun k -> - backslashes_before >= k && (k - backslashes_before) mod (k + 1) = 0 - ) number_of_backslashes_to_escape + backslashes_before >= k && (k - backslashes_before) mod (k + 1) = 0 + ) number_of_backslashes_to_escape then ( (** There is no special meaning for this character. It is escaped. *) @@ -503,7 +503,7 @@ let escape_analysis ?(for_backquote=false) ?(for_dquotes=false) level current = closing the current subshell, it is opening a new one. - *) + *) Some backslashes_before let escape_analysis_predicate ?(for_backquote=false) ?(for_dquotes=false) level current = @@ -605,8 +605,8 @@ let backquote_depth current = let current_depth = escape_analysis ~for_backquote:true current.nesting_context current |> function - | Some d -> d - | None -> assert false (* By usage of backquote_depth. *) + | Some d -> d + | None -> assert false (* By usage of backquote_depth. *) in if Options.debug () then Printf.eprintf "Backquote depth: %d =?= %d\n" @@ -620,23 +620,23 @@ let backquote_depth current = let found_current_here_document_delimiter ?buffer current = match current.nesting_context with | Nesting.HereDocument (dashed, delimiter) :: _ -> - let last_chunk = - match buffer with - | None -> - AtomBuffer.last_line current.buffer - | Some buffer -> - Buffer.( - let n = length buffer in - let k = String.length delimiter * 2 in - try sub buffer (max 0 (n - k)) (min k n) with _ -> assert false - ) - in - let open QuoteRemoval in - let preprocess = if dashed then remove_tabs_at_linestart else fun x -> x in - let last_line = option_map (string_last_line last_chunk) preprocess in - last_line = Some delimiter + let last_chunk = + match buffer with + | None -> + AtomBuffer.last_line current.buffer + | Some buffer -> + Buffer.( + let n = length buffer in + let k = String.length delimiter * 2 in + try sub buffer (max 0 (n - k)) (min k n) with _ -> assert false + ) + in + let open QuoteRemoval in + let preprocess = if dashed then remove_tabs_at_linestart else fun x -> x in + let last_line = option_map (string_last_line last_chunk) preprocess in + last_line = Some delimiter | _ -> - false + false let remove_contents_suffix pos end_marker contents cst = let contents = try @@ -648,18 +648,18 @@ let remove_contents_suffix pos end_marker contents cst = let rec aux cst = match cst with | (WordLiteral contents) :: cst -> - begin match lines contents with - | [] | [_] -> + begin match lines contents with + | [] | [_] -> aux cst - | rest -> + | rest -> let rest = List.(rev (tl (rev rest))) in let suffix = String.concat "\n" rest ^ "\n" in WordLiteral suffix :: cst - end + end | _ :: cst -> - aux cst + aux cst | [] -> - [] + [] in contents, List.(rev (aux (rev cst))) @@ -679,4 +679,4 @@ let debug ?(rule="") lexbuf current = Lexing.( rule (String.concat " " (List.map Nesting.to_string current.nesting_context)) (string_of_atom_list (buffer current)) -) + ) diff --git a/src/pretoken.ml b/src/pretoken.ml index 94ba83c..bc0f00b 100644 --- a/src/pretoken.ml +++ b/src/pretoken.ml @@ -28,24 +28,24 @@ let string_of_pretoken = function let operators = Hashtbl.( let t = create 17 in List.iter (fun (sym, tok) -> add t sym tok) [ - "&&", AND_IF; - "||", OR_IF; - ";;", DSEMI; - "<&", LESSAND; - ">&", GREATAND; - "<>", LESSGREAT; - ">>", DGREAT; - ">|", CLOBBER; - "|", Pipe; - "(", Lparen; - ")", Rparen; - "<", LESS; - ">", GREAT; - ";", Semicolon; - "&", Uppersand - ]; + "&&", AND_IF; + "||", OR_IF; + ";;", DSEMI; + "<&", LESSAND; + ">&", GREATAND; + "<>", LESSGREAT; + ">>", DGREAT; + ">|", CLOBBER; + "|", Pipe; + "(", Lparen; + ")", Rparen; + "<", LESS; + ">", GREAT; + ";", Semicolon; + "&", Uppersand + ]; t - ) + ) let optoken_of_string s = try @@ -53,27 +53,27 @@ let optoken_of_string s = with Not_found -> Printf.eprintf "Internal error: `%s' is not a valid operator token.\n" - s; + s; assert false let preword_of_operator = function - | AND_IF -> "&&" - | OR_IF -> "||" - | DSEMI -> ";;" - | LESSAND -> "<&" - | GREATAND -> ">&" - | LESSGREAT -> "<>" - | DGREAT -> ">>" - | CLOBBER -> ">|" - | Pipe -> "|" - | Lparen -> "(" - | Rparen -> ")" - | LESS -> "<" - | DLESS _ -> "<<" - | GREAT -> ">" - | Semicolon -> ";" - | Uppersand -> "&" - | _ -> assert false (* By definition of operators. *) + | AND_IF -> "&&" + | OR_IF -> "||" + | DSEMI -> ";;" + | LESSAND -> "<&" + | GREATAND -> ">&" + | LESSGREAT -> "<>" + | DGREAT -> ">>" + | CLOBBER -> ">|" + | Pipe -> "|" + | Lparen -> "(" + | Rparen -> ")" + | LESS -> "<" + | DLESS _ -> "<<" + | GREAT -> ">" + | Semicolon -> ";" + | Uppersand -> "&" + | _ -> assert false (* By definition of operators. *) let preword_of_pretoken = function | IoNumber s -> PreWord (s, [WordLiteral s]) diff --git a/src/pretokenizer.ml b/src/pretokenizer.ml index 53bcfdf..3fc0bf0 100644 --- a/src/pretokenizer.ml +++ b/src/pretokenizer.ml @@ -19,11 +19,11 @@ let make (current : PrelexerState.t) lexbuf = *) let q = Queue.create () in let push x = Queue.push x q in - let rec aux () = - try - Queue.take q - with Queue.Empty -> - List.iter (fun x -> Queue.push x q) (pretokenizer lexbuf); - aux () - in - aux, push + let rec aux () = + try + Queue.take q + with Queue.Empty -> + List.iter (fun x -> Queue.push x q) (pretokenizer lexbuf); + aux () + in + aux, push diff --git a/src/quoteRemoval.ml b/src/quoteRemoval.ml index 2e649d2..e0f89cb 100644 --- a/src/quoteRemoval.ml +++ b/src/quoteRemoval.ml @@ -34,15 +34,15 @@ let on_string s = while !i '\'') do - keep () - done; - (* skip the final single quote *) - if !i '\'') do + keep () + done; + (* skip the final single quote *) + if !i - if s.[!i] = '\\' - then begin state := Backslash; skip () end - else keep () + if s.[!i] = '\\' + then begin state := Backslash; skip () end + else keep () | Backslash -> begin if List.mem s.[!i] ['$'; '\''; '"'; '\\'] then keep () else begin pushbackslash(); keep () end; @@ -85,4 +85,4 @@ let backslash_as_in_doublequotes s = let remove_tabs_at_linestart = let space = Str.regexp "^\t+" in fun s -> - Str.(global_replace space "" s) + Str.(global_replace space "" s) diff --git a/src/scripts.ml b/src/scripts.ml index d98ae47..7216bb8 100644 --- a/src/scripts.ml +++ b/src/scripts.ml @@ -11,9 +11,9 @@ let other_scripts_magic_strings = List.map Str.regexp [ - "#![ ]*/usr/bin/perl.*"; - "#![ ]*/bin/bash.*" - ] + "#![ ]*/usr/bin/perl.*"; + "#![ ]*/bin/bash.*" + ] let is_other_script filename = (* check whether [filename] is a script other than /bin/sh *) @@ -25,8 +25,8 @@ let is_other_script filename = (function r -> Str.string_match r firstline 0) other_scripts_magic_strings with End_of_file -> - (** An empty file is not considered as a script.*) - false + (** An empty file is not considered as a script.*) + false let elf_magic_number = Bytes.of_string "\x7FELF" diff --git a/src/tildePrefix.ml b/src/tildePrefix.ml index 14c7dd0..3153742 100644 --- a/src/tildePrefix.ml +++ b/src/tildePrefix.ml @@ -44,18 +44,18 @@ let find_login s = let rec make_tilde_prefix_explicit rhs_assignment = function | (WordLiteral s) as cst when s <> "" -> - if s.[0] = '~' then ( - if rhs_assignment then - let s = String.split_on_char ':' s in - List.(flatten (map find_login s)) - else - find_login s - ) else [cst] + if s.[0] = '~' then ( + if rhs_assignment then + let s = String.split_on_char ':' s in + List.(flatten (map find_login s)) + else + find_login s + ) else [cst] | WordAssignmentWord (name, Word (s, csts)) -> - let csts = recognize ~rhs_assignment:true csts in - [WordAssignmentWord (name, Word (s, csts))] + let csts = recognize ~rhs_assignment:true csts in + [WordAssignmentWord (name, Word (s, csts))] | cst -> - [cst] + [cst] and recognize ?(rhs_assignment=false) csts = List.(flatten (map (make_tilde_prefix_explicit rhs_assignment) csts)) diff --git a/src/token.ml b/src/token.ml index 74fed54..2ace00b 100644 --- a/src/token.ml +++ b/src/token.ml @@ -38,7 +38,7 @@ let string_of_token = function | GREATAND -> "GREATAND" | WORD w -> Printf.sprintf "WORD(%s)" (unWord w) | ASSIGNMENT_WORD w -> - Printf.sprintf "ASSIGNMENT_WORD(%s)" (string_of_assignment_word w) + Printf.sprintf "ASSIGNMENT_WORD(%s)" (string_of_assignment_word w) | NAME w -> Printf.sprintf "NAME(%s)" (unName w) | IO_NUMBER io -> Printf.sprintf "IONUMBER(%s)" (string_of_io_number io) | Do -> "Do"