diff --git a/.github/workflows/windows-build-clang.yaml b/.github/workflows/windows-build-clang.yaml index 5c56822f1ab..c10159caf1d 100644 --- a/.github/workflows/windows-build-clang.yaml +++ b/.github/workflows/windows-build-clang.yaml @@ -61,7 +61,6 @@ jobs: run: cmake --build build --parallel %NUMBER_OF_PROCESSORS% - name: Run Tests - timeout-minutes: 10 env: GTEST_OUTPUT: "xml:opengoal-test-report.xml" run: ./build/bin/goalc-test.exe --gtest_color=yes --gtest_brief=0 --gtest_filter="-*MANUAL_TEST*" diff --git a/.vs/launch.vs.json b/.vs/launch.vs.json index ec09354c911..0de260c4227 100644 --- a/.vs/launch.vs.json +++ b/.vs/launch.vs.json @@ -143,13 +143,6 @@ "name": "Game - Jak 2 - Runtime (release)", "args": ["-v", "--game", "jak2", "--", "-boot", "-fakeiso"] }, - { - "type": "default", - "project": "CMakeLists.txt", - "projectTarget": "goalc.exe (bin\\goalc.exe)", - "name": "REPL", - "args": ["--user-auto"] - }, { "type": "default", "project": "CMakeLists.txt", diff --git a/.vscode/settings.json b/.vscode/settings.json index fe629a0f673..545e13bd622 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -7,5 +7,6 @@ }, "editor.wordBasedSuggestions": "matchingDocuments", "editor.snippetSuggestions": "top" - } + }, + "cmake.configureOnOpen": false } diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index cb0f075e794..38978be7d8c 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -66,6 +66,7 @@ add_library(common type_system/TypeSpec.cpp type_system/TypeSystem.cpp util/Assert.cpp + util/ast_util.cpp util/BitUtils.cpp util/compress.cpp util/crc32.cpp @@ -87,7 +88,7 @@ add_library(common util/Timer.cpp util/unicode_util.cpp versions/versions.cpp - ) + "util/trie_map.h") target_link_libraries(common fmt lzokay replxx libzstd_static tree-sitter sqlite3 libtinyfiledialogs) diff --git a/common/formatter/formatter.cpp b/common/formatter/formatter.cpp index 243ebdd28f3..60182519925 100644 --- a/common/formatter/formatter.cpp +++ 
b/common/formatter/formatter.cpp @@ -2,8 +2,11 @@ #include "formatter_tree.h" +#include "common/formatter/rules/formatting_rules.h" +#include "common/formatter/rules/rule_config.h" #include "common/log/log.h" #include "common/util/FileUtil.h" +#include "common/util/ast_util.h" #include "common/util/string_util.h" #include "tree_sitter/api.h" @@ -400,8 +403,6 @@ std::string join_formatted_lines(const std::vector& lines, std::optional formatter::format_code(const std::string& source) { // Create a parser. std::shared_ptr parser(ts_parser_new(), TreeSitterParserDeleter()); - - // Set the parser's language (JSON in this case). ts_parser_set_language(parser.get(), tree_sitter_opengoal()); // Build a syntax tree based on source code stored in a string. diff --git a/common/formatter/formatter.h b/common/formatter/formatter.h index 0ce2a11e29a..58484cedc47 100644 --- a/common/formatter/formatter.h +++ b/common/formatter/formatter.h @@ -3,13 +3,8 @@ #include #include -#include "common/formatter/rules/formatting_rules.h" -#include "common/formatter/rules/rule_config.h" - #include "tree_sitter/api.h" -// TODO: -// - Considering _eventually_ adding line-length heuristics namespace formatter { struct TreeSitterParserDeleter { diff --git a/common/type_system/TypeSystem.cpp b/common/type_system/TypeSystem.cpp index 9171ed13610..40efe45d97a 100644 --- a/common/type_system/TypeSystem.cpp +++ b/common/type_system/TypeSystem.cpp @@ -1442,6 +1442,21 @@ std::vector TypeSystem::search_types_by_parent_type( return results; } +std::vector TypeSystem::search_types_by_parent_type_strict( + const std::string& parent_type) { + std::vector results = {}; + for (const auto& [type_name, type_info] : m_types) { + // Only NullType's have no parent + if (!type_info->has_parent()) { + continue; + } + if (type_info->get_parent() == parent_type) { + results.push_back(type_name); + } + } + return results; +} + std::vector TypeSystem::search_types_by_minimum_method_id( const int minimum_method_id, const 
std::optional>& existing_matches) { diff --git a/common/type_system/TypeSystem.h b/common/type_system/TypeSystem.h index f28cb9a6935..dbd61bd825a 100644 --- a/common/type_system/TypeSystem.h +++ b/common/type_system/TypeSystem.h @@ -278,6 +278,7 @@ class TypeSystem { std::vector search_types_by_parent_type( const std::string& parent_type, const std::optional>& existing_matches = {}); + std::vector search_types_by_parent_type_strict(const std::string& parent_type); std::vector search_types_by_minimum_method_id( const int minimum_method_id, diff --git a/common/util/FileUtil.cpp b/common/util/FileUtil.cpp index 9ac5abfec1a..212419544df 100644 --- a/common/util/FileUtil.cpp +++ b/common/util/FileUtil.cpp @@ -736,4 +736,25 @@ std::string get_majority_file_line_endings(const std::string& file_contents) { return "\n"; } +std::pair get_majority_file_line_endings_and_count( + const std::string& file_contents) { + size_t lf_count = 0; + size_t crlf_count = 0; + + for (size_t i = 0; i < file_contents.size(); ++i) { + if (file_contents[i] == '\n') { + if (i > 0 && file_contents[i - 1] == '\r') { + crlf_count++; + } else { + lf_count++; + } + } + } + + if (crlf_count > lf_count) { + return {lf_count + crlf_count, "\r\n"}; + } + return {lf_count + crlf_count, "\n"}; +} + } // namespace file_util diff --git a/common/util/FileUtil.h b/common/util/FileUtil.h index e33e1be09f5..82cd89b397d 100644 --- a/common/util/FileUtil.h +++ b/common/util/FileUtil.h @@ -72,4 +72,6 @@ std::vector sort_filepaths(const std::vector& paths, const b void copy_file(const fs::path& src, const fs::path& dst); std::string make_screenshot_filepath(const GameVersion game_version, const std::string& name = ""); std::string get_majority_file_line_endings(const std::string& file_contents); +std::pair get_majority_file_line_endings_and_count( + const std::string& file_contents); } // namespace file_util diff --git a/common/util/Range.h b/common/util/Range.h index 99f30181091..0fb2e7a8fc9 100644 --- 
a/common/util/Range.h +++ b/common/util/Range.h @@ -67,4 +67,4 @@ class Range { private: T m_start = {}; T m_end = {}; -}; \ No newline at end of file +}; diff --git a/common/util/Trie.h b/common/util/Trie.h index d108bfeb8fd..5790813de63 100644 --- a/common/util/Trie.h +++ b/common/util/Trie.h @@ -12,7 +12,7 @@ * It owns the memory for the objects it stores. * Doing an insert will create a copy of your object. * - * Other that deleting the whole thing, there is no support for removing a node. + * Other than deleting the whole thing, there is no support for removing a node. */ template class Trie { diff --git a/common/util/ast_util.cpp b/common/util/ast_util.cpp new file mode 100644 index 00000000000..8fef6d99c64 --- /dev/null +++ b/common/util/ast_util.cpp @@ -0,0 +1,31 @@ +#include "ast_util.h" + +namespace ast_util { +std::string get_source_code(const std::string& source, const TSNode& node) { + uint32_t start = ts_node_start_byte(node); + uint32_t end = ts_node_end_byte(node); + return source.substr(start, end - start); +} + +void search_for_forms_that_begin_with(const std::string& source, + const TSNode curr_node, + const std::vector& prefix, + std::vector& results) { + if (ts_node_child_count(curr_node) == 0) { + return; + } + std::vector node_elements; + bool added = false; + for (size_t i = 0; i < ts_node_child_count(curr_node); i++) { + const auto child_node = ts_node_child(curr_node, i); + const auto contents = get_source_code(source, child_node); + node_elements.push_back(contents); + // Check for a match + if (node_elements == prefix && !added) { + results.push_back(curr_node); + added = true; + } + search_for_forms_that_begin_with(source, child_node, prefix, results); + } +} +} // namespace ast_util diff --git a/common/util/ast_util.h b/common/util/ast_util.h new file mode 100644 index 00000000000..8889120d474 --- /dev/null +++ b/common/util/ast_util.h @@ -0,0 +1,15 @@ +#pragma once + +#include +#include + +#include "tree_sitter/api.h" + +namespace 
ast_util { +std::string get_source_code(const std::string& source, const TSNode& node); +void search_for_forms_that_begin_with(const std::string& source, + const TSNode curr_node, + const std::vector& prefix, + std::vector& results); + +} // namespace ast_util diff --git a/common/util/trie_map.h b/common/util/trie_map.h new file mode 100644 index 00000000000..8b4b02baed1 --- /dev/null +++ b/common/util/trie_map.h @@ -0,0 +1,160 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +// TrieMap class +template +class TrieMap { + private: + // TrieNode structure + struct TrieNode { + std::unordered_map> children; + std::vector> elements; + }; + + std::shared_ptr root; + + public: + TrieMap() : root(std::make_shared()) {} + + // Insert an element with a key into the TrieMap and return the inserted element + std::shared_ptr insert(const std::string& key, const T& element) { + std::shared_ptr shared_element = std::make_shared(element); + std::shared_ptr node = root; + for (char c : key) { + if (node->children.find(c) == node->children.end()) { + node->children[c] = std::make_shared(); + } + node = node->children[c]; + } + // Store element at the leaf node + node->elements.push_back(shared_element); + return shared_element; + } + + // Retrieve elements with a given prefix + std::vector> retrieve_with_prefix(const std::string& prefix) const { + std::vector> result; + std::shared_ptr node = root; + // Traverse to the node representing the prefix + for (char c : prefix) { + if (node->children.find(c) == node->children.end()) { + return result; // No elements with the given prefix + } + node = node->children[c]; + } + // Gather all elements stored at or below this node + retrieve_elements(node, result); + return result; + } + + // Retrieve elements with an exact key match + std::vector> retrieve_with_exact(const std::string& key) const { + std::vector> result; + std::shared_ptr node = root; + // Traverse to the node representing the key + for (char 
c : key) { + if (node->children.find(c) == node->children.end()) { + return result; // No elements with the given key + } + node = node->children[c]; + } + // Return elements stored at this node + return node->elements; + } + + // Remove the specified element from the TrieMap + void remove(const std::shared_ptr& element) { remove_element(root, element); } + + // Return the total number of elements stored in the TrieMap + int size() const { + int count = 0; + count_elements(root, count); + return count; + } + + // Return a vector containing shared pointers to all elements stored in the TrieMap + std::vector> get_all_elements() const { + std::vector> result; + get_all_elements_helper(root, result); + return result; + } + + private: + // Recursive function to retrieve elements stored at or below the given node + void retrieve_elements(std::shared_ptr node, + std::vector>& result) const { + // Add elements stored at this node to the result + for (const auto& element : node->elements) { + result.push_back(element); + } + // Recursively traverse children + for (const auto& child : node->children) { + retrieve_elements(child.second, result); + } + } + + // Recursive function to remove the specified element from the TrieMap + bool remove_element(std::shared_ptr node, const std::shared_ptr& element) { + // Remove the element if it exists at this node + auto& elements = node->elements; + auto it = std::find(elements.begin(), elements.end(), element); + if (it != elements.end()) { + elements.erase(it); + return true; + } + // Recursively search children + for (auto& child : node->children) { + if (remove_element(child.second, element)) { + // Remove child node if it's empty after removal + if (child.second->elements.empty() && child.second->children.empty()) { + node->children.erase(child.first); + } + return true; + } + } + return false; + } + + // Recursive function to count elements stored at or below the given node + void count_elements(std::shared_ptr node, int& count) 
const { + // Increment count by the number of elements stored at this node + count += node->elements.size(); + // Recursively traverse children + for (const auto& child : node->children) { + count_elements(child.second, count); + } + } + + // Recursive helper function to collect all elements stored in the TrieMap + void get_all_elements_helper(std::shared_ptr node, + std::vector>& result) const { + // Add elements stored at this node to the result + for (const auto& element : node->elements) { + result.push_back(element); + } + // Recursively traverse children + for (const auto& child : node->children) { + get_all_elements_helper(child.second, result); + } + } +}; + +// TrieMap trie_map; +// +//// Insert elements +// std::shared_ptr inserted_element_1 = trie_map.insert("apple", "A fruit"); +// std::shared_ptr inserted_element_2 = trie_map.insert("app", "An application"); +// std::shared_ptr inserted_element_3 = trie_map.insert("banana", "Another fruit"); +// std::shared_ptr inserted_element_4 = trie_map.insert("apple", "Another apple"); +// +//// Remove an element +// trie_map.remove(inserted_element_1); +// +//// Retrieve elements with a prefix +// std::vector> prefix_results = trie_map.retrieve_with_prefix("app"); diff --git a/goalc/CMakeLists.txt b/goalc/CMakeLists.txt index f2ae00b0959..c9b58bb8175 100644 --- a/goalc/CMakeLists.txt +++ b/goalc/CMakeLists.txt @@ -32,6 +32,7 @@ add_library(compiler compiler/CompilerSettings.cpp compiler/CodeGenerator.cpp compiler/StaticObject.cpp + compiler/symbol_info.cpp compiler/compilation/Asm.cpp compiler/compilation/Atoms.cpp compiler/compilation/CompilerControl.cpp diff --git a/goalc/compiler/Compiler.cpp b/goalc/compiler/Compiler.cpp index 7873f798d56..2dccd8297d4 100644 --- a/goalc/compiler/Compiler.cpp +++ b/goalc/compiler/Compiler.cpp @@ -25,8 +25,9 @@ Compiler::Compiler(GameVersion version, : m_version(version), m_goos(user_profile), m_debugger(&m_listener, &m_goos.reader, version), + m_make(repl_config, 
user_profile), m_repl(std::move(repl)), - m_make(repl_config, user_profile) { + m_symbol_info(&m_goos.reader.db) { m_listener.add_debugger(&m_debugger); m_listener.set_default_port(version); m_ts.add_builtin_types(m_version); @@ -57,9 +58,7 @@ Compiler::Compiler(GameVersion version, // add built-in forms to symbol info for (const auto& [builtin_name, builtin_info] : g_goal_forms) { - SymbolInfo::Metadata sym_meta; - sym_meta.docstring = builtin_info.first; - m_symbol_info.add_builtin(builtin_name, sym_meta); + m_symbol_info.add_builtin(builtin_name, builtin_info.first); } // load auto-complete history, only if we are running in the interactive mode. @@ -463,6 +462,10 @@ void Compiler::asm_file(const CompilationOptions& options) { file_path = candidate_paths.at(0).string(); } + // Evict any symbols we have indexed for this file, this is what + // helps to ensure we have an up to date and accurate symbol index + m_symbol_info.evict_symbols_using_file_index(file_path); + auto code = m_goos.reader.read_from_file({file_path}); std::string obj_file_name = file_path; @@ -489,7 +492,7 @@ void Compiler::asm_file(const CompilationOptions& options) { if (options.disassemble) { codegen_and_disassemble_object_file(obj_file, &data, &disasm, options.disasm_code_only); if (options.disassembly_output_file.empty()) { - printf("%s\n", disasm.c_str()); + lg::print("{}\n", disasm); } else { file_util::write_text_file(options.disassembly_output_file, disasm); } @@ -502,7 +505,7 @@ void Compiler::asm_file(const CompilationOptions& options) { if (m_listener.is_connected()) { m_listener.send_code(data, obj_file_name); } else { - printf("WARNING - couldn't load because listener isn't connected\n"); // todo log warn + lg::print("WARNING - couldn't load because listener isn't connected\n"); // todo log warn } } @@ -515,15 +518,15 @@ void Compiler::asm_file(const CompilationOptions& options) { } } else { if (options.load) { - printf("WARNING - couldn't load because coloring is not enabled\n"); 
+ lg::print("WARNING - couldn't load because coloring is not enabled\n"); } if (options.write) { - printf("WARNING - couldn't write because coloring is not enabled\n"); + lg::print("WARNING - couldn't write because coloring is not enabled\n"); } if (options.disassemble) { - printf("WARNING - couldn't disassemble because coloring is not enabled\n"); + lg::print("WARNING - couldn't disassemble because coloring is not enabled\n"); } } } diff --git a/goalc/compiler/Compiler.h b/goalc/compiler/Compiler.h index fc70268fb55..a4e1c56db5b 100644 --- a/goalc/compiler/Compiler.h +++ b/goalc/compiler/Compiler.h @@ -12,7 +12,8 @@ #include "goalc/compiler/CompilerSettings.h" #include "goalc/compiler/Env.h" #include "goalc/compiler/IR.h" -#include "goalc/compiler/SymbolInfo.h" +#include "goalc/compiler/docs/DocTypes.h" +#include "goalc/compiler/symbol_info.h" #include "goalc/data_compiler/game_text_common.h" #include "goalc/debugger/Debugger.h" #include "goalc/emitter/Register.h" @@ -96,9 +97,20 @@ class Compiler { std::vector> const& user_data); bool knows_object_file(const std::string& name); MakeSystem& make_system() { return m_make; } - std::set lookup_symbol_infos_starting_with(const std::string& prefix) const; - std::vector* lookup_exact_name_info(const std::string& name) const; + std::vector> lookup_symbol_info_by_file( + const std::string& file_path) const; + std::vector> lookup_symbol_info_by_prefix( + const std::string& prefix) const; + std::set lookup_symbol_names_starting_with(const std::string& prefix) const; + std::vector> lookup_exact_name_info( + const std::string& name) const; std::optional lookup_typespec(const std::string& symbol_name); + TypeSystem& type_system() { return m_ts; }; + // TODO - rename these types / namespaces -- consolidate with SymbolInfo and whatever else tries + // to also do this work + std::tuple, + std::unordered_map> + generate_per_file_symbol_info(); private: GameVersion m_version; @@ -110,7 +122,9 @@ class Compiler { listener::Listener 
m_listener; goos::Interpreter m_goos; Debugger m_debugger; + // TODO - this should be able to be removed, these are stored in `m_symbol_info` std::unordered_map m_macro_specs; + // TODO - this should be able to be removed, these are stored in `m_symbol_info` std::unordered_map m_symbol_types; std::unordered_map @@ -120,9 +134,9 @@ class Compiler { CompilerSettings m_settings; bool m_throw_on_define_extern_redefinition = false; std::unordered_set m_allow_inconsistent_definition_symbols; - SymbolInfoMap m_symbol_info; - std::unique_ptr m_repl; MakeSystem m_make; + std::unique_ptr m_repl; + symbol_info::SymbolInfoMap m_symbol_info; struct DebugStats { int num_spills = 0; @@ -307,7 +321,7 @@ class Compiler { int offset, Env* env); - std::string make_symbol_info_description(const SymbolInfo& info); + std::string make_symbol_info_description(const std::shared_ptr info); MathMode get_math_mode(const TypeSpec& ts); bool is_number(const TypeSpec& ts); diff --git a/goalc/compiler/SymbolInfo.h b/goalc/compiler/SymbolInfo.h deleted file mode 100644 index f042b263178..00000000000 --- a/goalc/compiler/SymbolInfo.h +++ /dev/null @@ -1,249 +0,0 @@ -#pragma once - -#include -#include -#include - -#include "common/goos/Object.h" -#include "common/util/Assert.h" -#include "common/util/Trie.h" - -#include "goalc/compiler/Val.h" - -/*! 
- * Info about a single symbol, representing one of: - * - Global variable - * - Global function - * - Type - * - Constant - * - Macro - * - Builtin keyword of the OpenGOAL language - */ -class SymbolInfo { - public: - struct Metadata { - std::string docstring = ""; - }; - - // TODO - states - // TODO - enums - enum class Kind { - GLOBAL_VAR, - FWD_DECLARED_SYM, - FUNCTION, - TYPE, - CONSTANT, - MACRO, - LANGUAGE_BUILTIN, - METHOD, - INVALID - }; - - static SymbolInfo make_global(const std::string& name, - const goos::Object& defining_form, - const std::optional meta = {}) { - SymbolInfo info; - info.m_kind = Kind::GLOBAL_VAR; - info.m_name = name; - info.m_def_form = defining_form; - if (meta) { - info.m_meta = *meta; - } - return info; - } - - static SymbolInfo make_fwd_declared_sym(const std::string& name, - const goos::Object& defining_form) { - SymbolInfo info; - info.m_kind = Kind::FWD_DECLARED_SYM; - info.m_name = name; - info.m_def_form = defining_form; - return info; - } - - static SymbolInfo make_function(const std::string& name, - const std::vector args, - const goos::Object& defining_form, - const std::optional meta = {}) { - SymbolInfo info; - info.m_kind = Kind::FUNCTION; - info.m_name = name; - info.m_def_form = defining_form; - if (meta) { - info.m_meta = *meta; - } - info.m_args = args; - return info; - } - - static SymbolInfo make_type(const std::string& name, - const goos::Object& defining_form, - const std::optional meta = {}) { - SymbolInfo info; - info.m_kind = Kind::TYPE; - info.m_name = name; - info.m_def_form = defining_form; - if (meta) { - info.m_meta = *meta; - } - return info; - } - - static SymbolInfo make_constant(const std::string& name, - const goos::Object& defining_form, - const std::optional meta = {}) { - SymbolInfo info; - info.m_kind = Kind::CONSTANT; - info.m_name = name; - info.m_def_form = defining_form; - if (meta) { - info.m_meta = *meta; - } - return info; - } - - static SymbolInfo make_macro(const std::string& name, - 
const goos::Object& defining_form, - const std::optional meta = {}) { - SymbolInfo info; - info.m_kind = Kind::MACRO; - info.m_name = name; - info.m_def_form = defining_form; - if (meta) { - info.m_meta = *meta; - } - return info; - } - - static SymbolInfo make_builtin(const std::string& name, const std::optional meta = {}) { - SymbolInfo info; - info.m_kind = Kind::LANGUAGE_BUILTIN; - info.m_name = name; - if (meta) { - info.m_meta = *meta; - } - return info; - } - - static SymbolInfo make_method(const std::string& method_name, - const std::vector args, - const MethodInfo& method_info, - const goos::Object& defining_form) { - SymbolInfo info; - info.m_kind = Kind::METHOD; - info.m_name = method_name; - info.m_method_info = method_info; - info.m_def_form = defining_form; - info.m_meta.docstring = - info.m_method_info.docstring.has_value() ? info.m_method_info.docstring.value() : ""; - info.m_args = args; - return info; - } - - const std::string& name() const { return m_name; } - const MethodInfo& method_info() const { return m_method_info; } - Kind kind() const { return m_kind; } - const goos::Object& src_form() const { return m_def_form; } - const Metadata& meta() const { return m_meta; } - const std::vector& args() const { return m_args; } - - private: - Kind m_kind = Kind::INVALID; - goos::Object m_def_form; - std::string m_name; - MethodInfo m_method_info; - Metadata m_meta; - std::vector m_args; - - std::string m_return_type; -}; - -/*! - * A map of symbol info. It internally stores the info in a prefix tree so you can quickly get - * a list of all symbols starting with a given prefix. 
- */ -class SymbolInfoMap { - public: - SymbolInfoMap() = default; - void add_global(const std::string& name, - const goos::Object& defining_form, - const std::optional meta = {}) { - m_map[name]->push_back(SymbolInfo::make_global(name, defining_form, meta)); - } - - void add_fwd_dec(const std::string& name, const goos::Object& defining_form) { - m_map[name]->push_back(SymbolInfo::make_fwd_declared_sym(name, defining_form)); - } - - // The m_symbol_types container stores TypeSpecs -- this does have argument information but not - // the names, which is why they have to be explicitly provided - void add_function(const std::string& name, - const std::vector args, - const goos::Object& defining_form, - const std::optional meta = {}) { - m_map[name]->push_back(SymbolInfo::make_function(name, args, defining_form, meta)); - } - - void add_type(const std::string& name, - const goos::Object& defining_form, - const std::optional meta = {}) { - m_map[name]->push_back(SymbolInfo::make_type(name, defining_form, meta)); - } - - void add_constant(const std::string& name, - const goos::Object& defining_form, - const std::optional meta = {}) { - m_map[name]->push_back(SymbolInfo::make_constant(name, defining_form, meta)); - } - - void add_macro(const std::string& name, - const goos::Object& defining_form, - const std::optional meta = {}) { - m_map[name]->push_back(SymbolInfo::make_macro(name, defining_form, meta)); - } - - void add_builtin(const std::string& name, const std::optional meta = {}) { - m_map[name]->push_back(SymbolInfo::make_builtin(name, meta)); - } - - // The m_symbol_types container stores TypeSpecs -- this does have argument information but not - // the names, which is why they have to be explicitly provided - void add_method(const std::string& method_name, - const std::vector args, - const MethodInfo& method_info, - const goos::Object& defining_form) { - m_map[method_name]->push_back( - SymbolInfo::make_method(method_name, args, method_info, defining_form)); - } - 
- std::vector* lookup_exact_name(const std::string& name) const { - return m_map.lookup(name); - } - - std::set lookup_symbols_starting_with(const std::string& prefix) const { - std::set result; - auto lookup = m_map.lookup_prefix(prefix); - for (auto& x : lookup) { - for (auto& y : *x) { - result.insert(y.name()); - } - } - return result; - } - - int symbol_count() const { return m_map.size(); } - - std::vector get_all_symbols() const { - std::vector info; - auto lookup = m_map.get_all_nodes(); - for (auto& x : lookup) { - for (auto& y : *x) { - info.push_back(y); - } - } - return info; - } - - private: - Trie> m_map; -}; diff --git a/goalc/compiler/compilation/CompilerControl.cpp b/goalc/compiler/compilation/CompilerControl.cpp index e53cfd4890e..d7517ac0066 100644 --- a/goalc/compiler/compilation/CompilerControl.cpp +++ b/goalc/compiler/compilation/CompilerControl.cpp @@ -14,8 +14,8 @@ #include "goalc/compiler/Compiler.h" #include "goalc/compiler/IR.h" -#include "goalc/compiler/SymbolInfo.h" #include "goalc/compiler/docs/DocTypes.h" +#include "goalc/compiler/symbol_info.h" #include "goalc/data_compiler/dir_tpages.h" #include "goalc/data_compiler/game_count.h" #include "goalc/data_compiler/game_text_common.h" @@ -358,35 +358,36 @@ Val* Compiler::compile_reload(const goos::Object& form, const goos::Object& rest return get_none(); } -std::string Compiler::make_symbol_info_description(const SymbolInfo& info) { - switch (info.kind()) { - case SymbolInfo::Kind::GLOBAL_VAR: +std::string Compiler::make_symbol_info_description( + const std::shared_ptr info) { + switch (info->m_kind) { + case symbol_info::Kind::GLOBAL_VAR: return fmt::format("[Global Variable] Type: {} Defined: {}", - m_symbol_types.at(m_goos.intern_ptr(info.name())).print(), - m_goos.reader.db.get_info_for(info.src_form())); - case SymbolInfo::Kind::LANGUAGE_BUILTIN: - return fmt::format("[Built-in Form] {}\n", info.name()); - case SymbolInfo::Kind::METHOD: + 
m_symbol_types.at(m_goos.intern_ptr(info->m_name)).print(), + m_goos.reader.db.get_info_for(info->m_def_form)); + case symbol_info::Kind::LANGUAGE_BUILTIN: + return fmt::format("[Built-in Form] {}\n", info->m_name); + case symbol_info::Kind::METHOD: return fmt::format("[Method] Type: {} Method Name: {} Defined: {}", - info.method_info().defined_in_type, info.name(), - m_goos.reader.db.get_info_for(info.src_form())); - case SymbolInfo::Kind::TYPE: - return fmt::format("[Type] Name: {} Defined: {}", info.name(), - m_goos.reader.db.get_info_for(info.src_form())); - case SymbolInfo::Kind::MACRO: - return fmt::format("[Macro] Name: {} Defined: {}", info.name(), - m_goos.reader.db.get_info_for(info.src_form())); - case SymbolInfo::Kind::CONSTANT: + info->m_method_info.defined_in_type, info->m_name, + m_goos.reader.db.get_info_for(info->m_def_form)); + case symbol_info::Kind::TYPE: + return fmt::format("[Type] Name: {} Defined: {}", info->m_name, + m_goos.reader.db.get_info_for(info->m_def_form)); + case symbol_info::Kind::MACRO: + return fmt::format("[Macro] Name: {} Defined: {}", info->m_name, + m_goos.reader.db.get_info_for(info->m_def_form)); + case symbol_info::Kind::CONSTANT: return fmt::format( - "[Constant] Name: {} Value: {} Defined: {}", info.name(), - m_global_constants.at(m_goos.reader.symbolTable.intern(info.name().c_str())).print(), - m_goos.reader.db.get_info_for(info.src_form())); - case SymbolInfo::Kind::FUNCTION: - return fmt::format("[Function] Name: {} Defined: {}", info.name(), - m_goos.reader.db.get_info_for(info.src_form())); - case SymbolInfo::Kind::FWD_DECLARED_SYM: - return fmt::format("[Forward-Declared] Name: {} Defined: {}", info.name(), - m_goos.reader.db.get_info_for(info.src_form())); + "[Constant] Name: {} Value: {} Defined: {}", info->m_name, + m_global_constants.at(m_goos.reader.symbolTable.intern(info->m_name.c_str())).print(), + m_goos.reader.db.get_info_for(info->m_def_form)); + case symbol_info::Kind::FUNCTION: + return 
fmt::format("[Function] Name: {} Defined: {}", info->m_name, + m_goos.reader.db.get_info_for(info->m_def_form)); + case symbol_info::Kind::FWD_DECLARED_SYM: + return fmt::format("[Forward-Declared] Name: {} Defined: {}", info->m_name, + m_goos.reader.db.get_info_for(info->m_def_form)); default: ASSERT(false); return {}; @@ -398,11 +399,11 @@ Val* Compiler::compile_get_info(const goos::Object& form, const goos::Object& re auto args = get_va(form, rest); va_check(form, args, {goos::ObjectType::SYMBOL}, {}); - auto result = m_symbol_info.lookup_exact_name(args.unnamed.at(0).as_symbol().name_ptr); - if (!result) { + const auto result = m_symbol_info.lookup_exact_name(args.unnamed.at(0).as_symbol().name_ptr); + if (result.empty()) { lg::print("No results found.\n"); } else { - for (auto& info : *result) { + for (const auto& info : result) { lg::print("{}", make_symbol_info_description(info)); } } @@ -437,9 +438,12 @@ replxx::Replxx::completions_t Compiler::find_symbols_or_object_file_by_prefix( completions.push_back(fmt::format("\"{}\")", match)); } } else { + // TODO - GOAL's method calling syntax sucks for method name auto-completion + // maybe something that could be improved? Though it would be a radical departure from + // the syntax const auto [token, stripped_leading_paren] = m_repl->get_current_repl_token(context); // Otherwise, look for symbols - auto possible_forms = lookup_symbol_infos_starting_with(token); + auto possible_forms = lookup_symbol_names_starting_with(token); for (auto& x : possible_forms) { completions.push_back(stripped_leading_paren ? 
"(" + x : x); @@ -456,7 +460,7 @@ replxx::Replxx::hints_t Compiler::find_hints_by_prefix(std::string const& contex (void)contextLen; (void)user_data; auto token = m_repl->get_current_repl_token(context); - auto possible_forms = lookup_symbol_infos_starting_with(token.first); + auto possible_forms = lookup_symbol_names_starting_with(token.first); replxx::Replxx::hints_t hints; @@ -497,9 +501,8 @@ void Compiler::repl_coloring( curr_symbol.second.erase(0, 1); curr_symbol.first++; } - std::vector* sym_match = lookup_exact_name_info(curr_symbol.second); - if (sym_match != nullptr && sym_match->size() == 1) { - SymbolInfo sym_info = sym_match->at(0); + const auto matching_symbols = lookup_exact_name_info(curr_symbol.second); + if (matching_symbols.size() == 1) { for (int pos = curr_symbol.first; pos <= int(i); pos++) { // TODO - currently just coloring all types brown/gold // - would be nice to have a different color for globals, functions, etc @@ -541,7 +544,7 @@ void Compiler::repl_coloring( } } - // TODO - general syntax highlighting with regexes (quotes, symbols, etc) + // TODO - general syntax highlighting with AST } Val* Compiler::compile_autocomplete(const goos::Object& form, const goos::Object& rest, Env* env) { @@ -550,7 +553,7 @@ Val* Compiler::compile_autocomplete(const goos::Object& form, const goos::Object va_check(form, args, {goos::ObjectType::SYMBOL}, {}); Timer timer; - auto result = m_symbol_info.lookup_symbols_starting_with(args.unnamed.at(0).as_symbol().name_ptr); + auto result = m_symbol_info.lookup_names_starting_with(args.unnamed.at(0).as_symbol().name_ptr); auto time = timer.getMs(); for (auto& x : result) { @@ -581,25 +584,33 @@ Val* Compiler::compile_update_macro_metadata(const goos::Object& form, auto arg_spec = m_goos.parse_arg_spec(form, args.unnamed.at(2)); m_macro_specs[name] = arg_spec; - - SymbolInfo::Metadata sym_meta; - sym_meta.docstring = args.unnamed.at(1).as_string()->data; - m_symbol_info.add_macro(name, form, sym_meta); + 
m_symbol_info.add_macro(name, arg_spec, form, args.unnamed.at(1).as_string()->data); return get_none(); } -std::set Compiler::lookup_symbol_infos_starting_with(const std::string& prefix) const { +std::vector> Compiler::lookup_symbol_info_by_file( + const std::string& file_path) const { + return m_symbol_info.lookup_symbols_by_file(file_path); +} + +std::vector> Compiler::lookup_symbol_info_by_prefix( + const std::string& prefix) const { + return m_symbol_info.lookup_symbols_starting_with(prefix); +} + +std::set Compiler::lookup_symbol_names_starting_with(const std::string& prefix) const { if (m_goos.reader.check_string_is_valid(prefix)) { - return m_symbol_info.lookup_symbols_starting_with(prefix); + return m_symbol_info.lookup_names_starting_with(prefix); } return {}; } -std::vector* Compiler::lookup_exact_name_info(const std::string& name) const { +std::vector> Compiler::lookup_exact_name_info( + const std::string& name) const { if (m_goos.reader.check_string_is_valid(name)) { return m_symbol_info.lookup_exact_name(name); } else { - return nullptr; + return {}; } } @@ -611,55 +622,11 @@ std::optional Compiler::lookup_typespec(const std::string& symbol_name return {}; } -Val* Compiler::compile_load_project(const goos::Object& form, const goos::Object& rest, Env*) { - auto args = get_va(form, rest); - va_check(form, args, {goos::ObjectType::STRING}, {}); - m_make.load_project_file(args.unnamed.at(0).as_string()->data); - return get_none(); -} - -Val* Compiler::compile_make(const goos::Object& form, const goos::Object& rest, Env*) { - auto args = get_va(form, rest); - va_check(form, args, {goos::ObjectType::STRING}, - {{"force", {false, {goos::ObjectType::SYMBOL}}}, - {"verbose", {false, {goos::ObjectType::SYMBOL}}}}); - bool force = false; - if (args.has_named("force")) { - force = get_true_or_false(form, args.get_named("force")); - } - - bool verbose = false; - if (args.has_named("verbose")) { - verbose = get_true_or_false(form, args.get_named("verbose")); - } - - 
m_make.make(args.unnamed.at(0).as_string()->data, force, verbose); - return get_none(); -} - -Val* Compiler::compile_print_debug_compiler_stats(const goos::Object& form, - const goos::Object& rest, - Env*) { - auto args = get_va(form, rest); - va_check(form, args, {}, {}); - - lg::print("Spill operations (total): {}\n", m_debug_stats.num_spills); - lg::print("Spill operations (v1 only): {}\n", m_debug_stats.num_spills_v1); - lg::print("Eliminated moves: {}\n", m_debug_stats.num_moves_eliminated); - lg::print("Total functions: {}\n", m_debug_stats.total_funcs); - lg::print("Functions requiring v1: {}\n", m_debug_stats.funcs_requiring_v1_allocator); - lg::print("Size of autocomplete prefix tree: {}\n", m_symbol_info.symbol_count()); - - return get_none(); -} - -Val* Compiler::compile_gen_docs(const goos::Object& form, const goos::Object& rest, Env*) { - auto args = get_va(form, rest); - va_check(form, args, {goos::ObjectType::STRING}, {}); - - const auto& doc_path = fs::path(args.unnamed.at(0).as_string()->data); - lg::info("Saving docs to: {}", doc_path.string()); - +std::tuple, + std::unordered_map> +Compiler::generate_per_file_symbol_info() { + // TODO - remove this function, all required information has been consolidated into `SymbolInfo` + // it just has to be serialized in the same way, I will do it later const auto symbols = m_symbol_info.get_all_symbols(); std::unordered_map all_symbols; @@ -673,7 +640,7 @@ Val* Compiler::compile_gen_docs(const goos::Object& form, const goos::Object& re lg::info("Processing [{}/{}] symbols...", count, symbols.size()); } std::optional def_loc; - const auto& goos_info = m_goos.reader.db.get_short_info_for(sym_info.src_form()); + const auto& goos_info = m_goos.reader.db.get_short_info_for(sym_info->m_def_form); if (goos_info) { Docs::DefinitionLocation new_def_loc; new_def_loc.filename = file_util::convert_to_unix_path_separators(file_util::split_path_at( @@ -684,15 +651,15 @@ Val* Compiler::compile_gen_docs(const goos::Object& 
form, const goos::Object& re } Docs::SymbolDocumentation sym_doc; - sym_doc.name = sym_info.name(); - sym_doc.description = sym_info.meta().docstring; - sym_doc.kind = sym_info.kind(); + sym_doc.name = sym_info->m_name; + sym_doc.description = sym_info->m_docstring; + sym_doc.kind = sym_info->m_kind; sym_doc.def_location = def_loc; - if (all_symbols.count(sym_info.name()) > 1) { - lg::error("A symbol was defined twice, how did this happen? {}", sym_info.name()); + if (all_symbols.count(sym_info->m_name) > 1) { + lg::error("A symbol was defined twice, how did this happen? {}", sym_info->m_name); } else { - all_symbols.emplace(sym_info.name(), sym_doc); + all_symbols.emplace(sym_info->m_name, sym_doc); } Docs::FileDocumentation file_doc; @@ -711,36 +678,36 @@ Val* Compiler::compile_gen_docs(const goos::Object& form, const goos::Object& re } // TODO - states / enums / built-ins - if (sym_info.kind() == SymbolInfo::Kind::GLOBAL_VAR || - sym_info.kind() == SymbolInfo::Kind::CONSTANT) { + if (sym_info->m_kind == symbol_info::Kind::GLOBAL_VAR || + sym_info->m_kind == symbol_info::Kind::CONSTANT) { Docs::VariableDocumentation var; - var.name = sym_info.name(); - var.description = sym_info.meta().docstring; - if (sym_info.kind() == SymbolInfo::Kind::CONSTANT) { + var.name = sym_info->m_name; + var.description = sym_info->m_docstring; + if (sym_info->m_kind == symbol_info::Kind::CONSTANT) { var.type = "unknown"; // Unfortunately, constants are not properly typed } else { var.type = m_symbol_types.at(m_goos.intern_ptr(var.name)).base_type(); } var.def_location = def_loc; - if (sym_info.kind() == SymbolInfo::Kind::GLOBAL_VAR) { + if (sym_info->m_kind == symbol_info::Kind::GLOBAL_VAR) { file_doc.global_vars.push_back(var); } else { file_doc.constants.push_back(var); } - } else if (sym_info.kind() == SymbolInfo::Kind::FUNCTION) { + } else if (sym_info->m_kind == symbol_info::Kind::FUNCTION) { Docs::FunctionDocumentation func; - func.name = sym_info.name(); - func.description = 
sym_info.meta().docstring; + func.name = sym_info->m_name; + func.description = sym_info->m_docstring; func.def_location = def_loc; - func.args = Docs::get_args_from_docstring(sym_info.args(), func.description); + func.args = Docs::get_args_from_docstring(sym_info->m_args, func.description); // The last arg in the typespec is the return type const auto& func_type = m_symbol_types.at(m_goos.intern_ptr(func.name)); func.return_type = func_type.last_arg().base_type(); file_doc.functions.push_back(func); - } else if (sym_info.kind() == SymbolInfo::Kind::TYPE) { + } else if (sym_info->m_kind == symbol_info::Kind::TYPE) { Docs::TypeDocumentation type; - type.name = sym_info.name(); - type.description = sym_info.meta().docstring; + type.name = sym_info->m_name; + type.description = sym_info->m_docstring; type.def_location = def_loc; const auto& type_info = m_ts.lookup_type(type.name); type.parent_type = type_info->get_parent(); @@ -784,10 +751,10 @@ Val* Compiler::compile_gen_docs(const goos::Object& form, const goos::Object& re type.states.push_back(state_doc); } file_doc.types.push_back(type); - } else if (sym_info.kind() == SymbolInfo::Kind::MACRO) { + } else if (sym_info->m_kind == symbol_info::Kind::MACRO) { Docs::MacroDocumentation macro_doc; - macro_doc.name = sym_info.name(); - macro_doc.description = sym_info.meta().docstring; + macro_doc.name = sym_info->m_name; + macro_doc.description = sym_info->m_docstring; macro_doc.def_location = def_loc; const auto& arg_spec = m_macro_specs[macro_doc.name]; for (const auto& arg : arg_spec.unnamed) { @@ -804,16 +771,16 @@ Val* Compiler::compile_gen_docs(const goos::Object& form, const goos::Object& re macro_doc.variadic_arg = arg_spec.rest; } file_doc.macros.push_back(macro_doc); - } else if (sym_info.kind() == SymbolInfo::Kind::METHOD) { + } else if (sym_info->m_kind == symbol_info::Kind::METHOD) { Docs::MethodDocumentation method_doc; - method_doc.name = sym_info.name(); - method_doc.description = 
sym_info.meta().docstring; + method_doc.name = sym_info->m_name; + method_doc.description = sym_info->m_docstring; method_doc.def_location = def_loc; - const auto& method_info = sym_info.method_info(); + const auto& method_info = sym_info->m_method_info; method_doc.id = method_info.id; - method_doc.type = sym_info.method_info().defined_in_type; + method_doc.type = sym_info->m_method_info.defined_in_type; method_doc.is_override = method_info.overrides_parent; - method_doc.args = Docs::get_args_from_docstring(sym_info.args(), method_doc.description); + method_doc.args = Docs::get_args_from_docstring(sym_info->m_args, method_doc.description); // The last arg in the typespec is the return type const auto& method_type = method_info.type; method_doc.return_type = method_type.last_arg().base_type(); @@ -822,6 +789,59 @@ Val* Compiler::compile_gen_docs(const goos::Object& form, const goos::Object& re } file_docs[file_doc_key] = file_doc; } + return {all_symbols, file_docs}; +} + +Val* Compiler::compile_load_project(const goos::Object& form, const goos::Object& rest, Env*) { + auto args = get_va(form, rest); + va_check(form, args, {goos::ObjectType::STRING}, {}); + m_make.load_project_file(args.unnamed.at(0).as_string()->data); + return get_none(); +} + +Val* Compiler::compile_make(const goos::Object& form, const goos::Object& rest, Env*) { + auto args = get_va(form, rest); + va_check(form, args, {goos::ObjectType::STRING}, + {{"force", {false, {goos::ObjectType::SYMBOL}}}, + {"verbose", {false, {goos::ObjectType::SYMBOL}}}}); + bool force = false; + if (args.has_named("force")) { + force = get_true_or_false(form, args.get_named("force")); + } + + bool verbose = false; + if (args.has_named("verbose")) { + verbose = get_true_or_false(form, args.get_named("verbose")); + } + + m_make.make(args.unnamed.at(0).as_string()->data, force, verbose); + return get_none(); +} + +Val* Compiler::compile_print_debug_compiler_stats(const goos::Object& form, + const goos::Object& rest, + 
Env*) { + auto args = get_va(form, rest); + va_check(form, args, {}, {}); + + lg::print("Spill operations (total): {}\n", m_debug_stats.num_spills); + lg::print("Spill operations (v1 only): {}\n", m_debug_stats.num_spills_v1); + lg::print("Eliminated moves: {}\n", m_debug_stats.num_moves_eliminated); + lg::print("Total functions: {}\n", m_debug_stats.total_funcs); + lg::print("Functions requiring v1: {}\n", m_debug_stats.funcs_requiring_v1_allocator); + lg::print("Size of autocomplete prefix tree: {}\n", m_symbol_info.symbol_count()); + + return get_none(); +} + +Val* Compiler::compile_gen_docs(const goos::Object& form, const goos::Object& rest, Env*) { + auto args = get_va(form, rest); + va_check(form, args, {goos::ObjectType::STRING}, {}); + + const auto& doc_path = fs::path(args.unnamed.at(0).as_string()->data); + lg::info("Saving docs to: {}", doc_path.string()); + + const auto [all_symbols, file_docs] = generate_per_file_symbol_info(); json symbol_map_data(all_symbols); file_util::write_text_file( diff --git a/goalc/compiler/compilation/Define.cpp b/goalc/compiler/compilation/Define.cpp index ff8e6df4e42..a91e515593b 100644 --- a/goalc/compiler/compilation/Define.cpp +++ b/goalc/compiler/compilation/Define.cpp @@ -11,10 +11,10 @@ */ Val* Compiler::compile_define(const goos::Object& form, const goos::Object& rest, Env* env) { auto args = get_va(form, rest); - SymbolInfo::Metadata sym_meta; + std::string docstring; // Grab the docstring (if it's there) and then rip it out so we can do the normal validation if (args.unnamed.size() == 3 && args.unnamed.at(1).is_string()) { - sym_meta.docstring = args.unnamed.at(1).as_string()->data; + docstring = args.unnamed.at(1).as_string()->data; args.unnamed.erase(args.unnamed.begin() + 1); } @@ -35,6 +35,7 @@ Val* Compiler::compile_define(const goos::Object& form, const goos::Object& rest auto sym_val = fe->alloc_val(symbol_string(sym), m_ts.make_typespec("symbol")); auto compiled_val = compile_error_guard(val, env); auto 
as_lambda = dynamic_cast(compiled_val); + auto in_gpr = compiled_val->to_gpr(form, fe); if (as_lambda) { // there are two cases in which we save a function body that is passed to a define: // 1. It generated code [so went through the compiler] and the allow_inline flag is set. @@ -50,11 +51,14 @@ Val* Compiler::compile_define(const goos::Object& form, const goos::Object& rest } // Most defines come via macro invokations, we want the TRUE defining form location // if we can get it + // TODO - test the return value changes if (env->macro_expand_env()) { - m_symbol_info.add_function(symbol_string(sym), as_lambda->lambda.params, - env->macro_expand_env()->root_form(), sym_meta); + m_symbol_info.add_function(symbol_string(sym), in_gpr->type().last_arg().base_type(), + as_lambda->lambda.params, env->macro_expand_env()->root_form(), + docstring); } else { - m_symbol_info.add_function(symbol_string(sym), as_lambda->lambda.params, form, sym_meta); + m_symbol_info.add_function(symbol_string(sym), in_gpr->type().last_arg().base_type(), + as_lambda->lambda.params, form, docstring); } } @@ -62,7 +66,6 @@ Val* Compiler::compile_define(const goos::Object& form, const goos::Object& rest throw_compiler_error(form, "Cannot define {} because it cannot be set.", sym_val->print()); } - auto in_gpr = compiled_val->to_gpr(form, fe); auto existing_type = m_symbol_types.find(sym.as_symbol()); if (existing_type == m_symbol_types.end()) { m_symbol_types[sym.as_symbol()] = in_gpr->type(); @@ -79,7 +82,7 @@ Val* Compiler::compile_define(const goos::Object& form, const goos::Object& rest if (!as_lambda) { // Don't double-add functions as globals - m_symbol_info.add_global(symbol_string(sym), form, sym_meta); + m_symbol_info.add_global(symbol_string(sym), in_gpr->type().base_type(), form, docstring); } env->emit(form, std::make_unique(sym_val, in_gpr)); diff --git a/goalc/compiler/compilation/Macro.cpp b/goalc/compiler/compilation/Macro.cpp index 2fbcf0229c9..01115b860d7 100644 --- 
a/goalc/compiler/compilation/Macro.cpp +++ b/goalc/compiler/compilation/Macro.cpp @@ -45,6 +45,7 @@ Val* Compiler::compile_goos_macro(const goos::Object& o, env->function_env()->alloc_env(env, name.as_symbol(), macro->body, o); try { const auto& compile_result = compile(goos_result, compile_env_for_macro); + // TODO - is this critical (do the args and such change?)? m_macro_specs.emplace(macro->name, macro->args); return compile_result; } catch (CompilerException& ce) { @@ -180,10 +181,9 @@ Val* Compiler::compile_define_constant(const goos::Object& form, rest = &pair_cdr(*rest); // check for potential docstring - SymbolInfo::Metadata sym_meta; + std::string docstring = ""; if (rest->is_pair() && pair_car(*rest).is_string() && !pair_cdr(*rest).is_empty_list()) { - std::string docstring = pair_car(*rest).as_string()->data; - sym_meta.docstring = docstring; + docstring = pair_car(*rest).as_string()->data; rest = &pair_cdr(*rest); } @@ -218,7 +218,7 @@ Val* Compiler::compile_define_constant(const goos::Object& form, // TODO - eventually, it'd be nice if global constants were properly typed // and this information was propagated - m_symbol_info.add_constant(sym.name_ptr, form, sym_meta); + m_symbol_info.add_constant(sym.name_ptr, form, docstring); return get_none(); } diff --git a/goalc/compiler/compilation/Type.cpp b/goalc/compiler/compilation/Type.cpp index 3f7c9cfa92c..c871f916a52 100644 --- a/goalc/compiler/compilation/Type.cpp +++ b/goalc/compiler/compilation/Type.cpp @@ -441,7 +441,7 @@ Val* Compiler::compile_deftype(const goos::Object& form, const goos::Object& res } } - m_symbol_info.add_type(result.type.base_type(), form); + m_symbol_info.add_type(result.type.base_type(), result.type_info, form); // return none, making the value of (deftype..) 
unusable return get_none(); diff --git a/goalc/compiler/docs/DocTypes.cpp b/goalc/compiler/docs/DocTypes.cpp index 4c1cb024614..41ff71e72c0 100644 --- a/goalc/compiler/docs/DocTypes.cpp +++ b/goalc/compiler/docs/DocTypes.cpp @@ -123,14 +123,15 @@ void to_json(json& j, const MacroDocumentation& obj) { } } -std::vector get_args_from_docstring(std::vector args, - std::string docstring) { +std::vector get_args_from_docstring( + std::vector args, + std::string docstring) { std::vector arg_docs; for (const auto& arg : args) { ArgumentDocumentation arg_doc; arg_doc.name = arg.name; // TODO - is this type reliable? - arg_doc.type = arg.type.base_type(); + arg_doc.type = arg.type; arg_docs.push_back(arg_doc); } if (docstring.empty()) { diff --git a/goalc/compiler/docs/DocTypes.h b/goalc/compiler/docs/DocTypes.h index d237cd127b2..1f4af5f8f9a 100644 --- a/goalc/compiler/docs/DocTypes.h +++ b/goalc/compiler/docs/DocTypes.h @@ -3,12 +3,15 @@ #include #include -#include "goalc/compiler/SymbolInfo.h" +#include "goalc/compiler/symbol_info.h" #include "third-party/json.hpp" using json = nlohmann::json; +// TODO - deprecate this file in factor of the now consolidated `SymbolInfo` +// which now contains comprehensive info on all forms of symbols + namespace Docs { struct DefinitionLocation { @@ -69,6 +72,7 @@ struct FieldDocumentation { void to_json(json& j, const FieldDocumentation& obj); struct TypeMethodDocumentation { + // TODO - relevant? int id; std::string name; bool is_override = false; @@ -89,6 +93,7 @@ struct TypeDocumentation { std::optional def_location; int size; std::vector fields = {}; + // TODO - who cares, remove this probably int method_count; std::vector methods = {}; std::vector states = {}; @@ -96,10 +101,14 @@ struct TypeDocumentation { void to_json(json& j, const TypeDocumentation& obj); struct MethodDocumentation { + // TODO - relevant? 
int id; bool is_builtin; std::string name; std::string description = ""; + // TODO - this is `object` sometimes, for example `(defmethod print ((this light))` + // i believe this is because we always grab the first symbol, but of course, overridden methods + // dont work like that so things are likely working as intended std::string type; std::optional def_location; // TODO - need to track function calls to determine this, obviously cant be determined from just @@ -139,13 +148,14 @@ struct SymbolDocumentation { // TODO - forward declared symbols std::string name; std::string description = ""; - SymbolInfo::Kind kind; + symbol_info::Kind kind; std::optional def_location = {}; std::vector forward_declared_in = {}; }; void to_json(json& j, const SymbolDocumentation& obj); -std::vector get_args_from_docstring(std::vector args, - std::string docstring); +std::vector get_args_from_docstring( + std::vector args, + std::string docstring); } // namespace Docs diff --git a/goalc/compiler/symbol_info.cpp b/goalc/compiler/symbol_info.cpp new file mode 100644 index 00000000000..3a6565a7762 --- /dev/null +++ b/goalc/compiler/symbol_info.cpp @@ -0,0 +1,318 @@ +#include "symbol_info.h" + +#include "common/log/log.h" +#include "common/util/FileUtil.h" +#include "common/util/string_util.h" + +namespace symbol_info { +void SymbolInfo::update_args_from_docstring() { + if (m_docstring.empty()) { + return; + } + auto lines = str_util::split(m_docstring); + for (const auto& line : lines) { + const auto trimmed_line = str_util::ltrim(line); + if (str_util::starts_with(trimmed_line, "@param")) { + // Get the info from the @param line + const auto& tokens = + str_util::regex_get_capture_groups(trimmed_line, "(@param.)\\s?([^\\s]*)\\s(.*)"); + if (tokens.size() != 3) { + lg::warn("invalid docstring line - {}, skipping", trimmed_line); + continue; + } + const auto& param_type = str_util::trim(tokens[0]); + const auto& param_name = str_util::trim(tokens[1]); + const auto& param_description = 
str_util::trim(tokens[2]); + // Locate the appropriate arg based on the name + for (auto& arg : m_args) { + if (arg.name == param_name) { + arg.description = param_description; + if (param_type == "@param") { + // a normal arg, nothing fancy + } else if (param_type == "@param_") { + // it's unused + arg.is_unused = true; + } else if (param_type == "@param!") { + // the params value is mutated within the function body + arg.is_mutated = true; + } else if (param_type == "@param?") { + // the param is optional -- there are checks to see if it was provided or not so its + // safe to pass "nothing" + arg.is_optional = true; + } + } + } + } + } +} + +void SymbolInfo::set_definition_location(const goos::TextDb* textdb) { + const auto& goos_info = textdb->get_short_info_for(m_def_form); + if (goos_info) { + DefinitionLocation def_loc; + def_loc.line_idx = goos_info->line_idx_to_display; + def_loc.char_idx = goos_info->pos_in_line; + def_loc.file_path = file_util::convert_to_unix_path_separators(goos_info->filename); + m_def_location = def_loc; + } +} + +void SymbolInfoMap::add_symbol_to_file_index(const std::string& file_path, + std::shared_ptr symbol) { + if (m_file_symbol_index.find(file_path) == m_file_symbol_index.end()) { + m_file_symbol_index[file_path] = {}; + } + m_file_symbol_index[file_path].push_back(symbol); +} + +void SymbolInfoMap::add_global(const std::string& name, + const std::string& type, + const goos::Object& defining_form, + const std::string& docstring) { + SymbolInfo info = { + .m_kind = Kind::GLOBAL_VAR, + .m_name = name, + .m_def_form = defining_form, + .m_docstring = docstring, + .m_type = type, + }; + info.set_definition_location(m_textdb); + const auto inserted_symbol = m_symbol_map.insert(name, info); + if (info.m_def_location) { + add_symbol_to_file_index(info.m_def_location->file_path, inserted_symbol); + } +} + +void SymbolInfoMap::add_fwd_dec(const std::string& name, const goos::Object& defining_form) { + SymbolInfo info = {.m_kind = 
Kind::FWD_DECLARED_SYM, .m_name = name, .m_def_form = defining_form}; + info.set_definition_location(m_textdb); + const auto inserted_symbol = m_symbol_map.insert(name, info); + if (info.m_def_location) { + add_symbol_to_file_index(info.m_def_location->file_path, inserted_symbol); + } +} + +void SymbolInfoMap::add_function(const std::string& name, + const std::string& return_type, + const std::vector& args, + const goos::Object& defining_form, + const std::string& docstring) { + SymbolInfo info = { + .m_kind = Kind::FUNCTION, + .m_name = name, + .m_def_form = defining_form, + .m_docstring = docstring, + .m_return_type = return_type, + }; + for (const auto& goal_arg : args) { + ArgumentInfo arg_info; + arg_info.name = goal_arg.name; + arg_info.type_spec = goal_arg.type; + // TODO - is this reliable? + arg_info.type = goal_arg.type.base_type(); + info.m_args.push_back(arg_info); + } + info.update_args_from_docstring(); + info.set_definition_location(m_textdb); + const auto inserted_symbol = m_symbol_map.insert(name, info); + if (info.m_def_location) { + add_symbol_to_file_index(info.m_def_location->file_path, inserted_symbol); + } +} + +void SymbolInfoMap::add_type(const std::string& name, + Type* type_info, + const goos::Object& defining_form, + const std::string& docstring) { + SymbolInfo info = { + .m_kind = Kind::TYPE, + .m_name = name, + .m_def_form = defining_form, + .m_docstring = docstring, + .m_parent_type = type_info->get_parent(), + .m_type_size = type_info->get_size_in_memory(), + }; + // Only structure types have fields + auto as_structure_type = dynamic_cast(type_info); + if (as_structure_type) { // generate the inspect method + for (const auto& field : as_structure_type->fields()) { + // TODO - field docstrings arent a thing, yet! 
+ FieldInfo field_info = { + .name = field.name(), + .description = "", + .type = field.type().base_type(), + .is_array = field.is_array(), + .is_dynamic = field.is_dynamic(), + .is_inline = field.is_inline(), + }; + info.m_type_fields.push_back(field_info); + } + } + for (const auto& method : type_info->get_methods_defined_for_type()) { + if (method.type.base_type() == "state") { + TypeStateInfo state_info = { + .name = method.name, + .is_virtual = true, + .id = method.id, + }; + info.m_type_states.push_back(state_info); + } else { + TypeMethodInfo method_info = { + .id = method.id, + .name = method.name, + .is_override = method.overrides_parent, + }; + info.m_type_methods.push_back(method_info); + } + } + for (const auto& [state_name, state_info] : type_info->get_states_declared_for_type()) { + TypeStateInfo type_state_info = { + .name = state_name, + .is_virtual = false, + }; + info.m_type_states.push_back(type_state_info); + } + info.set_definition_location(m_textdb); + const auto inserted_symbol = m_symbol_map.insert(name, info); + if (info.m_def_location) { + add_symbol_to_file_index(info.m_def_location->file_path, inserted_symbol); + } +} + +void SymbolInfoMap::add_constant(const std::string& name, + const goos::Object& defining_form, + const std::string& docstring) { + SymbolInfo info = { + .m_kind = Kind::CONSTANT, + .m_name = name, + .m_def_form = defining_form, + .m_docstring = docstring, + // TODO - unfortunately, constants are not properly typed + .m_type = "unknown", + }; + info.set_definition_location(m_textdb); + const auto inserted_symbol = m_symbol_map.insert(name, info); + if (info.m_def_location) { + add_symbol_to_file_index(info.m_def_location->file_path, inserted_symbol); + } +} + +void SymbolInfoMap::add_macro(const std::string& name, + const goos::ArgumentSpec arg_spec, + const goos::Object& defining_form, + const std::string& docstring) { + SymbolInfo info = { + .m_kind = Kind::MACRO, + .m_name = name, + .m_def_form = defining_form, + 
.m_docstring = docstring, + }; + for (const auto& arg : arg_spec.unnamed) { + info.m_macro_args.push_back(arg); + } + for (const auto& arg : arg_spec.named) { + std::optional def_value; + if (arg.second.has_default) { + def_value = arg.second.default_value.print(); + } + info.m_macro_kwargs.push_back({arg.first, def_value}); + } + if (!arg_spec.rest.empty()) { + info.m_variadic_arg = arg_spec.rest; + } + info.set_definition_location(m_textdb); + const auto inserted_symbol = m_symbol_map.insert(name, info); + if (info.m_def_location) { + add_symbol_to_file_index(info.m_def_location->file_path, inserted_symbol); + } +} + +void SymbolInfoMap::add_builtin(const std::string& name, const std::string& docstring) { + SymbolInfo info = { + .m_kind = Kind::LANGUAGE_BUILTIN, + .m_name = name, + .m_docstring = docstring, + }; + info.set_definition_location(m_textdb); + m_symbol_map.insert(name, info); +} + +void SymbolInfoMap::add_method(const std::string& method_name, + const std::vector& args, + const MethodInfo& method_info, + const goos::Object& defining_form) { + SymbolInfo info = { + .m_kind = Kind::METHOD, + .m_name = method_name, + .m_method_info = method_info, + .m_method_builtin = method_info.id <= 9, + }; + if (method_info.docstring) { + info.m_docstring = method_info.docstring.value(); + } + for (const auto& goal_arg : args) { + ArgumentInfo arg_info; + arg_info.name = goal_arg.name; + arg_info.type_spec = goal_arg.type; + // TODO - is this reliable? 
+ arg_info.type = goal_arg.type.base_type(); + info.m_args.push_back(arg_info); + } + info.update_args_from_docstring(); + info.set_definition_location(m_textdb); + const auto inserted_symbol = m_symbol_map.insert(method_name, info); + if (info.m_def_location) { + add_symbol_to_file_index(info.m_def_location->file_path, inserted_symbol); + } +} + +std::vector> SymbolInfoMap::lookup_symbols_by_file( + const std::string& file_path) const { + if (m_file_symbol_index.find(file_path) != m_file_symbol_index.end()) { + return m_file_symbol_index.at(file_path); + } + return {}; +} + +std::vector> SymbolInfoMap::lookup_exact_name( + const std::string& name) const { + return m_symbol_map.retrieve_with_exact(name); +} + +std::vector> SymbolInfoMap::lookup_symbols_starting_with( + const std::string& prefix) const { + std::vector> symbols; + const auto lookup = m_symbol_map.retrieve_with_prefix(prefix); + for (const auto& result : lookup) { + symbols.push_back(result); + } + return symbols; +} + +std::set SymbolInfoMap::lookup_names_starting_with(const std::string& prefix) const { + std::set names; + const auto lookup = m_symbol_map.retrieve_with_prefix(prefix); + for (const auto& result : lookup) { + names.insert(result->m_name); + } + return names; +} + +int SymbolInfoMap::symbol_count() const { + return m_symbol_map.size(); +} + +std::vector> SymbolInfoMap::get_all_symbols() const { + return m_symbol_map.get_all_elements(); +} + +void SymbolInfoMap::evict_symbols_using_file_index(const std::string& file_path) { + const auto standardized_path = file_util::convert_to_unix_path_separators(file_path); + if (m_file_symbol_index.find(standardized_path) != m_file_symbol_index.end()) { + for (const auto& symbol : m_file_symbol_index.at(standardized_path)) { + m_symbol_map.remove(symbol); + } + m_file_symbol_index.erase(standardized_path); + } +} +} // namespace symbol_info diff --git a/goalc/compiler/symbol_info.h b/goalc/compiler/symbol_info.h new file mode 100644 index 
00000000000..51eb3a9b922 --- /dev/null +++ b/goalc/compiler/symbol_info.h @@ -0,0 +1,173 @@ +#pragma once + +#include +#include +#include + +#include "common/goos/Object.h" +#include "common/util/Assert.h" +#include "common/util/trie_map.h" + +#include "goalc/compiler/Val.h" + +namespace symbol_info { + +// TODO - states +// TODO - enums +enum class Kind { + GLOBAL_VAR, + FWD_DECLARED_SYM, + FUNCTION, + TYPE, + CONSTANT, + MACRO, + LANGUAGE_BUILTIN, + METHOD, + INVALID +}; + +struct DefinitionLocation { + std::string file_path; + uint32_t line_idx; + uint32_t char_idx; + // TODO - store the extent of the symbol definition as well +}; + +struct ArgumentInfo { + std::string name; + // TODO - anything use this? + TypeSpec type_spec; + std::string type; + std::string description = ""; + // !var + bool is_mutated = false; + // ?var + bool is_optional = false; + // _var + bool is_unused = false; +}; + +struct FieldInfo { + std::string name; + // TODO - DefinitionLocation def_location; + std::string description = ""; + std::string type; + // ?? TODO + bool is_array = false; + // :dynamic + bool is_dynamic = false; + // :inline + bool is_inline = false; +}; + +struct TypeMethodInfo { + int id; // TODO - is this even relevant anymore? + std::string name; + // TODO - DefinitionLocation def_location; + bool is_override = false; +}; + +struct TypeStateInfo { + std::string name; + // TODO - DefinitionLocation def_location; + bool is_virtual = false; + std::optional id; // TODO - is this even relevant anymore? +}; + +/*! 
+ * Info about a single symbol, representing one of: + * - Global variable + * - Global function + * - Type + * - Constant + * - Macro + * - Builtin keyword of the OpenGOAL language + */ +struct SymbolInfo { + Kind m_kind = Kind::INVALID; + std::string m_name; + goos::Object m_def_form; + std::optional m_def_location; + std::string m_docstring = ""; + std::string m_type = ""; + // Method or Function Related + std::vector m_args = {}; + std::string m_return_type = ""; + // Method Related + MethodInfo m_method_info; + bool m_method_builtin = false; + // Type Related + std::string m_parent_type = ""; + int m_type_size = -1; + // NOTE - removed method count...seems unnecessary? + std::vector m_type_fields = {}; + std::vector m_type_methods = {}; + std::vector m_type_states = {}; + // Macro Related + std::vector m_macro_args = {}; + std::vector>> m_macro_kwargs = {}; + std::optional m_variadic_arg = {}; + // TODO: need to track references for this, this is a TODO for LSP work + // bool is_unused = false; + + void update_args_from_docstring(); + void set_definition_location(const goos::TextDb* textdb); +}; + +/*! + * A map of symbol info. It internally stores the info in a prefix tree so you can quickly get + * a list of all symbols starting with a given prefix. + */ +class SymbolInfoMap { + goos::TextDb* m_textdb; + TrieMap m_symbol_map; + // Indexes references to symbols by the file they are defined within + // This allows us to not only efficiently retrieve symbols by file, but also allows us to + // cleanup symbols when files are re-compiled. 
+ std::unordered_map>> m_file_symbol_index; + + void add_symbol_to_file_index(const std::string& file_path, std::shared_ptr symbol); + + public: + SymbolInfoMap(goos::TextDb* textdb) : m_textdb(textdb) {} + void add_global(const std::string& name, + const std::string& type, + const goos::Object& defining_form, + const std::string& docstring = ""); + void add_fwd_dec(const std::string& name, const goos::Object& defining_form); + void add_function(const std::string& name, + const std::string& return_type, + const std::vector& args, + const goos::Object& defining_form, + const std::string& docstring = ""); + void add_type(const std::string& name, + Type* type_info, + const goos::Object& defining_form, + const std::string& docstring = ""); + void add_constant(const std::string& name, + const goos::Object& defining_form, + const std::string& docstring = ""); + void add_macro(const std::string& name, + const goos::ArgumentSpec arg_spec, + const goos::Object& defining_form, + const std::string& docstring = ""); + void add_builtin(const std::string& name, const std::string& docstring = ""); + void add_method(const std::string& method_name, + const std::vector& args, + const MethodInfo& method_info, + const goos::Object& defining_form); + std::vector> lookup_symbols_by_file( + const std::string& file_path) const; + std::vector> lookup_exact_name(const std::string& name) const; + std::vector> lookup_symbols_starting_with( + const std::string& prefix) const; + std::set lookup_names_starting_with(const std::string& prefix) const; + int symbol_count() const; + std::vector> get_all_symbols() const; + // Uses the per-file index to find and evict symbols globally + // This should be done before re-compiling a file, symbols will be re-added to the DB if they are + // found again + void evict_symbols_using_file_index(const std::string& file_path); +}; + +} // namespace symbol_info diff --git a/goalc/main.cpp b/goalc/main.cpp index b080243c9d1..3e4e1c92878 100644 --- a/goalc/main.cpp 
+++ b/goalc/main.cpp @@ -8,6 +8,7 @@ #include "common/util/diff.h" #include "common/util/string_util.h" #include "common/util/term_util.h" +#include "common/util/trie_map.h" #include "common/util/unicode_util.h" #include "common/versions/versions.h" diff --git a/lsp/CMakeLists.txt b/lsp/CMakeLists.txt index 114abcea1cc..37d117b625d 100644 --- a/lsp/CMakeLists.txt +++ b/lsp/CMakeLists.txt @@ -1,5 +1,14 @@ add_executable(lsp handlers/lsp_router.cpp + handlers/initialize.cpp + handlers/text_document/completion.cpp + handlers/text_document/document_color.cpp + handlers/text_document/document_symbol.cpp + handlers/text_document/document_synchronization.cpp + handlers/text_document/formatting.cpp + handlers/text_document/go_to.cpp + handlers/text_document/hover.cpp + handlers/text_document/type_hierarchy.cpp main.cpp protocol/common_types.cpp protocol/completion.cpp @@ -10,10 +19,12 @@ add_executable(lsp protocol/formatting.cpp protocol/hover.cpp protocol/progress_report.cpp + protocol/type_hierarchy.cpp state/data/mips_instruction.cpp state/lsp_requester.cpp state/workspace.cpp - transport/stdio.cpp) + transport/stdio.cpp + lsp_util.cpp) target_compile_definitions(lsp PRIVATE -DJSON_DIAGNOSTICS=1) diff --git a/lsp/protocol/initialize_result.h b/lsp/handlers/initialize.cpp similarity index 89% rename from lsp/protocol/initialize_result.h rename to lsp/handlers/initialize.cpp index 8199545cfef..ae95cd9f0ac 100644 --- a/lsp/protocol/initialize_result.h +++ b/lsp/handlers/initialize.cpp @@ -1,19 +1,11 @@ -// TODO - convert this to a proper class +#include "initialize.h" -#include "third-party/json.hpp" - -using json = nlohmann::json; - -class InitializeResult { - public: - InitializeResult(){}; - json to_json() { return result; } - - private: +namespace lsp_handlers { +std::optional initialize(Workspace& workspace, int id, json params) { json text_document_sync{ {"openClose", true}, {"change", 1}, // Full sync - {"willSave", false}, + {"willSave", true}, 
{"willSaveWaitUntil", false}, {"save", {{"includeText", false}}}, }; @@ -55,6 +47,9 @@ class InitializeResult { {"renameProvider", false}, {"documentLinkProvider", document_link_provider}, {"executeCommandProvider", execute_command_provider}, + {"typeHierarchyProvider", true}, {"experimental", {}}, }}}; -}; + return result; +} +} // namespace lsp_handlers diff --git a/lsp/handlers/initialize.h b/lsp/handlers/initialize.h index 69ee45537a7..8fc2289923b 100644 --- a/lsp/handlers/initialize.h +++ b/lsp/handlers/initialize.h @@ -1,14 +1,10 @@ #pragma once #include "common/log/log.h" +#include "common/util/json_util.h" -#include "lsp/protocol/initialize_result.h" +#include "lsp/state/workspace.h" -#include "third-party/json.hpp" - -using json = nlohmann::json; - -std::optional initialize_handler(Workspace& /*workspace*/, int /*id*/, json /*params*/) { - InitializeResult result; - return result.to_json(); +namespace lsp_handlers { +std::optional initialize(Workspace& workspace, int id, json params); } diff --git a/lsp/handlers/lsp_router.cpp b/lsp/handlers/lsp_router.cpp index 7664e4c85b2..8d744f6760c 100644 --- a/lsp/handlers/lsp_router.cpp +++ b/lsp/handlers/lsp_router.cpp @@ -3,6 +3,7 @@ #include "common/log/log.h" #include "lsp/handlers/initialize.h" +#include "lsp/handlers/text_document/type_hierarchy.h" #include "lsp/protocol/error_codes.h" #include "text_document/completion.h" #include "text_document/document_color.h" @@ -14,6 +15,14 @@ #include "fmt/core.h" +json error_resp(ErrorCodes error_code, const std::string& error_message) { + json error{ + {"code", static_cast(error_code)}, + {"message", error_message}, + }; + return json{{"error", error}}; +} + LSPRoute::LSPRoute() : m_route_type(LSPRouteType::NOOP) {} LSPRoute::LSPRoute(std::function notification_handler) @@ -29,41 +38,43 @@ LSPRoute::LSPRoute(std::function(Workspace&, int, json)> req : m_route_type(LSPRouteType::REQUEST_RESPONSE), m_request_handler(request_handler) {} void LSPRouter::init_routes() { + 
m_routes["exit"] = LSPRoute([](Workspace& /*workspace*/, nlohmann::json /*params*/) { + lg::info("Shutting down LSP due to explicit request"); + exit(0); + }); m_routes["shutdown"] = LSPRoute( [](Workspace& /*workspace*/, int /*id*/, nlohmann::json /*params*/) -> std::optional { - lg::info("Shutting down LSP due to explicit request"); - exit(0); + lg::info("Received shutdown request"); + return error_resp(ErrorCodes::UnknownErrorCode, "Problem occurred while exiting"); }); - m_routes["initialize"] = LSPRoute(initialize_handler); + m_routes["initialize"] = LSPRoute(lsp_handlers::initialize); m_routes["initialize"].m_generic_post_action = [](Workspace& workspace) { workspace.set_initialized(true); }; m_routes["initialized"] = LSPRoute(); - m_routes["textDocument/documentSymbol"] = LSPRoute(document_symbols_handler); - m_routes["textDocument/didOpen"] = LSPRoute(did_open_handler, did_open_push_diagnostics); - m_routes["textDocument/didChange"] = LSPRoute(did_change_handler, did_change_push_diagnostics); - m_routes["textDocument/didClose"] = LSPRoute(did_close_handler); - m_routes["textDocument/hover"] = LSPRoute(hover_handler); - m_routes["textDocument/definition"] = LSPRoute(go_to_definition_handler); - m_routes["textDocument/completion"] = LSPRoute(get_completions_handler); - m_routes["textDocument/documentColor"] = LSPRoute(document_color_handler); - m_routes["textDocument/formatting"] = LSPRoute(formatting_handler); + m_routes["textDocument/documentSymbol"] = LSPRoute(lsp_handlers::document_symbols); + m_routes["textDocument/didOpen"] = + LSPRoute(lsp_handlers::did_open, lsp_handlers::did_open_push_diagnostics); + m_routes["textDocument/didChange"] = + LSPRoute(lsp_handlers::did_change, lsp_handlers::did_change_push_diagnostics); + m_routes["textDocument/didClose"] = LSPRoute(lsp_handlers::did_close); + m_routes["textDocument/willSave"] = LSPRoute(lsp_handlers::will_save); + m_routes["textDocument/hover"] = LSPRoute(lsp_handlers::hover); + 
m_routes["textDocument/definition"] = LSPRoute(lsp_handlers::go_to_definition); + m_routes["textDocument/completion"] = LSPRoute(lsp_handlers::get_completions); + m_routes["textDocument/documentColor"] = LSPRoute(lsp_handlers::document_color); + m_routes["textDocument/formatting"] = LSPRoute(lsp_handlers::formatting); + m_routes["textDocument/prepareTypeHierarchy"] = LSPRoute(lsp_handlers::prepare_type_hierarchy); + m_routes["typeHierarchy/supertypes"] = LSPRoute(lsp_handlers::supertypes_type_hierarchy); + m_routes["typeHierarchy/subtypes"] = LSPRoute(lsp_handlers::subtypes_type_hierarchy); // TODO - m_routes["textDocument/signatureHelp"] = LSPRoute(get_completions_handler); - // Not Yet Supported Routes, noops + // Not Supported Routes, noops m_routes["$/cancelRequest"] = LSPRoute(); m_routes["textDocument/documentLink"] = LSPRoute(); m_routes["textDocument/codeLens"] = LSPRoute(); m_routes["textDocument/colorPresentation"] = LSPRoute(); } -json error_resp(ErrorCodes error_code, const std::string& error_message) { - json error{ - {"code", static_cast(error_code)}, - {"message", error_message}, - }; - return json{{"error", error}}; -} - std::string LSPRouter::make_response(const json& result) { json content = result; content["jsonrpc"] = "2.0"; diff --git a/lsp/handlers/text_document/completion.cpp b/lsp/handlers/text_document/completion.cpp new file mode 100644 index 00000000000..738d764ca68 --- /dev/null +++ b/lsp/handlers/text_document/completion.cpp @@ -0,0 +1,58 @@ +#include "completion.h" + +namespace lsp_handlers { + +std::unordered_map completion_item_kind_map = { + {symbol_info::Kind::CONSTANT, LSPSpec::CompletionItemKind::Constant}, + {symbol_info::Kind::FUNCTION, LSPSpec::CompletionItemKind::Function}, + {symbol_info::Kind::FWD_DECLARED_SYM, LSPSpec::CompletionItemKind::Reference}, + {symbol_info::Kind::GLOBAL_VAR, LSPSpec::CompletionItemKind::Variable}, + {symbol_info::Kind::INVALID, LSPSpec::CompletionItemKind::Text}, + 
{symbol_info::Kind::LANGUAGE_BUILTIN, LSPSpec::CompletionItemKind::Function}, + {symbol_info::Kind::MACRO, LSPSpec::CompletionItemKind::Operator}, + {symbol_info::Kind::METHOD, LSPSpec::CompletionItemKind::Method}, + {symbol_info::Kind::TYPE, LSPSpec::CompletionItemKind::Class}, +}; + +std::optional get_completions(Workspace& workspace, int /*id*/, json params) { + auto converted_params = params.get(); + const auto file_type = workspace.determine_filetype_from_uri(converted_params.textDocument.m_uri); + + if (file_type != Workspace::FileType::OpenGOAL) { + return nullptr; + } + auto maybe_tracked_file = workspace.get_tracked_og_file(converted_params.textDocument.m_uri); + if (!maybe_tracked_file) { + return nullptr; + } + std::vector items; + const auto& tracked_file = maybe_tracked_file.value().get(); + // The cursor position in the context of completions is always 1 character ahead of the text, we + // move it back 1 spot so we can actually detect what the user has typed so far + LSPSpec::Position new_position = converted_params.position; + if (new_position.m_character > 0) { + new_position.m_character--; + } + const auto symbol = tracked_file.get_symbol_at_position(new_position); + if (!symbol) { + lg::debug("get_completions - no symbol to work from"); + } else { + const auto matching_symbols = + workspace.get_symbols_starting_with(tracked_file.m_game_version, symbol.value()); + lg::debug("get_completions - found {} symbols", matching_symbols.size()); + + for (const auto& symbol : matching_symbols) { + LSPSpec::CompletionItem item; + item.label = symbol->m_name; + item.kind = completion_item_kind_map.at(symbol->m_kind); + // TODO - flesh out this more fully when auto-complete with non-globals works as well + items.push_back(item); + } + } + LSPSpec::CompletionList list_result; + list_result.isIncomplete = false; // we want further typing to re-evaluate the list + list_result.items = items; + return list_result; +} + +} // namespace lsp_handlers diff --git 
a/lsp/handlers/text_document/completion.h b/lsp/handlers/text_document/completion.h index e75767d1668..3c9bbc028af 100644 --- a/lsp/handlers/text_document/completion.h +++ b/lsp/handlers/text_document/completion.h @@ -2,17 +2,13 @@ #include +#include "common/util/json_util.h" + #include "lsp/protocol/common_types.h" #include "lsp/protocol/completion.h" #include "lsp/state/data/mips_instructions.h" #include "lsp/state/workspace.h" -std::optional get_completions_handler(Workspace& /*workspace*/, int /*id*/, json params) { - auto converted_params = params.get(); - - // TODO - these need to be cached, - - // TODO - implement response object - - return json::array(); +namespace lsp_handlers { +std::optional get_completions(Workspace& workspace, int id, json params); } diff --git a/lsp/handlers/text_document/document_color.cpp b/lsp/handlers/text_document/document_color.cpp new file mode 100644 index 00000000000..5b10a9d9d68 --- /dev/null +++ b/lsp/handlers/text_document/document_color.cpp @@ -0,0 +1,157 @@ +#include "lsp/protocol/document_color.h" + +#include + +#include "lsp/protocol/common_types.h" +#include "lsp/state/workspace.h" + +int hex_to_dec(const std::string& hex) { + std::string cleaned_string = hex; + if (cleaned_string.starts_with("#x")) { + cleaned_string = cleaned_string.substr(2); + } + return std::stoi(cleaned_string, nullptr, 16); +} + +std::unordered_map>> + game_font_colors = {{GameVersion::Jak1, + { + {0, {223.0, 239.0, 223.0, 255.0}}, {1, {255.0, 255.0, 255.0, 255.0}}, + {2, {255.0, 255.0, 255.0, 127.0}}, {3, {255.0, 191.0, 63.0, 255.0}}, + {4, {255.0, 199.0, 0.0, 255.0}}, {5, {255.0, 255.0, 0.0, 255.0}}, + {6, {63.0, 255.0, 63.0, 255.0}}, {7, {127.0, 127.0, 255.0, 255.0}}, + {8, {-1.0, 255.0, 255.0, 255.0}}, {9, {255.0, 127.0, 255.0, 255.0}}, + {10, {191.0, 255.0, 255.0, 255.0}}, {11, {127.0, 191.0, 191.0, 255.0}}, + {12, {255.0, 255.0, 255.0, 255.0}}, {13, {159.0, 159.0, 159.0, 255.0}}, + {14, {255.0, 167.0, 0.0, 255.0}}, {15, {223.0, 255.0, 
95.0, 255.0}}, + {16, {143.0, 175.0, 15.0, 255.0}}, {17, {175.0, 191.0, 175.0, 255.0}}, + {18, {127.0, 143.0, 127.0, 255.0}}, {19, {95.0, 63.0, 95.0, 255.0}}, + {20, {255.0, 241.0, 143.0, 255.0}}, {21, {63.0, 187.0, 239.0, 255.0}}, + {22, {57.0, 57.0, 57.0, 255.0}}, {23, {127.0, 127.0, 127.0, 255.0}}, + {24, {243.0, 153.0, 201.0, 255.0}}, {25, {243.0, 103.0, 103.0, 255.0}}, + {26, {31.0, 201.0, 151.0, 255.0}}, {27, {139.0, 147.0, 239.0, 255.0}}, + {28, {173.0, 251.0, 255.0, 255.0}}, {29, {253.0, 245.0, 101.0, 255.0}}, + {30, {241.0, 241.0, 3.0, 255.0}}, {31, {141.0, 207.0, 243.0, 255.0}}, + {32, {223.0, 239.0, 223.0, 255.0}}, {33, {191.0, -1.0, 0.0, 255.0}}, + {34, {255.0, 191.0, 63.0, 255.0}}, + }}, + {GameVersion::Jak2, + { + {0, {223.0, 239.0, 223.0, 255.0}}, {1, {255.0, 255.0, 255.0, 255.0}}, + {2, {255.0, 255.0, 255.0, 127.0}}, {3, {255.0, 63.0, 0.0, 255.0}}, + {4, {255.0, 199.0, 0.0, 255.0}}, {5, {255.0, 255.0, 0.0, 255.0}}, + {6, {63.0, 255.0, 63.0, 255.0}}, {7, {0.0, 63.0, 255.0, 255.0}}, + {8, {0.0, 255.0, 255.0, 255.0}}, {9, {255.0, 127.0, 255.0, 255.0}}, + {10, {191.0, 255.0, 255.0, 255.0}}, {11, {127.0, 191.0, 191.0, 255.0}}, + {12, {255.0, 255.0, 255.0, 255.0}}, {13, {159.0, 159.0, 159.0, 255.0}}, + {14, {255.0, 167.0, 0.0, 255.0}}, {15, {223.0, 255.0, 95.0, 255.0}}, + {16, {143.0, 175.0, 31.0, 255.0}}, {17, {175.0, 191.0, 175.0, 255.0}}, + {18, {127.0, 143.0, 127.0, 255.0}}, {19, {95.0, 63.0, 95.0, 255.0}}, + {20, {255.0, 241.0, 143.0, 255.0}}, {21, {63.0, 187.0, 239.0, 255.0}}, + {22, {57.0, 57.0, 57.0, 255.0}}, {23, {127.0, 127.0, 127.0, 255.0}}, + {24, {243.0, 153.0, 201.0, 255.0}}, {25, {243.0, 103.0, 103.0, 255.0}}, + {26, {31.0, 201.0, 151.0, 255.0}}, {27, {139.0, 147.0, 239.0, 255.0}}, + {28, {173.0, 251.0, 255.0, 255.0}}, {29, {253.0, 245.0, 101.0, 255.0}}, + {30, {241.0, 241.0, 3.0, 255.0}}, {31, {141.0, 207.0, 243.0, 255.0}}, + {32, {127.0, 255.0, 255.0, 255.0}}, {33, {127.0, 255.0, 255.0, 255.0}}, + {34, {255.0, 255.0, 255.0, 255.0}}, {35, 
{63.0, 127.0, 127.0, 191.0}}, + {36, {223.0, 239.0, 223.0, 255.0}}, {37, {191.0, 0.0, 0.0, 255.0}}, + {38, {255.0, 191.0, 63.0, 255.0}}, {39, {0.0, 0.0, 1.0, 255.0}}, + }}}; + +namespace lsp_handlers { + +std::optional document_color(Workspace& workspace, int /*id*/, json raw_params) { + auto params = raw_params.get(); + auto file_type = workspace.determine_filetype_from_uri(params.textDocument.m_uri); + const auto game_version = workspace.determine_game_version_from_uri(params.textDocument.m_uri); + + json colors = json::array(); + + if (!game_version || file_type != Workspace::FileType::OpenGOAL) { + return colors; + } + + auto maybe_tracked_file = workspace.get_tracked_og_file(params.textDocument.m_uri); + if (!maybe_tracked_file) { + return colors; + } + const auto& tracked_file = maybe_tracked_file.value().get(); + + // Search for `(new 'static 'rgba....` forms as these can be colored + // for example - `(new 'static 'rgba :r #x70 :g #x78 :b #x70 :a #x80)` + const auto rgba_results = + tracked_file.search_for_forms_that_begin_with({"(", "new", "'static", "'rgba"}); + for (const auto& result : rgba_results) { + // Iterate the forms and find the color and alpha info + float red = 0.0f; + float green = 0.0f; + float blue = 0.0f; + float alpha = 0.0f; + int token_idx = 0; + while (token_idx < result.tokens.size()) { + const auto& token = result.tokens[token_idx]; + // in OpenGOAL -- 255 is equal to 128, so we double every value and subtract 1 + if (token == ":r" && result.tokens.size() > token_idx + 1) { + red = static_cast((hex_to_dec(result.tokens[token_idx + 1]) * 2) - 1) / 255.0f; + } else if (token == ":g" && result.tokens.size() > token_idx + 1) { + green = static_cast((hex_to_dec(result.tokens[token_idx + 1]) * 2) - 1) / 255.0f; + } else if (token == ":b" && result.tokens.size() > token_idx + 1) { + blue = static_cast((hex_to_dec(result.tokens[token_idx + 1]) * 2) - 1) / 255.0f; + } else if (token == ":a" && result.tokens.size() > token_idx + 1) { + alpha = 
static_cast((hex_to_dec(result.tokens[token_idx + 1]) * 2) - 1) / 255.0f; + } + token_idx++; + } + LSPSpec::ColorInformation color_info; + color_info.range = {{(uint32_t)result.start_point.first, (uint32_t)result.start_point.second}, + {(uint32_t)result.end_point.first, (uint32_t)result.end_point.second}}; + color_info.color = LSPSpec::Color{red, green, blue, alpha}; + colors.push_back(color_info); + } + // Also search for the `(static-rgba ...` macro + const auto static_rgba_results = + tracked_file.search_for_forms_that_begin_with({"(", "static-rgba"}); + for (const auto& result : static_rgba_results) { + float red = static_cast((hex_to_dec(result.tokens[2]) * 2) - 1) / 255.0f; + float green = static_cast((hex_to_dec(result.tokens[3]) * 2) - 1) / 255.0f; + float blue = static_cast((hex_to_dec(result.tokens[4]) * 2) - 1) / 255.0f; + float alpha = static_cast((hex_to_dec(result.tokens[5]) * 2) - 1) / 255.0f; + LSPSpec::ColorInformation color_info; + color_info.range = {{(uint32_t)result.start_point.first, (uint32_t)result.start_point.second}, + {(uint32_t)result.end_point.first, (uint32_t)result.end_point.second}}; + color_info.color = LSPSpec::Color{red, green, blue, alpha}; + colors.push_back(color_info); + } + + // Search for `(font-color ...` forms + const auto font_color_results = + tracked_file.search_for_forms_that_begin_with({"(", "font-color"}); + const auto font_color_enum_entries = + workspace.get_enum_entries("font-color", game_version.value()); + if (!font_color_enum_entries.empty() && + game_font_colors.find(game_version.value()) != game_font_colors.end()) { + for (const auto& result : font_color_results) { + const auto font_color = result.tokens[2]; + if (font_color_enum_entries.find(font_color) != font_color_enum_entries.end()) { + const auto font_color_val = font_color_enum_entries.at(font_color); + if (game_font_colors[game_version.value()].find(font_color_val) != + game_font_colors[game_version.value()].end()) { + const auto& [red, green, blue, 
alpha] = + game_font_colors[game_version.value()].at(font_color_val); + LSPSpec::ColorInformation color_info; + color_info.range = { + {(uint32_t)result.start_point.first, (uint32_t)result.start_point.second}, + {(uint32_t)result.end_point.first, (uint32_t)result.end_point.second}}; + color_info.color = + LSPSpec::Color{red / 255.0f, green / 255.0f, blue / 255.0f, alpha / 255.0f}; + colors.push_back(color_info); + } + } + } + } + + return colors; +} + +} // namespace lsp_handlers diff --git a/lsp/handlers/text_document/document_color.h b/lsp/handlers/text_document/document_color.h index df4e5fbcf79..c4767122753 100644 --- a/lsp/handlers/text_document/document_color.h +++ b/lsp/handlers/text_document/document_color.h @@ -2,76 +2,14 @@ #include -#include "common/util/string_util.h" +#include "common/util/json_util.h" #include "lsp/protocol/common_types.h" #include "lsp/protocol/document_color.h" +#include "lsp/state/workspace.h" -float hexToFloat(const std::string& hex) { - int value = std::stoi(hex, nullptr, 16); - return static_cast(value) / 255.0f; -} +namespace lsp_handlers { -std::optional color_hexstring_to_lsp_color(const std::string& color_name) { - if (!str_util::contains(color_name, "#")) { - return {}; - } - const auto color_tokens = str_util::split(color_name, '#'); - const auto hexstring = color_tokens.at(1); - std::string red_hex = hexstring.substr(0, 2); - std::string green_hex = hexstring.substr(2, 2); - std::string blue_hex = hexstring.substr(4, 2); +std::optional document_color(Workspace& workspace, int id, json raw_params); - float red = hexToFloat(red_hex); - float green = hexToFloat(green_hex); - float blue = hexToFloat(blue_hex); - - return LSPSpec::Color{red, green, blue, 1.0}; -} - -std::optional document_color_handler(Workspace& /*workspace*/, int /*id*/, json raw_params) { - auto params = raw_params.get(); - json colors = json::array(); - - // TODO - hex strings aren't desirable in the `font-color` enum - // this could be used for the `new 
'static 'rgba` instances but that requires proper - // AST support as it cannot (and should not) be assumed that all 4 components will be on the same - // line - return colors; - - //// Iterate through document, mark text colors ourselves - // auto file_type = workspace.determine_filetype_from_uri(params.textDocument.m_uri); - - // if (file_type == Workspace::FileType::OpenGOAL) { - // auto tracked_file = workspace.get_tracked_og_file(params.textDocument.m_uri); - // if (!tracked_file) { - // return {}; - // } - - // // This is something that is ok to be a regex, because it's very niche - // for (int i = 0; i < tracked_file->m_lines.size(); i++) { - // const auto& line = tracked_file->m_lines.at(i); - // std::smatch matches; - // std::regex regex("\\(font-color ([^)]*)\\)"); - - // std::sregex_iterator iter(line.begin(), line.end(), regex); - // std::sregex_iterator end; - - // for (; iter != end; iter++) { - // std::smatch match = *iter; - // std::string capture_group = match.str(1); - // LSPSpec::ColorInformation color_info; - // color_info.range = {{i, match.position(1)}, {i, match.position(1) + match.size()}}; - // const auto color = color_hexstring_to_lsp_color(capture_group); - // if (!color) { - // continue; - // } - // color_info.color = color.value(); - // colors.push_back(color_info); - // lg::debug("color - {}", capture_group); - // } - // } - //} - - // return colors; -} +} // namespace lsp_handlers diff --git a/lsp/handlers/text_document/document_symbol.cpp b/lsp/handlers/text_document/document_symbol.cpp new file mode 100644 index 00000000000..ddde0acef9d --- /dev/null +++ b/lsp/handlers/text_document/document_symbol.cpp @@ -0,0 +1,53 @@ +#include "document_symbol.h" + +#include "third-party/json.hpp" + +using json = nlohmann::json; + +std::optional ir_symbols(Workspace& workspace, LSPSpec::DocumentSymbolParams params) { + json symbols = json::array(); + auto maybe_tracked_file = workspace.get_tracked_ir_file(params.m_textDocument.m_uri); + if 
(!maybe_tracked_file) { + return symbols; + } + + const auto& tracked_file = maybe_tracked_file.value().get(); + for (const auto& symbol : tracked_file.m_symbols) { + symbols.push_back(symbol); + } + + return symbols; +} + +std::optional og_symbols(Workspace& workspace, LSPSpec::DocumentSymbolParams params) { + json symbols = json::array(); + auto maybe_tracked_file = workspace.get_tracked_og_file(params.m_textDocument.m_uri); + if (!maybe_tracked_file) { + return symbols; + } + + const auto& tracked_file = maybe_tracked_file.value().get(); + for (const auto& symbol : tracked_file.m_symbols) { + symbols.push_back(symbol); + } + + return symbols; +} + +namespace lsp_handlers { + +std::optional document_symbols(Workspace& workspace, int /*id*/, json params) { + auto converted_params = params.get(); + const auto file_type = + workspace.determine_filetype_from_uri(converted_params.m_textDocument.m_uri); + + if (file_type == Workspace::FileType::OpenGOALIR) { + return ir_symbols(workspace, converted_params); + } else if (file_type == Workspace::FileType::OpenGOAL) { + return og_symbols(workspace, converted_params); + } + + return json::array(); +} + +} // namespace lsp_handlers diff --git a/lsp/handlers/text_document/document_symbol.h b/lsp/handlers/text_document/document_symbol.h index 66d8159f075..0b4d48f8c1b 100644 --- a/lsp/handlers/text_document/document_symbol.h +++ b/lsp/handlers/text_document/document_symbol.h @@ -2,26 +2,13 @@ #include +#include "common/util/json_util.h" + #include "lsp/protocol/common_types.h" #include "lsp/state/workspace.h" -#include "third-party/json.hpp" - -using json = nlohmann::json; - -std::optional document_symbols_handler(Workspace& workspace, int /*id*/, json params) { - auto converted_params = params.get(); - auto tracked_file = workspace.get_tracked_ir_file(converted_params.m_textDocument.m_uri); - - if (!tracked_file) { - return {}; - } +namespace lsp_handlers { - // TODO - convert to type! 
+std::optional document_symbols(Workspace& workspace, int id, json params); - json arr = json::array(); - for (const auto& symbol : tracked_file.value().m_symbols) { - arr.push_back(symbol); - } - return arr; -} +} // namespace lsp_handlers diff --git a/lsp/handlers/text_document/document_synchronization.cpp b/lsp/handlers/text_document/document_synchronization.cpp new file mode 100644 index 00000000000..d71c7252562 --- /dev/null +++ b/lsp/handlers/text_document/document_synchronization.cpp @@ -0,0 +1,77 @@ +#include "document_synchronization.h" + +namespace lsp_handlers { + +void did_open(Workspace& workspace, json raw_params) { + auto params = raw_params.get(); + workspace.start_tracking_file(params.m_textDocument.m_uri, params.m_textDocument.m_languageId, + params.m_textDocument.m_text); +} + +void did_change(Workspace& workspace, json raw_params) { + auto params = raw_params.get(); + for (const auto& change : params.m_contentChanges) { + workspace.update_tracked_file(params.m_textDocument.m_uri, change.m_text); + } +} + +void did_close(Workspace& workspace, json raw_params) { + auto params = raw_params.get(); + workspace.stop_tracking_file(params.m_textDocument.m_uri); +} + +void will_save(Workspace& workspace, json raw_params) { + auto params = raw_params.get(); + workspace.tracked_file_will_save(params.textDocument.m_uri); +} + +std::optional did_open_push_diagnostics(Workspace& workspace, json raw_params) { + auto params = raw_params.get(); + const auto file_type = + workspace.determine_filetype_from_languageid(params.m_textDocument.m_languageId); + + LSPSpec::PublishDiagnosticParams publish_params; + publish_params.m_uri = params.m_textDocument.m_uri; + publish_params.m_version = params.m_textDocument.m_version; + + if (file_type == Workspace::FileType::OpenGOALIR) { + auto maybe_tracked_file = workspace.get_tracked_ir_file(params.m_textDocument.m_uri); + if (!maybe_tracked_file) { + return {}; + } + const auto& tracked_file = 
maybe_tracked_file.value().get(); + publish_params.m_diagnostics = tracked_file.m_diagnostics; + } + + json response; + response["method"] = "textDocument/publishDiagnostics"; + response["params"] = publish_params; + + return response; +} + +std::optional did_change_push_diagnostics(Workspace& workspace, json raw_params) { + auto params = raw_params.get(); + const auto file_type = workspace.determine_filetype_from_uri(params.m_textDocument.m_uri); + + LSPSpec::PublishDiagnosticParams publish_params; + publish_params.m_uri = params.m_textDocument.m_uri; + publish_params.m_version = params.m_textDocument.m_version; + + if (file_type == Workspace::FileType::OpenGOALIR) { + auto maybe_tracked_file = workspace.get_tracked_ir_file(params.m_textDocument.m_uri); + if (!maybe_tracked_file) { + return {}; + } + const auto& tracked_file = maybe_tracked_file.value().get(); + publish_params.m_diagnostics = tracked_file.m_diagnostics; + } + + json response; + response["method"] = "textDocument/publishDiagnostics"; + response["params"] = publish_params; + + return response; +} + +} // namespace lsp_handlers diff --git a/lsp/handlers/text_document/document_synchronization.h b/lsp/handlers/text_document/document_synchronization.h index 579bbde5bec..069660b7352 100644 --- a/lsp/handlers/text_document/document_synchronization.h +++ b/lsp/handlers/text_document/document_synchronization.h @@ -2,76 +2,20 @@ #include +#include "common/util/json_util.h" + #include "lsp/protocol/document_diagnostics.h" #include "lsp/protocol/document_synchronization.h" #include "lsp/state/workspace.h" -#include "third-party/json.hpp" - -using json = nlohmann::json; - -void did_open_handler(Workspace& workspace, json raw_params) { - auto params = raw_params.get(); - workspace.start_tracking_file(params.m_textDocument.m_uri, params.m_textDocument.m_languageId, - params.m_textDocument.m_text); -} - -void did_change_handler(Workspace& workspace, json raw_params) { - auto params = raw_params.get(); - for (const 
auto& change : params.m_contentChanges) { - workspace.update_tracked_file(params.m_textDocument.m_uri, change.m_text); - } -} - -void did_close_handler(Workspace& workspace, json raw_params) { - auto params = raw_params.get(); - workspace.stop_tracking_file(params.m_textDocument.m_uri); -} - -std::optional did_open_push_diagnostics(Workspace& workspace, json raw_params) { - auto params = raw_params.get(); - const auto file_type = - workspace.determine_filetype_from_languageid(params.m_textDocument.m_languageId); - - LSPSpec::PublishDiagnosticParams publish_params; - publish_params.m_uri = params.m_textDocument.m_uri; - publish_params.m_version = params.m_textDocument.m_version; - - if (file_type == Workspace::FileType::OpenGOALIR) { - auto tracked_file = workspace.get_tracked_ir_file(params.m_textDocument.m_uri); - if (!tracked_file) { - return {}; - } - publish_params.m_diagnostics = tracked_file.value().m_diagnostics; - } - - json response; - response["method"] = "textDocument/publishDiagnostics"; - response["params"] = publish_params; - - return response; -} - -std::optional did_change_push_diagnostics(Workspace& workspace, json raw_params) { - auto params = raw_params.get(); - const auto file_type = workspace.determine_filetype_from_uri(params.m_textDocument.m_uri); - - LSPSpec::PublishDiagnosticParams publish_params; - publish_params.m_uri = params.m_textDocument.m_uri; - publish_params.m_version = params.m_textDocument.m_version; - - if (file_type == Workspace::FileType::OpenGOALIR) { - auto tracked_file = workspace.get_tracked_ir_file(params.m_textDocument.m_uri); +namespace lsp_handlers { - if (!tracked_file) { - return {}; - } - publish_params.m_diagnostics = tracked_file.value().m_diagnostics; - } +void did_open(Workspace& workspace, json raw_params); +void did_change(Workspace& workspace, json raw_params); +void did_close(Workspace& workspace, json raw_params); +void will_save(Workspace& workspace, json raw_params); - json response; - response["method"] 
= "textDocument/publishDiagnostics"; - response["params"] = publish_params; +std::optional did_open_push_diagnostics(Workspace& workspace, json raw_params); +std::optional did_change_push_diagnostics(Workspace& workspace, json raw_params); - return response; -} +} // namespace lsp_handlers diff --git a/lsp/handlers/text_document/formatting.cpp b/lsp/handlers/text_document/formatting.cpp new file mode 100644 index 00000000000..e0291a00bca --- /dev/null +++ b/lsp/handlers/text_document/formatting.cpp @@ -0,0 +1,38 @@ +#include "formatting.h" + +#include "common/formatter/formatter.h" + +#include "lsp/protocol/common_types.h" +#include "lsp/protocol/formatting.h" +#include "lsp/state/data/mips_instructions.h" +#include "lsp/state/workspace.h" + +namespace lsp_handlers { +std::optional formatting(Workspace& workspace, int id, json raw_params) { + auto params = raw_params.get(); + const auto file_type = workspace.determine_filetype_from_uri(params.textDocument.m_uri); + + if (file_type == Workspace::FileType::OpenGOALIR) { + return nullptr; + } else if (file_type == Workspace::FileType::OpenGOAL) { + auto maybe_tracked_file = workspace.get_tracked_og_file(params.textDocument.m_uri); + if (!maybe_tracked_file) { + return {}; + } + const auto& tracked_file = maybe_tracked_file.value().get(); + const auto result = formatter::format_code(tracked_file.m_content); + if (!result) { + return nullptr; + } + json edits = json::array(); + auto format_edit = LSPSpec::TextEdit(); + format_edit.range = {{0, 0}, {(uint32_t)tracked_file.m_line_count, 0}}; + format_edit.newText = result.value(); + edits.push_back(format_edit); + return edits; + } + + return nullptr; +} + +} // namespace lsp_handlers diff --git a/lsp/handlers/text_document/formatting.h b/lsp/handlers/text_document/formatting.h index 27dc6568e51..0abd82526f4 100644 --- a/lsp/handlers/text_document/formatting.h +++ b/lsp/handlers/text_document/formatting.h @@ -2,36 +2,12 @@ #include -#include "common/formatter/formatter.h" 
+#include "common/util/json_util.h" -#include "lsp/protocol/common_types.h" -#include "lsp/protocol/formatting.h" -#include "lsp/state/data/mips_instructions.h" #include "lsp/state/workspace.h" -std::optional formatting_handler(Workspace& workspace, int /*id*/, json raw_params) { - auto params = raw_params.get(); - const auto file_type = workspace.determine_filetype_from_uri(params.textDocument.m_uri); +namespace lsp_handlers { - if (file_type == Workspace::FileType::OpenGOALIR) { - return nullptr; - } else if (file_type == Workspace::FileType::OpenGOAL) { - auto tracked_file = workspace.get_tracked_og_file(params.textDocument.m_uri); - if (!tracked_file) { - return nullptr; - } - // TODO move away from holding the content directly - const auto result = formatter::format_code(tracked_file->m_content); - if (!result) { - return nullptr; - } - json edits = json::array(); - auto format_edit = LSPSpec::TextEdit(); - format_edit.range = {{0, 0}, {(uint32_t)tracked_file->m_lines.size(), 0}}; - format_edit.newText = result.value(); - edits.push_back(format_edit); - return edits; - } +std::optional formatting(Workspace& workspace, int id, json raw_params); - return nullptr; -} +} // namespace lsp_handlers diff --git a/lsp/handlers/text_document/go_to.cpp b/lsp/handlers/text_document/go_to.cpp new file mode 100644 index 00000000000..151f493b3fa --- /dev/null +++ b/lsp/handlers/text_document/go_to.cpp @@ -0,0 +1,61 @@ +#include "go_to.h" + +namespace lsp_handlers { +std::optional go_to_definition(Workspace& workspace, int /*id*/, json raw_params) { + auto params = raw_params.get(); + const auto file_type = workspace.determine_filetype_from_uri(params.m_textDocument.m_uri); + + json locations = json::array(); + + if (file_type == Workspace::FileType::OpenGOALIR) { + auto maybe_tracked_file = workspace.get_tracked_ir_file(params.m_textDocument.m_uri); + if (!maybe_tracked_file) { + return {}; + } + const auto& tracked_file = maybe_tracked_file.value().get(); + auto symbol_name 
= tracked_file.get_symbol_at_position(params.m_position); + if (!symbol_name) { + return {}; + } + auto symbol_info = workspace.get_definition_info_from_all_types(symbol_name.value(), + tracked_file.m_all_types_uri); + if (!symbol_info) { + return {}; + } + LSPSpec::Location location; + location.m_uri = tracked_file.m_all_types_uri; + location.m_range.m_start = {(uint32_t)symbol_info.value().definition_info->line_idx_to_display, + (uint32_t)symbol_info.value().definition_info->pos_in_line}; + location.m_range.m_end = {(uint32_t)symbol_info.value().definition_info->line_idx_to_display, + (uint32_t)symbol_info.value().definition_info->pos_in_line}; + locations.push_back(location); + } else if (file_type == Workspace::FileType::OpenGOAL) { + auto maybe_tracked_file = workspace.get_tracked_og_file(params.m_textDocument.m_uri); + if (!maybe_tracked_file) { + return {}; + } + const auto& tracked_file = maybe_tracked_file.value().get(); + const auto symbol = tracked_file.get_symbol_at_position(params.m_position); + if (!symbol) { + return {}; + } + const auto& symbol_info = workspace.get_global_symbol_info(tracked_file, symbol.value()); + if (!symbol_info) { + return {}; + } + + const auto& def_loc = workspace.get_symbol_def_location(tracked_file, symbol_info.value()); + if (!def_loc) { + return {}; + } + + LSPSpec::Location location; + location.m_uri = def_loc->file_path; + location.m_range.m_start = {(uint32_t)def_loc->line_idx, (uint32_t)def_loc->char_idx}; + location.m_range.m_end = {(uint32_t)def_loc->line_idx, (uint32_t)def_loc->char_idx}; + locations.push_back(location); + } + + return locations; +} +} // namespace lsp_handlers diff --git a/lsp/handlers/text_document/go_to.h b/lsp/handlers/text_document/go_to.h index ba78a47314b..acc145c5efe 100644 --- a/lsp/handlers/text_document/go_to.h +++ b/lsp/handlers/text_document/go_to.h @@ -2,65 +2,11 @@ #include +#include "common/util/json_util.h" + #include "lsp/protocol/common_types.h" -#include "lsp/protocol/hover.h" 
-#include "lsp/state/data/mips_instructions.h" #include "lsp/state/workspace.h" -std::optional go_to_definition_handler(Workspace& workspace, int /*id*/, json raw_params) { - auto params = raw_params.get(); - const auto file_type = workspace.determine_filetype_from_uri(params.m_textDocument.m_uri); - - json locations = json::array(); - - if (file_type == Workspace::FileType::OpenGOALIR) { - auto tracked_file = workspace.get_tracked_ir_file(params.m_textDocument.m_uri); - if (!tracked_file) { - return {}; - } - auto symbol_name = tracked_file->get_symbol_at_position(params.m_position); - if (!symbol_name) { - return {}; - } - auto symbol_info = workspace.get_definition_info_from_all_types(symbol_name.value(), - tracked_file->m_all_types_uri); - if (!symbol_info) { - return {}; - } - LSPSpec::Location location; - location.m_uri = tracked_file->m_all_types_uri; - location.m_range.m_start = {(uint32_t)symbol_info.value().definition_info->line_idx_to_display, - (uint32_t)symbol_info.value().definition_info->pos_in_line}; - location.m_range.m_end = {(uint32_t)symbol_info.value().definition_info->line_idx_to_display, - (uint32_t)symbol_info.value().definition_info->pos_in_line}; - locations.push_back(location); - } else if (file_type == Workspace::FileType::OpenGOAL) { - auto tracked_file = workspace.get_tracked_og_file(params.m_textDocument.m_uri); - if (!tracked_file) { - return {}; - } - const auto symbol = tracked_file->get_symbol_at_position(params.m_position); - if (!symbol) { - return {}; - } - const auto& symbol_info = - workspace.get_global_symbol_info(tracked_file.value(), symbol.value()); - if (!symbol_info) { - return {}; - } - - const auto& def_loc = - workspace.get_symbol_def_location(tracked_file.value(), symbol_info.value()); - if (!def_loc) { - return {}; - } - - LSPSpec::Location location; - location.m_uri = def_loc->filename; - location.m_range.m_start = {(uint32_t)def_loc->line_idx, (uint32_t)def_loc->char_idx}; - location.m_range.m_end = 
{(uint32_t)def_loc->line_idx, (uint32_t)def_loc->char_idx}; - locations.push_back(location); - } - - return locations; +namespace lsp_handlers { +std::optional go_to_definition(Workspace& workspace, int id, json raw_params); } diff --git a/lsp/handlers/text_document/hover.cpp b/lsp/handlers/text_document/hover.cpp new file mode 100644 index 00000000000..ad825f5b539 --- /dev/null +++ b/lsp/handlers/text_document/hover.cpp @@ -0,0 +1,269 @@ +#include "hover.h" + +bool is_number(const std::string& s) { + return !s.empty() && std::find_if(s.begin(), s.end(), + [](unsigned char c) { return !std::isdigit(c); }) == s.end(); +} + +std::vector og_method_names = {"new", "delete", "print", "inspect", "length", + "asize-of", "copy", "relocate", "mem-usage"}; + +std::optional hover_handler_ir(Workspace& workspace, + const LSPSpec::TextDocumentPositionParams& params, + const WorkspaceIRFile& tracked_file) { + // See if it's an OpenGOAL symbol or a MIPS mnemonic + auto symbol_name = tracked_file.get_symbol_at_position(params.m_position); + auto token_at_pos = tracked_file.get_mips_instruction_at_position(params.m_position); + if (!symbol_name && !token_at_pos) { + return {}; + } + + LSPSpec::MarkupContent markup; + markup.m_kind = "markdown"; + + // TODO - try specifying the range so it highlights everything, ie. `c.lt.s` + // Prefer symbols + if (symbol_name) { + lg::debug("hover - symbol match - {}", symbol_name.value()); + auto symbol_info = workspace.get_definition_info_from_all_types(symbol_name.value(), + tracked_file.m_all_types_uri); + if (symbol_info && symbol_info.value().docstring.has_value()) { + std::string docstring = symbol_info.value().docstring.value(); + lg::debug("hover - symbol has docstring - {}", docstring); + // A docstring exists, print it! 
+ // By convention, docstrings are assumed to be markdown, they support code-blocks everything + // the only thing extra we do, is replace [[]] with links if available + std::unordered_map symbol_replacements = {}; + std::smatch match; + + std::string::const_iterator searchStart(docstring.cbegin()); + while ( + std::regex_search(searchStart, docstring.cend(), match, std::regex("\\[{2}(.*)\\]{2}"))) { + // Have we already accounted for this symbol? + const auto& name = match[1].str(); + if (symbol_replacements.count(name) != 0) { + continue; + } + // Get this symbol's info + auto symbol_info = + workspace.get_definition_info_from_all_types(name, tracked_file.m_all_types_uri); + if (!symbol_info) { + symbol_replacements[name] = fmt::format("_{}_", name); + } else { + // Construct path + auto symbol_uri = + fmt::format("{}#L{}%2C{}", tracked_file.m_all_types_uri, + symbol_info.value().definition_info->line_idx_to_display + 1, + symbol_info.value().definition_info->pos_in_line); + symbol_replacements[name] = fmt::format("[{}]({})", name, symbol_uri); + } + searchStart = match.suffix().first; + } + // Replace all symbol occurrences + for (const auto& [key, val] : symbol_replacements) { + docstring = std::regex_replace(docstring, std::regex("\\[{2}" + key + "\\]{2}"), val); + } + + markup.m_value = docstring; + LSPSpec::Hover hover_resp; + hover_resp.m_contents = markup; + return hover_resp; + } else if (!token_at_pos) { + // Check if it's a number, and if so we'll do some numeric conversions + if (!is_number(symbol_name.value())) { + return {}; + } + lg::debug("hover - numeric match - {}", symbol_name.value()); + // Construct the body + std::string body = ""; + uint32_t num = std::atoi(symbol_name.value().data()); + // Assuming it comes in as Decimal + body += "| Base | Value |\n"; + body += "|---------|-------|\n"; + body += fmt::format("| Decimal | `{:d}` |\n", num); + body += fmt::format("| Hex | `{:X}` |\n", num); + // TODO - would be nice to format as groups of 4 + 
body += fmt::format("| Binary | `{:b}` |\n", num); + if (num >= 16 && (num - 16) % 4 == 0) { + uint32_t method_id = (num - 16) / 4; + std::string method_name = "not built-in"; + if (method_id <= 8) { + method_name = og_method_names.at(method_id); + } + body += fmt::format("| Method ID | `{}` - `{}` |\n", method_id, method_name); + } + body += fmt::format("| Octal | `{:o}` |\n", num); + + markup.m_value = body; + LSPSpec::Hover hover_resp; + hover_resp.m_contents = markup; + return hover_resp; + } + } + + // Otherwise, maybe it's a MIPS instruction + if (token_at_pos) { + lg::debug("hover - token match - {}", token_at_pos.value()); + auto& token = token_at_pos.value(); + std::transform(token.begin(), token.end(), token.begin(), + [](unsigned char c) { return std::tolower(c); }); + // Find the instruction, there are some edge-cases here where they could be multiple + // TODO - haven't addressed `bc` and such instructions! Those need to be prefix matched + std::vector ee_instructions = {}; + std::vector vu_instructions = {}; + for (const auto& instr : LSPData::MIPS_INSTRUCTION_LIST) { + auto mnemonic_lower = instr.mnemonic; + std::transform(mnemonic_lower.begin(), mnemonic_lower.end(), mnemonic_lower.begin(), + [](unsigned char c) { return std::tolower(c); }); + if (mnemonic_lower == token) { + if (instr.type == "ee") { + ee_instructions.push_back(fmt::format("- _{}_\n\n", instr.description)); + } else { + vu_instructions.push_back(fmt::format("- _{}_\n\n", instr.description)); + } + } + } + + // Construct the body + std::string body = ""; + if (!ee_instructions.empty()) { + body += "**EE Instructions**\n\n"; + for (const auto& instr : ee_instructions) { + body += instr; + } + body += "___\n\n"; + } + + if (!vu_instructions.empty()) { + body += "**VU Instructions**\n\n"; + for (const auto& instr : vu_instructions) { + body += instr; + } + body += "___\n\n"; + } + + markup.m_value = body; + LSPSpec::Hover hover_resp; + hover_resp.m_contents = markup; + return 
hover_resp; + } + + return {}; +} + +std::string truncate_docstring(const std::string& docstring) { + std::string truncated = ""; + const auto lines = str_util::split(docstring); + for (const auto& line : lines) { + const auto trimmed_line = str_util::ltrim(line); + if (str_util::starts_with(trimmed_line, "@")) { + break; + } + truncated += trimmed_line + "\n"; + } + return truncated; +} + +namespace lsp_handlers { +std::optional hover(Workspace& workspace, int /*id*/, json raw_params) { + auto params = raw_params.get(); + auto file_type = workspace.determine_filetype_from_uri(params.m_textDocument.m_uri); + + if (file_type == Workspace::FileType::OpenGOALIR) { + auto tracked_file = workspace.get_tracked_ir_file(params.m_textDocument.m_uri); + if (!tracked_file) { + return {}; + } + return hover_handler_ir(workspace, params, tracked_file.value()); + } else if (file_type == Workspace::FileType::OpenGOAL) { + auto maybe_tracked_file = workspace.get_tracked_og_file(params.m_textDocument.m_uri); + if (!maybe_tracked_file) { + return {}; + } + const auto& tracked_file = maybe_tracked_file.value().get(); + const auto symbol = tracked_file.get_symbol_at_position(params.m_position); + if (!symbol) { + lg::debug("hover - no symbol"); + return {}; + } + // TODO - there is an issue with docstrings and overridden methods + const auto& symbol_info = workspace.get_global_symbol_info(tracked_file, symbol.value()); + if (!symbol_info) { + lg::debug("hover - no symbol info - {}", symbol.value()); + return {}; + } + LSPSpec::MarkupContent markup; + markup.m_kind = "markdown"; + + const auto args = Docs::get_args_from_docstring(symbol_info.value()->m_args, + symbol_info.value()->m_docstring); + std::string signature = ""; + bool takes_args = true; + if (symbol_info.value()->m_kind == symbol_info::Kind::FUNCTION) { + signature += "function "; + } else if (symbol_info.value()->m_kind == symbol_info::Kind::METHOD) { + signature += "method "; + } else if (symbol_info.value()->m_kind == 
symbol_info::Kind::MACRO) { + signature += "macro "; + } else { + takes_args = false; + } + // TODO - others useful, probably states? + auto type_info = workspace.get_symbol_typeinfo(tracked_file, symbol.value()); + signature += symbol.value(); + if (takes_args) { + signature += "("; + for (int i = 0; i < (int)args.size(); i++) { + const auto& arg = args.at(i); + if (i == (int)args.size() - 1) { + signature += fmt::format("{}: {}", arg.name, arg.type); + } else { + signature += fmt::format("{}: {}, ", arg.name, arg.type); + } + } + signature += ")"; + if (symbol_info.value()->m_kind == symbol_info::Kind::FUNCTION && type_info) { + signature += fmt::format(": {}", type_info->first.last_arg().base_type()); + } else if (symbol_info.value()->m_kind == symbol_info::Kind::METHOD) { + signature += + fmt::format(": {}", symbol_info.value()->m_method_info.type.last_arg().base_type()); + } + } else if (type_info) { + signature += fmt::format(": {}", type_info->second->get_parent()); + } + + std::string body = fmt::format("```opengoal\n{}\n```\n\n", signature); + body += "___\n\n"; + if (!symbol_info.value()->m_docstring.empty()) { + body += truncate_docstring(symbol_info.value()->m_docstring) + "\n\n"; + } + + // TODO - support @see/@returns/[[reference]] + for (const auto& arg : args) { + std::string param_line = ""; + if (arg.is_mutated) { + param_line += fmt::format("*@param!* `{}: {}`", arg.name, arg.type); + } else if (arg.is_optional) { + param_line += fmt::format("*@param?* `{}: {}`", arg.name, arg.type); + } else if (arg.is_unused) { + param_line += fmt::format("*@param_* `{}: {}`", arg.name, arg.type); + } else { + param_line += fmt::format("*@param* `{}: {}`", arg.name, arg.type); + } + if (!arg.description.empty()) { + param_line += fmt::format(" - {}\n\n", arg.description); + } else { + param_line += "\n\n"; + } + body += param_line; + } + + markup.m_value = body; + LSPSpec::Hover hover_resp; + hover_resp.m_contents = markup; + return hover_resp; + } + + return 
{}; +} +} // namespace lsp_handlers diff --git a/lsp/handlers/text_document/hover.h b/lsp/handlers/text_document/hover.h index b16b7a3016e..fa3419e84ec 100644 --- a/lsp/handlers/text_document/hover.h +++ b/lsp/handlers/text_document/hover.h @@ -3,278 +3,16 @@ #include #include +#include "common/util/json_util.h" +#include "common/util/string_util.h" + #include "goalc/compiler/docs/DocTypes.h" #include "lsp/protocol/common_types.h" #include "lsp/protocol/hover.h" #include "lsp/state/data/mips_instructions.h" #include "lsp/state/workspace.h" -bool is_number(const std::string& s) { - return !s.empty() && std::find_if(s.begin(), s.end(), - [](unsigned char c) { return !std::isdigit(c); }) == s.end(); -} - -std::vector og_method_names = {"new", "delete", "print", "inspect", "length", - "asize-of", "copy", "relocate", "mem-usage"}; - -std::optional hover_handler_ir(Workspace& workspace, - const LSPSpec::TextDocumentPositionParams& params, - const WorkspaceIRFile& tracked_file) { - // See if it's an OpenGOAL symbol or a MIPS mnemonic - auto symbol_name = tracked_file.get_symbol_at_position(params.m_position); - auto token_at_pos = tracked_file.get_mips_instruction_at_position(params.m_position); - if (!symbol_name && !token_at_pos) { - return {}; - } - - LSPSpec::MarkupContent markup; - markup.m_kind = "markdown"; - - // TODO - try specifying the range so it highlights everything, ie. `c.lt.s` - // Prefer symbols - if (symbol_name) { - lg::debug("hover - symbol match - {}", symbol_name.value()); - auto symbol_info = workspace.get_definition_info_from_all_types(symbol_name.value(), - tracked_file.m_all_types_uri); - if (symbol_info && symbol_info.value().docstring.has_value()) { - std::string docstring = symbol_info.value().docstring.value(); - lg::debug("hover - symbol has docstring - {}", docstring); - // A docstring exists, print it! 
- // By convention, docstrings are assumed to be markdown, they support code-blocks everything - // the only thing extra we do, is replace [[]] with links if available - std::unordered_map symbol_replacements = {}; - std::smatch match; - - std::string::const_iterator searchStart(docstring.cbegin()); - while ( - std::regex_search(searchStart, docstring.cend(), match, std::regex("\\[{2}(.*)\\]{2}"))) { - // Have we already accounted for this symbol? - const auto& name = match[1].str(); - if (symbol_replacements.count(name) != 0) { - continue; - } - // Get this symbols info - auto symbol_info = - workspace.get_definition_info_from_all_types(name, tracked_file.m_all_types_uri); - if (!symbol_info) { - symbol_replacements[name] = fmt::format("_{}_", name); - } else { - // Construct path - auto symbol_uri = - fmt::format("{}#L{}%2C{}", tracked_file.m_all_types_uri, - symbol_info.value().definition_info->line_idx_to_display + 1, - symbol_info.value().definition_info->pos_in_line); - symbol_replacements[name] = fmt::format("[{}]({})", name, symbol_uri); - } - searchStart = match.suffix().first; - } - // Replace all symbol occurences - for (const auto& [key, val] : symbol_replacements) { - docstring = std::regex_replace(docstring, std::regex("\\[{2}" + key + "\\]{2}"), val); - } - - markup.m_value = docstring; - LSPSpec::Hover hover_resp; - hover_resp.m_contents = markup; - return hover_resp; - } else if (!token_at_pos) { - // Check if it's a number, and if so we'll do some numeric conversions - if (!is_number(symbol_name.value())) { - return {}; - } - lg::debug("hover - numeric match - {}", symbol_name.value()); - // Construct the body - std::string body = ""; - uint32_t num = std::atoi(symbol_name.value().data()); - // Assuming it comes in as Decimal - body += "| Base | Value |\n"; - body += "|---------|-------|\n"; - body += fmt::format("| Decimal | `{:d}` |\n", num); - body += fmt::format("| Hex | `{:X}` |\n", num); - // TODO - would be nice to format as groups of 4 - 
body += fmt::format("| Binary | `{:b}` |\n", num); - if (num >= 16 && (num - 16) % 4 == 0) { - uint32_t method_id = (num - 16) / 4; - std::string method_name = "not built-in"; - if (method_id <= 8) { - method_name = og_method_names.at(method_id); - } - body += fmt::format("| Method ID | `{}` - `{}` |\n", method_id, method_name); - } - body += fmt::format("| Octal | `{:o}` |\n", num); - - markup.m_value = body; - LSPSpec::Hover hover_resp; - hover_resp.m_contents = markup; - return hover_resp; - } - } - - // Otherwise, maybe it's a MIPS instruction - if (token_at_pos) { - lg::debug("hover - token match - {}", token_at_pos.value()); - auto& token = token_at_pos.value(); - std::transform(token.begin(), token.end(), token.begin(), - [](unsigned char c) { return std::tolower(c); }); - // Find the instruction, there are some edge-cases here where they could be multiple - // TODO - havn't addressed `bc` and such instructions! Those need to be prefixed matched - std::vector ee_instructions = {}; - std::vector vu_instructions = {}; - for (const auto& instr : LSPData::MIPS_INSTRUCTION_LIST) { - auto mnemonic_lower = instr.mnemonic; - std::transform(mnemonic_lower.begin(), mnemonic_lower.end(), mnemonic_lower.begin(), - [](unsigned char c) { return std::tolower(c); }); - if (mnemonic_lower == token) { - if (instr.type == "ee") { - ee_instructions.push_back(fmt::format("- _{}_\n\n", instr.description)); - } else { - vu_instructions.push_back(fmt::format("- _{}_\n\n", instr.description)); - } - } - } - - // Construct the body - std::string body = ""; - if (!ee_instructions.empty()) { - body += "**EE Instructions**\n\n"; - for (const auto& instr : ee_instructions) { - body += instr; - } - body += "___\n\n"; - } - - if (!vu_instructions.empty()) { - body += "**VU Instructions**\n\n"; - for (const auto& instr : vu_instructions) { - body += instr; - } - body += "___\n\n"; - } - - markup.m_value = body; - LSPSpec::Hover hover_resp; - hover_resp.m_contents = markup; - return 
hover_resp; - } - - return {}; -} - -std::string truncate_docstring(const std::string& docstring) { - std::string truncated = ""; - const auto lines = str_util::split(docstring); - for (const auto& line : lines) { - const auto trimmed_line = str_util::ltrim(line); - if (str_util::starts_with(trimmed_line, "@")) { - break; - } - truncated += trimmed_line + "\n"; - } - return truncated; -} - -std::optional hover_handler(Workspace& workspace, int /*id*/, json raw_params) { - auto params = raw_params.get(); - auto file_type = workspace.determine_filetype_from_uri(params.m_textDocument.m_uri); - - if (file_type == Workspace::FileType::OpenGOALIR) { - auto tracked_file = workspace.get_tracked_ir_file(params.m_textDocument.m_uri); - if (!tracked_file) { - return {}; - } - return hover_handler_ir(workspace, params, tracked_file.value()); - } else if (file_type == Workspace::FileType::OpenGOAL) { - auto tracked_file = workspace.get_tracked_og_file(params.m_textDocument.m_uri); - if (!tracked_file) { - return {}; - } - // TODO - replace with AST usage instead of figuring out the symbol ourselves - const auto symbol = tracked_file->get_symbol_at_position(params.m_position); - if (!symbol) { - lg::debug("hover - no symbol"); - return {}; - } - // TODO - there is an issue with docstrings and overridden methods - const auto& symbol_info = - workspace.get_global_symbol_info(tracked_file.value(), symbol.value()); - if (!symbol_info) { - lg::debug("hover - no symbol info - {}", symbol.value()); - return {}; - } - LSPSpec::MarkupContent markup; - markup.m_kind = "markdown"; - - const auto args = - Docs::get_args_from_docstring(symbol_info->args(), symbol_info->meta().docstring); - std::string signature = ""; - bool takes_args = true; - if (symbol_info->kind() == SymbolInfo::Kind::FUNCTION) { - signature += "function "; - } else if (symbol_info->kind() == SymbolInfo::Kind::METHOD) { - signature += "method "; - } else if (symbol_info->kind() == SymbolInfo::Kind::MACRO) { - signature 
+= "macro "; - } else { - takes_args = false; - } - // TODO - others useful, probably states? - signature += symbol.value(); - if (takes_args) { - signature += "("; - for (int i = 0; i < (int)args.size(); i++) { - const auto& arg = args.at(i); - if (i == (int)args.size() - 1) { - signature += fmt::format("{}: {}", arg.name, arg.type); - } else { - signature += fmt::format("{}: {}, ", arg.name, arg.type); - } - } - signature += ")"; - if (symbol_info->kind() == SymbolInfo::Kind::FUNCTION && - workspace.get_symbol_typespec(tracked_file.value(), symbol.value())) { - signature += - fmt::format(": {}", workspace.get_symbol_typespec(tracked_file.value(), symbol.value()) - ->last_arg() - .base_type()); - } else if (symbol_info->kind() == SymbolInfo::Kind::METHOD) { - signature += fmt::format(": {}", symbol_info->method_info().type.last_arg().base_type()); - } - } else if (workspace.get_symbol_typespec(tracked_file.value(), symbol.value())) { - signature += fmt::format( - ": {}", workspace.get_symbol_typespec(tracked_file.value(), symbol.value())->base_type()); - } - - std::string body = fmt::format("```opengoal\n{}\n```\n\n", signature); - body += "___\n\n"; - if (!symbol_info->meta().docstring.empty()) { - body += truncate_docstring(symbol_info->meta().docstring) + "\n\n"; - } - - // TODO - support @see/@returns/[[reference]] - for (const auto& arg : args) { - std::string param_line = ""; - if (arg.is_mutated) { - param_line += fmt::format("*@param!* `{}: {}`", arg.name, arg.type); - } else if (arg.is_optional) { - param_line += fmt::format("*@param?* `{}: {}`", arg.name, arg.type); - } else if (arg.is_unused) { - param_line += fmt::format("*@param_* `{}: {}`", arg.name, arg.type); - } else { - param_line += fmt::format("*@param* `{}: {}`", arg.name, arg.type); - } - if (!arg.description.empty()) { - param_line += fmt::format(" - {}\n\n", arg.description); - } else { - param_line += "\n\n"; - } - body += param_line; - } - - markup.m_value = body; - LSPSpec::Hover 
hover_resp; - hover_resp.m_contents = markup; - return hover_resp; - } +namespace lsp_handlers { +std::optional hover(Workspace& workspace, int id, json raw_params); - return {}; -} +} // namespace lsp_handlers diff --git a/lsp/handlers/text_document/type_hierarchy.cpp b/lsp/handlers/text_document/type_hierarchy.cpp new file mode 100644 index 00000000000..5d8657170ee --- /dev/null +++ b/lsp/handlers/text_document/type_hierarchy.cpp @@ -0,0 +1,144 @@ +#include "type_hierarchy.h" + +#include "lsp/lsp_util.h" + +namespace lsp_handlers { +std::optional prepare_type_hierarchy(Workspace& workspace, int /*id*/, json raw_params) { + auto params = raw_params.get(); + const auto file_type = workspace.determine_filetype_from_uri(params.m_textDocument.m_uri); + + if (file_type != Workspace::FileType::OpenGOAL) { + return nullptr; + } + auto maybe_tracked_file = workspace.get_tracked_og_file(params.m_textDocument.m_uri); + if (!maybe_tracked_file) { + return nullptr; + } + const auto& tracked_file = maybe_tracked_file.value().get(); + const auto symbol = tracked_file.get_symbol_at_position(params.m_position); + if (!symbol) { + lg::debug("prepare_type_hierarchy - no symbol"); + return nullptr; + } + + const auto& symbol_info = workspace.get_global_symbol_info(tracked_file, symbol.value()); + if (!symbol_info) { + lg::debug("prepare_type_hierarchy - no symbol info - {}", symbol.value()); + return nullptr; + } + + const auto& def_loc = workspace.get_symbol_def_location(tracked_file, symbol_info.value()); + if (!def_loc) { + return nullptr; + } + + auto type_item = LSPSpec::TypeHierarchyItem(); + type_item.name = symbol.value(); + // TODO - differentiate between struct and class perhaps + type_item.kind = LSPSpec::SymbolKind::Class; + if (symbol_info && !symbol_info.value()->m_docstring.empty()) { + type_item.detail = symbol_info.value()->m_docstring; + } + type_item.uri = lsp_util::uri_from_path(def_loc->file_path); + // TODO - this range is technically not entirely correct, we'd 
have to parse the defining file + // with an AST to get the true extent of the deftype. But for this purpose, its not really needed + // + // HACK - the definition that our compiler stores is the form itself, so we will add + // the width of the prefix `(deftype ` to the char_index + // TODO - A better way would be to use the AST + type_item.range.m_start = {(uint32_t)def_loc->line_idx, (uint32_t)(def_loc->char_idx + 9)}; + type_item.range.m_end = {(uint32_t)def_loc->line_idx, + (uint32_t)(def_loc->char_idx + 9 + symbol.value().length())}; + type_item.selectionRange.m_start = {(uint32_t)def_loc->line_idx, + (uint32_t)(def_loc->char_idx + 8)}; + type_item.selectionRange.m_end = {(uint32_t)def_loc->line_idx, + (uint32_t)(def_loc->char_idx + 8 + symbol.value().length())}; + + json items = json::array(); + items.push_back(type_item); + return items; +} + +std::optional supertypes_type_hierarchy(Workspace& workspace, int /*id*/, json raw_params) { + auto params = raw_params.get(); + const std::optional game_version = + workspace.determine_game_version_from_uri(params.item.uri); + if (!game_version) { + return nullptr; + } + const auto& parent_type_path = + workspace.get_symbols_parent_type_path(params.item.name, game_version.value()); + json items = json::array(); + for (const auto& parent_type : parent_type_path) { + if (std::get<0>(parent_type) == params.item.name) { + continue; // skip the item itself + } + auto type_item = LSPSpec::TypeHierarchyItem(); + type_item.name = std::get<0>(parent_type); + // TODO - differentiate between struct and class perhaps + type_item.kind = LSPSpec::SymbolKind::Class; + if (!std::get<1>(parent_type).empty()) { + type_item.detail = std::get<1>(parent_type); + } + const auto& def_loc = std::get<2>(parent_type); + type_item.uri = def_loc.filename; + // TODO - this range is technically not entirely correct, we'd have to parse the defining file + // with an AST to get the true extent of the deftype. 
But for this purpose, its not really + // needed + // + // HACK - the definition that our compiler stores is the form itself, so we will add + // the width of the prefix `(deftype ` to the char_index + // TODO - A better way would be to use the AST + type_item.range.m_start = {(uint32_t)def_loc.line_idx, (uint32_t)(def_loc.char_idx + 9)}; + type_item.range.m_end = {(uint32_t)def_loc.line_idx, + (uint32_t)(def_loc.char_idx + 9 + std::get<0>(parent_type).length())}; + type_item.selectionRange.m_start = {(uint32_t)def_loc.line_idx, + (uint32_t)(def_loc.char_idx + 8)}; + type_item.selectionRange.m_end = { + (uint32_t)def_loc.line_idx, + (uint32_t)(def_loc.char_idx + 8 + std::get<0>(parent_type).length())}; + items.push_back(type_item); + } + return items; +} + +std::optional subtypes_type_hierarchy(Workspace& workspace, int /*id*/, json raw_params) { + auto params = raw_params.get(); + const std::optional game_version = + workspace.determine_game_version_from_uri(params.item.uri); + if (!game_version) { + return nullptr; + } + const auto& parent_type_path = + workspace.get_types_subtypes(params.item.name, game_version.value()); + json items = json::array(); + for (const auto& parent_type : parent_type_path) { + auto type_item = LSPSpec::TypeHierarchyItem(); + type_item.name = std::get<0>(parent_type); + // TODO - differentiate between struct and class perhaps + type_item.kind = LSPSpec::SymbolKind::Class; + if (!std::get<1>(parent_type).empty()) { + type_item.detail = std::get<1>(parent_type); + } + const auto& def_loc = std::get<2>(parent_type); + type_item.uri = def_loc.filename; + // TODO - this range is technically not entirely correct, we'd have to parse the defining file + // with an AST to get the true extent of the deftype. 
But for this purpose, its not really + // needed + // + // HACK - the definition that our compiler stores is the form itself, so we will add + // the width of the prefix `(deftype ` to the char_index + // TODO - A better way would be to use the AST + type_item.range.m_start = {(uint32_t)def_loc.line_idx, (uint32_t)(def_loc.char_idx + 9)}; + type_item.range.m_end = {(uint32_t)def_loc.line_idx, + (uint32_t)(def_loc.char_idx + 9 + std::get<0>(parent_type).length())}; + type_item.selectionRange.m_start = {(uint32_t)def_loc.line_idx, + (uint32_t)(def_loc.char_idx + 8)}; + type_item.selectionRange.m_end = { + (uint32_t)def_loc.line_idx, + (uint32_t)(def_loc.char_idx + 8 + std::get<0>(parent_type).length())}; + items.push_back(type_item); + } + return items; +} +} // namespace lsp_handlers diff --git a/lsp/handlers/text_document/type_hierarchy.h b/lsp/handlers/text_document/type_hierarchy.h new file mode 100644 index 00000000000..fc7aed9e8d7 --- /dev/null +++ b/lsp/handlers/text_document/type_hierarchy.h @@ -0,0 +1,18 @@ +#pragma once + +#include + +#include "common/util/json_util.h" + +#include "lsp/protocol/common_types.h" +#include "lsp/protocol/type_hierarchy.h" +#include "lsp/state/workspace.h" + +namespace lsp_handlers { + +std::optional prepare_type_hierarchy(Workspace& workspace, int id, json raw_params); + +std::optional supertypes_type_hierarchy(Workspace& workspace, int id, json raw_params); + +std::optional subtypes_type_hierarchy(Workspace& workspace, int id, json raw_params); +} // namespace lsp_handlers diff --git a/lsp/lsp_util.cpp b/lsp/lsp_util.cpp new file mode 100644 index 00000000000..98bc715422d --- /dev/null +++ b/lsp/lsp_util.cpp @@ -0,0 +1,83 @@ +#include "lsp_util.h" + +#include + +#include "common/util/string_util.h" + +#include "fmt/core.h" + +namespace lsp_util { + +std::string url_encode(const std::string& value) { + std::ostringstream escaped; + escaped.fill('0'); + escaped << std::hex; + + for (std::string::const_iterator i = value.begin(), 
n = value.end(); i != n; ++i) { + std::string::value_type c = (*i); + + // Keep alphanumeric and other accepted characters intact + if (isalnum(c) || c == '-' || c == '_' || c == '.' || c == '~' || c == '/') { + escaped << c; + continue; + } + + // Any other characters are percent-encoded + escaped << std::uppercase; + escaped << '%' << std::setw(2) << int((unsigned char)c); + escaped << std::nouppercase; + } + + return escaped.str(); +} + +std::string url_decode(const std::string& input) { + std::ostringstream decoded; + + for (std::size_t i = 0; i < input.length(); ++i) { + if (input[i] == '%') { + // Check if there are enough characters remaining + if (i + 2 < input.length()) { + // Convert the next two characters after '%' into an integer value + std::istringstream hexStream(input.substr(i + 1, 2)); + int hexValue = 0; + hexStream >> std::hex >> hexValue; + + // Append the decoded character to the result + decoded << static_cast(hexValue); + + // Skip the next two characters + i += 2; + } + } else if (input[i] == '+') { + // Replace '+' with space character ' ' + decoded << ' '; + } else { + // Append the character as is + decoded << input[i]; + } + } + + return decoded.str(); +} + +LSPSpec::DocumentUri uri_from_path(fs::path path) { + auto path_str = file_util::convert_to_unix_path_separators(path.string()); + // vscode works with proper URL encoded URIs for file paths + // which means we have to roll our own... 
+ path_str = url_encode(path_str); + return fmt::format("file:///{}", path_str); +} + +std::string uri_to_path(const LSPSpec::DocumentUri& uri) { + auto decoded_uri = url_decode(uri); + if (str_util::starts_with(decoded_uri, "file:///")) { +#ifdef _WIN32 + decoded_uri = decoded_uri.substr(8); +#else + decoded_uri = decoded_uri.substr(7); +#endif + } + return decoded_uri; +} +} // namespace lsp_util diff --git a/lsp/lsp_util.h b/lsp/lsp_util.h new file mode 100644 index 00000000000..78b097cfe82 --- /dev/null +++ b/lsp/lsp_util.h @@ -0,0 +1,13 @@ +#pragma once +#include + +#include "common/util/FileUtil.h" + +#include "protocol/common_types.h" + +namespace lsp_util { +std::string url_encode(const std::string& value); +std::string url_decode(const std::string& input); +LSPSpec::DocumentUri uri_from_path(fs::path path); +std::string uri_to_path(const LSPSpec::DocumentUri& uri); +}; // namespace lsp_util diff --git a/lsp/main.cpp b/lsp/main.cpp index bfa7c26d041..916ce9dbcaa 100644 --- a/lsp/main.cpp +++ b/lsp/main.cpp @@ -55,6 +55,29 @@ void setup_logging(bool verbose, std::string log_file, bool disable_ansi_colors) lg::initialize(); } +std::string temp_url_encode(const std::string& value) { + std::ostringstream escaped; + escaped.fill('0'); + escaped << std::hex; + + for (std::string::const_iterator i = value.begin(), n = value.end(); i != n; ++i) { + std::string::value_type c = (*i); + + // Keep alphanumeric and other accepted characters intact + if (isalnum(c) || c == '-' || c == '_' || c == '.' 
|| c == '~' || c == '/') { + escaped << c; + continue; + } + + // Any other characters are percent-encoded + escaped << std::uppercase; + escaped << '%' << std::setw(2) << int((unsigned char)c); + escaped << std::nouppercase; + } + + return escaped.str(); +} + int main(int argc, char** argv) { ArgumentGuard u8_guard(argc, argv); @@ -72,6 +95,7 @@ int main(int argc, char** argv) { CLI11_PARSE(app, argc, argv); AppState appstate; + LSPRouter lsp_router; appstate.verbose = verbose; try { @@ -89,6 +113,11 @@ int main(int argc, char** argv) { _setmode(_fileno(stdin), _O_BINARY); #endif + // TODO - make the server check for the process id of the extension host and exit itself if that + // process goes away (the process id comes on the command line as an argument and in the + // initialize request). This is what we do in all our servers since the extension host could die + // unexpected as well. + try { char c; MessageBuffer message_buffer; diff --git a/lsp/protocol/common_types.cpp b/lsp/protocol/common_types.cpp index eed2abac1ba..678ab7f9ad9 100644 --- a/lsp/protocol/common_types.cpp +++ b/lsp/protocol/common_types.cpp @@ -9,6 +9,11 @@ void LSPSpec::from_json(const json& j, Position& obj) { j.at("character").get_to(obj.m_character); } +LSPSpec::Range::Range(Position start, Position end) : m_start(start), m_end(end) {} + +LSPSpec::Range::Range(uint32_t line, uint32_t character) + : m_start({line, character}), m_end({line, character}) {} + void LSPSpec::to_json(json& j, const Range& obj) { // TODO - not sure if this works yet, but nice if it does! 
j = json{{"start", obj.m_start}, {"end", obj.m_end}}; diff --git a/lsp/protocol/common_types.h b/lsp/protocol/common_types.h index b15d71ebc6f..3d73279d16f 100644 --- a/lsp/protocol/common_types.h +++ b/lsp/protocol/common_types.h @@ -28,6 +28,11 @@ void from_json(const json& j, Position& obj); struct Range { Position m_start; Position m_end; + + Range(){}; + Range(Position start, Position end); + // point constructor + Range(uint32_t line, uint32_t character); }; void to_json(json& j, const Range& obj); void from_json(const json& j, Range& obj); diff --git a/lsp/protocol/completion.cpp b/lsp/protocol/completion.cpp index 684b433e61f..f9ad513b49f 100644 --- a/lsp/protocol/completion.cpp +++ b/lsp/protocol/completion.cpp @@ -1,14 +1,65 @@ #include "completion.h" void LSPSpec::to_json(json& j, const CompletionParams& obj) { - j = json{{"textDocument", obj.m_textDocument}, {"position", obj.m_position}}; + json_serialize(textDocument); + json_serialize(position); } void LSPSpec::from_json(const json& j, CompletionParams& obj) { - j.at("textDocument").get_to(obj.m_textDocument); - j.at("position").get_to(obj.m_position); + json_deserialize_if_exists(textDocument); + json_deserialize_if_exists(position); } -void LSPSpec::to_json(json& /*j*/, const CompletionList& /*obj*/) {} +void LSPSpec::to_json(json& j, const CompletionItemLabelDetails& obj) { + json_serialize_optional(detail); + json_serialize_optional(description); +} + +void LSPSpec::from_json(const json& j, CompletionItemLabelDetails& obj) { + json_deserialize_optional_if_exists(detail); + json_deserialize_optional_if_exists(description); +} + +void LSPSpec::to_json(json& j, const CompletionItem& obj) { + json_serialize(label); + json_serialize_optional(labelDetails); + json_serialize_optional(kind); + json_serialize_optional(tags); + json_serialize_optional(detail); + json_serialize_optional(documentation); + json_serialize_optional(preselect); + json_serialize_optional(sortText); + 
json_serialize_optional(filterText); + json_serialize_optional(insertText); + json_serialize_optional(textEdit); + json_serialize_optional(textEditText); + json_serialize_optional(additionalTextEdits); + json_serialize_optional(commitCharacters); +} -void LSPSpec::from_json(const json& /*j*/, CompletionList& /*obj*/) {} +void LSPSpec::from_json(const json& j, CompletionItem& obj) { + json_deserialize_if_exists(label); + json_deserialize_optional_if_exists(labelDetails); + json_deserialize_optional_if_exists(kind); + json_deserialize_optional_if_exists(tags); + json_deserialize_optional_if_exists(detail); + json_deserialize_optional_if_exists(documentation); + json_deserialize_optional_if_exists(preselect); + json_deserialize_optional_if_exists(sortText); + json_deserialize_optional_if_exists(filterText); + json_deserialize_optional_if_exists(insertText); + json_deserialize_optional_if_exists(textEdit); + json_deserialize_optional_if_exists(textEditText); + json_deserialize_optional_if_exists(additionalTextEdits); + json_deserialize_optional_if_exists(commitCharacters); +} + +void LSPSpec::to_json(json& j, const CompletionList& obj) { + json_serialize(isIncomplete); + json_serialize(items); +} + +void LSPSpec::from_json(const json& j, CompletionList& obj) { + json_deserialize_if_exists(isIncomplete); + json_deserialize_if_exists(items); +} diff --git a/lsp/protocol/completion.h b/lsp/protocol/completion.h index 9d4dafb0461..1d7cae5c18d 100644 --- a/lsp/protocol/completion.h +++ b/lsp/protocol/completion.h @@ -20,10 +20,10 @@ enum class CompletionTriggerKind { // TODO - look into inheriting structs? struct CompletionParams { - /// @brief The text document. - TextDocumentIdentifier m_textDocument; - /// @brief The position inside the text document. - Position m_position; + /// The text document. + TextDocumentIdentifier textDocument; + /// The position inside the text document. 
+ Position position; }; void to_json(json& j, const CompletionParams& obj); @@ -40,6 +40,9 @@ struct CompletionItemLabelDetails { std::optional description; }; +void to_json(json& j, const CompletionItemLabelDetails& obj); +void from_json(const json& j, CompletionItemLabelDetails& obj); + /// @brief The kind of a completion entry. enum class CompletionItemKind { Text = 1, @@ -95,8 +98,9 @@ struct CompletionItem { /// information. std::optional detail; /// A human-readable string that represents a doc-comment. + /// TODO - can also be MarkupContent std::optional documentation; - // NOTE - skipped deprecated + // NOTE - skipped deprecated (because it's deprecated!) /// Select this item when showing. /// /// *Note* that only one completion item can be selected and that the tool / client decides which @@ -108,17 +112,104 @@ struct CompletionItem { /// A string that should be used when filtering a set of completion items. When omitted the label /// is used as the filter text for this item. std::optional filterText; - // TODO - a lot of other fields... + /// A string that should be inserted into a document when selecting + /// this completion. When omitted the label is used as the insert text + /// for this item. + /// + /// The `insertText` is subject to interpretation by the client side. + /// Some tools might not take the string literally. For example + /// VS Code when code complete is requested in this example + /// `con` and a completion item with an `insertText` of + /// `console` is provided it will only insert `sole`. Therefore it is + /// recommended to use `textEdit` instead since it avoids additional client + /// side interpretation. + std::optional insertText; + /// The format of the insert text. The format applies to both the + /// `insertText` property and the `newText` property of a provided + /// `textEdit`. If omitted defaults to `InsertTextFormat.PlainText`. + /// + /// Please note that the insertTextFormat doesn't apply to + /// `additionalTextEdits`. 
+ // TODO - std::optional insertTextFormat; + /// How whitespace and indentation is handled during completion + /// item insertion. If not provided the client's default value depends on + /// the `textDocument.completion.insertTextMode` client capability. + /// + /// @since 3.16.0 + /// @since 3.17.0 - support for `textDocument.completion.insertTextMode` + // TODO - std::optional insertTextMode; + /// An edit which is applied to a document when selecting this completion. + /// When an edit is provided the value of `insertText` is ignored. + /// + /// *Note:* The range of the edit must be a single line range and it must + /// contain the position at which completion has been requested. + /// + /// Most editors support two different operations when accepting a completion + /// item. One is to insert a completion text and the other is to replace an + /// existing text with a completion text. Since this can usually not be + /// predetermined by a server it can report both ranges. Clients need to + /// signal support for `InsertReplaceEdit`s via the + /// `textDocument.completion.completionItem.insertReplaceSupport` client + /// capability property. + /// + /// *Note 1:* The text edit's range as well as both ranges from an insert + /// replace edit must be a [single line] and they must contain the position + /// at which completion has been requested. + /// *Note 2:* If an `InsertReplaceEdit` is returned the edit's insert range + /// must be a prefix of the edit's replace range, that means it must be + /// contained and starting at the same position. + /// + /// @since 3.16.0 additional type `InsertReplaceEdit` + /// TODO - can also be InsertReplaceEdit + std::optional textEdit; + /// The edit text used if the completion item is part of a CompletionList and + /// CompletionList defines an item default for the text edit range. 
+ /// + /// Clients will only honor this property if they opt into completion list + /// item defaults using the capability `completionList.itemDefaults`. + /// + /// If not provided and a list's default range is provided the label + /// property is used as a text. + /// + /// @since 3.17.0 + std::optional textEditText; + /// An optional array of additional text edits that are applied when + /// selecting this completion. Edits must not overlap (including the same + /// insert position) with the main edit nor with themselves. + /// + /// Additional text edits should be used to change text unrelated to the + /// current cursor position (for example adding an import statement at the + /// top of the file if the completion item will insert an unqualified type). + std::optional> additionalTextEdits; + /// An optional set of characters that when pressed while this completion is + /// active will accept it first and then type that character. *Note* that all + /// commit characters should have `length=1` and that superfluous characters + /// will be ignored. + std::optional> commitCharacters; + /// An optional command that is executed *after* inserting this completion. + /// *Note* that additional modifications to the current document should be + /// described with the additionalTextEdits-property. + // TODO - std::optional command; + /// A data entry field that is preserved on a completion item between + /// a completion and a completion resolve request. + // TODO - LSPAny for data }; +void to_json(json& j, const CompletionItem& obj); +void from_json(const json& j, CompletionItem& obj); + +// Represents a collection of [completion items](#CompletionItem) to be +// presented in the editor. struct CompletionList { - /// This list is not complete. Further typing should result in recomputing this list. + /// This list is not complete. Further typing should result in recomputing + /// this list. 
/// - /// Recomputed lists have all their items replaced (not appended) in the incomplete completion - /// sessions. - bool m_isIncomplete; + /// Recomputed lists have all their items replaced (not appended) in the + /// incomplete completion sessions. + bool isIncomplete; + // TODO - do itemDefaults /// The completion items. - std::vector m_items; + std::vector items; }; void to_json(json& j, const CompletionList& obj); diff --git a/lsp/protocol/document_synchronization.cpp b/lsp/protocol/document_synchronization.cpp index dedb08b8006..2b1c53a6067 100644 --- a/lsp/protocol/document_synchronization.cpp +++ b/lsp/protocol/document_synchronization.cpp @@ -32,3 +32,13 @@ void LSPSpec::to_json(json& j, const DidCloseTextDocumentParams& obj) { void LSPSpec::from_json(const json& j, DidCloseTextDocumentParams& obj) { j.at("textDocument").get_to(obj.m_textDocument); } + +void LSPSpec::to_json(json& j, const WillSaveTextDocumentParams& obj) { + json_serialize(textDocument); + json_serialize(reason); +} + +void LSPSpec::from_json(const json& j, WillSaveTextDocumentParams& obj) { + json_deserialize_if_exists(textDocument); + json_deserialize_if_exists(reason); +} diff --git a/lsp/protocol/document_synchronization.h b/lsp/protocol/document_synchronization.h index 476aa7c1b6d..c331ee503d6 100644 --- a/lsp/protocol/document_synchronization.h +++ b/lsp/protocol/document_synchronization.h @@ -32,4 +32,24 @@ struct DidCloseTextDocumentParams { void to_json(json& j, const DidCloseTextDocumentParams& obj); void from_json(const json& j, DidCloseTextDocumentParams& obj); +enum class TextDocumentSaveReason { + // Manually triggered, e.g. by the user pressing save, by starting debugging, or by an API call. + Manual = 1, + // Automatic after a delay. + AfterDelay = 2, + // When the editor lost focus. + FocusOut = 3, +}; + +// The parameters send in a will save text document notification. +struct WillSaveTextDocumentParams { + // The document that will be saved. 
+ TextDocumentIdentifier textDocument; + // The 'TextDocumentSaveReason'. + TextDocumentSaveReason reason; +}; + +void to_json(json& j, const WillSaveTextDocumentParams& obj); +void from_json(const json& j, WillSaveTextDocumentParams& obj); + } // namespace LSPSpec diff --git a/lsp/protocol/progress_report.cpp b/lsp/protocol/progress_report.cpp index dcc315880bb..d8ebbef79e0 100644 --- a/lsp/protocol/progress_report.cpp +++ b/lsp/protocol/progress_report.cpp @@ -8,7 +8,7 @@ void LSPSpec::from_json(const json& j, WorkDoneProgressCreateParams& obj) { json_deserialize_if_exists(token); } -void LSPSpec::to_json(json& j, const ProgressPayloadBegin& obj) { +void LSPSpec::to_json(json& j, const WorkDoneProgressBegin& obj) { json_serialize(kind); json_serialize(title); json_serialize(cancellable); @@ -16,7 +16,7 @@ void LSPSpec::to_json(json& j, const ProgressPayloadBegin& obj) { json_serialize_optional(percentage); } -void LSPSpec::from_json(const json& j, ProgressPayloadBegin& obj) { +void LSPSpec::from_json(const json& j, WorkDoneProgressBegin& obj) { json_deserialize_if_exists(kind); json_deserialize_if_exists(title); json_deserialize_if_exists(cancellable); @@ -24,56 +24,43 @@ void LSPSpec::from_json(const json& j, ProgressPayloadBegin& obj) { json_deserialize_optional_if_exists(percentage); } -void LSPSpec::to_json(json& j, const ProgressParamsBegin& obj) { - json_serialize(token); - json_serialize(value); -} - -void LSPSpec::from_json(const json& j, ProgressParamsBegin& obj) { - json_deserialize_if_exists(token); - json_deserialize_if_exists(value); -} - -void LSPSpec::to_json(json& j, const ProgressPayloadReport& obj) { +void LSPSpec::to_json(json& j, const WorkDoneProgressReport& obj) { json_serialize(kind); json_serialize(cancellable); json_serialize_optional(message); json_serialize_optional(percentage); } -void LSPSpec::from_json(const json& j, ProgressPayloadReport& obj) { +void LSPSpec::from_json(const json& j, WorkDoneProgressReport& obj) { 
json_deserialize_if_exists(kind); json_deserialize_if_exists(cancellable); json_deserialize_optional_if_exists(message); json_deserialize_optional_if_exists(percentage); } -void LSPSpec::to_json(json& j, const ProgressParamsReport& obj) { - json_serialize(token); - json_serialize(value); -} - -void LSPSpec::from_json(const json& j, ProgressParamsReport& obj) { - json_deserialize_if_exists(token); - json_deserialize_if_exists(value); -} - -void LSPSpec::to_json(json& j, const ProgressPayloadEnd& obj) { +void LSPSpec::to_json(json& j, const WorkDoneProgressEnd& obj) { json_serialize(kind); json_serialize_optional(message); } -void LSPSpec::from_json(const json& j, ProgressPayloadEnd& obj) { +void LSPSpec::from_json(const json& j, WorkDoneProgressEnd& obj) { json_deserialize_if_exists(kind); json_deserialize_optional_if_exists(message); } -void LSPSpec::to_json(json& j, const ProgressParamsEnd& obj) { +void LSPSpec::to_json(json& j, const ProgressNotificationPayload& obj) { json_serialize(token); - json_serialize(value); + if (obj.beginValue) { + j["value"] = obj.beginValue.value(); + } else if (obj.reportValue) { + j["value"] = obj.reportValue.value(); + } else { + j["value"] = obj.endValue.value(); + } } -void LSPSpec::from_json(const json& j, ProgressParamsEnd& obj) { +void LSPSpec::from_json(const json& j, ProgressNotificationPayload& obj) { json_deserialize_if_exists(token); - json_deserialize_if_exists(value); + // TODO - not needed, but if so -- deserialize 'value', it's possible to figure out which is the + // right one } diff --git a/lsp/protocol/progress_report.h b/lsp/protocol/progress_report.h index 0e254c34aee..40a0f7ed0bc 100644 --- a/lsp/protocol/progress_report.h +++ b/lsp/protocol/progress_report.h @@ -11,7 +11,7 @@ struct WorkDoneProgressCreateParams { void to_json(json& j, const WorkDoneProgressCreateParams& obj); void from_json(const json& j, WorkDoneProgressCreateParams& obj); -struct ProgressPayloadBegin { +struct WorkDoneProgressBegin { 
std::string kind = "begin"; // Mandatory title of the progress operation. Used to briefly inform about // the kind of operation being performed. @@ -36,20 +36,10 @@ struct ProgressPayloadBegin { // that are not following this rule. The value range is [0, 100] std::optional percentage; }; -void to_json(json& j, const ProgressPayloadBegin& obj); -void from_json(const json& j, ProgressPayloadBegin& obj); +void to_json(json& j, const WorkDoneProgressBegin& obj); +void from_json(const json& j, WorkDoneProgressBegin& obj); -struct ProgressParamsBegin { - // The progress token provided by the client or server. - std::string token; - // Payload - ProgressPayloadBegin value; -}; - -void to_json(json& j, const ProgressParamsBegin& obj); -void from_json(const json& j, ProgressParamsBegin& obj); - -struct ProgressPayloadReport { +struct WorkDoneProgressReport { std::string kind = "report"; // Controls enablement state of a cancel button. This property is only valid // if a cancel button got requested in the `WorkDoneProgressBegin` payload. @@ -71,35 +61,25 @@ struct ProgressPayloadReport { // that are not following this rule. The value range is [0, 100] std::optional percentage; }; -void to_json(json& j, const ProgressPayloadReport& obj); -void from_json(const json& j, ProgressPayloadReport& obj); - -struct ProgressParamsReport { - // The progress token provided by the client or server. - std::string token; - // Payload - ProgressPayloadReport value; -}; - -void to_json(json& j, const ProgressParamsReport& obj); -void from_json(const json& j, ProgressParamsReport& obj); +void to_json(json& j, const WorkDoneProgressReport& obj); +void from_json(const json& j, WorkDoneProgressReport& obj); -struct ProgressPayloadEnd { +struct WorkDoneProgressEnd { std::string kind = "end"; // Optional, a final message indicating to for example indicate the outcome // of the operation. 
std::optional message; }; -void to_json(json& j, const ProgressPayloadEnd& obj); -void from_json(const json& j, ProgressPayloadEnd& obj); +void to_json(json& j, const WorkDoneProgressEnd& obj); +void from_json(const json& j, WorkDoneProgressEnd& obj); -struct ProgressParamsEnd { - // The progress token provided by the client or server. +struct ProgressNotificationPayload { std::string token; - // Payload - ProgressPayloadEnd value; + std::optional beginValue; + std::optional reportValue; + std::optional endValue; }; +void to_json(json& j, const ProgressNotificationPayload& obj); +void from_json(const json& j, ProgressNotificationPayload& obj); -void to_json(json& j, const ProgressParamsEnd& obj); -void from_json(const json& j, ProgressParamsEnd& obj); } // namespace LSPSpec diff --git a/lsp/protocol/type_hierarchy.cpp b/lsp/protocol/type_hierarchy.cpp new file mode 100644 index 00000000000..a1502392276 --- /dev/null +++ b/lsp/protocol/type_hierarchy.cpp @@ -0,0 +1,50 @@ +#include "type_hierarchy.h" + +#include "common/util/json_util.h" + +// TODO - there's gotta be a way to share json serialization/deserialization +// figure it out _soon_ +void LSPSpec::to_json(json& j, const TypeHierarchyPrepareParams& obj) { + j = json{{"textDocument", obj.m_textDocument}, {"position", obj.m_position}}; +} + +void LSPSpec::from_json(const json& j, TypeHierarchyPrepareParams& obj) { + j.at("textDocument").get_to(obj.m_textDocument); + j.at("position").get_to(obj.m_position); +} + +void LSPSpec::to_json(json& j, const TypeHierarchyItem& obj) { + json_serialize(name); + json_serialize(kind); + json_serialize_optional(tags); + json_serialize_optional(detail); + json_serialize(uri); + json_serialize(range); + json_serialize(selectionRange); +} + +void LSPSpec::from_json(const json& j, TypeHierarchyItem& obj) { + json_deserialize_if_exists(name); + json_deserialize_if_exists(kind); + json_deserialize_optional_if_exists(tags); + json_deserialize_optional_if_exists(detail); + 
json_deserialize_if_exists(uri); + json_deserialize_if_exists(range); + json_deserialize_if_exists(selectionRange); +} + +void LSPSpec::to_json(json& j, const TypeHierarchySupertypesParams& obj) { + json_serialize(item); +} + +void LSPSpec::from_json(const json& j, TypeHierarchySupertypesParams& obj) { + json_deserialize_if_exists(item); +} + +void LSPSpec::to_json(json& j, const TypeHierarchySubtypesParams& obj) { + json_serialize(item); +} + +void LSPSpec::from_json(const json& j, TypeHierarchySubtypesParams& obj) { + json_deserialize_if_exists(item); +} diff --git a/lsp/protocol/type_hierarchy.h b/lsp/protocol/type_hierarchy.h new file mode 100644 index 00000000000..305b0d5cb53 --- /dev/null +++ b/lsp/protocol/type_hierarchy.h @@ -0,0 +1,56 @@ +#pragma once + +#include "common_types.h" + +#include "lsp/protocol/document_symbols.h" + +namespace LSPSpec { + +struct TypeHierarchyPrepareParams : TextDocumentPositionParams {}; + +void to_json(json& j, const TypeHierarchyPrepareParams& obj); +void from_json(const json& j, TypeHierarchyPrepareParams& obj); + +struct TypeHierarchyItem { + /// The name of this item. + std::string name; + /// The kind of this item. + SymbolKind kind; + /// Tags for this item. + std::optional> tags; + /// More detail for this item, e.g. the signature of a function. + std::optional detail; + /// The resource identifier of this item. + DocumentUri uri; + /// The range enclosing this symbol not including leading/trailing whitespace + /// but everything else, e.g. comments and code. + Range range; + /// The range that should be selected and revealed when this symbol is being + /// picked, e.g. the name of a function. Must be contained by the + /// `range` of this + Range selectionRange; + /// A data entry field that is preserved between a type hierarchy prepare and + /// supertypes or subtypes requests. 
It could also be used to identify the + /// type hierarchy in the server, helping improve the performance on + /// resolving supertypes and subtypes. + // ANY data; +}; + +void to_json(json& j, const TypeHierarchyItem& obj); +void from_json(const json& j, TypeHierarchyItem& obj); + +struct TypeHierarchySupertypesParams { + TypeHierarchyItem item; +}; + +void to_json(json& j, const TypeHierarchySupertypesParams& obj); +void from_json(const json& j, TypeHierarchySupertypesParams& obj); + +struct TypeHierarchySubtypesParams { + TypeHierarchyItem item; +}; + +void to_json(json& j, const TypeHierarchySubtypesParams& obj); +void from_json(const json& j, TypeHierarchySubtypesParams& obj); + +} // namespace LSPSpec diff --git a/lsp/state/app.h b/lsp/state/app.h index 62ae4604b91..6e2a1f6761a 100644 --- a/lsp/state/app.h +++ b/lsp/state/app.h @@ -2,6 +2,7 @@ #include "lsp/state/workspace.h" +// TODO - remove this, not really benefiting (never going to have multiple appstates) struct AppState { Workspace workspace; bool verbose; diff --git a/lsp/state/lsp_requester.cpp b/lsp/state/lsp_requester.cpp index 4b95ed580d6..060a6a28f22 100644 --- a/lsp/state/lsp_requester.cpp +++ b/lsp/state/lsp_requester.cpp @@ -42,35 +42,49 @@ void LSPRequester::send_notification(const json& params, const std::string& meth std::cout << request.c_str() << std::flush; } -void LSPRequester::send_progress_create_request(const std::string& token, - const std::string& title) { - LSPSpec::WorkDoneProgressCreateParams params; - params.token = token; - send_request(params, "window/workDoneProgress/create"); - LSPSpec::ProgressPayloadBegin beginPayload; +void LSPRequester::send_progress_create_request(const std::string& title, + const std::string& message, + const int percentage) { + const std::string token = fmt::format("opengoal/{}", title); + LSPSpec::WorkDoneProgressCreateParams createRequest; + createRequest.token = token; + send_request(createRequest, "window/workDoneProgress/create"); + 
LSPSpec::WorkDoneProgressBegin beginPayload; beginPayload.title = title; - LSPSpec::ProgressParamsBegin beginParams; - beginParams.token = token; - beginParams.value = beginPayload; - send_notification(beginParams, "$/progress"); + beginPayload.cancellable = false; // TODO - maybe one day + beginPayload.message = message; + if (percentage > 0) { + beginPayload.percentage = percentage; + } + LSPSpec::ProgressNotificationPayload notification; + notification.token = token; + notification.beginValue = beginPayload; + send_notification(notification, "$/progress"); } -void LSPRequester::send_progress_update_request(const std::string& token, - const std::string& message) { - LSPSpec::ProgressPayloadReport reportPayload; +void LSPRequester::send_progress_update_request(const std::string& title, + const std::string& message, + const int percentage) { + const std::string token = fmt::format("opengoal/{}", title); + LSPSpec::WorkDoneProgressReport reportPayload; + reportPayload.cancellable = false; // TODO - maybe one day reportPayload.message = message; - LSPSpec::ProgressParamsReport reportParams; - reportParams.token = token; - reportParams.value = reportPayload; - send_notification(reportParams, "$/progress"); + if (percentage > 0) { + reportPayload.percentage = percentage; + } + LSPSpec::ProgressNotificationPayload notification; + notification.token = token; + notification.reportValue = reportPayload; + send_notification(notification, "$/progress"); } -void LSPRequester::send_progress_finish_request(const std::string& token, +void LSPRequester::send_progress_finish_request(const std::string& title, const std::string& message) { - LSPSpec::ProgressPayloadEnd endPayload; + const std::string token = fmt::format("opengoal/{}", title); + LSPSpec::WorkDoneProgressEnd endPayload; endPayload.message = message; - LSPSpec::ProgressParamsEnd endParams; - endParams.token = token; - endParams.value = endPayload; - send_notification(endParams, "$/progress"); + 
LSPSpec::ProgressNotificationPayload notification; + notification.token = token; + notification.endValue = endPayload; + send_notification(notification, "$/progress"); } diff --git a/lsp/state/lsp_requester.h b/lsp/state/lsp_requester.h index 47986504688..c335ed8ac39 100644 --- a/lsp/state/lsp_requester.h +++ b/lsp/state/lsp_requester.h @@ -9,9 +9,13 @@ class LSPRequester { public: - void send_progress_create_request(const std::string& token, const std::string& title); - void send_progress_update_request(const std::string& token, const std::string& message); - void send_progress_finish_request(const std::string& token, const std::string& message); + void send_progress_create_request(const std::string& title, + const std::string& message, + const int percentage); + void send_progress_update_request(const std::string& title, + const std::string& message, + const int percentage); + void send_progress_finish_request(const std::string& title, const std::string& message); private: void send_request(const json& payload, const std::string& method); diff --git a/lsp/state/workspace.cpp b/lsp/state/workspace.cpp index 6f9599365d6..11421592df1 100644 --- a/lsp/state/workspace.cpp +++ b/lsp/state/workspace.cpp @@ -1,86 +1,23 @@ #include "workspace.h" -#include #include -#include #include "common/log/log.h" +#include "common/util/FileUtil.h" +#include "common/util/ast_util.h" #include "common/util/string_util.h" +#include "lsp/lsp_util.h" #include "lsp/protocol/common_types.h" +#include "tree_sitter/api.h" -std::string url_encode(const std::string& value) { - std::ostringstream escaped; - escaped.fill('0'); - escaped << std::hex; - - for (std::string::const_iterator i = value.begin(), n = value.end(); i != n; ++i) { - std::string::value_type c = (*i); - - // Keep alphanumeric and other accepted characters intact - if (isalnum(c) || c == '-' || c == '_' || c == '.' 
|| c == '~' || c == '/') { - escaped << c; - continue; - } - - // Any other characters are percent-encoded - escaped << std::uppercase; - escaped << '%' << std::setw(2) << int((unsigned char)c); - escaped << std::nouppercase; - } - - return escaped.str(); -} - -std::string url_decode(const std::string& input) { - std::ostringstream decoded; - - for (std::size_t i = 0; i < input.length(); ++i) { - if (input[i] == '%') { - // Check if there are enough characters remaining - if (i + 2 < input.length()) { - // Convert the next two characters after '%' into an integer value - std::istringstream hexStream(input.substr(i + 1, 2)); - int hexValue = 0; - hexStream >> std::hex >> hexValue; - - // Append the decoded character to the result - decoded << static_cast(hexValue); - - // Skip the next two characters - i += 2; - } - } else if (input[i] == '+') { - // Replace '+' with space character ' ' - decoded << ' '; - } else { - // Append the character as is - decoded << input[i]; - } - } - - return decoded.str(); -} - -LSPSpec::DocumentUri uri_from_path(fs::path path) { - auto path_str = file_util::convert_to_unix_path_separators(path.string()); - // vscode works with proper URL encoded URIs for file paths - // which means we have to roll our own... - path_str = url_encode(path_str); - return fmt::format("file:///{}", path_str); +// Declare the `tree_sitter_opengoal` function, which is +// implemented by the `tree-sitter-opengoal` library. 
+extern "C" { +extern const TSLanguage* tree_sitter_opengoal(); } -std::string uri_to_path(LSPSpec::DocumentUri uri) { - auto decoded_uri = url_decode(uri); - if (str_util::starts_with(decoded_uri, "file:///")) { -#ifdef _WIN32 - decoded_uri = decoded_uri.substr(8); -#else - decoded_uri = decoded_uri.substr(7); -#endif - } - return decoded_uri; -} +const TSLanguage* g_opengoalLang = tree_sitter_opengoal(); Workspace::Workspace(){}; Workspace::~Workspace(){}; @@ -111,18 +48,22 @@ Workspace::FileType Workspace::determine_filetype_from_uri(const LSPSpec::Docume return FileType::Unsupported; } -std::optional Workspace::get_tracked_og_file(const LSPSpec::URI& file_uri) { - if (m_tracked_og_files.find(file_uri) == m_tracked_og_files.end()) { - return {}; +std::optional> Workspace::get_tracked_og_file( + const LSPSpec::URI& file_uri) { + auto it = m_tracked_og_files.find(file_uri); + if (it == m_tracked_og_files.end()) { + return std::nullopt; } - return m_tracked_og_files[file_uri]; + return std::ref(it->second); } -std::optional Workspace::get_tracked_ir_file(const LSPSpec::URI& file_uri) { - if (m_tracked_ir_files.count(file_uri) == 0) { - return {}; +std::optional> Workspace::get_tracked_ir_file( + const LSPSpec::URI& file_uri) { + auto it = m_tracked_ir_files.find(file_uri); + if (it == m_tracked_ir_files.end()) { + return std::nullopt; } - return m_tracked_ir_files[file_uri]; + return std::ref(it->second); } std::optional Workspace::get_definition_info_from_all_types( @@ -143,18 +84,34 @@ std::optional Workspace::get_definition_info_from_all_types( // // This is bad because jak 2 now uses some code from the jak1 folder, and also wouldn't be able to // be determined (jak1 or jak2?) if we had a proper 'common' folder(s). 
-std::optional determine_game_version_from_uri(const LSPSpec::DocumentUri& uri) { - const auto path = uri_to_path(uri); +std::optional Workspace::determine_game_version_from_uri( + const LSPSpec::DocumentUri& uri) { + const auto path = lsp_util::uri_to_path(uri); if (str_util::contains(path, "goal_src/jak1")) { return GameVersion::Jak1; } else if (str_util::contains(path, "goal_src/jak2")) { return GameVersion::Jak2; + } else if (str_util::contains(path, "goal_src/jak3")) { + return GameVersion::Jak3; } return {}; } -std::optional Workspace::get_global_symbol_info(const WorkspaceOGFile& file, - const std::string& symbol_name) { +std::vector> Workspace::get_symbols_starting_with( + const GameVersion game_version, + const std::string& symbol_prefix) { + if (m_compiler_instances.find(game_version) == m_compiler_instances.end()) { + lg::debug("Compiler not instantiated for game version - {}", + version_to_game_name(game_version)); + return {}; + } + const auto& compiler = m_compiler_instances[game_version].get(); + return compiler->lookup_symbol_info_by_prefix(symbol_prefix); +} + +std::optional> Workspace::get_global_symbol_info( + const WorkspaceOGFile& file, + const std::string& symbol_name) { if (m_compiler_instances.find(file.m_game_version) == m_compiler_instances.end()) { lg::debug("Compiler not instantiated for game version - {}", version_to_game_name(file.m_game_version)); @@ -162,19 +119,21 @@ std::optional Workspace::get_global_symbol_info(const WorkspaceOGFil } const auto& compiler = m_compiler_instances[file.m_game_version].get(); const auto symbol_infos = compiler->lookup_exact_name_info(symbol_name); - if (!symbol_infos || symbol_infos->empty()) { + if (symbol_infos.empty()) { return {}; - } else if (symbol_infos->size() > 1) { + } else if (symbol_infos.size() > 1) { // TODO - handle this (overriden methods is the main issue here) - lg::debug("Found symbol info, but found multiple infos - {}", symbol_infos->size()); + lg::debug("Found symbol info, but 
found multiple infos - {}", symbol_infos.size()); return {}; } - const auto& symbol = symbol_infos->at(0); + const auto& symbol = symbol_infos.at(0); return symbol; } -std::optional Workspace::get_symbol_typespec(const WorkspaceOGFile& file, - const std::string& symbol_name) { +// TODO - consolidate what is needed into `SymbolInfo` +std::optional> Workspace::get_symbol_typeinfo( + const WorkspaceOGFile& file, + const std::string& symbol_name) { if (m_compiler_instances.find(file.m_game_version) == m_compiler_instances.end()) { lg::debug("Compiler not instantiated for game version - {}", version_to_game_name(file.m_game_version)); @@ -183,30 +142,120 @@ std::optional Workspace::get_symbol_typespec(const WorkspaceOGFile& fi const auto& compiler = m_compiler_instances[file.m_game_version].get(); const auto typespec = compiler->lookup_typespec(symbol_name); if (typespec) { - return typespec; + // NOTE - for some reason calling with the symbol's typespec and the symbol itself produces + // different results! 
+ const auto full_type_info = compiler->type_system().lookup_type_no_throw(symbol_name); + if (full_type_info != nullptr) { + return std::make_pair(typespec.value(), full_type_info); + } } return {}; } -std::optional Workspace::get_symbol_def_location( +std::optional Workspace::get_symbol_def_location( const WorkspaceOGFile& file, - const SymbolInfo& symbol_info) { - if (m_compiler_instances.find(file.m_game_version) == m_compiler_instances.end()) { + const std::shared_ptr symbol_info) { + const auto& def_loc = symbol_info->m_def_location; + if (!def_loc) { + return {}; + } + return def_loc; +} + +std::vector> +Workspace::get_symbols_parent_type_path(const std::string& symbol_name, + const GameVersion game_version) { + if (m_compiler_instances.find(game_version) == m_compiler_instances.end()) { lg::debug("Compiler not instantiated for game version - {}", - version_to_game_name(file.m_game_version)); + version_to_game_name(game_version)); return {}; } - const auto& compiler = m_compiler_instances[file.m_game_version].get(); - std::optional def_loc; - const auto& goos_info = compiler->get_goos().reader.db.get_short_info_for(symbol_info.src_form()); - if (goos_info) { + + // name, docstring, def_loc + std::vector> parents = {}; + + const auto& compiler = m_compiler_instances[game_version].get(); + const auto parent_path = compiler->type_system().get_path_up_tree(symbol_name); + for (const auto& parent : parent_path) { + const auto symbol_infos = compiler->lookup_exact_name_info(parent); + if (symbol_infos.empty()) { + continue; + } + std::shared_ptr symbol_info; + if (symbol_infos.size() > 1) { + for (const auto& info : symbol_infos) { + if (info->m_kind == symbol_info::Kind::TYPE) { + symbol_info = info; + } + } + } else { + symbol_info = symbol_infos.at(0); + } + if (!symbol_info) { + continue; + } + const auto& def_loc = symbol_info->m_def_location; + if (!def_loc) { + continue; + } Docs::DefinitionLocation new_def_loc; - new_def_loc.filename = 
uri_from_path(goos_info->filename); - new_def_loc.line_idx = goos_info->line_idx_to_display; - new_def_loc.char_idx = goos_info->pos_in_line; - def_loc = new_def_loc; + new_def_loc.filename = lsp_util::uri_from_path(def_loc->file_path); + new_def_loc.line_idx = def_loc->line_idx; + new_def_loc.char_idx = def_loc->char_idx; + parents.push_back({parent, symbol_info->m_docstring, new_def_loc}); } - return def_loc; + return parents; +} + +std::vector> +Workspace::get_types_subtypes(const std::string& symbol_name, const GameVersion game_version) { + if (m_compiler_instances.find(game_version) == m_compiler_instances.end()) { + lg::debug("Compiler not instantiated for game version - {}", + version_to_game_name(game_version)); + return {}; + } + + // name, docstring, def_loc + std::vector> subtypes = {}; + + const auto& compiler = m_compiler_instances[game_version].get(); + const auto subtype_names = + compiler->type_system().search_types_by_parent_type_strict(symbol_name); + for (const auto& subtype_name : subtype_names) { + const auto symbol_infos = compiler->lookup_exact_name_info(subtype_name); + if (symbol_infos.empty()) { + continue; + } else if (symbol_infos.size() > 1) { + continue; + } + const auto& symbol_info = symbol_infos.at(0); + const auto& def_loc = symbol_info->m_def_location; + if (!def_loc) { + continue; + } + Docs::DefinitionLocation new_def_loc; + new_def_loc.filename = lsp_util::uri_from_path(def_loc->file_path); + new_def_loc.line_idx = def_loc->line_idx; + new_def_loc.char_idx = def_loc->char_idx; + subtypes.push_back({subtype_name, symbol_info->m_docstring, new_def_loc}); + } + return subtypes; +} + +std::unordered_map Workspace::get_enum_entries(const std::string& enum_name, + const GameVersion game_version) { + if (m_compiler_instances.find(game_version) == m_compiler_instances.end()) { + lg::debug("Compiler not instantiated for game version - {}", + version_to_game_name(game_version)); + return {}; + } + + const auto& compiler = 
m_compiler_instances[game_version].get(); + const auto enum_info = compiler->type_system().try_enum_lookup(enum_name); + if (!enum_info) { + return {}; + } + return enum_info->entries(); } void Workspace::start_tracking_file(const LSPSpec::DocumentUri& file_uri, @@ -225,33 +274,51 @@ void Workspace::start_tracking_file(const LSPSpec::DocumentUri& file_uri, } } } else if (language_id == "opengoal") { + if (m_tracked_og_files.find(file_uri) != m_tracked_og_files.end()) { + lg::debug("Already tracking - {}", file_uri); + return; + } auto game_version = determine_game_version_from_uri(file_uri); if (!game_version) { lg::debug("Could not determine game version from path - {}", file_uri); return; } - // TODO - this should happen on a separate thread so the LSP is not blocking during this lengthy - // step + if (m_compiler_instances.find(*game_version) == m_compiler_instances.end()) { lg::debug( "first time encountering a OpenGOAL file for game version - {}, initializing a compiler", version_to_game_name(*game_version)); - const auto project_path = file_util::try_get_project_path_from_path(uri_to_path(file_uri)); + const auto project_path = + file_util::try_get_project_path_from_path(lsp_util::uri_to_path(file_uri)); lg::debug("Detected project path - {}", project_path.value()); if (!file_util::setup_project_path(project_path)) { lg::debug("unable to setup project path, not initializing a compiler"); return; } - m_requester.send_progress_create_request("indexing-jak2", "Indexing - Jak 2"); + const std::string progress_title = + fmt::format("Compiling {}", version_to_game_name_external(game_version.value())); + m_requester.send_progress_create_request(progress_title, "compiling project", -1); m_compiler_instances.emplace(game_version.value(), std::make_unique(game_version.value())); - // TODO - if this fails, annotate some errors, adjust progress - m_compiler_instances.at(*game_version)->run_front_end_on_string("(make-group \"all-code\")"); - 
m_requester.send_progress_finish_request("indexing-jak2", "Indexed - Jak 2"); + try { + // TODO - this should happen on a separate thread so the LSP is not blocking during this + // lengthy step + // TODO - make this a setting (disable indexing) + // TODO - ask water if there is a fancy way to reduce memory usage (disabling coloring, + // etc?) + m_compiler_instances.at(*game_version) + ->run_front_end_on_string("(make-group \"all-code\")"); + m_requester.send_progress_finish_request(progress_title, "indexed"); + } catch (std::exception& e) { + // TODO - If it fails, annotate errors (DIAGNOSTIC TODO) + m_requester.send_progress_finish_request(progress_title, "failed"); + lg::debug("error when {}", progress_title); + } } - // TODO - otherwise, just `ml` the file instead of rebuilding the entire thing - // TODO - if the file fails to `ml`, annotate some errors - m_tracked_og_files[file_uri] = WorkspaceOGFile(content, *game_version); + m_tracked_og_files.emplace(file_uri, WorkspaceOGFile(file_uri, content, *game_version)); + m_tracked_og_files[file_uri].update_symbols( + m_compiler_instances.at(*game_version) + ->lookup_symbol_info_by_file(lsp_util::uri_to_path(file_uri))); } } @@ -260,7 +327,7 @@ void Workspace::update_tracked_file(const LSPSpec::DocumentUri& file_uri, lg::debug("potentially updating - {}", file_uri); // Check if the file is already tracked or not, this is done because change events don't give // language details it's assumed you are keeping track of that! 
- if (m_tracked_ir_files.count(file_uri) != 0) { + if (m_tracked_ir_files.find(file_uri) != m_tracked_ir_files.end()) { lg::debug("updating tracked IR file - {}", file_uri); WorkspaceIRFile file(content); m_tracked_ir_files[file_uri] = file; @@ -274,52 +341,210 @@ void Workspace::update_tracked_file(const LSPSpec::DocumentUri& file_uri, all_types_file->m_game_version = file.m_game_version; all_types_file->update_type_system(); } - } - - if (m_tracked_all_types_files.count(file_uri) != 0) { + } else if (m_tracked_all_types_files.find(file_uri) != m_tracked_all_types_files.end()) { lg::debug("updating tracked all types file - {}", file_uri); // If the all-types file has changed, re-parse it // NOTE - this assumes its still for the same game version! m_tracked_all_types_files[file_uri]->update_type_system(); + } else if (m_tracked_og_files.find(file_uri) != m_tracked_og_files.end()) { + lg::debug("updating tracked OG file - {}", file_uri); + m_tracked_og_files[file_uri].parse_content(content); + // re-`ml` the file + const auto game_version = m_tracked_og_files[file_uri].m_game_version; + if (m_compiler_instances.find(game_version) == m_compiler_instances.end()) { + lg::debug("No compiler initialized for - {}", version_to_game_name(game_version)); + return; + } + } +} + +void Workspace::tracked_file_will_save(const LSPSpec::DocumentUri& file_uri) { + lg::debug("file will be saved - {}", file_uri); + if (m_tracked_og_files.find(file_uri) != m_tracked_og_files.end()) { + // goalc is not an incremental compiler (yet) so I believe it will be a better UX + // to re-compile on the file save, rather than as the user is typing + const auto game_version = m_tracked_og_files[file_uri].m_game_version; + if (m_compiler_instances.find(game_version) == m_compiler_instances.end()) { + lg::debug("No compiler initialized for - {}", version_to_game_name(game_version)); + return; + } + CompilationOptions options; + options.filename = lsp_util::uri_to_path(file_uri); + // re-compile the 
file + m_compiler_instances.at(game_version)->asm_file(options); + // Update symbols for this specific file + const auto symbol_infos = + m_compiler_instances.at(game_version)->lookup_symbol_info_by_file(options.filename); + m_tracked_og_files[file_uri].update_symbols(symbol_infos); } +} + +void Workspace::update_global_index(const GameVersion game_version){ + // TODO - project wide indexing potentially (ie. finding references) }; void Workspace::stop_tracking_file(const LSPSpec::DocumentUri& file_uri) { - if (m_tracked_ir_files.count(file_uri) != 0) { - m_tracked_ir_files.erase(file_uri); - } - if (m_tracked_all_types_files.count(file_uri) != 0) { - m_tracked_all_types_files.erase(file_uri); + m_tracked_ir_files.erase(file_uri); + m_tracked_all_types_files.erase(file_uri); + m_tracked_og_files.erase(file_uri); +} + +WorkspaceOGFile::WorkspaceOGFile(const LSPSpec::DocumentUri& uri, + const std::string& content, + const GameVersion& game_version) + : m_uri(uri), m_game_version(game_version), version(0) { + const auto [line_count, line_ending] = + file_util::get_majority_file_line_endings_and_count(content); + m_line_count = line_count; + m_line_ending = line_ending; + lg::info("Added new OG file. {} symbols and {} diagnostics", m_symbols.size(), + m_diagnostics.size()); + parse_content(content); +} + +void WorkspaceOGFile::parse_content(const std::string& content) { + m_content = content; + auto parser = ts_parser_new(); + if (ts_parser_set_language(parser, g_opengoalLang)) { + // Get the AST for the current state of the file + // TODO - eventually, we should consider doing partial updates of the AST + // but right now the LSP just receives the entire document so that's a larger change. 
+ m_ast.reset(ts_parser_parse_string(parser, NULL, m_content.c_str(), m_content.length()), + TreeSitterTreeDeleter()); } + ts_parser_delete(parser); } -WorkspaceOGFile::WorkspaceOGFile(const std::string& content, const GameVersion& game_version) - : m_content(content), m_game_version(game_version) { - const auto line_ending = file_util::get_majority_file_line_endings(content); - m_lines = str_util::split_string(content, line_ending); - lg::info("Added new OG file. {} lines with {} symbols and {} diagnostics", m_lines.size(), - m_symbols.size(), m_diagnostics.size()); +void WorkspaceOGFile::update_symbols( + const std::vector>& symbol_infos) { + m_symbols.clear(); + // TODO - sorting by definition location would be nice (maybe VSCode already does this?) + for (const auto& symbol_info : symbol_infos) { + LSPSpec::DocumentSymbol lsp_sym; + lsp_sym.m_name = symbol_info->m_name; + lsp_sym.m_detail = symbol_info->m_docstring; + switch (symbol_info->m_kind) { + case symbol_info::Kind::CONSTANT: + lsp_sym.m_kind = LSPSpec::SymbolKind::Constant; + break; + case symbol_info::Kind::FUNCTION: + lsp_sym.m_kind = LSPSpec::SymbolKind::Function; + break; + case symbol_info::Kind::GLOBAL_VAR: + lsp_sym.m_kind = LSPSpec::SymbolKind::Variable; + break; + case symbol_info::Kind::MACRO: + lsp_sym.m_kind = LSPSpec::SymbolKind::Operator; + break; + case symbol_info::Kind::METHOD: + lsp_sym.m_name = fmt::format("{}::{}", symbol_info->m_type, symbol_info->m_name); + lsp_sym.m_kind = LSPSpec::SymbolKind::Method; + break; + case symbol_info::Kind::TYPE: + lsp_sym.m_kind = LSPSpec::SymbolKind::Class; + break; + default: + lsp_sym.m_kind = LSPSpec::SymbolKind::Object; + break; + } + if (symbol_info->m_def_location) { + lsp_sym.m_range = LSPSpec::Range(symbol_info->m_def_location->line_idx, + symbol_info->m_def_location->char_idx); + } else { + lsp_sym.m_range = LSPSpec::Range(0, 0); + } + // TODO - would be nice to make this accurate but we don't store that info yet + lsp_sym.m_selectionRange 
= lsp_sym.m_range; + if (symbol_info->m_kind == symbol_info::Kind::TYPE) { + std::vector type_symbols = {}; + for (const auto& field : symbol_info->m_type_fields) { + LSPSpec::DocumentSymbol field_sym; + field_sym.m_name = field.name; + field_sym.m_detail = field.description; + if (field.is_array) { + field_sym.m_kind = LSPSpec::SymbolKind::Array; + } else { + field_sym.m_kind = LSPSpec::SymbolKind::Field; + } + // TODO - we don't store the line number for fields + field_sym.m_range = lsp_sym.m_range; + field_sym.m_selectionRange = lsp_sym.m_selectionRange; + type_symbols.push_back(field_sym); + } + for (const auto& method : symbol_info->m_type_methods) { + LSPSpec::DocumentSymbol method_sym; + method_sym.m_name = method.name; + method_sym.m_kind = LSPSpec::SymbolKind::Method; + // TODO - we don't store the line number for fields + method_sym.m_range = lsp_sym.m_range; + method_sym.m_selectionRange = lsp_sym.m_selectionRange; + type_symbols.push_back(method_sym); + } + for (const auto& state : symbol_info->m_type_states) { + LSPSpec::DocumentSymbol state_sym; + state_sym.m_name = state.name; + state_sym.m_kind = LSPSpec::SymbolKind::Event; + // TODO - we don't store the line number for fields + state_sym.m_range = lsp_sym.m_range; + state_sym.m_selectionRange = lsp_sym.m_selectionRange; + type_symbols.push_back(state_sym); + } + lsp_sym.m_children = type_symbols; + } + m_symbols.push_back(lsp_sym); + } } std::optional WorkspaceOGFile::get_symbol_at_position( const LSPSpec::Position position) const { - // Split the line on typical word boundaries - std::string line = m_lines.at(position.m_line); - std::smatch matches; - std::regex regex("[\\w\\.\\-_!<>*?]+"); - std::regex_token_iterator rend; + if (m_ast) { + TSNode root_node = ts_tree_root_node(m_ast.get()); + TSNode found_node = + ts_node_descendant_for_point_range(root_node, {position.m_line, position.m_character}, + {position.m_line, position.m_character}); + if (!ts_node_has_error(found_node)) { + uint32_t 
start = ts_node_start_byte(found_node); + uint32_t end = ts_node_end_byte(found_node); + const std::string node_str = m_content.substr(start, end - start); + lg::debug("AST SAP - {}", node_str); + const std::string node_name = ts_node_type(found_node); + if (node_name == "sym_name") { + return node_str; + } + } else { + // found_node = ts_node_child(found_node, 0); + // TODO - maybe get this one (but check if has an error) + return {}; + } + } + return {}; +} - std::regex_token_iterator match(line.begin(), line.end(), regex); - while (match != rend) { - auto match_start = std::distance(line.begin(), match->first); - auto match_end = match_start + match->length(); - if (position.m_character >= match_start && position.m_character <= match_end) { - return match->str(); +std::vector WorkspaceOGFile::search_for_forms_that_begin_with( + std::vector prefix) const { + std::vector results = {}; + if (!m_ast) { + return results; + } + + TSNode root_node = ts_tree_root_node(m_ast.get()); + std::vector found_nodes = {}; + ast_util::search_for_forms_that_begin_with(m_content, root_node, prefix, found_nodes); + + for (const auto& node : found_nodes) { + std::vector tokens = {}; + for (size_t i = 0; i < ts_node_child_count(node); i++) { + const auto child_node = ts_node_child(node, i); + const auto contents = ast_util::get_source_code(m_content, child_node); + tokens.push_back(contents); } - match++; + const auto start_point = ts_node_start_point(node); + const auto end_point = ts_node_end_point(node); + results.push_back( + {tokens, {start_point.row, start_point.column}, {end_point.row, end_point.column}}); } - return {}; + return results; } WorkspaceIRFile::WorkspaceIRFile(const std::string& content) { @@ -356,7 +581,7 @@ void WorkspaceIRFile::find_all_types_path(const std::string& line) { const auto& game_version = matches[1]; const auto& all_types_path = matches[2]; lg::debug("Found DTS Path - {} : {}", game_version.str(), all_types_path.str()); - auto all_types_uri = 
uri_from_path(fs::path(all_types_path.str())); + auto all_types_uri = lsp_util::uri_from_path(fs::path(all_types_path.str())); lg::debug("DTS URI - {}", all_types_uri); if (valid_game_version(game_version.str())) { m_game_version = game_name_to_version(game_version.str()); @@ -381,8 +606,6 @@ void WorkspaceIRFile::find_function_symbol(const uint32_t line_num_zero_based, lg::info("Adding Symbol - {}", match.str()); LSPSpec::DocumentSymbol new_symbol; new_symbol.m_name = match.str(); - // TODO - function doc-string - // new_symbol.m_detail = ... new_symbol.m_kind = LSPSpec::SymbolKind::Function; LSPSpec::Range symbol_range; symbol_range.m_start = {line_num_zero_based, 0}; diff --git a/lsp/state/workspace.h b/lsp/state/workspace.h index 997a9bb6c5c..8253f551da1 100644 --- a/lsp/state/workspace.h +++ b/lsp/state/workspace.h @@ -2,6 +2,7 @@ #include #include +#include #include #include "common/util/FileUtil.h" @@ -14,20 +15,49 @@ #include "lsp/protocol/document_symbols.h" #include "lsp/state/lsp_requester.h" +#include "third-party/tree-sitter/tree-sitter/lib/src/tree.h" + +// TODO - +// https://sourcegraph.com/github.com/ensisoft/detonator@36f626caf957d0734865a8f5641be6170d997f45/-/blob/editor/app/lua-tools.cpp?L116:15-116:30 + +struct TreeSitterTreeDeleter { + void operator()(TSTree* ptr) const { ts_tree_delete(ptr); } +}; + +struct OpenGOALFormResult { + std::vector tokens; + std::pair start_point; + std::pair end_point; +}; + +struct OGGlobalIndex { + std::unordered_map global_symbols = {}; + std::unordered_map per_file_symbols = {}; +}; + class WorkspaceOGFile { public: WorkspaceOGFile(){}; - WorkspaceOGFile(const std::string& content, const GameVersion& game_version); - // TODO - make private - int32_t version; - // TODO - keep an AST of the file instead + WorkspaceOGFile(const LSPSpec::DocumentUri& uri, + const std::string& content, + const GameVersion& game_version); + LSPSpec::DocumentUri m_uri; std::string m_content; - std::vector m_lines; + int m_line_count = 
0; + std::string m_line_ending; + GameVersion m_game_version; std::vector m_symbols; std::vector m_diagnostics; - GameVersion m_game_version; + void parse_content(const std::string& new_content); + void update_symbols(const std::vector>& symbol_infos); std::optional get_symbol_at_position(const LSPSpec::Position position) const; + std::vector search_for_forms_that_begin_with( + std::vector prefix) const; + + private: + int32_t version; + std::shared_ptr m_ast; }; class WorkspaceIRFile { @@ -93,23 +123,40 @@ class Workspace { // and it's a lot faster to check the end of a string, then multiple tracked file maps FileType determine_filetype_from_languageid(const std::string& language_id); FileType determine_filetype_from_uri(const LSPSpec::DocumentUri& file_uri); + std::optional determine_game_version_from_uri(const LSPSpec::DocumentUri& uri); void start_tracking_file(const LSPSpec::DocumentUri& file_uri, const std::string& language_id, const std::string& content); void update_tracked_file(const LSPSpec::DocumentUri& file_uri, const std::string& content); + void tracked_file_will_save(const LSPSpec::DocumentUri& file_uri); + void update_global_index(const GameVersion game_version); void stop_tracking_file(const LSPSpec::DocumentUri& file_uri); - std::optional get_tracked_og_file(const LSPSpec::URI& file_uri); - std::optional get_tracked_ir_file(const LSPSpec::URI& file_uri); + std::optional> get_tracked_og_file( + const LSPSpec::URI& file_uri); + std::optional> get_tracked_ir_file( + const LSPSpec::URI& file_uri); std::optional get_definition_info_from_all_types( const std::string& symbol_name, const LSPSpec::DocumentUri& all_types_uri); - std::optional get_global_symbol_info(const WorkspaceOGFile& file, - const std::string& symbol_name); - std::optional get_symbol_typespec(const WorkspaceOGFile& file, - const std::string& symbol_name); - std::optional get_symbol_def_location(const WorkspaceOGFile& file, - const SymbolInfo& symbol_info); + std::vector> 
get_symbols_starting_with( + const GameVersion game_version, + const std::string& symbol_prefix); + std::optional> get_global_symbol_info( + const WorkspaceOGFile& file, + const std::string& symbol_name); + std::optional> get_symbol_typeinfo(const WorkspaceOGFile& file, + const std::string& symbol_name); + std::optional get_symbol_def_location( + const WorkspaceOGFile& file, + const std::shared_ptr symbol_info); + std::vector> + get_symbols_parent_type_path(const std::string& symbol_name, const GameVersion game_version); + std::vector> get_types_subtypes( + const std::string& symbol_name, + const GameVersion game_version); + std::unordered_map get_enum_entries(const std::string& enum_name, + const GameVersion game_version); private: LSPRequester m_requester; @@ -126,5 +173,7 @@ class Workspace { // and then we can track projects instead of games // // Until that decoupling happens, things like this will remain fairly clunky. + // TODO - change this to a shared_ptr so it can more easily be passed around functions std::unordered_map> m_compiler_instances; + std::unordered_map m_global_indicies; }; diff --git a/test/common/formatter/corpus/blank-lines.test.gc b/test/common/formatter/corpus/blank-lines.test.gc index dc321569500..25ed606a807 100644 --- a/test/common/formatter/corpus/blank-lines.test.gc +++ b/test/common/formatter/corpus/blank-lines.test.gc @@ -11,4 +11,4 @@ Separate Top Level (println "test") -(println "test") +(println "test") \ No newline at end of file diff --git a/third-party/tree-sitter/tree-sitter-opengoal/grammar.js b/third-party/tree-sitter/tree-sitter-opengoal/grammar.js index d8ab3de5a3b..d9c13654324 100644 --- a/third-party/tree-sitter/tree-sitter-opengoal/grammar.js +++ b/third-party/tree-sitter/tree-sitter-opengoal/grammar.js @@ -137,8 +137,7 @@ module.exports = grammar({ [], inline: $ => - [$._kwd_unqualified, - $._sym_unqualified], + [$._sym_unqualified], rules: { // THIS MUST BE FIRST -- even though this doesn't look like it matters @@ 
-206,7 +205,7 @@ module.exports = grammar({ seq(field('numberOfArgs', $._format_token), '*'), '?', "Newline", - seq(repeat(choice($._format_token, ',')), /[$mrRbBdDgGxXeEoOsStTfF]/), + seq(repeat(choice($._format_token, ',')), /[$mrRbBdDgGxXeEoOsStTfHhJjKkLlNnVwWyYzZ]/), ), format_specifier: $ => prec.left(seq( diff --git a/third-party/tree-sitter/tree-sitter-opengoal/grammar.json b/third-party/tree-sitter/tree-sitter-opengoal/grammar.json index 9c200f71d1b..28e204339b0 100644 --- a/third-party/tree-sitter/tree-sitter-opengoal/grammar.json +++ b/third-party/tree-sitter/tree-sitter-opengoal/grammar.json @@ -646,7 +646,7 @@ }, { "type": "PATTERN", - "value": "[$mrRbBdDgGxXeEoOsStTfF]" + "value": "[$mrRbBdDgGxXeEoOsStTfHhJjKkLlNnVwWyYzZ]" } ] } @@ -1041,9 +1041,7 @@ "precedences": [], "externals": [], "inline": [ - "ReferenceError", "_sym_unqualified" ], "supertypes": [] } - diff --git a/third-party/tree-sitter/tree-sitter-opengoal/parser.c b/third-party/tree-sitter/tree-sitter-opengoal/parser.c index 7afe53daa2b..8543b2f3ffc 100644 --- a/third-party/tree-sitter/tree-sitter-opengoal/parser.c +++ b/third-party/tree-sitter/tree-sitter-opengoal/parser.c @@ -1,7 +1,6 @@ -#include +#include "tree_sitter/parser.h" #if defined(__GNUC__) || defined(__clang__) -#pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wmissing-field-initializers" #endif @@ -16,7 +15,7 @@ #define MAX_ALIAS_SEQUENCE_LENGTH 4 #define PRODUCTION_ID_COUNT 12 -enum { +enum ts_symbol_identifiers { sym__ws = 1, sym_comment = 2, sym_block_comment = 3, @@ -531,7 +530,7 @@ static const TSSymbolMetadata ts_symbol_metadata[] = { }, }; -enum { +enum ts_field_identifiers { field_close = 1, field_marker = 2, field_name = 3, @@ -722,20 +721,50 @@ static inline bool sym_kwd_lit_character_set_2(int32_t c) { : (c <= 8287 || c == 12288)))))); } -static inline bool aux_sym_str_lit_token1_character_set_1(int32_t c) { - return (c < 'b' - ? 
(c < 'O' +static inline bool aux_sym_format_directive_type_token11_character_set_1(int32_t c) { + return (c < 'R' + ? (c < 'G' ? (c < 'B' ? c == '$' - : c <= 'G') - : (c <= 'O' || (c < 'X' - ? (c >= 'R' && c <= 'T') - : c <= 'X'))) - : (c <= 'g' || (c < 'r' - ? (c < 'o' - ? c == 'm' + : c <= 'E') + : (c <= 'L' || c == 'O')) + : (c <= 'T' || (c < 'r' + ? (c < 'b' + ? (c >= 'X' && c <= 'Z') : c <= 'o') - : (c <= 't' || c == 'x')))); + : (c <= 't' || (c >= 'x' && c <= 'z'))))); +} + +static inline bool aux_sym_format_directive_type_token11_character_set_2(int32_t c) { + return (c < 'R' + ? (c < 'G' + ? (c < 'B' + ? c == '$' + : c <= 'E') + : (c <= 'L' || c == 'O')) + : (c <= 'T' || (c < 'r' + ? (c < 'b' + ? (c >= 'V' && c <= 'Z') + : c <= 'o') + : (c <= 't' || (c >= 'x' && c <= 'z'))))); +} + +static inline bool aux_sym_format_directive_type_token11_character_set_3(int32_t c) { + return (c < 'R' + ? (c < 'G' + ? (c < 'B' + ? c == '$' + : (c <= 'B' || (c >= 'D' && c <= 'E'))) + : (c <= 'H' || (c < 'N' + ? (c >= 'J' && c <= 'L') + : c <= 'O'))) + : (c <= 'T' || (c < 'j' + ? (c < 'b' + ? (c >= 'V' && c <= 'Z') + : (c <= 'b' || (c >= 'd' && c <= 'h'))) + : (c <= 'o' || (c < 'w' + ? 
(c >= 'r' && c <= 't') + : c <= 'z'))))); } static inline bool aux_sym__sym_unqualified_token1_character_set_1(int32_t c) { @@ -795,184 +824,242 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { eof = lexer->eof(lexer); switch (state) { case 0: - if (eof) ADVANCE(22); - if (lookahead == '\n') ADVANCE(66); - if (lookahead == '\r') ADVANCE(66); - if (lookahead == '"') ADVANCE(65); - if (lookahead == '#') ADVANCE(66); - if (lookahead == '%') ADVANCE(66); - if (lookahead == '&') ADVANCE(66); - if (lookahead == '\'') ADVANCE(66); - if (lookahead == '(') ADVANCE(66); - if (lookahead == ')') ADVANCE(66); - if (lookahead == '*') ADVANCE(66); - if (lookahead == ',') ADVANCE(66); - if (lookahead == '/') ADVANCE(66); - if (lookahead == ':') ADVANCE(66); - if (lookahead == ';') ADVANCE(66); - if (lookahead == '?') ADVANCE(66); - if (lookahead == '@') ADVANCE(66); - if (lookahead == 'V') ADVANCE(66); - if (lookahead == '\\') ADVANCE(32); - if (lookahead == '^') ADVANCE(66); - if (lookahead == '_') ADVANCE(66); - if (lookahead == '`') ADVANCE(66); - if (lookahead == 'v') ADVANCE(66); - if (lookahead == '|') ADVANCE(66); - if (lookahead == '~') ADVANCE(42); + if (eof) ADVANCE(23); + if (lookahead == '\n') ADVANCE(68); + if (lookahead == '\r') ADVANCE(68); + if (lookahead == '"') ADVANCE(67); + if (lookahead == '#') ADVANCE(68); + if (lookahead == '%') ADVANCE(68); + if (lookahead == '&') ADVANCE(68); + if (lookahead == '\'') ADVANCE(68); + if (lookahead == '(') ADVANCE(68); + if (lookahead == ')') ADVANCE(68); + if (lookahead == '*') ADVANCE(68); + if (lookahead == ',') ADVANCE(68); + if (lookahead == '/') ADVANCE(68); + if (lookahead == ':') ADVANCE(68); + if (lookahead == ';') ADVANCE(68); + if (lookahead == '?') ADVANCE(68); + if (lookahead == '@') ADVANCE(68); + if (lookahead == 'V') ADVANCE(68); + if (lookahead == '\\') ADVANCE(33); + if (lookahead == '^') ADVANCE(68); + if (lookahead == '_') ADVANCE(68); + if (lookahead == '`') ADVANCE(68); + if (lookahead == 'v') 
ADVANCE(68); + if (lookahead == '|') ADVANCE(68); + if (lookahead == '~') ADVANCE(43); if (lookahead == '<' || - lookahead == '>') ADVANCE(66); + lookahead == '>') ADVANCE(68); if (lookahead == 'A' || - lookahead == 'a') ADVANCE(66); + lookahead == 'a') ADVANCE(68); if (lookahead == 'C' || - lookahead == 'c') ADVANCE(66); + lookahead == 'c') ADVANCE(68); if (lookahead == 'I' || - lookahead == 'i') ADVANCE(66); + lookahead == 'i') ADVANCE(68); if (lookahead == 'P' || - lookahead == 'p') ADVANCE(66); + lookahead == 'p') ADVANCE(68); if (lookahead == 'W' || - lookahead == 'w') ADVANCE(66); - if (('[' <= lookahead && lookahead <= ']')) ADVANCE(66); - if (('{' <= lookahead && lookahead <= '}')) ADVANCE(66); - if (aux_sym_str_lit_token1_character_set_1(lookahead)) ADVANCE(66); - if (lookahead != 0) ADVANCE(66); + lookahead == 'w') ADVANCE(68); + if (('[' <= lookahead && lookahead <= ']')) ADVANCE(68); + if (('{' <= lookahead && lookahead <= '}')) ADVANCE(68); + if (lookahead == '$' || + ('B' <= lookahead && lookahead <= 'E') || + ('G' <= lookahead && lookahead <= 'L') || + lookahead == 'N' || + lookahead == 'O' || + ('R' <= lookahead && lookahead <= 'T') || + ('X' <= lookahead && lookahead <= 'o') || + ('r' <= lookahead && lookahead <= 't') || + ('x' <= lookahead && lookahead <= 'z')) ADVANCE(68); + if (lookahead != 0) ADVANCE(68); END_STATE(); case 1: - if (lookahead == '\n') ADVANCE(48); - if (lookahead == '\r') ADVANCE(49); - if (lookahead == '"') ADVANCE(65); - if (lookahead == '#') ADVANCE(35); - if (lookahead == '%') ADVANCE(43); - if (lookahead == '&') ADVANCE(44); - if (lookahead == '\'') ADVANCE(31); - if (lookahead == '*') ADVANCE(61); - if (lookahead == ',') ADVANCE(36); - if (lookahead == ':') ADVANCE(40); - if (lookahead == ';') ADVANCE(59); - if (lookahead == '?') ADVANCE(62); - if (lookahead == '@') ADVANCE(38); - if (lookahead == 'N') ADVANCE(8); - if (lookahead == 'V') ADVANCE(34); - if (lookahead == '^') ADVANCE(47); - if (lookahead == '_') ADVANCE(54); 
- if (lookahead == '`') ADVANCE(60); - if (lookahead == 'v') ADVANCE(33); - if (lookahead == '|') ADVANCE(45); - if (lookahead == '~') ADVANCE(42); - if (('+' <= lookahead && lookahead <= '-')) ADVANCE(5); + if (lookahead == '\n') ADVANCE(49); + if (lookahead == '\r') ADVANCE(50); + if (lookahead == '"') ADVANCE(67); + if (lookahead == '#') ADVANCE(36); + if (lookahead == '%') ADVANCE(44); + if (lookahead == '&') ADVANCE(45); + if (lookahead == '\'') ADVANCE(32); + if (lookahead == ',') ADVANCE(37); + if (lookahead == ':') ADVANCE(41); + if (lookahead == ';') ADVANCE(60); + if (lookahead == '?') ADVANCE(63); + if (lookahead == '@') ADVANCE(39); + if (lookahead == 'N') ADVANCE(66); + if (lookahead == 'V') ADVANCE(35); + if (lookahead == '^') ADVANCE(48); + if (lookahead == '_') ADVANCE(55); + if (lookahead == '`') ADVANCE(61); + if (lookahead == 'v') ADVANCE(34); + if (lookahead == '|') ADVANCE(46); + if (lookahead == '~') ADVANCE(43); + if (('+' <= lookahead && lookahead <= '-')) ADVANCE(7); if (lookahead == '<' || - lookahead == '>') ADVANCE(58); + lookahead == '>') ADVANCE(59); if (lookahead == 'A' || - lookahead == 'a') ADVANCE(53); + lookahead == 'a') ADVANCE(54); if (lookahead == 'C' || - lookahead == 'c') ADVANCE(46); + lookahead == 'c') ADVANCE(47); if (lookahead == 'I' || - lookahead == 'i') ADVANCE(51); + lookahead == 'i') ADVANCE(52); if (lookahead == 'P' || - lookahead == 'p') ADVANCE(50); + lookahead == 'p') ADVANCE(51); if (lookahead == 'W' || - lookahead == 'w') ADVANCE(52); + lookahead == 'w') ADVANCE(53); if (lookahead == '[' || - lookahead == ']') ADVANCE(57); - if (('{' <= lookahead && lookahead <= '}')) ADVANCE(56); + lookahead == ']') ADVANCE(58); + if (('{' <= lookahead && lookahead <= '}')) ADVANCE(57); if (lookahead == '(' || - lookahead == ')') ADVANCE(55); - if (('0' <= lookahead && lookahead <= '9')) ADVANCE(26); - if (aux_sym_str_lit_token1_character_set_1(lookahead)) ADVANCE(64); + lookahead == ')') ADVANCE(56); + if (('0' <= lookahead 
&& lookahead <= '9')) ADVANCE(27); + if (aux_sym_format_directive_type_token11_character_set_1(lookahead)) ADVANCE(65); END_STATE(); case 2: - if (lookahead == '"') ADVANCE(65); - if (lookahead == '\\') ADVANCE(18); - if (lookahead == '~') ADVANCE(42); - if (lookahead != 0) ADVANCE(66); + if (lookahead == '\n') ADVANCE(49); + if (lookahead == '\r') ADVANCE(50); + if (lookahead == '#') ADVANCE(9); + if (lookahead == '%') ADVANCE(44); + if (lookahead == '&') ADVANCE(45); + if (lookahead == '\'') ADVANCE(32); + if (lookahead == ',') ADVANCE(37); + if (lookahead == ':') ADVANCE(41); + if (lookahead == ';') ADVANCE(60); + if (lookahead == '?') ADVANCE(63); + if (lookahead == '@') ADVANCE(39); + if (lookahead == 'N') ADVANCE(66); + if (lookahead == '^') ADVANCE(48); + if (lookahead == '_') ADVANCE(55); + if (lookahead == '`') ADVANCE(61); + if (lookahead == '|') ADVANCE(46); + if (lookahead == '~') ADVANCE(43); + if (('+' <= lookahead && lookahead <= '-')) ADVANCE(7); + if (lookahead == '<' || + lookahead == '>') ADVANCE(59); + if (lookahead == 'A' || + lookahead == 'a') ADVANCE(54); + if (lookahead == 'C' || + lookahead == 'c') ADVANCE(47); + if (lookahead == 'I' || + lookahead == 'i') ADVANCE(52); + if (lookahead == 'P' || + lookahead == 'p') ADVANCE(51); + if (lookahead == 'W' || + lookahead == 'w') ADVANCE(53); + if (lookahead == '[' || + lookahead == ']') ADVANCE(58); + if (('{' <= lookahead && lookahead <= '}')) ADVANCE(57); + if (lookahead == '(' || + lookahead == ')') ADVANCE(56); + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(27); + if (aux_sym_format_directive_type_token11_character_set_2(lookahead)) ADVANCE(65); END_STATE(); case 3: - if (lookahead == '#') ADVANCE(20); - if (lookahead == '|') ADVANCE(4); - if (lookahead != 0) ADVANCE(3); + if (lookahead == '"') ADVANCE(67); + if (lookahead == '\\') ADVANCE(19); + if (lookahead == '~') ADVANCE(43); + if (lookahead != 0) ADVANCE(68); END_STATE(); case 4: - if (lookahead == '#') ADVANCE(25); - if 
(lookahead != 0) ADVANCE(3); + if (lookahead == '#') ADVANCE(21); + if (lookahead == '|') ADVANCE(5); + if (lookahead != 0) ADVANCE(4); END_STATE(); case 5: - if (lookahead == '#') ADVANCE(7); - if (('0' <= lookahead && lookahead <= '9')) ADVANCE(26); + if (lookahead == '#') ADVANCE(26); + if (lookahead != 0) ADVANCE(4); END_STATE(); case 6: - if (lookahead == '\\') ADVANCE(19); - if (lookahead == 'b') ADVANCE(14); - if (lookahead == 'f' || - lookahead == 't') ADVANCE(71); - if (lookahead == 'x') ADVANCE(15); - if (lookahead == '|') ADVANCE(3); + if (lookahead == '#') ADVANCE(9); + if (lookahead == '%') ADVANCE(44); + if (lookahead == '&') ADVANCE(45); + if (lookahead == '\'') ADVANCE(32); + if (lookahead == '*') ADVANCE(62); + if (lookahead == ',') ADVANCE(37); + if (lookahead == ':') ADVANCE(41); + if (lookahead == '@') ADVANCE(39); + if (lookahead == '|') ADVANCE(46); + if (lookahead == '~') ADVANCE(43); + if (('+' <= lookahead && lookahead <= '-')) ADVANCE(7); + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(27); + if (aux_sym_format_directive_type_token11_character_set_3(lookahead)) ADVANCE(65); END_STATE(); case 7: - if (lookahead == 'b') ADVANCE(14); - if (lookahead == 'x') ADVANCE(15); + if (lookahead == '#') ADVANCE(9); + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(27); END_STATE(); case 8: - if (lookahead == 'e') ADVANCE(13); + if (lookahead == '\\') ADVANCE(20); + if (lookahead == 'b') ADVANCE(15); + if (lookahead == 'f' || + lookahead == 't') ADVANCE(73); + if (lookahead == 'x') ADVANCE(16); + if (lookahead == '|') ADVANCE(4); END_STATE(); case 9: - if (lookahead == 'e') ADVANCE(63); + if (lookahead == 'b') ADVANCE(15); + if (lookahead == 'x') ADVANCE(16); END_STATE(); case 10: - if (lookahead == 'i') ADVANCE(12); + if (lookahead == 'e') ADVANCE(64); END_STATE(); case 11: - if (lookahead == 'l') ADVANCE(10); + if (lookahead == 'i') ADVANCE(13); END_STATE(); case 12: - if (lookahead == 'n') ADVANCE(9); + if (lookahead == 'l') ADVANCE(11); 
END_STATE(); case 13: - if (lookahead == 'w') ADVANCE(11); + if (lookahead == 'n') ADVANCE(10); END_STATE(); case 14: - if (lookahead == '0' || - lookahead == '1') ADVANCE(27); + if (lookahead == 'w') ADVANCE(12); END_STATE(); case 15: - if (('0' <= lookahead && lookahead <= '9') || - ('A' <= lookahead && lookahead <= 'F') || - ('a' <= lookahead && lookahead <= 'f')) ADVANCE(29); + if (lookahead == '0' || + lookahead == '1') ADVANCE(28); END_STATE(); case 16: - if (!sym_kwd_lit_character_set_1(lookahead)) ADVANCE(30); + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'F') || + ('a' <= lookahead && lookahead <= 'f')) ADVANCE(30); END_STATE(); case 17: - if (lookahead != 0 && - lookahead != '\n') ADVANCE(32); + if (!sym_kwd_lit_character_set_1(lookahead)) ADVANCE(31); END_STATE(); case 18: if (lookahead != 0 && - lookahead != '\n') ADVANCE(67); + lookahead != '\n') ADVANCE(33); END_STATE(); case 19: if (lookahead != 0 && - lookahead != '\\') ADVANCE(68); - if (lookahead == '\\') ADVANCE(69); + lookahead != '\n') ADVANCE(69); END_STATE(); case 20: if (lookahead != 0 && - lookahead != '|') ADVANCE(3); + lookahead != '\\') ADVANCE(70); + if (lookahead == '\\') ADVANCE(71); END_STATE(); case 21: - if (eof) ADVANCE(22); - if (lookahead == '"') ADVANCE(65); - if (lookahead == '#') ADVANCE(6); - if (lookahead == '\'') ADVANCE(31); - if (lookahead == '(') ADVANCE(81); - if (lookahead == ')') ADVANCE(82); - if (lookahead == ',') ADVANCE(37); - if (lookahead == '/') ADVANCE(72); - if (lookahead == ':') ADVANCE(16); - if (lookahead == ';') ADVANCE(24); - if (lookahead == '`') ADVANCE(60); - if (lookahead == 'n') ADVANCE(77); - if (('+' <= lookahead && lookahead <= '-')) ADVANCE(73); - if (('0' <= lookahead && lookahead <= '9')) ADVANCE(26); + if (lookahead != 0 && + lookahead != '|') ADVANCE(4); + END_STATE(); + case 22: + if (eof) ADVANCE(23); + if (lookahead == '"') ADVANCE(67); + if (lookahead == '#') ADVANCE(8); + if (lookahead == '\'') 
ADVANCE(32); + if (lookahead == '(') ADVANCE(83); + if (lookahead == ')') ADVANCE(84); + if (lookahead == ',') ADVANCE(38); + if (lookahead == '/') ADVANCE(74); + if (lookahead == ':') ADVANCE(17); + if (lookahead == ';') ADVANCE(25); + if (lookahead == '`') ADVANCE(61); + if (lookahead == 'n') ADVANCE(79); + if (('+' <= lookahead && lookahead <= '-')) ADVANCE(75); + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(27); if (('\t' <= lookahead && lookahead <= '\r') || (28 <= lookahead && lookahead <= ' ') || lookahead == 5760 || @@ -981,18 +1068,18 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { lookahead == 8232 || lookahead == 8233 || lookahead == 8287 || - lookahead == 12288) ADVANCE(23); + lookahead == 12288) ADVANCE(24); if (lookahead != 0 && lookahead != '@' && (lookahead < '[' || '^' < lookahead) && lookahead != '{' && lookahead != '}' && - lookahead != '~') ADVANCE(80); + lookahead != '~') ADVANCE(82); END_STATE(); - case 22: + case 23: ACCEPT_TOKEN(ts_builtin_sym_end); END_STATE(); - case 23: + case 24: ACCEPT_TOKEN(sym__ws); if (('\t' <= lookahead && lookahead <= '\r') || (28 <= lookahead && lookahead <= ' ') || @@ -1002,230 +1089,234 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { lookahead == 8232 || lookahead == 8233 || lookahead == 8287 || - lookahead == 12288) ADVANCE(23); + lookahead == 12288) ADVANCE(24); END_STATE(); - case 24: + case 25: ACCEPT_TOKEN(sym_comment); if (lookahead != 0 && - lookahead != '\n') ADVANCE(24); - END_STATE(); - case 25: - ACCEPT_TOKEN(sym_block_comment); + lookahead != '\n') ADVANCE(25); END_STATE(); case 26: - ACCEPT_TOKEN(aux_sym_num_lit_token1); - if (lookahead == '.') ADVANCE(28); - if (('0' <= lookahead && lookahead <= '9')) ADVANCE(26); + ACCEPT_TOKEN(sym_block_comment); END_STATE(); case 27: ACCEPT_TOKEN(aux_sym_num_lit_token1); - if (lookahead == '0' || - lookahead == '1') ADVANCE(27); + if (lookahead == '.') ADVANCE(29); + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(27); END_STATE(); 
case 28: ACCEPT_TOKEN(aux_sym_num_lit_token1); - if (('0' <= lookahead && lookahead <= '9')) ADVANCE(28); + if (lookahead == '0' || + lookahead == '1') ADVANCE(28); END_STATE(); case 29: ACCEPT_TOKEN(aux_sym_num_lit_token1); - if (('0' <= lookahead && lookahead <= '9') || - ('A' <= lookahead && lookahead <= 'F') || - ('a' <= lookahead && lookahead <= 'f')) ADVANCE(29); + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(29); END_STATE(); case 30: - ACCEPT_TOKEN(sym_kwd_lit); - if (!sym_kwd_lit_character_set_2(lookahead)) ADVANCE(30); + ACCEPT_TOKEN(aux_sym_num_lit_token1); + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'F') || + ('a' <= lookahead && lookahead <= 'f')) ADVANCE(30); END_STATE(); case 31: - ACCEPT_TOKEN(anon_sym_SQUOTE); + ACCEPT_TOKEN(sym_kwd_lit); + if (!sym_kwd_lit_character_set_2(lookahead)) ADVANCE(31); END_STATE(); case 32: - ACCEPT_TOKEN(aux_sym__format_token_token1); + ACCEPT_TOKEN(anon_sym_SQUOTE); END_STATE(); case 33: - ACCEPT_TOKEN(anon_sym_v); + ACCEPT_TOKEN(aux_sym__format_token_token1); END_STATE(); case 34: - ACCEPT_TOKEN(anon_sym_V); + ACCEPT_TOKEN(anon_sym_v); END_STATE(); case 35: - ACCEPT_TOKEN(anon_sym_POUND); - if (lookahead == 'b') ADVANCE(14); - if (lookahead == 'x') ADVANCE(15); + ACCEPT_TOKEN(anon_sym_V); END_STATE(); case 36: - ACCEPT_TOKEN(anon_sym_COMMA); + ACCEPT_TOKEN(anon_sym_POUND); + if (lookahead == 'b') ADVANCE(15); + if (lookahead == 'x') ADVANCE(16); END_STATE(); case 37: ACCEPT_TOKEN(anon_sym_COMMA); - if (lookahead == '@') ADVANCE(83); END_STATE(); case 38: - ACCEPT_TOKEN(anon_sym_AT); - if (lookahead == ':') ADVANCE(39); + ACCEPT_TOKEN(anon_sym_COMMA); + if (lookahead == '@') ADVANCE(85); END_STATE(); case 39: - ACCEPT_TOKEN(anon_sym_AT_COLON); + ACCEPT_TOKEN(anon_sym_AT); + if (lookahead == ':') ADVANCE(40); END_STATE(); case 40: - ACCEPT_TOKEN(anon_sym_COLON); - if (lookahead == '@') ADVANCE(41); + ACCEPT_TOKEN(anon_sym_AT_COLON); END_STATE(); case 41: - 
ACCEPT_TOKEN(anon_sym_COLON_AT); + ACCEPT_TOKEN(anon_sym_COLON); + if (lookahead == '@') ADVANCE(42); END_STATE(); case 42: - ACCEPT_TOKEN(anon_sym_TILDE); + ACCEPT_TOKEN(anon_sym_COLON_AT); END_STATE(); case 43: - ACCEPT_TOKEN(anon_sym_PERCENT); + ACCEPT_TOKEN(anon_sym_TILDE); END_STATE(); case 44: - ACCEPT_TOKEN(anon_sym_AMP); + ACCEPT_TOKEN(anon_sym_PERCENT); END_STATE(); case 45: - ACCEPT_TOKEN(anon_sym_PIPE); + ACCEPT_TOKEN(anon_sym_AMP); END_STATE(); case 46: - ACCEPT_TOKEN(aux_sym_format_directive_type_token1); + ACCEPT_TOKEN(anon_sym_PIPE); END_STATE(); case 47: - ACCEPT_TOKEN(aux_sym_format_directive_type_token2); + ACCEPT_TOKEN(aux_sym_format_directive_type_token1); END_STATE(); case 48: - ACCEPT_TOKEN(anon_sym_LF); + ACCEPT_TOKEN(aux_sym_format_directive_type_token2); END_STATE(); case 49: - ACCEPT_TOKEN(anon_sym_CR); + ACCEPT_TOKEN(anon_sym_LF); END_STATE(); case 50: - ACCEPT_TOKEN(aux_sym_format_directive_type_token3); + ACCEPT_TOKEN(anon_sym_CR); END_STATE(); case 51: - ACCEPT_TOKEN(aux_sym_format_directive_type_token4); + ACCEPT_TOKEN(aux_sym_format_directive_type_token3); END_STATE(); case 52: - ACCEPT_TOKEN(aux_sym_format_directive_type_token5); + ACCEPT_TOKEN(aux_sym_format_directive_type_token4); END_STATE(); case 53: - ACCEPT_TOKEN(aux_sym_format_directive_type_token6); + ACCEPT_TOKEN(aux_sym_format_directive_type_token5); END_STATE(); case 54: - ACCEPT_TOKEN(anon_sym__); + ACCEPT_TOKEN(aux_sym_format_directive_type_token6); END_STATE(); case 55: - ACCEPT_TOKEN(aux_sym_format_directive_type_token7); + ACCEPT_TOKEN(anon_sym__); END_STATE(); case 56: - ACCEPT_TOKEN(aux_sym_format_directive_type_token8); + ACCEPT_TOKEN(aux_sym_format_directive_type_token7); END_STATE(); case 57: - ACCEPT_TOKEN(aux_sym_format_directive_type_token9); + ACCEPT_TOKEN(aux_sym_format_directive_type_token8); END_STATE(); case 58: - ACCEPT_TOKEN(aux_sym_format_directive_type_token10); + ACCEPT_TOKEN(aux_sym_format_directive_type_token9); END_STATE(); case 59: - 
ACCEPT_TOKEN(anon_sym_SEMI); + ACCEPT_TOKEN(aux_sym_format_directive_type_token10); END_STATE(); case 60: - ACCEPT_TOKEN(anon_sym_BQUOTE); + ACCEPT_TOKEN(anon_sym_SEMI); END_STATE(); case 61: - ACCEPT_TOKEN(anon_sym_STAR); + ACCEPT_TOKEN(anon_sym_BQUOTE); END_STATE(); case 62: - ACCEPT_TOKEN(anon_sym_QMARK); + ACCEPT_TOKEN(anon_sym_STAR); END_STATE(); case 63: - ACCEPT_TOKEN(anon_sym_Newline); + ACCEPT_TOKEN(anon_sym_QMARK); END_STATE(); case 64: - ACCEPT_TOKEN(aux_sym_format_directive_type_token11); + ACCEPT_TOKEN(anon_sym_Newline); END_STATE(); case 65: - ACCEPT_TOKEN(anon_sym_DQUOTE); + ACCEPT_TOKEN(aux_sym_format_directive_type_token11); END_STATE(); case 66: + ACCEPT_TOKEN(aux_sym_format_directive_type_token11); + if (lookahead == 'e') ADVANCE(14); + END_STATE(); + case 67: + ACCEPT_TOKEN(anon_sym_DQUOTE); + END_STATE(); + case 68: ACCEPT_TOKEN(aux_sym_str_lit_token1); if (lookahead != 0 && lookahead != '"' && lookahead != '\\' && - lookahead != '~') ADVANCE(66); + lookahead != '~') ADVANCE(68); END_STATE(); - case 67: + case 69: ACCEPT_TOKEN(aux_sym_str_lit_token2); END_STATE(); - case 68: + case 70: ACCEPT_TOKEN(sym_char_lit); END_STATE(); - case 69: + case 71: ACCEPT_TOKEN(sym_char_lit); if (lookahead == 'n' || lookahead == 's' || - lookahead == 't') ADVANCE(68); - END_STATE(); - case 70: - ACCEPT_TOKEN(sym_null_lit); - if (!sym_kwd_lit_character_set_2(lookahead)) ADVANCE(80); - END_STATE(); - case 71: - ACCEPT_TOKEN(sym_bool_lit); + lookahead == 't') ADVANCE(70); END_STATE(); case 72: - ACCEPT_TOKEN(anon_sym_SLASH); + ACCEPT_TOKEN(sym_null_lit); + if (!sym_kwd_lit_character_set_2(lookahead)) ADVANCE(82); END_STATE(); case 73: - ACCEPT_TOKEN(aux_sym__sym_unqualified_token1); - if (lookahead == '#') ADVANCE(74); - if (!aux_sym__sym_unqualified_token1_character_set_1(lookahead)) ADVANCE(80); - if (('0' <= lookahead && lookahead <= '9')) ADVANCE(26); + ACCEPT_TOKEN(sym_bool_lit); END_STATE(); case 74: - ACCEPT_TOKEN(aux_sym__sym_unqualified_token1); - if 
(lookahead == 'b') ADVANCE(78); - if (lookahead == 'x') ADVANCE(79); - if (!sym_kwd_lit_character_set_2(lookahead)) ADVANCE(80); + ACCEPT_TOKEN(anon_sym_SLASH); END_STATE(); case 75: ACCEPT_TOKEN(aux_sym__sym_unqualified_token1); - if (lookahead == 'e') ADVANCE(70); - if (!sym_kwd_lit_character_set_2(lookahead)) ADVANCE(80); + if (lookahead == '#') ADVANCE(76); + if (!aux_sym__sym_unqualified_token1_character_set_1(lookahead)) ADVANCE(82); + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(27); END_STATE(); case 76: ACCEPT_TOKEN(aux_sym__sym_unqualified_token1); - if (lookahead == 'n') ADVANCE(75); - if (!sym_kwd_lit_character_set_2(lookahead)) ADVANCE(80); + if (lookahead == 'b') ADVANCE(80); + if (lookahead == 'x') ADVANCE(81); + if (!sym_kwd_lit_character_set_2(lookahead)) ADVANCE(82); END_STATE(); case 77: ACCEPT_TOKEN(aux_sym__sym_unqualified_token1); - if (lookahead == 'o') ADVANCE(76); - if (!sym_kwd_lit_character_set_2(lookahead)) ADVANCE(80); + if (lookahead == 'e') ADVANCE(72); + if (!sym_kwd_lit_character_set_2(lookahead)) ADVANCE(82); END_STATE(); case 78: ACCEPT_TOKEN(aux_sym__sym_unqualified_token1); - if (lookahead == '0' || - lookahead == '1') ADVANCE(27); - if (!sym_kwd_lit_character_set_2(lookahead)) ADVANCE(80); + if (lookahead == 'n') ADVANCE(77); + if (!sym_kwd_lit_character_set_2(lookahead)) ADVANCE(82); END_STATE(); case 79: ACCEPT_TOKEN(aux_sym__sym_unqualified_token1); - if (!aux_sym__sym_unqualified_token1_character_set_2(lookahead)) ADVANCE(80); - if (('0' <= lookahead && lookahead <= '9') || - ('A' <= lookahead && lookahead <= 'F') || - ('a' <= lookahead && lookahead <= 'f')) ADVANCE(29); + if (lookahead == 'o') ADVANCE(78); + if (!sym_kwd_lit_character_set_2(lookahead)) ADVANCE(82); END_STATE(); case 80: ACCEPT_TOKEN(aux_sym__sym_unqualified_token1); - if (!sym_kwd_lit_character_set_2(lookahead)) ADVANCE(80); + if (lookahead == '0' || + lookahead == '1') ADVANCE(28); + if (!sym_kwd_lit_character_set_2(lookahead)) ADVANCE(82); 
END_STATE(); case 81: - ACCEPT_TOKEN(anon_sym_LPAREN); + ACCEPT_TOKEN(aux_sym__sym_unqualified_token1); + if (!aux_sym__sym_unqualified_token1_character_set_2(lookahead)) ADVANCE(82); + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'F') || + ('a' <= lookahead && lookahead <= 'f')) ADVANCE(30); END_STATE(); case 82: - ACCEPT_TOKEN(anon_sym_RPAREN); + ACCEPT_TOKEN(aux_sym__sym_unqualified_token1); + if (!sym_kwd_lit_character_set_2(lookahead)) ADVANCE(82); END_STATE(); case 83: + ACCEPT_TOKEN(anon_sym_LPAREN); + END_STATE(); + case 84: + ACCEPT_TOKEN(anon_sym_RPAREN); + END_STATE(); + case 85: ACCEPT_TOKEN(anon_sym_COMMA_AT); END_STATE(); default: @@ -1235,65 +1326,65 @@ static bool ts_lex(TSLexer *lexer, TSStateId state) { static const TSLexMode ts_lex_modes[STATE_COUNT] = { [0] = {.lex_state = 0}, - [1] = {.lex_state = 21}, + [1] = {.lex_state = 22}, [2] = {.lex_state = 1}, [3] = {.lex_state = 1}, [4] = {.lex_state = 1}, - [5] = {.lex_state = 1}, - [6] = {.lex_state = 21}, - [7] = {.lex_state = 21}, - [8] = {.lex_state = 1}, - [9] = {.lex_state = 21}, - [10] = {.lex_state = 21}, - [11] = {.lex_state = 21}, - [12] = {.lex_state = 21}, - [13] = {.lex_state = 1}, - [14] = {.lex_state = 21}, - [15] = {.lex_state = 21}, - [16] = {.lex_state = 21}, - [17] = {.lex_state = 21}, - [18] = {.lex_state = 1}, - [19] = {.lex_state = 21}, - [20] = {.lex_state = 21}, - [21] = {.lex_state = 21}, - [22] = {.lex_state = 1}, - [23] = {.lex_state = 1}, - [24] = {.lex_state = 21}, - [25] = {.lex_state = 21}, - [26] = {.lex_state = 21}, - [27] = {.lex_state = 21}, - [28] = {.lex_state = 21}, - [29] = {.lex_state = 21}, - [30] = {.lex_state = 21}, - [31] = {.lex_state = 21}, - [32] = {.lex_state = 21}, - [33] = {.lex_state = 21}, - [34] = {.lex_state = 21}, - [35] = {.lex_state = 21}, - [36] = {.lex_state = 21}, - [37] = {.lex_state = 21}, - [38] = {.lex_state = 21}, - [39] = {.lex_state = 21}, - [40] = {.lex_state = 21}, - [41] = {.lex_state = 21}, - 
[42] = {.lex_state = 1}, - [43] = {.lex_state = 1}, - [44] = {.lex_state = 1}, - [45] = {.lex_state = 1}, - [46] = {.lex_state = 1}, - [47] = {.lex_state = 2}, - [48] = {.lex_state = 2}, - [49] = {.lex_state = 2}, - [50] = {.lex_state = 1}, - [51] = {.lex_state = 2}, - [52] = {.lex_state = 2}, - [53] = {.lex_state = 2}, - [54] = {.lex_state = 2}, - [55] = {.lex_state = 2}, - [56] = {.lex_state = 2}, - [57] = {.lex_state = 2}, + [5] = {.lex_state = 2}, + [6] = {.lex_state = 22}, + [7] = {.lex_state = 22}, + [8] = {.lex_state = 2}, + [9] = {.lex_state = 22}, + [10] = {.lex_state = 22}, + [11] = {.lex_state = 22}, + [12] = {.lex_state = 22}, + [13] = {.lex_state = 2}, + [14] = {.lex_state = 22}, + [15] = {.lex_state = 22}, + [16] = {.lex_state = 22}, + [17] = {.lex_state = 22}, + [18] = {.lex_state = 2}, + [19] = {.lex_state = 22}, + [20] = {.lex_state = 22}, + [21] = {.lex_state = 22}, + [22] = {.lex_state = 2}, + [23] = {.lex_state = 2}, + [24] = {.lex_state = 22}, + [25] = {.lex_state = 22}, + [26] = {.lex_state = 22}, + [27] = {.lex_state = 22}, + [28] = {.lex_state = 22}, + [29] = {.lex_state = 22}, + [30] = {.lex_state = 22}, + [31] = {.lex_state = 22}, + [32] = {.lex_state = 22}, + [33] = {.lex_state = 22}, + [34] = {.lex_state = 22}, + [35] = {.lex_state = 22}, + [36] = {.lex_state = 22}, + [37] = {.lex_state = 22}, + [38] = {.lex_state = 22}, + [39] = {.lex_state = 22}, + [40] = {.lex_state = 22}, + [41] = {.lex_state = 22}, + [42] = {.lex_state = 6}, + [43] = {.lex_state = 6}, + [44] = {.lex_state = 6}, + [45] = {.lex_state = 6}, + [46] = {.lex_state = 6}, + [47] = {.lex_state = 3}, + [48] = {.lex_state = 3}, + [49] = {.lex_state = 3}, + [50] = {.lex_state = 6}, + [51] = {.lex_state = 3}, + [52] = {.lex_state = 3}, + [53] = {.lex_state = 3}, + [54] = {.lex_state = 3}, + [55] = {.lex_state = 3}, + [56] = {.lex_state = 3}, + [57] = {.lex_state = 3}, [58] = {.lex_state = 0}, - [59] = {.lex_state = 17}, + [59] = {.lex_state = 18}, }; static const uint16_t 
ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = { @@ -1405,8 +1496,8 @@ static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = { [anon_sym_BQUOTE] = ACTIONS(41), [anon_sym_QMARK] = ACTIONS(41), [anon_sym_Newline] = ACTIONS(41), - [aux_sym_format_directive_type_token11] = ACTIONS(41), - [anon_sym_DQUOTE] = ACTIONS(43), + [aux_sym_format_directive_type_token11] = ACTIONS(43), + [anon_sym_DQUOTE] = ACTIONS(45), }, [3] = { [sym__format_token] = STATE(42), @@ -1445,8 +1536,8 @@ static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = { [anon_sym_BQUOTE] = ACTIONS(41), [anon_sym_QMARK] = ACTIONS(41), [anon_sym_Newline] = ACTIONS(41), - [aux_sym_format_directive_type_token11] = ACTIONS(41), - [anon_sym_DQUOTE] = ACTIONS(45), + [aux_sym_format_directive_type_token11] = ACTIONS(43), + [anon_sym_DQUOTE] = ACTIONS(47), }, [4] = { [sym__format_token] = STATE(42), @@ -1485,18 +1576,20 @@ static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = { [anon_sym_BQUOTE] = ACTIONS(41), [anon_sym_QMARK] = ACTIONS(41), [anon_sym_Newline] = ACTIONS(41), - [aux_sym_format_directive_type_token11] = ACTIONS(41), + [aux_sym_format_directive_type_token11] = ACTIONS(43), }, }; static const uint16_t ts_small_parse_table[] = { - [0] = 10, + [0] = 11, ACTIONS(27), 1, aux_sym_num_lit_token1, ACTIONS(29), 1, anon_sym_SQUOTE, ACTIONS(35), 1, anon_sym_COMMA, + ACTIONS(43), 1, + aux_sym_format_directive_type_token11, STATE(18), 1, sym_format_modifiers, STATE(42), 1, @@ -1511,7 +1604,7 @@ static const uint16_t ts_small_parse_table[] = { ACTIONS(39), 2, anon_sym_AT_COLON, anon_sym_COLON_AT, - ACTIONS(41), 22, + ACTIONS(41), 21, anon_sym_TILDE, anon_sym_PERCENT, anon_sym_AMP, @@ -1533,8 +1626,7 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_BQUOTE, anon_sym_QMARK, anon_sym_Newline, - aux_sym_format_directive_type_token11, - [54] = 14, + [56] = 14, ACTIONS(7), 1, aux_sym_num_lit_token1, ACTIONS(9), 1, @@ -1553,13 +1645,13 @@ static const 
uint16_t ts_small_parse_table[] = { anon_sym_LPAREN, ACTIONS(25), 1, anon_sym_COMMA_AT, - ACTIONS(47), 1, + ACTIONS(49), 1, ts_builtin_sym_end, - ACTIONS(51), 1, + ACTIONS(53), 1, sym_null_lit, STATE(33), 1, sym__bare_list_lit, - ACTIONS(49), 6, + ACTIONS(51), 6, sym__ws, sym_comment, sym_block_comment, @@ -1578,32 +1670,32 @@ static const uint16_t ts_small_parse_table[] = { sym_unquote_splicing_lit, sym_unquoting_lit, aux_sym_source_repeat1, - [112] = 14, - ACTIONS(53), 1, + [114] = 14, + ACTIONS(55), 1, ts_builtin_sym_end, - ACTIONS(58), 1, + ACTIONS(60), 1, aux_sym_num_lit_token1, - ACTIONS(61), 1, + ACTIONS(63), 1, anon_sym_SQUOTE, - ACTIONS(64), 1, + ACTIONS(66), 1, anon_sym_COMMA, - ACTIONS(67), 1, + ACTIONS(69), 1, anon_sym_BQUOTE, - ACTIONS(70), 1, + ACTIONS(72), 1, anon_sym_DQUOTE, - ACTIONS(73), 1, + ACTIONS(75), 1, sym_null_lit, - ACTIONS(76), 1, + ACTIONS(78), 1, anon_sym_SLASH, - ACTIONS(79), 1, + ACTIONS(81), 1, aux_sym__sym_unqualified_token1, - ACTIONS(82), 1, + ACTIONS(84), 1, anon_sym_LPAREN, - ACTIONS(85), 1, + ACTIONS(87), 1, anon_sym_COMMA_AT, STATE(33), 1, sym__bare_list_lit, - ACTIONS(55), 6, + ACTIONS(57), 6, sym__ws, sym_comment, sym_block_comment, @@ -1622,11 +1714,12 @@ static const uint16_t ts_small_parse_table[] = { sym_unquote_splicing_lit, sym_unquoting_lit, aux_sym_source_repeat1, - [170] = 2, - ACTIONS(90), 2, + [172] = 2, + ACTIONS(92), 3, anon_sym_AT, anon_sym_COLON, - ACTIONS(88), 27, + aux_sym_format_directive_type_token11, + ACTIONS(90), 26, aux_sym_num_lit_token1, anon_sym_SQUOTE, anon_sym_COMMA, @@ -1653,8 +1746,7 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_BQUOTE, anon_sym_QMARK, anon_sym_Newline, - aux_sym_format_directive_type_token11, - [204] = 16, + [206] = 16, ACTIONS(7), 1, aux_sym_num_lit_token1, ACTIONS(9), 1, @@ -1673,20 +1765,20 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_LPAREN, ACTIONS(25), 1, anon_sym_COMMA_AT, - ACTIONS(96), 1, - sym_null_lit, ACTIONS(98), 1, + sym_null_lit, + 
ACTIONS(100), 1, anon_sym_RPAREN, STATE(33), 1, sym__bare_list_lit, STATE(11), 2, sym__gap, aux_sym__bare_list_lit_repeat1, - ACTIONS(92), 3, + ACTIONS(94), 3, sym__ws, sym_comment, sym_block_comment, - ACTIONS(94), 3, + ACTIONS(96), 3, sym_kwd_lit, sym_char_lit, sym_bool_lit, @@ -1700,7 +1792,7 @@ static const uint16_t ts_small_parse_table[] = { sym_quasi_quoting_lit, sym_unquote_splicing_lit, sym_unquoting_lit, - [266] = 16, + [268] = 16, ACTIONS(7), 1, aux_sym_num_lit_token1, ACTIONS(9), 1, @@ -1719,20 +1811,20 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_LPAREN, ACTIONS(25), 1, anon_sym_COMMA_AT, - ACTIONS(96), 1, + ACTIONS(98), 1, sym_null_lit, - ACTIONS(102), 1, + ACTIONS(104), 1, anon_sym_RPAREN, STATE(33), 1, sym__bare_list_lit, STATE(9), 2, sym__gap, aux_sym__bare_list_lit_repeat1, - ACTIONS(94), 3, + ACTIONS(96), 3, sym_kwd_lit, sym_char_lit, sym_bool_lit, - ACTIONS(100), 3, + ACTIONS(102), 3, sym__ws, sym_comment, sym_block_comment, @@ -1746,39 +1838,39 @@ static const uint16_t ts_small_parse_table[] = { sym_quasi_quoting_lit, sym_unquote_splicing_lit, sym_unquoting_lit, - [328] = 16, - ACTIONS(107), 1, + [330] = 16, + ACTIONS(109), 1, aux_sym_num_lit_token1, - ACTIONS(113), 1, + ACTIONS(115), 1, anon_sym_SQUOTE, - ACTIONS(116), 1, + ACTIONS(118), 1, anon_sym_COMMA, - ACTIONS(119), 1, + ACTIONS(121), 1, anon_sym_BQUOTE, - ACTIONS(122), 1, + ACTIONS(124), 1, anon_sym_DQUOTE, - ACTIONS(125), 1, + ACTIONS(127), 1, sym_null_lit, - ACTIONS(128), 1, + ACTIONS(130), 1, anon_sym_SLASH, - ACTIONS(131), 1, + ACTIONS(133), 1, aux_sym__sym_unqualified_token1, - ACTIONS(134), 1, + ACTIONS(136), 1, anon_sym_LPAREN, - ACTIONS(137), 1, - anon_sym_RPAREN, ACTIONS(139), 1, + anon_sym_RPAREN, + ACTIONS(141), 1, anon_sym_COMMA_AT, STATE(33), 1, sym__bare_list_lit, STATE(11), 2, sym__gap, aux_sym__bare_list_lit_repeat1, - ACTIONS(104), 3, + ACTIONS(106), 3, sym__ws, sym_comment, sym_block_comment, - ACTIONS(110), 3, + ACTIONS(112), 3, sym_kwd_lit, 
sym_char_lit, sym_bool_lit, @@ -1792,7 +1884,7 @@ static const uint16_t ts_small_parse_table[] = { sym_quasi_quoting_lit, sym_unquote_splicing_lit, sym_unquoting_lit, - [390] = 15, + [392] = 15, ACTIONS(7), 1, aux_sym_num_lit_token1, ACTIONS(9), 1, @@ -1811,18 +1903,18 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_LPAREN, ACTIONS(25), 1, anon_sym_COMMA_AT, - ACTIONS(146), 1, + ACTIONS(148), 1, sym_null_lit, STATE(33), 1, sym__bare_list_lit, STATE(29), 2, sym__gap, aux_sym_quoting_lit_repeat1, - ACTIONS(142), 3, + ACTIONS(144), 3, sym__ws, sym_comment, sym_block_comment, - ACTIONS(144), 3, + ACTIONS(146), 3, sym_kwd_lit, sym_char_lit, sym_bool_lit, @@ -1836,12 +1928,14 @@ static const uint16_t ts_small_parse_table[] = { sym_quasi_quoting_lit, sym_unquote_splicing_lit, sym_unquoting_lit, - [449] = 7, + [451] = 8, ACTIONS(27), 1, aux_sym_num_lit_token1, ACTIONS(29), 1, anon_sym_SQUOTE, - ACTIONS(148), 1, + ACTIONS(43), 1, + aux_sym_format_directive_type_token11, + ACTIONS(150), 1, anon_sym_COMMA, STATE(42), 1, sym__format_token, @@ -1849,7 +1943,7 @@ static const uint16_t ts_small_parse_table[] = { aux_sym_format_modifiers_repeat1, STATE(52), 1, sym_format_directive_type, - ACTIONS(41), 22, + ACTIONS(41), 21, anon_sym_TILDE, anon_sym_PERCENT, anon_sym_AMP, @@ -1871,8 +1965,7 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_BQUOTE, anon_sym_QMARK, anon_sym_Newline, - aux_sym_format_directive_type_token11, - [492] = 15, + [496] = 15, ACTIONS(7), 1, aux_sym_num_lit_token1, ACTIONS(9), 1, @@ -1891,18 +1984,18 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_LPAREN, ACTIONS(25), 1, anon_sym_COMMA_AT, - ACTIONS(152), 1, + ACTIONS(154), 1, sym_null_lit, STATE(33), 1, sym__bare_list_lit, STATE(29), 2, sym__gap, aux_sym_quoting_lit_repeat1, - ACTIONS(142), 3, + ACTIONS(144), 3, sym__ws, sym_comment, sym_block_comment, - ACTIONS(150), 3, + ACTIONS(152), 3, sym_kwd_lit, sym_char_lit, sym_bool_lit, @@ -1916,7 +2009,7 @@ static const uint16_t 
ts_small_parse_table[] = { sym_quasi_quoting_lit, sym_unquote_splicing_lit, sym_unquoting_lit, - [551] = 15, + [555] = 15, ACTIONS(7), 1, aux_sym_num_lit_token1, ACTIONS(9), 1, @@ -1935,18 +2028,18 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_LPAREN, ACTIONS(25), 1, anon_sym_COMMA_AT, - ACTIONS(158), 1, + ACTIONS(160), 1, sym_null_lit, STATE(33), 1, sym__bare_list_lit, STATE(14), 2, sym__gap, aux_sym_quoting_lit_repeat1, - ACTIONS(154), 3, + ACTIONS(156), 3, sym__ws, sym_comment, sym_block_comment, - ACTIONS(156), 3, + ACTIONS(158), 3, sym_kwd_lit, sym_char_lit, sym_bool_lit, @@ -1960,7 +2053,7 @@ static const uint16_t ts_small_parse_table[] = { sym_quasi_quoting_lit, sym_unquote_splicing_lit, sym_unquoting_lit, - [610] = 15, + [614] = 15, ACTIONS(7), 1, aux_sym_num_lit_token1, ACTIONS(9), 1, @@ -1979,18 +2072,18 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_LPAREN, ACTIONS(25), 1, anon_sym_COMMA_AT, - ACTIONS(162), 1, + ACTIONS(164), 1, sym_null_lit, STATE(33), 1, sym__bare_list_lit, STATE(29), 2, sym__gap, aux_sym_quoting_lit_repeat1, - ACTIONS(142), 3, + ACTIONS(144), 3, sym__ws, sym_comment, sym_block_comment, - ACTIONS(160), 3, + ACTIONS(162), 3, sym_kwd_lit, sym_char_lit, sym_bool_lit, @@ -2004,7 +2097,7 @@ static const uint16_t ts_small_parse_table[] = { sym_quasi_quoting_lit, sym_unquote_splicing_lit, sym_unquoting_lit, - [669] = 15, + [673] = 15, ACTIONS(7), 1, aux_sym_num_lit_token1, ACTIONS(9), 1, @@ -2023,18 +2116,18 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_LPAREN, ACTIONS(25), 1, anon_sym_COMMA_AT, - ACTIONS(168), 1, + ACTIONS(170), 1, sym_null_lit, STATE(33), 1, sym__bare_list_lit, STATE(21), 2, sym__gap, aux_sym_quoting_lit_repeat1, - ACTIONS(164), 3, + ACTIONS(166), 3, sym__ws, sym_comment, sym_block_comment, - ACTIONS(166), 3, + ACTIONS(168), 3, sym_kwd_lit, sym_char_lit, sym_bool_lit, @@ -2048,12 +2141,14 @@ static const uint16_t ts_small_parse_table[] = { sym_quasi_quoting_lit, 
sym_unquote_splicing_lit, sym_unquoting_lit, - [728] = 7, + [732] = 8, ACTIONS(27), 1, aux_sym_num_lit_token1, ACTIONS(29), 1, anon_sym_SQUOTE, - ACTIONS(148), 1, + ACTIONS(43), 1, + aux_sym_format_directive_type_token11, + ACTIONS(150), 1, anon_sym_COMMA, STATE(42), 1, sym__format_token, @@ -2061,7 +2156,7 @@ static const uint16_t ts_small_parse_table[] = { aux_sym_format_modifiers_repeat1, STATE(57), 1, sym_format_directive_type, - ACTIONS(41), 22, + ACTIONS(41), 21, anon_sym_TILDE, anon_sym_PERCENT, anon_sym_AMP, @@ -2083,8 +2178,7 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_BQUOTE, anon_sym_QMARK, anon_sym_Newline, - aux_sym_format_directive_type_token11, - [771] = 15, + [777] = 15, ACTIONS(7), 1, aux_sym_num_lit_token1, ACTIONS(9), 1, @@ -2103,18 +2197,18 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_LPAREN, ACTIONS(25), 1, anon_sym_COMMA_AT, - ACTIONS(174), 1, + ACTIONS(176), 1, sym_null_lit, STATE(33), 1, sym__bare_list_lit, STATE(12), 2, sym__gap, aux_sym_quoting_lit_repeat1, - ACTIONS(170), 3, + ACTIONS(172), 3, sym__ws, sym_comment, sym_block_comment, - ACTIONS(172), 3, + ACTIONS(174), 3, sym_kwd_lit, sym_char_lit, sym_bool_lit, @@ -2128,7 +2222,7 @@ static const uint16_t ts_small_parse_table[] = { sym_quasi_quoting_lit, sym_unquote_splicing_lit, sym_unquoting_lit, - [830] = 15, + [836] = 15, ACTIONS(7), 1, aux_sym_num_lit_token1, ACTIONS(9), 1, @@ -2147,18 +2241,18 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_LPAREN, ACTIONS(25), 1, anon_sym_COMMA_AT, - ACTIONS(180), 1, + ACTIONS(182), 1, sym_null_lit, STATE(33), 1, sym__bare_list_lit, STATE(16), 2, sym__gap, aux_sym_quoting_lit_repeat1, - ACTIONS(176), 3, + ACTIONS(178), 3, sym__ws, sym_comment, sym_block_comment, - ACTIONS(178), 3, + ACTIONS(180), 3, sym_kwd_lit, sym_char_lit, sym_bool_lit, @@ -2172,7 +2266,7 @@ static const uint16_t ts_small_parse_table[] = { sym_quasi_quoting_lit, sym_unquote_splicing_lit, sym_unquoting_lit, - [889] = 15, + [895] = 15, 
ACTIONS(7), 1, aux_sym_num_lit_token1, ACTIONS(9), 1, @@ -2191,18 +2285,18 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_LPAREN, ACTIONS(25), 1, anon_sym_COMMA_AT, - ACTIONS(184), 1, + ACTIONS(186), 1, sym_null_lit, STATE(33), 1, sym__bare_list_lit, STATE(29), 2, sym__gap, aux_sym_quoting_lit_repeat1, - ACTIONS(142), 3, + ACTIONS(144), 3, sym__ws, sym_comment, sym_block_comment, - ACTIONS(182), 3, + ACTIONS(184), 3, sym_kwd_lit, sym_char_lit, sym_bool_lit, @@ -2216,8 +2310,10 @@ static const uint16_t ts_small_parse_table[] = { sym_quasi_quoting_lit, sym_unquote_splicing_lit, sym_unquoting_lit, - [948] = 1, - ACTIONS(186), 25, + [954] = 2, + ACTIONS(190), 1, + aux_sym_format_directive_type_token11, + ACTIONS(188), 24, aux_sym_num_lit_token1, anon_sym_SQUOTE, anon_sym_COMMA, @@ -2242,9 +2338,10 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_BQUOTE, anon_sym_QMARK, anon_sym_Newline, + [984] = 2, + ACTIONS(194), 1, aux_sym_format_directive_type_token11, - [976] = 1, - ACTIONS(188), 25, + ACTIONS(192), 24, aux_sym_num_lit_token1, anon_sym_SQUOTE, anon_sym_COMMA, @@ -2269,13 +2366,12 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_BQUOTE, anon_sym_QMARK, anon_sym_Newline, - aux_sym_format_directive_type_token11, - [1004] = 2, - ACTIONS(192), 3, + [1014] = 2, + ACTIONS(198), 3, anon_sym_COMMA, sym_null_lit, aux_sym__sym_unqualified_token1, - ACTIONS(190), 15, + ACTIONS(196), 15, ts_builtin_sym_end, sym__ws, sym_comment, @@ -2291,12 +2387,12 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_LPAREN, anon_sym_RPAREN, anon_sym_COMMA_AT, - [1027] = 2, - ACTIONS(196), 3, + [1037] = 2, + ACTIONS(202), 3, anon_sym_COMMA, sym_null_lit, aux_sym__sym_unqualified_token1, - ACTIONS(194), 15, + ACTIONS(200), 15, ts_builtin_sym_end, sym__ws, sym_comment, @@ -2312,12 +2408,12 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_LPAREN, anon_sym_RPAREN, anon_sym_COMMA_AT, - [1050] = 2, - ACTIONS(200), 3, + [1060] = 2, + 
ACTIONS(206), 3, anon_sym_COMMA, sym_null_lit, aux_sym__sym_unqualified_token1, - ACTIONS(198), 15, + ACTIONS(204), 15, ts_builtin_sym_end, sym__ws, sym_comment, @@ -2333,12 +2429,12 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_LPAREN, anon_sym_RPAREN, anon_sym_COMMA_AT, - [1073] = 2, - ACTIONS(204), 3, + [1083] = 2, + ACTIONS(210), 3, anon_sym_COMMA, sym_null_lit, aux_sym__sym_unqualified_token1, - ACTIONS(202), 15, + ACTIONS(208), 15, ts_builtin_sym_end, sym__ws, sym_comment, @@ -2354,12 +2450,12 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_LPAREN, anon_sym_RPAREN, anon_sym_COMMA_AT, - [1096] = 2, - ACTIONS(208), 3, + [1106] = 2, + ACTIONS(214), 3, anon_sym_COMMA, sym_null_lit, aux_sym__sym_unqualified_token1, - ACTIONS(206), 15, + ACTIONS(212), 15, ts_builtin_sym_end, sym__ws, sym_comment, @@ -2375,19 +2471,19 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_LPAREN, anon_sym_RPAREN, anon_sym_COMMA_AT, - [1119] = 4, + [1129] = 4, STATE(29), 2, sym__gap, aux_sym_quoting_lit_repeat1, - ACTIONS(210), 3, + ACTIONS(216), 3, sym__ws, sym_comment, sym_block_comment, - ACTIONS(215), 3, + ACTIONS(221), 3, anon_sym_COMMA, sym_null_lit, aux_sym__sym_unqualified_token1, - ACTIONS(213), 10, + ACTIONS(219), 10, aux_sym_num_lit_token1, sym_kwd_lit, anon_sym_SQUOTE, @@ -2398,12 +2494,12 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_SLASH, anon_sym_LPAREN, anon_sym_COMMA_AT, - [1146] = 2, - ACTIONS(219), 3, + [1156] = 2, + ACTIONS(225), 3, anon_sym_COMMA, sym_null_lit, aux_sym__sym_unqualified_token1, - ACTIONS(217), 15, + ACTIONS(223), 15, ts_builtin_sym_end, sym__ws, sym_comment, @@ -2419,12 +2515,12 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_LPAREN, anon_sym_RPAREN, anon_sym_COMMA_AT, - [1169] = 2, - ACTIONS(223), 3, + [1179] = 2, + ACTIONS(229), 3, anon_sym_COMMA, sym_null_lit, aux_sym__sym_unqualified_token1, - ACTIONS(221), 15, + ACTIONS(227), 15, ts_builtin_sym_end, sym__ws, sym_comment, @@ 
-2440,12 +2536,12 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_LPAREN, anon_sym_RPAREN, anon_sym_COMMA_AT, - [1192] = 2, - ACTIONS(227), 3, + [1202] = 2, + ACTIONS(233), 3, anon_sym_COMMA, sym_null_lit, aux_sym__sym_unqualified_token1, - ACTIONS(225), 15, + ACTIONS(231), 15, ts_builtin_sym_end, sym__ws, sym_comment, @@ -2461,12 +2557,12 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_LPAREN, anon_sym_RPAREN, anon_sym_COMMA_AT, - [1215] = 2, - ACTIONS(231), 3, + [1225] = 2, + ACTIONS(237), 3, anon_sym_COMMA, sym_null_lit, aux_sym__sym_unqualified_token1, - ACTIONS(229), 15, + ACTIONS(235), 15, ts_builtin_sym_end, sym__ws, sym_comment, @@ -2482,12 +2578,12 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_LPAREN, anon_sym_RPAREN, anon_sym_COMMA_AT, - [1238] = 2, - ACTIONS(235), 3, + [1248] = 2, + ACTIONS(241), 3, anon_sym_COMMA, sym_null_lit, aux_sym__sym_unqualified_token1, - ACTIONS(233), 15, + ACTIONS(239), 15, ts_builtin_sym_end, sym__ws, sym_comment, @@ -2503,12 +2599,12 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_LPAREN, anon_sym_RPAREN, anon_sym_COMMA_AT, - [1261] = 2, - ACTIONS(239), 3, + [1271] = 2, + ACTIONS(245), 3, anon_sym_COMMA, sym_null_lit, aux_sym__sym_unqualified_token1, - ACTIONS(237), 15, + ACTIONS(243), 15, ts_builtin_sym_end, sym__ws, sym_comment, @@ -2524,12 +2620,12 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_LPAREN, anon_sym_RPAREN, anon_sym_COMMA_AT, - [1284] = 2, - ACTIONS(243), 3, + [1294] = 2, + ACTIONS(249), 3, anon_sym_COMMA, sym_null_lit, aux_sym__sym_unqualified_token1, - ACTIONS(241), 15, + ACTIONS(247), 15, ts_builtin_sym_end, sym__ws, sym_comment, @@ -2545,12 +2641,12 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_LPAREN, anon_sym_RPAREN, anon_sym_COMMA_AT, - [1307] = 2, - ACTIONS(247), 3, + [1317] = 2, + ACTIONS(253), 3, anon_sym_COMMA, sym_null_lit, aux_sym__sym_unqualified_token1, - ACTIONS(245), 15, + ACTIONS(251), 15, ts_builtin_sym_end, 
sym__ws, sym_comment, @@ -2566,12 +2662,12 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_LPAREN, anon_sym_RPAREN, anon_sym_COMMA_AT, - [1330] = 2, - ACTIONS(251), 3, + [1340] = 2, + ACTIONS(257), 3, anon_sym_COMMA, sym_null_lit, aux_sym__sym_unqualified_token1, - ACTIONS(249), 15, + ACTIONS(255), 15, ts_builtin_sym_end, sym__ws, sym_comment, @@ -2587,12 +2683,12 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_LPAREN, anon_sym_RPAREN, anon_sym_COMMA_AT, - [1353] = 2, - ACTIONS(255), 3, + [1363] = 2, + ACTIONS(261), 3, anon_sym_COMMA, sym_null_lit, aux_sym__sym_unqualified_token1, - ACTIONS(253), 15, + ACTIONS(259), 15, ts_builtin_sym_end, sym__ws, sym_comment, @@ -2608,12 +2704,12 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_LPAREN, anon_sym_RPAREN, anon_sym_COMMA_AT, - [1376] = 2, - ACTIONS(259), 3, + [1386] = 2, + ACTIONS(265), 3, anon_sym_COMMA, sym_null_lit, aux_sym__sym_unqualified_token1, - ACTIONS(257), 15, + ACTIONS(263), 15, ts_builtin_sym_end, sym__ws, sym_comment, @@ -2629,12 +2725,12 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_LPAREN, anon_sym_RPAREN, anon_sym_COMMA_AT, - [1399] = 2, - ACTIONS(263), 3, + [1409] = 2, + ACTIONS(269), 3, anon_sym_COMMA, sym_null_lit, aux_sym__sym_unqualified_token1, - ACTIONS(261), 14, + ACTIONS(267), 14, sym__ws, sym_comment, sym_block_comment, @@ -2649,29 +2745,29 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_LPAREN, anon_sym_RPAREN, anon_sym_COMMA_AT, - [1421] = 4, - ACTIONS(271), 1, + [1431] = 4, + ACTIONS(277), 1, anon_sym_STAR, - ACTIONS(267), 2, + ACTIONS(273), 2, anon_sym_AT, anon_sym_COLON, - ACTIONS(269), 4, + ACTIONS(275), 4, anon_sym_TILDE, anon_sym_PERCENT, anon_sym_AMP, anon_sym_PIPE, - ACTIONS(265), 6, + ACTIONS(271), 6, aux_sym_num_lit_token1, anon_sym_SQUOTE, anon_sym_COMMA, anon_sym_AT_COLON, anon_sym_COLON_AT, aux_sym_format_directive_type_token11, - [1443] = 2, - ACTIONS(275), 2, + [1453] = 2, + ACTIONS(281), 2, anon_sym_AT, 
anon_sym_COLON, - ACTIONS(273), 11, + ACTIONS(279), 11, aux_sym_num_lit_token1, anon_sym_SQUOTE, anon_sym_COMMA, @@ -2683,11 +2779,11 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_PIPE, anon_sym_STAR, aux_sym_format_directive_type_token11, - [1461] = 2, - ACTIONS(279), 2, + [1471] = 2, + ACTIONS(285), 2, anon_sym_AT, anon_sym_COLON, - ACTIONS(277), 11, + ACTIONS(283), 11, aux_sym_num_lit_token1, anon_sym_SQUOTE, anon_sym_COMMA, @@ -2699,192 +2795,192 @@ static const uint16_t ts_small_parse_table[] = { anon_sym_PIPE, anon_sym_STAR, aux_sym_format_directive_type_token11, - [1479] = 6, - ACTIONS(281), 1, + [1489] = 6, + ACTIONS(287), 1, aux_sym_num_lit_token1, - ACTIONS(284), 1, + ACTIONS(290), 1, anon_sym_SQUOTE, - ACTIONS(287), 1, + ACTIONS(293), 1, anon_sym_COMMA, - ACTIONS(290), 2, + ACTIONS(296), 2, anon_sym_AT, anon_sym_COLON, STATE(45), 2, sym__format_token, aux_sym_format_modifiers_repeat1, - ACTIONS(292), 3, + ACTIONS(298), 3, anon_sym_AT_COLON, anon_sym_COLON_AT, aux_sym_format_directive_type_token11, - [1502] = 7, + [1512] = 7, ACTIONS(27), 1, aux_sym_num_lit_token1, ACTIONS(29), 1, anon_sym_SQUOTE, - ACTIONS(294), 1, - anon_sym_COMMA, ACTIONS(300), 1, + anon_sym_COMMA, + ACTIONS(306), 1, aux_sym_format_directive_type_token11, - ACTIONS(296), 2, + ACTIONS(302), 2, anon_sym_AT, anon_sym_COLON, - ACTIONS(298), 2, + ACTIONS(304), 2, anon_sym_AT_COLON, anon_sym_COLON_AT, STATE(45), 2, sym__format_token, aux_sym_format_modifiers_repeat1, - [1527] = 4, - ACTIONS(302), 1, + [1537] = 4, + ACTIONS(308), 1, anon_sym_TILDE, - ACTIONS(305), 1, + ACTIONS(311), 1, anon_sym_DQUOTE, - ACTIONS(307), 2, + ACTIONS(313), 2, aux_sym_str_lit_token1, aux_sym_str_lit_token2, STATE(47), 2, sym_format_specifier, aux_sym_str_lit_repeat1, - [1542] = 4, - ACTIONS(43), 1, + [1552] = 4, + ACTIONS(45), 1, anon_sym_DQUOTE, - ACTIONS(310), 1, + ACTIONS(316), 1, anon_sym_TILDE, - ACTIONS(312), 2, + ACTIONS(318), 2, aux_sym_str_lit_token1, aux_sym_str_lit_token2, STATE(47), 2, 
sym_format_specifier, aux_sym_str_lit_repeat1, - [1557] = 4, - ACTIONS(314), 1, + [1567] = 4, + ACTIONS(320), 1, anon_sym_TILDE, - ACTIONS(316), 1, + ACTIONS(322), 1, anon_sym_DQUOTE, - ACTIONS(318), 2, + ACTIONS(324), 2, aux_sym_str_lit_token1, aux_sym_str_lit_token2, STATE(48), 2, sym_format_specifier, aux_sym_str_lit_repeat1, - [1572] = 5, + [1582] = 5, ACTIONS(27), 1, aux_sym_num_lit_token1, ACTIONS(29), 1, anon_sym_SQUOTE, - ACTIONS(294), 1, - anon_sym_COMMA, ACTIONS(300), 1, + anon_sym_COMMA, + ACTIONS(306), 1, aux_sym_format_directive_type_token11, STATE(45), 2, sym__format_token, aux_sym_format_modifiers_repeat1, - [1589] = 1, - ACTIONS(320), 4, + [1599] = 1, + ACTIONS(326), 4, anon_sym_TILDE, anon_sym_DQUOTE, aux_sym_str_lit_token1, aux_sym_str_lit_token2, - [1596] = 1, - ACTIONS(322), 4, + [1606] = 1, + ACTIONS(328), 4, anon_sym_TILDE, anon_sym_DQUOTE, aux_sym_str_lit_token1, aux_sym_str_lit_token2, - [1603] = 1, - ACTIONS(324), 4, + [1613] = 1, + ACTIONS(330), 4, anon_sym_TILDE, anon_sym_DQUOTE, aux_sym_str_lit_token1, aux_sym_str_lit_token2, - [1610] = 1, - ACTIONS(326), 4, + [1620] = 1, + ACTIONS(332), 4, anon_sym_TILDE, anon_sym_DQUOTE, aux_sym_str_lit_token1, aux_sym_str_lit_token2, - [1617] = 1, - ACTIONS(328), 4, + [1627] = 1, + ACTIONS(334), 4, anon_sym_TILDE, anon_sym_DQUOTE, aux_sym_str_lit_token1, aux_sym_str_lit_token2, - [1624] = 1, - ACTIONS(330), 4, + [1634] = 1, + ACTIONS(336), 4, anon_sym_TILDE, anon_sym_DQUOTE, aux_sym_str_lit_token1, aux_sym_str_lit_token2, - [1631] = 1, - ACTIONS(332), 4, + [1641] = 1, + ACTIONS(338), 4, anon_sym_TILDE, anon_sym_DQUOTE, aux_sym_str_lit_token1, aux_sym_str_lit_token2, - [1638] = 1, - ACTIONS(334), 1, + [1648] = 1, + ACTIONS(340), 1, ts_builtin_sym_end, - [1642] = 1, - ACTIONS(336), 1, + [1652] = 1, + ACTIONS(342), 1, aux_sym__format_token_token1, }; static const uint32_t ts_small_parse_table_map[] = { [SMALL_STATE(5)] = 0, - [SMALL_STATE(6)] = 54, - [SMALL_STATE(7)] = 112, - [SMALL_STATE(8)] = 170, - 
[SMALL_STATE(9)] = 204, - [SMALL_STATE(10)] = 266, - [SMALL_STATE(11)] = 328, - [SMALL_STATE(12)] = 390, - [SMALL_STATE(13)] = 449, - [SMALL_STATE(14)] = 492, - [SMALL_STATE(15)] = 551, - [SMALL_STATE(16)] = 610, - [SMALL_STATE(17)] = 669, - [SMALL_STATE(18)] = 728, - [SMALL_STATE(19)] = 771, - [SMALL_STATE(20)] = 830, - [SMALL_STATE(21)] = 889, - [SMALL_STATE(22)] = 948, - [SMALL_STATE(23)] = 976, - [SMALL_STATE(24)] = 1004, - [SMALL_STATE(25)] = 1027, - [SMALL_STATE(26)] = 1050, - [SMALL_STATE(27)] = 1073, - [SMALL_STATE(28)] = 1096, - [SMALL_STATE(29)] = 1119, - [SMALL_STATE(30)] = 1146, - [SMALL_STATE(31)] = 1169, - [SMALL_STATE(32)] = 1192, - [SMALL_STATE(33)] = 1215, - [SMALL_STATE(34)] = 1238, - [SMALL_STATE(35)] = 1261, - [SMALL_STATE(36)] = 1284, - [SMALL_STATE(37)] = 1307, - [SMALL_STATE(38)] = 1330, - [SMALL_STATE(39)] = 1353, - [SMALL_STATE(40)] = 1376, - [SMALL_STATE(41)] = 1399, - [SMALL_STATE(42)] = 1421, - [SMALL_STATE(43)] = 1443, - [SMALL_STATE(44)] = 1461, - [SMALL_STATE(45)] = 1479, - [SMALL_STATE(46)] = 1502, - [SMALL_STATE(47)] = 1527, - [SMALL_STATE(48)] = 1542, - [SMALL_STATE(49)] = 1557, - [SMALL_STATE(50)] = 1572, - [SMALL_STATE(51)] = 1589, - [SMALL_STATE(52)] = 1596, - [SMALL_STATE(53)] = 1603, - [SMALL_STATE(54)] = 1610, - [SMALL_STATE(55)] = 1617, - [SMALL_STATE(56)] = 1624, - [SMALL_STATE(57)] = 1631, - [SMALL_STATE(58)] = 1638, - [SMALL_STATE(59)] = 1642, + [SMALL_STATE(6)] = 56, + [SMALL_STATE(7)] = 114, + [SMALL_STATE(8)] = 172, + [SMALL_STATE(9)] = 206, + [SMALL_STATE(10)] = 268, + [SMALL_STATE(11)] = 330, + [SMALL_STATE(12)] = 392, + [SMALL_STATE(13)] = 451, + [SMALL_STATE(14)] = 496, + [SMALL_STATE(15)] = 555, + [SMALL_STATE(16)] = 614, + [SMALL_STATE(17)] = 673, + [SMALL_STATE(18)] = 732, + [SMALL_STATE(19)] = 777, + [SMALL_STATE(20)] = 836, + [SMALL_STATE(21)] = 895, + [SMALL_STATE(22)] = 954, + [SMALL_STATE(23)] = 984, + [SMALL_STATE(24)] = 1014, + [SMALL_STATE(25)] = 1037, + [SMALL_STATE(26)] = 1060, + [SMALL_STATE(27)] = 
1083, + [SMALL_STATE(28)] = 1106, + [SMALL_STATE(29)] = 1129, + [SMALL_STATE(30)] = 1156, + [SMALL_STATE(31)] = 1179, + [SMALL_STATE(32)] = 1202, + [SMALL_STATE(33)] = 1225, + [SMALL_STATE(34)] = 1248, + [SMALL_STATE(35)] = 1271, + [SMALL_STATE(36)] = 1294, + [SMALL_STATE(37)] = 1317, + [SMALL_STATE(38)] = 1340, + [SMALL_STATE(39)] = 1363, + [SMALL_STATE(40)] = 1386, + [SMALL_STATE(41)] = 1409, + [SMALL_STATE(42)] = 1431, + [SMALL_STATE(43)] = 1453, + [SMALL_STATE(44)] = 1471, + [SMALL_STATE(45)] = 1489, + [SMALL_STATE(46)] = 1512, + [SMALL_STATE(47)] = 1537, + [SMALL_STATE(48)] = 1552, + [SMALL_STATE(49)] = 1567, + [SMALL_STATE(50)] = 1582, + [SMALL_STATE(51)] = 1599, + [SMALL_STATE(52)] = 1606, + [SMALL_STATE(53)] = 1613, + [SMALL_STATE(54)] = 1620, + [SMALL_STATE(55)] = 1627, + [SMALL_STATE(56)] = 1634, + [SMALL_STATE(57)] = 1641, + [SMALL_STATE(58)] = 1648, + [SMALL_STATE(59)] = 1652, }; static const TSParseActionEntry ts_parse_actions[] = { @@ -2910,149 +3006,154 @@ static const TSParseActionEntry ts_parse_actions[] = { [37] = {.entry = {.count = 1, .reusable = false}}, SHIFT(22), [39] = {.entry = {.count = 1, .reusable = true}}, SHIFT(22), [41] = {.entry = {.count = 1, .reusable = true}}, SHIFT(56), - [43] = {.entry = {.count = 1, .reusable = true}}, SHIFT(25), - [45] = {.entry = {.count = 1, .reusable = true}}, SHIFT(36), - [47] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source, 1), - [49] = {.entry = {.count = 1, .reusable = true}}, SHIFT(7), - [51] = {.entry = {.count = 1, .reusable = false}}, SHIFT(7), - [53] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_source_repeat1, 2), - [55] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_repeat1, 2), SHIFT_REPEAT(7), - [58] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_repeat1, 2), SHIFT_REPEAT(30), - [61] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_repeat1, 2), SHIFT_REPEAT(15), - [64] = {.entry = {.count = 2, .reusable = 
false}}, REDUCE(aux_sym_source_repeat1, 2), SHIFT_REPEAT(20), - [67] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_repeat1, 2), SHIFT_REPEAT(19), - [70] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_repeat1, 2), SHIFT_REPEAT(49), - [73] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_source_repeat1, 2), SHIFT_REPEAT(7), - [76] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_repeat1, 2), SHIFT_REPEAT(37), - [79] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_source_repeat1, 2), SHIFT_REPEAT(37), - [82] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_repeat1, 2), SHIFT_REPEAT(10), - [85] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_repeat1, 2), SHIFT_REPEAT(17), - [88] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_format_prefix_parameters, 1), - [90] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_format_prefix_parameters, 1), - [92] = {.entry = {.count = 1, .reusable = true}}, SHIFT(11), - [94] = {.entry = {.count = 1, .reusable = true}}, SHIFT(41), - [96] = {.entry = {.count = 1, .reusable = false}}, SHIFT(41), - [98] = {.entry = {.count = 1, .reusable = true}}, SHIFT(34), - [100] = {.entry = {.count = 1, .reusable = true}}, SHIFT(9), - [102] = {.entry = {.count = 1, .reusable = true}}, SHIFT(26), - [104] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym__bare_list_lit_repeat1, 2, .production_id = 9), SHIFT_REPEAT(11), - [107] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym__bare_list_lit_repeat1, 2, .production_id = 9), SHIFT_REPEAT(30), - [110] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym__bare_list_lit_repeat1, 2, .production_id = 9), SHIFT_REPEAT(41), - [113] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym__bare_list_lit_repeat1, 2, .production_id = 9), SHIFT_REPEAT(15), - [116] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym__bare_list_lit_repeat1, 2, 
.production_id = 9), SHIFT_REPEAT(20), - [119] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym__bare_list_lit_repeat1, 2, .production_id = 9), SHIFT_REPEAT(19), - [122] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym__bare_list_lit_repeat1, 2, .production_id = 9), SHIFT_REPEAT(49), - [125] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym__bare_list_lit_repeat1, 2, .production_id = 9), SHIFT_REPEAT(41), - [128] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym__bare_list_lit_repeat1, 2, .production_id = 9), SHIFT_REPEAT(37), - [131] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym__bare_list_lit_repeat1, 2, .production_id = 9), SHIFT_REPEAT(37), - [134] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym__bare_list_lit_repeat1, 2, .production_id = 9), SHIFT_REPEAT(10), - [137] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym__bare_list_lit_repeat1, 2, .production_id = 9), - [139] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym__bare_list_lit_repeat1, 2, .production_id = 9), SHIFT_REPEAT(17), - [142] = {.entry = {.count = 1, .reusable = true}}, SHIFT(29), - [144] = {.entry = {.count = 1, .reusable = true}}, SHIFT(31), - [146] = {.entry = {.count = 1, .reusable = false}}, SHIFT(31), - [148] = {.entry = {.count = 1, .reusable = true}}, SHIFT(50), - [150] = {.entry = {.count = 1, .reusable = true}}, SHIFT(28), - [152] = {.entry = {.count = 1, .reusable = false}}, SHIFT(28), - [154] = {.entry = {.count = 1, .reusable = true}}, SHIFT(14), - [156] = {.entry = {.count = 1, .reusable = true}}, SHIFT(38), - [158] = {.entry = {.count = 1, .reusable = false}}, SHIFT(38), - [160] = {.entry = {.count = 1, .reusable = true}}, SHIFT(24), - [162] = {.entry = {.count = 1, .reusable = false}}, SHIFT(24), - [164] = {.entry = {.count = 1, .reusable = true}}, SHIFT(21), - [166] = {.entry = {.count = 1, .reusable = true}}, SHIFT(27), - [168] = {.entry = {.count = 1, .reusable = false}}, SHIFT(27), - 
[170] = {.entry = {.count = 1, .reusable = true}}, SHIFT(12), - [172] = {.entry = {.count = 1, .reusable = true}}, SHIFT(39), - [174] = {.entry = {.count = 1, .reusable = false}}, SHIFT(39), - [176] = {.entry = {.count = 1, .reusable = true}}, SHIFT(16), - [178] = {.entry = {.count = 1, .reusable = true}}, SHIFT(40), - [180] = {.entry = {.count = 1, .reusable = false}}, SHIFT(40), - [182] = {.entry = {.count = 1, .reusable = true}}, SHIFT(32), - [184] = {.entry = {.count = 1, .reusable = false}}, SHIFT(32), - [186] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_format_modifiers, 1), - [188] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_format_modifiers, 2), - [190] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_unquoting_lit, 3, .production_id = 6), - [192] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_unquoting_lit, 3, .production_id = 6), - [194] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_str_lit, 3), - [196] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_str_lit, 3), - [198] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym__bare_list_lit, 2, .production_id = 4), - [200] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym__bare_list_lit, 2, .production_id = 4), - [202] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_unquote_splicing_lit, 2, .production_id = 3), - [204] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_unquote_splicing_lit, 2, .production_id = 3), - [206] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_quoting_lit, 3, .production_id = 6), - [208] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_quoting_lit, 3, .production_id = 6), - [210] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_quoting_lit_repeat1, 2), SHIFT_REPEAT(29), - [213] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_quoting_lit_repeat1, 2), - [215] = {.entry = {.count = 1, .reusable = false}}, REDUCE(aux_sym_quoting_lit_repeat1, 2), - [217] = {.entry = 
{.count = 1, .reusable = true}}, REDUCE(sym_num_lit, 1), - [219] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_num_lit, 1), - [221] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_quasi_quoting_lit, 3, .production_id = 6), - [223] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_quasi_quoting_lit, 3, .production_id = 6), - [225] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_unquote_splicing_lit, 3, .production_id = 6), - [227] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_unquote_splicing_lit, 3, .production_id = 6), - [229] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_list_lit, 1, .production_id = 2), - [231] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_list_lit, 1, .production_id = 2), - [233] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym__bare_list_lit, 3, .production_id = 8), - [235] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym__bare_list_lit, 3, .production_id = 8), - [237] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_str_lit, 2), - [239] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_str_lit, 2), - [241] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_str_lit, 4), - [243] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_str_lit, 4), - [245] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_sym_lit, 1, .production_id = 1), - [247] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_sym_lit, 1, .production_id = 1), - [249] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_quoting_lit, 2, .production_id = 3), - [251] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_quoting_lit, 2, .production_id = 3), - [253] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_quasi_quoting_lit, 2, .production_id = 3), - [255] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_quasi_quoting_lit, 2, .production_id = 3), - [257] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_unquoting_lit, 2, .production_id = 
3), - [259] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_unquoting_lit, 2, .production_id = 3), - [261] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym__bare_list_lit_repeat1, 1, .production_id = 5), - [263] = {.entry = {.count = 1, .reusable = false}}, REDUCE(aux_sym__bare_list_lit_repeat1, 1, .production_id = 5), - [265] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_format_modifiers_repeat1, 1), - [267] = {.entry = {.count = 1, .reusable = false}}, REDUCE(aux_sym_format_modifiers_repeat1, 1), - [269] = {.entry = {.count = 1, .reusable = true}}, SHIFT(51), - [271] = {.entry = {.count = 1, .reusable = true}}, SHIFT(53), - [273] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym__format_token, 1, .production_id = 7), - [275] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym__format_token, 1, .production_id = 7), - [277] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym__format_token, 2), - [279] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym__format_token, 2), - [281] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_format_modifiers_repeat1, 2), SHIFT_REPEAT(43), - [284] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_format_modifiers_repeat1, 2), SHIFT_REPEAT(59), - [287] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_format_modifiers_repeat1, 2), SHIFT_REPEAT(45), - [290] = {.entry = {.count = 1, .reusable = false}}, REDUCE(aux_sym_format_modifiers_repeat1, 2), - [292] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_format_modifiers_repeat1, 2), - [294] = {.entry = {.count = 1, .reusable = true}}, SHIFT(45), - [296] = {.entry = {.count = 1, .reusable = false}}, SHIFT(23), - [298] = {.entry = {.count = 1, .reusable = true}}, SHIFT(23), - [300] = {.entry = {.count = 1, .reusable = true}}, SHIFT(54), - [302] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_str_lit_repeat1, 2), SHIFT_REPEAT(4), - [305] = {.entry = {.count = 1, .reusable = 
true}}, REDUCE(aux_sym_str_lit_repeat1, 2), - [307] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_str_lit_repeat1, 2), SHIFT_REPEAT(47), - [310] = {.entry = {.count = 1, .reusable = true}}, SHIFT(3), - [312] = {.entry = {.count = 1, .reusable = true}}, SHIFT(47), - [314] = {.entry = {.count = 1, .reusable = true}}, SHIFT(2), - [316] = {.entry = {.count = 1, .reusable = true}}, SHIFT(35), - [318] = {.entry = {.count = 1, .reusable = true}}, SHIFT(48), - [320] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_format_directive_type, 2, .production_id = 10), - [322] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_format_specifier, 3), - [324] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_format_directive_type, 2, .production_id = 11), - [326] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_format_directive_type, 2), - [328] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_format_specifier, 2), - [330] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_format_directive_type, 1), - [332] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_format_specifier, 4), - [334] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(), - [336] = {.entry = {.count = 1, .reusable = true}}, SHIFT(44), + [43] = {.entry = {.count = 1, .reusable = false}}, SHIFT(56), + [45] = {.entry = {.count = 1, .reusable = true}}, SHIFT(25), + [47] = {.entry = {.count = 1, .reusable = true}}, SHIFT(36), + [49] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_source, 1), + [51] = {.entry = {.count = 1, .reusable = true}}, SHIFT(7), + [53] = {.entry = {.count = 1, .reusable = false}}, SHIFT(7), + [55] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_source_repeat1, 2), + [57] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_repeat1, 2), SHIFT_REPEAT(7), + [60] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_repeat1, 2), SHIFT_REPEAT(30), + [63] = {.entry = {.count = 2, .reusable = 
true}}, REDUCE(aux_sym_source_repeat1, 2), SHIFT_REPEAT(15), + [66] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_source_repeat1, 2), SHIFT_REPEAT(20), + [69] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_repeat1, 2), SHIFT_REPEAT(19), + [72] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_repeat1, 2), SHIFT_REPEAT(49), + [75] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_source_repeat1, 2), SHIFT_REPEAT(7), + [78] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_repeat1, 2), SHIFT_REPEAT(37), + [81] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_source_repeat1, 2), SHIFT_REPEAT(37), + [84] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_repeat1, 2), SHIFT_REPEAT(10), + [87] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_source_repeat1, 2), SHIFT_REPEAT(17), + [90] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_format_prefix_parameters, 1), + [92] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_format_prefix_parameters, 1), + [94] = {.entry = {.count = 1, .reusable = true}}, SHIFT(11), + [96] = {.entry = {.count = 1, .reusable = true}}, SHIFT(41), + [98] = {.entry = {.count = 1, .reusable = false}}, SHIFT(41), + [100] = {.entry = {.count = 1, .reusable = true}}, SHIFT(34), + [102] = {.entry = {.count = 1, .reusable = true}}, SHIFT(9), + [104] = {.entry = {.count = 1, .reusable = true}}, SHIFT(26), + [106] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym__bare_list_lit_repeat1, 2, .production_id = 9), SHIFT_REPEAT(11), + [109] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym__bare_list_lit_repeat1, 2, .production_id = 9), SHIFT_REPEAT(30), + [112] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym__bare_list_lit_repeat1, 2, .production_id = 9), SHIFT_REPEAT(41), + [115] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym__bare_list_lit_repeat1, 2, .production_id = 9), 
SHIFT_REPEAT(15), + [118] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym__bare_list_lit_repeat1, 2, .production_id = 9), SHIFT_REPEAT(20), + [121] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym__bare_list_lit_repeat1, 2, .production_id = 9), SHIFT_REPEAT(19), + [124] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym__bare_list_lit_repeat1, 2, .production_id = 9), SHIFT_REPEAT(49), + [127] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym__bare_list_lit_repeat1, 2, .production_id = 9), SHIFT_REPEAT(41), + [130] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym__bare_list_lit_repeat1, 2, .production_id = 9), SHIFT_REPEAT(37), + [133] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym__bare_list_lit_repeat1, 2, .production_id = 9), SHIFT_REPEAT(37), + [136] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym__bare_list_lit_repeat1, 2, .production_id = 9), SHIFT_REPEAT(10), + [139] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym__bare_list_lit_repeat1, 2, .production_id = 9), + [141] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym__bare_list_lit_repeat1, 2, .production_id = 9), SHIFT_REPEAT(17), + [144] = {.entry = {.count = 1, .reusable = true}}, SHIFT(29), + [146] = {.entry = {.count = 1, .reusable = true}}, SHIFT(31), + [148] = {.entry = {.count = 1, .reusable = false}}, SHIFT(31), + [150] = {.entry = {.count = 1, .reusable = true}}, SHIFT(50), + [152] = {.entry = {.count = 1, .reusable = true}}, SHIFT(28), + [154] = {.entry = {.count = 1, .reusable = false}}, SHIFT(28), + [156] = {.entry = {.count = 1, .reusable = true}}, SHIFT(14), + [158] = {.entry = {.count = 1, .reusable = true}}, SHIFT(38), + [160] = {.entry = {.count = 1, .reusable = false}}, SHIFT(38), + [162] = {.entry = {.count = 1, .reusable = true}}, SHIFT(24), + [164] = {.entry = {.count = 1, .reusable = false}}, SHIFT(24), + [166] = {.entry = {.count = 1, .reusable = true}}, SHIFT(21), + [168] = {.entry 
= {.count = 1, .reusable = true}}, SHIFT(27), + [170] = {.entry = {.count = 1, .reusable = false}}, SHIFT(27), + [172] = {.entry = {.count = 1, .reusable = true}}, SHIFT(12), + [174] = {.entry = {.count = 1, .reusable = true}}, SHIFT(39), + [176] = {.entry = {.count = 1, .reusable = false}}, SHIFT(39), + [178] = {.entry = {.count = 1, .reusable = true}}, SHIFT(16), + [180] = {.entry = {.count = 1, .reusable = true}}, SHIFT(40), + [182] = {.entry = {.count = 1, .reusable = false}}, SHIFT(40), + [184] = {.entry = {.count = 1, .reusable = true}}, SHIFT(32), + [186] = {.entry = {.count = 1, .reusable = false}}, SHIFT(32), + [188] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_format_modifiers, 1), + [190] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_format_modifiers, 1), + [192] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_format_modifiers, 2), + [194] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_format_modifiers, 2), + [196] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_unquoting_lit, 3, .production_id = 6), + [198] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_unquoting_lit, 3, .production_id = 6), + [200] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_str_lit, 3), + [202] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_str_lit, 3), + [204] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym__bare_list_lit, 2, .production_id = 4), + [206] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym__bare_list_lit, 2, .production_id = 4), + [208] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_unquote_splicing_lit, 2, .production_id = 3), + [210] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_unquote_splicing_lit, 2, .production_id = 3), + [212] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_quoting_lit, 3, .production_id = 6), + [214] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_quoting_lit, 3, .production_id = 6), + [216] = {.entry = {.count = 2, 
.reusable = true}}, REDUCE(aux_sym_quoting_lit_repeat1, 2), SHIFT_REPEAT(29), + [219] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_quoting_lit_repeat1, 2), + [221] = {.entry = {.count = 1, .reusable = false}}, REDUCE(aux_sym_quoting_lit_repeat1, 2), + [223] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_num_lit, 1), + [225] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_num_lit, 1), + [227] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_quasi_quoting_lit, 3, .production_id = 6), + [229] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_quasi_quoting_lit, 3, .production_id = 6), + [231] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_unquote_splicing_lit, 3, .production_id = 6), + [233] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_unquote_splicing_lit, 3, .production_id = 6), + [235] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_list_lit, 1, .production_id = 2), + [237] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_list_lit, 1, .production_id = 2), + [239] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym__bare_list_lit, 3, .production_id = 8), + [241] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym__bare_list_lit, 3, .production_id = 8), + [243] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_str_lit, 2), + [245] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_str_lit, 2), + [247] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_str_lit, 4), + [249] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_str_lit, 4), + [251] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_sym_lit, 1, .production_id = 1), + [253] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_sym_lit, 1, .production_id = 1), + [255] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_quoting_lit, 2, .production_id = 3), + [257] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_quoting_lit, 2, .production_id = 3), + [259] = {.entry = {.count = 
1, .reusable = true}}, REDUCE(sym_quasi_quoting_lit, 2, .production_id = 3), + [261] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_quasi_quoting_lit, 2, .production_id = 3), + [263] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_unquoting_lit, 2, .production_id = 3), + [265] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_unquoting_lit, 2, .production_id = 3), + [267] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym__bare_list_lit_repeat1, 1, .production_id = 5), + [269] = {.entry = {.count = 1, .reusable = false}}, REDUCE(aux_sym__bare_list_lit_repeat1, 1, .production_id = 5), + [271] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_format_modifiers_repeat1, 1), + [273] = {.entry = {.count = 1, .reusable = false}}, REDUCE(aux_sym_format_modifiers_repeat1, 1), + [275] = {.entry = {.count = 1, .reusable = true}}, SHIFT(51), + [277] = {.entry = {.count = 1, .reusable = true}}, SHIFT(53), + [279] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym__format_token, 1, .production_id = 7), + [281] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym__format_token, 1, .production_id = 7), + [283] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym__format_token, 2), + [285] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym__format_token, 2), + [287] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_format_modifiers_repeat1, 2), SHIFT_REPEAT(43), + [290] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_format_modifiers_repeat1, 2), SHIFT_REPEAT(59), + [293] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_format_modifiers_repeat1, 2), SHIFT_REPEAT(45), + [296] = {.entry = {.count = 1, .reusable = false}}, REDUCE(aux_sym_format_modifiers_repeat1, 2), + [298] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_format_modifiers_repeat1, 2), + [300] = {.entry = {.count = 1, .reusable = true}}, SHIFT(45), + [302] = {.entry = {.count = 1, .reusable = false}}, SHIFT(23), + 
[304] = {.entry = {.count = 1, .reusable = true}}, SHIFT(23), + [306] = {.entry = {.count = 1, .reusable = true}}, SHIFT(54), + [308] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_str_lit_repeat1, 2), SHIFT_REPEAT(4), + [311] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_str_lit_repeat1, 2), + [313] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_str_lit_repeat1, 2), SHIFT_REPEAT(47), + [316] = {.entry = {.count = 1, .reusable = true}}, SHIFT(3), + [318] = {.entry = {.count = 1, .reusable = true}}, SHIFT(47), + [320] = {.entry = {.count = 1, .reusable = true}}, SHIFT(2), + [322] = {.entry = {.count = 1, .reusable = true}}, SHIFT(35), + [324] = {.entry = {.count = 1, .reusable = true}}, SHIFT(48), + [326] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_format_directive_type, 2, .production_id = 10), + [328] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_format_specifier, 3), + [330] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_format_directive_type, 2, .production_id = 11), + [332] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_format_directive_type, 2), + [334] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_format_specifier, 2), + [336] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_format_directive_type, 1), + [338] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_format_specifier, 4), + [340] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(), + [342] = {.entry = {.count = 1, .reusable = true}}, SHIFT(44), }; #ifdef __cplusplus extern "C" { #endif #ifdef _WIN32 -#define extern __declspec(dllexport) +#define TS_PUBLIC __declspec(dllexport) +#else +#define TS_PUBLIC __attribute__((visibility("default"))) #endif -extern const TSLanguage *tree_sitter_opengoal(void) { +TS_PUBLIC const TSLanguage *tree_sitter_opengoal() { static const TSLanguage language = { .version = LANGUAGE_VERSION, .symbol_count = SYMBOL_COUNT, diff --git 
a/third-party/tree-sitter/tree-sitter-opengoal/tree_sitter/alloc.h b/third-party/tree-sitter/tree-sitter-opengoal/tree_sitter/alloc.h new file mode 100644 index 00000000000..1f4466d75c4 --- /dev/null +++ b/third-party/tree-sitter/tree-sitter-opengoal/tree_sitter/alloc.h @@ -0,0 +1,54 @@ +#ifndef TREE_SITTER_ALLOC_H_ +#define TREE_SITTER_ALLOC_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +// Allow clients to override allocation functions +#ifdef TREE_SITTER_REUSE_ALLOCATOR + +extern void *(*ts_current_malloc)(size_t); +extern void *(*ts_current_calloc)(size_t, size_t); +extern void *(*ts_current_realloc)(void *, size_t); +extern void (*ts_current_free)(void *); + +#ifndef ts_malloc +#define ts_malloc ts_current_malloc +#endif +#ifndef ts_calloc +#define ts_calloc ts_current_calloc +#endif +#ifndef ts_realloc +#define ts_realloc ts_current_realloc +#endif +#ifndef ts_free +#define ts_free ts_current_free +#endif + +#else + +#ifndef ts_malloc +#define ts_malloc malloc +#endif +#ifndef ts_calloc +#define ts_calloc calloc +#endif +#ifndef ts_realloc +#define ts_realloc realloc +#endif +#ifndef ts_free +#define ts_free free +#endif + +#endif + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_ALLOC_H_ diff --git a/third-party/tree-sitter/tree-sitter-opengoal/tree_sitter/array.h b/third-party/tree-sitter/tree-sitter-opengoal/tree_sitter/array.h new file mode 100644 index 00000000000..186ba673998 --- /dev/null +++ b/third-party/tree-sitter/tree-sitter-opengoal/tree_sitter/array.h @@ -0,0 +1,287 @@ +#ifndef TREE_SITTER_ARRAY_H_ +#define TREE_SITTER_ARRAY_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "./alloc.h" + +#include +#include +#include +#include +#include + +#ifdef _MSC_VER +#pragma warning(disable : 4101) +#elif defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-variable" +#endif + +#define Array(T) \ + struct { \ + T *contents; \ + uint32_t size; \ + 
uint32_t capacity; \ + } + +/// Initialize an array. +#define array_init(self) \ + ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL) + +/// Create an empty array. +#define array_new() \ + { NULL, 0, 0 } + +/// Get a pointer to the element at a given `index` in the array. +#define array_get(self, _index) \ + (assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index]) + +/// Get a pointer to the first element in the array. +#define array_front(self) array_get(self, 0) + +/// Get a pointer to the last element in the array. +#define array_back(self) array_get(self, (self)->size - 1) + +/// Clear the array, setting its size to zero. Note that this does not free any +/// memory allocated for the array's contents. +#define array_clear(self) ((self)->size = 0) + +/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is +/// less than the array's current capacity, this function has no effect. +#define array_reserve(self, new_capacity) \ + _array__reserve((Array *)(self), array_elem_size(self), new_capacity) + +/// Free any memory allocated for this array. Note that this does not free any +/// memory allocated for the array's contents. +#define array_delete(self) _array__delete((Array *)(self)) + +/// Push a new `element` onto the end of the array. +#define array_push(self, element) \ + (_array__grow((Array *)(self), 1, array_elem_size(self)), \ + (self)->contents[(self)->size++] = (element)) + +/// Increase the array's size by `count` elements. +/// New elements are zero-initialized. +#define array_grow_by(self, count) \ + (_array__grow((Array *)(self), count, array_elem_size(self)), \ + memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)), \ + (self)->size += (count)) + +/// Append all elements from one array to the end of another. 
+#define array_push_all(self, other) \ + array_extend((self), (other)->size, (other)->contents) + +/// Append `count` elements to the end of the array, reading their values from the +/// `contents` pointer. +#define array_extend(self, count, contents) \ + _array__splice( \ + (Array *)(self), array_elem_size(self), (self)->size, \ + 0, count, contents \ + ) + +/// Remove `old_count` elements from the array starting at the given `index`. At +/// the same index, insert `new_count` new elements, reading their values from the +/// `new_contents` pointer. +#define array_splice(self, _index, old_count, new_count, new_contents) \ + _array__splice( \ + (Array *)(self), array_elem_size(self), _index, \ + old_count, new_count, new_contents \ + ) + +/// Insert one `element` into the array at the given `index`. +#define array_insert(self, _index, element) \ + _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element)) + +/// Remove one element from the array at the given `index`. +#define array_erase(self, _index) \ + _array__erase((Array *)(self), array_elem_size(self), _index) + +/// Pop the last element off the array, returning the element by value. +#define array_pop(self) ((self)->contents[--(self)->size]) + +/// Assign the contents of one array to another, reallocating if necessary. +#define array_assign(self, other) \ + _array__assign((Array *)(self), (const Array *)(other), array_elem_size(self)) + +/// Swap one array with another +#define array_swap(self, other) \ + _array__swap((Array *)(self), (Array *)(other)) + +/// Get the size of the array contents +#define array_elem_size(self) (sizeof *(self)->contents) + +/// Search a sorted array for a given `needle` value, using the given `compare` +/// callback to determine the order. +/// +/// If an existing element is found to be equal to `needle`, then the `index` +/// out-parameter is set to the existing value's index, and the `exists` +/// out-parameter is set to true. 
Otherwise, `index` is set to an index where +/// `needle` should be inserted in order to preserve the sorting, and `exists` +/// is set to false. +#define array_search_sorted_with(self, compare, needle, _index, _exists) \ + _array__search_sorted(self, 0, compare, , needle, _index, _exists) + +/// Search a sorted array for a given `needle` value, using integer comparisons +/// of a given struct field (specified with a leading dot) to determine the order. +/// +/// See also `array_search_sorted_with`. +#define array_search_sorted_by(self, field, needle, _index, _exists) \ + _array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists) + +/// Insert a given `value` into a sorted array, using the given `compare` +/// callback to determine the order. +#define array_insert_sorted_with(self, compare, value) \ + do { \ + unsigned _index, _exists; \ + array_search_sorted_with(self, compare, &(value), &_index, &_exists); \ + if (!_exists) array_insert(self, _index, value); \ + } while (0) + +/// Insert a given `value` into a sorted array, using integer comparisons of +/// a given struct field (specified with a leading dot) to determine the order. +/// +/// See also `array_search_sorted_by`. +#define array_insert_sorted_by(self, field, value) \ + do { \ + unsigned _index, _exists; \ + array_search_sorted_by(self, field, (value) field, &_index, &_exists); \ + if (!_exists) array_insert(self, _index, value); \ + } while (0) + +// Private + +typedef Array(void) Array; + +/// This is not what you're looking for, see `array_delete`. +static inline void _array__delete(Array *self) { + if (self->contents) { + ts_free(self->contents); + self->contents = NULL; + self->size = 0; + self->capacity = 0; + } +} + +/// This is not what you're looking for, see `array_erase`. 
+static inline void _array__erase(Array *self, size_t element_size, + uint32_t index) { + assert(index < self->size); + char *contents = (char *)self->contents; + memmove(contents + index * element_size, contents + (index + 1) * element_size, + (self->size - index - 1) * element_size); + self->size--; +} + +/// This is not what you're looking for, see `array_reserve`. +static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) { + if (new_capacity > self->capacity) { + if (self->contents) { + self->contents = ts_realloc(self->contents, new_capacity * element_size); + } else { + self->contents = ts_malloc(new_capacity * element_size); + } + self->capacity = new_capacity; + } +} + +/// This is not what you're looking for, see `array_assign`. +static inline void _array__assign(Array *self, const Array *other, size_t element_size) { + _array__reserve(self, element_size, other->size); + self->size = other->size; + memcpy(self->contents, other->contents, self->size * element_size); +} + +/// This is not what you're looking for, see `array_swap`. +static inline void _array__swap(Array *self, Array *other) { + Array swap = *other; + *other = *self; + *self = swap; +} + +/// This is not what you're looking for, see `array_push` or `array_grow_by`. +static inline void _array__grow(Array *self, uint32_t count, size_t element_size) { + uint32_t new_size = self->size + count; + if (new_size > self->capacity) { + uint32_t new_capacity = self->capacity * 2; + if (new_capacity < 8) new_capacity = 8; + if (new_capacity < new_size) new_capacity = new_size; + _array__reserve(self, element_size, new_capacity); + } +} + +/// This is not what you're looking for, see `array_splice`. 
+static inline void _array__splice(Array *self, size_t element_size, + uint32_t index, uint32_t old_count, + uint32_t new_count, const void *elements) { + uint32_t new_size = self->size + new_count - old_count; + uint32_t old_end = index + old_count; + uint32_t new_end = index + new_count; + assert(old_end <= self->size); + + _array__reserve(self, element_size, new_size); + + char *contents = (char *)self->contents; + if (self->size > old_end) { + memmove( + contents + new_end * element_size, + contents + old_end * element_size, + (self->size - old_end) * element_size + ); + } + if (new_count > 0) { + if (elements) { + memcpy( + (contents + index * element_size), + elements, + new_count * element_size + ); + } else { + memset( + (contents + index * element_size), + 0, + new_count * element_size + ); + } + } + self->size += new_count - old_count; +} + +/// A binary search routine, based on Rust's `std::slice::binary_search_by`. +/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`. +#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \ + do { \ + *(_index) = start; \ + *(_exists) = false; \ + uint32_t size = (self)->size - *(_index); \ + if (size == 0) break; \ + int comparison; \ + while (size > 1) { \ + uint32_t half_size = size / 2; \ + uint32_t mid_index = *(_index) + half_size; \ + comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \ + if (comparison <= 0) *(_index) = mid_index; \ + size -= half_size; \ + } \ + comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \ + if (comparison == 0) *(_exists) = true; \ + else if (comparison < 0) *(_index) += 1; \ + } while (0) + +/// Helper macro for the `_sorted_by` routines below. This takes the left (existing) +/// parameter by reference in order to work with the generic sorting function above. 
+#define _compare_int(a, b) ((int)*(a) - (int)(b)) + +#ifdef _MSC_VER +#pragma warning(default : 4101) +#elif defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic pop +#endif + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_ARRAY_H_ diff --git a/third-party/tree-sitter/tree-sitter-opengoal/tree_sitter/parser.h b/third-party/tree-sitter/tree-sitter-opengoal/tree_sitter/parser.h index 2b14ac1046b..17b4fde9821 100644 --- a/third-party/tree-sitter/tree-sitter-opengoal/tree_sitter/parser.h +++ b/third-party/tree-sitter/tree-sitter-opengoal/tree_sitter/parser.h @@ -13,9 +13,8 @@ extern "C" { #define ts_builtin_sym_end 0 #define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 -typedef uint16_t TSStateId; - #ifndef TREE_SITTER_API_H_ +typedef uint16_t TSStateId; typedef uint16_t TSSymbol; typedef uint16_t TSFieldId; typedef struct TSLanguage TSLanguage; @@ -130,9 +129,16 @@ struct TSLanguage { * Lexer Macros */ +#ifdef _MSC_VER +#define UNUSED __pragma(warning(suppress : 4101)) +#else +#define UNUSED __attribute__((unused)) +#endif + #define START_LEXER() \ bool result = false; \ bool skip = false; \ + UNUSED \ bool eof = false; \ int32_t lookahead; \ goto start; \ @@ -166,7 +172,7 @@ struct TSLanguage { * Parse Table Macros */ -#define SMALL_STATE(id) id - LARGE_STATE_COUNT +#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT) #define STATE(id) id @@ -176,7 +182,7 @@ struct TSLanguage { {{ \ .shift = { \ .type = TSParseActionTypeShift, \ - .state = state_value \ + .state = (state_value) \ } \ }} @@ -184,7 +190,7 @@ struct TSLanguage { {{ \ .shift = { \ .type = TSParseActionTypeShift, \ - .state = state_value, \ + .state = (state_value), \ .repetition = true \ } \ }} diff --git a/third-party/tree-sitter/tree-sitter/.cargo/config.toml b/third-party/tree-sitter/tree-sitter/.cargo/config.toml new file mode 100644 index 00000000000..35049cbcb13 --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/.cargo/config.toml @@ -0,0 +1,2 @@ +[alias] +xtask = "run 
--package xtask --" diff --git a/third-party/tree-sitter/tree-sitter/.editorconfig b/third-party/tree-sitter/tree-sitter/.editorconfig new file mode 100644 index 00000000000..53780b3435b --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/.editorconfig @@ -0,0 +1,15 @@ +root = true + +[*] +indent_style = space +indent_size = 2 +tab_width = 8 +end_of_line = lf +insert_final_newline = true + +[*.rs] +indent_size = 4 + +[Makefile] +indent_style = tab +indent_size = 8 diff --git a/third-party/tree-sitter/tree-sitter/.gitattributes b/third-party/tree-sitter/tree-sitter/.gitattributes index 44bf45c7b45..1d9b8cb4279 100644 --- a/third-party/tree-sitter/tree-sitter/.gitattributes +++ b/third-party/tree-sitter/tree-sitter/.gitattributes @@ -1,3 +1,5 @@ +* text=auto eol=lf + /lib/src/unicode/*.h linguist-vendored /lib/src/unicode/LICENSE linguist-vendored diff --git a/third-party/tree-sitter/tree-sitter/.github/ISSUE_TEMPLATE/bug_report.yml b/third-party/tree-sitter/tree-sitter/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 00000000000..4138c3a9f5f --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,41 @@ +name: Bug Report +description: Report a problem +labels: [bug] +body: + - type: textarea + attributes: + label: "Problem" + description: "Describe the current behavior. May include logs, images, or videos." + validations: + required: true + + - type: textarea + attributes: + label: "Steps to reproduce" + placeholder: | + git clone --depth=1 https://github.com/tree-sitter/tree-sitter-ruby + cd tree-sitter-ruby + tree-sitter generate + validations: + required: true + + - type: textarea + attributes: + label: "Expected behavior" + description: "Describe the behavior you expect." 
+ validations: + required: true + + - type: input + attributes: + label: "Tree-sitter version (tree-sitter --version)" + placeholder: "tree-sitter 0.20.9" + validations: + required: true + + - type: input + attributes: + label: "Operating system/version" + placeholder: "macOS 11.5" + validations: + required: true diff --git a/third-party/tree-sitter/tree-sitter/.github/ISSUE_TEMPLATE/config.yml b/third-party/tree-sitter/tree-sitter/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 00000000000..3ba13e0cec6 --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1 @@ +blank_issues_enabled: false diff --git a/third-party/tree-sitter/tree-sitter/.github/ISSUE_TEMPLATE/feature_request.yml b/third-party/tree-sitter/tree-sitter/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 00000000000..388f3675824 --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,23 @@ +name: Feature request +description: Request an enhancement +labels: [enhancement] +body: + - type: markdown + attributes: + value: | + Before requesting: search [existing feature requests](https://github.com/tree-sitter/tree-sitter/labels/enhancement). + + - type: textarea + attributes: + label: "Problem" + description: "Describe the problem to be solved." + placeholder: "No smurf icons available. Smurfs are useful because ..." + validations: + required: false + + - type: textarea + attributes: + label: "Expected behavior" + description: "Describe what the new feature or behavior would look like. How does it solve the problem? Is it worth the cost?" 
+ validations: + required: false diff --git a/third-party/tree-sitter/tree-sitter/.github/actions/cache/action.yml b/third-party/tree-sitter/tree-sitter/.github/actions/cache/action.yml new file mode 100644 index 00000000000..cc81668211a --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/.github/actions/cache/action.yml @@ -0,0 +1,24 @@ +name: 'Cache' +description: "This action caches fixtures" +outputs: + cache-hit: + description: 'Cache hit' + value: ${{ steps.cache_output.outputs.cache-hit }} +runs: + using: "composite" + steps: + - uses: actions/cache@v4 + id: cache_fixtures + with: + path: | + test/fixtures/grammars + target/release/tree-sitter-*.wasm + key: fixtures-${{ join(matrix.*, '_') }}-${{ hashFiles( + 'cli/src/generate/**', + 'script/generate-fixtures*', + 'test/fixtures/grammars/*/**/src/*.c', + '.github/actions/cache/action.yml') }} + + - run: echo "cache-hit=${{ steps.cache_fixtures.outputs.cache-hit }}" >> $GITHUB_OUTPUT + shell: bash + id: cache_output diff --git a/third-party/tree-sitter/tree-sitter/.github/dependabot.yml b/third-party/tree-sitter/tree-sitter/.github/dependabot.yml new file mode 100644 index 00000000000..c2faedbee87 --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/.github/dependabot.yml @@ -0,0 +1,22 @@ +version: 2 +updates: + - package-ecosystem: "cargo" + directory: "/" + schedule: + interval: "weekly" + commit-message: + prefix: "build(deps)" + groups: + cargo: + patterns: + - "*" + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" + commit-message: + prefix: "ci" + groups: + actions: + patterns: + - "*" diff --git a/third-party/tree-sitter/tree-sitter/.github/scripts/close_unresponsive.js b/third-party/tree-sitter/tree-sitter/.github/scripts/close_unresponsive.js new file mode 100644 index 00000000000..effa2646d72 --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/.github/scripts/close_unresponsive.js @@ -0,0 +1,58 @@ +function labeledEvent(data) { + return ( + 
data.event === "labeled" && data.label.name === "more-information-needed" + ); +} + +const numberOfDaysLimit = 30; +const close_message = `This has been closed since a request for information has \ +not been answered for ${numberOfDaysLimit} days. It can be reopened when the \ +requested information is provided.`; + +module.exports = async ({ github, context }) => { + const owner = context.repo.owner; + const repo = context.repo.repo; + + const issues = await github.rest.issues.listForRepo({ + owner: owner, + repo: repo, + labels: "more-information-needed", + }); + const numbers = issues.data.map((e) => e.number); + + for (const number of numbers) { + const events = await github.paginate( + github.rest.issues.listEventsForTimeline, + { + owner: owner, + repo: repo, + issue_number: number, + }, + (response) => response.data.filter(labeledEvent), + ); + + const latest_response_label = events[events.length - 1]; + + const created_at = new Date(latest_response_label.created_at); + const now = new Date(); + const diff = now - created_at; + const diffDays = diff / (1000 * 60 * 60 * 24); + + if (diffDays > numberOfDaysLimit) { + github.rest.issues.update({ + owner: owner, + repo: repo, + issue_number: number, + state_reason: "not_planned", + state: "closed", + }); + + github.rest.issues.createComment({ + owner: owner, + repo: repo, + issue_number: number, + body: close_message, + }); + } + } +}; diff --git a/third-party/tree-sitter/tree-sitter/.github/scripts/cross.sh b/third-party/tree-sitter/tree-sitter/.github/scripts/cross.sh index 070171926ae..a52f0873589 100644 --- a/third-party/tree-sitter/tree-sitter/.github/scripts/cross.sh +++ b/third-party/tree-sitter/tree-sitter/.github/scripts/cross.sh @@ -1,9 +1,16 @@ #!/bin/bash -set -x +# set -x set -e -if [ "$CROSS" != 1 ]; then +if [ "$BUILD_CMD" != "cross" ]; then + echo "cross.sh - is a helper to assist only in cross compiling environments" >&2 + echo "To use this tool set the BUILD_CMD env var to the \"cross\" value" 
>&2 + exit 111 +fi + +if [ -z "$CROSS_IMAGE" ]; then + echo "The CROSS_IMAGE env var should be provided" >&2 exit 111 fi diff --git a/third-party/tree-sitter/tree-sitter/.github/scripts/make.sh b/third-party/tree-sitter/tree-sitter/.github/scripts/make.sh index 62aa0c06dec..791925414ee 100644 --- a/third-party/tree-sitter/tree-sitter/.github/scripts/make.sh +++ b/third-party/tree-sitter/tree-sitter/.github/scripts/make.sh @@ -1,9 +1,9 @@ #!/bin/bash -set -x +# set -x set -e -if [ "$CROSS" = 1 ]; then +if [ "$BUILD_CMD" == "cross" ]; then if [ -z "$CC" ]; then echo "make.sh: CC is not set" >&2 exit 111 diff --git a/third-party/tree-sitter/tree-sitter/.github/scripts/remove_response_label.js b/third-party/tree-sitter/tree-sitter/.github/scripts/remove_response_label.js new file mode 100644 index 00000000000..66840324aac --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/.github/scripts/remove_response_label.js @@ -0,0 +1,19 @@ +module.exports = async ({ github, context }) => { + const commenter = context.actor; + const issue = await github.rest.issues.get({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + }); + const author = issue.data.user.login; + const labels = issue.data.labels.map((e) => e.name); + + if (author === commenter && labels.includes("more-information-needed")) { + github.rest.issues.removeLabel({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + name: "more-information-needed", + }); + } +}; diff --git a/third-party/tree-sitter/tree-sitter/.github/scripts/reviewers_remove.js b/third-party/tree-sitter/tree-sitter/.github/scripts/reviewers_remove.js new file mode 100644 index 00000000000..9e44e4ac866 --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/.github/scripts/reviewers_remove.js @@ -0,0 +1,16 @@ +module.exports = async ({ github, context }) => { + const requestedReviewers = await github.rest.pulls.listRequestedReviewers({ + owner: 
context.repo.owner, + repo: context.repo.repo, + pull_number: context.issue.number, + }); + + const reviewers = requestedReviewers.data.users.map((e) => e.login); + + github.rest.pulls.removeRequestedReviewers({ + owner: context.repo.owner, + repo: context.repo.repo, + pull_number: context.issue.number, + reviewers: reviewers, + }); +}; diff --git a/third-party/tree-sitter/tree-sitter/.github/scripts/tree-sitter.sh b/third-party/tree-sitter/tree-sitter/.github/scripts/tree-sitter.sh index 2e6e31c2906..0cac915308f 100644 --- a/third-party/tree-sitter/tree-sitter/.github/scripts/tree-sitter.sh +++ b/third-party/tree-sitter/tree-sitter/.github/scripts/tree-sitter.sh @@ -1,11 +1,27 @@ #!/bin/bash -set -x +# set -x set -e +if [ -z "$ROOT" ]; then + echo "The ROOT env var should be set to absolute path of a repo root folder" >&2 + exit 111 +fi + +if [ -z "$TARGET" ]; then + echo "The TARGET env var should be equal to a \`cargo build --target \` command value" >&2 + exit 111 +fi + tree_sitter="$ROOT"/target/"$TARGET"/release/tree-sitter -if [ "$CROSS" = 1 ]; then +if [ "$BUILD_CMD" == "cross" ]; then + if [ -z "$CROSS_RUNNER" ]; then + echo "The CROSS_RUNNER env var should be set to a CARGO_TARGET_*_RUNNER env var value" >&2 + echo "that is available in a docker image used by the cross tool under the hood" >&2 + exit 111 + fi + cross.sh $CROSS_RUNNER "$tree_sitter" "$@" else "$tree_sitter" "$@" diff --git a/third-party/tree-sitter/tree-sitter/.github/workflows/CICD.yml b/third-party/tree-sitter/tree-sitter/.github/workflows/CICD.yml deleted file mode 100644 index 7c2351a8974..00000000000 --- a/third-party/tree-sitter/tree-sitter/.github/workflows/CICD.yml +++ /dev/null @@ -1,69 +0,0 @@ -name: CICD - -on: - workflow_dispatch: - pull_request: - push: - branches: - - master - - check/* - -concurrency: - group: '${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}' - cancel-in-progress: true - -jobs: - init: - name: Init - 
runs-on: ubuntu-latest - steps: - - name: Get PR head ref - if: ${{ github.event_name == 'pull_request' }} - id: ref - run: | - echo "ref=refs/pull/${{ github.event.pull_request.number }}/head" >> $GITHUB_OUTPUT - outputs: - ref: >- - ${{ - (github.event_name == 'pull_request' && startsWith(github.head_ref, 'release/v')) - && steps.ref.outputs.ref - || github.ref - }} - - fast_checks: - name: Fast checks - uses: ./.github/workflows/fast_checks.yml - - full_checks: - name: Full Rust checks - needs: fast_checks - uses: ./.github/workflows/full_rust_checks.yml - - min_version: - name: Minimum supported rust version - needs: fast_checks - uses: ./.github/workflows/msrv.yml - with: - package: tree-sitter-cli - - build: - name: Build & Test - needs: [init, fast_checks] - uses: ./.github/workflows/build.yml - with: - ref: ${{ needs.init.outputs.ref }} - - release: - name: Release - needs: [init, fast_checks, full_checks, min_version, build] - if: > - github.event.pull_request.head.repo.full_name == github.repository && - startsWith(github.head_ref, 'release/v') - uses: ./.github/workflows/release.yml - with: - ref: ${{ needs.init.outputs.ref }} - - publish: - name: Publish - needs: release - uses: ./.github/workflows/publish.yml diff --git a/third-party/tree-sitter/tree-sitter/.github/workflows/build.yml b/third-party/tree-sitter/tree-sitter/.github/workflows/build.yml index 27b310859e2..968fcba057a 100644 --- a/third-party/tree-sitter/tree-sitter/.github/workflows/build.yml +++ b/third-party/tree-sitter/tree-sitter/.github/workflows/build.yml @@ -8,160 +8,190 @@ env: on: workflow_call: inputs: - ref: - default: ${{ github.ref }} - type: string + run_test: + default: true + type: boolean jobs: build: - name: ${{ matrix.job.name }} (${{ matrix.job.target }}) (${{ matrix.job.os }}) - runs-on: ${{ matrix.job.os }} + name: ${{ matrix.platform }} (${{ matrix.target }}) (${{ matrix.os }}) + runs-on: ${{ matrix.os }} + timeout-minutes: 40 strategy: fail-fast: false matrix: - 
job: - - { name: linux-aarch64 , target: aarch64-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } - - { name: linux-arm , target: arm-unknown-linux-gnueabihf , os: ubuntu-latest , use-cross: true } - - { name: linux-x64 , target: x86_64-unknown-linux-gnu , os: ubuntu-latest } - - { name: linux-x86 , target: i686-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } - - { name: windows-x64 , target: x86_64-pc-windows-msvc , os: windows-latest } - - { name: windows-x86 , target: i686-pc-windows-msvc , os: windows-latest } - - { name: macos-x64 , target: x86_64-apple-darwin , os: macos-latest } + platform: + - linux-arm64 # + - linux-arm # + - linux-x64 # + - linux-x86 # + - linux-powerpc64 # + - windows-arm64 # + - windows-x64 # <-- No C library build - requires an additional adapted Makefile for `cl.exe` compiler + - windows-x86 # -- // -- + - macos-arm64 # + - macos-x64 # + + include: + # When adding a new `target`: + # 1. Define a new platform alias above + # 2. Add a new record to a matrix map in `cli/npm/install.js` + - { platform: linux-arm64 , target: aarch64-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } + - { platform: linux-arm , target: arm-unknown-linux-gnueabi , os: ubuntu-latest , use-cross: true } + - { platform: linux-x64 , target: x86_64-unknown-linux-gnu , os: ubuntu-20.04 , enable-wasm: true } #2272 + - { platform: linux-x86 , target: i686-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } + - { platform: linux-powerpc64 , target: powerpc64-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } + - { platform: windows-arm64 , target: aarch64-pc-windows-msvc , os: windows-latest } + - { platform: windows-x64 , target: x86_64-pc-windows-msvc , os: windows-latest , enable-wasm: true } + - { platform: windows-x86 , target: i686-pc-windows-msvc , os: windows-latest } + - { platform: macos-arm64 , target: aarch64-apple-darwin , os: macos-14 , enable-wasm: true } + - { platform: macos-x64 , target: x86_64-apple-darwin , 
os: macos-latest , enable-wasm: true } + + # Cross compilers for C library + - { platform: linux-arm64 , cc: aarch64-linux-gnu-gcc , ar: aarch64-linux-gnu-ar } + - { platform: linux-arm , cc: arm-linux-gnueabi-gcc , ar: arm-linux-gnueabi-ar } + - { platform: linux-x86 , cc: i686-linux-gnu-gcc , ar: i686-linux-gnu-ar } + - { platform: linux-powerpc64 , cc: powerpc64-linux-gnu-gcc , ar: powerpc64-linux-gnu-ar } + + # See #2041 tree-sitter issue + - { platform: windows-x64 , rust-test-threads: 1 } + - { platform: windows-x86 , rust-test-threads: 1 } + + # CLI only build + - { platform: windows-arm64 , cli-only: true } env: BUILD_CMD: cargo + EMSCRIPTEN_VERSION: "" + EXE: ${{ contains(matrix.target, 'windows') && '.exe' || '' }} defaults: run: shell: bash steps: - - name: Checkout source code - uses: actions/checkout@v3 - with: - ref: ${{ inputs.ref }} + - uses: actions/checkout@v4 - name: Read Emscripten version run: | - echo "EMSCRIPTEN_VERSION=$(cat cli/emscripten-version)" >> $GITHUB_ENV + echo "EMSCRIPTEN_VERSION=$(cat cli/loader/emscripten-version)" >> $GITHUB_ENV - name: Install Emscripten - uses: mymindstorm/setup-emsdk@v12 + if: ${{ !matrix.cli-only && !matrix.use-cross }} + uses: mymindstorm/setup-emsdk@v14 with: version: ${{ env.EMSCRIPTEN_VERSION }} - - name: Install Rust toolchain - uses: dtolnay/rust-toolchain@stable - with: - targets: ${{ matrix.job.target }} + - run: rustup toolchain install stable --profile minimal + - run: rustup target add ${{ matrix.target }} + - uses: Swatinem/rust-cache@v2 - name: Install cross - if: matrix.job.use-cross + if: ${{ matrix.use-cross }} uses: taiki-e/install-action@v2 with: tool: cross - name: Build custom cross image - if: ${{ matrix.job.use-cross && matrix.job.os == 'ubuntu-latest' }} + if: ${{ matrix.use-cross && matrix.os == 'ubuntu-latest' }} run: | cd .. 
- target="${{ matrix.job.target }}" + target="${{ matrix.target }}" image=ghcr.io/cross-rs/$target:custom - echo "CROSS_IMAGE=$image" >> $GITHUB_ENV + echo "CROSS_IMAGE=$image" >> $GITHUB_ENV - echo "[target.$target]" >> Cross.toml - echo "image = \"$image\"" >> Cross.toml - echo "CROSS_CONFIG=$PWD/Cross.toml" >> $GITHUB_ENV + echo "[target.$target]" >> Cross.toml + echo "image = \"$image\"" >> Cross.toml + echo "CROSS_CONFIG=$PWD/Cross.toml" >> $GITHUB_ENV - echo "FROM ghcr.io/cross-rs/$target:edge" >> Dockerfile - echo "ENV DEBIAN_FRONTEND=noninteractive" >> Dockerfile - echo "RUN apt-get update && apt-get install -y nodejs" >> Dockerfile + echo "FROM ghcr.io/cross-rs/$target:edge" >> Dockerfile + echo "ENV DEBIAN_FRONTEND=noninteractive" >> Dockerfile + echo "RUN apt-get update && apt-get install -y nodejs" >> Dockerfile docker build -t $image . - docker images - docker run --rm $image env - - cd - - - name: Setup extra env + - name: Setup env extras + env: + RUST_TEST_THREADS: ${{ matrix.rust-test-threads || '' }} + USE_CROSS: ${{ matrix.use-cross }} + TARGET: ${{ matrix.target }} + CC: ${{ matrix.cc }} + AR: ${{ matrix.ar }} + IS_WINDOWS: ${{ contains(matrix.os, 'windows') }} + ENABLE_WASM: ${{ matrix.enable-wasm }} run: | PATH="$PWD/.github/scripts:$PATH" - echo "PATH=$PATH" >> $GITHUB_ENV - echo "ROOT=$PWD" >> $GITHUB_ENV - echo "TREE_SITTER=tree-sitter.sh" >> $GITHUB_ENV + echo "$PWD/.github/scripts" >> $GITHUB_PATH - export TARGET=${{ matrix.job.target }} + echo "TREE_SITTER=tree-sitter.sh" >> $GITHUB_ENV echo "TARGET=$TARGET" >> $GITHUB_ENV + echo "ROOT=$PWD" >> $GITHUB_ENV - USE_CROSS="${{ matrix.job.use-cross }}" - - if [ "$USE_CROSS" == "true" ]; then - echo "BUILD_CMD=cross" >> $GITHUB_ENV - - export CROSS=1; echo "CROSS=$CROSS" >> $GITHUB_ENV + [ -n "$RUST_TEST_THREADS" ] && \ + echo "RUST_TEST_THREADS=$RUST_TEST_THREADS" >> $GITHUB_ENV - runner=$(cross.sh bash -c "env | sed -nr '/^CARGO_TARGET_.*_RUNNER=/s///p'") - [ -n "$runner" ] && echo 
"CROSS_RUNNER=$runner" >> $GITHUB_ENV - echo "runner: $runner" + [ -n "$CC" ] && echo "CC=$CC" >> $GITHUB_ENV + [ -n "$AR" ] && echo "AR=$AR" >> $GITHUB_ENV - case "$TARGET" in - i686-unknown-linux-gnu) CC=i686-linux-gnu-gcc AR=i686-linux-gnu-ar ;; - aarch64-unknown-linux-gnu) CC=aarch64-linux-gnu-gcc AR=aarch64-linux-gnu-ar ;; - arm-unknown-linux-gnueabihf) CC=arm-unknown-linux-gnueabihf-gcc AR=arm-unknown-linux-gnueabihf-gcc-ar ;; - esac + [ "$IS_WINDOWS" = "false" ] && echo "CFLAGS=-Werror" >> $GITHUB_ENV - [ -n "$CC" ] && echo "CC=$CC" >> $GITHUB_ENV - [ -n "$AR" ] && echo "AR=$AR" >> $GITHUB_ENV + if [ "$ENABLE_WASM" == "true" ]; then + echo "CLI_FEATURES=wasm" >> $GITHUB_ENV fi - case "$TARGET" in - *-windows-*) - echo "RUST_TEST_THREADS=1" >> $GITHUB_ENV # See #2041 tree-sitter issue - ;; - esac + if [ "$USE_CROSS" == "true" ]; then + echo "BUILD_CMD=cross" >> $GITHUB_ENV + runner=$(BUILD_CMD=cross cross.sh bash -c "env | sed -nr '/^CARGO_TARGET_.*_RUNNER=/s///p'") + [ -n "$runner" ] && echo "CROSS_RUNNER=$runner" >> $GITHUB_ENV + fi - name: Build C library - if: "!contains(matrix.job.os, 'windows')" # Requires an additional adapted Makefile for `cl.exe` compiler - run: make.sh CFLAGS="-Werror" -j + if: ${{ !contains(matrix.os, 'windows') }} # Requires an additional adapted Makefile for `cl.exe` compiler + run: make.sh -j - name: Build wasm library + if: ${{ !matrix.cli-only && !matrix.use-cross }} # No sense to build on the same Github runner hosts many times run: script/build-wasm - name: Build CLI - run: $BUILD_CMD build --release --target=${{ matrix.job.target }} + run: $BUILD_CMD build --release --target=${{ matrix.target }} --features=${CLI_FEATURES} + + - run: script/fetch-fixtures - - name: Fetch fixtures - run: script/fetch-fixtures + - uses: ./.github/actions/cache + id: cache - name: Generate fixtures + if: ${{ !matrix.cli-only && inputs.run_test && steps.cache.outputs.cache-hit != 'true' }} # Can't natively run CLI on Github runner's host run: 
script/generate-fixtures - name: Generate WASM fixtures - if: "!matrix.job.use-cross" + if: ${{ !matrix.cli-only && !matrix.use-cross && inputs.run_test && steps.cache.outputs.cache-hit != 'true' }} # See comment for the "Build wasm library" step run: script/generate-fixtures-wasm - name: Run main tests - run: $BUILD_CMD test --target=${{ matrix.job.target }} + if: ${{ !matrix.cli-only && inputs.run_test }} # Can't natively run CLI on Github runner's host + run: $BUILD_CMD test --target=${{ matrix.target }} --features=${CLI_FEATURES} - name: Run wasm tests - if: "!matrix.job.use-cross" # TODO: Install Emscripten into custom cross images + if: ${{ !matrix.cli-only && !matrix.use-cross && inputs.run_test }} # See comment for the "Build wasm library" step run: script/test-wasm - name: Run benchmarks - if: "!matrix.job.use-cross" # It doesn't make sense to benchmark something in an emulator - run: $BUILD_CMD bench benchmark -p tree-sitter-cli --target=${{ matrix.job.target }} + if: ${{ !matrix.cli-only && !matrix.use-cross && inputs.run_test }} # Cross-compiled benchmarks make no sense + run: $BUILD_CMD bench benchmark -p tree-sitter-cli --target=${{ matrix.target }} - name: Upload CLI artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: - name: tree-sitter.${{ matrix.job.name }} - path: target/${{ matrix.job.target }}/release/tree-sitter${{ contains(matrix.job.target, 'windows') && '.exe' || '' }} + name: tree-sitter.${{ matrix.platform }} + path: target/${{ matrix.target }}/release/tree-sitter${{ env.EXE }} if-no-files-found: error retention-days: 7 - name: Upload WASM artifacts - if: ${{ matrix.job.name == 'linux-x64' }} - uses: actions/upload-artifact@v3 + if: ${{ matrix.platform == 'linux-x64' }} + uses: actions/upload-artifact@v4 with: name: tree-sitter.wasm path: | diff --git a/third-party/tree-sitter/tree-sitter/.github/workflows/checks.yml b/third-party/tree-sitter/tree-sitter/.github/workflows/checks.yml new file mode 100644 
index 00000000000..b9e4d57b302 --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/.github/workflows/checks.yml @@ -0,0 +1,24 @@ +name: Full Rust codebase checks + +on: + workflow_call: + +jobs: + run: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - run: rustup toolchain install stable --profile minimal + - uses: Swatinem/rust-cache@v2 + + - run: make lint + + check_c_warnings: + name: Check C warnings + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Make C library to check that it's able to compile without warnings + run: make -j CFLAGS="-Werror" diff --git a/third-party/tree-sitter/tree-sitter/.github/workflows/ci.yml b/third-party/tree-sitter/tree-sitter/.github/workflows/ci.yml new file mode 100644 index 00000000000..88af711490f --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/.github/workflows/ci.yml @@ -0,0 +1,21 @@ +name: CI + +on: + pull_request: + push: + branches: + - 'master' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ github.event_name != 'push' }} + +jobs: + checks: + uses: ./.github/workflows/checks.yml + + sanitize: + uses: ./.github/workflows/sanitize.yml + + build: + uses: ./.github/workflows/build.yml diff --git a/third-party/tree-sitter/tree-sitter/.github/workflows/fast_checks.yml b/third-party/tree-sitter/tree-sitter/.github/workflows/fast_checks.yml deleted file mode 100644 index ea474799aa5..00000000000 --- a/third-party/tree-sitter/tree-sitter/.github/workflows/fast_checks.yml +++ /dev/null @@ -1,31 +0,0 @@ -name: Fast checks to fail fast on any simple code issues - -env: - CARGO_TERM_COLOR: always - RUSTFLAGS: "-D warnings" - -on: - workflow_call: - -jobs: - check_rust_formatting: - name: Check Rust formating - runs-on: ubuntu-latest - steps: - - - name: Checkout source code - uses: actions/checkout@v3 - - - name: Run cargo fmt - run: cargo fmt -- --check - - check_c_warnings: - name: Check C warnings - runs-on: ubuntu-latest - steps: 
- - - name: Checkout source code - uses: actions/checkout@v3 - - - name: Make C library to check that it's able to compile without warnings - run: make -j CFLAGS="-Werror" diff --git a/third-party/tree-sitter/tree-sitter/.github/workflows/full_rust_checks.yml b/third-party/tree-sitter/tree-sitter/.github/workflows/full_rust_checks.yml deleted file mode 100644 index 2cc5f77dcb7..00000000000 --- a/third-party/tree-sitter/tree-sitter/.github/workflows/full_rust_checks.yml +++ /dev/null @@ -1,32 +0,0 @@ -name: Full Rust codebase checks - -env: - CARGO_TERM_COLOR: always - RUSTFLAGS: "-D warnings" - -on: - workflow_call: - -jobs: - run: - name: Run checks - runs-on: ubuntu-latest - steps: - - - name: Checkout source code - uses: actions/checkout@v3 - - - name: Install rust toolchain - uses: dtolnay/rust-toolchain@master - with: - toolchain: stable - components: clippy, rustfmt - - - name: Run cargo fmt - run: cargo fmt -- --check - - # - name: Run clippy - # run: cargo clippy --all-targets - - - name: Run cargo check - run: cargo check --workspace --examples --tests --benches --bins diff --git a/third-party/tree-sitter/tree-sitter/.github/workflows/msrv.yml b/third-party/tree-sitter/tree-sitter/.github/workflows/msrv.yml deleted file mode 100644 index 3697930e811..00000000000 --- a/third-party/tree-sitter/tree-sitter/.github/workflows/msrv.yml +++ /dev/null @@ -1,42 +0,0 @@ -name: Minimum supported rust version - -env: - CARGO_TERM_COLOR: always - RUSTFLAGS: "-D warnings" - -on: - workflow_call: - inputs: - package: - description: Target cargo package name - required: true - type: string - - -jobs: - run: - name: Run checks - runs-on: ubuntu-latest - steps: - - - name: Checkout source code - uses: actions/checkout@v3 - - - name: Get the MSRV from the package metadata - id: msrv - run: cargo metadata --no-deps --format-version 1 | jq -r '"version=" + (.packages[] | select(.name == "${{ inputs.package }}").rust_version)' >> $GITHUB_OUTPUT - - - name: Install rust 
toolchain (v${{ steps.msrv.outputs.version }}) - uses: dtolnay/rust-toolchain@master - with: - toolchain: ${{ steps.msrv.outputs.version }} - components: clippy, rustfmt - - - name: Run cargo fmt - run: cargo fmt -- --check - - # - name: Run clippy (on minimum supported rust version to prevent warnings we can't fix) - # run: cargo clippy --all-targets - - # - name: Run main tests - # run: cargo test diff --git a/third-party/tree-sitter/tree-sitter/.github/workflows/publish.yml b/third-party/tree-sitter/tree-sitter/.github/workflows/publish.yml deleted file mode 100644 index e1ad3e053a6..00000000000 --- a/third-party/tree-sitter/tree-sitter/.github/workflows/publish.yml +++ /dev/null @@ -1,21 +0,0 @@ -name: Publish to registries - -on: - workflow_call: - -jobs: - crates_io: - name: Publish to Crates.io - runs-on: ubuntu-latest - steps: - - name: Publish packages - run: | - echo "::warning::TODO: add a Crates.io publish logic" - - npm: - name: Publish to npmjs.com - runs-on: ubuntu-latest - steps: - - name: Publish packages - run: | - echo "::warning::TODO: add a npmjs.com publish logic" diff --git a/third-party/tree-sitter/tree-sitter/.github/workflows/release.yml b/third-party/tree-sitter/tree-sitter/.github/workflows/release.yml index 87a0676169d..23170819aa0 100644 --- a/third-party/tree-sitter/tree-sitter/.github/workflows/release.yml +++ b/third-party/tree-sitter/tree-sitter/.github/workflows/release.yml @@ -1,52 +1,27 @@ name: Release - on: - workflow_call: - inputs: - ref: - default: ${{ github.ref }} - type: string + workflow_dispatch: + push: + tags: + - v[0-9]+.[0-9]+.[0-9]+ jobs: - permissions: - name: Check permissions - runs-on: ubuntu-latest - outputs: - release_allowed: ${{ steps.maintainer.outputs.is_maintainer == 'true' }} - steps: - - - name: Is maintainer - id: maintainer - env: - GH_TOKEN: ${{ github.token }} - repo: ${{ github.repository }} - actor: ${{ github.actor }} - run: | - maintainer=$( - gh api "/repos/${repo}/collaborators" | - jq ".[] 
| {login, maintainer: .permissions | .maintain} | select(.login == \"${actor}\") | .maintainer" - ); - if [ "$maintainer" == "true" ]; then - echo "@${actor} has maintainer level permissions :rocket:" >> $GITHUB_STEP_SUMMARY; - echo "is_maintainer=true" >> $GITHUB_OUTPUT - fi + build: + uses: ./.github/workflows/build.yml + with: + run_test: false release: name: Release - needs: permissions - if: needs.permissions.outputs.release_allowed runs-on: ubuntu-latest + needs: build permissions: contents: write steps: - - - name: Checkout source code - uses: actions/checkout@v3 - with: - ref: ${{ inputs.ref }} + - uses: actions/checkout@v4 - name: Download build artifacts - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: path: artifacts @@ -66,36 +41,60 @@ jobs: rm -rf artifacts ls -l target/ - - name: Get tag name from a release/v* branch name - id: tag_name - env: - tag: ${{ github.head_ref }} - run: echo "tag=${tag#release/}" >> $GITHUB_OUTPUT - - - name: Add a release tag - env: - ref: ${{ inputs.ref }} - tag: ${{ steps.tag_name.outputs.tag }} - message: "Release ${{ steps.tag_name.outputs.tag }}" - run: | - git config user.name "${GITHUB_ACTOR}" - git config user.email "${GITHUB_ACTOR}@users.noreply.github.com" - git tag -a "$tag" HEAD -m "$message" - git push origin "$tag" - - name: Create release uses: softprops/action-gh-release@v1 with: - name: ${{ steps.tag_name.outputs.tag }} - tag_name: ${{ steps.tag_name.outputs.tag }} + name: ${{ github.ref_name }} + tag_name: ${{ github.ref_name }} fail_on_unmatched_files: true files: | target/tree-sitter-*.gz target/tree-sitter.wasm target/tree-sitter.js - - name: Merge release PR + crates_io: + name: Publish CLI to Crates.io + runs-on: ubuntu-latest + needs: release + steps: + - uses: actions/checkout@v4 + + - name: Setup Rust + uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + + - name: Publish crates to Crates.io + uses: katyo/publish-crates@v2 + 
with: + registry-token: ${{ secrets.CARGO_REGISTRY_TOKEN }} + + npm: + name: Publish lib to npmjs.com + runs-on: ubuntu-latest + needs: release + strategy: + fail-fast: false + matrix: + directory: ["cli/npm", "lib/binding_web"] + steps: + - uses: actions/checkout@v4 + + - name: Build wasm + if: matrix.directory == 'lib/binding_web' + run: ./script/build-wasm + + - name: Setup Node + uses: actions/setup-node@v4 + with: + node-version: 18 + registry-url: "https://registry.npmjs.org" + + - name: Publish lib to npmjs.com env: - GH_TOKEN: ${{ github.token }} + NODE_AUTH_TOKEN: ${{secrets.NPM_TOKEN}} run: | - gh pr merge ${{ github.event.pull_request.html_url }} --match-head-commit $(git rev-parse HEAD) --merge --delete-branch + cd ${{ matrix.directory }} + npm publish diff --git a/third-party/tree-sitter/tree-sitter/.github/workflows/response.yml b/third-party/tree-sitter/tree-sitter/.github/workflows/response.yml new file mode 100644 index 00000000000..663ae6ad87b --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/.github/workflows/response.yml @@ -0,0 +1,35 @@ +name: no_response +on: + schedule: + - cron: '30 1 * * *' # Run every day at 01:30 + workflow_dispatch: + issue_comment: + +jobs: + close: + if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + steps: + - uses: actions/checkout@v4 + - uses: actions/github-script@v7 + with: + script: | + const script = require('./.github/scripts/close_unresponsive.js') + await script({github, context}) + + remove_label: + if: github.event_name == 'issue_comment' + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + steps: + - uses: actions/checkout@v4 + - uses: actions/github-script@v7 + with: + script: | + const script = require('./.github/scripts/remove_response_label.js') + await script({github, context}) diff --git 
a/third-party/tree-sitter/tree-sitter/.github/workflows/reviewers_remove.yml b/third-party/tree-sitter/tree-sitter/.github/workflows/reviewers_remove.yml new file mode 100644 index 00000000000..b10d8c3d23f --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/.github/workflows/reviewers_remove.yml @@ -0,0 +1,17 @@ +name: "reviewers: remove" +on: + pull_request_target: + types: [converted_to_draft, closed] +jobs: + remove-reviewers: + runs-on: ubuntu-latest + permissions: + pull-requests: write + steps: + - uses: actions/checkout@v4 + - name: 'Remove reviewers' + uses: actions/github-script@v7 + with: + script: | + const script = require('./.github/scripts/reviewers_remove.js') + await script({github, context}) diff --git a/third-party/tree-sitter/tree-sitter/.github/workflows/sanitize.yml b/third-party/tree-sitter/tree-sitter/.github/workflows/sanitize.yml new file mode 100644 index 00000000000..995218e26e1 --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/.github/workflows/sanitize.yml @@ -0,0 +1,53 @@ +name: Sanitize + +env: + CARGO_TERM_COLOR: always + RUSTFLAGS: "-D warnings" + +on: + workflow_call: + +jobs: + check_undefined_behaviour: + name: Sanitizer checks + runs-on: ubuntu-latest + timeout-minutes: 20 + env: + TREE_SITTER: ${{ github.workspace }}/target/release/tree-sitter + steps: + - name: Checkout source code + uses: actions/checkout@v4 + + - name: Install UBSAN library + run: sudo apt-get update -y && sudo apt-get install -y libubsan1 + + - run: rustup toolchain install stable --profile minimal + - uses: Swatinem/rust-cache@v2 + + - name: Build CLI + run: cargo build --release + + - run: script/fetch-fixtures + + - uses: ./.github/actions/cache + id: cache + + - if: ${{ steps.cache.outputs.cache-hit != 'true' }} + run: script/generate-fixtures + + - name: Run main tests with undefined behaviour sanitizer (UBSAN) + env: + UBSAN_OPTIONS: halt_on_error=1 + CFLAGS: -fsanitize=undefined + RUSTFLAGS: ${{ env.RUSTFLAGS }} -lubsan + run: cargo test -- 
--test-threads 1 + + - name: Run main tests with address sanitizer (ASAN) + env: + ASAN_OPTIONS: halt_on_error=1 + CFLAGS: -fsanitize=address + RUSTFLAGS: ${{ env.RUSTFLAGS }} -Zsanitizer=address --cfg=sanitizing + run: | + rustup install nightly + rustup component add rust-src --toolchain nightly-x86_64-unknown-linux-gnu + cargo +nightly test -Z build-std --target x86_64-unknown-linux-gnu -- --test-threads 1 diff --git a/third-party/tree-sitter/tree-sitter/.gitignore b/third-party/tree-sitter/tree-sitter/.gitignore index 834fd20fce9..53550dd76f8 100644 --- a/third-party/tree-sitter/tree-sitter/.gitignore +++ b/third-party/tree-sitter/tree-sitter/.gitignore @@ -7,6 +7,7 @@ log*.html fuzz-results +/tree-sitter.pc test/fixtures/grammars/* !test/fixtures/grammars/.gitkeep package-lock.json @@ -24,4 +25,6 @@ docs/assets/js/tree-sitter.js *.obj *.exp *.lib -*.wasm \ No newline at end of file +*.wasm +.swiftpm +zig-* diff --git a/third-party/tree-sitter/tree-sitter/CHANGELOG.md b/third-party/tree-sitter/tree-sitter/CHANGELOG.md new file mode 100644 index 00000000000..834c620191c --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/CHANGELOG.md @@ -0,0 +1,107 @@ +# Changelog + +## [0.21.0] - 2024-02-21 + +### Breaking +- Remove the apply-all-captures flag, make last-wins precedence the default + + **NOTE**: This change might cause breakage in your grammar's highlight tests. + Just flip the order around of the relevant queries, and keep in mind that the + last query that matches will win. 
+ +### Features +- Use lockfiles to dedup recompilation +- Improve error message for files with an unknown grammar path (https://github.com/tree-sitter/tree-sitter/pull/2475) +- Implement first-line-regex (https://github.com/tree-sitter/tree-sitter/pull/2479) +- Error out if an empty string is in the `extras` array +- Allow specifying an external scanner's files (https://github.com/tree-sitter/tree-sitter/pull/3031) +- Better error info when a scanner is missing required symbols +- **cli**: Add an optional `grammar-path` argument for the playground (https://github.com/tree-sitter/tree-sitter/pull/3014) +- **cli**: Add optional `config-path` argument (https://github.com/tree-sitter/tree-sitter/pull/3050) +- **loader**: Add more commonly used default parser directories + + +### Bug Fixes +- Prettify xml output and add node position info (https://github.com/tree-sitter/tree-sitter/pull/2970) +- Inherited grammar generation +- Properly error out when the word property is an invalid rule +- Update schema for regex flags (https://github.com/tree-sitter/tree-sitter/pull/3006) +- Properly handle Query.matches when filtering out results (https://github.com/tree-sitter/tree-sitter/pull/3013) +- Sexp format edge case with quoted closed parenthesis (https://github.com/tree-sitter/tree-sitter/pull/3016) +- Always push the default files if there's no `externals` +- Don't log NUL characters (https://github.com/tree-sitter/tree-sitter/pull/3037) +- Don't throw an error if the user uses `map` in the grammar (https://github.com/tree-sitter/tree-sitter/pull/3041) +- Remove redundant imports (https://github.com/tree-sitter/tree-sitter/pull/3047) +- **cli**: Installation via a HTTP tunnel proxy (https://github.com/tree-sitter/tree-sitter/pull/2824) +- **cli**: Don't update tests automatically if parse errors are detected (https://github.com/tree-sitter/tree-sitter/pull/3033) +- **cli**: Don't use `long` for `grammar_path` +- **test**: Allow writing updates to tests without erroneous 
nodes instead of denying all of them if a single error is found +- **test**: Edge case when parsing `UNEXPECTED`/`MISSING` nodes with an indentation level greater than 0 +- **wasm**: Remove C++ mangled symbols (https://github.com/tree-sitter/tree-sitter/pull/2971) + + +### Documentation +- Create issue template (https://github.com/tree-sitter/tree-sitter/pull/2978) +- Document regex limitations +- Mention that `token($.foo)` is illegal +- Explicitly mention behavior of walking outside the given "root" node for a `TSTreeCursor` (https://github.com/tree-sitter/tree-sitter/pull/3021) +- Small fixes (https://github.com/tree-sitter/tree-sitter/pull/2987) +- Add `Tact` language parser (https://github.com/tree-sitter/tree-sitter/pull/3030) +- **web**: Provide deno usage information (https://github.com/tree-sitter/tree-sitter/pull/2498) + + +### Refactor +- Extract regex check into a function and lower its precedence +- `&PathBuf` -> `&Path` (https://github.com/tree-sitter/tree-sitter/pull/3035) +- Name anonymous types in api.h (https://github.com/tree-sitter/tree-sitter/pull/1659) + + +### Testing +- Add quotes around bash variables (https://github.com/tree-sitter/tree-sitter/pull/3023) +- Update html tests + + +### Build System and CI +- Only create release for normal semver tags (https://github.com/tree-sitter/tree-sitter/pull/2973) +- Add useful development targets to makefile (https://github.com/tree-sitter/tree-sitter/pull/2979) +- Remove minimum glibc information in summary page (https://github.com/tree-sitter/tree-sitter/pull/2988) +- Use the native m1 mac runner (https://github.com/tree-sitter/tree-sitter/pull/2995) +- Add editorconfig (https://github.com/tree-sitter/tree-sitter/pull/2998) +- Remove symbolic links from repository (https://github.com/tree-sitter/tree-sitter/pull/2997) +- Move common Cargo.toml keys into the workspace and inherit them (https://github.com/tree-sitter/tree-sitter/pull/3019) +- Remove reviewers when drafting or closing a PR 
(https://github.com/tree-sitter/tree-sitter/pull/2963) +- Enable creating changelogs with git-cliff (https://github.com/tree-sitter/tree-sitter/pull/3040) +- Cache fixtures (https://github.com/tree-sitter/tree-sitter/pull/3038) +- Don't cancel jobs on master (https://github.com/tree-sitter/tree-sitter/pull/3052) +- Relax caching requirements (https://github.com/tree-sitter/tree-sitter/pull/3051) +- **deps**: Bump clap from 4.4.18 to 4.5.0 (https://github.com/tree-sitter/tree-sitter/pull/3007) +- **deps**: Bump wasmtime from v16.0.0 to v17.0.1 (https://github.com/tree-sitter/tree-sitter/pull/3008) +- **deps**: Bump wasmtime to v18.0.1 (https://github.com/tree-sitter/tree-sitter/pull/3057) +- **sanitize**: Add a timeout of 60 minutes (https://github.com/tree-sitter/tree-sitter/pull/3017) +- **sanitize**: Reduce timeout to 20 minutes (https://github.com/tree-sitter/tree-sitter/pull/3054) + + +### Other +- Document preferred language for scanner (https://github.com/tree-sitter/tree-sitter/pull/2972) +- Add java and tsx to corpus tests (https://github.com/tree-sitter/tree-sitter/pull/2992) +- Provide a CLI flag to open `log.html` (https://github.com/tree-sitter/tree-sitter/pull/2996) +- Some more clippy lints (https://github.com/tree-sitter/tree-sitter/pull/3010) +- Remove deprecated query parsing mechanism (https://github.com/tree-sitter/tree-sitter/pull/3011) +- Print out full compiler arguments ran when it fails (https://github.com/tree-sitter/tree-sitter/pull/3018) +- Deprecate C++ scanners (https://github.com/tree-sitter/tree-sitter/pull/3020) +- Add some documentation to the playground page (https://github.com/tree-sitter/tree-sitter/pull/1495) +- Update relevant rust tests (https://github.com/tree-sitter/tree-sitter/pull/2947) +- Clippy lints (https://github.com/tree-sitter/tree-sitter/pull/3032) +- Error out when multiple arguments are passed to `token`/`token.immediate` (https://github.com/tree-sitter/tree-sitter/pull/3036) +- Tidying +- Prefer turbofish syntax 
where possible (https://github.com/tree-sitter/tree-sitter/pull/3048) +- Use published wasmtime crates +- Cleaner cast +- Update Cargo.lock +- Get rid of `github_issue_test` file (https://github.com/tree-sitter/tree-sitter/pull/3055) +- **cli**: Use spawn to display `emcc`'s stdout and stderr (https://github.com/tree-sitter/tree-sitter/pull/2494) +- **cli**: Warn users when a query path needed for a subcommand isn't specified in a grammar's package.json +- **generate**: Dedup and warn about duplicate or invalid rules (https://github.com/tree-sitter/tree-sitter/pull/2994) +- **test**: Use different languages for async tests (https://github.com/tree-sitter/tree-sitter/pull/2953) +- **wasm**: Use `SIDE_MODULE=2` to silence warning (https://github.com/tree-sitter/tree-sitter/pull/3003) + diff --git a/third-party/tree-sitter/tree-sitter/CONTRIBUTING.md b/third-party/tree-sitter/tree-sitter/CONTRIBUTING.md index 4f64371073d..42bc7b75f06 100644 --- a/third-party/tree-sitter/tree-sitter/CONTRIBUTING.md +++ b/third-party/tree-sitter/tree-sitter/CONTRIBUTING.md @@ -1 +1 @@ -docs/section-6-contributing.md \ No newline at end of file +See [section-6-contributing.md](./docs/section-6-contributing.md) diff --git a/third-party/tree-sitter/tree-sitter/Cargo.lock b/third-party/tree-sitter/tree-sitter/Cargo.lock index 956a3f41071..180ce544f3a 100644 --- a/third-party/tree-sitter/tree-sitter/Cargo.lock +++ b/third-party/tree-sitter/tree-sitter/Cargo.lock @@ -2,11 +2,23 @@ # It is not intended for manual editing. 
version = 3 +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "aho-corasick" -version = "0.7.20" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" +checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" dependencies = [ "memchr", ] @@ -20,11 +32,65 @@ dependencies = [ "winapi", ] +[[package]] +name = "anstream" +version = "0.6.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d96bd03f33fe50a863e394ee9718a706f988b9079b20c3784fb726e7678b62fb" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" + +[[package]] +name = "anstyle-parse" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" +dependencies = [ + "windows-sys 0.52.0", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" +dependencies = [ + "anstyle", + "windows-sys 0.52.0", +] + [[package]] name = "anyhow" -version = "1.0.70" +version = "1.0.80" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ad32ce52e4161730f7098c077cd2ed6229b5804ccf99e5366be1ab72a98b4e1" + +[[package]] +name = "arbitrary" +version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7de8ce5e0f9f8d88245311066a578d72b7af3e7088f32783804676302df237e4" +checksum = "7d5a26814d8dcb93b0e5a0ff3c6d80a8843bafb21b39e8e18a6f05471870e110" [[package]] name = "ascii" @@ -33,21 +99,42 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d92bec98840b8f03a5ff5413de5293bfcd8bf96467cf5452609f939ec6f5de16" [[package]] -name = "atty" -version = "0.2.14" +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "bincode" +version = "1.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" dependencies = [ - "hermit-abi 0.1.19", - "libc", - "winapi", + "serde", ] [[package]] -name = "autocfg" -version = "1.1.0" +name = "bindgen" +version = "0.69.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0" +dependencies = [ + "bitflags 2.4.2", + "cexpr", + "clang-sys", + "itertools 0.12.1", + "lazy_static", + "lazycell", + "log", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn", + "which 4.4.2", +] [[package]] name = "bitflags" @@ -55,23 +142,33 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bitflags" +version = 
"2.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" + [[package]] name = "bumpalo" -version = "3.12.0" +version = "3.15.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535" +checksum = "7ff69b9dd49fd426c69a0db9fc04dd934cdb6645ff000864d98f7e2af8830eaa" [[package]] name = "bytes" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be" +checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" [[package]] name = "cc" -version = "1.0.79" +version = "1.0.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" +checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5" +dependencies = [ + "jobserver", + "libc", +] [[package]] name = "cesu8" @@ -79,33 +176,90 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c" +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "cfg_aliases" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" + [[package]] name = "chunked_transfer" -version = "1.4.1" +version = "1.5.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e4de3bc4ea267985becf712dc6d9eed8b04c953b3fcfb339ebc87acd9804901" + +[[package]] +name = "clang-sys" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cca491388666e04d7248af3f60f0c40cfb0991c72205595d7c396e3510207d1a" +checksum = "67523a3b4be3ce1989d607a828d036249522dd9c1c8de7f4dd2dae43a37369d1" +dependencies = [ + "glob", + "libc", + "libloading", +] [[package]] name = "clap" -version = "2.34.0" +version = "4.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" +checksum = "b230ab84b0ffdf890d5a10abdbc8b83ae1c4918275daea1ab8801f71536b2651" dependencies = [ - "ansi_term", - "atty", - "bitflags", + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae129e2e766ae0ec03484e609954119f123cc1fe650337e155d03b022f24f7b4" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", "strsim", - "textwrap", - "unicode-width", - "vec_map", ] +[[package]] +name = "clap_derive" +version = "4.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "307bc0538d5f0f83b8248db3087aa92fe504e4691294d0c96c0eabc33f47ba47" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" + +[[package]] +name = "colorchoice" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" + [[package]] name = "combine" version = "4.6.6" @@ -118,9 +272,9 @@ dependencies = [ [[package]] name = "core-foundation" -version = "0.9.3" +version = "0.9.4" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "194a7a9e6de53fa55116934067c844d9d749312f75c6f6d0980e8c252f8c2146" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" dependencies = [ "core-foundation-sys", "libc", @@ -128,18 +282,146 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.4" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" + +[[package]] +name = "cranelift-bforest" +version = "0.105.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9515fcc42b6cb5137f76b84c1a6f819782d0cf12473d145d3bc5cd67eedc8bc2" +dependencies = [ + "cranelift-entity", +] + +[[package]] +name = "cranelift-codegen" +version = "0.105.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ad827c6071bfe6d22de1bc331296a29f9ddc506ff926d8415b435ec6a6efce0" +dependencies = [ + "bumpalo", + "cranelift-bforest", + "cranelift-codegen-meta", + "cranelift-codegen-shared", + "cranelift-control", + "cranelift-entity", + "cranelift-isle", + "gimli", + "hashbrown 0.14.3", + "log", + "regalloc2", + "smallvec", + "target-lexicon", +] + +[[package]] +name = "cranelift-codegen-meta" +version = "0.105.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10e6b36237a9ca2ce2fb4cc7741d418a080afa1327402138412ef85d5367bef1" +dependencies = [ + "cranelift-codegen-shared", +] + +[[package]] +name = "cranelift-codegen-shared" +version = "0.105.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c36bf4bfb86898a94ccfa773a1f86e8a5346b1983ff72059bdd2db4600325251" + +[[package]] +name = "cranelift-control" +version = "0.105.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cbf36560e7a6bd1409ca91e7b43b2cc7ed8429f343d7605eadf9046e8fac0d0" +dependencies = [ + "arbitrary", +] + 
+[[package]] +name = "cranelift-entity" +version = "0.105.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a71e11061a75b1184c09bea97c026a88f08b59ade96a7bb1f259d4ea0df2e942" +dependencies = [ + "serde", + "serde_derive", +] + +[[package]] +name = "cranelift-frontend" +version = "0.105.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af5d4da63143ee3485c7bcedde0a818727d737d1083484a0ceedb8950c89e495" +dependencies = [ + "cranelift-codegen", + "log", + "smallvec", + "target-lexicon", +] + +[[package]] +name = "cranelift-isle" +version = "0.105.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "457a9832b089e26f5eea70dcf49bed8ec6edafed630ce7c83161f24d46ab8085" + +[[package]] +name = "cranelift-native" +version = "0.105.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b490d579df1ce365e1ea359e24ed86d82289fa785153327c2f6a69a59a731e4" +dependencies = [ + "cranelift-codegen", + "libc", + "target-lexicon", +] + +[[package]] +name = "cranelift-wasm" +version = "0.105.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cd747ed7f9a461dda9c388415392f6bb95d1a6ef3b7694d17e0817eb74b7798" +dependencies = [ + "cranelift-codegen", + "cranelift-entity", + "cranelift-frontend", + "itertools 0.10.5", + "log", + "smallvec", + "wasmparser 0.121.2", + "wasmtime-types", +] + +[[package]] +name = "crc32fast" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" +checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" +dependencies = [ + "cfg-if", +] [[package]] name = "ctor" -version = "0.1.26" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d2301688392eb071b0bf1a37be05c469d3cc4dbbd95df672fe28ab021e6a096" +checksum = 
"ad291aa74992b9b7a7e88c38acbbf6ad7e107f1d90ee8775b7bc1fc3394f485c" dependencies = [ "quote", - "syn 1.0.109", + "syn", +] + +[[package]] +name = "ctrlc" +version = "3.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "672465ae37dc1bc6380a6547a8883d5dd397b0f1faaad4f265726cc7042a5345" +dependencies = [ + "nix", + "windows-sys 0.52.0", ] [[package]] @@ -156,89 +438,127 @@ checksum = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" [[package]] name = "dirs" -version = "3.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30baa043103c9d0c2a57cf537cc2f35623889dc0d405e6c3cccfadbc81c71309" -dependencies = [ - "dirs-sys", -] - -[[package]] -name = "dirs" -version = "4.0.0" +version = "5.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca3aa72a6f96ea37bbc5aa912f6788242832f75369bdfdadcb0e38423f100059" +checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" dependencies = [ "dirs-sys", ] [[package]] name = "dirs-sys" -version = "0.3.7" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b1d1d91c932ef41c0f2663aa8b0ca0342d444d842c06914aa0a7e352d0bada6" +checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" dependencies = [ "libc", + "option-ext", "redox_users", - "winapi", + "windows-sys 0.48.0", ] [[package]] name = "either" -version = "1.8.1" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" + +[[package]] +name = "equivalent" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "errno" -version = "0.3.0" +version = "0.3.8" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "50d6a0976c999d473fe89ad888d5a284e55366d9dc9038b1ba2aa15128c4afa0" +checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" dependencies = [ - "errno-dragonfly", "libc", - "windows-sys", + "windows-sys 0.52.0", ] [[package]] -name = "errno-dragonfly" -version = "0.1.2" +name = "fallible-iterator" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" -dependencies = [ - "cc", - "libc", -] +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" [[package]] name = "fastrand" -version = "1.9.0" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" + +[[package]] +name = "filetime" +version = "0.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" +checksum = "1ee447700ac8aa0b2f2bd7bc4462ad686ba06baa6727ac149a2d6277f0d240fd" dependencies = [ - "instant", + "cfg-if", + "libc", + "redox_syscall", + "windows-sys 0.52.0", ] [[package]] name = "form_urlencoded" -version = "1.1.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9c384f161156f5260c24a097c56119f9be8c798586aecc13afbcbe7b7e26bf8" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" dependencies = [ "percent-encoding", ] +[[package]] +name = "fs4" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57b1e34e369d7f0151309821497440bd0266b86c77ccd69717c3b67e5eaeffe4" +dependencies = [ + "rustix", + "windows-sys 0.52.0", +] + [[package]] name = "getrandom" -version = "0.2.8" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" +checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" dependencies = [ "cfg-if", "libc", "wasi", ] +[[package]] +name = "gimli" +version = "0.28.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" +dependencies = [ + "fallible-iterator", + "indexmap", + "stable_deref_trait", +] + +[[package]] +name = "git2" +version = "0.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b3ba52851e73b46a4c3df1d89343741112003f0f6f13beb0dfac9e457c3fdcd" +dependencies = [ + "bitflags 2.4.2", + "libc", + "libgit2-sys", + "log", + "openssl-probe", + "openssl-sys", + "url", +] + [[package]] name = "glob" version = "0.3.1" @@ -247,24 +567,36 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "hashbrown" -version = "0.12.3" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" +dependencies = [ + "ahash", +] [[package]] -name = "hermit-abi" -version = "0.1.19" +name = "hashbrown" +version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" dependencies = [ - "libc", + "ahash", ] [[package]] -name = "hermit-abi" -version = "0.3.1" +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "home" +version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" +checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" +dependencies = [ + "windows-sys 0.52.0", +] [[package]] name = "html-escape" @@ -277,15 +609,15 @@ dependencies = [ [[package]] name = "httpdate" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" [[package]] name = "idna" -version = "0.3.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e14ddfc70884202db2244c223200c204c2bda1bc6e0998d11b5e024d657209e6" +checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" dependencies = [ "unicode-bidi", "unicode-normalization", @@ -293,39 +625,44 @@ dependencies = [ [[package]] name = "indexmap" -version = "1.9.3" +version = "2.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +checksum = "7b0b929d511467233429c45a44ac1dcaa21ba0f5ba11e4879e6ed28ddb4f9df4" dependencies = [ - "autocfg", - "hashbrown", + "equivalent", + "hashbrown 0.14.3", + "serde", ] [[package]] -name = "instant" -version = "0.1.12" +name = "indoc" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e186cfbae8084e513daff4240b4797e342f988cecda4fb6c939150f96315fd8" + +[[package]] +name = "itertools" +version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" dependencies = [ - "cfg-if", + "either", ] [[package]] -name = "io-lifetimes" -version = "1.0.9" +name = "itertools" +version = "0.12.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "09270fd4fa1111bc614ed2246c7ef56239a3063d5be0d1ec3b589c505d400aeb" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" dependencies = [ - "hermit-abi 0.3.1", - "libc", - "windows-sys", + "either", ] [[package]] name = "itoa" -version = "1.0.6" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6" +checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" [[package]] name = "jni" @@ -340,7 +677,7 @@ dependencies = [ "log", "thiserror", "walkdir", - "windows-sys", + "windows-sys 0.45.0", ] [[package]] @@ -349,11 +686,20 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8eaf4bc02d17cbdd7ff4c7438cafcdf7fb9a4613313ad11b4f8fefe7d3fa0130" +[[package]] +name = "jobserver" +version = "0.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab46a6e9526ddef3ae7f787c06f0f2600639ba80ea3eade3d8e670a2230f51d6" +dependencies = [ + "libc", +] + [[package]] name = "js-sys" -version = "0.3.61" +version = "0.3.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "445dde2150c55e483f3d8416706b97ec8e8237c307e5b7b4b8dd15e6af2a0730" +checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" dependencies = [ "wasm-bindgen", ] @@ -365,38 +711,107 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] -name = "libc" -version = "0.2.141" +name = "lazycell" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3304a64d199bb964be99741b7a14d26972741915b3649639149b2479bb46f4b5" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] -name = "libloading" -version = "0.7.4" 
+name = "leb128" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" -dependencies = [ - "cfg-if", - "winapi", -] +checksum = "884e2677b40cc8c339eaefcb701c32ef1fd2493d71118dc0ca4b6a736c93bd67" [[package]] -name = "linux-raw-sys" -version = "0.3.1" +name = "libc" +version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d59d8c75012853d2e872fb56bc8a2e53718e2cafe1a4c823143141c6d90c322f" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" [[package]] -name = "log" -version = "0.4.17" +name = "libgit2-sys" +version = "0.16.2+1.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" +checksum = "ee4126d8b4ee5c9d9ea891dd875cfdc1e9d0950437179104b183d7d8a74d24e8" dependencies = [ - "cfg-if", + "cc", + "libc", + "libssh2-sys", + "libz-sys", + "openssl-sys", + "pkg-config", ] [[package]] -name = "malloc_buf" +name = "libloading" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" +dependencies = [ + "cfg-if", + "windows-targets 0.52.4", +] + +[[package]] +name = "libredox" +version = "0.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85c833ca1e66078851dba29046874e38f08b2c883700aa29a03ddd3b23814ee8" +dependencies = [ + "bitflags 2.4.2", + "libc", + "redox_syscall", +] + +[[package]] +name = "libssh2-sys" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dc8a030b787e2119a731f1951d6a773e2280c660f8ec4b0f5e1505a386e71ee" +dependencies = [ + "cc", + "libc", + "libz-sys", + "openssl-sys", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "libz-sys" +version = "1.1.15" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "037731f5d3aaa87a5675e895b63ddff1a87624bc29f77004ea829809654e48f6" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "linux-raw-sys" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" + +[[package]] +name = "log" +version = "0.4.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" + +[[package]] +name = "mach" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b823e83b2affd8f40a9ee8c29dbc56404c1e34cd2710921f2801e2cf29527afa" +dependencies = [ + "libc", +] + +[[package]] +name = "malloc_buf" version = "0.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62bb907fe88d54d8d9ce32a3cceab4218ed2f6b7d35617cafe9adf84e43919cb" @@ -406,9 +821,33 @@ dependencies = [ [[package]] name = "memchr" -version = "2.5.0" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" + +[[package]] +name = "memfd" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +checksum = "b2cffa4ad52c6f791f4f8b15f0c05f9824b2ced1160e88cc393d64fff9a8ac64" +dependencies = [ + "rustix", +] + +[[package]] +name = "memoffset" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" +dependencies = [ + "autocfg", +] + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "ndk-context" @@ -416,6 +855,28 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "27b02d87554356db9e9a873add8782d4ea6e3e58ea071a9adb9a2e8ddb884a8b" +[[package]] +name = "nix" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4" +dependencies = [ + "bitflags 2.4.2", + "cfg-if", + "cfg_aliases", + "libc", +] + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "objc" version = "0.2.7" @@ -425,26 +886,71 @@ dependencies = [ "malloc_buf", ] +[[package]] +name = "object" +version = "0.32.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +dependencies = [ + "crc32fast", + "hashbrown 0.14.3", + "indexmap", + "memchr", +] + [[package]] name = "once_cell" -version = "1.17.1" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] -name = "output_vt100" -version = "0.1.3" +name = "openssl-probe" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "628223faebab4e3e40667ee0b2336d34a5b960ff60ea743ddfdbcf7770bcfb66" +checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" + +[[package]] +name = "openssl-sys" +version = "0.9.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dda2b0f344e78efc2facf7d195d098df0dd72151b26ab98da807afc26c198dff" 
dependencies = [ - "winapi", + "cc", + "libc", + "pkg-config", + "vcpkg", ] +[[package]] +name = "option-ext" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" + +[[package]] +name = "paste" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" + [[package]] name = "percent-encoding" -version = "2.2.0" +version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + +[[package]] +name = "pin-project-lite" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" + +[[package]] +name = "pkg-config" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" [[package]] name = "ppv-lite86" @@ -454,40 +960,47 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "pretty_assertions" -version = "0.7.2" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cab0e7c02cf376875e9335e0ba1da535775beb5450d21e1dffca068818ed98b" +checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66" dependencies = [ - "ansi_term", - "ctor", "diff", - "output_vt100", + "yansi", +] + +[[package]] +name = "prettyplease" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a41cf62165e97c7f814d2221421dbb9afcbcdb0a88068e5ea206e19951c2cbb5" +dependencies = [ + "proc-macro2", + "syn", ] [[package]] name = "proc-macro2" -version = "1.0.56" 
+version = "1.0.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435" +checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" dependencies = [ "unicode-ident", ] [[package]] -name = "proc_macro" -version = "0.1.0" +name = "psm" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874" dependencies = [ - "proc-macro2", - "quote", - "rand", - "syn 1.0.109", + "cc", ] [[package]] name = "quote" -version = "1.0.26" +version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" dependencies = [ "proc-macro2", ] @@ -530,38 +1043,54 @@ checksum = "f2ff9a1f06a88b01621b7ae906ef0211290d1c8a168a15542486a8f61c0833b9" [[package]] name = "redox_syscall" -version = "0.2.16" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" dependencies = [ - "bitflags", + "bitflags 1.3.2", ] [[package]] -name = "redox_syscall" -version = "0.3.5" +name = "redox_users" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" +checksum = "a18479200779601e498ada4e8c1e1f50e3ee19deb0259c25825a98b5603b2cb4" dependencies = [ - "bitflags", + "getrandom", + "libredox", + "thiserror", ] [[package]] -name = "redox_users" -version = "0.4.3" +name = "regalloc2" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" 
+checksum = "ad156d539c879b7a24a363a2016d77961786e71f48f2e2fc8302a92abd2429a6" dependencies = [ - "getrandom", - "redox_syscall 0.2.16", - "thiserror", + "hashbrown 0.13.2", + "log", + "rustc-hash", + "slice-group-by", + "smallvec", ] [[package]] name = "regex" -version = "1.7.3" +version = "1.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d" +checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" dependencies = [ "aho-corasick", "memchr", @@ -570,9 +1099,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.6.29" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" [[package]] name = "rustc-hash" @@ -582,23 +1111,22 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "rustix" -version = "0.37.7" +version = "0.38.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2aae838e49b3d63e9274e1c01833cc8139d3fec468c3b84688c628f44b1ae11d" +checksum = "6ea3e1a662af26cd7a3ba09c0297a31af215563ecf42817c98df621387f4e949" dependencies = [ - "bitflags", + "bitflags 2.4.2", "errno", - "io-lifetimes", "libc", "linux-raw-sys", - "windows-sys", + "windows-sys 0.52.0", ] [[package]] name = "ryu" -version = "1.0.13" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041" +checksum = 
"e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" [[package]] name = "same-file" @@ -611,35 +1139,35 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.17" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed" +checksum = "92d43fe69e652f3df9bdc2b85b2854a0825b86e4fb76bc44d945137d053639ca" [[package]] name = "serde" -version = "1.0.159" +version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c04e8343c3daeec41f58990b9d77068df31209f2af111e059e9fe9646693065" +checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.159" +version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c614d17805b093df4b147b51339e7e44bf05ef59fba1e45d83500bcfb4d8585" +checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.13", + "syn", ] [[package]] name = "serde_json" -version = "1.0.95" +version = "1.0.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d721eca97ac802aa7777b701877c8004d950fc142651367300d21c1cc0194744" +checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0" dependencies = [ "indexmap", "itoa", @@ -647,23 +1175,62 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_spanned" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb3622f419d1296904700073ea6cc23ad690adbd66f13ea683df73298736f0c1" +dependencies = [ + "serde", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "slice-group-by" +version = "0.3.1" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "826167069c09b99d56f31e9ae5c99049e932a98c9dc2dac47645b08dbbf76ba7" + [[package]] name = "smallbitvec" version = "2.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75ce4f9dc4a41b4c3476cc925f1efb11b66df373a8fde5d4b8915fa91b5d995e" +[[package]] +name = "smallvec" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" + +[[package]] +name = "sptr" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b9b39299b249ad65f3b7e96443bad61c02ca5cd3589f46cb6d610a0fd6c0d6a" + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "strsim" -version = "0.8.0" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" +checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01" [[package]] name = "syn" -version = "1.0.109" +version = "2.0.52" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +checksum = "b699d15b36d1f02c3e7c69f8ffef53de37aefae075d8488d4ba1a7788d574a07" dependencies = [ "proc-macro2", "quote", @@ -671,56 +1238,41 @@ dependencies = [ ] [[package]] -name = "syn" -version = "2.0.13" +name = "target-lexicon" +version = "0.12.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c9da457c5285ac1f936ebd076af6dac17a61cfe7826f2076b4d015cf47bc8ec" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] +checksum = "e1fc403891a21bcfb7c37834ba66a547a8f402146eba7265b5a6d88059c9ff2f" [[package]] 
name = "tempfile" -version = "3.5.0" +version = "3.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9fbec84f381d5795b08656e4912bec604d162bff9291d6189a78f4c8ab87998" +checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" dependencies = [ "cfg-if", "fastrand", - "redox_syscall 0.3.5", "rustix", - "windows-sys", -] - -[[package]] -name = "textwrap" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" -dependencies = [ - "unicode-width", + "windows-sys 0.52.0", ] [[package]] name = "thiserror" -version = "1.0.40" +version = "1.0.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac" +checksum = "1e45bcbe8ed29775f228095caf2cd67af7a4ccf756ebff23a306bf3e8b47b24b" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.40" +version = "1.0.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" +checksum = "a953cb265bef375dae3de6663da4d3804eee9682ea80d8e2542529b73c531c81" dependencies = [ "proc-macro2", "quote", - "syn 2.0.13", + "syn", ] [[package]] @@ -752,76 +1304,141 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "toml" -version = "0.5.11" +version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" +checksum = "9a9aad4a3066010876e8dcf5a8a06e70a558751117a145c6ce2b82c2e2054290" dependencies = [ "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] + +[[package]] +name = "toml_datetime" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_edit" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c1b5fd4128cc8d3e0cb74d4ed9a9cc7c7284becd4df68f5f940e1ad123606f6" +dependencies = [ + "indexmap", + "serde", + "serde_spanned", + "toml_datetime", + "winnow", +] + +[[package]] +name = "tracing" +version = "0.1.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +dependencies = [ + "once_cell", ] [[package]] name = "tree-sitter" -version = "0.20.10" +version = "0.22.1" dependencies = [ + "bindgen", "cc", - "lazy_static", "regex", + "wasmtime", + "wasmtime-c-api-impl", ] [[package]] name = "tree-sitter-cli" -version = "0.20.8" +version = "0.22.1" dependencies = [ "ansi_term", + "anstyle", "anyhow", - "atty", "clap", "ctor", + "ctrlc", "difference", - "dirs 3.0.2", + "dirs", + "filetime", "glob", + "heck", "html-escape", "indexmap", + "indoc", "lazy_static", "log", + "memchr", "pretty_assertions", - "proc_macro", "rand", "regex", "regex-syntax", "rustc-hash", "semver", "serde", + "serde_derive", "serde_json", "smallbitvec", "tempfile", "tiny_http", - "toml", "tree-sitter", "tree-sitter-config", "tree-sitter-highlight", "tree-sitter-loader", "tree-sitter-tags", + "tree-sitter-tests-proc-macro", 
"unindent", "walkdir", + "wasmparser 0.201.0", "webbrowser", - "which", + "which 6.0.0", ] [[package]] name = "tree-sitter-config" -version = "0.19.0" +version = "0.22.1" dependencies = [ "anyhow", - "dirs 3.0.2", + "dirs", "serde", "serde_json", ] [[package]] name = "tree-sitter-highlight" -version = "0.20.1" +version = "0.22.1" dependencies = [ + "lazy_static", "regex", "thiserror", "tree-sitter", @@ -829,11 +1446,13 @@ dependencies = [ [[package]] name = "tree-sitter-loader" -version = "0.20.0" +version = "0.22.1" dependencies = [ "anyhow", "cc", - "dirs 3.0.2", + "dirs", + "fs4", + "indoc", "libloading", "once_cell", "regex", @@ -842,11 +1461,12 @@ dependencies = [ "tree-sitter", "tree-sitter-highlight", "tree-sitter-tags", + "which 6.0.0", ] [[package]] name = "tree-sitter-tags" -version = "0.20.2" +version = "0.22.1" dependencies = [ "memchr", "regex", @@ -854,44 +1474,48 @@ dependencies = [ "tree-sitter", ] +[[package]] +name = "tree-sitter-tests-proc-macro" +version = "0.0.0" +dependencies = [ + "proc-macro2", + "quote", + "rand", + "syn", +] + [[package]] name = "unicode-bidi" -version = "0.3.13" +version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" +checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" [[package]] name = "unicode-ident" -version = "1.0.8" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] name = "unicode-normalization" -version = "0.1.22" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" dependencies = [ "tinyvec", ] 
-[[package]] -name = "unicode-width" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" - [[package]] name = "unindent" -version = "0.2.1" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aa30f5ea51ff7edfc797c6d3f9ec8cbd8cfedef5371766b7181d33977f4814f" +checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" [[package]] name = "url" -version = "2.3.1" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d68c799ae75762b8c3fe375feb6600ef5602c883c5d21eb51c09f22b83c4643" +checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" dependencies = [ "form_urlencoded", "idna", @@ -900,21 +1524,33 @@ dependencies = [ [[package]] name = "utf8-width" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5190c9442dcdaf0ddd50f37420417d219ae5261bbf5db120d0f9bab996c9cba1" +checksum = "86bd8d4e895da8537e5315b8254664e6b769c4ff3db18321b297a1e7004392e3" [[package]] -name = "vec_map" -version = "0.8.2" +name = "utf8parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "version_check" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "walkdir" -version = "2.3.3" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" dependencies = [ "same-file", "winapi-util", @@ -928,9 +1564,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.84" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31f8dcbc21f30d9b8f2ea926ecb58f6b91192c17e9d33594b3df58b2007ca53b" +checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -938,24 +1574,24 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.84" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95ce90fd5bcc06af55a641a86428ee4229e44e07033963a2290a8e241607ccb9" +checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 1.0.109", + "syn", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.84" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c21f77c0bedc37fd5dc21f897894a5ca01e7bb159884559461862ae90c0b4c5" +checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -963,28 +1599,252 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.84" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6" +checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = 
"0.2.84" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" + +[[package]] +name = "wasm-encoder" +version = "0.41.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "972f97a5d8318f908dded23594188a90bcd09365986b1163e66d70170e5287ae" +dependencies = [ + "leb128", +] + +[[package]] +name = "wasmparser" +version = "0.121.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dbe55c8f9d0dbd25d9447a5a889ff90c0cc3feaa7395310d3d826b2c703eaab" +dependencies = [ + "bitflags 2.4.2", + "indexmap", + "semver", +] + +[[package]] +name = "wasmparser" +version = "0.201.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84e5df6dba6c0d7fafc63a450f1738451ed7a0b52295d83e868218fa286bf708" +dependencies = [ + "bitflags 2.4.2", + "indexmap", + "semver", +] + +[[package]] +name = "wasmtime" +version = "18.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c843b8bc4dd4f3a76173ba93405c71111d570af0d90ea5f6299c705d0c2add2" +dependencies = [ + "anyhow", + "bincode", + "bumpalo", + "cfg-if", + "gimli", + "indexmap", + "libc", + "log", + "object", + "once_cell", + "paste", + "rustix", + "serde", + "serde_derive", + "serde_json", + "target-lexicon", + "wasmparser 0.121.2", + "wasmtime-cranelift", + "wasmtime-environ", + "wasmtime-jit-icache-coherence", + "wasmtime-runtime", + "windows-sys 0.52.0", +] + +[[package]] +name = "wasmtime-asm-macros" +version = "18.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b9d329c718b3a18412a6a017c912b539baa8fe1210d21b651f6b4dbafed743" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "wasmtime-c-api-impl" +version = "18.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc93587c24d8e3cb28912eb7abf95f7e350380656faccc46cff04c0821ec58c2" +dependencies = [ 
+ "anyhow", + "log", + "once_cell", + "tracing", + "wasmtime", + "wasmtime-c-api-macros", +] + +[[package]] +name = "wasmtime-c-api-macros" +version = "18.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0046fef7e28c3804e5e38bfa31ea2a0f73905319b677e57ebe37e49358989b5d" +checksum = "2e571a71eba52dfe81ef653a3a336888141f00fc2208a9962722e036fe2a34be" +dependencies = [ + "proc-macro2", + "quote", +] + +[[package]] +name = "wasmtime-cranelift" +version = "18.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31ca62f519225492bd555d0ec85a2dacb0c10315db3418c8b9aeb3824bf54a24" +dependencies = [ + "anyhow", + "cfg-if", + "cranelift-codegen", + "cranelift-control", + "cranelift-entity", + "cranelift-frontend", + "cranelift-native", + "cranelift-wasm", + "gimli", + "log", + "object", + "target-lexicon", + "thiserror", + "wasmparser 0.121.2", + "wasmtime-cranelift-shared", + "wasmtime-environ", + "wasmtime-versioned-export-macros", +] + +[[package]] +name = "wasmtime-cranelift-shared" +version = "18.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd5f2071f42e61490bf7cb95b9acdbe6a29dd577a398019304a960585f28b844" +dependencies = [ + "anyhow", + "cranelift-codegen", + "cranelift-control", + "cranelift-native", + "gimli", + "object", + "target-lexicon", + "wasmtime-environ", +] + +[[package]] +name = "wasmtime-environ" +version = "18.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82bf1a47f384610da19f58b0fd392ca6a3b720974315c08afb0392c0f3951fed" +dependencies = [ + "anyhow", + "bincode", + "cranelift-entity", + "gimli", + "indexmap", + "log", + "object", + "serde", + "serde_derive", + "target-lexicon", + "thiserror", + "wasmparser 0.121.2", + "wasmtime-types", +] + +[[package]] +name = "wasmtime-jit-icache-coherence" +version = "18.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"33f4121cb29dda08139b2824a734dd095d83ce843f2d613a84eb580b9cfc17ac" +dependencies = [ + "cfg-if", + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "wasmtime-runtime" +version = "18.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e517f2b996bb3b0e34a82a2bce194f850d9bcfc25c08328ef5fb71b071066b8" +dependencies = [ + "anyhow", + "cc", + "cfg-if", + "indexmap", + "libc", + "log", + "mach", + "memfd", + "memoffset", + "paste", + "psm", + "rustix", + "sptr", + "wasm-encoder", + "wasmtime-asm-macros", + "wasmtime-environ", + "wasmtime-versioned-export-macros", + "wasmtime-wmemcheck", + "windows-sys 0.52.0", +] + +[[package]] +name = "wasmtime-types" +version = "18.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54a327d7a0ef57bd52a507d28b4561a74126c7a8535a2fc6f2025716bc6a52e8" +dependencies = [ + "cranelift-entity", + "serde", + "serde_derive", + "thiserror", + "wasmparser 0.121.2", +] + +[[package]] +name = "wasmtime-versioned-export-macros" +version = "18.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ef32eea9fc7035a55159a679d1e89b43ece5ae45d24eed4808e6a92c99a0da4" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "wasmtime-wmemcheck" +version = "18.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f4cbfb052d66f03603a9b77f18171ea245c7805714caad370a549a6344bf86b" [[package]] name = "web-sys" -version = "0.3.61" +version = "0.3.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e33b99f4b23ba3eec1a53ac264e35a755f00e966e0065077d6027c0f575b0b97" +checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" dependencies = [ "js-sys", "wasm-bindgen", @@ -992,12 +1852,12 @@ dependencies = [ [[package]] name = "webbrowser" -version = "0.8.8" +version = "0.8.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"579cc485bd5ce5bfa0d738e4921dd0b956eca9800be1fd2e5257ebe95bc4617e" +checksum = "d1b04c569c83a9bb971dd47ec6fd48753315f4bf989b9b04a2e7ca4d7f0dc950" dependencies = [ "core-foundation", - "dirs 4.0.0", + "home", "jni", "log", "ndk-context", @@ -1009,13 +1869,27 @@ dependencies = [ [[package]] name = "which" -version = "4.4.0" +version = "4.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2441c784c52b289a054b7201fc93253e288f094e2f4be9058343127c4226a269" +checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" dependencies = [ "either", - "libc", + "home", + "once_cell", + "rustix", +] + +[[package]] +name = "which" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fa5e0c10bf77f44aac573e498d1a82d5fbd5e91f6fc0a99e7be4b38e85e101c" +dependencies = [ + "either", + "home", "once_cell", + "rustix", + "windows-sys 0.52.0", ] [[package]] @@ -1036,9 +1910,9 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" dependencies = [ "winapi", ] @@ -1055,7 +1929,25 @@ version = "0.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" dependencies = [ - "windows-targets", + "windows-targets 0.42.2", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.4", ] [[package]] @@ -1064,13 +1956,43 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 0.42.2", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" +dependencies = [ + "windows_aarch64_gnullvm 0.52.4", + "windows_aarch64_msvc 0.52.4", + "windows_i686_gnu 0.52.4", + "windows_i686_msvc 0.52.4", + "windows_x86_64_gnu 0.52.4", + "windows_x86_64_gnullvm 0.52.4", + "windows_x86_64_msvc 0.52.4", ] [[package]] @@ -1079,38 +2001,169 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" + [[package]] name = "windows_aarch64_msvc" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" + [[package]] name = "windows_i686_gnu" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" + [[package]] name = "windows_i686_msvc" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.4" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" + [[package]] name = "windows_x86_64_gnu" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" + [[package]] name = "windows_x86_64_gnullvm" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" + [[package]] name = "windows_x86_64_msvc" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" + +[[package]] +name = "winnow" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dffa400e67ed5a4dd237983829e66475f0a4a26938c4b04c21baede6262215b8" +dependencies = [ + "memchr", +] + +[[package]] +name = "xtask" +version = "0.1.0" +dependencies = [ + "git2", + "indoc", + "semver", + "serde", + "serde_json", + "toml", +] + +[[package]] +name = "yansi" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" + +[[package]] +name = "zerocopy" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/third-party/tree-sitter/tree-sitter/Cargo.toml b/third-party/tree-sitter/tree-sitter/Cargo.toml index f69dbc4fec1..62715b224bc 100644 --- a/third-party/tree-sitter/tree-sitter/Cargo.toml +++ b/third-party/tree-sitter/tree-sitter/Cargo.toml @@ -1,10 +1,93 @@ [workspace] default-members = ["cli"] -members = ["cli", "lib"] +members = [ + "cli", + "cli/config", + "cli/loader", + "lib", + "tags", + "highlight", + "xtask", +] resolver = "2" [workspace.package] -rust-version = "1.65" +version = "0.22.1" +authors = ["Max Brunsfeld "] +edition = "2021" +rust-version = "1.74.1" +homepage = "https://tree-sitter.github.io/tree-sitter" +repository = "https://github.com/tree-sitter/tree-sitter" +license = "MIT" +keywords = ["incremental", "parsing"] +categories = ["command-line-utilities", "parsing"] -[profile.release] -strip = true 
+[profile.optimize] +inherits = "release" +strip = true # Automatically strip symbols from the binary. +lto = true # Link-time optimization. +opt-level = 3 # Optimization level 3. +codegen-units = 1 # Maximum size reduction optimizations. + +[profile.size] +inherits = "optimize" +opt-level = "s" # Optimize for size. + +[profile.profile] +inherits = "optimize" +strip = false + +[workspace.dependencies] +ansi_term = "0.12.1" +anstyle = "1.0.6" +anyhow = "1.0.80" +cc = "1.0.90" +clap = { version = "4.5.2", features = [ + "cargo", + "derive", + "env", + "help", + "unstable-styles", +] } +ctor = "0.2.7" +ctrlc = { version = "3.4.4", features = ["termination"] } +difference = "2.0.0" +dirs = "5.0.1" +filetime = "0.2.23" +fs4 = "0.8.1" +git2 = "0.18.2" +glob = "0.3.1" +heck = "0.4.1" +html-escape = "0.2.13" +indexmap = "2.2.5" +indoc = "2.0.4" +lazy_static = "1.4.0" +libloading = "0.8.3" +log = { version = "0.4.21", features = ["std"] } +memchr = "2.7.1" +once_cell = "1.19.0" +pretty_assertions = "1.4.0" +rand = "0.8.5" +regex = "1.10.3" +regex-syntax = "0.8.2" +rustc-hash = "1.1.0" +semver = "1.0.22" +serde = { version = "1.0.197", features = ["derive"] } +serde_derive = "1.0.197" +serde_json = { version = "1.0.114", features = ["preserve_order"] } +smallbitvec = "2.5.1" +tempfile = "3.10.1" +thiserror = "1.0.57" +tiny_http = "0.12.0" +toml = "0.8.10" +unindent = "0.2.3" +walkdir = "2.5.0" +wasmparser = "0.201.0" +webbrowser = "0.8.13" +which = "6.0.0" + +tree-sitter = { version = "0.22.0", path = "./lib" } +tree-sitter-loader = { version = "0.22.0", path = "./cli/loader" } +tree-sitter-config = { version = "0.22.0", path = "./cli/config" } +tree-sitter-highlight = { version = "0.22.0", path = "./highlight" } +tree-sitter-tags = { version = "0.22.0", path = "./tags" } diff --git a/third-party/tree-sitter/tree-sitter/FUNDING.json b/third-party/tree-sitter/tree-sitter/FUNDING.json new file mode 100644 index 00000000000..33606931146 --- /dev/null +++ 
b/third-party/tree-sitter/tree-sitter/FUNDING.json @@ -0,0 +1,7 @@ +{ + "drips": { + "ethereum": { + "ownedBy": "0xc01246694085eF6914C527EBdFb4d8C77dfeaf8e" + } + } +} diff --git a/third-party/tree-sitter/tree-sitter/LICENSE b/third-party/tree-sitter/tree-sitter/LICENSE index 4c2200224ec..3f674119a03 100644 --- a/third-party/tree-sitter/tree-sitter/LICENSE +++ b/third-party/tree-sitter/tree-sitter/LICENSE @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2018-2021 Max Brunsfeld +Copyright (c) 2018-2023 Max Brunsfeld Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/third-party/tree-sitter/tree-sitter/Makefile b/third-party/tree-sitter/tree-sitter/Makefile index 69f6f590e08..e1164428fa5 100644 --- a/third-party/tree-sitter/tree-sitter/Makefile +++ b/third-party/tree-sitter/tree-sitter/Makefile @@ -1,4 +1,4 @@ -VERSION := 0.20.9 +VERSION := 0.22.1 # install directory layout PREFIX ?= /usr/local @@ -18,15 +18,19 @@ endif OBJ := $(SRC:.c=.o) # define default flags, and override to append mandatory flags -CFLAGS ?= -O3 -Wall -Wextra -Werror -override CFLAGS += -std=gnu99 -fPIC -Ilib/src -Ilib/include +ARFLAGS := rcs +CFLAGS := -O3 -Wall -Wextra -Wshadow -pedantic +override CFLAGS += -std=c11 -fPIC -fvisibility=hidden +override CFLAGS += -Ilib/src -Ilib/src/wasm -Ilib/include # ABI versioning -SONAME_MAJOR := 0 -SONAME_MINOR := 0 +SONAME_MAJOR := $(word 1,$(subst ., ,$(VERSION))) +SONAME_MINOR := $(word 2,$(subst ., ,$(VERSION))) # OS-specific bits -ifeq ($(shell uname),Darwin) +ifeq ($(OS),Windows_NT) + $(error "Windows is not supported") +else ifeq ($(shell uname),Darwin) SOEXT = dylib SOEXTVER_MAJOR = $(SONAME_MAJOR).dylib SOEXTVER = $(SONAME_MAJOR).$(SONAME_MINOR).dylib @@ -37,35 +41,71 @@ else SOEXTVER = so.$(SONAME_MAJOR).$(SONAME_MINOR) LINKSHARED += -shared -Wl,-soname,libtree-sitter.so.$(SONAME_MAJOR) endif -ifneq (,$(filter $(shell 
uname),FreeBSD NetBSD DragonFly)) +ifneq ($(filter $(shell uname),FreeBSD NetBSD DragonFly),) PCLIBDIR := $(PREFIX)/libdata/pkgconfig endif -all: libtree-sitter.a libtree-sitter.$(SOEXTVER) +all: libtree-sitter.a libtree-sitter.$(SOEXT) tree-sitter.pc libtree-sitter.a: $(OBJ) - $(AR) rcs $@ $^ + $(AR) $(ARFLAGS) $@ $^ -libtree-sitter.$(SOEXTVER): $(OBJ) +libtree-sitter.$(SOEXT): $(OBJ) $(CC) $(LDFLAGS) $(LINKSHARED) $^ $(LDLIBS) -o $@ - ln -sf $@ libtree-sitter.$(SOEXT) - ln -sf $@ libtree-sitter.$(SOEXTVER_MAJOR) +ifneq ($(STRIP),) + $(STRIP) $@ +endif + +tree-sitter.pc: tree-sitter.pc.in + sed -e 's|@VERSION@|$(VERSION)|' \ + -e 's|@LIBDIR@|$(LIBDIR)|' \ + -e 's|@INCLUDEDIR@|$(INCLUDEDIR)|' \ + -e 's|=$(PREFIX)|=$${prefix}|' \ + -e 's|@PREFIX@|$(PREFIX)|' $< > $@ + +clean: + $(RM) $(OBJ) tree-sitter.pc libtree-sitter.a libtree-sitter.$(SOEXT) install: all - install -d '$(DESTDIR)$(LIBDIR)' - install -m755 libtree-sitter.a '$(DESTDIR)$(LIBDIR)'/libtree-sitter.a - install -m755 libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER) + install -Dm644 lib/include/tree_sitter/api.h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/api.h + install -Dm644 tree-sitter.pc '$(DESTDIR)$(PCLIBDIR)'/tree-sitter.pc + install -Dm644 libtree-sitter.a '$(DESTDIR)$(LIBDIR)'/libtree-sitter.a + install -m755 libtree-sitter.$(SOEXT) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER) ln -sf libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER_MAJOR) - ln -sf libtree-sitter.$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXT) - install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter - install -m644 lib/include/tree_sitter/*.h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/ - install -d '$(DESTDIR)$(PCLIBDIR)' - sed -e 's|@LIBDIR@|$(LIBDIR)|;s|@INCLUDEDIR@|$(INCLUDEDIR)|;s|@VERSION@|$(VERSION)|' \ - -e 's|=$(PREFIX)|=$${prefix}|' \ - -e 's|@PREFIX@|$(PREFIX)|' \ - tree-sitter.pc.in > '$(DESTDIR)$(PCLIBDIR)'/tree-sitter.pc + ln -sf libtree-sitter.$(SOEXTVER_MAJOR) 
'$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXT) -clean: - rm -f lib/src/*.o libtree-sitter.a libtree-sitter.$(SOEXT) libtree-sitter.$(SOEXTVER_MAJOR) libtree-sitter.$(SOEXTVER) +uninstall: + $(RM) '$(DESTDIR)$(LIBDIR)'/libtree-sitter.a \ + '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER) \ + '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXTVER_MAJOR) \ + '$(DESTDIR)$(LIBDIR)'/libtree-sitter.$(SOEXT) \ + '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/api.h \ + '$(DESTDIR)$(PCLIBDIR)'/tree-sitter.pc + +.PHONY: all install uninstall clean + + +##### Dev targets ##### + +test: + script/fetch-fixtures + script/generate-fixtures + script/test + +test_wasm: + script/generate-fixtures-wasm + script/test-wasm + +lint: + cargo update --workspace --locked --quiet + cargo check --workspace --all-targets + cargo fmt --all --check + cargo clippy --workspace --all-targets -- -D warnings + +format: + cargo fmt --all + +changelog: + @git-cliff --config script/cliff.toml --output CHANGELOG.md --latest --github-token $(shell gh auth token) -.PHONY: all install clean +.PHONY: test test_wasm lint format changelog diff --git a/third-party/tree-sitter/tree-sitter/Package.swift b/third-party/tree-sitter/tree-sitter/Package.swift new file mode 100644 index 00000000000..79084cb9a0c --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/Package.swift @@ -0,0 +1,41 @@ +// swift-tools-version: 5.8 +// The swift-tools-version declares the minimum version of Swift required to build this package. + +import PackageDescription + +let package = Package( + name: "TreeSitter", + products: [ + // Products define the executables and libraries a package produces, and make them visible to other packages. 
+ .library( + name: "TreeSitter", + targets: ["TreeSitter"]), + ], + targets: [ + .target(name: "TreeSitter", + path: "lib", + exclude: [ + "binding_rust", + "binding_web", + "node_modules", + "Cargo.toml", + "README.md", + "src/unicode/README.md", + "src/unicode/LICENSE", + "src/unicode/ICU_SHA", + "src/get_changed_ranges.c", + "src/tree_cursor.c", + "src/stack.c", + "src/node.c", + "src/lexer.c", + "src/parser.c", + "src/language.c", + "src/alloc.c", + "src/subtree.c", + "src/tree.c", + "src/query.c" + ], + sources: ["src/lib.c"]), + ], + cLanguageStandard: .c11 +) diff --git a/third-party/tree-sitter/tree-sitter/README.md b/third-party/tree-sitter/tree-sitter/README.md index 34390187b15..d378215eb0e 100644 --- a/third-party/tree-sitter/tree-sitter/README.md +++ b/third-party/tree-sitter/tree-sitter/README.md @@ -1,7 +1,8 @@ # tree-sitter -[![CICD](https://github.com/tree-sitter/tree-sitter/actions/workflows/CICD.yml/badge.svg)](https://github.com/tree-sitter/tree-sitter/actions/workflows/CICD.yml) [![DOI](https://zenodo.org/badge/14164618.svg)](https://zenodo.org/badge/latestdoi/14164618) +[![discord][discord]](https://discord.gg/w7nTvsVJhm) +[![matrix][matrix]](https://matrix.to/#/#tree-sitter-chat:matrix.org) Tree-sitter is a parser generator tool and an incremental parsing library. It can build a concrete syntax tree for a source file and efficiently update the syntax tree as the source file is edited. Tree-sitter aims to be: @@ -11,8 +12,10 @@ Tree-sitter is a parser generator tool and an incremental parsing library. 
It ca - **Dependency-free** so that the runtime library (which is written in pure C) can be embedded in any application ## Links - - [Documentation](https://tree-sitter.github.io) - [Rust binding](lib/binding_rust/README.md) - [WASM binding](lib/binding_web/README.md) - [Command-line interface](cli/README.md) + +[discord]: https://img.shields.io/discord/1063097320771698699?logo=discord&label=discord +[matrix]: https://img.shields.io/matrix/tree-sitter-chat%3Amatrix.org?logo=matrix&label=matrix diff --git a/third-party/tree-sitter/tree-sitter/build.zig b/third-party/tree-sitter/tree-sitter/build.zig new file mode 100644 index 00000000000..ed44706a659 --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/build.zig @@ -0,0 +1,16 @@ +const std = @import("std"); + +pub fn build(b: *std.Build) void { + var lib = b.addStaticLibrary(.{ + .name = "tree-sitter", + .target = b.standardTargetOptions(.{}), + .optimize = b.standardOptimizeOption(.{}), + }); + + lib.linkLibC(); + lib.addCSourceFile(.{ .file = .{ .path = "lib/src/lib.c" }, .flags = &.{"-std=c11"} }); + lib.addIncludePath(.{ .path = "lib/include" }); + lib.addIncludePath(.{ .path = "lib/src" }); + + b.installArtifact(lib); +} diff --git a/third-party/tree-sitter/tree-sitter/cli/Cargo.toml b/third-party/tree-sitter/tree-sitter/cli/Cargo.toml index f9f8ca4b34d..7d11bf4ab1f 100644 --- a/third-party/tree-sitter/tree-sitter/cli/Cargo.toml +++ b/third-party/tree-sitter/tree-sitter/cli/Cargo.toml @@ -1,82 +1,71 @@ [package] name = "tree-sitter-cli" +version.workspace = true description = "CLI tool for developing, testing, and using Tree-sitter parsers" -version = "0.20.8" -authors = ["Max Brunsfeld "] -edition = "2021" -license = "MIT" -readme = "README.md" -keywords = ["incremental", "parsing"] -categories = ["command-line-utilities", "parsing"] -repository = "https://github.com/tree-sitter/tree-sitter" +authors.workspace = true +edition.workspace = true rust-version.workspace = true +readme = "README.md" 
+homepage.workspace = true +repository.workspace = true +license.workspace = true +keywords.workspace = true +categories.workspace = true [[bin]] name = "tree-sitter" path = "src/main.rs" +doc = false [[bench]] name = "benchmark" harness = false -[dependencies] -ansi_term = "0.12" -anyhow = "1.0" -atty = "0.2" -clap = "2.32" -difference = "2.0" -dirs = "3.0" -glob = "0.3.0" -html-escape = "0.2.6" -indexmap = "1" -lazy_static = "1.2.0" -regex = "1" -regex-syntax = "0.6.4" -rustc-hash = "1" -semver = "1.0" -serde = { version = "1.0.130", features = ["derive"] } -smallbitvec = "2.5.1" -tiny_http = "0.12.0" -walkdir = "2.3" -webbrowser = "0.8.3" -which = "4.1.0" - -[dependencies.tree-sitter] -version = "0.20.10" -path = "../lib" - -[dependencies.tree-sitter-config] -version = "0.19.0" -path = "config" +[features] +wasm = ["tree-sitter/wasm", "tree-sitter-loader/wasm"] -[dependencies.tree-sitter-highlight] -version = "0.20" -path = "../highlight" - -[dependencies.tree-sitter-loader] -version = "0.20" -path = "loader" - -[dependencies.tree-sitter-tags] -version = "0.20" -path = "../tags" - -[dependencies.serde_json] -version = "1.0" -features = ["preserve_order"] +[dependencies] +ansi_term.workspace = true +anstyle.workspace = true +anyhow.workspace = true +clap.workspace = true +ctrlc.workspace = true +difference.workspace = true +dirs.workspace = true +filetime.workspace = true +glob.workspace = true +heck.workspace = true +html-escape.workspace = true +indexmap.workspace = true +indoc.workspace = true +lazy_static.workspace = true +log.workspace = true +memchr.workspace = true +regex.workspace = true +regex-syntax.workspace = true +rustc-hash.workspace = true +semver.workspace = true +serde.workspace = true +serde_derive.workspace = true +serde_json.workspace = true +smallbitvec.workspace = true +tiny_http.workspace = true +walkdir.workspace = true +wasmparser.workspace = true +webbrowser.workspace = true +which.workspace = true -[dependencies.log] -version = "0.4.6" 
-features = ["std"] +tree-sitter.workspace = true +tree-sitter-config.workspace = true +tree-sitter-highlight.workspace = true +tree-sitter-loader.workspace = true +tree-sitter-tags.workspace = true [dev-dependencies] -proc_macro = { path = "src/tests/proc_macro" } - -rand = "0.8" -tempfile = "3" -pretty_assertions = "0.7.2" -ctor = "0.1" -unindent = "0.2" +tree_sitter_proc_macro = { path = "src/tests/proc_macro", package = "tree-sitter-tests-proc-macro" } -[build-dependencies] -toml = "0.5" +rand.workspace = true +tempfile.workspace = true +pretty_assertions.workspace = true +ctor.workspace = true +unindent.workspace = true diff --git a/third-party/tree-sitter/tree-sitter/cli/README.md b/third-party/tree-sitter/tree-sitter/cli/README.md index 8cdda9c00fe..eb93bcfa241 100644 --- a/third-party/tree-sitter/tree-sitter/cli/README.md +++ b/third-party/tree-sitter/tree-sitter/cli/README.md @@ -1,7 +1,11 @@ -Tree-sitter CLI -=============== +# Tree-sitter CLI -[![Crates.io](https://img.shields.io/crates/v/tree-sitter-cli.svg)](https://crates.io/crates/tree-sitter-cli) +[![crates.io badge]][crates.io] [![npmjs.com badge]][npmjs.com] + +[crates.io]: https://crates.io/crates/tree-sitter-cli +[crates.io badge]: https://img.shields.io/crates/v/tree-sitter-cli.svg?color=%23B48723 +[npmjs.com]: https://www.npmjs.org/package/tree-sitter-cli +[npmjs.com badge]: https://img.shields.io/npm/v/tree-sitter-cli.svg?color=%23BF4A4A The Tree-sitter CLI allows you to develop, test, and use Tree-sitter grammars from the command line. It works on MacOS, Linux, and Windows. @@ -19,7 +23,7 @@ or with `npm`: npm install tree-sitter-cli ``` -You can also download a pre-built binary for your platform from [the releases page](https://github.com/tree-sitter/tree-sitter/releases/latest). +You can also download a pre-built binary for your platform from [the releases page]. 
### Dependencies @@ -30,8 +34,11 @@ The `tree-sitter` binary itself has no dependencies, but specific commands have ### Commands -* `generate` - The `tree-sitter generate` command will generate a Tree-sitter parser based on the grammar in the current working directory. See [the documentation](http://tree-sitter.github.io/tree-sitter/creating-parsers) for more information. +* `generate` - The `tree-sitter generate` command will generate a Tree-sitter parser based on the grammar in the current working directory. See [the documentation] for more information. -* `test` - The `tree-sitter test` command will run the unit tests for the Tree-sitter parser in the current working directory. See [the documentation](http://tree-sitter.github.io/tree-sitter/creating-parsers) for more information. +* `test` - The `tree-sitter test` command will run the unit tests for the Tree-sitter parser in the current working directory. See [the documentation] for more information. * `parse` - The `tree-sitter parse` command will parse a file (or list of files) using Tree-sitter parsers. + +[the documentation]: https://tree-sitter.github.io/tree-sitter/creating-parsers +[the releases page]: https://github.com/tree-sitter/tree-sitter/releases/latest diff --git a/third-party/tree-sitter/tree-sitter/cli/benches/benchmark.rs b/third-party/tree-sitter/tree-sitter/cli/benches/benchmark.rs index efb73f3a5d1..f7700dd73c3 100644 --- a/third-party/tree-sitter/tree-sitter/cli/benches/benchmark.rs +++ b/third-party/tree-sitter/tree-sitter/cli/benches/benchmark.rs @@ -15,7 +15,7 @@ lazy_static! 
{ static ref EXAMPLE_FILTER: Option = env::var("TREE_SITTER_BENCHMARK_EXAMPLE_FILTER").ok(); static ref REPETITION_COUNT: usize = env::var("TREE_SITTER_BENCHMARK_REPETITION_COUNT") - .map(|s| usize::from_str_radix(&s, 10).unwrap()) + .map(|s| s.parse::().unwrap()) .unwrap_or(5); static ref TEST_LOADER: Loader = Loader::with_parser_lib_path(SCRATCH_DIR.clone()); static ref EXAMPLE_AND_QUERY_PATHS_BY_LANGUAGE_DIR: BTreeMap, Vec)> = { @@ -25,29 +25,29 @@ lazy_static! { let (example_paths, query_paths) = result.entry(relative_path.to_owned()).or_default(); - if let Ok(example_files) = fs::read_dir(&dir.join("examples")) { + if let Ok(example_files) = fs::read_dir(dir.join("examples")) { example_paths.extend(example_files.filter_map(|p| { let p = p.unwrap().path(); if p.is_file() { - Some(p.to_owned()) + Some(p) } else { None } })); } - if let Ok(query_files) = fs::read_dir(&dir.join("queries")) { + if let Ok(query_files) = fs::read_dir(dir.join("queries")) { query_paths.extend(query_files.filter_map(|p| { let p = p.unwrap().path(); if p.is_file() { - Some(p.to_owned()) + Some(p) } else { None } })); } } else { - for entry in fs::read_dir(&dir).unwrap() { + for entry in fs::read_dir(dir).unwrap() { let entry = entry.unwrap().path(); if entry.is_dir() { process_dir(result, &entry); @@ -90,9 +90,9 @@ fn main() { } } - eprintln!("\nLanguage: {}", language_name); + eprintln!("\nLanguage: {language_name}"); let language = get_language(language_path); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); eprintln!(" Constructing Queries"); for path in query_paths { @@ -102,8 +102,9 @@ fn main() { } } - parse(&path, max_path_length, |source| { - Query::new(language, str::from_utf8(source).unwrap()) + parse(path, max_path_length, |source| { + Query::new(&language, str::from_utf8(source).unwrap()) + .with_context(|| format!("Query file path: {path:?}")) .expect("Failed to parse query"); }); } @@ -143,13 +144,13 @@ fn main() { } if let 
Some((average_normal, worst_normal)) = aggregate(&normal_speeds) { - eprintln!(" Average Speed (normal): {} bytes/ms", average_normal); - eprintln!(" Worst Speed (normal): {} bytes/ms", worst_normal); + eprintln!(" Average Speed (normal): {average_normal} bytes/ms"); + eprintln!(" Worst Speed (normal): {worst_normal} bytes/ms"); } if let Some((average_error, worst_error)) = aggregate(&error_speeds) { - eprintln!(" Average Speed (errors): {} bytes/ms", average_error); - eprintln!(" Worst Speed (errors): {} bytes/ms", worst_error); + eprintln!(" Average Speed (errors): {average_error} bytes/ms"); + eprintln!(" Worst Speed (errors): {worst_error} bytes/ms"); } all_normal_speeds.extend(normal_speeds); @@ -158,24 +159,24 @@ fn main() { eprintln!("\n Overall"); if let Some((average_normal, worst_normal)) = aggregate(&all_normal_speeds) { - eprintln!(" Average Speed (normal): {} bytes/ms", average_normal); - eprintln!(" Worst Speed (normal): {} bytes/ms", worst_normal); + eprintln!(" Average Speed (normal): {average_normal} bytes/ms"); + eprintln!(" Worst Speed (normal): {worst_normal} bytes/ms"); } if let Some((average_error, worst_error)) = aggregate(&all_error_speeds) { - eprintln!(" Average Speed (errors): {} bytes/ms", average_error); - eprintln!(" Worst Speed (errors): {} bytes/ms", worst_error); + eprintln!(" Average Speed (errors): {average_error} bytes/ms"); + eprintln!(" Worst Speed (errors): {worst_error} bytes/ms"); } - eprintln!(""); + eprintln!(); } -fn aggregate(speeds: &Vec) -> Option<(usize, usize)> { +fn aggregate(speeds: &[usize]) -> Option<(usize, usize)> { if speeds.is_empty() { return None; } let mut total = 0; let mut max = usize::MAX; - for speed in speeds.iter().cloned() { + for speed in speeds.iter().copied() { total += speed; if speed < max { max = speed; @@ -192,23 +193,26 @@ fn parse(path: &Path, max_path_length: usize, mut action: impl FnMut(&[u8])) -> ); let source_code = fs::read(path) - .with_context(|| format!("Failed to read {:?}", 
path)) + .with_context(|| format!("Failed to read {path:?}")) .unwrap(); let time = Instant::now(); for _ in 0..*REPETITION_COUNT { action(&source_code); } let duration = time.elapsed() / (*REPETITION_COUNT as u32); - let duration_ms = duration.as_millis(); - let speed = source_code.len() as u128 / (duration_ms + 1); - eprintln!("time {} ms\tspeed {} bytes/ms", duration_ms as usize, speed); + let duration_ns = duration.as_nanos(); + let speed = ((source_code.len() as u128) * 1_000_000) / duration_ns; + eprintln!( + "time {:>7.2} ms\t\tspeed {speed:>6} bytes/ms", + (duration_ns as f64) / 1e6, + ); speed as usize } fn get_language(path: &Path) -> Language { let src_dir = GRAMMARS_DIR.join(path).join("src"); TEST_LOADER - .load_language_at_path(&src_dir, &src_dir) - .with_context(|| format!("Failed to load language at path {:?}", src_dir)) + .load_language_at_path(&src_dir, &[&src_dir], None) + .with_context(|| format!("Failed to load language at path {src_dir:?}")) .unwrap() } diff --git a/third-party/tree-sitter/tree-sitter/cli/build.rs b/third-party/tree-sitter/tree-sitter/cli/build.rs index 74c6d833db2..a29a940b235 100644 --- a/third-party/tree-sitter/tree-sitter/cli/build.rs +++ b/third-party/tree-sitter/tree-sitter/cli/build.rs @@ -1,27 +1,51 @@ -use std::ffi::OsStr; -use std::path::{Path, PathBuf}; -use std::{env, fs}; +use std::{ + env, + ffi::OsStr, + fs, + path::{Path, PathBuf}, + time::SystemTime, +}; fn main() { if let Some(git_sha) = read_git_sha() { - println!("cargo:rustc-env={}={}", "BUILD_SHA", git_sha); + println!("cargo:rustc-env=BUILD_SHA={git_sha}"); } if web_playground_files_present() { - println!("cargo:rustc-cfg={}", "TREE_SITTER_EMBED_WASM_BINDING"); + println!("cargo:rustc-cfg=TREE_SITTER_EMBED_WASM_BINDING"); } - let rust_binding_version = read_rust_binding_version(); - println!( - "cargo:rustc-env={}={}", - "RUST_BINDING_VERSION", rust_binding_version, - ); + let build_time = SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + 
.unwrap() + .as_secs_f64(); + println!("cargo:rustc-env=BUILD_TIME={build_time}"); - let emscripten_version = fs::read_to_string("emscripten-version").unwrap(); - println!( - "cargo:rustc-env={}={}", - "EMSCRIPTEN_VERSION", emscripten_version, - ); + #[cfg(any( + target_os = "linux", + target_os = "android", + target_os = "freebsd", + target_os = "openbsd", + target_os = "netbsd", + target_os = "dragonfly", + ))] + { + let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap()).join("dynamic-symbols.txt"); + std::fs::write( + &out_dir, + "{ + ts_current_malloc; + ts_current_calloc; + ts_current_realloc; + ts_current_free; + };", + ) + .unwrap(); + println!( + "cargo:rustc-link-arg=-Wl,--dynamic-list={}", + out_dir.display() + ); + } } fn web_playground_files_present() -> bool { @@ -42,7 +66,8 @@ fn read_git_sha() -> Option { git_path = repo_path.join(".git"); if git_path.exists() { break; - } else if !repo_path.pop() { + } + if !repo_path.pop() { return None; } } @@ -57,10 +82,10 @@ fn read_git_sha() -> Option { } let git_head_path = git_dir_path.join("HEAD"); if let Some(path) = git_head_path.to_str() { - println!("cargo:rerun-if-changed={}", path); + println!("cargo:rerun-if-changed={path}"); } if let Ok(mut head_content) = fs::read_to_string(&git_head_path) { - if head_content.ends_with("\n") { + if head_content.ends_with('\n') { head_content.pop(); } @@ -71,13 +96,12 @@ fn read_git_sha() -> Option { // Go to real non-worktree gitdir let git_dir_path = git_dir_path .parent() - .map(|p| { + .and_then(|p| { p.file_name() .map(|n| n == OsStr::new("worktrees")) .and_then(|x| x.then(|| p.parent())) }) .flatten() - .flatten() .unwrap_or(&git_dir_path); let file = git_dir_path.join(&head_content); @@ -90,7 +114,7 @@ fn read_git_sha() -> Option { if let Some((hash, r#ref)) = line.split_once(' ') { if r#ref == head_content { if let Some(path) = packed_refs.to_str() { - println!("cargo:rerun-if-changed={}", path); + println!("cargo:rerun-if-changed={path}"); } return 
Some(hash.to_string()); } @@ -101,26 +125,15 @@ fn read_git_sha() -> Option { } }; if let Some(path) = ref_filename.to_str() { - println!("cargo:rerun-if-changed={}", path); + println!("cargo:rerun-if-changed={path}"); } return fs::read_to_string(&ref_filename).ok(); } // If we're on a detached commit, then the `HEAD` file itself contains the sha. - else if head_content.len() == 40 { + if head_content.len() == 40 { return Some(head_content); } } None } - -fn read_rust_binding_version() -> String { - let path = "Cargo.toml"; - let text = fs::read_to_string(path).unwrap(); - let cargo_toml = toml::from_str::(text.as_ref()).unwrap(); - cargo_toml["dependencies"]["tree-sitter"]["version"] - .as_str() - .unwrap() - .trim_matches('"') - .to_string() -} diff --git a/third-party/tree-sitter/tree-sitter/cli/config/Cargo.toml b/third-party/tree-sitter/tree-sitter/cli/config/Cargo.toml index 114d6ce8916..8379a5468fd 100644 --- a/third-party/tree-sitter/tree-sitter/cli/config/Cargo.toml +++ b/third-party/tree-sitter/tree-sitter/cli/config/Cargo.toml @@ -1,21 +1,19 @@ [package] name = "tree-sitter-config" +version.workspace = true description = "User configuration of tree-sitter's command line programs" -version = "0.19.0" -authors = ["Max Brunsfeld "] -edition = "2018" -license = "MIT" -readme = "README.md" -keywords = ["incremental", "parsing"] -categories = ["command-line-utilities", "parsing"] -repository = "https://github.com/tree-sitter/tree-sitter" +authors.workspace = true +edition.workspace = true rust-version.workspace = true +readme = "README.md" +homepage.workspace = true +repository.workspace = true +license.workspace = true +keywords.workspace = true +categories.workspace = true [dependencies] -anyhow = "1.0" -dirs = "3.0" -serde = { version = "1.0.130", features = ["derive"] } - -[dependencies.serde_json] -version = "1.0.45" -features = ["preserve_order"] +anyhow.workspace = true +dirs.workspace = true +serde.workspace = true +serde_json.workspace = true diff 
--git a/third-party/tree-sitter/tree-sitter/cli/config/README.md b/third-party/tree-sitter/tree-sitter/cli/config/README.md index 8cbfbcf47c8..e7d7b39b92e 100644 --- a/third-party/tree-sitter/tree-sitter/cli/config/README.md +++ b/third-party/tree-sitter/tree-sitter/cli/config/README.md @@ -1,5 +1,7 @@ -# `tree-sitter-config` +# Tree-sitter Config + +Manages Tree-sitter's configuration file. You can use a configuration file to control the behavior of the `tree-sitter` -command-line program. This crate implements the logic for finding and the +command-line program. This crate implements the logic for finding and the parsing the contents of the configuration file. diff --git a/third-party/tree-sitter/tree-sitter/cli/config/src/lib.rs b/third-party/tree-sitter/tree-sitter/cli/config/src/lib.rs index 3cd09b8dd13..1686b54f757 100644 --- a/third-party/tree-sitter/tree-sitter/cli/config/src/lib.rs +++ b/third-party/tree-sitter/tree-sitter/cli/config/src/lib.rs @@ -1,4 +1,4 @@ -//! Manages tree-sitter's configuration file. +#![doc = include_str!("../README.md")] use anyhow::{anyhow, Context, Result}; use serde::{Deserialize, Serialize}; @@ -39,7 +39,7 @@ impl Config { } let legacy_path = dirs::home_dir() - .ok_or(anyhow!("Cannot determine home directory"))? + .ok_or_else(|| anyhow!("Cannot determine home directory"))? .join(".tree-sitter") .join("config.json"); if legacy_path.is_file() { @@ -51,7 +51,7 @@ impl Config { fn xdg_config_file() -> Result { let xdg_path = dirs::config_dir() - .ok_or(anyhow!("Cannot determine config directory"))? + .ok_or_else(|| anyhow!("Cannot determine config directory"))? .join("tree-sitter") .join("config.json"); Ok(xdg_path) @@ -60,21 +60,26 @@ impl Config { /// Locates and loads in the user's configuration file. 
We search for the configuration file /// in the following locations, in order: /// + /// - Location specified by the path parameter if provided /// - `$TREE_SITTER_DIR/config.json`, if the `TREE_SITTER_DIR` environment variable is set /// - `tree-sitter/config.json` in your default user configuration directory, as determined /// by [`dirs::config_dir`](https://docs.rs/dirs/*/dirs/fn.config_dir.html) /// - `$HOME/.tree-sitter/config.json` as a fallback from where tree-sitter _used_ to store /// its configuration - pub fn load() -> Result { - let location = match Self::find_config_file()? { - Some(location) => location, - None => return Config::initial(), + pub fn load(path: Option) -> Result { + let location = if let Some(path) = path { + path + } else if let Some(path) = Self::find_config_file()? { + path + } else { + return Self::initial(); }; + let content = fs::read_to_string(&location) .with_context(|| format!("Failed to read {}", &location.to_string_lossy()))?; let config = serde_json::from_str(&content) .with_context(|| format!("Bad JSON config {}", &location.to_string_lossy()))?; - Ok(Config { location, config }) + Ok(Self { location, config }) } /// Creates an empty initial configuration file. You can then use the [`Config::add`][] method @@ -83,7 +88,7 @@ impl Config { /// disk. /// /// (Note that this is typically only done by the `tree-sitter init-config` command.) - pub fn initial() -> Result { + pub fn initial() -> Result { let location = if let Ok(path) = env::var("TREE_SITTER_DIR") { let mut path = PathBuf::from(path); path.push("config.json"); @@ -92,7 +97,7 @@ impl Config { Self::xdg_config_file()? }; let config = serde_json::json!({}); - Ok(Config { location, config }) + Ok(Self { location, config }) } /// Saves this configuration to the file that it was originally loaded from. 
diff --git a/third-party/tree-sitter/tree-sitter/cli/emscripten-version b/third-party/tree-sitter/tree-sitter/cli/emscripten-version deleted file mode 100644 index 05b41fb67e2..00000000000 --- a/third-party/tree-sitter/tree-sitter/cli/emscripten-version +++ /dev/null @@ -1 +0,0 @@ -3.1.29 diff --git a/third-party/tree-sitter/tree-sitter/cli/loader/Cargo.toml b/third-party/tree-sitter/tree-sitter/cli/loader/Cargo.toml index 6af28f30509..c235e4c4773 100644 --- a/third-party/tree-sitter/tree-sitter/cli/loader/Cargo.toml +++ b/third-party/tree-sitter/tree-sitter/cli/loader/Cargo.toml @@ -1,37 +1,33 @@ [package] name = "tree-sitter-loader" +version.workspace = true description = "Locates, builds, and loads tree-sitter grammars at runtime" -version = "0.20.0" -authors = ["Max Brunsfeld "] -edition = "2018" -license = "MIT" -readme = "README.md" -keywords = ["incremental", "parsing"] -categories = ["command-line-utilities", "parsing"] -repository = "https://github.com/tree-sitter/tree-sitter" +authors.workspace = true +edition.workspace = true rust-version.workspace = true +readme = "README.md" +homepage.workspace = true +repository.workspace = true +license.workspace = true +keywords.workspace = true +categories.workspace = true -[dependencies] -anyhow = "1.0" -cc = "^1.0.58" -dirs = "3.0" -libloading = "0.7" -once_cell = "1.7" -regex = "1" -serde = { version = "1.0.130", features = ["derive"] } - -[dependencies.serde_json] -version = "1.0" -features = ["preserve_order"] - -[dependencies.tree-sitter] -version = "0.20" -path = "../../lib" +[features] +wasm = ["tree-sitter/wasm"] -[dependencies.tree-sitter-highlight] -version = "0.20" -path = "../../highlight" +[dependencies] +anyhow.workspace = true +cc.workspace = true +dirs.workspace = true +fs4.workspace = true +indoc.workspace = true +libloading.workspace = true +once_cell.workspace = true +regex.workspace = true +serde.workspace = true +serde_json.workspace = true +which.workspace = true 
-[dependencies.tree-sitter-tags] -version = "0.20" -path = "../../tags" +tree-sitter.workspace = true +tree-sitter-highlight.workspace = true +tree-sitter-tags.workspace = true diff --git a/third-party/tree-sitter/tree-sitter/cli/loader/README.md b/third-party/tree-sitter/tree-sitter/cli/loader/README.md index 9889ec71d78..a3c1867495f 100644 --- a/third-party/tree-sitter/tree-sitter/cli/loader/README.md +++ b/third-party/tree-sitter/tree-sitter/cli/loader/README.md @@ -1,6 +1,6 @@ -# `tree-sitter-loader` +# Tree-sitter Loader The `tree-sitter` command-line program will dynamically find and build grammars at runtime, if you have cloned the grammars' repositories to your local -filesystem. This helper crate implements that logic, so that you can use it in +filesystem. This helper crate implements that logic, so that you can use it in your own program analysis tools, as well. diff --git a/third-party/tree-sitter/tree-sitter/cli/loader/build.rs b/third-party/tree-sitter/tree-sitter/cli/loader/build.rs index e0ebd1c48f1..714a662bd75 100644 --- a/third-party/tree-sitter/tree-sitter/cli/loader/build.rs +++ b/third-party/tree-sitter/tree-sitter/cli/loader/build.rs @@ -3,4 +3,7 @@ fn main() { "cargo:rustc-env=BUILD_TARGET={}", std::env::var("TARGET").unwrap() ); + + let emscripten_version = std::fs::read_to_string("emscripten-version").unwrap(); + println!("cargo:rustc-env=EMSCRIPTEN_VERSION={emscripten_version}"); } diff --git a/third-party/tree-sitter/tree-sitter/cli/loader/emscripten-version b/third-party/tree-sitter/tree-sitter/cli/loader/emscripten-version new file mode 100644 index 00000000000..1f1a39706ab --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/cli/loader/emscripten-version @@ -0,0 +1 @@ +3.1.37 diff --git a/third-party/tree-sitter/tree-sitter/cli/loader/src/lib.rs b/third-party/tree-sitter/tree-sitter/cli/loader/src/lib.rs index 029da4513ea..a22ce1cf90a 100644 --- a/third-party/tree-sitter/tree-sitter/cli/loader/src/lib.rs +++ 
b/third-party/tree-sitter/tree-sitter/cli/loader/src/lib.rs @@ -1,19 +1,28 @@ -use anyhow::{anyhow, Context, Error, Result}; -use libloading::{Library, Symbol}; -use once_cell::unsync::OnceCell; -use regex::{Regex, RegexBuilder}; -use serde::{Deserialize, Deserializer, Serialize}; +#![doc = include_str!("../README.md")] + use std::collections::HashMap; -use std::io::BufReader; +use std::ffi::{OsStr, OsString}; +use std::io::{BufRead, BufReader}; use std::ops::Range; use std::path::{Path, PathBuf}; use std::process::Command; use std::sync::Mutex; use std::time::SystemTime; use std::{env, fs, mem}; + +use anyhow::{anyhow, Context, Error, Result}; +use fs4::FileExt; +use indoc::indoc; +use libloading::{Library, Symbol}; +use once_cell::unsync::OnceCell; +use regex::{Regex, RegexBuilder}; +use serde::{Deserialize, Deserializer, Serialize}; use tree_sitter::{Language, QueryError, QueryErrorKind}; use tree_sitter_highlight::HighlightConfiguration; use tree_sitter_tags::{Error as TagsError, TagsConfiguration}; +use which::which; + +pub const EMSCRIPTEN_TAG: &str = concat!("docker.io/emscripten/emsdk:", env!("EMSCRIPTEN_VERSION")); #[derive(Default, Deserialize, Serialize)] pub struct Config { @@ -33,9 +42,8 @@ where D: Deserializer<'de>, { let paths = Vec::::deserialize(deserializer)?; - let home = match dirs::home_dir() { - Some(home) => home, - None => return Ok(paths), + let Some(home) = dirs::home_dir() else { + return Ok(paths); }; let standardized = paths .into_iter() @@ -55,30 +63,34 @@ fn standardize_path(path: PathBuf, home: &Path) -> PathBuf { } impl Config { - pub fn initial() -> Config { + #[must_use] + pub fn initial() -> Self { let home_dir = dirs::home_dir().expect("Cannot determine home directory"); - Config { + Self { parser_directories: vec![ home_dir.join("github"), home_dir.join("src"), home_dir.join("source"), + home_dir.join("projects"), + home_dir.join("dev"), + home_dir.join("git"), ], } } } #[cfg(unix)] -const DYLIB_EXTENSION: &'static str = "so"; 
+const DYLIB_EXTENSION: &str = "so"; #[cfg(windows)] -const DYLIB_EXTENSION: &'static str = "dll"; +const DYLIB_EXTENSION: &str = "dll"; -const BUILD_TARGET: &'static str = env!("BUILD_TARGET"); +const BUILD_TARGET: &str = env!("BUILD_TARGET"); pub struct LanguageConfiguration<'a> { pub scope: Option, pub content_regex: Option, - pub _first_line_regex: Option, + pub first_line_regex: Option, pub injection_regex: Option, pub file_types: Vec, pub root_path: PathBuf, @@ -86,6 +98,7 @@ pub struct LanguageConfiguration<'a> { pub injections_filenames: Option>, pub locals_filenames: Option>, pub tags_filenames: Option>, + pub language_name: String, language_id: usize, highlight_config: OnceCell>, tags_config: OnceCell>, @@ -95,12 +108,17 @@ pub struct LanguageConfiguration<'a> { pub struct Loader { parser_lib_path: PathBuf, - languages_by_id: Vec<(PathBuf, OnceCell)>, + languages_by_id: Vec<(PathBuf, OnceCell, Option>)>, language_configurations: Vec>, language_configuration_ids_by_file_type: HashMap>, + language_configuration_in_current_path: Option, + language_configuration_ids_by_first_line_regex: HashMap>, highlight_names: Box>>, use_all_highlight_names: bool, debug_build: bool, + + #[cfg(feature = "wasm")] + wasm_store: Mutex>, } unsafe impl Send for Loader {} @@ -111,32 +129,39 @@ impl Loader { let parser_lib_path = match env::var("TREE_SITTER_LIBDIR") { Ok(path) => PathBuf::from(path), _ => dirs::cache_dir() - .ok_or(anyhow!("Cannot determine cache directory"))? + .ok_or_else(|| anyhow!("Cannot determine cache directory"))? 
.join("tree-sitter") .join("lib"), }; Ok(Self::with_parser_lib_path(parser_lib_path)) } + #[must_use] pub fn with_parser_lib_path(parser_lib_path: PathBuf) -> Self { - Loader { + Self { parser_lib_path, languages_by_id: Vec::new(), language_configurations: Vec::new(), language_configuration_ids_by_file_type: HashMap::new(), + language_configuration_in_current_path: None, + language_configuration_ids_by_first_line_regex: HashMap::new(), highlight_names: Box::new(Mutex::new(Vec::new())), use_all_highlight_names: true, debug_build: false, + + #[cfg(feature = "wasm")] + wasm_store: Mutex::default(), } } - pub fn configure_highlights(&mut self, names: &Vec) { + pub fn configure_highlights(&mut self, names: &[String]) { self.use_all_highlight_names = false; let mut highlights = self.highlight_names.lock().unwrap(); highlights.clear(); highlights.extend(names.iter().cloned()); } + #[must_use] pub fn highlight_names(&self) -> Vec { self.highlight_names.lock().unwrap().clone() } @@ -146,8 +171,7 @@ impl Loader { eprintln!("Warning: You have not configured any parser directories!"); eprintln!("Please run `tree-sitter init-config` and edit the resulting"); eprintln!("configuration file to indicate where we should look for"); - eprintln!("language grammars."); - eprintln!(""); + eprintln!("language grammars.\n"); } for parser_container_dir in &config.parser_directories { if let Ok(entries) = fs::read_dir(parser_container_dir) { @@ -157,6 +181,7 @@ impl Loader { if parser_dir_name.starts_with("tree-sitter-") { self.find_language_configurations_at_path( &parser_container_dir.join(parser_dir_name), + false, ) .ok(); } @@ -167,23 +192,24 @@ impl Loader { Ok(()) } - pub fn languages_at_path(&mut self, path: &Path) -> Result> { - if let Ok(configurations) = self.find_language_configurations_at_path(path) { + pub fn languages_at_path(&mut self, path: &Path) -> Result> { + if let Ok(configurations) = self.find_language_configurations_at_path(path, true) { let mut language_ids = 
configurations .iter() - .map(|c| c.language_id) + .map(|c| (c.language_id, c.language_name.clone())) .collect::>(); - language_ids.sort(); + language_ids.sort_unstable(); language_ids.dedup(); language_ids .into_iter() - .map(|id| self.language_for_id(id)) + .map(|(id, name)| Ok((self.language_for_id(id)?, name))) .collect::>>() } else { Ok(Vec::new()) } } + #[must_use] pub fn get_all_language_configurations(&self) -> Vec<(&LanguageConfiguration, &Path)> { self.language_configurations .iter() @@ -204,6 +230,30 @@ impl Loader { Ok(None) } + pub fn language_configuration_for_first_line_regex( + &self, + path: &Path, + ) -> Result> { + self.language_configuration_ids_by_first_line_regex + .iter() + .try_fold(None, |_, (regex, ids)| { + if let Some(regex) = Self::regex(Some(regex)) { + let file = fs::File::open(path)?; + let reader = BufReader::new(file); + let first_line = reader.lines().next().transpose()?; + if let Some(first_line) = first_line { + if regex.is_match(&first_line) && !ids.is_empty() { + let configuration = &self.language_configurations[ids[0]]; + let language = self.language_for_id(configuration.language_id)?; + return Ok(Some((language, configuration))); + } + } + } + + Ok(None) + }) + } + pub fn language_configuration_for_file_name( &self, path: &Path, @@ -224,17 +274,14 @@ impl Loader { if let Some(configuration_ids) = configuration_ids { if !configuration_ids.is_empty() { - let configuration; - - // If there is only one language configuration, then use it. - if configuration_ids.len() == 1 { - configuration = &self.language_configurations[configuration_ids[0]]; + let configuration = if configuration_ids.len() == 1 { + &self.language_configurations[configuration_ids[0]] } // If multiple language configurations match, then determine which // one to use by applying the configurations' content regexes. 
else { - let file_contents = fs::read(path) - .with_context(|| format!("Failed to read path {:?}", path))?; + let file_contents = + fs::read(path).with_context(|| format!("Failed to read path {path:?}"))?; let file_contents = String::from_utf8_lossy(&file_contents); let mut best_score = -2isize; let mut best_configuration_id = None; @@ -264,8 +311,8 @@ impl Loader { } } - configuration = &self.language_configurations[best_configuration_id.unwrap()]; - } + &self.language_configurations[best_configuration_id.unwrap()] + }; let language = self.language_for_id(configuration.language_id)?; return Ok(Some((language, configuration))); @@ -303,19 +350,22 @@ impl Loader { } fn language_for_id(&self, id: usize) -> Result { - let (path, language) = &self.languages_by_id[id]; + let (path, language, externals) = &self.languages_by_id[id]; language .get_or_try_init(|| { let src_path = path.join("src"); - self.load_language_at_path(&src_path, &src_path) + self.load_language_at_path(&src_path, &[&src_path], externals.as_deref()) }) - .map(|l| *l) + .cloned() } - pub fn load_language_at_path(&self, src_path: &Path, header_path: &Path) -> Result { + pub fn load_language_at_path( + &self, + src_path: &Path, + header_paths: &[&Path], + external_files: Option<&[PathBuf]>, + ) -> Result { let grammar_path = src_path.join("grammar.json"); - let parser_path = src_path.join("parser.c"); - let mut scanner_path = src_path.join("scanner.c"); #[derive(Deserialize)] struct GrammarJSON { @@ -326,178 +376,535 @@ impl Loader { let grammar_json: GrammarJSON = serde_json::from_reader(BufReader::new(&mut grammar_file)) .with_context(|| "Failed to parse grammar.json")?; - let scanner_path = if scanner_path.exists() { - Some(scanner_path) - } else { - scanner_path.set_extension("cc"); - if scanner_path.exists() { - Some(scanner_path) - } else { - None - } - }; - - self.load_language_from_sources( + self.load_language_at_path_with_name( + src_path, + header_paths, &grammar_json.name, - &header_path, - 
&parser_path, - &scanner_path, + external_files, ) } - pub fn load_language_from_sources( + pub fn load_language_at_path_with_name( &self, + src_path: &Path, + header_paths: &[&Path], name: &str, - header_path: &Path, - parser_path: &Path, - scanner_path: &Option, + external_files: Option<&[PathBuf]>, ) -> Result { let mut lib_name = name.to_string(); + let language_fn_name = format!("tree_sitter_{}", replace_dashes_with_underscores(name)); if self.debug_build { lib_name.push_str(".debug._"); } + + fs::create_dir_all(&self.parser_lib_path)?; + let mut library_path = self.parser_lib_path.join(lib_name); library_path.set_extension(DYLIB_EXTENSION); - let recompile = needs_recompile(&library_path, &parser_path, &scanner_path) + let parser_path = src_path.join("parser.c"); + let scanner_path = self.get_scanner_path(src_path); + + let mut paths_to_check = vec![parser_path.clone()]; + + if let Some(scanner_path) = scanner_path.as_ref() { + paths_to_check.push(scanner_path.clone()); + } + + paths_to_check.extend( + external_files + .unwrap_or_default() + .iter() + .map(|p| src_path.join(p)), + ); + + #[cfg(feature = "wasm")] + if self.wasm_store.lock().unwrap().is_some() { + library_path.set_extension("wasm"); + } + + let mut recompile = needs_recompile(&library_path, &paths_to_check) .with_context(|| "Failed to compare source and binary timestamps")?; - if recompile { - fs::create_dir_all(&self.parser_lib_path)?; - let mut config = cc::Build::new(); - config - .cpp(true) - .opt_level(2) - .cargo_metadata(false) - .target(BUILD_TARGET) - .host(BUILD_TARGET); - let compiler = config.get_compiler(); - let mut command = Command::new(compiler.path()); - for (key, value) in compiler.env() { - command.env(key, value); + #[cfg(feature = "wasm")] + if let Some(wasm_store) = self.wasm_store.lock().unwrap().as_mut() { + if recompile { + self.compile_parser_to_wasm( + name, + src_path, + scanner_path + .as_ref() + .and_then(|p| p.strip_prefix(src_path).ok()), + &library_path, + 
false, + )?; } - if cfg!(windows) { - command.args(&["/nologo", "/LD", "/I"]).arg(header_path); - if self.debug_build { - command.arg("/Od"); - } else { - command.arg("/O2"); + let wasm_bytes = fs::read(&library_path)?; + return Ok(wasm_store.load_language(name, &wasm_bytes)?); + } + + let lock_path = if env::var("CROSS_RUNNER").is_ok() { + PathBuf::from("/tmp") + .join("tree-sitter") + .join("lock") + .join(format!("{name}.lock")) + } else { + dirs::cache_dir() + .ok_or_else(|| anyhow!("Cannot determine cache directory"))? + .join("tree-sitter") + .join("lock") + .join(format!("{name}.lock")) + }; + + if let Ok(lock_file) = fs::OpenOptions::new().write(true).open(&lock_path) { + recompile = false; + if lock_file.try_lock_exclusive().is_err() { + // if we can't acquire the lock, another process is compiling the parser, wait for it and don't recompile + lock_file.lock_exclusive()?; + recompile = false; + } else { + // if we can acquire the lock, check if the lock file is older than 30 seconds, a + // run that was interrupted and left the lock file behind should not block + // subsequent runs + let time = lock_file.metadata()?.modified()?.elapsed()?.as_secs(); + if time > 30 { + fs::remove_file(&lock_path)?; + recompile = true; } - command.arg(parser_path); - if let Some(scanner_path) = scanner_path.as_ref() { - command.arg(scanner_path); + } + } + + if recompile { + fs::create_dir_all(lock_path.parent().unwrap()).with_context(|| { + format!( + "Failed to create directory {:?}", + lock_path.parent().unwrap() + ) + })?; + let lock_file = fs::OpenOptions::new() + .create(true) + .truncate(true) + .write(true) + .open(&lock_path)?; + lock_file.lock_exclusive()?; + + self.compile_parser_to_dylib( + header_paths, + &parser_path, + scanner_path.as_deref(), + &library_path, + &lock_file, + &lock_path, + )?; + + if scanner_path.is_some() { + self.check_external_scanner(name, &library_path)?; + } + } + + let library = unsafe { Library::new(&library_path) } + .with_context(|| 
format!("Error opening dynamic library {library_path:?}"))?; + let language = unsafe { + let language_fn = library + .get:: Language>>(language_fn_name.as_bytes()) + .with_context(|| format!("Failed to load symbol {language_fn_name}"))?; + language_fn() + }; + mem::forget(library); + Ok(language) + } + + fn compile_parser_to_dylib( + &self, + header_paths: &[&Path], + parser_path: &Path, + scanner_path: Option<&Path>, + library_path: &Path, + lock_file: &fs::File, + lock_path: &Path, + ) -> Result<(), Error> { + let mut config = cc::Build::new(); + config + .cpp(true) + .opt_level(2) + .cargo_metadata(false) + .cargo_warnings(false) + .target(BUILD_TARGET) + .host(BUILD_TARGET) + .flag_if_supported("-Werror=implicit-function-declaration"); + let compiler = config.get_compiler(); + let mut command = Command::new(compiler.path()); + for (key, value) in compiler.env() { + command.env(key, value); + } + + if compiler.is_like_msvc() { + command.args(["/nologo", "/LD"]); + + for path in header_paths { + command.arg(format!("/I{}", path.to_string_lossy())); + } + + if self.debug_build { + command.arg("/Od"); + } else { + command.arg("/O2"); + } + command.arg(parser_path); + + if let Some(scanner_path) = scanner_path.as_ref() { + if scanner_path.extension() != Some("c".as_ref()) { + eprintln!("Warning: Using a C++ scanner is now deprecated. 
Please migrate your scanner code to C, as C++ support will be removed in the near future."); } - command - .arg("/link") - .arg(format!("/out:{}", library_path.to_str().unwrap())); + + command.arg(scanner_path); + } + command + .arg("/link") + .arg(format!("/out:{}", library_path.to_str().unwrap())); + } else { + command + .arg("-shared") + .arg("-fno-exceptions") + .arg("-g") + .arg("-o") + .arg(library_path); + + for path in header_paths { + command.arg(format!("-I{}", path.to_string_lossy())); + } + + if !cfg!(windows) { + command.arg("-fPIC"); + } + + if self.debug_build { + command.arg("-O0"); } else { - command - .arg("-shared") - .arg("-fPIC") - .arg("-fno-exceptions") - .arg("-g") - .arg("-I") - .arg(header_path) - .arg("-o") - .arg(&library_path); - - if self.debug_build { - command.arg("-O0"); + command.arg("-O2"); + } + + if let Some(scanner_path) = scanner_path.as_ref() { + if scanner_path.extension() == Some("c".as_ref()) { + command.arg("-xc").arg("-std=c11").arg(scanner_path); } else { - command.arg("-O2"); + eprintln!("Warning: Using a C++ scanner is now deprecated. Please migrate your scanner code to C, as C++ support will be removed in the near future."); + command.arg(scanner_path); } + } + command.arg("-xc").arg(parser_path); + } - // For conditional compilation of external scanner code when - // used internally by `tree-siteer parse` and other sub commands. - command.arg("-DTREE_SITTER_INTERNAL_BUILD"); + // For conditional compilation of external scanner code when + // used internally by `tree-sitter parse` and other sub commands. + command.arg("-DTREE_SITTER_INTERNAL_BUILD"); - if let Some(scanner_path) = scanner_path.as_ref() { - if scanner_path.extension() == Some("c".as_ref()) { - command.arg("-xc").arg("-std=c99").arg(scanner_path); - } else { - command.arg(scanner_path); + // Always use the same allocator in the CLI as any scanner, useful for debugging and + // tracking memory leaks in tests. 
+ #[cfg(not(any(target_os = "macos", target_os = "ios")))] + command.arg("-DTREE_SITTER_REUSE_ALLOCATOR"); + + let output = command.output().with_context(|| { + format!("Failed to execute the C compiler with the following command:\n{command:?}") + })?; + + lock_file.unlock()?; + fs::remove_file(lock_path)?; + + if !output.status.success() { + return Err(anyhow!( + "Parser compilation failed.\nStdout: {}\nStderr: {}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + )); + } + + Ok(()) + } + + #[cfg(unix)] + fn check_external_scanner(&self, name: &str, library_path: &Path) -> Result<()> { + let prefix = if cfg!(target_os = "macos") { "_" } else { "" }; + let mut must_have = vec![ + format!("{prefix}tree_sitter_{name}_external_scanner_create"), + format!("{prefix}tree_sitter_{name}_external_scanner_destroy"), + format!("{prefix}tree_sitter_{name}_external_scanner_serialize"), + format!("{prefix}tree_sitter_{name}_external_scanner_deserialize"), + format!("{prefix}tree_sitter_{name}_external_scanner_scan"), + ]; + + let command = Command::new("nm") + .arg("-W") + .arg("-U") + .arg(library_path) + .output(); + if let Ok(output) = command { + if output.status.success() { + let mut found_non_static = false; + for line in String::from_utf8_lossy(&output.stdout).lines() { + if line.contains(" T ") { + if let Some(function_name) = + line.split_whitespace().collect::>().get(2) + { + if !line.contains("tree_sitter_") { + if !found_non_static { + found_non_static = true; + eprintln!("Warning: Found non-static non-tree-sitter functions in the external scannner"); + } + eprintln!(" `{function_name}`"); + } else { + must_have.retain(|f| f != function_name); + } + } } } - command.arg("-xc").arg(parser_path); + if found_non_static { + eprintln!("Consider making these functions static, they can cause conflicts when another tree-sitter project uses the same function name"); + } + + if !must_have.is_empty() { + let missing = must_have + .iter() + 
.map(|f| format!(" `{f}`")) + .collect::>() + .join("\n"); + + return Err(anyhow!(format!( + indoc! {" + Missing required functions in the external scanner, parsing won't work without these! + + {} + + You can read more about this at https://tree-sitter.github.io/tree-sitter/creating-parsers#external-scanners + "}, + missing, + ))); + } } + } + + Ok(()) + } + + #[cfg(windows)] + fn check_external_scanner(&self, _name: &str, _library_path: &Path) -> Result<()> { + // TODO: there's no nm command on windows, whoever wants to implement this can and should :) + + // let mut must_have = vec![ + // format!("tree_sitter_{name}_external_scanner_create"), + // format!("tree_sitter_{name}_external_scanner_destroy"), + // format!("tree_sitter_{name}_external_scanner_serialize"), + // format!("tree_sitter_{name}_external_scanner_deserialize"), + // format!("tree_sitter_{name}_external_scanner_scan"), + // ]; + + Ok(()) + } - let output = command - .output() - .with_context(|| "Failed to execute C compiler")?; - if !output.status.success() { - return Err(anyhow!( - "Parser compilation failed.\nStdout: {}\nStderr: {}", - String::from_utf8_lossy(&output.stdout), - String::from_utf8_lossy(&output.stderr) - )); + pub fn compile_parser_to_wasm( + &self, + language_name: &str, + src_path: &Path, + scanner_filename: Option<&Path>, + output_path: &Path, + force_docker: bool, + ) -> Result<(), Error> { + #[derive(PartialEq, Eq)] + enum EmccSource { + Native(PathBuf), + Docker, + Podman, + } + + fn path_of_bin( + name: &str, + test: impl Fn(&Path) -> std::io::Result, + ) -> Option { + let bin_path = which(name).ok()?; + if test(&bin_path).is_ok() { + Some(bin_path) + } else { + None } } - let library = unsafe { Library::new(&library_path) } - .with_context(|| format!("Error opening dynamic library {:?}", &library_path))?; - let language_fn_name = format!("tree_sitter_{}", replace_dashes_with_underscores(name)); - let language = unsafe { - let language_fn: Symbol Language> = library - 
.get(language_fn_name.as_bytes()) - .with_context(|| format!("Failed to load symbol {}", language_fn_name))?; - language_fn() + // Order of preference: emscripten > docker > podman > error + let source = if force_docker { + None + } else { + path_of_bin(if cfg!(windows) { "emcc.bat" } else { "emcc" }, |p| { + Command::new(p).output() + }) + .map(EmccSource::Native) + } + .or_else(|| { + path_of_bin("docker", |docker| { + // `docker info` should succeed iff the daemon is running + // see https://docs.docker.com/config/daemon/troubleshoot/#check-whether-docker-is-running + Command::new(docker).args(["info"]).output() + }) + .map(|_| EmccSource::Docker) + }) + .or_else(|| { + path_of_bin("podman", |podman| { + Command::new(podman).arg("--version").output() + }) + .map(|_| EmccSource::Podman) + }); + + let Some(cmd) = source else { + return Err(anyhow!( + "You must have either emcc or docker on your PATH to run this command" + )); }; - mem::forget(library); - Ok(language) + + let mut command = match cmd { + EmccSource::Native(emcc_path) => { + let mut command = Command::new(emcc_path); + command.current_dir(src_path); + command + } + + EmccSource::Docker | EmccSource::Podman => { + let mut command = match cmd { + EmccSource::Docker => Command::new("docker"), + EmccSource::Podman => Command::new("podman"), + _ => unreachable!(), + }; + command.args(["run", "--rm"]); + + // Mount the parser directory as a volume + command.args(["--workdir", "/src"]); + + let mut volume_string = OsString::from(&src_path); + volume_string.push(":/src:Z"); + command.args([OsStr::new("--volume"), &volume_string]); + + // In case `docker` is an alias to `podman`, ensure that podman + // mounts the current directory as writable by the container + // user which has the same uid as the host user. Setting the + // podman-specific variable is more reliable than attempting to + // detect whether `docker` is an alias for `podman`. 
+ // see https://docs.podman.io/en/latest/markdown/podman-run.1.html#userns-mode + command.env("PODMAN_USERNS", "keep-id"); + + // Get the current user id so that files created in the docker container will have + // the same owner. + #[cfg(unix)] + { + #[link(name = "c")] + extern "C" { + fn getuid() -> u32; + } + // don't need to set user for podman since PODMAN_USERNS=keep-id is already set + if cmd == EmccSource::Docker { + let user_id = unsafe { getuid() }; + command.args(["--user", &user_id.to_string()]); + } + }; + + // Run `emcc` in a container using the `emscripten-slim` image + command.args([EMSCRIPTEN_TAG, "emcc"]); + command + } + }; + + let output_name = "output.wasm"; + + command.args([ + "-o", + output_name, + "-Os", + "-s", + "WASM=1", + "-s", + "SIDE_MODULE=2", + "-s", + "TOTAL_MEMORY=33554432", + "-s", + "NODEJS_CATCH_EXIT=0", + "-s", + &format!("EXPORTED_FUNCTIONS=[\"_tree_sitter_{language_name}\"]"), + "-fno-exceptions", + "-fvisibility=hidden", + "-I", + ".", + ]); + + if let Some(scanner_filename) = scanner_filename { + if scanner_filename + .extension() + .and_then(|ext| ext.to_str()) + .map_or(false, |ext| ["cc", "cpp"].contains(&ext)) + { + eprintln!("Warning: Using a C++ scanner is now deprecated. Please migrate your scanner code to C, as C++ support will be removed in the near future."); + command.arg("-xc++"); + } + command.arg(scanner_filename); + } + + command.arg("parser.c"); + let status = command + .spawn() + .with_context(|| "Failed to run emcc command")? 
+ .wait()?; + if !status.success() { + return Err(anyhow!("emcc command failed")); + } + + fs::rename(src_path.join(output_name), output_path) + .context("failed to rename wasm output file")?; + + Ok(()) } + #[must_use] pub fn highlight_config_for_injection_string<'a>( &'a self, string: &str, ) -> Option<&'a HighlightConfiguration> { match self.language_configuration_for_injection_string(string) { Err(e) => { - eprintln!( - "Failed to load language for injection string '{}': {}", - string, e - ); + eprintln!("Failed to load language for injection string '{string}': {e}",); None } Ok(None) => None, - Ok(Some((language, configuration))) => match configuration.highlight_config(language) { - Err(e) => { - eprintln!( - "Failed to load property sheet for injection string '{}': {}", - string, e - ); - None + Ok(Some((language, configuration))) => { + match configuration.highlight_config(language, None) { + Err(e) => { + eprintln!( + "Failed to load property sheet for injection string '{string}': {e}", + ); + None + } + Ok(None) => None, + Ok(Some(config)) => Some(config), } - Ok(None) => None, - Ok(Some(config)) => Some(config), - }, + } } } - pub fn find_language_configurations_at_path<'a>( - &'a mut self, + pub fn find_language_configurations_at_path( + &mut self, parser_path: &Path, + set_current_path_config: bool, ) -> Result<&[LanguageConfiguration]> { - #[derive(Deserialize)] + #[derive(Deserialize, Clone, Default)] #[serde(untagged)] enum PathsJSON { + #[default] Empty, Single(String), Multiple(Vec), } - impl Default for PathsJSON { - fn default() -> Self { - PathsJSON::Empty - } - } - impl PathsJSON { fn into_vec(self) -> Option> { match self { - PathsJSON::Empty => None, - PathsJSON::Single(s) => Some(vec![s]), - PathsJSON::Multiple(s) => Some(s), + Self::Empty => None, + Self::Single(s) => Some(vec![s]), + Self::Multiple(s) => Some(s), } } } @@ -523,6 +930,8 @@ impl Loader { locals: PathsJSON, #[serde(default)] tags: PathsJSON, + #[serde(default, rename = 
"external-files")] + external_files: PathsJSON, } #[derive(Deserialize)] @@ -532,9 +941,14 @@ impl Loader { tree_sitter: Vec, } + #[derive(Deserialize)] + struct GrammarJSON { + name: String, + } + let initial_language_configuration_count = self.language_configurations.len(); - if let Ok(package_json_contents) = fs::read_to_string(&parser_path.join("package.json")) { + if let Ok(package_json_contents) = fs::read_to_string(parser_path.join("package.json")) { let package_json = serde_json::from_str::(&package_json_contents); if let Ok(package_json) = package_json { let language_count = self.languages_by_id.len(); @@ -543,10 +957,17 @@ impl Loader { // the package.json, but defaults to the directory containing the package.json. let language_path = parser_path.join(config_json.path); + let grammar_path = language_path.join("src").join("grammar.json"); + let mut grammar_file = fs::File::open(grammar_path) + .with_context(|| "Failed to read grammar.json")?; + let grammar_json: GrammarJSON = + serde_json::from_reader(BufReader::new(&mut grammar_file)) + .with_context(|| "Failed to parse grammar.json")?; + // Determine if a previous language configuration in this package.json file // already uses the same language. let mut language_id = None; - for (id, (path, _)) in + for (id, (path, _, _)) in self.languages_by_id.iter().enumerate().skip(language_count) { if language_path == *path { @@ -555,38 +976,71 @@ impl Loader { } // If not, add a new language path to the list. 
- let language_id = language_id.unwrap_or_else(|| { - self.languages_by_id.push((language_path, OnceCell::new())); + let language_id = if let Some(language_id) = language_id { + language_id + } else { + self.languages_by_id.push(( + language_path, + OnceCell::new(), + config_json.external_files.clone().into_vec().map(|files| { + files.into_iter() + .map(|path| { + let path = parser_path.join(path); + // prevent p being above/outside of parser_path + + if path.starts_with(parser_path) { + Ok(path) + } else { + Err(anyhow!("External file path {path:?} is outside of parser directory {parser_path:?}")) + } + }) + .collect::>>() + }).transpose()?, + )); self.languages_by_id.len() - 1 - }); + }; let configuration = LanguageConfiguration { root_path: parser_path.to_path_buf(), + language_name: grammar_json.name.clone(), scope: config_json.scope, language_id, file_types: config_json.file_types.unwrap_or(Vec::new()), - content_regex: Self::regex(config_json.content_regex), - _first_line_regex: Self::regex(config_json.first_line_regex), - injection_regex: Self::regex(config_json.injection_regex), + content_regex: Self::regex(config_json.content_regex.as_deref()), + first_line_regex: Self::regex(config_json.first_line_regex.as_deref()), + injection_regex: Self::regex(config_json.injection_regex.as_deref()), injections_filenames: config_json.injections.into_vec(), locals_filenames: config_json.locals.into_vec(), tags_filenames: config_json.tags.into_vec(), highlights_filenames: config_json.highlights.into_vec(), highlight_config: OnceCell::new(), tags_config: OnceCell::new(), - highlight_names: &*self.highlight_names, + highlight_names: &self.highlight_names, use_all_highlight_names: self.use_all_highlight_names, }; for file_type in &configuration.file_types { self.language_configuration_ids_by_file_type .entry(file_type.to_string()) - .or_insert(Vec::new()) + .or_default() + .push(self.language_configurations.len()); + } + if let Some(first_line_regex) = 
&configuration.first_line_regex { + self.language_configuration_ids_by_first_line_regex + .entry(first_line_regex.to_string()) + .or_default() .push(self.language_configurations.len()); } self.language_configurations .push(unsafe { mem::transmute(configuration) }); + + if set_current_path_config + && self.language_configuration_in_current_path.is_none() + { + self.language_configuration_in_current_path = + Some(self.language_configurations.len() - 1); + } } } } @@ -594,13 +1048,20 @@ impl Loader { if self.language_configurations.len() == initial_language_configuration_count && parser_path.join("src").join("grammar.json").exists() { + let grammar_path = parser_path.join("src").join("grammar.json"); + let mut grammar_file = + fs::File::open(grammar_path).with_context(|| "Failed to read grammar.json")?; + let grammar_json: GrammarJSON = + serde_json::from_reader(BufReader::new(&mut grammar_file)) + .with_context(|| "Failed to parse grammar.json")?; let configuration = LanguageConfiguration { root_path: parser_path.to_owned(), + language_name: grammar_json.name, language_id: self.languages_by_id.len(), file_types: Vec::new(), scope: None, content_regex: None, - _first_line_regex: None, + first_line_regex: None, injection_regex: None, injections_filenames: None, locals_filenames: None, @@ -608,20 +1069,20 @@ impl Loader { tags_filenames: None, highlight_config: OnceCell::new(), tags_config: OnceCell::new(), - highlight_names: &*self.highlight_names, + highlight_names: &self.highlight_names, use_all_highlight_names: self.use_all_highlight_names, }; self.language_configurations .push(unsafe { mem::transmute(configuration) }); self.languages_by_id - .push((parser_path.to_owned(), OnceCell::new())); + .push((parser_path.to_owned(), OnceCell::new(), None)); } Ok(&self.language_configurations[initial_language_configuration_count..]) } - fn regex(pattern: Option) -> Option { - pattern.and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()) + fn regex(pattern: 
Option<&str>) -> Option { + pattern.and_then(|r| RegexBuilder::new(r).multi_line(true).build().ok()) } pub fn select_language( @@ -633,11 +1094,11 @@ impl Loader { if let Some(scope) = scope { if let Some(config) = self .language_configuration_for_scope(scope) - .with_context(|| format!("Failed to load language for scope '{}'", scope))? + .with_context(|| format!("Failed to load language for scope '{scope}'"))? { Ok(config.0) } else { - return Err(anyhow!("Unknown scope '{}'", scope)); + Err(anyhow!("Unknown scope '{scope}'")) } } else if let Some((lang, _)) = self .language_configuration_for_file_name(path) @@ -649,13 +1110,17 @@ impl Loader { })? { Ok(lang) + } else if let Some(id) = self.language_configuration_in_current_path { + Ok(self.language_for_id(self.language_configurations[id].language_id)?) } else if let Some(lang) = self - .languages_at_path(¤t_dir) + .languages_at_path(current_dir) .with_context(|| "Failed to load language in current directory")? .first() .cloned() { - Ok(lang) + Ok(lang.0) + } else if let Some(lang) = self.language_configuration_for_first_line_regex(path)? 
{ + Ok(lang.0) } else { Err(anyhow!("No language found")) } @@ -664,25 +1129,90 @@ impl Loader { pub fn use_debug_build(&mut self, flag: bool) { self.debug_build = flag; } + + #[cfg(feature = "wasm")] + pub fn use_wasm(&mut self, engine: tree_sitter::wasmtime::Engine) { + *self.wasm_store.lock().unwrap() = Some(tree_sitter::WasmStore::new(engine).unwrap()); + } + + #[must_use] + pub fn get_scanner_path(&self, src_path: &Path) -> Option { + let mut path = src_path.join("scanner.c"); + for extension in ["c", "cc", "cpp"] { + path.set_extension(extension); + if path.exists() { + return Some(path); + } + } + None + } } impl<'a> LanguageConfiguration<'a> { - pub fn highlight_config(&self, language: Language) -> Result> { - return self - .highlight_config + pub fn highlight_config( + &self, + language: Language, + paths: Option<&[String]>, + ) -> Result> { + let (highlights_filenames, injections_filenames, locals_filenames) = match paths { + Some(paths) => ( + Some( + paths + .iter() + .filter(|p| p.ends_with("highlights.scm")) + .cloned() + .collect::>(), + ), + Some( + paths + .iter() + .filter(|p| p.ends_with("tags.scm")) + .cloned() + .collect::>(), + ), + Some( + paths + .iter() + .filter(|p| p.ends_with("locals.scm")) + .cloned() + .collect::>(), + ), + ), + None => (None, None, None), + }; + self.highlight_config .get_or_try_init(|| { - let (highlights_query, highlight_ranges) = - self.read_queries(&self.highlights_filenames, "highlights.scm")?; - let (injections_query, injection_ranges) = - self.read_queries(&self.injections_filenames, "injections.scm")?; - let (locals_query, locals_ranges) = - self.read_queries(&self.locals_filenames, "locals.scm")?; + let (highlights_query, highlight_ranges) = self.read_queries( + if highlights_filenames.is_some() { + highlights_filenames.as_deref() + } else { + self.highlights_filenames.as_deref() + }, + "highlights.scm", + )?; + let (injections_query, injection_ranges) = self.read_queries( + if injections_filenames.is_some() 
{ + injections_filenames.as_deref() + } else { + self.injections_filenames.as_deref() + }, + "injections.scm", + )?; + let (locals_query, locals_ranges) = self.read_queries( + if locals_filenames.is_some() { + locals_filenames.as_deref() + } else { + self.locals_filenames.as_deref() + }, + "locals.scm", + )?; if highlights_query.is_empty() { Ok(None) } else { let mut result = HighlightConfiguration::new( language, + &self.language_name, &highlights_query, &injections_query, &locals_query, @@ -717,25 +1247,26 @@ impl<'a> LanguageConfiguration<'a> { let mut all_highlight_names = self.highlight_names.lock().unwrap(); if self.use_all_highlight_names { for capture_name in result.query.capture_names() { - if !all_highlight_names.contains(capture_name) { - all_highlight_names.push(capture_name.clone()); + if !all_highlight_names.iter().any(|x| x == capture_name) { + all_highlight_names.push((*capture_name).to_string()); } } } - result.configure(&all_highlight_names.as_slice()); + result.configure(all_highlight_names.as_slice()); + drop(all_highlight_names); Ok(Some(result)) } }) - .map(Option::as_ref); + .map(Option::as_ref) } pub fn tags_config(&self, language: Language) -> Result> { self.tags_config .get_or_try_init(|| { let (tags_query, tags_ranges) = - self.read_queries(&self.tags_filenames, "tags.scm")?; + self.read_queries(self.tags_filenames.as_deref(), "tags.scm")?; let (locals_query, locals_ranges) = - self.read_queries(&self.locals_filenames, "locals.scm")?; + self.read_queries(self.locals_filenames.as_deref(), "locals.scm")?; if tags_query.is_empty() { Ok(None) } else { @@ -758,7 +1289,6 @@ impl<'a> LanguageConfiguration<'a> { locals_query.len(), ) } - .into() } else { error.into() } @@ -768,9 +1298,9 @@ impl<'a> LanguageConfiguration<'a> { .map(Option::as_ref) } - fn include_path_in_query_error<'b>( + fn include_path_in_query_error( mut error: QueryError, - ranges: &'b Vec<(String, Range)>, + ranges: &[(String, Range)], source: &str, start_offset: usize, ) -> 
Error { @@ -778,36 +1308,47 @@ impl<'a> LanguageConfiguration<'a> { let (path, range) = ranges .iter() .find(|(_, range)| range.contains(&offset_within_section)) - .unwrap(); + .unwrap_or_else(|| ranges.last().unwrap()); error.offset = offset_within_section - range.start; error.row = source[range.start..offset_within_section] .chars() .filter(|c| *c == '\n') .count(); - Error::from(error).context(format!("Error in query file {:?}", path)) + Error::from(error).context(format!("Error in query file {path:?}")) } + #[allow(clippy::type_complexity)] fn read_queries( &self, - paths: &Option>, + paths: Option<&[String]>, default_path: &str, ) -> Result<(String, Vec<(String, Range)>)> { let mut query = String::new(); let mut path_ranges = Vec::new(); - if let Some(paths) = paths.as_ref() { + if let Some(paths) = paths { for path in paths { let abs_path = self.root_path.join(path); let prev_query_len = query.len(); query += &fs::read_to_string(&abs_path) - .with_context(|| format!("Failed to read query file {:?}", path))?; + .with_context(|| format!("Failed to read query file {path:?}"))?; path_ranges.push((path.clone(), prev_query_len..query.len())); } } else { + // highlights.scm is needed to test highlights, and tags.scm to test tags + if default_path == "highlights.scm" || default_path == "tags.scm" { + eprintln!( + indoc! 
{" + Warning: you should add a `{}` entry pointing to the highlights path in `tree-sitter` language list in the grammar's package.json + See more here: https://tree-sitter.github.io/tree-sitter/syntax-highlighting#query-paths + "}, + default_path.replace(".scm", "") + ); + } let queries_path = self.root_path.join("queries"); let path = queries_path.join(default_path); if path.exists() { query = fs::read_to_string(&path) - .with_context(|| format!("Failed to read query file {:?}", path))?; + .with_context(|| format!("Failed to read query file {path:?}"))?; path_ranges.push((default_path.to_string(), 0..query.len())); } } @@ -816,20 +1357,14 @@ impl<'a> LanguageConfiguration<'a> { } } -fn needs_recompile( - lib_path: &Path, - parser_c_path: &Path, - scanner_path: &Option, -) -> Result { +fn needs_recompile(lib_path: &Path, paths_to_check: &[PathBuf]) -> Result { if !lib_path.exists() { return Ok(true); } - let lib_mtime = mtime(lib_path)?; - if mtime(parser_c_path)? > lib_mtime { - return Ok(true); - } - if let Some(scanner_path) = scanner_path { - if mtime(scanner_path)? > lib_mtime { + let lib_mtime = + mtime(lib_path).with_context(|| format!("Failed to read mtime of {lib_path:?}"))?; + for path in paths_to_check { + if mtime(path)? 
> lib_mtime { return Ok(true); } } diff --git a/third-party/tree-sitter/tree-sitter/cli/npm/.gitignore b/third-party/tree-sitter/tree-sitter/cli/npm/.gitignore index 942b33a180b..65e04cffcbf 100644 --- a/third-party/tree-sitter/tree-sitter/cli/npm/.gitignore +++ b/third-party/tree-sitter/tree-sitter/cli/npm/.gitignore @@ -3,3 +3,4 @@ tree-sitter.exe *.gz *.tgz LICENSE +README.md diff --git a/third-party/tree-sitter/tree-sitter/cli/npm/dsl.d.ts b/third-party/tree-sitter/tree-sitter/cli/npm/dsl.d.ts index f2ee57f1053..63f9ed493d9 100644 --- a/third-party/tree-sitter/tree-sitter/cli/npm/dsl.d.ts +++ b/third-party/tree-sitter/tree-sitter/cli/npm/dsl.d.ts @@ -1,19 +1,19 @@ -type AliasRule = {type: 'ALIAS'; named: boolean; content: Rule; value: string}; -type BlankRule = {type: 'BLANK'}; -type ChoiceRule = {type: 'CHOICE'; members: Rule[]}; -type FieldRule = {type: 'FIELD'; name: string; content: Rule}; -type ImmediateTokenRule = {type: 'IMMEDIATE_TOKEN'; content: Rule}; -type PatternRule = {type: 'PATTERN'; value: string}; -type PrecDynamicRule = {type: 'PREC_DYNAMIC'; content: Rule; value: number}; -type PrecLeftRule = {type: 'PREC_LEFT'; content: Rule; value: number}; -type PrecRightRule = {type: 'PREC_RIGHT'; content: Rule; value: number}; -type PrecRule = {type: 'PREC'; content: Rule; value: number}; -type Repeat1Rule = {type: 'REPEAT1'; content: Rule}; -type RepeatRule = {type: 'REPEAT'; content: Rule}; -type SeqRule = {type: 'SEQ'; members: Rule[]}; -type StringRule = {type: 'STRING'; value: string}; -type SymbolRule = {type: 'SYMBOL'; name: Name}; -type TokenRule = {type: 'TOKEN'; content: Rule}; +type AliasRule = { type: 'ALIAS'; named: boolean; content: Rule; value: string }; +type BlankRule = { type: 'BLANK' }; +type ChoiceRule = { type: 'CHOICE'; members: Rule[] }; +type FieldRule = { type: 'FIELD'; name: string; content: Rule }; +type ImmediateTokenRule = { type: 'IMMEDIATE_TOKEN'; content: Rule }; +type PatternRule = { type: 'PATTERN'; value: string }; 
+type PrecDynamicRule = { type: 'PREC_DYNAMIC'; content: Rule; value: number }; +type PrecLeftRule = { type: 'PREC_LEFT'; content: Rule; value: number }; +type PrecRightRule = { type: 'PREC_RIGHT'; content: Rule; value: number }; +type PrecRule = { type: 'PREC'; content: Rule; value: number }; +type Repeat1Rule = { type: 'REPEAT1'; content: Rule }; +type RepeatRule = { type: 'REPEAT'; content: Rule }; +type SeqRule = { type: 'SEQ'; members: Rule[] }; +type StringRule = { type: 'STRING'; value: string }; +type SymbolRule = { type: 'SYMBOL'; name: Name }; +type TokenRule = { type: 'TOKEN'; content: Rule }; type Rule = | AliasRule @@ -42,14 +42,15 @@ type GrammarSymbols = { type RuleBuilder = ( $: GrammarSymbols, + previous: Rule, ) => RuleOrLiteral; type RuleBuilders< RuleName extends string, BaseGrammarRuleName extends string > = { - [name in RuleName]: RuleBuilder; -}; + [name in RuleName]: RuleBuilder; + }; interface Grammar< RuleName extends string, @@ -68,11 +69,17 @@ interface Grammar< rules: Rules; /** - * An array of arrays of precedence names. Each inner array represents - * a *descending* ordering. Names listed earlier in one of these arrays - * have higher precedence than any names listed later in the same array. + * An array of arrays of precedence names or rules. Each inner array represents + * a *descending* ordering. Names/rules listed earlier in one of these arrays + * have higher precedence than any names/rules listed later in the same array. + * + * Using rules is just a shorthand way for using a name then calling prec() + * with that name. It is just a convenience. */ - precedences?: () => String[][], + precedences?: ( + $: GrammarSymbols, + previous: Rule[][], + ) => RuleOrLiteral[][], /** * An array of arrays of rule names. 
Each inner array represents a set of @@ -86,6 +93,7 @@ interface Grammar< */ conflicts?: ( $: GrammarSymbols, + previous: Rule[][], ) => RuleOrLiteral[][]; /** @@ -102,7 +110,7 @@ interface Grammar< externals?: ( $: Record>, previous: Rule[], - ) => SymbolRule[]; + ) => RuleOrLiteral[]; /** * An array of tokens that may appear anywhere in the language. This @@ -126,6 +134,7 @@ interface Grammar< */ inline?: ( $: GrammarSymbols, + previous: Rule[], ) => RuleOrLiteral[]; /** @@ -134,10 +143,11 @@ interface Grammar< * * @param $ grammar rules * - * @see http://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types + * @see https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types */ supertypes?: ( $: GrammarSymbols, + previous: Rule[], ) => RuleOrLiteral[]; /** @@ -153,8 +163,8 @@ interface Grammar< type GrammarSchema = { [K in keyof Grammar]: K extends 'rules' - ? Record - : Grammar[K]; + ? Record + : Grammar[K]; }; /** diff --git a/third-party/tree-sitter/tree-sitter/cli/npm/install.js b/third-party/tree-sitter/tree-sitter/cli/npm/install.js index 2790b47d789..b008947cba7 100644 --- a/third-party/tree-sitter/tree-sitter/cli/npm/install.js +++ b/third-party/tree-sitter/tree-sitter/cli/npm/install.js @@ -6,25 +6,43 @@ const http = require('http'); const https = require('https'); const packageJSON = require('./package.json'); -// Determine the URL of the file. -const platformName = { - 'darwin': 'macos', - 'linux': 'linux', - 'win32': 'windows' -}[process.platform]; - -let archName = { - 'x64': 'x64', - 'x86': 'x86', - 'ia32': 'x86' -}[process.arch]; - -// ARM macs can run x64 binaries via Rosetta. Rely on that for now. 
-if (platformName === 'macos' && process.arch === 'arm64') { - archName = 'x64'; +// Look to a results table in https://github.com/tree-sitter/tree-sitter/issues/2196 +const matrix = { + platform: { + 'darwin': { + name: 'macos', + arch: { + 'arm64': { name: 'arm64' }, + 'x64': { name: 'x64' }, + } + }, + 'linux': { + name: 'linux', + arch: { + 'arm64': { name: 'arm64' }, + 'arm': { name: 'arm' }, + 'x64': { name: 'x64' }, + 'x86': { name: 'x86' }, + 'ppc64': { name: 'powerpc64' }, + } + }, + 'win32': { + name: 'windows', + arch: { + 'arm64': { name: 'arm64' }, + 'x64': { name: 'x64' }, + 'x86': { name: 'x86' }, + 'ia32': { name: 'x86' }, + } + }, + }, } -if (!platformName || !archName) { +// Determine the URL of the file. +const platform = matrix.platform[process.platform]; +const arch = platform && platform.arch[process.arch]; + +if (!platform || !platform.name || !arch || !arch.name) { console.error( `Cannot install tree-sitter-cli for platform ${process.platform}, architecture ${process.arch}` ); @@ -32,7 +50,7 @@ if (!platformName || !archName) { } const releaseURL = `https://github.com/tree-sitter/tree-sitter/releases/download/v${packageJSON.version}`; -const assetName = `tree-sitter-${platformName}-${archName}.gz`; +const assetName = `tree-sitter-${platform.name}-${arch.name}.gz`; const assetURL = `${releaseURL}/${assetName}`; // Remove previously-downloaded files. @@ -65,29 +83,42 @@ file.on('finish', () => { // Follow redirects. function get(url, callback) { - const requestUrl = new URL(url) - let request = https - let requestConfig = requestUrl - const proxyEnv = process.env['HTTPS_PROXY'] || process.env['https_proxy'] - - if (proxyEnv) { - const proxyUrl = new URL(proxyEnv) - request = proxyUrl.protocol === 'https:' ? 
https : http - requestConfig = { - hostname: proxyUrl.hostname, - port: proxyUrl.port, - path: requestUrl.toString(), - headers: { - Host: requestUrl.hostname - } - } - } - - request.get(requestConfig, response => { + const processResponse = (response) => { if (response.statusCode === 301 || response.statusCode === 302) { get(response.headers.location, callback); } else { callback(response); } - }); + }; + + const proxyEnv = process.env['HTTPS_PROXY'] || process.env['https_proxy']; + if (!proxyEnv) { + https.get(url, processResponse); + return; + } + + const requestUrl = new URL(url); + const requestPort = requestUrl.port || (requestUrl.protocol === 'https:' ? 443 : 80); + const proxyUrl = new URL(proxyEnv); + const request = proxyUrl.protocol === 'https:' ? https : http; + request.request({ + host: proxyUrl.hostname, + port: proxyUrl.port || (proxyUrl.protocol === 'https:' ? 443 : 80), + method: 'CONNECT', + path: `${requestUrl.hostname}:${requestPort}`, + }).on('connect', (response, socket, _head) => { + if (response.statusCode !== 200) { + // let caller handle error + callback(response); + return; + } + + const agent = https.Agent({ socket }); + https.get({ + host: requestUrl.host, + port: requestPort, + path: `${requestUrl.pathname}${requestUrl.search}`, + agent, + }, processResponse); + }).end(); } diff --git a/third-party/tree-sitter/tree-sitter/cli/npm/package.json b/third-party/tree-sitter/tree-sitter/cli/npm/package.json index 02309193fbb..5868401f8d8 100644 --- a/third-party/tree-sitter/tree-sitter/cli/npm/package.json +++ b/third-party/tree-sitter/tree-sitter/cli/npm/package.json @@ -1,11 +1,11 @@ { "name": "tree-sitter-cli", - "version": "0.20.8", + "version": "0.22.1", "author": "Max Brunsfeld", "license": "MIT", "repository": { "type": "git", - "url": "http://github.com/tree-sitter/tree-sitter.git" + "url": "https://github.com/tree-sitter/tree-sitter.git" }, "description": "CLI for generating fast incremental parsers", "keywords": [ @@ -15,7 +15,8 @@ 
"main": "lib/api/index.js", "scripts": { "install": "node install.js", - "prepack": "cp ../../LICENSE ." + "prepack": "cp ../../LICENSE ../README.md .", + "postpack": "rm LICENSE README.md" }, "bin": { "tree-sitter": "cli.js" diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/binding_files.rs b/third-party/tree-sitter/tree-sitter/cli/src/generate/binding_files.rs deleted file mode 100644 index 4241b61660a..00000000000 --- a/third-party/tree-sitter/tree-sitter/cli/src/generate/binding_files.rs +++ /dev/null @@ -1,154 +0,0 @@ -use super::write_file; -use anyhow::{Context, Result}; -use std::path::{Path, PathBuf}; -use std::{fs, str}; - -const BINDING_CC_TEMPLATE: &'static str = include_str!("./templates/binding.cc"); -const BINDING_GYP_TEMPLATE: &'static str = include_str!("./templates/binding.gyp"); -const INDEX_JS_TEMPLATE: &'static str = include_str!("./templates/index.js"); -const LIB_RS_TEMPLATE: &'static str = include_str!("./templates/lib.rs"); -const BUILD_RS_TEMPLATE: &'static str = include_str!("./templates/build.rs"); -const CARGO_TOML_TEMPLATE: &'static str = include_str!("./templates/cargo.toml"); -const PACKAGE_JSON_TEMPLATE: &'static str = include_str!("./templates/package.json"); -const PARSER_NAME_PLACEHOLDER: &'static str = "PARSER_NAME"; -const CLI_VERSION_PLACEHOLDER: &'static str = "CLI_VERSION"; -const CLI_VERSION: &'static str = env!("CARGO_PKG_VERSION"); -const RUST_BINDING_VERSION: &'static str = env!("RUST_BINDING_VERSION"); -const RUST_BINDING_VERSION_PLACEHOLDER: &'static str = "RUST_BINDING_VERSION"; - -pub fn generate_binding_files(repo_path: &Path, language_name: &str) -> Result<()> { - let bindings_dir = repo_path.join("bindings"); - - let dashed_language_name = language_name.replace("_", "-"); - let dashed_language_name = dashed_language_name.as_str(); - - // Generate rust bindings if needed. 
- let rust_binding_dir = bindings_dir.join("rust"); - create_path(&rust_binding_dir, |path| create_dir(path))?; - - create_path(&rust_binding_dir.join("lib.rs").to_owned(), |path| { - generate_file(path, LIB_RS_TEMPLATE, language_name) - })?; - - create_path(&rust_binding_dir.join("build.rs").to_owned(), |path| { - generate_file(path, BUILD_RS_TEMPLATE, language_name) - })?; - - create_path(&repo_path.join("Cargo.toml").to_owned(), |path| { - generate_file(path, CARGO_TOML_TEMPLATE, dashed_language_name) - })?; - - // Generate node bindings - let node_binding_dir = bindings_dir.join("node"); - create_path(&node_binding_dir, |path| create_dir(path))?; - - create_path(&node_binding_dir.join("index.js").to_owned(), |path| { - generate_file(path, INDEX_JS_TEMPLATE, language_name) - })?; - - create_path(&node_binding_dir.join("binding.cc").to_owned(), |path| { - generate_file(path, BINDING_CC_TEMPLATE, language_name) - })?; - - // Create binding.gyp, or update it with new binding path. - let binding_gyp_path = repo_path.join("binding.gyp"); - create_path_else( - &binding_gyp_path, - |path| generate_file(path, BINDING_GYP_TEMPLATE, language_name), - |path| { - let binding_gyp = - fs::read_to_string(path).with_context(|| "Failed to read binding.gyp")?; - let old_path = "\"src/binding.cc\""; - if binding_gyp.contains(old_path) { - eprintln!("Updating binding.gyp with new binding path"); - let binding_gyp = binding_gyp.replace(old_path, "\"bindings/node/binding.cc\""); - write_file(path, binding_gyp)?; - } - Ok(()) - }, - )?; - - // Create package.json, or update it with new binding path. 
- let package_json_path = repo_path.join("package.json"); - create_path_else( - &package_json_path, - |path| generate_file(path, PACKAGE_JSON_TEMPLATE, dashed_language_name), - |path| { - let package_json_str = - fs::read_to_string(path).with_context(|| "Failed to read package.json")?; - let mut package_json = - serde_json::from_str::>( - &package_json_str, - ) - .with_context(|| "Failed to parse package.json")?; - let package_json_main = package_json.get("main"); - let package_json_needs_update = package_json_main.map_or(true, |v| { - let main_string = v.as_str(); - main_string == Some("index.js") || main_string == Some("./index.js") - }); - if package_json_needs_update { - eprintln!("Updating package.json with new binding path"); - package_json.insert( - "main".to_string(), - serde_json::Value::String("bindings/node".to_string()), - ); - let mut package_json_str = serde_json::to_string_pretty(&package_json)?; - package_json_str.push('\n'); - write_file(path, package_json_str)?; - } - Ok(()) - }, - )?; - - // Remove files from old node binding paths. 
- let old_index_js_path = repo_path.join("index.js"); - let old_binding_cc_path = repo_path.join("src").join("binding.cc"); - if old_index_js_path.exists() { - fs::remove_file(old_index_js_path).ok(); - } - if old_binding_cc_path.exists() { - fs::remove_file(old_binding_cc_path).ok(); - } - - Ok(()) -} - -fn generate_file(path: &Path, template: &str, language_name: &str) -> Result<()> { - write_file( - path, - template - .replace(PARSER_NAME_PLACEHOLDER, language_name) - .replace(CLI_VERSION_PLACEHOLDER, CLI_VERSION) - .replace(RUST_BINDING_VERSION_PLACEHOLDER, RUST_BINDING_VERSION), - ) -} - -fn create_dir(path: &Path) -> Result<()> { - fs::create_dir_all(&path) - .with_context(|| format!("Failed to create {:?}", path.to_string_lossy())) -} - -fn create_path(path: &PathBuf, action: F) -> Result -where - F: Fn(&PathBuf) -> Result<()>, -{ - if !path.exists() { - action(path)?; - return Ok(true); - } - Ok(false) -} - -fn create_path_else(path: &PathBuf, action: T, else_action: F) -> Result -where - T: Fn(&PathBuf) -> Result<()>, - F: Fn(&PathBuf) -> Result<()>, -{ - if !path.exists() { - action(path)?; - return Ok(true); - } else { - else_action(path)?; - } - Ok(false) -} diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/build_tables/build_lex_table.rs b/third-party/tree-sitter/tree-sitter/cli/src/generate/build_tables/build_lex_table.rs index d3ebb2419a9..bc65447c6d2 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/generate/build_tables/build_lex_table.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/build_tables/build_lex_table.rs @@ -10,7 +10,7 @@ use std::collections::hash_map::Entry; use std::collections::{HashMap, VecDeque}; use std::mem; -pub(crate) fn build_lex_table( +pub fn build_lex_table( parse_table: &mut ParseTable, syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, @@ -18,23 +18,22 @@ pub(crate) fn build_lex_table( coincident_token_index: &CoincidentTokenIndex, token_conflict_map: &TokenConflictMap, ) 
-> (LexTable, LexTable) { - let keyword_lex_table; - if syntax_grammar.word_token.is_some() { + let keyword_lex_table = if syntax_grammar.word_token.is_some() { let mut builder = LexTableBuilder::new(lexical_grammar); builder.add_state_for_tokens(keywords); - keyword_lex_table = builder.table; + builder.table } else { - keyword_lex_table = LexTable::default(); - } + LexTable::default() + }; - let mut parse_state_ids_by_token_set: Vec<(TokenSet, Vec)> = Vec::new(); + let mut parse_state_ids_by_token_set = Vec::<(TokenSet, Vec)>::new(); for (i, state) in parse_table.states.iter().enumerate() { let tokens = state .terminal_entries .keys() .filter_map(|token| { if token.is_terminal() { - if keywords.contains(&token) { + if keywords.contains(token) { syntax_grammar.word_token } else { Some(*token) @@ -48,7 +47,7 @@ pub(crate) fn build_lex_table( .collect(); let mut did_merge = false; - for entry in parse_state_ids_by_token_set.iter_mut() { + for entry in &mut parse_state_ids_by_token_set { if merge_token_set( &mut entry.0, &tokens, @@ -198,7 +197,7 @@ impl<'a> LexTableBuilder<'a> { for transition in transitions { if let Some((completed_id, completed_precedence)) = completion { if !TokenConflictMap::prefer_transition( - &self.lexical_grammar, + self.lexical_grammar, &transition, completed_id, completed_precedence, @@ -248,12 +247,11 @@ fn merge_token_set( { return false; } - if !coincident_token_index.contains(symbol, existing_token) { - if token_conflict_map.does_overlap(existing_token.index, i) - || token_conflict_map.does_overlap(i, existing_token.index) - { - return false; - } + if !coincident_token_index.contains(symbol, existing_token) + && (token_conflict_map.does_overlap(existing_token.index, i) + || token_conflict_map.does_overlap(i, existing_token.index)) + { + return false; } } } @@ -315,7 +313,7 @@ fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) { let mut new_state = LexState::default(); mem::swap(&mut new_state, &mut 
table.states[state_ids[0]]); - for (_, advance_action) in new_state.advance_actions.iter_mut() { + for (_, advance_action) in &mut new_state.advance_actions { advance_action.state = group_ids_by_state_id[advance_action.state]; } if let Some(eof_action) = &mut new_state.eof_action { @@ -324,18 +322,14 @@ fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) { new_states.push(new_state); } - for state in parse_table.states.iter_mut() { + for state in &mut parse_table.states { state.lex_state_id = group_ids_by_state_id[state.lex_state_id]; } table.states = new_states; } -fn lex_states_differ( - left: &LexState, - right: &LexState, - group_ids_by_state_id: &Vec, -) -> bool { +fn lex_states_differ(left: &LexState, right: &LexState, group_ids_by_state_id: &[usize]) -> bool { left.advance_actions .iter() .zip(right.advance_actions.iter()) @@ -362,7 +356,7 @@ fn sort_states(table: &mut LexTable, parse_table: &mut ParseTable) { .map(|old_id| { let mut state = LexState::default(); mem::swap(&mut state, &mut table.states[*old_id]); - for (_, advance_action) in state.advance_actions.iter_mut() { + for (_, advance_action) in &mut state.advance_actions { advance_action.state = new_ids_by_old_id[advance_action.state]; } if let Some(eof_action) = &mut state.eof_action { @@ -373,7 +367,7 @@ fn sort_states(table: &mut LexTable, parse_table: &mut ParseTable) { .collect(); // Update the parse table's lex state references - for state in parse_table.states.iter_mut() { + for state in &mut parse_table.states { state.lex_state_id = new_ids_by_old_id[state.lex_state_id]; } } diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/build_tables/build_parse_table.rs b/third-party/tree-sitter/tree-sitter/cli/src/generate/build_tables/build_parse_table.rs index 10320263a90..b9b78966d47 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/generate/build_tables/build_parse_table.rs +++ 
b/third-party/tree-sitter/tree-sitter/cli/src/generate/build_tables/build_parse_table.rs @@ -25,7 +25,7 @@ use rustc_hash::FxHasher; type SymbolSequence = Vec; type AuxiliarySymbolSequence = Vec; -pub(crate) type ParseStateInfo<'a> = (SymbolSequence, ParseItemSet<'a>); +pub type ParseStateInfo<'a> = (SymbolSequence, ParseItemSet<'a>); #[derive(Clone)] struct AuxiliarySymbolInfo { @@ -51,12 +51,13 @@ struct ParseTableBuilder<'a> { item_set_builder: ParseItemSetBuilder<'a>, syntax_grammar: &'a SyntaxGrammar, lexical_grammar: &'a LexicalGrammar, - variable_info: &'a Vec, + variable_info: &'a [VariableInfo], core_ids_by_core: HashMap, usize>, state_ids_by_item_set: IndexMap, ParseStateId, BuildHasherDefault>, parse_state_info_by_id: Vec>, parse_state_queue: VecDeque, non_terminal_extra_states: Vec<(Symbol, usize)>, + actual_conflicts: HashSet>, parse_table: ParseTable, } @@ -74,14 +75,10 @@ impl<'a> ParseTableBuilder<'a> { self.add_parse_state( &Vec::new(), &Vec::new(), - ParseItemSet::with( - [( - ParseItem::start(), - [Symbol::end()].iter().cloned().collect(), - )] - .iter() - .cloned(), - ), + ParseItemSet::with(std::iter::once(( + ParseItem::start(), + std::iter::once(&Symbol::end()).copied().collect(), + ))), ); // Compute the possible item sets for non-terminal extras. 
@@ -96,7 +93,7 @@ impl<'a> ParseTableBuilder<'a> { for production in &variable.productions { non_terminal_extra_item_sets_by_first_terminal .entry(production.first_symbol().unwrap()) - .or_insert(ParseItemSet::default()) + .or_insert_with(ParseItemSet::default) .insert( ParseItem { variable_index: extra_non_terminal.index as u32, @@ -104,9 +101,8 @@ impl<'a> ParseTableBuilder<'a> { step_index: 1, has_preceding_inherited_fields: false, }, - &[Symbol::end_of_nonterminal_extra()] - .iter() - .cloned() + &std::iter::once(&Symbol::end_of_nonterminal_extra()) + .copied() .collect(), ); } @@ -128,10 +124,24 @@ impl<'a> ParseTableBuilder<'a> { self.parse_state_info_by_id[entry.state_id].0.clone(), entry.preceding_auxiliary_symbols, entry.state_id, - item_set, + &item_set, )?; } + if !self.actual_conflicts.is_empty() { + println!("Warning: unnecessary conflicts"); + for conflict in &self.actual_conflicts { + println!( + " {}", + conflict + .iter() + .map(|symbol| format!("`{}`", self.symbol_name(symbol))) + .collect::>() + .join(", ") + ); + } + } + Ok((self.parse_table, self.parse_state_info_by_id)) } @@ -180,7 +190,7 @@ impl<'a> ParseTableBuilder<'a> { mut preceding_symbols: SymbolSequence, mut preceding_auxiliary_symbols: Vec, state_id: ParseStateId, - item_set: ParseItemSet<'a>, + item_set: &ParseItemSet<'a>, ) -> Result<()> { let mut terminal_successors = BTreeMap::new(); let mut non_terminal_successors = BTreeMap::new(); @@ -203,7 +213,7 @@ impl<'a> ParseTableBuilder<'a> { // for conflict resolution. 
if variable.is_auxiliary() { preceding_auxiliary_symbols - .push(self.get_auxiliary_node_info(&item_set, next_symbol)); + .push(self.get_auxiliary_node_info(item_set, next_symbol)); } // For most parse items, the symbols associated with the preceding children @@ -223,12 +233,12 @@ impl<'a> ParseTableBuilder<'a> { non_terminal_successors .entry(next_symbol) - .or_insert_with(|| ParseItemSet::default()) + .or_insert_with(ParseItemSet::default) .insert(successor, lookaheads); } else { terminal_successors .entry(next_symbol) - .or_insert_with(|| ParseItemSet::default()) + .or_insert_with(ParseItemSet::default) .insert(successor, lookaheads); } } @@ -253,7 +263,7 @@ impl<'a> ParseTableBuilder<'a> { let table_entry = self.parse_table.states[state_id] .terminal_entries .entry(lookahead) - .or_insert_with(|| ParseTableEntry::new()); + .or_insert_with(ParseTableEntry::new); let reduction_info = reduction_infos.entry(lookahead).or_default(); // While inserting Reduce actions, eagerly resolve conflicts related @@ -263,7 +273,7 @@ impl<'a> ParseTableBuilder<'a> { table_entry.actions.push(action); } else { match Self::compare_precedence( - &self.syntax_grammar, + self.syntax_grammar, precedence, &[symbol], &reduction_info.precedence, @@ -296,7 +306,7 @@ impl<'a> ParseTableBuilder<'a> { } } - // Having computed the the successor item sets for each symbol, add a new + // Having computed the successor item sets for each symbol, add a new // parse state for each of these item sets, and add a corresponding Shift // action to this state. 
for (symbol, next_item_set) in terminal_successors { @@ -318,7 +328,7 @@ impl<'a> ParseTableBuilder<'a> { } entry - .or_insert_with(|| ParseTableEntry::new()) + .or_insert_with(ParseTableEntry::new) .actions .push(ParseAction::Shift { state: next_state_id, @@ -346,7 +356,7 @@ impl<'a> ParseTableBuilder<'a> { // * fail, terminating the parser generation process for symbol in lookaheads_with_conflicts.iter() { self.handle_conflict( - &item_set, + item_set, state_id, &preceding_symbols, &preceding_auxiliary_symbols, @@ -429,7 +439,7 @@ impl<'a> ParseTableBuilder<'a> { item_set: &ParseItemSet, state_id: ParseStateId, preceding_symbols: &SymbolSequence, - preceding_auxiliary_symbols: &Vec, + preceding_auxiliary_symbols: &[AuxiliarySymbolInfo], conflicting_lookahead: Symbol, reduction_info: &ReductionInfo, ) -> Result<()> { @@ -445,33 +455,31 @@ impl<'a> ParseTableBuilder<'a> { // REDUCE-REDUCE conflicts where all actions have the *same* // precedence, and there can still be SHIFT/REDUCE conflicts. 
let mut considered_associativity = false; - let mut shift_precedence: Vec<(&Precedence, Symbol)> = Vec::new(); + let mut shift_precedence = Vec::<(&Precedence, Symbol)>::new(); let mut conflicting_items = HashSet::new(); for (item, lookaheads) in &item_set.entries { if let Some(step) = item.step() { - if item.step_index > 0 { - if self + if item.step_index > 0 + && self .item_set_builder .first_set(&step.symbol) .contains(&conflicting_lookahead) - { - if item.variable_index != u32::MAX { - conflicting_items.insert(item); - } + { + if item.variable_index != u32::MAX { + conflicting_items.insert(item); + } - let p = ( - item.precedence(), - Symbol::non_terminal(item.variable_index as usize), - ); - if let Err(i) = shift_precedence.binary_search(&p) { - shift_precedence.insert(i, p); - } + let p = ( + item.precedence(), + Symbol::non_terminal(item.variable_index as usize), + ); + if let Err(i) = shift_precedence.binary_search(&p) { + shift_precedence.insert(i, p); } } - } else if lookaheads.contains(&conflicting_lookahead) { - if item.variable_index != u32::MAX { - conflicting_items.insert(item); - } + } else if lookaheads.contains(&conflicting_lookahead) && item.variable_index != u32::MAX + { + conflicting_items.insert(item); } } @@ -497,7 +505,7 @@ impl<'a> ParseTableBuilder<'a> { let mut shift_is_more = false; for p in shift_precedence { match Self::compare_precedence( - &self.syntax_grammar, + self.syntax_grammar, p.0, &[p.1], &reduction_info.precedence, @@ -582,6 +590,7 @@ impl<'a> ParseTableBuilder<'a> { .expected_conflicts .contains(&actual_conflict) { + self.actual_conflicts.remove(&actual_conflict); return Ok(()); } @@ -639,11 +648,10 @@ impl<'a> ParseTableBuilder<'a> { let prec_line = if let Some(associativity) = associativity { Some(format!( - "(precedence: {}, associativity: {:?})", - precedence, associativity + "(precedence: {precedence}, associativity: {associativity:?})", )) } else if !precedence.is_none() { - Some(format!("(precedence: {})", 
precedence)) + Some(format!("(precedence: {precedence})")) } else { None }; @@ -707,24 +715,22 @@ impl<'a> ParseTableBuilder<'a> { }; if actual_conflict.len() > 1 { - if shift_items.len() > 0 { + if !shift_items.is_empty() { resolution_count += 1; write!( &mut msg, - " {}: Specify a higher precedence in", - resolution_count + " {resolution_count}: Specify a higher precedence in", ) .unwrap(); list_rule_names(&mut msg, &shift_items); - write!(&mut msg, " than in the other rules.\n").unwrap(); + writeln!(&mut msg, " than in the other rules.").unwrap(); } for item in &reduce_items { resolution_count += 1; - write!( + writeln!( &mut msg, - " {}: Specify a higher precedence in `{}` than in the other rules.\n", - resolution_count, + " {resolution_count}: Specify a higher precedence in `{}` than in the other rules.", self.symbol_name(&Symbol::non_terminal(item.variable_index as usize)) ) .unwrap(); @@ -735,19 +741,17 @@ impl<'a> ParseTableBuilder<'a> { resolution_count += 1; write!( &mut msg, - " {}: Specify a left or right associativity in", - resolution_count + " {resolution_count}: Specify a left or right associativity in", ) .unwrap(); list_rule_names(&mut msg, &reduce_items); - write!(&mut msg, "\n").unwrap(); + writeln!(&mut msg).unwrap(); } resolution_count += 1; write!( &mut msg, - " {}: Add a conflict for these rules: ", - resolution_count + " {resolution_count}: Add a conflict for these rules: ", ) .unwrap(); for (i, symbol) in actual_conflict.iter().enumerate() { @@ -756,7 +760,7 @@ impl<'a> ParseTableBuilder<'a> { } write!(&mut msg, "`{}`", self.symbol_name(symbol)).unwrap(); } - write!(&mut msg, "\n").unwrap(); + writeln!(&mut msg).unwrap(); Err(anyhow!(msg)) } @@ -789,7 +793,7 @@ impl<'a> ParseTableBuilder<'a> { // and to the default precedence, which is zero. 
(Precedence::Integer(l), Precedence::Integer(r)) if *l != 0 || *r != 0 => l.cmp(r), (Precedence::Integer(l), Precedence::None) if *l != 0 => l.cmp(&0), - (Precedence::None, Precedence::Integer(r)) if *r != 0 => 0.cmp(&r), + (Precedence::None, Precedence::Integer(r)) if *r != 0 => 0.cmp(r), // Named precedences can be compared to other named precedences. _ => grammar @@ -856,7 +860,7 @@ impl<'a> ParseTableBuilder<'a> { production_info .field_map .entry(field_name.clone()) - .or_insert(Vec::new()) + .or_default() .push(FieldLocation { index: i, inherited: false, @@ -869,11 +873,11 @@ impl<'a> ParseTableBuilder<'a> { .is_visible() { let info = &self.variable_info[step.symbol.index]; - for (field_name, _) in &info.fields { + for field_name in info.fields.keys() { production_info .field_map .entry(field_name.clone()) - .or_insert(Vec::new()) + .or_default() .push(FieldLocation { index: i, inherited: true, @@ -887,7 +891,7 @@ impl<'a> ParseTableBuilder<'a> { } if item.production.steps.len() > self.parse_table.max_aliased_production_length { - self.parse_table.max_aliased_production_length = item.production.steps.len() + self.parse_table.max_aliased_production_length = item.production.steps.len(); } if let Some(index) = self @@ -923,7 +927,7 @@ impl<'a> ParseTableBuilder<'a> { } fn populate_following_tokens( - result: &mut Vec, + result: &mut [TokenSet], grammar: &SyntaxGrammar, inlines: &InlinedProductionMap, builder: &ParseItemSetBuilder, @@ -934,7 +938,6 @@ fn populate_following_tokens( .flat_map(|v| &v.productions) .chain(&inlines.productions); let all_tokens = (0..result.len()) - .into_iter() .map(Symbol::terminal) .collect::(); for production in productions { @@ -958,12 +961,13 @@ fn populate_following_tokens( } } -pub(crate) fn build_parse_table<'a>( +pub fn build_parse_table<'a>( syntax_grammar: &'a SyntaxGrammar, lexical_grammar: &'a LexicalGrammar, inlines: &'a InlinedProductionMap, - variable_info: &'a Vec, + variable_info: &'a [VariableInfo], ) -> 
Result<(ParseTable, Vec, Vec>)> { + let actual_conflicts = syntax_grammar.expected_conflicts.iter().cloned().collect(); let item_set_builder = ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines); let mut following_tokens = vec![TokenSet::new(); lexical_grammar.variables.len()]; populate_following_tokens( @@ -979,6 +983,7 @@ pub(crate) fn build_parse_table<'a>( item_set_builder, variable_info, non_terminal_extra_states: Vec::new(), + actual_conflicts, state_ids_by_item_set: IndexMap::default(), core_ids_by_core: HashMap::new(), parse_state_info_by_id: Vec::new(), diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/build_tables/coincident_tokens.rs b/third-party/tree-sitter/tree-sitter/cli/src/generate/build_tables/coincident_tokens.rs index bb234c4ac1f..a2181438d3b 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/generate/build_tables/coincident_tokens.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/build_tables/coincident_tokens.rs @@ -3,7 +3,7 @@ use crate::generate::rules::Symbol; use crate::generate::tables::{ParseStateId, ParseTable}; use std::fmt; -pub(crate) struct CoincidentTokenIndex<'a> { +pub struct CoincidentTokenIndex<'a> { entries: Vec>, grammar: &'a LexicalGrammar, n: usize, @@ -23,7 +23,7 @@ impl<'a> CoincidentTokenIndex<'a> { for other_symbol in state.terminal_entries.keys() { if other_symbol.is_terminal() { let index = result.index(symbol.index, other_symbol.index); - if result.entries[index].last().cloned() != Some(i) { + if result.entries[index].last().copied() != Some(i) { result.entries[index].push(i); } } @@ -34,7 +34,7 @@ impl<'a> CoincidentTokenIndex<'a> { result } - pub fn states_with(&self, a: Symbol, b: Symbol) -> &Vec { + pub fn states_with(&self, a: Symbol, b: Symbol) -> &[ParseStateId] { &self.entries[self.index(a.index, b.index)] } @@ -42,7 +42,8 @@ impl<'a> CoincidentTokenIndex<'a> { !self.entries[self.index(a.index, b.index)].is_empty() } - fn index(&self, a: usize, b: usize) -> 
usize { + #[must_use] + const fn index(&self, a: usize, b: usize) -> usize { if a < b { a * self.n + b } else { @@ -53,20 +54,20 @@ impl<'a> CoincidentTokenIndex<'a> { impl<'a> fmt::Debug for CoincidentTokenIndex<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "CoincidentTokenIndex {{\n")?; + writeln!(f, "CoincidentTokenIndex {{")?; - write!(f, " entries: {{\n")?; + writeln!(f, " entries: {{")?; for i in 0..self.n { - write!(f, " {}: {{\n", self.grammar.variables[i].name)?; + writeln!(f, " {}: {{", self.grammar.variables[i].name)?; for j in 0..self.n { - write!( + writeln!( f, - " {}: {:?},\n", + " {}: {:?},", self.grammar.variables[j].name, self.entries[self.index(i, j)].len() )?; } - write!(f, " }},\n")?; + writeln!(f, " }},")?; } write!(f, " }},")?; write!(f, "}}")?; diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/build_tables/item.rs b/third-party/tree-sitter/tree-sitter/cli/src/generate/build_tables/item.rs index 32b1a8d9abf..82c32a81482 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/generate/build_tables/item.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/build_tables/item.rs @@ -22,9 +22,9 @@ lazy_static! { }; } -/// A ParseItem represents an in-progress match of a single production in a grammar. +/// A [`ParseItem`] represents an in-progress match of a single production in a grammar. #[derive(Clone, Copy, Debug)] -pub(crate) struct ParseItem<'a> { +pub struct ParseItem<'a> { /// The index of the parent rule within the grammar. pub variable_index: u32, /// The number of symbols that have already been matched. 
@@ -47,35 +47,35 @@ pub(crate) struct ParseItem<'a> { pub has_preceding_inherited_fields: bool, } -/// A ParseItemSet represents a set of in-progress matches of productions in a +/// A [`ParseItemSet`] represents a set of in-progress matches of productions in a /// grammar, and for each in-progress match, a set of "lookaheads" - tokens that /// are allowed to *follow* the in-progress rule. This object corresponds directly /// to a state in the final parse table. -#[derive(Clone, Debug, PartialEq, Eq)] -pub(crate) struct ParseItemSet<'a> { +#[derive(Clone, Debug, PartialEq, Eq, Default)] +pub struct ParseItemSet<'a> { pub entries: Vec<(ParseItem<'a>, TokenSet)>, } -/// A ParseItemSetCore is like a ParseItemSet, but without the lookahead +/// A [`ParseItemSetCore`] is like a [`ParseItemSet`], but without the lookahead /// information. Parse states with the same core are candidates for merging. #[derive(Clone, Debug, PartialEq, Eq)] -pub(crate) struct ParseItemSetCore<'a> { +pub struct ParseItemSetCore<'a> { pub entries: Vec>, } -pub(crate) struct ParseItemDisplay<'a>( +pub struct ParseItemDisplay<'a>( pub &'a ParseItem<'a>, pub &'a SyntaxGrammar, pub &'a LexicalGrammar, ); -pub(crate) struct TokenSetDisplay<'a>( +pub struct TokenSetDisplay<'a>( pub &'a TokenSet, pub &'a SyntaxGrammar, pub &'a LexicalGrammar, ); -pub(crate) struct ParseItemSetDisplay<'a>( +pub struct ParseItemSetDisplay<'a>( pub &'a ParseItemSet<'a>, pub &'a SyntaxGrammar, pub &'a LexicalGrammar, @@ -116,16 +116,19 @@ impl<'a> ParseItem<'a> { } } + #[must_use] pub fn is_done(&self) -> bool { self.step_index as usize == self.production.steps.len() } - pub fn is_augmented(&self) -> bool { + #[must_use] + pub const fn is_augmented(&self) -> bool { self.variable_index == u32::MAX } /// Create an item like this one, but advanced by one step. 
- pub fn successor(&self) -> ParseItem<'a> { + #[must_use] + pub const fn successor(&self) -> ParseItem<'a> { ParseItem { variable_index: self.variable_index, production: self.production, @@ -136,8 +139,8 @@ impl<'a> ParseItem<'a> { /// Create an item identical to this one, but with a different production. /// This is used when dynamically "inlining" certain symbols in a production. - pub fn substitute_production(&self, production: &'a Production) -> ParseItem<'a> { - let mut result = self.clone(); + pub const fn substitute_production(&self, production: &'a Production) -> ParseItem<'a> { + let mut result = *self; result.production = production; result } @@ -172,14 +175,6 @@ impl<'a> ParseItemSet<'a> { } } -impl<'a> Default for ParseItemSet<'a> { - fn default() -> Self { - Self { - entries: Vec::new(), - } - } -} - impl<'a> fmt::Display for ParseItemDisplay<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { if self.0.is_augmented() { @@ -196,10 +191,10 @@ impl<'a> fmt::Display for ParseItemDisplay<'a> { if i == self.0.step_index as usize { write!(f, " •")?; if let Some(associativity) = step.associativity { - if !step.precedence.is_none() { - write!(f, " ({} {:?})", step.precedence, associativity)?; + if step.precedence.is_none() { + write!(f, " ({associativity:?})")?; } else { - write!(f, " ({:?})", associativity)?; + write!(f, " ({} {associativity:?})", step.precedence)?; } } else if !step.precedence.is_none() { write!(f, " ({})", step.precedence)?; @@ -211,7 +206,7 @@ impl<'a> fmt::Display for ParseItemDisplay<'a> { if let Some(variable) = self.2.variables.get(step.symbol.index) { write!(f, "{}", &variable.name)?; } else { - write!(f, "{}-{}", "terminal", step.symbol.index)?; + write!(f, "terminal-{}", step.symbol.index)?; } } else if step.symbol.is_external() { write!(f, "{}", &self.1.external_tokens[step.symbol.index].name)?; @@ -228,10 +223,10 @@ impl<'a> fmt::Display for ParseItemDisplay<'a> { write!(f, " •")?; if let Some(step) = 
self.0.production.steps.last() { if let Some(associativity) = step.associativity { - if !step.precedence.is_none() { - write!(f, " ({} {:?})", step.precedence, associativity)?; + if step.precedence.is_none() { + write!(f, " ({associativity:?})")?; } else { - write!(f, " ({:?})", associativity)?; + write!(f, " ({} {associativity:?})", step.precedence)?; } } else if !step.precedence.is_none() { write!(f, " ({})", step.precedence)?; @@ -255,7 +250,7 @@ impl<'a> fmt::Display for TokenSetDisplay<'a> { if let Some(variable) = self.2.variables.get(symbol.index) { write!(f, "{}", &variable.name)?; } else { - write!(f, "{}-{}", "terminal", symbol.index)?; + write!(f, "terminal-{}", symbol.index)?; } } else if symbol.is_external() { write!(f, "{}", &self.1.external_tokens[symbol.index].name)?; @@ -270,7 +265,7 @@ impl<'a> fmt::Display for TokenSetDisplay<'a> { impl<'a> fmt::Display for ParseItemSetDisplay<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { - for (item, lookaheads) in self.0.entries.iter() { + for (item, lookaheads) in &self.0.entries { writeln!( f, "{}\t{}", @@ -288,7 +283,7 @@ impl<'a> Hash for ParseItem<'a> { hasher.write_u32(self.step_index); hasher.write_i32(self.production.dynamic_precedence); hasher.write_usize(self.production.steps.len()); - hasher.write_i32(self.has_preceding_inherited_fields as i32); + hasher.write_i32(i32::from(self.has_preceding_inherited_fields)); self.precedence().hash(hasher); self.associativity().hash(hasher); @@ -344,7 +339,7 @@ impl<'a> PartialEq for ParseItem<'a> { } } - return true; + true } } @@ -364,7 +359,7 @@ impl<'a> Ord for ParseItem<'a> { .len() .cmp(&other.production.steps.len()) }) - .then_with(|| self.precedence().cmp(&other.precedence())) + .then_with(|| self.precedence().cmp(other.precedence())) .then_with(|| self.associativity().cmp(&other.associativity())) .then_with(|| { for (i, step) in self.production.steps.iter().enumerate() { @@ -383,7 +378,7 @@ impl<'a> Ord for ParseItem<'a> { return 
o; } } - return Ordering::Equal; + Ordering::Equal }) } } @@ -399,7 +394,7 @@ impl<'a> Eq for ParseItem<'a> {} impl<'a> Hash for ParseItemSet<'a> { fn hash(&self, hasher: &mut H) { hasher.write_usize(self.entries.len()); - for (item, lookaheads) in self.entries.iter() { + for (item, lookaheads) in &self.entries { item.hash(hasher); lookaheads.hash(hasher); } diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/build_tables/item_set_builder.rs b/third-party/tree-sitter/tree-sitter/cli/src/generate/build_tables/item_set_builder.rs index 18283576dda..8f9644d02ee 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/generate/build_tables/item_set_builder.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/build_tables/item_set_builder.rs @@ -16,7 +16,7 @@ struct FollowSetInfo { propagates_lookaheads: bool, } -pub(crate) struct ParseItemSetBuilder<'a> { +pub struct ParseItemSetBuilder<'a> { syntax_grammar: &'a SyntaxGrammar, lexical_grammar: &'a LexicalGrammar, first_sets: HashMap, @@ -69,7 +69,7 @@ impl<'a> ParseItemSetBuilder<'a> { } // The FIRST set of a non-terminal `i` is the union of the following sets: - // * the set of all terminals that appear at the beginings of i's productions + // * the set of all terminals that appear at the beginnings of i's productions // * the FIRST sets of all the non-terminals that appear at the beginnings // of i's productions // @@ -80,7 +80,10 @@ impl<'a> ParseItemSetBuilder<'a> { for i in 0..syntax_grammar.variables.len() { let symbol = Symbol::non_terminal(i); - let first_set = &mut result.first_sets.entry(symbol).or_insert(TokenSet::new()); + let first_set = result + .first_sets + .entry(symbol) + .or_insert_with(TokenSet::new); processed_non_terminals.clear(); symbols_to_process.clear(); symbols_to_process.push(symbol); @@ -88,10 +91,7 @@ impl<'a> ParseItemSetBuilder<'a> { if current_symbol.is_terminal() || current_symbol.is_external() { first_set.insert(current_symbol); } else if 
processed_non_terminals.insert(current_symbol) { - for production in syntax_grammar.variables[current_symbol.index] - .productions - .iter() - { + for production in &syntax_grammar.variables[current_symbol.index].productions { if let Some(step) = production.steps.first() { symbols_to_process.push(step.symbol); } @@ -100,7 +100,7 @@ impl<'a> ParseItemSetBuilder<'a> { } // The LAST set is defined in a similar way to the FIRST set. - let last_set = &mut result.last_sets.entry(symbol).or_insert(TokenSet::new()); + let last_set = result.last_sets.entry(symbol).or_insert_with(TokenSet::new); processed_non_terminals.clear(); symbols_to_process.clear(); symbols_to_process.push(symbol); @@ -108,10 +108,7 @@ impl<'a> ParseItemSetBuilder<'a> { if current_symbol.is_terminal() || current_symbol.is_external() { last_set.insert(current_symbol); } else if processed_non_terminals.insert(current_symbol) { - for production in syntax_grammar.variables[current_symbol.index] - .productions - .iter() - { + for production in &syntax_grammar.variables[current_symbol.index].productions { if let Some(step) = production.steps.last() { symbols_to_process.push(step.symbol); } @@ -235,7 +232,7 @@ impl<'a> ParseItemSetBuilder<'a> { result } - pub(crate) fn transitive_closure(&mut self, item_set: &ParseItemSet<'a>) -> ParseItemSet<'a> { + pub fn transitive_closure(&mut self, item_set: &ParseItemSet<'a>) -> ParseItemSet<'a> { let mut result = ParseItemSet::default(); for (item, lookaheads) in &item_set.entries { if let Some(productions) = self @@ -270,11 +267,9 @@ impl<'a> ParseItemSetBuilder<'a> { let next_step = item.successor().step(); // Determine which tokens can follow this non-terminal. - let following_tokens = if let Some(next_step) = next_step { + let following_tokens = next_step.map_or(lookaheads, |next_step| { self.first_sets.get(&next_step.symbol).unwrap() - } else { - &lookaheads - }; + }); // Use the pre-computed *additions* to expand the non-terminal. 
for addition in &self.transitive_closure_additions[step.symbol.index] { @@ -291,9 +286,9 @@ impl<'a> ParseItemSetBuilder<'a> { impl<'a> fmt::Debug for ParseItemSetBuilder<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "ParseItemSetBuilder {{\n")?; + writeln!(f, "ParseItemSetBuilder {{")?; - write!(f, " first_sets: {{\n")?; + writeln!(f, " first_sets: {{")?; for (symbol, first_set) in &self.first_sets { let name = match symbol.kind { SymbolType::NonTerminal => &self.syntax_grammar.variables[symbol.index].name, @@ -301,16 +296,15 @@ impl<'a> fmt::Debug for ParseItemSetBuilder<'a> { SymbolType::Terminal => &self.lexical_grammar.variables[symbol.index].name, SymbolType::End | SymbolType::EndOfNonTerminalExtra => "END", }; - write!( + writeln!( f, - " first({:?}): {}\n", - name, - TokenSetDisplay(first_set, &self.syntax_grammar, &self.lexical_grammar) + " first({name:?}): {}", + TokenSetDisplay(first_set, self.syntax_grammar, self.lexical_grammar) )?; } - write!(f, " }}\n")?; + writeln!(f, " }}")?; - write!(f, " last_sets: {{\n")?; + writeln!(f, " last_sets: {{")?; for (symbol, last_set) in &self.last_sets { let name = match symbol.kind { SymbolType::NonTerminal => &self.syntax_grammar.variables[symbol.index].name, @@ -318,26 +312,25 @@ impl<'a> fmt::Debug for ParseItemSetBuilder<'a> { SymbolType::Terminal => &self.lexical_grammar.variables[symbol.index].name, SymbolType::End | SymbolType::EndOfNonTerminalExtra => "END", }; - write!( + writeln!( f, - " last({:?}): {}\n", - name, - TokenSetDisplay(last_set, &self.syntax_grammar, &self.lexical_grammar) + " last({name:?}): {}", + TokenSetDisplay(last_set, self.syntax_grammar, self.lexical_grammar) )?; } - write!(f, " }}\n")?; + writeln!(f, " }}")?; - write!(f, " additions: {{\n")?; + writeln!(f, " additions: {{")?; for (i, variable) in self.syntax_grammar.variables.iter().enumerate() { - write!(f, " {}: {{\n", variable.name)?; + writeln!(f, " {}: {{", variable.name)?; for addition in 
&self.transitive_closure_additions[i] { - write!( + writeln!( f, - " {}\n", + " {}", ParseItemDisplay(&addition.item, self.syntax_grammar, self.lexical_grammar) )?; } - write!(f, " }},\n")?; + writeln!(f, " }},")?; } write!(f, " }},")?; diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/build_tables/minimize_parse_table.rs b/third-party/tree-sitter/tree-sitter/cli/src/generate/build_tables/minimize_parse_table.rs index d10bea56135..d9d2b7f56d2 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/generate/build_tables/minimize_parse_table.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/build_tables/minimize_parse_table.rs @@ -9,7 +9,7 @@ use log::info; use std::collections::{HashMap, HashSet}; use std::mem; -pub(crate) fn minimize_parse_table( +pub fn minimize_parse_table( parse_table: &mut ParseTable, syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, @@ -67,9 +67,9 @@ impl<'a> Minimizer<'a> { symbol, .. } => { - if !self.simple_aliases.contains_key(&symbol) - && !self.syntax_grammar.supertype_symbols.contains(&symbol) - && !aliased_symbols.contains(&symbol) + if !self.simple_aliases.contains_key(symbol) + && !self.syntax_grammar.supertype_symbols.contains(symbol) + && !aliased_symbols.contains(symbol) && self.syntax_grammar.variables[symbol.index].kind != VariableType::Named && (unit_reduction_symbol.is_none() @@ -97,21 +97,22 @@ impl<'a> Minimizer<'a> { } } - for state in self.parse_table.states.iter_mut() { + for state in &mut self.parse_table.states { let mut done = false; while !done { done = true; state.update_referenced_states(|other_state_id, state| { - if let Some(symbol) = unit_reduction_symbols_by_state.get(&other_state_id) { - done = false; - match state.nonterminal_entries.get(symbol) { - Some(GotoAction::Goto(state_id)) => *state_id, - _ => other_state_id, - } - } else { - other_state_id - } - }) + unit_reduction_symbols_by_state.get(&other_state_id).map_or( + other_state_id, + |symbol| { + done = false; 
+ match state.nonterminal_entries.get(symbol) { + Some(GotoAction::Goto(state_id)) => *state_id, + _ => other_state_id, + } + }, + ) + }); } } } @@ -198,7 +199,7 @@ impl<'a> Minimizer<'a> { &self, left_state: &ParseState, right_state: &ParseState, - group_ids_by_state_id: &Vec, + group_ids_by_state_id: &[ParseStateId], ) -> bool { for (token, left_entry) in &left_state.terminal_entries { if let Some(right_entry) = right_state.terminal_entries.get(token) { @@ -223,15 +224,15 @@ impl<'a> Minimizer<'a> { } for token in right_state.terminal_entries.keys() { - if !left_state.terminal_entries.contains_key(token) { - if self.token_conflicts( + if !left_state.terminal_entries.contains_key(token) + && self.token_conflicts( left_state.id, right_state.id, left_state.terminal_entries.keys(), *token, - ) { - return true; - } + ) + { + return true; } } @@ -242,7 +243,7 @@ impl<'a> Minimizer<'a> { &self, state1: &ParseState, state2: &ParseState, - group_ids_by_state_id: &Vec, + group_ids_by_state_id: &[ParseStateId], ) -> bool { for (token, entry1) in &state1.terminal_entries { if let ParseAction::Shift { state: s1, .. 
} = entry1.actions.last().unwrap() { @@ -252,12 +253,10 @@ impl<'a> Minimizer<'a> { let group2 = group_ids_by_state_id[*s2]; if group1 != group2 { info!( - "split states {} {} - successors for {} are split: {} {}", + "split states {} {} - successors for {} are split: {s1} {s2}", state1.id, state2.id, self.symbol_name(token), - s1, - s2, ); return true; } @@ -275,12 +274,10 @@ impl<'a> Minimizer<'a> { let group2 = group_ids_by_state_id[*s2]; if group1 != group2 { info!( - "split states {} {} - successors for {} are split: {} {}", + "split states {} {} - successors for {} are split: {s1} {s2}", state1.id, state2.id, self.symbol_name(symbol), - s1, - s2, ); return true; } @@ -300,16 +297,14 @@ impl<'a> Minimizer<'a> { token: &Symbol, entry1: &ParseTableEntry, entry2: &ParseTableEntry, - group_ids_by_state_id: &Vec, + group_ids_by_state_id: &[ParseStateId], ) -> bool { // To be compatible, entries need to have the same actions. let actions1 = &entry1.actions; let actions2 = &entry2.actions; if actions1.len() != actions2.len() { info!( - "split states {} {} - differing action counts for token {}", - state_id1, - state_id2, + "split states {state_id1} {state_id2} - differing action counts for token {}", self.symbol_name(token) ); return true; @@ -334,22 +329,15 @@ impl<'a> Minimizer<'a> { let group2 = group_ids_by_state_id[*s2]; if group1 == group2 && is_repetition1 == is_repetition2 { continue; - } else { - info!( - "split states {} {} - successors for {} are split: {} {}", - state_id1, - state_id2, - self.symbol_name(token), - s1, - s2, - ); - return true; } + info!( + "split states {state_id1} {state_id2} - successors for {} are split: {s1} {s2}", + self.symbol_name(token), + ); + return true; } else if action1 != action2 { info!( - "split states {} {} - unequal actions for {}", - state_id1, - state_id2, + "split states {state_id1} {state_id2} - unequal actions for {}", self.symbol_name(token), ); return true; @@ -367,10 +355,7 @@ impl<'a> Minimizer<'a> { new_token: 
Symbol, ) -> bool { if new_token == Symbol::end_of_nonterminal_extra() { - info!( - "split states {} {} - end of non-terminal extra", - left_id, right_id, - ); + info!("split states {left_id} {right_id} - end of non-terminal extra",); return true; } @@ -378,9 +363,7 @@ impl<'a> Minimizer<'a> { // existing lookahead tokens. if new_token.is_external() { info!( - "split states {} {} - external token {}", - left_id, - right_id, + "split states {left_id} {right_id} - external token {}", self.symbol_name(&new_token), ); return true; @@ -395,9 +378,7 @@ impl<'a> Minimizer<'a> { .any(|external| external.corresponding_internal_token == Some(new_token)) { info!( - "split states {} {} - internal/external token {}", - left_id, - right_id, + "split states {left_id} {right_id} - internal/external token {}", self.symbol_name(&new_token), ); return true; @@ -405,27 +386,24 @@ impl<'a> Minimizer<'a> { // Do not add a token if it conflicts with an existing token. for token in existing_tokens { - if token.is_terminal() { - if !(self.syntax_grammar.word_token == Some(*token) + if token.is_terminal() + && !(self.syntax_grammar.word_token == Some(*token) && self.keywords.contains(&new_token)) - && !(self.syntax_grammar.word_token == Some(new_token) - && self.keywords.contains(token)) - && (self + && !(self.syntax_grammar.word_token == Some(new_token) + && self.keywords.contains(token)) + && (self + .token_conflict_map + .does_conflict(new_token.index, token.index) + || self .token_conflict_map - .does_conflict(new_token.index, token.index) - || self - .token_conflict_map - .does_match_same_string(new_token.index, token.index)) - { - info!( - "split states {} {} - token {} conflicts with {}", - left_id, - right_id, - self.symbol_name(&new_token), - self.symbol_name(token), - ); - return true; - } + .does_match_same_string(new_token.index, token.index)) + { + info!( + "split states {left_id} {right_id} - token {} conflicts with {}", + self.symbol_name(&new_token), + 
self.symbol_name(token), + ); + return true; } } diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/build_tables/mod.rs b/third-party/tree-sitter/tree-sitter/cli/src/generate/build_tables/mod.rs index fe996254ecc..a87afefd5f9 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/generate/build_tables/mod.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/build_tables/mod.rs @@ -1,5 +1,5 @@ -pub(crate) mod build_lex_table; -pub(crate) mod build_parse_table; +pub mod build_lex_table; +pub mod build_parse_table; mod coincident_tokens; mod item; mod item_set_builder; @@ -20,11 +20,11 @@ use anyhow::Result; use log::info; use std::collections::{BTreeSet, HashMap}; -pub(crate) fn build_tables( +pub fn build_tables( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, simple_aliases: &AliasMap, - variable_info: &Vec, + variable_info: &[VariableInfo], inlines: &InlinedProductionMap, report_symbol_name: Option<&str>, ) -> Result<(ParseTable, LexTable, LexTable, Option)> { @@ -69,8 +69,8 @@ pub(crate) fn build_tables( if let Some(report_symbol_name) = report_symbol_name { report_state_info( - &syntax_grammar, - &lexical_grammar, + syntax_grammar, + lexical_grammar, &parse_table, &parse_state_info, report_symbol_name, @@ -98,9 +98,8 @@ fn populate_error_state( // First identify the *conflict-free tokens*: tokens that do not overlap with // any other token in any way, besides matching exactly the same string. let conflict_free_tokens: TokenSet = (0..n) - .into_iter() .filter_map(|i| { - let conflicts_with_other_tokens = (0..n).into_iter().any(|j| { + let conflicts_with_other_tokens = (0..n).any(|j| { j != i && !coincident_token_index.contains(Symbol::terminal(i), Symbol::terminal(j)) && token_conflict_map.does_match_shorter_or_longer(i, j) @@ -126,18 +125,19 @@ fn populate_error_state( // the *conflict-free tokens* identified above. 
for i in 0..n { let symbol = Symbol::terminal(i); - if !conflict_free_tokens.contains(&symbol) && !keywords.contains(&symbol) { - if syntax_grammar.word_token != Some(symbol) { - if let Some(t) = conflict_free_tokens.iter().find(|t| { - !coincident_token_index.contains(symbol, *t) - && token_conflict_map.does_conflict(symbol.index, t.index) - }) { - info!( - "error recovery - exclude token {} because of conflict with {}", - lexical_grammar.variables[i].name, lexical_grammar.variables[t.index].name - ); - continue; - } + if !conflict_free_tokens.contains(&symbol) + && !keywords.contains(&symbol) + && syntax_grammar.word_token != Some(symbol) + { + if let Some(t) = conflict_free_tokens.iter().find(|t| { + !coincident_token_index.contains(symbol, *t) + && token_conflict_map.does_conflict(symbol.index, t.index) + }) { + info!( + "error recovery - exclude token {} because of conflict with {}", + lexical_grammar.variables[i].name, lexical_grammar.variables[t.index].name + ); + continue; } } info!( @@ -361,7 +361,7 @@ fn mark_fragile_tokens( ) { let n = lexical_grammar.variables.len(); let mut valid_tokens_mask = Vec::with_capacity(n); - for state in parse_table.states.iter_mut() { + for state in &mut parse_table.states { valid_tokens_mask.clear(); valid_tokens_mask.resize(n, false); for token in state.terminal_entries.keys() { @@ -369,14 +369,12 @@ fn mark_fragile_tokens( valid_tokens_mask[token.index] = true; } } - for (token, entry) in state.terminal_entries.iter_mut() { + for (token, entry) in &mut state.terminal_entries { if token.is_terminal() { for (i, is_valid) in valid_tokens_mask.iter().enumerate() { - if *is_valid { - if token_conflict_map.does_overlap(i, token.index) { - entry.reusable = false; - break; - } + if *is_valid && token_conflict_map.does_overlap(i, token.index) { + entry.reusable = false; + break; } } } @@ -388,7 +386,7 @@ fn report_state_info<'a>( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, parse_table: &ParseTable, - 
parse_state_info: &Vec>, + parse_state_info: &[ParseStateInfo<'a>], report_symbol_name: &'a str, ) { let mut all_state_indices = BTreeSet::new(); @@ -399,7 +397,7 @@ fn report_state_info<'a>( for (i, state) in parse_table.states.iter().enumerate() { all_state_indices.insert(i); let item_set = &parse_state_info[state.id]; - for (item, _) in item_set.1.entries.iter() { + for (item, _) in &item_set.1.entries { if !item.is_augmented() { symbols_with_state_indices[item.variable_index as usize] .1 @@ -424,7 +422,7 @@ fn report_state_info<'a>( width = max_symbol_name_length ); } - eprintln!(""); + eprintln!(); let state_indices = if report_symbol_name == "*" { Some(&all_state_indices) @@ -441,14 +439,14 @@ fn report_state_info<'a>( }; if let Some(state_indices) = state_indices { - let mut state_indices = state_indices.into_iter().cloned().collect::>(); + let mut state_indices = state_indices.iter().copied().collect::>(); state_indices.sort_unstable_by_key(|i| (parse_table.states[*i].core_id, *i)); for state_index in state_indices { let id = parse_table.states[state_index].id; let (preceding_symbols, item_set) = &parse_state_info[id]; - eprintln!("state index: {}", state_index); - eprintln!("state id: {}", id); + eprintln!("state index: {state_index}"); + eprintln!("state id: {id}"); eprint!("symbol sequence:"); for symbol in preceding_symbols { let name = if symbol.is_terminal() { @@ -458,11 +456,11 @@ fn report_state_info<'a>( } else { &syntax_grammar.variables[symbol.index].name }; - eprint!(" {}", name); + eprint!(" {name}"); } eprintln!( "\nitems:\n{}", - self::item::ParseItemSetDisplay(&item_set, syntax_grammar, lexical_grammar,), + self::item::ParseItemSetDisplay(item_set, syntax_grammar, lexical_grammar,), ); } } diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/build_tables/token_conflicts.rs b/third-party/tree-sitter/tree-sitter/cli/src/generate/build_tables/token_conflicts.rs index 223d3481831..33a904b02af 100644 --- 
a/third-party/tree-sitter/tree-sitter/cli/src/generate/build_tables/token_conflicts.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/build_tables/token_conflicts.rs @@ -16,7 +16,7 @@ struct TokenConflictStatus { matches_different_string: bool, } -pub(crate) struct TokenConflictMap<'a> { +pub struct TokenConflictMap<'a> { n: usize, status_matrix: Vec, following_tokens: Vec, @@ -104,19 +104,17 @@ impl<'a> TokenConflictMap<'a> { } pub fn prefer_token(grammar: &LexicalGrammar, left: (i32, usize), right: (i32, usize)) -> bool { - if left.0 > right.0 { - return true; - } else if left.0 < right.0 { - return false; - } - - match grammar.variables[left.1] - .implicit_precedence - .cmp(&grammar.variables[right.1].implicit_precedence) - { + match left.0.cmp(&right.0) { Ordering::Less => false, Ordering::Greater => true, - Ordering::Equal => left.1 < right.1, + Ordering::Equal => match grammar.variables[left.1] + .implicit_precedence + .cmp(&grammar.variables[right.1].implicit_precedence) + { + Ordering::Less => false, + Ordering::Greater => true, + Ordering::Equal => left.1 < right.1, + }, } } @@ -135,10 +133,9 @@ impl<'a> TokenConflictMap<'a> { return false; } if has_separator_transitions - && grammar + && !grammar .variable_indices_for_nfa_states(&t.states) - .position(|i| i == completed_id) - .is_none() + .any(|i| i == completed_id) { return false; } @@ -149,53 +146,53 @@ impl<'a> TokenConflictMap<'a> { impl<'a> fmt::Debug for TokenConflictMap<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "TokenConflictMap {{\n")?; + writeln!(f, "TokenConflictMap {{")?; let syntax_grammar = SyntaxGrammar::default(); - write!(f, " following_tokens: {{\n")?; + writeln!(f, " following_tokens: {{")?; for (i, following_tokens) in self.following_tokens.iter().enumerate() { - write!( + writeln!( f, - " follow({:?}): {},\n", + " follow({:?}): {},", self.grammar.variables[i].name, - TokenSetDisplay(following_tokens, &syntax_grammar, &self.grammar) + 
TokenSetDisplay(following_tokens, &syntax_grammar, self.grammar) )?; } - write!(f, " }},\n")?; + writeln!(f, " }},")?; - write!(f, " starting_characters: {{\n")?; + writeln!(f, " starting_characters: {{")?; for i in 0..self.n { - write!( + writeln!( f, - " {:?}: {:?},\n", + " {:?}: {:?},", self.grammar.variables[i].name, self.starting_chars_by_index[i] )?; } - write!(f, " }},\n")?; + writeln!(f, " }},")?; - write!(f, " following_characters: {{\n")?; + writeln!(f, " following_characters: {{")?; for i in 0..self.n { - write!( + writeln!( f, - " {:?}: {:?},\n", + " {:?}: {:?},", self.grammar.variables[i].name, self.following_chars_by_index[i] )?; } - write!(f, " }},\n")?; + writeln!(f, " }},")?; - write!(f, " status_matrix: {{\n")?; + writeln!(f, " status_matrix: {{")?; for i in 0..self.n { - write!(f, " {:?}: {{\n", self.grammar.variables[i].name)?; + writeln!(f, " {:?}: {{", self.grammar.variables[i].name)?; for j in 0..self.n { - write!( + writeln!( f, - " {:?}: {:?},\n", + " {:?}: {:?},", self.grammar.variables[j].name, self.status_matrix[matrix_index(self.n, i, j)] )?; } - write!(f, " }},\n")?; + writeln!(f, " }},")?; } write!(f, " }},")?; write!(f, "}}")?; @@ -203,7 +200,7 @@ impl<'a> fmt::Debug for TokenConflictMap<'a> { } } -fn matrix_index(variable_count: usize, i: usize, j: usize) -> usize { +const fn matrix_index(variable_count: usize, i: usize, j: usize) -> usize { variable_count * i + j } @@ -221,8 +218,8 @@ fn get_starting_chars(cursor: &mut NfaCursor, grammar: &LexicalGrammar) -> Vec, - following_tokens: &Vec, + starting_chars: &[CharacterSet], + following_tokens: &[TokenSet], ) -> Vec { following_tokens .iter() @@ -241,7 +238,7 @@ fn get_following_chars( fn compute_conflict_status( cursor: &mut NfaCursor, grammar: &LexicalGrammar, - following_chars: &Vec, + following_chars: &[CharacterSet], i: usize, j: usize, ) -> (TokenConflictStatus, TokenConflictStatus) { @@ -330,9 +327,8 @@ fn compute_conflict_status( if variable_id == completed_id { 
successor_contains_completed_id = true; break; - } else { - advanced_id = Some(variable_id); } + advanced_id = Some(variable_id); } // Determine which action is preferred: matching the already complete @@ -357,12 +353,10 @@ fn compute_conflict_status( result.1.does_match_valid_continuation = true; } } + } else if completed_id == i { + result.0.matches_prefix = true; } else { - if completed_id == i { - result.0.matches_prefix = true; - } else { - result.1.matches_prefix = true; - } + result.1.matches_prefix = true; } } } @@ -390,12 +384,12 @@ mod tests { Variable { name: "token_0".to_string(), kind: VariableType::Named, - rule: Rule::pattern("[a-f]1|0x\\d"), + rule: Rule::pattern("[a-f]1|0x\\d", ""), }, Variable { name: "token_1".to_string(), kind: VariableType::Named, - rule: Rule::pattern("d*ef"), + rule: Rule::pattern("d*ef", ""), }, ], }) @@ -426,7 +420,7 @@ mod tests { Variable { name: "identifier".to_string(), kind: VariableType::Named, - rule: Rule::pattern("\\w+"), + rule: Rule::pattern("\\w+", ""), }, Variable { name: "instanceof".to_string(), @@ -442,14 +436,14 @@ mod tests { let token_map = TokenConflictMap::new( &grammar, vec![ - [Symbol::terminal(var("identifier"))] - .iter() - .cloned() + std::iter::once(&Symbol::terminal(var("identifier"))) + .copied() + .collect(), + std::iter::once(&Symbol::terminal(var("in"))) + .copied() .collect(), - [Symbol::terminal(var("in"))].iter().cloned().collect(), - [Symbol::terminal(var("identifier"))] - .iter() - .cloned() + std::iter::once(&Symbol::terminal(var("identifier"))) + .copied() .collect(), ], ); @@ -471,7 +465,7 @@ mod tests { #[test] fn test_token_conflicts_with_separators() { let grammar = expand_tokens(ExtractedLexicalGrammar { - separators: vec![Rule::pattern("\\s")], + separators: vec![Rule::pattern("\\s", "")], variables: vec![ Variable { name: "x".to_string(), @@ -498,7 +492,7 @@ mod tests { #[test] fn test_token_conflicts_with_open_ended_tokens() { let grammar = expand_tokens(ExtractedLexicalGrammar 
{ - separators: vec![Rule::pattern("\\s")], + separators: vec![Rule::pattern("\\s", "")], variables: vec![ Variable { name: "x".to_string(), @@ -508,7 +502,7 @@ mod tests { Variable { name: "anything".to_string(), kind: VariableType::Named, - rule: Rule::prec(Precedence::Integer(-1), Rule::pattern(".*")), + rule: Rule::prec(Precedence::Integer(-1), Rule::pattern(".*", "")), }, ], }) diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/char_tree.rs b/third-party/tree-sitter/tree-sitter/cli/src/generate/char_tree.rs index 2de5e8320a7..2e28d56fe85 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/generate/char_tree.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/char_tree.rs @@ -29,20 +29,20 @@ impl CharacterTree { 1 => { let range = &ranges[0]; if range.start == range.end { - Some(CharacterTree::Compare { + Some(Self::Compare { operator: Comparator::Equal, value: range.start, - consequence: Some(Box::new(CharacterTree::Yes)), + consequence: Some(Box::new(Self::Yes)), alternative: None, }) } else { - Some(CharacterTree::Compare { + Some(Self::Compare { operator: Comparator::GreaterOrEqual, value: range.start, - consequence: Some(Box::new(CharacterTree::Compare { + consequence: Some(Box::new(Self::Compare { operator: Comparator::LessOrEqual, value: range.end, - consequence: Some(Box::new(CharacterTree::Yes)), + consequence: Some(Box::new(Self::Yes)), alternative: None, })), alternative: None, @@ -52,14 +52,14 @@ impl CharacterTree { len => { let mid = len / 2; let mid_range = &ranges[mid]; - Some(CharacterTree::Compare { + Some(Self::Compare { operator: Comparator::Less, value: mid_range.start, consequence: Self::from_ranges(&ranges[0..mid]).map(Box::new), - alternative: Some(Box::new(CharacterTree::Compare { + alternative: Some(Box::new(Self::Compare { operator: Comparator::LessOrEqual, value: mid_range.end, - consequence: Some(Box::new(CharacterTree::Yes)), + consequence: Some(Box::new(Self::Yes)), alternative: 
Self::from_ranges(&ranges[(mid + 1)..]).map(Box::new), })), }) @@ -70,8 +70,8 @@ impl CharacterTree { #[cfg(test)] fn contains(&self, c: char) -> bool { match self { - CharacterTree::Yes => true, - CharacterTree::Compare { + Self::Yes => true, + Self::Compare { value, operator, alternative, diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/dedup.rs b/third-party/tree-sitter/tree-sitter/cli/src/generate/dedup.rs index dcba231838a..fffe2675a57 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/generate/dedup.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/dedup.rs @@ -1,9 +1,9 @@ -pub(crate) fn split_state_id_groups( - states: &Vec, +pub fn split_state_id_groups( + states: &[S], state_ids_by_group_id: &mut Vec>, - group_ids_by_state_id: &mut Vec, + group_ids_by_state_id: &mut [usize], start_group_id: usize, - mut f: impl FnMut(&S, &S, &Vec) -> bool, + mut f: impl FnMut(&S, &S, &[usize]) -> bool, ) -> bool { let mut result = false; @@ -33,7 +33,7 @@ pub(crate) fn split_state_id_groups( } let right_state = &states[right_state_id]; - if f(left_state, right_state, &group_ids_by_state_id) { + if f(left_state, right_state, group_ids_by_state_id) { split_state_ids.push(right_state_id); } @@ -44,9 +44,9 @@ pub(crate) fn split_state_id_groups( } // If any states were removed from the group, add them all as a new group. 
- if split_state_ids.len() > 0 { + if !split_state_ids.is_empty() { result = true; - state_ids_by_group_id[group_id].retain(|i| !split_state_ids.contains(&i)); + state_ids_by_group_id[group_id].retain(|i| !split_state_ids.contains(i)); let new_group_id = state_ids_by_group_id.len(); for id in &split_state_ids { diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/dsl.js b/third-party/tree-sitter/tree-sitter/cli/src/generate/dsl.js index 4281cee12dc..581aeddd212 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/generate/dsl.js +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/dsl.js @@ -48,13 +48,14 @@ function choice(...elements) { } function optional(value) { - checkArguments(arguments.length, optional, 'optional'); + checkArguments(arguments, arguments.length, optional, 'optional'); return choice(value, blank()); } function prec(number, rule) { checkPrecedence(number); checkArguments( + arguments, arguments.length - 1, prec, 'prec', @@ -76,6 +77,7 @@ prec.left = function(number, rule) { checkPrecedence(number); checkArguments( + arguments, arguments.length - 1, prec.left, 'prec.left', @@ -97,6 +99,7 @@ prec.right = function(number, rule) { checkPrecedence(number); checkArguments( + arguments, arguments.length - 1, prec.right, 'prec.right', @@ -113,6 +116,7 @@ prec.right = function(number, rule) { prec.dynamic = function(number, rule) { checkPrecedence(number); checkArguments( + arguments, arguments.length - 1, prec.dynamic, 'prec.dynamic', @@ -127,7 +131,7 @@ prec.dynamic = function(number, rule) { } function repeat(rule) { - checkArguments(arguments.length, repeat, 'repeat'); + checkArguments(arguments, arguments.length, repeat, 'repeat'); return { type: "REPEAT", content: normalize(rule) @@ -135,7 +139,7 @@ function repeat(rule) { } function repeat1(rule) { - checkArguments(arguments.length, repeat1, 'repeat1'); + checkArguments(arguments, arguments.length, repeat1, 'repeat1'); return { type: "REPEAT1", content: normalize(rule) @@ 
-157,6 +161,7 @@ function sym(name) { } function token(value) { + checkArguments(arguments, arguments.length, token, 'token', '', 'literal'); return { type: "TOKEN", content: normalize(value) @@ -164,6 +169,7 @@ function token(value) { } token.immediate = function(value) { + checkArguments(arguments, arguments.length, token.immediate, 'token.immediate', '', 'literal'); return { type: "IMMEDIATE_TOKEN", content: normalize(value) @@ -181,7 +187,11 @@ function normalize(value) { value }; case RegExp: - return { + return value.flags ? { + type: 'PATTERN', + value: value.source, + flags: value.flags + } : { type: 'PATTERN', value: value.source }; @@ -225,6 +235,8 @@ function grammar(baseGrammar, options) { supertypes: [], precedences: [], }; + } else { + baseGrammar = baseGrammar.grammar; } let externals = baseGrammar.externals; @@ -304,6 +316,10 @@ function grammar(baseGrammar, options) { if (typeof word != 'string') { throw new Error("Grammar's 'word' property must be a named rule."); } + + if (word === 'ReferenceError') { + throw new Error("Grammar's 'word' property must be a valid rule name."); + } } let conflicts = baseGrammar.conflicts; @@ -341,7 +357,17 @@ function grammar(baseGrammar, options) { throw new Error("Grammar's inline must be an array of rules."); } - inline = inlineRules.map(symbol => symbol.name); + inline = inlineRules.filter((symbol, index, self) => { + if (self.findIndex(s => s.name === symbol.name) !== index) { + console.log(`Warning: duplicate inline rule '${symbol.name}'`); + return false; + } + if (symbol.name === 'ReferenceError') { + console.log(`Warning: inline rule '${symbol.symbol.name}' is not defined.`); + return false; + } + return true; + }).map(symbol => symbol.name); } let supertypes = baseGrammar.supertypes; @@ -381,14 +407,19 @@ function grammar(baseGrammar, options) { throw new Error("Grammar must have at least one rule."); } - return {name, word, rules, extras, conflicts, precedences, externals, inline, supertypes}; + return { 
grammar: { name, word, rules, extras, conflicts, precedences, externals, inline, supertypes } }; } -function checkArguments(ruleCount, caller, callerName, suffix = '') { - if (ruleCount > 1) { +function checkArguments(args, ruleCount, caller, callerName, suffix = '', argType = 'rule') { + // Allow for .map() usage where additional arguments are index and the entire array. + const isMapCall = ruleCount === 3 && typeof args[1] === 'number' && Array.isArray(args[2]); + if (isMapCall) { + ruleCount = typeof args[2] === 'number' ? 1 : args[2].length; + } + if (ruleCount > 1 && !isMapCall) { const error = new Error([ - `The \`${callerName}\` function only takes one rule argument${suffix}.`, - 'You passed multiple rules. Did you mean to call `seq`?\n' + `The \`${callerName}\` function only takes one ${argType} argument${suffix}.`, + `You passed in multiple ${argType}s. Did you mean to call \`seq\`?\n` ].join('\n')); Error.captureStackTrace(error, caller); throw error @@ -415,4 +446,4 @@ global.grammar = grammar; global.field = field; const result = require(process.env.TREE_SITTER_GRAMMAR_PATH); -console.log(JSON.stringify(result, null, 2)); +process.stdout.write(JSON.stringify(result.grammar, null, null)); diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/grammar-schema.json b/third-party/tree-sitter/tree-sitter/cli/src/generate/grammar-schema.json index 5ca353703ea..59aa209ce53 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/generate/grammar-schema.json +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/grammar-schema.json @@ -31,6 +31,16 @@ } }, + "precedences": { + "type": "array", + "items": { + "type": "array", + "items": { + "$ref": "#/definitions/rule" + } + } + }, + "externals": { "type": "array", "items": { @@ -63,7 +73,7 @@ }, "supertypes": { - "description": "A list of hidden rule names that should be considered supertypes in the generated node types file. 
See http://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types.", + "description": "A list of hidden rule names that should be considered supertypes in the generated node types file. See https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types.", "type": "array", "items": { "description": "the name of a rule in `rules` or `extras`", @@ -105,7 +115,8 @@ "type": "string", "pattern": "^PATTERN$" }, - "value": { "type": "string" } + "value": { "type": "string" }, + "flags": { "type": "string" } }, "required": ["type", "value"] }, @@ -240,7 +251,10 @@ "pattern": "^(PREC|PREC_LEFT|PREC_RIGHT|PREC_DYNAMIC)$" }, "value": { - "type": "integer" + "oneOf": [ + { "type": "integer" }, + { "type": "string" } + ] }, "content": { "$ref": "#/definitions/rule" diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/grammar_files.rs b/third-party/tree-sitter/tree-sitter/cli/src/generate/grammar_files.rs new file mode 100644 index 00000000000..549bb2796c7 --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/grammar_files.rs @@ -0,0 +1,544 @@ +use super::write_file; +use anyhow::{anyhow, Context, Result}; +use heck::{ToKebabCase, ToShoutySnakeCase, ToSnakeCase, ToUpperCamelCase}; +use serde::Deserialize; +use serde_json::{json, Map, Value}; +use std::fs::File; +use std::io::BufReader; +use std::path::{Path, PathBuf}; +use std::{fs, str}; + +const CLI_VERSION: &str = env!("CARGO_PKG_VERSION"); +const CLI_VERSION_PLACEHOLDER: &str = "CLI_VERSION"; + +const PARSER_NAME_PLACEHOLDER: &str = "PARSER_NAME"; +const CAMEL_PARSER_NAME_PLACEHOLDER: &str = "CAMEL_PARSER_NAME"; +const UPPER_PARSER_NAME_PLACEHOLDER: &str = "UPPER_PARSER_NAME"; +const LOWER_PARSER_NAME_PLACEHOLDER: &str = "LOWER_PARSER_NAME"; + +const GRAMMAR_JS_TEMPLATE: &str = include_str!("./templates/grammar.js"); +const PACKAGE_JSON_TEMPLATE: &str = include_str!("./templates/package.json"); +const GITIGNORE_TEMPLATE: &str = include_str!("./templates/gitignore"); +const
GITATTRIBUTES_TEMPLATE: &str = include_str!("./templates/gitattributes"); +const EDITORCONFIG_TEMPLATE: &str = include_str!("./templates/.editorconfig"); + +const RUST_BINDING_VERSION: &str = env!("CARGO_PKG_VERSION"); +const RUST_BINDING_VERSION_PLACEHOLDER: &str = "RUST_BINDING_VERSION"; + +const LIB_RS_TEMPLATE: &str = include_str!("./templates/lib.rs"); +const BUILD_RS_TEMPLATE: &str = include_str!("./templates/build.rs"); +const CARGO_TOML_TEMPLATE: &str = include_str!("./templates/cargo.toml"); + +const INDEX_JS_TEMPLATE: &str = include_str!("./templates/index.js"); +const INDEX_D_TS_TEMPLATE: &str = include_str!("./templates/index.d.ts"); +const JS_BINDING_CC_TEMPLATE: &str = include_str!("./templates/js-binding.cc"); +const BINDING_GYP_TEMPLATE: &str = include_str!("./templates/binding.gyp"); + +const MAKEFILE_TEMPLATE: &str = include_str!("./templates/makefile"); +const PARSER_NAME_H_TEMPLATE: &str = include_str!("./templates/PARSER_NAME.h"); +const PARSER_NAME_PC_IN_TEMPLATE: &str = include_str!("./templates/PARSER_NAME.pc.in"); + +const GO_MOD_TEMPLATE: &str = include_str!("./templates/go.mod"); +const BINDING_GO_TEMPLATE: &str = include_str!("./templates/binding.go"); +const BINDING_GO_TEST_TEMPLATE: &str = include_str!("./templates/binding_test.go"); + +const SETUP_PY_TEMPLATE: &str = include_str!("./templates/setup.py"); +const INIT_PY_TEMPLATE: &str = include_str!("./templates/__init__.py"); +const INIT_PYI_TEMPLATE: &str = include_str!("./templates/__init__.pyi"); +const PYPROJECT_TOML_TEMPLATE: &str = include_str!("./templates/pyproject.toml"); +const PY_BINDING_C_TEMPLATE: &str = include_str!("./templates/py-binding.c"); + +const PACKAGE_SWIFT_TEMPLATE: &str = include_str!("./templates/Package.swift"); + +#[derive(Deserialize, Debug)] +struct LanguageConfiguration {} + +#[derive(Deserialize, Debug)] +struct PackageJSON { + #[serde(rename = "tree-sitter")] + tree_sitter: Option>, +} + +pub fn path_in_ignore(repo_path: &Path) -> bool { + [ + 
"bindings", + "build", + "examples", + "node_modules", + "queries", + "script", + "src", + "target", + "test", + "types", + ] + .iter() + .any(|dir| repo_path.ends_with(dir)) +} + +fn insert_after( + map: Map, + after: &str, + key: &str, + value: Value, +) -> Map { + let mut entries = map.into_iter().collect::>(); + let after_index = entries + .iter() + .position(|(k, _)| k == after) + .unwrap_or(entries.len() - 1) + + 1; + entries.insert(after_index, (key.to_string(), value)); + entries.into_iter().collect() +} + +pub fn generate_grammar_files( + repo_path: &Path, + language_name: &str, + generate_bindings: bool, +) -> Result<()> { + let dashed_language_name = language_name.to_kebab_case(); + + // TODO: remove legacy code updates in v0.24.0 + + // Create or update package.json + let package_json_path_state = missing_path_else( + repo_path.join("package.json"), + |path| generate_file(path, PACKAGE_JSON_TEMPLATE, dashed_language_name.as_str()), + |path| { + let package_json_str = + fs::read_to_string(path).with_context(|| "Failed to read package.json")?; + let mut package_json = serde_json::from_str::>(&package_json_str) + .with_context(|| "Failed to parse package.json")?; + if generate_bindings { + let mut updated = false; + + let dependencies = package_json + .entry("dependencies".to_string()) + .or_insert_with(|| Value::Object(Map::new())) + .as_object_mut() + .unwrap(); + if dependencies.remove("nan").is_some() { + eprintln!("Replacing nan dependency with node-addon-api in package.json"); + dependencies.insert("node-addon-api".to_string(), "^7.1.0".into()); + updated = true; + } + if !dependencies.contains_key("node-gyp-build") { + eprintln!("Adding node-gyp-build dependency to package.json"); + dependencies.insert("node-gyp-build".to_string(), "^4.8.0".into()); + updated = true; + } + + let dev_dependencies = package_json + .entry("devDependencies".to_string()) + .or_insert_with(|| Value::Object(Map::new())) + .as_object_mut() + .unwrap(); + if 
!dev_dependencies.contains_key("prebuildify") { + eprintln!("Adding prebuildify devDependency to package.json"); + dev_dependencies.insert("prebuildify".to_string(), "^6.0.0".into()); + updated = true; + } + + let scripts = package_json + .entry("scripts".to_string()) + .or_insert_with(|| Value::Object(Map::new())) + .as_object_mut() + .unwrap(); + match scripts.get("install") { + None => { + eprintln!("Adding an install script to package.json"); + scripts.insert("install".to_string(), "node-gyp-build".into()); + updated = true; + } + Some(Value::String(v)) if v != "node-gyp-build" => { + eprintln!("Updating the install script in package.json"); + scripts.insert("install".to_string(), "node-gyp-build".into()); + updated = true; + } + Some(_) => {} + } + if !scripts.contains_key("prebuildify") { + eprintln!("Adding a prebuildify script to package.json"); + scripts.insert( + "prebuildify".to_string(), + "prebuildify --napi --strip".into(), + ); + updated = true; + } + + // insert `peerDependencies` after `dependencies` + if !package_json.contains_key("peerDependencies") { + eprintln!("Adding peerDependencies to package.json"); + package_json = insert_after( + package_json, + "dependencies", + "peerDependencies", + json!({"tree-sitter": "^0.21.0"}), + ); + + package_json = insert_after( + package_json, + "peerDependencies", + "peerDependenciesMeta", + json!({"tree_sitter": {"optional": true}}), + ); + updated = true; + } + + // insert `types` right after `main` + if !package_json.contains_key("types") { + eprintln!("Adding types to package.json"); + package_json = + insert_after(package_json, "main", "types", "bindings/node".into()); + updated = true; + } + + // insert `files` right after `keywords` + if !package_json.contains_key("files") { + eprintln!("Adding files to package.json"); + package_json = insert_after( + package_json, + "keywords", + "files", + json!([ + "grammar.js", + "binding.gyp", + "prebuilds/**", + "bindings/node/*", + "queries/*", + "src/**", + 
]), + ); + updated = true; + } + + if updated { + let mut package_json_str = serde_json::to_string_pretty(&package_json)?; + package_json_str.push('\n'); + write_file(path, package_json_str)?; + } + } + + Ok(()) + }, + )?; + + let (_, package_json) = lookup_package_json_for_path(package_json_path_state.as_path())?; + + // Do not create a grammar.js file in a repo with multiple language configs + if !package_json.has_multiple_language_configs() { + missing_path(repo_path.join("grammar.js"), |path| { + generate_file(path, GRAMMAR_JS_TEMPLATE, language_name) + })?; + } + + if !generate_bindings { + // our job is done + return Ok(()); + } + + // Write .gitignore file + missing_path(repo_path.join(".gitignore"), |path| { + generate_file(path, GITIGNORE_TEMPLATE, language_name) + })?; + + // Write .gitattributes file + missing_path(repo_path.join(".gitattributes"), |path| { + generate_file(path, GITATTRIBUTES_TEMPLATE, language_name) + })?; + + // Write .editorconfig file + missing_path(repo_path.join(".editorconfig"), |path| { + generate_file(path, EDITORCONFIG_TEMPLATE, language_name) + })?; + + let bindings_dir = repo_path.join("bindings"); + + // Generate Rust bindings + missing_path(bindings_dir.join("rust"), create_dir)?.apply(|path| { + missing_path(path.join("lib.rs"), |path| { + generate_file(path, LIB_RS_TEMPLATE, language_name) + })?; + + missing_path(path.join("build.rs"), |path| { + generate_file(path, BUILD_RS_TEMPLATE, language_name) + })?; + + missing_path(repo_path.join("Cargo.toml"), |path| { + generate_file(path, CARGO_TOML_TEMPLATE, dashed_language_name.as_str()) + })?; + + Ok(()) + })?; + + // Generate Node bindings + missing_path(bindings_dir.join("node"), create_dir)?.apply(|path| { + missing_path_else( + path.join("index.js"), + |path| generate_file(path, INDEX_JS_TEMPLATE, language_name), + |path| { + let index_js = + fs::read_to_string(path).with_context(|| "Failed to read index.js")?; + if index_js.contains("../../build/Release") { + 
eprintln!("Replacing index.js with new binding API"); + generate_file(path, INDEX_JS_TEMPLATE, language_name)?; + } + Ok(()) + }, + )?; + + missing_path(path.join("index.d.ts"), |path| { + generate_file(path, INDEX_D_TS_TEMPLATE, language_name) + })?; + + missing_path_else( + path.join("binding.cc"), + |path| generate_file(path, JS_BINDING_CC_TEMPLATE, language_name), + |path| { + let binding_cc = + fs::read_to_string(path).with_context(|| "Failed to read binding.cc")?; + if binding_cc.contains("NAN_METHOD(New) {}") { + eprintln!("Replacing binding.cc with new binding API"); + generate_file(path, JS_BINDING_CC_TEMPLATE, language_name)?; + } + Ok(()) + }, + )?; + + // Create binding.gyp, or update it with new binding API. + missing_path_else( + repo_path.join("binding.gyp"), + |path| generate_file(path, BINDING_GYP_TEMPLATE, language_name), + |path| { + let binding_gyp = + fs::read_to_string(path).with_context(|| "Failed to read binding.gyp")?; + if binding_gyp.contains("require('nan')") { + eprintln!("Replacing binding.gyp with new binding API"); + generate_file(path, BINDING_GYP_TEMPLATE, language_name)?; + } + Ok(()) + }, + )?; + + Ok(()) + })?; + + // Generate C bindings + missing_path(bindings_dir.join("c"), create_dir)?.apply(|path| { + missing_path( + path.join(format!("tree-sitter-{language_name}.h")), + |path| generate_file(path, PARSER_NAME_H_TEMPLATE, language_name), + )?; + + missing_path( + path.join(format!("tree-sitter-{language_name}.pc.in")), + |path| generate_file(path, PARSER_NAME_PC_IN_TEMPLATE, language_name), + )?; + + missing_path(repo_path.join("Makefile"), |path| { + generate_file(path, MAKEFILE_TEMPLATE, language_name) + })?; + + Ok(()) + })?; + + // Generate Go bindings + missing_path(bindings_dir.join("go"), create_dir)?.apply(|path| { + missing_path(path.join("binding.go"), |path| { + generate_file(path, BINDING_GO_TEMPLATE, language_name) + })?; + + missing_path(path.join("binding_test.go"), |path| { + generate_file(path, 
BINDING_GO_TEST_TEMPLATE, language_name) + })?; + + missing_path(path.join("go.mod"), |path| { + generate_file(path, GO_MOD_TEMPLATE, language_name) + })?; + + Ok(()) + })?; + + // Generate Python bindings + missing_path( + bindings_dir + .join("python") + .join(format!("tree_sitter_{}", language_name.to_snake_case())), + create_dir, + )? + .apply(|path| { + missing_path(path.join("binding.c"), |path| { + generate_file(path, PY_BINDING_C_TEMPLATE, language_name) + })?; + + missing_path(path.join("__init__.py"), |path| { + generate_file(path, INIT_PY_TEMPLATE, language_name) + })?; + + missing_path(path.join("__init__.pyi"), |path| { + generate_file(path, INIT_PYI_TEMPLATE, language_name) + })?; + + missing_path(path.join("py.typed"), |path| { + generate_file(path, "", language_name) // py.typed is empty + })?; + + missing_path(repo_path.join("setup.py"), |path| { + generate_file(path, SETUP_PY_TEMPLATE, language_name) + })?; + + missing_path(repo_path.join("pyproject.toml"), |path| { + generate_file(path, PYPROJECT_TOML_TEMPLATE, dashed_language_name.as_str()) + })?; + + Ok(()) + })?; + + // Generate Swift bindings + missing_path( + bindings_dir + .join("swift") + .join(format!("TreeSitter{}", language_name.to_upper_camel_case())), + create_dir, + )? 
+ .apply(|path| { + missing_path(path.join(format!("{language_name}.h")), |path| { + generate_file(path, PARSER_NAME_H_TEMPLATE, language_name) + })?; + + missing_path(repo_path.join("Package.swift"), |path| { + generate_file(path, PACKAGE_SWIFT_TEMPLATE, language_name) + })?; + + Ok(()) + })?; + + Ok(()) +} + +fn lookup_package_json_for_path(path: &Path) -> Result<(PathBuf, PackageJSON)> { + let mut pathbuf = path.to_owned(); + loop { + let package_json = pathbuf + .exists() + .then(|| -> Result { + let file = + File::open(pathbuf.as_path()).with_context(|| "Failed to open package.json")?; + let package_json: PackageJSON = serde_json::from_reader(BufReader::new(file))?; + Ok(package_json) + }) + .transpose()?; + if let Some(package_json) = package_json { + if package_json.tree_sitter.is_some() { + return Ok((pathbuf, package_json)); + } + } + pathbuf.pop(); // package.json + if !pathbuf.pop() { + return Err(anyhow!(concat!( + "Failed to locate a package.json file that has a \"tree-sitter\" section,", + " please ensure you have one, and if you don't then consult the docs", + ))); + } + pathbuf.push("package.json"); + } +} + +fn generate_file(path: &Path, template: &str, language_name: &str) -> Result<()> { + write_file( + path, + template + .replace( + CAMEL_PARSER_NAME_PLACEHOLDER, + &language_name.to_upper_camel_case(), + ) + .replace( + UPPER_PARSER_NAME_PLACEHOLDER, + &language_name.to_shouty_snake_case(), + ) + .replace( + LOWER_PARSER_NAME_PLACEHOLDER, + &language_name.to_snake_case(), + ) + .replace(PARSER_NAME_PLACEHOLDER, language_name) + .replace(CLI_VERSION_PLACEHOLDER, CLI_VERSION) + .replace(RUST_BINDING_VERSION_PLACEHOLDER, RUST_BINDING_VERSION), + ) +} + +fn create_dir(path: &Path) -> Result<()> { + fs::create_dir_all(path) + .with_context(|| format!("Failed to create {:?}", path.to_string_lossy())) +} + +#[derive(PartialEq, Eq, Debug)] +enum PathState { + Exists(PathBuf), + Missing(PathBuf), +} + +#[allow(dead_code)] +impl PathState { + fn 
exists(&self, mut action: impl FnMut(&Path) -> Result<()>) -> Result<&Self> { + if let Self::Exists(path) = self { + action(path.as_path())?; + } + Ok(self) + } + + fn missing(&self, mut action: impl FnMut(&Path) -> Result<()>) -> Result<&Self> { + if let Self::Missing(path) = self { + action(path.as_path())?; + } + Ok(self) + } + + fn apply(&self, mut action: impl FnMut(&Path) -> Result<()>) -> Result<&Self> { + action(self.as_path())?; + Ok(self) + } + + fn apply_state(&self, mut action: impl FnMut(&Self) -> Result<()>) -> Result<&Self> { + action(self)?; + Ok(self) + } + + fn as_path(&self) -> &Path { + match self { + Self::Exists(path) | Self::Missing(path) => path.as_path(), + } + } +} + +fn missing_path(path: PathBuf, mut action: F) -> Result +where + F: FnMut(&Path) -> Result<()>, +{ + if !path.exists() { + action(path.as_path())?; + Ok(PathState::Missing(path)) + } else { + Ok(PathState::Exists(path)) + } +} + +fn missing_path_else(path: PathBuf, mut action: T, mut else_action: F) -> Result +where + T: FnMut(&Path) -> Result<()>, + F: FnMut(&Path) -> Result<()>, +{ + if !path.exists() { + action(path.as_path())?; + Ok(PathState::Missing(path)) + } else { + else_action(path.as_path())?; + Ok(PathState::Exists(path)) + } +} + +impl PackageJSON { + fn has_multiple_language_configs(&self) -> bool { + self.tree_sitter.as_ref().is_some_and(|c| c.len() > 1) + } +} diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/grammars.rs b/third-party/tree-sitter/tree-sitter/cli/src/generate/grammars.rs index db8d8524c6d..5f057a1bcc5 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/generate/grammars.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/grammars.rs @@ -4,7 +4,7 @@ use std::collections::HashMap; use std::fmt; #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] -pub(crate) enum VariableType { +pub enum VariableType { Hidden, Auxiliary, Anonymous, @@ -14,20 +14,20 @@ pub(crate) enum VariableType { // Input grammar 
#[derive(Clone, Debug, PartialEq, Eq)] -pub(crate) struct Variable { +pub struct Variable { pub name: String, pub kind: VariableType, pub rule: Rule, } #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub(crate) enum PrecedenceEntry { +pub enum PrecedenceEntry { Name(String), Symbol(String), } #[derive(Debug, Default, PartialEq, Eq)] -pub(crate) struct InputGrammar { +pub struct InputGrammar { pub name: String, pub variables: Vec, pub extra_symbols: Vec, @@ -42,7 +42,7 @@ pub(crate) struct InputGrammar { // Extracted lexical grammar #[derive(Debug, PartialEq, Eq)] -pub(crate) struct LexicalVariable { +pub struct LexicalVariable { pub name: String, pub kind: VariableType, pub implicit_precedence: i32, @@ -50,7 +50,7 @@ pub(crate) struct LexicalVariable { } #[derive(Debug, Default, PartialEq, Eq)] -pub(crate) struct LexicalGrammar { +pub struct LexicalGrammar { pub nfa: Nfa, pub variables: Vec, } @@ -58,7 +58,7 @@ pub(crate) struct LexicalGrammar { // Extracted syntax grammar #[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub(crate) struct ProductionStep { +pub struct ProductionStep { pub symbol: Symbol, pub precedence: Precedence, pub associativity: Option, @@ -67,33 +67,33 @@ pub(crate) struct ProductionStep { } #[derive(Clone, Debug, Default, PartialEq, Eq)] -pub(crate) struct Production { +pub struct Production { pub steps: Vec, pub dynamic_precedence: i32, } #[derive(Default)] -pub(crate) struct InlinedProductionMap { +pub struct InlinedProductionMap { pub productions: Vec, pub production_map: HashMap<(*const Production, u32), Vec>, } #[derive(Clone, Debug, PartialEq, Eq)] -pub(crate) struct SyntaxVariable { +pub struct SyntaxVariable { pub name: String, pub kind: VariableType, pub productions: Vec, } #[derive(Clone, Debug, PartialEq, Eq)] -pub(crate) struct ExternalToken { +pub struct ExternalToken { pub name: String, pub kind: VariableType, pub corresponding_internal_token: Option, } #[derive(Debug, Default)] -pub(crate) struct 
SyntaxGrammar { +pub struct SyntaxGrammar { pub variables: Vec, pub extra_symbols: Vec, pub expected_conflicts: Vec>, @@ -106,7 +106,7 @@ pub(crate) struct SyntaxGrammar { #[cfg(test)] impl ProductionStep { - pub(crate) fn new(symbol: Symbol) -> Self { + pub const fn new(symbol: Symbol) -> Self { Self { symbol, precedence: Precedence::None, @@ -116,11 +116,7 @@ impl ProductionStep { } } - pub(crate) fn with_prec( - self, - precedence: Precedence, - associativity: Option, - ) -> Self { + pub fn with_prec(self, precedence: Precedence, associativity: Option) -> Self { Self { symbol: self.symbol, precedence, @@ -130,7 +126,7 @@ impl ProductionStep { } } - pub(crate) fn with_alias(self, value: &str, is_named: bool) -> Self { + pub fn with_alias(self, value: &str, is_named: bool) -> Self { Self { symbol: self.symbol, precedence: self.precedence, @@ -142,7 +138,7 @@ impl ProductionStep { field_name: self.field_name, } } - pub(crate) fn with_field_name(self, name: &str) -> Self { + pub fn with_field_name(self, name: &str) -> Self { Self { symbol: self.symbol, precedence: self.precedence, @@ -155,7 +151,7 @@ impl ProductionStep { impl Production { pub fn first_symbol(&self) -> Option { - self.steps.first().map(|s| s.symbol.clone()) + self.steps.first().map(|s| s.symbol) } } @@ -195,24 +191,24 @@ impl Variable { } impl VariableType { - pub fn is_visible(&self) -> bool { - *self == VariableType::Named || *self == VariableType::Anonymous + pub fn is_visible(self) -> bool { + self == Self::Named || self == Self::Anonymous } } impl LexicalGrammar { pub fn variable_indices_for_nfa_states<'a>( &'a self, - state_ids: &'a Vec, + state_ids: &'a [u32], ) -> impl Iterator + 'a { let mut prev = None; state_ids.iter().filter_map(move |state_id| { let variable_id = self.variable_index_for_nfa_state(*state_id); - if prev != Some(variable_id) { + if prev == Some(variable_id) { + None + } else { prev = Some(variable_id); prev - } else { - None } }) } @@ -246,7 +242,7 @@ impl 
InlinedProductionMap { .map(|production_indices| { production_indices .iter() - .cloned() + .copied() .map(move |index| &self.productions[index]) }) } @@ -255,8 +251,8 @@ impl InlinedProductionMap { impl fmt::Display for PrecedenceEntry { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - PrecedenceEntry::Name(n) => write!(f, "'{}'", n), - PrecedenceEntry::Symbol(s) => write!(f, "$.{}", s), + Self::Name(n) => write!(f, "'{n}'"), + Self::Symbol(s) => write!(f, "$.{s}"), } } } diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/mod.rs b/third-party/tree-sitter/tree-sitter/cli/src/generate/mod.rs index 4838828b1c6..ea850c8d3c5 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/generate/mod.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/mod.rs @@ -1,7 +1,25 @@ -mod binding_files; +use std::io::Write; +use std::path::{Path, PathBuf}; +use std::process::{Command, Stdio}; +use std::{env, fs}; + +use anyhow::{anyhow, Context, Result}; +use lazy_static::lazy_static; +use regex::{Regex, RegexBuilder}; +use semver::Version; + +use build_tables::build_tables; +use grammar_files::path_in_ignore; +use grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar}; +use parse_grammar::parse_grammar; +use prepare_grammar::prepare_grammar; +use render::render_c_code; +use rules::AliasMap; + mod build_tables; mod char_tree; mod dedup; +mod grammar_files; mod grammars; mod nfa; mod node_types; @@ -11,21 +29,6 @@ mod render; mod rules; mod tables; -use self::build_tables::build_tables; -use self::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar}; -use self::parse_grammar::parse_grammar; -use self::prepare_grammar::prepare_grammar; -use self::render::render_c_code; -use self::rules::AliasMap; -use anyhow::{anyhow, Context, Result}; -use lazy_static::lazy_static; -use regex::{Regex, RegexBuilder}; -use semver::Version; -use std::fs; -use std::io::Write; -use std::path::{Path, PathBuf}; -use std::process::{Command, 
Stdio}; - lazy_static! { static ref JSON_COMMENT_REGEX: Regex = RegexBuilder::new("^\\s*//.*") .multi_line(true) @@ -38,13 +41,55 @@ struct GeneratedParser { node_types_json: String, } +pub const ALLOC_HEADER: &str = include_str!("./templates/alloc.h"); + pub fn generate_parser_in_directory( - repo_path: &PathBuf, + repo_path: &Path, grammar_path: Option<&str>, abi_version: usize, generate_bindings: bool, report_symbol_name: Option<&str>, + js_runtime: Option<&str>, ) -> Result<()> { + let mut repo_path = repo_path.to_owned(); + let mut grammar_path = grammar_path; + + // Populate a new empty grammar directory. + if let Some(path) = grammar_path { + let path = PathBuf::from(path); + if !path + .try_exists() + .with_context(|| "Some error with specified path")? + { + fs::create_dir_all(&path)?; + grammar_path = None; + repo_path = path; + } + } + + if repo_path.is_dir() && !repo_path.join("grammar.js").exists() && !path_in_ignore(&repo_path) { + if let Some(dir_name) = repo_path + .file_name() + .map(|x| x.to_string_lossy().to_ascii_lowercase()) + { + if let Some(language_name) = dir_name + .strip_prefix("tree-sitter-") + .or_else(|| Some(dir_name.as_ref())) + { + grammar_files::generate_grammar_files(&repo_path, language_name, false)?; + } + } + } + + // Read the grammar.json. + let grammar_json = if let Some(path) = grammar_path { + load_grammar_file(path.as_ref(), js_runtime)? + } else { + let grammar_js_path = + grammar_path.map_or(repo_path.join("grammar.js"), std::convert::Into::into); + load_grammar_file(&grammar_js_path, js_runtime)? + }; + let src_path = repo_path.join("src"); let header_path = src_path.join("tree_sitter"); @@ -52,17 +97,9 @@ pub fn generate_parser_in_directory( fs::create_dir_all(&src_path)?; fs::create_dir_all(&header_path)?; - // Read the grammar.json. 
- let grammar_json; - match grammar_path { - Some(path) => { - grammar_json = load_grammar_file(path.as_ref())?; - } - None => { - let grammar_js_path = grammar_path.map_or(repo_path.join("grammar.js"), |s| s.into()); - grammar_json = load_grammar_file(&grammar_js_path)?; - fs::write(&src_path.join("grammar.json"), &grammar_json)?; - } + if grammar_path.is_none() { + fs::write(src_path.join("grammar.json"), &grammar_json) + .with_context(|| format!("Failed to write grammar.json to {src_path:?}"))?; } // Parse and preprocess the grammar. @@ -79,7 +116,7 @@ pub fn generate_parser_in_directory( &language_name, syntax_grammar, lexical_grammar, - inlines, + &inlines, simple_aliases, abi_version, report_symbol_name, @@ -87,10 +124,12 @@ pub fn generate_parser_in_directory( write_file(&src_path.join("parser.c"), c_code)?; write_file(&src_path.join("node-types.json"), node_types_json)?; + write_file(&header_path.join("alloc.h"), ALLOC_HEADER)?; + write_file(&header_path.join("array.h"), tree_sitter::ARRAY_HEADER)?; write_file(&header_path.join("parser.h"), tree_sitter::PARSER_HEADER)?; - if generate_bindings { - binding_files::generate_binding_files(&repo_path, &language_name)?; + if !path_in_ignore(&repo_path) { + grammar_files::generate_grammar_files(&repo_path, &language_name, generate_bindings)?; } Ok(()) @@ -105,7 +144,7 @@ pub fn generate_parser_for_grammar(grammar_json: &str) -> Result<(String, String &input_grammar.name, syntax_grammar, lexical_grammar, - inlines, + &inlines, simple_aliases, tree_sitter::LANGUAGE_VERSION, None, @@ -114,10 +153,10 @@ pub fn generate_parser_for_grammar(grammar_json: &str) -> Result<(String, String } fn generate_parser_for_grammar_with_opts( - name: &String, + name: &str, syntax_grammar: SyntaxGrammar, lexical_grammar: LexicalGrammar, - inlines: InlinedProductionMap, + inlines: &InlinedProductionMap, simple_aliases: AliasMap, abi_version: usize, report_symbol_name: Option<&str>, @@ -135,7 +174,7 @@ fn 
generate_parser_for_grammar_with_opts( &lexical_grammar, &simple_aliases, &variable_info, - &inlines, + inlines, report_symbol_name, )?; let c_code = render_c_code( @@ -155,32 +194,40 @@ fn generate_parser_for_grammar_with_opts( }) } -pub fn load_grammar_file(grammar_path: &Path) -> Result { +pub fn load_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result { + if grammar_path.is_dir() { + return Err(anyhow!( + "Path to a grammar file with `.js` or `.json` extension is required" + )); + } match grammar_path.extension().and_then(|e| e.to_str()) { - Some("js") => Ok(load_js_grammar_file(grammar_path)?), - Some("json") => Ok(fs::read_to_string(grammar_path)?), - _ => Err(anyhow!( - "Unknown grammar file extension: {:?}", - grammar_path - )), + Some("js") => Ok(load_js_grammar_file(grammar_path, js_runtime) + .with_context(|| "Failed to load grammar.js")?), + Some("json") => { + Ok(fs::read_to_string(grammar_path).with_context(|| "Failed to load grammar.json")?) + } + _ => Err(anyhow!("Unknown grammar file extension: {grammar_path:?}",)), } } -fn load_js_grammar_file(grammar_path: &Path) -> Result { +fn load_js_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result { let grammar_path = fs::canonicalize(grammar_path)?; - let mut node_process = Command::new("node") + + let js_runtime = js_runtime.unwrap_or("node"); + + let mut node_process = Command::new(js_runtime) .env("TREE_SITTER_GRAMMAR_PATH", grammar_path) .stdin(Stdio::piped()) .stdout(Stdio::piped()) .spawn() - .expect("Failed to run `node`"); + .with_context(|| format!("Failed to run `{js_runtime}`"))?; let mut node_stdin = node_process .stdin .take() - .expect("Failed to open stdin for node"); + .with_context(|| "Failed to open stdin for node")?; let cli_version = Version::parse(env!("CARGO_PKG_VERSION")) - .expect("Could not parse this package's version as semver."); + .with_context(|| "Could not parse this package's version as semver.")?; write!( node_stdin, 
"global.TREE_SITTER_CLI_VERSION_MAJOR = {}; @@ -188,24 +235,43 @@ fn load_js_grammar_file(grammar_path: &Path) -> Result { global.TREE_SITTER_CLI_VERSION_PATCH = {};", cli_version.major, cli_version.minor, cli_version.patch, ) - .expect("Failed to write tree-sitter version to node's stdin"); + .with_context(|| "Failed to write tree-sitter version to node's stdin")?; let javascript_code = include_bytes!("./dsl.js"); node_stdin .write(javascript_code) - .expect("Failed to write grammar dsl to node's stdin"); + .with_context(|| "Failed to write grammar dsl to node's stdin")?; drop(node_stdin); let output = node_process .wait_with_output() - .expect("Failed to read output from node"); + .with_context(|| "Failed to read output from node")?; match output.status.code() { None => panic!("Node process was killed"), - Some(0) => {} - Some(code) => return Err(anyhow!("Node process exited with status {}", code)), - } + Some(0) => { + let stdout = + String::from_utf8(output.stdout).with_context(|| "Got invalid UTF8 from node")?; - let mut result = String::from_utf8(output.stdout).expect("Got invalid UTF8 from node"); - result.push('\n'); - Ok(result) + let mut grammar_json = &stdout[..]; + + if let Some(pos) = stdout.rfind('\n') { + // If there's a newline, split the last line from the rest of the output + let node_output = &stdout[..pos]; + grammar_json = &stdout[pos + 1..]; + + let mut stdout = std::io::stdout().lock(); + stdout.write_all(node_output.as_bytes())?; + stdout.write_all(b"\n")?; + stdout.flush()?; + } + + Ok(serde_json::to_string_pretty( + &serde_json::from_str::(grammar_json) + .with_context(|| "Failed to parse grammar JSON")?, + ) + .with_context(|| "Failed to serialize grammar JSON")? 
+ + "\n") + } + Some(code) => Err(anyhow!("Node process exited with status {code}")), + } } fn write_file(path: &Path, body: impl AsRef<[u8]>) -> Result<()> { diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/nfa.rs b/third-party/tree-sitter/tree-sitter/cli/src/generate/nfa.rs index 6be360826cd..66f78074aad 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/generate/nfa.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/nfa.rs @@ -28,7 +28,7 @@ pub enum NfaState { }, } -#[derive(PartialEq, Eq)] +#[derive(PartialEq, Eq, Default)] pub struct Nfa { pub states: Vec, } @@ -47,40 +47,36 @@ pub struct NfaTransition { pub states: Vec, } -impl Default for Nfa { - fn default() -> Self { - Self { states: Vec::new() } - } -} - const END: u32 = char::MAX as u32 + 1; impl CharacterSet { /// Create a character set with a single character. - pub fn empty() -> Self { - CharacterSet { ranges: Vec::new() } + pub const fn empty() -> Self { + Self { ranges: Vec::new() } } /// Create a character set with a given *inclusive* range of characters. + #[allow(clippy::single_range_in_vec_init)] pub fn from_range(mut first: char, mut last: char) -> Self { if first > last { swap(&mut first, &mut last); } - CharacterSet { + Self { ranges: vec![(first as u32)..(last as u32 + 1)], } } /// Create a character set with a single character. + #[allow(clippy::single_range_in_vec_init)] pub fn from_char(c: char) -> Self { - CharacterSet { + Self { ranges: vec![(c as u32)..(c as u32 + 1)], } } /// Create a character set containing all characters *not* present /// in this character set. 
- pub fn negate(mut self) -> CharacterSet { + pub fn negate(mut self) -> Self { let mut i = 0; let mut previous_end = 0; while i < self.ranges.len() { @@ -110,10 +106,10 @@ impl CharacterSet { self } - pub fn add(mut self, other: &CharacterSet) -> Self { + pub fn add(mut self, other: &Self) -> Self { let mut index = 0; for range in &other.ranges { - index = self.add_int_range(index, range.start as u32, range.end as u32); + index = self.add_int_range(index, range.start, range.end); } self } @@ -143,7 +139,7 @@ impl CharacterSet { i } - pub fn does_intersect(&self, other: &CharacterSet) -> bool { + pub fn does_intersect(&self, other: &Self) -> bool { let mut left_ranges = self.ranges.iter(); let mut right_ranges = other.ranges.iter(); let mut left_range = left_ranges.next(); @@ -163,7 +159,7 @@ impl CharacterSet { /// Get the set of characters that are present in both this set /// and the other set. Remove those common characters from both /// of the operands. - pub fn remove_intersection(&mut self, other: &mut CharacterSet) -> CharacterSet { + pub fn remove_intersection(&mut self, other: &mut Self) -> Self { let mut intersection = Vec::new(); let mut left_i = 0; let mut right_i = 0; @@ -209,29 +205,28 @@ impl CharacterSet { } } } - Ordering::Equal => { - // [ L ] - // [ R ] - if left.end < right.end { - intersection.push(left.start..left.end); - right.start = left.end; - self.ranges.remove(left_i); - } - // [ L ] - // [ R ] - else if left.end == right.end { - intersection.push(left.clone()); - self.ranges.remove(left_i); - other.ranges.remove(right_i); - } - // [ L ] - // [ R ] - else if left.end > right.end { - intersection.push(right.clone()); - left.start = right.end; - other.ranges.remove(right_i); - } + // [ L ] + // [ R ] + Ordering::Equal if left.end < right.end => { + intersection.push(left.start..left.end); + right.start = left.end; + self.ranges.remove(left_i); + } + // [ L ] + // [ R ] + Ordering::Equal if left.end == right.end => { + 
intersection.push(left.clone()); + self.ranges.remove(left_i); + other.ranges.remove(right_i); } + // [ L ] + // [ R ] + Ordering::Equal if left.end > right.end => { + intersection.push(right.clone()); + left.start = right.end; + other.ranges.remove(right_i); + } + Ordering::Equal => {} Ordering::Greater => { // [ L ] // [ R ] @@ -271,30 +266,30 @@ impl CharacterSet { } } } - CharacterSet { + Self { ranges: intersection, } } /// Produces a `CharacterSet` containing every character in `self` that is not present in /// `other`. - pub fn difference(mut self, mut other: CharacterSet) -> CharacterSet { + pub fn difference(mut self, mut other: Self) -> Self { self.remove_intersection(&mut other); self } /// Produces a `CharacterSet` containing every character that is in _exactly one_ of `self` or /// `other`, but is not present in both sets. - pub fn symmetric_difference(mut self, mut other: CharacterSet) -> CharacterSet { + pub fn symmetric_difference(mut self, mut other: Self) -> Self { self.remove_intersection(&mut other); self.add(&other) } - pub fn iter<'a>(&'a self) -> impl Iterator + 'a { - self.ranges.iter().flat_map(|r| r.clone()) + pub fn iter(&self) -> impl Iterator + '_ { + self.ranges.iter().flat_map(std::clone::Clone::clone) } - pub fn chars<'a>(&'a self) -> impl Iterator + 'a { + pub fn chars(&self) -> impl Iterator + '_ { self.iter().filter_map(char::from_u32) } @@ -329,11 +324,10 @@ impl CharacterSet { prev_range_successor += 1; } prev_range = Some(range.start..c); - None } else { prev_range = Some(c..c); - None } + None }) .collect() } @@ -344,13 +338,19 @@ impl CharacterSet { } impl Ord for CharacterSet { - fn cmp(&self, other: &CharacterSet) -> Ordering { + fn cmp(&self, other: &Self) -> Ordering { let count_cmp = self .ranges .iter() - .map(|r| r.len()) + .map(std::iter::ExactSizeIterator::len) .sum::() - .cmp(&other.ranges.iter().map(|r| r.len()).sum()); + .cmp( + &other + .ranges + .iter() + .map(std::iter::ExactSizeIterator::len) + .sum(), + ); if 
count_cmp != Ordering::Equal { return count_cmp; } @@ -368,12 +368,12 @@ impl Ord for CharacterSet { } } } - return Ordering::Equal; + Ordering::Equal } } impl PartialOrd for CharacterSet { - fn partial_cmp(&self, other: &CharacterSet) -> Option { + fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } @@ -390,7 +390,7 @@ impl fmt::Debug for CharacterSet { if i > 0 { write!(f, ", ")?; } - write!(f, "{:?}", c)?; + write!(f, "{c:?}")?; } write!(f, "]")?; Ok(()) @@ -398,8 +398,8 @@ impl fmt::Debug for CharacterSet { } impl Nfa { - pub fn new() -> Self { - Nfa { states: Vec::new() } + pub const fn new() -> Self { + Self { states: Vec::new() } } pub fn last_state_id(&self) -> u32 { @@ -409,9 +409,9 @@ impl Nfa { impl fmt::Debug for Nfa { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "Nfa {{ states: {{\n")?; + writeln!(f, "Nfa {{ states: {{")?; for (i, state) in self.states.iter().enumerate() { - write!(f, " {}: {:?},\n", i, state)?; + writeln!(f, " {i}: {state:?},")?; } write!(f, "}} }}")?; Ok(()) @@ -434,7 +434,7 @@ impl<'a> NfaCursor<'a> { } pub fn force_reset(&mut self, states: Vec) { - self.state_ids = states + self.state_ids = states; } pub fn transition_chars(&self) -> impl Iterator { @@ -464,7 +464,7 @@ impl<'a> NfaCursor<'a> { fn group_transitions<'b>( iter: impl Iterator, ) -> Vec { - let mut result: Vec = Vec::new(); + let mut result = Vec::::new(); for (chars, is_sep, prec, state) in iter { let mut chars = chars.clone(); let mut i = 0; @@ -472,9 +472,8 @@ impl<'a> NfaCursor<'a> { let intersection = result[i].characters.remove_intersection(&mut chars); if !intersection.is_empty() { let mut intersection_states = result[i].states.clone(); - match intersection_states.binary_search(&state) { - Err(j) => intersection_states.insert(j, state), - _ => {} + if let Err(j) = intersection_states.binary_search(&state) { + intersection_states.insert(j, state); } let intersection_transition = NfaTransition { characters: intersection, 
@@ -824,8 +823,7 @@ mod tests { .map(|(chars, is_sep, prec, state)| (chars, *is_sep, *prec, *state)) ), row.1, - "row {}", - i + "row {i}", ); } } @@ -966,15 +964,14 @@ mod tests { row.right ); - let symm_difference = row.left_only.clone().add(&mut row.right_only.clone()); + let symm_difference = row.left_only.clone().add(&row.right_only); assert_eq!( row.left.clone().symmetric_difference(row.right.clone()), symm_difference, - "row {}b: {:?} ~~ {:?}", - i, + "row {i}b: {:?} ~~ {:?}", row.left, row.right - ) + ); } } @@ -1035,6 +1032,7 @@ mod tests { } #[test] + #[allow(clippy::single_range_in_vec_init)] fn test_character_set_get_ranges() { struct Row { chars: Vec, @@ -1064,12 +1062,9 @@ mod tests { chars, ruled_out_chars, expected_ranges, - } in table.iter() + } in &table { - let ruled_out_chars = ruled_out_chars - .into_iter() - .map(|c: &char| *c as u32) - .collect(); + let ruled_out_chars = ruled_out_chars.iter().map(|c: &char| *c as u32).collect(); let mut set = CharacterSet::empty(); for c in chars { set = set.add_char(*c); diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/node_types.rs b/third-party/tree-sitter/tree-sitter/cli/src/generate/node_types.rs index 43918980037..7f5124589c3 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/generate/node_types.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/node_types.rs @@ -6,19 +6,19 @@ use std::cmp::Ordering; use std::collections::{BTreeMap, HashMap, HashSet}; #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub(crate) enum ChildType { +pub enum ChildType { Normal(Symbol), Aliased(Alias), } #[derive(Clone, Debug, Default, PartialEq, Eq)] -pub(crate) struct FieldInfo { +pub struct FieldInfo { pub quantity: ChildQuantity, pub types: Vec, } #[derive(Clone, Debug, Default, PartialEq, Eq)] -pub(crate) struct VariableInfo { +pub struct VariableInfo { pub fields: HashMap, pub children: FieldInfo, pub children_without_fields: FieldInfo, @@ -26,7 +26,7 @@ pub(crate) struct 
VariableInfo { } #[derive(Debug, Serialize, PartialEq, Eq, Default, PartialOrd, Ord)] -pub(crate) struct NodeInfoJSON { +pub struct NodeInfoJSON { #[serde(rename = "type")] kind: String, named: bool, @@ -39,14 +39,14 @@ pub(crate) struct NodeInfoJSON { } #[derive(Clone, Debug, Serialize, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub(crate) struct NodeTypeJSON { +pub struct NodeTypeJSON { #[serde(rename = "type")] kind: String, named: bool, } #[derive(Debug, Serialize, PartialEq, Eq, PartialOrd, Ord)] -pub(crate) struct FieldInfoJSON { +pub struct FieldInfoJSON { multiple: bool, required: bool, types: Vec, @@ -61,7 +61,7 @@ pub struct ChildQuantity { impl Default for FieldInfoJSON { fn default() -> Self { - FieldInfoJSON { + Self { multiple: false, required: true, types: Vec::new(), @@ -76,23 +76,25 @@ impl Default for ChildQuantity { } impl ChildQuantity { - fn zero() -> Self { - ChildQuantity { + #[must_use] + const fn zero() -> Self { + Self { exists: false, required: false, multiple: false, } } - fn one() -> Self { - ChildQuantity { + #[must_use] + const fn one() -> Self { + Self { exists: true, required: true, multiple: false, } } - fn append(&mut self, other: ChildQuantity) { + fn append(&mut self, other: Self) { if other.exists { if self.exists || other.multiple { self.multiple = true; @@ -104,7 +106,7 @@ impl ChildQuantity { } } - fn union(&mut self, other: ChildQuantity) -> bool { + fn union(&mut self, other: Self) -> bool { let mut result = false; if !self.exists && other.exists { result = true; @@ -144,7 +146,7 @@ impl ChildQuantity { /// 2. aliases. If a parent node type `M` is aliased as some other type `N`, /// then nodes which *appear* to have type `N` may have internal structure based /// on `M`. 
-pub(crate) fn get_variable_info( +pub fn get_variable_info( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, default_aliases: &AliasMap, @@ -209,12 +211,12 @@ pub(crate) fn get_variable_info( let field_info = variable_info .fields .entry(field_name.clone()) - .or_insert(FieldInfo::default()); + .or_insert_with(FieldInfo::default); did_change |= extend_sorted(&mut field_info.types, Some(&child_type)); let production_field_quantity = production_field_quantities .entry(field_name) - .or_insert(ChildQuantity::zero()); + .or_insert_with(ChildQuantity::zero); // Inherit the types and quantities of hidden children associated with fields. if child_is_hidden && child_symbol.is_non_terminal() { @@ -252,13 +254,13 @@ pub(crate) fn get_variable_info( for (field_name, child_field_info) in &child_variable_info.fields { production_field_quantities .entry(field_name) - .or_insert(ChildQuantity::zero()) + .or_insert_with(ChildQuantity::zero) .append(child_field_info.quantity); did_change |= extend_sorted( &mut variable_info .fields .entry(field_name.clone()) - .or_insert(FieldInfo::default()) + .or_insert_with(FieldInfo::default) .types, &child_field_info.types, ); @@ -308,12 +310,12 @@ pub(crate) fn get_variable_info( .quantity .union(production_children_without_fields_quantity); - for (field_name, info) in variable_info.fields.iter_mut() { + for (field_name, info) in &mut variable_info.fields { did_change |= info.quantity.union( production_field_quantities .get(field_name) - .cloned() - .unwrap_or(ChildQuantity::zero()), + .copied() + .unwrap_or_else(ChildQuantity::zero), ); } } @@ -345,8 +347,8 @@ pub(crate) fn get_variable_info( .types .retain(child_type_is_visible); } - for variable_info in result.iter_mut() { - for (_, field_info) in variable_info.fields.iter_mut() { + for variable_info in &mut result { + for field_info in variable_info.fields.values_mut() { field_info.types.retain(child_type_is_visible); } variable_info.fields.retain(|_, v| 
!v.types.is_empty()); @@ -359,11 +361,11 @@ pub(crate) fn get_variable_info( Ok(result) } -pub(crate) fn generate_node_types_json( +pub fn generate_node_types_json( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, default_aliases: &AliasMap, - variable_info: &Vec, + variable_info: &[VariableInfo], ) -> Vec { let mut node_types_json = BTreeMap::new(); @@ -373,7 +375,7 @@ pub(crate) fn generate_node_types_json( named: alias.is_named, }, ChildType::Normal(symbol) => { - if let Some(alias) = default_aliases.get(&symbol) { + if let Some(alias) = default_aliases.get(symbol) { NodeTypeJSON { kind: alias.value.clone(), named: alias.is_named, @@ -408,15 +410,15 @@ pub(crate) fn generate_node_types_json( }; let populate_field_info_json = |json: &mut FieldInfoJSON, info: &FieldInfo| { - if info.types.len() > 0 { + if info.types.is_empty() { + json.required = false; + } else { json.multiple |= info.quantity.multiple; json.required &= info.quantity.required; json.types .extend(info.types.iter().map(child_type_to_node_type)); json.types.sort_unstable(); json.types.dedup(); - } else { - json.required = false; } }; @@ -432,7 +434,7 @@ pub(crate) fn generate_node_types_json( if !default_aliases.contains_key(extra_symbol) { aliases_by_symbol .entry(*extra_symbol) - .or_insert(HashSet::new()) + .or_insert_with(HashSet::new) .insert(None); } } @@ -441,7 +443,7 @@ pub(crate) fn generate_node_types_json( for step in &production.steps { aliases_by_symbol .entry(step.symbol) - .or_insert(HashSet::new()) + .or_insert_with(HashSet::new) .insert( step.alias .as_ref() @@ -451,7 +453,10 @@ pub(crate) fn generate_node_types_json( } } } - aliases_by_symbol.insert(Symbol::non_terminal(0), [None].iter().cloned().collect()); + aliases_by_symbol.insert( + Symbol::non_terminal(0), + std::iter::once(&None).cloned().collect(), + ); let mut subtype_map = Vec::new(); for (i, info) in variable_info.iter().enumerate() { @@ -516,7 +521,7 @@ pub(crate) fn generate_node_types_json( }); let 
fields_json = node_type_json.fields.as_mut().unwrap(); - for (new_field, field_info) in info.fields.iter() { + for (new_field, field_info) in &info.fields { let field_json = fields_json.entry(new_field.clone()).or_insert_with(|| { // If another rule is aliased with the same name, and does *not* have this field, // then this field cannot be required. @@ -558,7 +563,7 @@ pub(crate) fn generate_node_types_json( } }); - for (_, node_type_json) in node_types_json.iter_mut() { + for node_type_json in node_types_json.values_mut() { if node_type_json .children .as_ref() @@ -571,7 +576,7 @@ pub(crate) fn generate_node_types_json( process_supertypes(children, &subtype_map); } if let Some(fields) = &mut node_type_json.fields { - for (_, field_info) in fields.iter_mut() { + for field_info in fields.values_mut() { process_supertypes(field_info, &subtype_map); } } @@ -590,11 +595,11 @@ pub(crate) fn generate_node_types_json( .unwrap_or(&empty) .iter() .map(move |alias| { - if let Some(alias) = alias { - (&alias.value, alias.kind()) - } else { - (&variable.name, variable.kind) - } + alias + .as_ref() + .map_or((&variable.name, variable.kind), |alias| { + (&alias.value, alias.kind()) + }) }) }); let external_tokens = @@ -608,11 +613,9 @@ pub(crate) fn generate_node_types_json( .unwrap_or(&empty) .iter() .map(move |alias| { - if let Some(alias) = alias { + alias.as_ref().map_or((&token.name, token.kind), |alias| { (&alias.value, alias.kind()) - } else { - (&token.name, token.kind) - } + }) }) }); @@ -630,7 +633,7 @@ pub(crate) fn generate_node_types_json( children.required = false; } if let Some(fields) = &mut node_type_json.fields { - for (_, field) in fields.iter_mut() { + for field in fields.values_mut() { field.required = false; } } @@ -647,7 +650,7 @@ pub(crate) fn generate_node_types_json( } let mut result = node_types_json.into_iter().map(|e| e.1).collect::>(); - result.extend(anonymous_node_types.into_iter()); + result.extend(anonymous_node_types); 
result.sort_unstable_by(|a, b| { b.subtypes .is_some() @@ -663,10 +666,7 @@ pub(crate) fn generate_node_types_json( result } -fn process_supertypes( - info: &mut FieldInfoJSON, - subtype_map: &Vec<(NodeTypeJSON, Vec)>, -) { +fn process_supertypes(info: &mut FieldInfoJSON, subtype_map: &[(NodeTypeJSON, Vec)]) { for (supertype, subtypes) in subtype_map { if info.types.contains(supertype) { info.types.retain(|t| !subtypes.contains(t)); @@ -682,9 +682,9 @@ fn variable_type_for_child_type( match child_type { ChildType::Aliased(alias) => alias.kind(), ChildType::Normal(symbol) => { - if syntax_grammar.supertype_symbols.contains(&symbol) { + if syntax_grammar.supertype_symbols.contains(symbol) { VariableType::Named - } else if syntax_grammar.variables_to_inline.contains(&symbol) { + } else if syntax_grammar.variables_to_inline.contains(symbol) { VariableType::Hidden } else { match symbol.kind { @@ -700,11 +700,10 @@ fn variable_type_for_child_type( fn extend_sorted<'a, T>(vec: &mut Vec, values: impl IntoIterator) -> bool where - T: Clone + Eq + Ord, - T: 'a, + T: 'a + Clone + Eq + Ord, { values.into_iter().any(|value| { - if let Err(i) = vec.binary_search(&value) { + if let Err(i) = vec.binary_search(value) { vec.insert(i, value.clone()); true } else { @@ -724,7 +723,7 @@ mod tests { #[test] fn test_node_types_simple() { - let node_types = get_node_types(InputGrammar { + let node_types = get_node_types(&InputGrammar { variables: vec![ Variable { name: "v1".to_string(), @@ -813,7 +812,7 @@ mod tests { #[test] fn test_node_types_simple_extras() { - let node_types = get_node_types(InputGrammar { + let node_types = get_node_types(&InputGrammar { extra_symbols: vec![Rule::named("v3")], variables: vec![ Variable { @@ -914,7 +913,7 @@ mod tests { #[test] fn test_node_types_with_supertypes() { - let node_types = get_node_types(InputGrammar { + let node_types = get_node_types(&InputGrammar { supertype_symbols: vec!["_v2".to_string()], variables: vec![ Variable { @@ -996,7 +995,7 
@@ mod tests { #[test] fn test_node_types_for_children_without_fields() { - let node_types = get_node_types(InputGrammar { + let node_types = get_node_types(&InputGrammar { variables: vec![ Variable { name: "v1".to_string(), @@ -1088,7 +1087,7 @@ mod tests { #[test] fn test_node_types_with_inlined_rules() { - let node_types = get_node_types(InputGrammar { + let node_types = get_node_types(&InputGrammar { variables_to_inline: vec!["v2".to_string()], variables: vec![ Variable { @@ -1138,7 +1137,7 @@ mod tests { #[test] fn test_node_types_for_aliased_nodes() { - let node_types = get_node_types(InputGrammar { + let node_types = get_node_types(&InputGrammar { variables: vec![ Variable { name: "thing".to_string(), @@ -1172,12 +1171,12 @@ mod tests { Variable { name: "identifier".to_string(), kind: VariableType::Named, - rule: Rule::pattern("\\w+"), + rule: Rule::pattern("\\w+", ""), }, Variable { name: "foo_identifier".to_string(), kind: VariableType::Named, - rule: Rule::pattern("[\\w-]+"), + rule: Rule::pattern("[\\w-]+", ""), }, ], ..Default::default() @@ -1208,7 +1207,7 @@ mod tests { #[test] fn test_node_types_with_multiple_valued_fields() { - let node_types = get_node_types(InputGrammar { + let node_types = get_node_types(&InputGrammar { variables: vec![ Variable { name: "a".to_string(), @@ -1270,13 +1269,13 @@ mod tests { #[test] fn test_node_types_with_fields_on_hidden_tokens() { - let node_types = get_node_types(InputGrammar { + let node_types = get_node_types(&InputGrammar { variables: vec![Variable { name: "script".to_string(), kind: VariableType::Named, rule: Rule::seq(vec![ - Rule::field("a".to_string(), Rule::pattern("hi")), - Rule::field("b".to_string(), Rule::pattern("bye")), + Rule::field("a".to_string(), Rule::pattern("hi", "")), + Rule::field("b".to_string(), Rule::pattern("bye", "")), ]), }], ..Default::default() @@ -1296,7 +1295,7 @@ mod tests { #[test] fn test_node_types_with_multiple_rules_same_alias_name() { - let node_types = 
get_node_types(InputGrammar { + let node_types = get_node_types(&InputGrammar { variables: vec![ Variable { name: "script".to_string(), @@ -1416,7 +1415,7 @@ mod tests { #[test] fn test_node_types_with_tokens_aliased_to_match_rules() { - let node_types = get_node_types(InputGrammar { + let node_types = get_node_types(&InputGrammar { variables: vec![ Variable { name: "a".to_string(), @@ -1766,9 +1765,9 @@ mod tests { ); } - fn get_node_types(grammar: InputGrammar) -> Vec { + fn get_node_types(grammar: &InputGrammar) -> Vec { let (syntax_grammar, lexical_grammar, _, default_aliases) = - prepare_grammar(&grammar).unwrap(); + prepare_grammar(grammar).unwrap(); let variable_info = get_variable_info(&syntax_grammar, &lexical_grammar, &default_aliases).unwrap(); generate_node_types_json( @@ -1783,17 +1782,18 @@ mod tests { variables: Vec, supertype_symbols: Vec, ) -> SyntaxGrammar { - let mut syntax_grammar = SyntaxGrammar::default(); - syntax_grammar.variables = variables; - syntax_grammar.supertype_symbols = supertype_symbols; - syntax_grammar + SyntaxGrammar { + variables, + supertype_symbols, + ..SyntaxGrammar::default() + } } fn build_lexical_grammar() -> LexicalGrammar { let mut lexical_grammar = LexicalGrammar::default(); for i in 0..10 { lexical_grammar.variables.push(LexicalVariable { - name: format!("token_{}", i), + name: format!("token_{i}"), kind: VariableType::Named, implicit_precedence: 0, start_state: 0, diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/parse_grammar.rs b/third-party/tree-sitter/tree-sitter/cli/src/generate/parse_grammar.rs index 7fda0b716a2..5a52b346364 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/generate/parse_grammar.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/parse_grammar.rs @@ -7,6 +7,7 @@ use serde_json::{Map, Value}; #[derive(Deserialize)] #[serde(tag = "type")] #[allow(non_camel_case_types)] +#[allow(clippy::upper_case_acronyms)] enum RuleJSON { ALIAS { content: Box, @@ -19,6 +20,7 @@ 
enum RuleJSON { }, PATTERN { value: String, + flags: Option, }, SYMBOL { name: String, @@ -90,15 +92,15 @@ pub(crate) struct GrammarJSON { } pub(crate) fn parse_grammar(input: &str) -> Result { - let grammar_json: GrammarJSON = serde_json::from_str(&input)?; + let grammar_json: GrammarJSON = serde_json::from_str(input)?; let mut variables = Vec::with_capacity(grammar_json.rules.len()); for (name, value) in grammar_json.rules { variables.push(Variable { - name: name.to_owned(), + name: name.clone(), kind: VariableType::Named, rule: parse_rule(serde_json::from_value(value)?), - }) + }); } let mut precedence_orderings = Vec::with_capacity(grammar_json.precedences.len()); @@ -113,12 +115,27 @@ pub(crate) fn parse_grammar(input: &str) -> Result { "Invalid rule in precedences array. Only strings and symbols are allowed" )) } - }) + }); } precedence_orderings.push(ordering); } - let extra_symbols = grammar_json.extras.into_iter().map(parse_rule).collect(); + let extra_symbols = grammar_json + .extras + .into_iter() + .try_fold(Vec::new(), |mut acc, item| { + let rule = parse_rule(item); + if let Rule::String(ref value) = rule { + if value.is_empty() { + return Err(anyhow!( + "Rules in the `extras` array must not contain empty strings" + )); + } + } + acc.push(rule); + Ok(acc) + })?; + let external_tokens = grammar_json.externals.into_iter().map(parse_rule).collect(); Ok(InputGrammar { @@ -143,7 +160,24 @@ fn parse_rule(json: RuleJSON) -> Rule { } => Rule::alias(parse_rule(*content), value, named), RuleJSON::BLANK => Rule::Blank, RuleJSON::STRING { value } => Rule::String(value), - RuleJSON::PATTERN { value } => Rule::Pattern(value), + RuleJSON::PATTERN { value, flags } => Rule::Pattern( + value, + flags.map_or(String::new(), |f| { + f.chars() + .filter(|c| { + if *c == 'i' { + true + } else { + // silently ignore unicode flags + if *c != 'u' && *c != 'v' { + eprintln!("Warning: unsupported flag {c}"); + } + false + } + }) + .collect() + }), + ), RuleJSON::SYMBOL { name } 
=> Rule::NamedSymbol(name), RuleJSON::CHOICE { members } => Rule::choice(members.into_iter().map(parse_rule).collect()), RuleJSON::FIELD { content, name } => Rule::field(name, parse_rule(*content)), @@ -167,11 +201,11 @@ fn parse_rule(json: RuleJSON) -> Rule { } } -impl Into for PrecedenceValueJSON { - fn into(self) -> Precedence { - match self { - PrecedenceValueJSON::Integer(i) => Precedence::Integer(i), - PrecedenceValueJSON::Name(i) => Precedence::Name(i), +impl From for Precedence { + fn from(val: PrecedenceValueJSON) -> Self { + match val { + PrecedenceValueJSON::Integer(i) => Self::Integer(i), + PrecedenceValueJSON::Name(i) => Self::Name(i), } } } diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/prepare_grammar/expand_repeats.rs b/third-party/tree-sitter/tree-sitter/cli/src/generate/prepare_grammar/expand_repeats.rs index 1979691439b..e296d42527e 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/generate/prepare_grammar/expand_repeats.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/prepare_grammar/expand_repeats.rs @@ -24,7 +24,7 @@ impl Expander { // convert that rule itself into a binary tree structure instead of introducing // another auxiliary rule. if let (VariableType::Hidden, Rule::Repeat(repeated_content)) = (variable.kind, &rule) { - let inner_rule = self.expand_rule(&repeated_content); + let inner_rule = self.expand_rule(repeated_content); variable.rule = self.wrap_rule_in_binary_tree(Symbol::non_terminal(index), inner_rule); variable.kind = VariableType::Auxiliary; return true; @@ -57,7 +57,7 @@ impl Expander { params: params.clone(), }, - // For repetitions, introduce an auxiliary rule that contains the the + // For repetitions, introduce an auxiliary rule that contains the // repeated content, but can also contain a recursive binary tree structure. 
Rule::Repeat(content) => { let inner_rule = self.expand_rule(content); @@ -107,8 +107,8 @@ pub(super) fn expand_repeats(mut grammar: ExtractedSyntaxGrammar) -> ExtractedSy existing_repeats: HashMap::new(), }; - for (i, mut variable) in grammar.variables.iter_mut().enumerate() { - let expanded_top_level_repetition = expander.expand_variable(i, &mut variable); + for (i, variable) in grammar.variables.iter_mut().enumerate() { + let expanded_top_level_repetition = expander.expand_variable(i, variable); // If a hidden variable had a top-level repetition and it was converted to // a recursive rule, then it can't be inlined. @@ -119,9 +119,7 @@ pub(super) fn expand_repeats(mut grammar: ExtractedSyntaxGrammar) -> ExtractedSy } } - grammar - .variables - .extend(expander.auxiliary_variables.into_iter()); + grammar.variables.extend(expander.auxiliary_variables); grammar } diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/prepare_grammar/expand_tokens.rs b/third-party/tree-sitter/tree-sitter/cli/src/generate/prepare_grammar/expand_tokens.rs index d6c73d9ae5b..d38719e7343 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/generate/prepare_grammar/expand_tokens.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/prepare_grammar/expand_tokens.rs @@ -4,17 +4,14 @@ use crate::generate::nfa::{CharacterSet, Nfa, NfaState}; use crate::generate::rules::{Precedence, Rule}; use anyhow::{anyhow, Context, Result}; use lazy_static::lazy_static; -use regex::Regex; use regex_syntax::ast::{ - parse, Ast, Class, ClassPerlKind, ClassSet, ClassSetBinaryOpKind, ClassSetItem, - ClassUnicodeKind, RepetitionKind, RepetitionRange, + parse, Ast, ClassPerlKind, ClassSet, ClassSetBinaryOpKind, ClassSetItem, ClassUnicodeKind, + RepetitionKind, RepetitionRange, }; use std::collections::HashMap; use std::i32; lazy_static! 
{ - static ref CURLY_BRACE_REGEX: Regex = - Regex::new(r#"(^|[^\\pP])\{([^}]*[^0-9A-Fa-f,}][^}]*)\}"#).unwrap(); static ref UNICODE_CATEGORIES: HashMap<&'static str, Vec> = serde_json::from_str(UNICODE_CATEGORIES_JSON).unwrap(); static ref UNICODE_PROPERTIES: HashMap<&'static str, Vec> = @@ -25,11 +22,10 @@ lazy_static! { serde_json::from_str(UNICODE_PROPERTY_ALIASES_JSON).unwrap(); } -const UNICODE_CATEGORIES_JSON: &'static str = include_str!("./unicode-categories.json"); -const UNICODE_PROPERTIES_JSON: &'static str = include_str!("./unicode-properties.json"); -const UNICODE_CATEGORY_ALIASES_JSON: &'static str = include_str!("./unicode-category-aliases.json"); -const UNICODE_PROPERTY_ALIASES_JSON: &'static str = include_str!("./unicode-property-aliases.json"); -const ALLOWED_REDUNDANT_ESCAPED_CHARS: [char; 4] = ['!', '\'', '"', '/']; +const UNICODE_CATEGORIES_JSON: &str = include_str!("./unicode-categories.json"); +const UNICODE_PROPERTIES_JSON: &str = include_str!("./unicode-properties.json"); +const UNICODE_CATEGORY_ALIASES_JSON: &str = include_str!("./unicode-category-aliases.json"); +const UNICODE_PROPERTY_ALIASES_JSON: &str = include_str!("./unicode-property-aliases.json"); struct NfaBuilder { nfa: Nfa, @@ -51,7 +47,7 @@ fn get_implicit_precedence(rule: &Rule) -> i32 { } } -fn get_completion_precedence(rule: &Rule) -> i32 { +const fn get_completion_precedence(rule: &Rule) -> i32 { if let Rule::Metadata { params, .. 
} = rule { if let Precedence::Integer(p) = params.precedence { return p; @@ -60,43 +56,18 @@ fn get_completion_precedence(rule: &Rule) -> i32 { 0 } -fn preprocess_regex(content: &str) -> String { - let content = CURLY_BRACE_REGEX.replace(content, "$1\\{$2\\}"); - let mut result = String::with_capacity(content.len()); - let mut is_escaped = false; - for c in content.chars() { - if is_escaped { - if ALLOWED_REDUNDANT_ESCAPED_CHARS.contains(&c) { - result.push(c); - } else { - result.push('\\'); - result.push(c); - } - is_escaped = false; - } else if c == '\\' { - is_escaped = true; - } else { - result.push(c); - } - } - if is_escaped { - result.push('\\'); - } - result -} - -pub(crate) fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result { +pub fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result { let mut builder = NfaBuilder { nfa: Nfa::new(), is_sep: true, precedence_stack: vec![0], }; - let separator_rule = if grammar.separators.len() > 0 { + let separator_rule = if grammar.separators.is_empty() { + Rule::Blank + } else { grammar.separators.push(Rule::Blank); Rule::repeat(Rule::choice(grammar.separators)) - } else { - Rule::Blank }; let mut variables = Vec::new(); @@ -139,17 +110,16 @@ pub(crate) fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result Result { match rule { - Rule::Pattern(s) => { - let s = preprocess_regex(s); - let ast = parse::Parser::new().parse(&s)?; - self.expand_regex(&ast, next_state_id) + Rule::Pattern(s, f) => { + let ast = parse::Parser::new().parse(s)?; + self.expand_regex(&ast, next_state_id, f.contains('i')) } Rule::String(s) => { for c in s.chars().rev() { self.push_advance(CharacterSet::empty().add_char(c), next_state_id); next_state_id = self.nfa.last_state_id(); } - Ok(s.len() > 0) + Ok(!s.is_empty()) } Rule::Choice(elements) => { let mut alternative_state_ids = Vec::new(); @@ -170,7 +140,7 @@ impl NfaBuilder { } Rule::Seq(elements) => { let mut result = false; - for element in 
elements.into_iter().rev() { + for element in elements.iter().rev() { if self.expand_rule(element, next_state_id)? { result = true; } @@ -206,16 +176,46 @@ impl NfaBuilder { result } Rule::Blank => Ok(false), - _ => Err(anyhow!("Grammar error: Unexpected rule {:?}", rule)), + _ => Err(anyhow!("Grammar error: Unexpected rule {rule:?}")), } } - fn expand_regex(&mut self, ast: &Ast, mut next_state_id: u32) -> Result { + fn expand_regex( + &mut self, + ast: &Ast, + mut next_state_id: u32, + case_insensitive: bool, + ) -> Result { + const fn inverse_char(c: char) -> char { + match c { + 'a'..='z' => (c as u8 - b'a' + b'A') as char, + 'A'..='Z' => (c as u8 - b'A' + b'a') as char, + c => c, + } + } + + fn with_inverse_char(mut chars: CharacterSet) -> CharacterSet { + for char in chars.clone().chars() { + let inverted = inverse_char(char); + if char != inverted { + chars = chars.add_char(inverted); + } + } + chars + } + match ast { Ast::Empty(_) => Ok(false), Ast::Flags(_) => Err(anyhow!("Regex error: Flags are not supported")), Ast::Literal(literal) => { - self.push_advance(CharacterSet::from_char(literal.c), next_state_id); + let mut char_set = CharacterSet::from_char(literal.c); + if case_insensitive { + let inverted = inverse_char(literal.c); + if literal.c != inverted { + char_set = char_set.add_char(inverted); + } + } + self.push_advance(char_set, next_state_id); Ok(true) } Ast::Dot(_) => { @@ -223,70 +223,82 @@ impl NfaBuilder { Ok(true) } Ast::Assertion(_) => Err(anyhow!("Regex error: Assertions are not supported")), - Ast::Class(class) => match class { - Class::Unicode(class) => { - let mut chars = self.expand_unicode_character_class(&class.kind)?; - if class.negated { - chars = chars.negate(); - } - self.push_advance(chars, next_state_id); - Ok(true) + Ast::ClassUnicode(class) => { + let mut chars = self.expand_unicode_character_class(&class.kind)?; + if class.negated { + chars = chars.negate(); } - Class::Perl(class) => { - let mut chars = 
self.expand_perl_character_class(&class.kind); - if class.negated { - chars = chars.negate(); - } - self.push_advance(chars, next_state_id); - Ok(true) + if case_insensitive { + chars = with_inverse_char(chars); } - Class::Bracketed(class) => { - let mut chars = self.translate_class_set(&class.kind)?; - if class.negated { - chars = chars.negate(); - } - self.push_advance(chars, next_state_id); - Ok(true) + self.push_advance(chars, next_state_id); + Ok(true) + } + Ast::ClassPerl(class) => { + let mut chars = self.expand_perl_character_class(&class.kind); + if class.negated { + chars = chars.negate(); } - }, + if case_insensitive { + chars = with_inverse_char(chars); + } + self.push_advance(chars, next_state_id); + Ok(true) + } + Ast::ClassBracketed(class) => { + let mut chars = self.translate_class_set(&class.kind)?; + if class.negated { + chars = chars.negate(); + } + if case_insensitive { + chars = with_inverse_char(chars); + } + self.push_advance(chars, next_state_id); + Ok(true) + } Ast::Repetition(repetition) => match repetition.op.kind { RepetitionKind::ZeroOrOne => { - self.expand_zero_or_one(&repetition.ast, next_state_id) + self.expand_zero_or_one(&repetition.ast, next_state_id, case_insensitive) } RepetitionKind::OneOrMore => { - self.expand_one_or_more(&repetition.ast, next_state_id) + self.expand_one_or_more(&repetition.ast, next_state_id, case_insensitive) } RepetitionKind::ZeroOrMore => { - self.expand_zero_or_more(&repetition.ast, next_state_id) + self.expand_zero_or_more(&repetition.ast, next_state_id, case_insensitive) } RepetitionKind::Range(RepetitionRange::Exactly(count)) => { - self.expand_count(&repetition.ast, count, next_state_id) + self.expand_count(&repetition.ast, count, next_state_id, case_insensitive) } RepetitionKind::Range(RepetitionRange::AtLeast(min)) => { - if self.expand_zero_or_more(&repetition.ast, next_state_id)? 
{ - self.expand_count(&repetition.ast, min, next_state_id) + if self.expand_zero_or_more(&repetition.ast, next_state_id, case_insensitive)? { + self.expand_count(&repetition.ast, min, next_state_id, case_insensitive) } else { Ok(false) } } RepetitionKind::Range(RepetitionRange::Bounded(min, max)) => { - let mut result = self.expand_count(&repetition.ast, min, next_state_id)?; + let mut result = + self.expand_count(&repetition.ast, min, next_state_id, case_insensitive)?; for _ in min..max { if result { next_state_id = self.nfa.last_state_id(); } - if self.expand_zero_or_one(&repetition.ast, next_state_id)? { + if self.expand_zero_or_one( + &repetition.ast, + next_state_id, + case_insensitive, + )? { result = true; } } Ok(result) } }, - Ast::Group(group) => self.expand_regex(&group.ast, next_state_id), + Ast::Group(group) => self.expand_regex(&group.ast, next_state_id, case_insensitive), Ast::Alternation(alternation) => { let mut alternative_state_ids = Vec::new(); - for ast in alternation.asts.iter() { - if self.expand_regex(&ast, next_state_id)? { + for ast in &alternation.asts { + if self.expand_regex(ast, next_state_id, case_insensitive)? { alternative_state_ids.push(self.nfa.last_state_id()); } else { alternative_state_ids.push(next_state_id); @@ -304,7 +316,7 @@ impl NfaBuilder { Ast::Concat(concat) => { let mut result = false; for ast in concat.asts.iter().rev() { - if self.expand_regex(&ast, next_state_id)? { + if self.expand_regex(ast, next_state_id, case_insensitive)? 
{ result = true; next_state_id = self.nfa.last_state_id(); } @@ -316,7 +328,7 @@ impl NfaBuilder { fn translate_class_set(&self, class_set: &ClassSet) -> Result { match &class_set { - ClassSet::Item(item) => self.expand_character_class(&item), + ClassSet::Item(item) => self.expand_character_class(item), ClassSet::BinaryOp(binary_op) => { let mut lhs_char_class = self.translate_class_set(&binary_op.lhs)?; let mut rhs_char_class = self.translate_class_set(&binary_op.rhs)?; @@ -335,13 +347,18 @@ impl NfaBuilder { } } - fn expand_one_or_more(&mut self, ast: &Ast, next_state_id: u32) -> Result { + fn expand_one_or_more( + &mut self, + ast: &Ast, + next_state_id: u32, + case_insensitive: bool, + ) -> Result { self.nfa.states.push(NfaState::Accept { variable_index: 0, precedence: 0, }); // Placeholder for split let split_state_id = self.nfa.last_state_id(); - if self.expand_regex(&ast, split_state_id)? { + if self.expand_regex(ast, split_state_id, case_insensitive)? { self.nfa.states[split_state_id as usize] = NfaState::Split(self.nfa.last_state_id(), next_state_id); Ok(true) @@ -351,8 +368,13 @@ impl NfaBuilder { } } - fn expand_zero_or_one(&mut self, ast: &Ast, next_state_id: u32) -> Result { - if self.expand_regex(ast, next_state_id)? { + fn expand_zero_or_one( + &mut self, + ast: &Ast, + next_state_id: u32, + case_insensitive: bool, + ) -> Result { + if self.expand_regex(ast, next_state_id, case_insensitive)? { self.push_split(next_state_id); Ok(true) } else { @@ -360,8 +382,13 @@ impl NfaBuilder { } } - fn expand_zero_or_more(&mut self, ast: &Ast, next_state_id: u32) -> Result { - if self.expand_one_or_more(&ast, next_state_id)? { + fn expand_zero_or_more( + &mut self, + ast: &Ast, + next_state_id: u32, + case_insensitive: bool, + ) -> Result { + if self.expand_one_or_more(ast, next_state_id, case_insensitive)? 
{ self.push_split(next_state_id); Ok(true) } else { @@ -369,10 +396,16 @@ impl NfaBuilder { } } - fn expand_count(&mut self, ast: &Ast, count: u32, mut next_state_id: u32) -> Result { + fn expand_count( + &mut self, + ast: &Ast, + count: u32, + mut next_state_id: u32, + case_insensitive: bool, + ) -> Result { let mut result = false; for _ in 0..count { - if self.expand_regex(ast, next_state_id)? { + if self.expand_regex(ast, next_state_id, case_insensitive)? { result = true; next_state_id = self.nfa.last_state_id(); } @@ -388,7 +421,7 @@ impl NfaBuilder { ClassSetItem::Union(union) => { let mut result = CharacterSet::empty(); for item in &union.items { - result = result.add(&self.expand_character_class(&item)?); + result = result.add(&self.expand_character_class(item)?); } Ok(result) } @@ -407,9 +440,8 @@ impl NfaBuilder { } Ok(set) } - _ => Err(anyhow!( - "Regex error: Unsupported character class syntax {:?}", - item + ClassSetItem::Ascii(_) => Err(anyhow!( + "Regex error: Unsupported character class syntax {item:?}", )), } } @@ -430,15 +462,15 @@ impl NfaBuilder { if actual_class_name.len() == 1 { category_letter = actual_class_name.clone(); } else { - let code_points = UNICODE_CATEGORIES - .get(actual_class_name.as_str()) - .or_else(|| UNICODE_PROPERTIES.get(actual_class_name.as_str())) - .ok_or_else(|| { - anyhow!( - "Regex error: Unsupported unicode character class {}", - class_name - ) - })?; + let code_points = + UNICODE_CATEGORIES + .get(actual_class_name.as_str()) + .or_else(|| UNICODE_PROPERTIES.get(actual_class_name.as_str())) + .ok_or_else(|| { + anyhow!( + "Regex error: Unsupported unicode character class {class_name}", + ) + })?; for c in code_points { if let Some(c) = std::char::from_u32(*c) { chars = chars.add_char(c); @@ -475,7 +507,9 @@ impl NfaBuilder { .add_char(' ') .add_char('\t') .add_char('\r') - .add_char('\n'), + .add_char('\n') + .add_char('\x0B') + .add_char('\x0C'), ClassPerlKind::Word => CharacterSet::empty() .add_char('_') 
.add_range('A', 'Z') @@ -563,7 +597,7 @@ mod tests { let table = [ // regex with sequences and alternatives Row { - rules: vec![Rule::pattern("(a|b|c)d(e|f|g)h?")], + rules: vec![Rule::pattern("(a|b|c)d(e|f|g)h?", "")], separators: vec![], examples: vec![ ("ade1", Some((0, "ade"))), @@ -574,13 +608,13 @@ mod tests { }, // regex with repeats Row { - rules: vec![Rule::pattern("a*")], + rules: vec![Rule::pattern("a*", "")], separators: vec![], examples: vec![("aaa1", Some((0, "aaa"))), ("b", Some((0, "")))], }, // regex with repeats in sequences Row { - rules: vec![Rule::pattern("a((bc)+|(de)*)f")], + rules: vec![Rule::pattern("a((bc)+|(de)*)f", "")], separators: vec![], examples: vec![ ("af1", Some((0, "af"))), @@ -591,13 +625,13 @@ mod tests { }, // regex with character ranges Row { - rules: vec![Rule::pattern("[a-fA-F0-9]+")], + rules: vec![Rule::pattern("[a-fA-F0-9]+", "")], separators: vec![], examples: vec![("A1ff0.", Some((0, "A1ff0")))], }, // regex with perl character classes Row { - rules: vec![Rule::pattern("\\w\\d\\s")], + rules: vec![Rule::pattern("\\w\\d\\s", "")], separators: vec![], examples: vec![("_0 ", Some((0, "_0 ")))], }, @@ -611,7 +645,7 @@ mod tests { Row { rules: vec![Rule::repeat(Rule::seq(vec![ Rule::string("{"), - Rule::pattern("[a-f]+"), + Rule::pattern("[a-f]+", ""), Rule::string("}"), ]))], separators: vec![], @@ -624,9 +658,9 @@ mod tests { // longest match rule Row { rules: vec![ - Rule::pattern("a|bc"), - Rule::pattern("aa"), - Rule::pattern("bcd"), + Rule::pattern("a|bc", ""), + Rule::pattern("aa", ""), + Rule::pattern("bcd", ""), ], separators: vec![], examples: vec![ @@ -640,7 +674,7 @@ mod tests { }, // regex with an alternative including the empty string Row { - rules: vec![Rule::pattern("a(b|)+c")], + rules: vec![Rule::pattern("a(b|)+c", "")], separators: vec![], examples: vec![ ("ac.", Some((0, "ac"))), @@ -650,8 +684,8 @@ mod tests { }, // separators Row { - rules: vec![Rule::pattern("[a-f]+")], - separators: 
vec![Rule::string("\\\n"), Rule::pattern("\\s")], + rules: vec![Rule::pattern("[a-f]+", "")], + separators: vec![Rule::string("\\\n"), Rule::pattern("\\s", "")], examples: vec![ (" a", Some((0, "a"))), (" \nb", Some((0, "b"))), @@ -662,11 +696,11 @@ mod tests { // shorter tokens with higher precedence Row { rules: vec![ - Rule::prec(Precedence::Integer(2), Rule::pattern("abc")), - Rule::prec(Precedence::Integer(1), Rule::pattern("ab[cd]e")), - Rule::pattern("[a-e]+"), + Rule::prec(Precedence::Integer(2), Rule::pattern("abc", "")), + Rule::prec(Precedence::Integer(1), Rule::pattern("ab[cd]e", "")), + Rule::pattern("[a-e]+", ""), ], - separators: vec![Rule::string("\\\n"), Rule::pattern("\\s")], + separators: vec![Rule::string("\\\n"), Rule::pattern("\\s", "")], examples: vec![ ("abceef", Some((0, "abc"))), ("abdeef", Some((1, "abde"))), @@ -676,13 +710,13 @@ mod tests { // immediate tokens with higher precedence Row { rules: vec![ - Rule::prec(Precedence::Integer(1), Rule::pattern("[^a]+")), + Rule::prec(Precedence::Integer(1), Rule::pattern("[^a]+", "")), Rule::immediate_token(Rule::prec( Precedence::Integer(2), - Rule::pattern("[^ab]+"), + Rule::pattern("[^ab]+", ""), )), ], - separators: vec![Rule::pattern("\\s")], + separators: vec![Rule::pattern("\\s", "")], examples: vec![("cccb", Some((1, "ccc")))], }, Row { @@ -704,7 +738,7 @@ mod tests { // nested choices within sequences Row { rules: vec![Rule::seq(vec![ - Rule::pattern("[0-9]+"), + Rule::pattern("[0-9]+", ""), Rule::choice(vec![ Rule::Blank, Rule::choice(vec![Rule::seq(vec![ @@ -713,7 +747,7 @@ mod tests { Rule::Blank, Rule::choice(vec![Rule::string("+"), Rule::string("-")]), ]), - Rule::pattern("[0-9]+"), + Rule::pattern("[0-9]+", ""), ])]), ]), ])], @@ -730,7 +764,7 @@ mod tests { }, // nested groups Row { - rules: vec![Rule::seq(vec![Rule::pattern(r#"([^x\\]|\\(.|\n))+"#)])], + rules: vec![Rule::seq(vec![Rule::pattern(r"([^x\\]|\\(.|\n))+", "")])], separators: vec![], examples: vec![("abcx", Some((0, 
"abc"))), ("abc\\0x", Some((0, "abc\\0")))], }, @@ -738,24 +772,24 @@ mod tests { Row { rules: vec![ // Escaped forward slash (used in JS because '/' is the regex delimiter) - Rule::pattern(r#"\/"#), + Rule::pattern(r"\/", ""), // Escaped quotes - Rule::pattern(r#"\"\'"#), + Rule::pattern(r#"\"\'"#, ""), // Quote preceded by a literal backslash - Rule::pattern(r#"[\\']+"#), + Rule::pattern(r"[\\']+", ""), ], separators: vec![], examples: vec![ ("/", Some((0, "/"))), ("\"\'", Some((1, "\"\'"))), - (r#"'\'a"#, Some((2, r#"'\'"#))), + (r"'\'a", Some((2, r"'\'"))), ], }, // unicode property escapes Row { rules: vec![ - Rule::pattern(r#"\p{L}+\P{L}+"#), - Rule::pattern(r#"\p{White_Space}+\P{White_Space}+[\p{White_Space}]*"#), + Rule::pattern(r"\p{L}+\P{L}+", ""), + Rule::pattern(r"\p{White_Space}+\P{White_Space}+[\p{White_Space}]*", ""), ], separators: vec![], examples: vec![ @@ -765,17 +799,17 @@ mod tests { }, // unicode property escapes in bracketed sets Row { - rules: vec![Rule::pattern(r#"[\p{L}\p{Nd}]+"#)], + rules: vec![Rule::pattern(r"[\p{L}\p{Nd}]+", "")], separators: vec![], examples: vec![("abΨ12٣٣, ok", Some((0, "abΨ12٣٣")))], }, // unicode character escapes Row { rules: vec![ - Rule::pattern(r#"\u{00dc}"#), - Rule::pattern(r#"\U{000000dd}"#), - Rule::pattern(r#"\u00de"#), - Rule::pattern(r#"\U000000df"#), + Rule::pattern(r"\u{00dc}", ""), + Rule::pattern(r"\U{000000dd}", ""), + Rule::pattern(r"\u00de", ""), + Rule::pattern(r"\U000000df", ""), ], separators: vec![], examples: vec![ @@ -785,17 +819,15 @@ mod tests { ("\u{00df}", Some((3, "\u{00df}"))), ], }, - // allowing un-escaped curly braces Row { rules: vec![ - // Un-escaped curly braces - Rule::pattern(r#"u{[0-9a-fA-F]+}"#), + Rule::pattern(r"u\{[0-9a-fA-F]+\}", ""), // Already-escaped curly braces - Rule::pattern(r#"\{[ab]{3}\}"#), + Rule::pattern(r"\{[ab]{3}\}", ""), // Unicode codepoints - Rule::pattern(r#"\u{1000A}"#), + Rule::pattern(r"\u{1000A}", ""), // Unicode codepoints (lowercase) - 
Rule::pattern(r#"\u{1000b}"#), + Rule::pattern(r"\u{1000b}", ""), ], separators: vec![], examples: vec![ @@ -807,7 +839,7 @@ mod tests { }, // Emojis Row { - rules: vec![Rule::pattern(r"\p{Emoji}+")], + rules: vec![Rule::pattern(r"\p{Emoji}+", "")], separators: vec![], examples: vec![ ("🐎", Some((0, "🐎"))), @@ -820,7 +852,7 @@ mod tests { }, // Intersection Row { - rules: vec![Rule::pattern(r"[[0-7]&&[4-9]]+")], + rules: vec![Rule::pattern(r"[[0-7]&&[4-9]]+", "")], separators: vec![], examples: vec![ ("456", Some((0, "456"))), @@ -833,7 +865,7 @@ mod tests { }, // Difference Row { - rules: vec![Rule::pattern(r"[[0-9]--[4-7]]+")], + rules: vec![Rule::pattern(r"[[0-9]--[4-7]]+", "")], separators: vec![], examples: vec![ ("123", Some((0, "123"))), @@ -846,7 +878,7 @@ mod tests { }, // Symmetric difference Row { - rules: vec![Rule::pattern(r"[[0-7]~~[4-9]]+")], + rules: vec![Rule::pattern(r"[[0-7]~~[4-9]]+", "")], separators: vec![], examples: vec![ ("123", Some((0, "123"))), @@ -867,7 +899,7 @@ mod tests { // [6-7]: y y // [3-9]--[5-7]: y y y y y // final regex: y y y y y y - rules: vec![Rule::pattern(r"[[[0-5]--[2-4]]~~[[3-9]--[6-7]]]+")], + rules: vec![Rule::pattern(r"[[[0-5]--[2-4]]~~[[3-9]--[6-7]]]+", "")], separators: vec![], examples: vec![ ("01", Some((0, "01"))), @@ -889,13 +921,13 @@ mod tests { let grammar = expand_tokens(ExtractedLexicalGrammar { separators: separators.clone(), variables: rules - .into_iter() + .iter() .map(|rule| Variable::named("", rule.clone())) .collect(), }) .unwrap(); - for (haystack, needle) in examples.iter() { + for (haystack, needle) in examples { assert_eq!(simulate_nfa(&grammar, haystack), *needle); } } diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/prepare_grammar/extract_default_aliases.rs b/third-party/tree-sitter/tree-sitter/cli/src/generate/prepare_grammar/extract_default_aliases.rs index d39bf8dd6f4..6ffc7eca3fc 100644 --- 
a/third-party/tree-sitter/tree-sitter/cli/src/generate/prepare_grammar/extract_default_aliases.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/prepare_grammar/extract_default_aliases.rs @@ -28,10 +28,10 @@ pub(super) fn extract_default_aliases( // For each grammar symbol, find all of the aliases under which the symbol appears, // and determine whether or not the symbol ever appears *unaliased*. - for variable in syntax_grammar.variables.iter() { - for production in variable.productions.iter() { - for step in production.steps.iter() { - let mut status = match step.symbol.kind { + for variable in &syntax_grammar.variables { + for production in &variable.productions { + for step in &production.steps { + let status = match step.symbol.kind { SymbolType::External => &mut external_status_list[step.symbol.index], SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index], SymbolType::Terminal => &mut terminal_status_list[step.symbol.index], @@ -62,8 +62,8 @@ pub(super) fn extract_default_aliases( } } - for symbol in syntax_grammar.extra_symbols.iter() { - let mut status = match symbol.kind { + for symbol in &syntax_grammar.extra_symbols { + let status = match symbol.kind { SymbolType::External => &mut external_status_list[symbol.index], SymbolType::NonTerminal => &mut non_terminal_status_list[symbol.index], SymbolType::Terminal => &mut terminal_status_list[symbol.index], @@ -98,25 +98,23 @@ pub(super) fn extract_default_aliases( for (symbol, status) in symbols_with_statuses { if status.appears_unaliased { status.aliases.clear(); - } else { - if let Some(default_entry) = status - .aliases - .iter() - .enumerate() - .max_by_key(|(i, (_, count))| (count, -(*i as i64))) - .map(|(_, entry)| entry.clone()) - { - status.aliases.clear(); - status.aliases.push(default_entry.clone()); - result.insert(symbol, default_entry.0); - } + } else if let Some(default_entry) = status + .aliases + .iter() + .enumerate() + .max_by_key(|(i, (_, count))| (count, 
-(*i as i64))) + .map(|(_, entry)| entry.clone()) + { + status.aliases.clear(); + status.aliases.push(default_entry.clone()); + result.insert(symbol, default_entry.0); } } // Wherever a symbol is aliased as its default alias, remove the usage of the alias, // because it will now be redundant. let mut alias_positions_to_clear = Vec::new(); - for variable in syntax_grammar.variables.iter_mut() { + for variable in &mut syntax_grammar.variables { alias_positions_to_clear.clear(); for (i, production) in variable.productions.iter().enumerate() { @@ -132,7 +130,7 @@ pub(super) fn extract_default_aliases( // If this step is aliased as the symbol's default alias, then remove that alias. if step.alias.is_some() - && step.alias.as_ref() == status.aliases.get(0).map(|t| &t.0) + && step.alias.as_ref() == status.aliases.first().map(|t| &t.0) { let mut other_productions_must_use_this_alias_at_this_index = false; for (other_i, other_production) in variable.productions.iter().enumerate() { diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/prepare_grammar/extract_tokens.rs b/third-party/tree-sitter/tree-sitter/cli/src/generate/prepare_grammar/extract_tokens.rs index 928f914c621..7d87bbd2fc0 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/generate/prepare_grammar/extract_tokens.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/prepare_grammar/extract_tokens.rs @@ -15,12 +15,12 @@ pub(super) fn extract_tokens( extracted_usage_counts: Vec::new(), }; - for mut variable in grammar.variables.iter_mut() { - extractor.extract_tokens_in_variable(&mut variable); + for variable in &mut grammar.variables { + extractor.extract_tokens_in_variable(variable); } - for mut variable in grammar.external_tokens.iter_mut() { - extractor.extract_tokens_in_variable(&mut variable); + for variable in &mut grammar.external_tokens { + extractor.extract_tokens_in_variable(variable); } let mut lexical_variables = Vec::with_capacity(extractor.extracted_variables.len()); @@ 
-49,7 +49,7 @@ pub(super) fn extract_tokens( }) = variable.rule { if i > 0 && extractor.extracted_usage_counts[index] == 1 { - let mut lexical_variable = &mut lexical_variables[index]; + let lexical_variable = &mut lexical_variables[index]; lexical_variable.kind = variable.kind; lexical_variable.name = variable.name; symbol_replacer.replacements.insert(i, index); @@ -59,7 +59,7 @@ pub(super) fn extract_tokens( variables.push(variable); } - for variable in variables.iter_mut() { + for variable in &mut variables { variable.rule = symbol_replacer.replace_symbols_in_rule(&variable.rule); } @@ -67,10 +67,10 @@ pub(super) fn extract_tokens( .expected_conflicts .into_iter() .map(|conflict| { - let mut result: Vec<_> = conflict + let mut result = conflict .iter() .map(|symbol| symbol_replacer.replace_symbol(*symbol)) - .collect(); + .collect::>(); result.sort_unstable(); result.dedup(); result @@ -94,12 +94,10 @@ pub(super) fn extract_tokens( for rule in grammar.extra_symbols { if let Rule::Symbol(symbol) = rule { extra_symbols.push(symbol_replacer.replace_symbol(symbol)); + } else if let Some(index) = lexical_variables.iter().position(|v| v.rule == rule) { + extra_symbols.push(Symbol::terminal(index)); } else { - if let Some(index) = lexical_variables.iter().position(|v| v.rule == rule) { - extra_symbols.push(Symbol::terminal(index)); - } else { - separators.push(rule); - } + separators.push(rule); } } @@ -119,13 +117,13 @@ pub(super) fn extract_tokens( name: external_token.name, kind: external_token.kind, corresponding_internal_token: None, - }) + }); } else { external_tokens.push(ExternalToken { name: lexical_variables[symbol.index].name.clone(), kind: external_token.kind, corresponding_internal_token: Some(symbol), - }) + }); } } else { return Err(anyhow!( @@ -209,7 +207,7 @@ impl TokenExtractor { } else { Rule::Metadata { params: params.clone(), - rule: Box::new(self.extract_tokens_in_rule((&rule).clone())), + rule: Box::new(self.extract_tokens_in_rule(rule)), } } } 
@@ -298,20 +296,19 @@ impl SymbolReplacer { } let mut adjusted_index = symbol.index; - for (replaced_index, _) in self.replacements.iter() { + for replaced_index in self.replacements.keys() { if *replaced_index < symbol.index { adjusted_index -= 1; } } - return Symbol::non_terminal(adjusted_index); + Symbol::non_terminal(adjusted_index) } } #[cfg(test)] mod test { use super::*; - use crate::generate::grammars::VariableType; #[test] fn test_extraction() { @@ -320,7 +317,7 @@ mod test { "rule_0", Rule::repeat(Rule::seq(vec![ Rule::string("a"), - Rule::pattern("b"), + Rule::pattern("b", ""), Rule::choice(vec![ Rule::non_terminal(1), Rule::non_terminal(2), @@ -331,8 +328,8 @@ mod test { ]), ])), ), - Variable::named("rule_1", Rule::pattern("e")), - Variable::named("rule_2", Rule::pattern("b")), + Variable::named("rule_1", Rule::pattern("e", "")), + Variable::named("rule_2", Rule::pattern("b", "")), Variable::named( "rule_3", Rule::seq(vec![Rule::non_terminal(2), Rule::Blank]), @@ -378,12 +375,12 @@ mod test { lexical_grammar.variables, vec![ Variable::anonymous("a", Rule::string("a")), - Variable::auxiliary("rule_0_token1", Rule::pattern("b")), + Variable::auxiliary("rule_0_token1", Rule::pattern("b", "")), Variable::auxiliary( "rule_0_token2", Rule::repeat(Rule::choice(vec![Rule::string("c"), Rule::string("d"),])) ), - Variable::named("rule_1", Rule::pattern("e")), + Variable::named("rule_1", Rule::pattern("e", "")), ] ); } @@ -404,14 +401,14 @@ mod test { assert_eq!( lexical_grammar.variables, vec![Variable::anonymous("hello", Rule::string("hello")),] - ) + ); } #[test] fn test_extracting_extra_symbols() { let mut grammar = build_grammar(vec![ Variable::named("rule_0", Rule::string("x")), - Variable::named("comment", Rule::pattern("//.*")), + Variable::named("comment", Rule::pattern("//.*", "")), ]); grammar.extra_symbols = vec![Rule::string(" "), Rule::non_terminal(1)]; diff --git 
a/third-party/tree-sitter/tree-sitter/cli/src/generate/prepare_grammar/flatten_grammar.rs b/third-party/tree-sitter/tree-sitter/cli/src/generate/prepare_grammar/flatten_grammar.rs index e9950e1bc15..8f56dbf9f2f 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/generate/prepare_grammar/flatten_grammar.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/prepare_grammar/flatten_grammar.rs @@ -88,7 +88,7 @@ impl RuleFlattener { self.associativity_stack.pop(); if did_push && !at_end { self.production.steps.last_mut().unwrap().associativity = - self.associativity_stack.last().cloned(); + self.associativity_stack.last().copied(); } } @@ -110,7 +110,7 @@ impl RuleFlattener { .last() .cloned() .unwrap_or(Precedence::None), - associativity: self.associativity_stack.last().cloned(), + associativity: self.associativity_stack.last().copied(), alias: self.alias_stack.last().cloned(), field_name: self.field_name_stack.last().cloned(), }); @@ -129,7 +129,7 @@ fn extract_choices(rule: Rule) -> Vec { let extraction = extract_choices(element); let mut next_result = Vec::new(); for entry in result { - for extraction_entry in extraction.iter() { + for extraction_entry in &extraction { next_result.push(Rule::Seq(vec![entry.clone(), extraction_entry.clone()])); } } @@ -157,7 +157,7 @@ fn extract_choices(rule: Rule) -> Vec { } } -fn flatten_variable(variable: Variable) -> Result { +fn flatten_variable(variable: Variable) -> SyntaxVariable { let mut productions = Vec::new(); for rule in extract_choices(variable.rule) { let production = RuleFlattener::new().flatten(rule); @@ -165,14 +165,14 @@ fn flatten_variable(variable: Variable) -> Result { productions.push(production); } } - Ok(SyntaxVariable { + SyntaxVariable { name: variable.name, kind: variable.kind, productions, - }) + } } -fn symbol_is_used(variables: &Vec, symbol: Symbol) -> bool { +fn symbol_is_used(variables: &[SyntaxVariable], symbol: Symbol) -> bool { for variable in variables { for production in 
&variable.productions { for step in &production.steps { @@ -188,7 +188,7 @@ fn symbol_is_used(variables: &Vec, symbol: Symbol) -> bool { pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result { let mut variables = Vec::new(); for variable in grammar.variables { - variables.push(flatten_variable(variable)?); + variables.push(flatten_variable(variable)); } for (i, variable) in variables.iter().enumerate() { for production in &variable.productions { @@ -220,7 +220,6 @@ unless they are used only as the grammar's start rule. mod tests { use super::*; use crate::generate::grammars::VariableType; - use crate::generate::rules::Symbol; #[test] fn test_flatten_grammar() { @@ -245,8 +244,7 @@ mod tests { ), Rule::non_terminal(7), ]), - }) - .unwrap(); + }); assert_eq!( result.productions, @@ -304,8 +302,7 @@ mod tests { ), Rule::non_terminal(7), ]), - }) - .unwrap(); + }); assert_eq!( result.productions, @@ -344,8 +341,7 @@ mod tests { Precedence::Integer(101), Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(2)]), ), - }) - .unwrap(); + }); assert_eq!( result.productions, @@ -367,8 +363,7 @@ mod tests { Precedence::Integer(101), Rule::seq(vec![Rule::non_terminal(1)]), ), - }) - .unwrap(); + }); assert_eq!( result.productions, @@ -393,8 +388,7 @@ mod tests { Rule::field("second-thing".to_string(), Rule::terminal(3)), ]), ]), - }) - .unwrap(); + }); assert_eq!( result.productions, diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/prepare_grammar/intern_symbols.rs b/third-party/tree-sitter/tree-sitter/cli/src/generate/prepare_grammar/intern_symbols.rs index 5cd29cc40ed..092126aec02 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/generate/prepare_grammar/intern_symbols.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/prepare_grammar/intern_symbols.rs @@ -11,7 +11,7 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result } let mut variables = Vec::with_capacity(grammar.variables.len()); - for variable in 
grammar.variables.iter() { + for variable in &grammar.variables { variables.push(Variable { name: variable.name.clone(), kind: variable_type_for_name(&variable.name), @@ -20,10 +20,10 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result } let mut external_tokens = Vec::with_capacity(grammar.external_tokens.len()); - for external_token in grammar.external_tokens.iter() { - let rule = interner.intern_rule(&external_token)?; + for external_token in &grammar.external_tokens { + let rule = interner.intern_rule(external_token)?; let (name, kind) = if let Rule::NamedSymbol(name) = external_token { - (name.clone(), variable_type_for_name(&name)) + (name.clone(), variable_type_for_name(name)) } else { (String::new(), VariableType::Anonymous) }; @@ -31,35 +31,35 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result } let mut extra_symbols = Vec::with_capacity(grammar.extra_symbols.len()); - for extra_token in grammar.extra_symbols.iter() { + for extra_token in &grammar.extra_symbols { extra_symbols.push(interner.intern_rule(extra_token)?); } let mut supertype_symbols = Vec::with_capacity(grammar.supertype_symbols.len()); - for supertype_symbol_name in grammar.supertype_symbols.iter() { + for supertype_symbol_name in &grammar.supertype_symbols { supertype_symbols.push( interner .intern_name(supertype_symbol_name) - .ok_or_else(|| anyhow!("Undefined symbol `{}`", supertype_symbol_name))?, + .ok_or_else(|| anyhow!("Undefined symbol `{supertype_symbol_name}`"))?, ); } let mut expected_conflicts = Vec::new(); - for conflict in grammar.expected_conflicts.iter() { + for conflict in &grammar.expected_conflicts { let mut interned_conflict = Vec::with_capacity(conflict.len()); for name in conflict { interned_conflict.push( interner - .intern_name(&name) - .ok_or_else(|| anyhow!("Undefined symbol `{}`", name))?, + .intern_name(name) + .ok_or_else(|| anyhow!("Undefined symbol `{name}`"))?, ); } expected_conflicts.push(interned_conflict); } let mut 
variables_to_inline = Vec::new(); - for name in grammar.variables_to_inline.iter() { - if let Some(symbol) = interner.intern_name(&name) { + for name in &grammar.variables_to_inline { + if let Some(symbol) = interner.intern_name(name) { variables_to_inline.push(symbol); } } @@ -68,8 +68,8 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result if let Some(name) = grammar.word_token.as_ref() { word_token = Some( interner - .intern_name(&name) - .ok_or_else(|| anyhow!("Undefined symbol `{}`", &name))?, + .intern_name(name) + .ok_or_else(|| anyhow!("Undefined symbol `{name}`"))?, ); } @@ -118,13 +118,10 @@ impl<'a> Interner<'a> { params: params.clone(), }), - Rule::NamedSymbol(name) => { - if let Some(symbol) = self.intern_name(&name) { - Ok(Rule::Symbol(symbol)) - } else { - Err(anyhow!("Undefined symbol `{}`", name)) - } - } + Rule::NamedSymbol(name) => self.intern_name(name).map_or_else( + || Err(anyhow!("Undefined symbol `{name}`")), + |symbol| Ok(Rule::Symbol(symbol)), + ), _ => Ok(rule.clone()), } @@ -145,12 +142,12 @@ impl<'a> Interner<'a> { } } - return None; + None } } fn variable_type_for_name(name: &str) -> VariableType { - if name.starts_with("_") { + if name.starts_with('_') { VariableType::Hidden } else { VariableType::Named diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/prepare_grammar/mod.rs b/third-party/tree-sitter/tree-sitter/cli/src/generate/prepare_grammar/mod.rs index 51b32cc8f61..15243c454f5 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/generate/prepare_grammar/mod.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/prepare_grammar/mod.rs @@ -6,7 +6,7 @@ mod flatten_grammar; mod intern_symbols; mod process_inlines; -pub(crate) use self::expand_tokens::expand_tokens; +pub use self::expand_tokens::expand_tokens; use self::expand_repeats::expand_repeats; use self::extract_default_aliases::extract_default_aliases; @@ -26,7 +26,7 @@ use std::{ mem, }; -pub(crate) struct IntermediateGrammar { +pub 
struct IntermediateGrammar { variables: Vec, extra_symbols: Vec, expected_conflicts: Vec>, @@ -37,12 +37,12 @@ pub(crate) struct IntermediateGrammar { word_token: Option, } -pub(crate) type InternedGrammar = IntermediateGrammar; +pub type InternedGrammar = IntermediateGrammar; -pub(crate) type ExtractedSyntaxGrammar = IntermediateGrammar; +pub type ExtractedSyntaxGrammar = IntermediateGrammar; #[derive(Debug, PartialEq, Eq)] -pub(crate) struct ExtractedLexicalGrammar { +pub struct ExtractedLexicalGrammar { pub variables: Vec, pub separators: Vec, } @@ -50,21 +50,21 @@ pub(crate) struct ExtractedLexicalGrammar { impl Default for IntermediateGrammar { fn default() -> Self { Self { - variables: Default::default(), - extra_symbols: Default::default(), - expected_conflicts: Default::default(), - precedence_orderings: Default::default(), - external_tokens: Default::default(), - variables_to_inline: Default::default(), - supertype_symbols: Default::default(), - word_token: Default::default(), + variables: Vec::default(), + extra_symbols: Vec::default(), + expected_conflicts: Vec::default(), + precedence_orderings: Vec::default(), + external_tokens: Vec::default(), + variables_to_inline: Vec::default(), + supertype_symbols: Vec::default(), + word_token: Option::default(), } } } /// Transform an input grammar into separate components that are ready /// for parse table construction. 
-pub(crate) fn prepare_grammar( +pub fn prepare_grammar( input_grammar: &InputGrammar, ) -> Result<( SyntaxGrammar, @@ -109,9 +109,7 @@ fn validate_precedences(grammar: &InputGrammar) -> Result<()> { hash_map::Entry::Occupied(e) => { if e.get() != &ordering { return Err(anyhow!( - "Conflicting orderings for precedences {} and {}", - entry1, - entry2 + "Conflicting orderings for precedences {entry1} and {entry2}", )); } } @@ -127,16 +125,11 @@ fn validate_precedences(grammar: &InputGrammar) -> Result<()> { Rule::Repeat(rule) => validate(rule_name, rule, names), Rule::Seq(elements) | Rule::Choice(elements) => elements .iter() - .map(|e| validate(rule_name, e, names)) - .collect(), + .try_for_each(|e| validate(rule_name, e, names)), Rule::Metadata { rule, params } => { if let Precedence::Name(n) = ¶ms.precedence { if !names.contains(n) { - return Err(anyhow!( - "Undeclared precedence '{}' in rule '{}'", - n, - rule_name - )); + return Err(anyhow!("Undeclared precedence '{n}' in rule '{rule_name}'")); } } validate(rule_name, rule, names)?; @@ -168,7 +161,7 @@ fn validate_precedences(grammar: &InputGrammar) -> Result<()> { #[cfg(test)] mod tests { use super::*; - use crate::generate::grammars::{InputGrammar, Variable, VariableType}; + use crate::generate::grammars::VariableType; #[test] fn test_validate_precedences_with_undeclared_precedence() { diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/prepare_grammar/process_inlines.rs b/third-party/tree-sitter/tree-sitter/cli/src/generate/prepare_grammar/process_inlines.rs index 206ef8d3e36..659927a1768 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/generate/prepare_grammar/process_inlines.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/prepare_grammar/process_inlines.rs @@ -8,7 +8,7 @@ use std::collections::HashMap; #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] struct ProductionStepId { // A `None` value here means that the production itself was produced via inlining, - // and is 
stored in the the builder's `productions` vector, as opposed to being + // and is stored in the builder's `productions` vector, as opposed to being // stored in one of the grammar's variables. variable_index: Option, production_index: usize, @@ -21,7 +21,7 @@ struct InlinedProductionMapBuilder { } impl InlinedProductionMapBuilder { - fn build<'a>(mut self, grammar: &'a SyntaxGrammar) -> InlinedProductionMap { + fn build(mut self, grammar: &SyntaxGrammar) -> InlinedProductionMap { let mut step_ids_to_process = Vec::new(); for (variable_index, variable) in grammar.variables.iter().enumerate() { for production_index in 0..variable.productions.len() { @@ -38,14 +38,14 @@ impl InlinedProductionMapBuilder { if grammar.variables_to_inline.contains(&step.symbol) { let inlined_step_ids = self .inline_production_at_step(step_id, grammar) - .into_iter() - .cloned() + .iter() + .copied() .map(|production_index| ProductionStepId { variable_index: None, production_index, step_index: step_id.step_index, }); - step_ids_to_process.splice(i..i + 1, inlined_step_ids); + step_ids_to_process.splice(i..=i, inlined_step_ids); } else { step_ids_to_process[i] = ProductionStepId { variable_index: step_id.variable_index, @@ -67,11 +67,12 @@ impl InlinedProductionMapBuilder { let production_map = production_indices_by_step_id .into_iter() .map(|(step_id, production_indices)| { - let production = if let Some(variable_index) = step_id.variable_index { - &grammar.variables[variable_index].productions[step_id.production_index] - } else { - &productions[step_id.production_index] - } as *const Production; + let production = step_id.variable_index.map_or_else( + || &productions[step_id.production_index], + |variable_index| { + &grammar.variables[variable_index].productions[step_id.production_index] + }, + ) as *const Production; ((production, step_id.step_index as u32), production_indices) }) .collect(); @@ -86,29 +87,29 @@ impl InlinedProductionMapBuilder { &'a mut self, step_id: ProductionStepId, 
grammar: &'a SyntaxGrammar, - ) -> &'a Vec { + ) -> &'a [usize] { // Build a list of productions produced by inlining rules. let mut i = 0; let step_index = step_id.step_index; let mut productions_to_add = vec![self.production_for_id(step_id, grammar).clone()]; while i < productions_to_add.len() { if let Some(step) = productions_to_add[i].steps.get(step_index) { - let symbol = step.symbol.clone(); + let symbol = step.symbol; if grammar.variables_to_inline.contains(&symbol) { // Remove the production from the vector, replacing it with a placeholder. let production = productions_to_add - .splice(i..i + 1, [Production::default()].iter().cloned()) + .splice(i..=i, std::iter::once(&Production::default()).cloned()) .next() .unwrap(); // Replace the placeholder with the inlined productions. productions_to_add.splice( - i..i + 1, + i..=i, grammar.variables[symbol.index].productions.iter().map(|p| { let mut production = production.clone(); let removed_step = production .steps - .splice(step_index..(step_index + 1), p.steps.iter().cloned()) + .splice(step_index..=step_index, p.steps.iter().cloned()) .next() .unwrap(); let inserted_steps = @@ -127,7 +128,7 @@ impl InlinedProductionMapBuilder { if last_inserted_step.precedence.is_none() { last_inserted_step.precedence = removed_step.precedence; } - if last_inserted_step.associativity == None { + if last_inserted_step.associativity.is_none() { last_inserted_step.associativity = removed_step.associativity; } } @@ -169,11 +170,10 @@ impl InlinedProductionMapBuilder { id: ProductionStepId, grammar: &'a SyntaxGrammar, ) -> &'a Production { - if let Some(variable_index) = id.variable_index { - &grammar.variables[variable_index].productions[id.production_index] - } else { - &self.productions[id.production_index] - } + id.variable_index.map_or_else( + || &self.productions[id.production_index], + |variable_index| &grammar.variables[variable_index].productions[id.production_index], + ) } fn production_step_for_id<'a>( @@ -203,6 +203,12 
@@ pub(super) fn process_inlines( lexical_grammar.variables[symbol.index].name, )) } + SymbolType::NonTerminal if symbol.index == 0 => { + return Err(anyhow!( + "Rule `{}` cannot be inlined because it is the first rule", + grammar.variables[symbol.index].name, + )) + } _ => {} } } @@ -217,9 +223,7 @@ pub(super) fn process_inlines( #[cfg(test)] mod tests { use super::*; - use crate::generate::grammars::{ - LexicalVariable, ProductionStep, SyntaxVariable, VariableType, - }; + use crate::generate::grammars::{LexicalVariable, SyntaxVariable, VariableType}; use crate::generate::rules::{Associativity, Precedence, Symbol}; #[test] @@ -260,7 +264,7 @@ mod tests { ..Default::default() }; - let inline_map = process_inlines(&grammar, &Default::default()).unwrap(); + let inline_map = process_inlines(&grammar, &LexicalGrammar::default()).unwrap(); // Nothing to inline at step 0. assert!(inline_map @@ -356,15 +360,15 @@ mod tests { ..Default::default() }; - let inline_map = process_inlines(&grammar, &Default::default()).unwrap(); + let inline_map = process_inlines(&grammar, &LexicalGrammar::default()).unwrap(); - let productions: Vec<&Production> = inline_map + let productions = inline_map .inlined_productions(&grammar.variables[0].productions[0], 1) .unwrap() - .collect(); + .collect::>(); assert_eq!( - productions.iter().cloned().cloned().collect::>(), + productions.iter().copied().cloned().collect::>(), vec![ Production { dynamic_precedence: 0, @@ -455,15 +459,15 @@ mod tests { ..Default::default() }; - let inline_map = process_inlines(&grammar, &Default::default()).unwrap(); + let inline_map = process_inlines(&grammar, &LexicalGrammar::default()).unwrap(); - let productions: Vec<_> = inline_map + let productions = inline_map .inlined_productions(&grammar.variables[0].productions[0], 0) .unwrap() - .collect(); + .collect::>(); assert_eq!( - productions.iter().cloned().cloned().collect::>(), + productions.iter().copied().cloned().collect::>(), vec![Production { 
dynamic_precedence: 0, steps: vec![ diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/render.rs b/third-party/tree-sitter/tree-sitter/cli/src/generate/render.rs index cb9f6c72a74..2c0f73a588c 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/generate/render.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/render.rs @@ -129,6 +129,7 @@ impl Generator { } self.add_lex_modes_list(); + self.add_parse_table(); if !self.syntax_grammar.external_tokens.is_empty() { self.add_external_token_enum(); @@ -136,7 +137,6 @@ impl Generator { self.add_external_scanner_states_list(); } - self.add_parse_table(); self.add_parser_export(); self.buffer @@ -152,54 +152,56 @@ impl Generator { self.symbol_ids[&Symbol::end()].clone(), ); - self.symbol_map = self - .parse_table - .symbols - .iter() - .map(|symbol| { - let mut mapping = symbol; - - // There can be multiple symbols in the grammar that have the same name and kind, - // due to simple aliases. When that happens, ensure that they map to the same - // public-facing symbol. If one of the symbols is not aliased, choose that one - // to be the public-facing symbol. Otherwise, pick the symbol with the lowest - // numeric value. - if let Some(alias) = self.default_aliases.get(symbol) { - let kind = alias.kind(); - for other_symbol in &self.parse_table.symbols { - if let Some(other_alias) = self.default_aliases.get(other_symbol) { - if other_symbol < mapping && other_alias == alias { - mapping = other_symbol; - } - } else if self.metadata_for_symbol(*other_symbol) == (&alias.value, kind) { + self.symbol_map = HashMap::new(); + + for symbol in &self.parse_table.symbols { + let mut mapping = symbol; + + // There can be multiple symbols in the grammar that have the same name and kind, + // due to simple aliases. When that happens, ensure that they map to the same + // public-facing symbol. If one of the symbols is not aliased, choose that one + // to be the public-facing symbol. 
Otherwise, pick the symbol with the lowest + // numeric value. + if let Some(alias) = self.default_aliases.get(symbol) { + let kind = alias.kind(); + for other_symbol in &self.parse_table.symbols { + if let Some(other_alias) = self.default_aliases.get(other_symbol) { + if other_symbol < mapping && other_alias == alias { mapping = other_symbol; - break; } + } else if self.metadata_for_symbol(*other_symbol) == (&alias.value, kind) { + mapping = other_symbol; + break; } } - // Two anonymous tokens with different flags but the same string value - // should be represented with the same symbol in the public API. Examples: - // * "<" and token(prec(1, "<")) - // * "(" and token.immediate("(") - else if symbol.is_terminal() { - let metadata = self.metadata_for_symbol(*symbol); - for other_symbol in &self.parse_table.symbols { - let other_metadata = self.metadata_for_symbol(*other_symbol); - if other_metadata == metadata { - mapping = other_symbol; - break; + } + // Two anonymous tokens with different flags but the same string value + // should be represented with the same symbol in the public API. 
Examples: + // * "<" and token(prec(1, "<")) + // * "(" and token.immediate("(") + else if symbol.is_terminal() { + let metadata = self.metadata_for_symbol(*symbol); + for other_symbol in &self.parse_table.symbols { + let other_metadata = self.metadata_for_symbol(*other_symbol); + if other_metadata == metadata { + if let Some(mapped) = self.symbol_map.get(other_symbol) { + if mapped == symbol { + break; + } } + mapping = other_symbol; + break; } } + } - (*symbol, *mapping) - }) - .collect(); + self.symbol_map.insert(*symbol, *mapping); + } for production_info in &self.parse_table.production_infos { // Build a list of all field names for field_name in production_info.field_map.keys() { - if let Err(i) = self.field_names.binary_search(&field_name) { + if let Err(i) = self.field_names.binary_search(field_name) { self.field_names.insert(i, field_name.clone()); } } @@ -207,13 +209,14 @@ impl Generator { for alias in &production_info.alias_sequence { // Generate a mapping from aliases to C identifiers. if let Some(alias) = &alias { - let existing_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| { - if let Some(default_alias) = self.default_aliases.get(symbol) { - default_alias == alias - } else { - let (name, kind) = self.metadata_for_symbol(*symbol); - name == alias.value && kind == alias.kind() - } + let existing_symbol = self.parse_table.symbols.iter().copied().find(|symbol| { + self.default_aliases.get(symbol).map_or_else( + || { + let (name, kind) = self.metadata_for_symbol(*symbol); + name == alias.value && kind == alias.kind() + }, + |default_alias| default_alias == alias, + ) }); // Some aliases match an existing symbol in the grammar. 
@@ -254,13 +257,12 @@ impl Generator { } fn add_includes(&mut self) { - add_line!(self, "#include "); + add_line!(self, "#include \"tree_sitter/parser.h\""); add_line!(self, ""); } fn add_pragmas(&mut self) { add_line!(self, "#if defined(__GNUC__) || defined(__clang__)"); - add_line!(self, "#pragma GCC diagnostic push"); add_line!( self, "#pragma GCC diagnostic ignored \"-Wmissing-field-initializers\"" @@ -314,7 +316,7 @@ impl Generator { "#define SYMBOL_COUNT {}", self.parse_table.symbols.len() ); - add_line!(self, "#define ALIAS_COUNT {}", self.unique_aliases.len(),); + add_line!(self, "#define ALIAS_COUNT {}", self.unique_aliases.len()); add_line!(self, "#define TOKEN_COUNT {}", token_count); add_line!( self, @@ -336,19 +338,19 @@ impl Generator { } fn add_symbol_enum(&mut self) { - add_line!(self, "enum {{"); + add_line!(self, "enum ts_symbol_identifiers {{"); indent!(self); self.symbol_order.insert(Symbol::end(), 0); let mut i = 1; - for symbol in self.parse_table.symbols.iter() { + for symbol in &self.parse_table.symbols { if *symbol != Symbol::end() { self.symbol_order.insert(*symbol, i); - add_line!(self, "{} = {},", self.symbol_ids[&symbol], i); + add_line!(self, "{} = {},", self.symbol_ids[symbol], i); i += 1; } } for alias in &self.unique_aliases { - add_line!(self, "{} = {},", self.alias_ids[&alias], i); + add_line!(self, "{} = {},", self.alias_ids[alias], i); i += 1; } dedent!(self); @@ -359,20 +361,21 @@ impl Generator { fn add_symbol_names_list(&mut self) { add_line!(self, "static const char * const ts_symbol_names[] = {{"); indent!(self); - for symbol in self.parse_table.symbols.iter() { + for symbol in &self.parse_table.symbols { let name = self.sanitize_string( self.default_aliases .get(symbol) - .map(|alias| alias.value.as_str()) - .unwrap_or(self.metadata_for_symbol(*symbol).0), + .map_or(self.metadata_for_symbol(*symbol).0, |alias| { + alias.value.as_str() + }), ); - add_line!(self, "[{}] = \"{}\",", self.symbol_ids[&symbol], name); + 
add_line!(self, "[{}] = \"{}\",", self.symbol_ids[symbol], name); } for alias in &self.unique_aliases { add_line!( self, "[{}] = \"{}\",", - self.alias_ids[&alias], + self.alias_ids[alias], self.sanitize_string(&alias.value) ); } @@ -397,8 +400,8 @@ impl Generator { add_line!( self, "[{}] = {},", - self.alias_ids[&alias], - self.alias_ids[&alias], + self.alias_ids[alias], + self.alias_ids[alias], ); } @@ -408,7 +411,7 @@ impl Generator { } fn add_field_name_enum(&mut self) { - add_line!(self, "enum {{"); + add_line!(self, "enum ts_field_identifiers {{"); indent!(self); for (i, field_name) in self.field_names.iter().enumerate() { add_line!(self, "{} = {},", self.field_id(field_name), i + 1); @@ -442,7 +445,7 @@ impl Generator { ); indent!(self); for symbol in &self.parse_table.symbols { - add_line!(self, "[{}] = {{", self.symbol_ids[&symbol]); + add_line!(self, "[{}] = {{", self.symbol_ids[symbol]); indent!(self); if let Some(Alias { is_named, .. }) = self.default_aliases.get(symbol) { add_line!(self, ".visible = true,"); @@ -474,7 +477,7 @@ impl Generator { add_line!(self, "}},"); } for alias in &self.unique_aliases { - add_line!(self, "[{}] = {{", self.alias_ids[&alias]); + add_line!(self, "[{}] = {{", self.alias_ids[alias]); indent!(self); add_line!(self, ".visible = true,"); add_line!(self, ".named = {},", alias.is_named); @@ -506,7 +509,7 @@ impl Generator { indent!(self); for (j, alias) in production_info.alias_sequence.iter().enumerate() { if let Some(alias) = alias { - add_line!(self, "[{}] = {},", j, self.alias_ids[&alias]); + add_line!(self, "[{}] = {},", j, self.alias_ids[alias]); } } dedent!(self); @@ -525,15 +528,13 @@ impl Generator { if let Some(alias) = &step.alias { if step.symbol.is_non_terminal() && Some(alias) != self.default_aliases.get(&step.symbol) + && self.symbol_ids.contains_key(&step.symbol) { - if self.symbol_ids.contains_key(&step.symbol) { - if let Some(alias_id) = self.alias_ids.get(&alias) { - let alias_ids = alias_ids_by_symbol - 
.entry(step.symbol) - .or_insert(Vec::new()); - if let Err(i) = alias_ids.binary_search(&alias_id) { - alias_ids.insert(i, alias_id); - } + if let Some(alias_id) = self.alias_ids.get(alias) { + let alias_ids = + alias_ids_by_symbol.entry(step.symbol).or_insert(Vec::new()); + if let Err(i) = alias_ids.binary_search(&alias_id) { + alias_ids.insert(i, alias_id); } } } @@ -552,12 +553,12 @@ impl Generator { indent!(self); for (symbol, alias_ids) in alias_ids_by_symbol { let symbol_id = &self.symbol_ids[symbol]; - let public_symbol_id = &self.symbol_ids[&self.symbol_map[&symbol]]; - add_line!(self, "{}, {},", symbol_id, 1 + alias_ids.len()); + let public_symbol_id = &self.symbol_ids[&self.symbol_map[symbol]]; + add_line!(self, "{symbol_id}, {},", 1 + alias_ids.len()); indent!(self); - add_line!(self, "{},", public_symbol_id); + add_line!(self, "{public_symbol_id},"); for alias_id in alias_ids { - add_line!(self, "{},", alias_id); + add_line!(self, "{alias_id},"); } dedent!(self); } @@ -583,7 +584,7 @@ impl Generator { let primary_state = first_state_for_each_core_id .entry(state.core_id) .or_insert(idx); - add_line!(self, "[{}] = {},", idx, primary_state); + add_line!(self, "[{idx}] = {primary_state},"); } dedent!(self); add_line!(self, "}};"); @@ -594,14 +595,16 @@ impl Generator { let mut flat_field_maps = vec![]; let mut next_flat_field_map_index = 0; self.get_field_map_id( - &Vec::new(), + Vec::new(), &mut flat_field_maps, &mut next_flat_field_map_index, ); let mut field_map_ids = Vec::new(); for production_info in &self.parse_table.production_infos { - if !production_info.field_map.is_empty() { + if production_info.field_map.is_empty() { + field_map_ids.push((0, 0)); + } else { let mut flat_field_map = Vec::new(); for (field_name, locations) in &production_info.field_map { for location in locations { @@ -610,14 +613,12 @@ impl Generator { } field_map_ids.push(( self.get_field_map_id( - &flat_field_map, + flat_field_map.clone(), &mut flat_field_maps, &mut 
next_flat_field_map_index, ), flat_field_map.len(), )); - } else { - field_map_ids.push((0, 0)); } } @@ -630,10 +631,7 @@ impl Generator { if length > 0 { add_line!( self, - "[{}] = {{.index = {}, .length = {}}},", - production_id, - row_id, - length + "[{production_id}] = {{.index = {row_id}, .length = {length}}},", ); } } @@ -647,7 +645,7 @@ impl Generator { ); indent!(self); for (row_index, field_pairs) in flat_field_maps.into_iter().skip(1) { - add_line!(self, "[{}] =", row_index); + add_line!(self, "[{row_index}] ="); indent!(self); for (field_name, location) in field_pairs { add_whitespace!(self); @@ -676,7 +674,7 @@ impl Generator { // For each lex state, compute a summary of the code that needs to be // generated. - let state_transition_summaries: Vec> = lex_table + let state_transition_summaries = lex_table .states .iter() .map(|state| { @@ -695,7 +693,7 @@ impl Generator { ruled_out_chars.extend(chars.iter()); } else { ranges = chars.clone().negate().simplify_ignoring(&ruled_out_chars); - ranges.insert(0, '\0'..'\0') + ranges.insert(0, '\0'..'\0'); } // Record any large character sets so that they can be extracted @@ -733,10 +731,10 @@ impl Generator { }) .collect() }) - .collect(); + .collect::>>(); // Generate a helper function for each large character set. 
- let mut sorted_large_char_sets: Vec<_> = large_character_sets.iter().map(|e| e).collect(); + let mut sorted_large_char_sets = large_character_sets.iter().collect::>(); sorted_large_char_sets.sort_unstable_by_key(|info| (info.symbol, info.index)); for info in sorted_large_char_sets { add_line!( @@ -758,8 +756,7 @@ impl Generator { add_line!( self, - "static bool {}(TSLexer *lexer, TSStateId state) {{", - name + "static bool {name}(TSLexer *lexer, TSStateId state) {{", ); indent!(self); @@ -769,7 +766,7 @@ impl Generator { indent!(self); for (i, state) in lex_table.states.into_iter().enumerate() { - add_line!(self, "case {}:", i); + add_line!(self, "case {i}:"); indent!(self); self.add_lex_state(state, &state_transition_summaries[i], &large_character_sets); dedent!(self); @@ -808,14 +805,14 @@ impl Generator { } i += 1; } - return None; + None } fn add_lex_state( &mut self, state: LexState, - transition_info: &Vec, - large_character_sets: &Vec, + transition_info: &[TransitionSummary], + large_character_sets: &[LargeCharacterSetInfo], ) { if let Some(accept_action) = state.accept_action { add_line!(self, "ACCEPT_TOKEN({});", self.symbol_ids[&accept_action]); @@ -850,7 +847,7 @@ impl Generator { // Otherwise, generate code to compare the lookahead character // with all of the character ranges. 
- if transition.ranges.len() > 0 { + if !transition.ranges.is_empty() { add!(self, "if ("); self.add_character_range_conditions(&transition.ranges, transition.is_included, 2); add!(self, ") "); @@ -873,18 +870,33 @@ impl Generator { line_break.push_str(" "); } + // parenthesis needed if we add the `!eof` condition to explicitly avoid confusion with + // precedence of `&&` and `||` + let (mut need_open_paren, mut need_close_paren) = (false, false); for (i, range) in ranges.iter().enumerate() { if is_included { if i > 0 { - add!(self, " ||{}", line_break); + add!(self, " ||{line_break}"); + } + if range.start == '\0' { + add!(self, "!eof && "); + (need_open_paren, need_close_paren) = (true, true); } if range.end == range.start { + if need_open_paren { + add!(self, "("); + need_open_paren = false; + } add!(self, "lookahead == "); self.add_character(range.start); + if need_close_paren && i == ranges.len() - 1 { + add!(self, ")"); + need_close_paren = false; + } } else if range.end as u32 == range.start as u32 + 1 { add!(self, "lookahead == "); self.add_character(range.start); - add!(self, " ||{}lookahead == ", line_break); + add!(self, " ||{line_break}lookahead == "); self.add_character(range.end); } else { add!(self, "("); @@ -895,7 +907,7 @@ impl Generator { } } else { if i > 0 { - add!(self, " &&{}", line_break); + add!(self, " &&{line_break}"); } if range.end == range.start { add!(self, "lookahead != "); @@ -903,19 +915,17 @@ impl Generator { } else if range.end as u32 == range.start as u32 + 1 { add!(self, "lookahead != "); self.add_character(range.start); - add!(self, " &&{}lookahead != ", line_break); + add!(self, " &&{line_break}lookahead != "); + self.add_character(range.end); + } else if range.start != '\0' { + add!(self, "(lookahead < "); + self.add_character(range.start); + add!(self, " || "); self.add_character(range.end); + add!(self, " < lookahead)"); } else { - if range.start != '\0' { - add!(self, "(lookahead < "); - self.add_character(range.start); - 
add!(self, " || "); - self.add_character(range.end); - add!(self, " < lookahead)"); - } else { - add!(self, "lookahead > "); - self.add_character(range.end); - } + add!(self, "lookahead > "); + self.add_character(range.end); } } } @@ -944,7 +954,7 @@ impl Generator { add!(self, "("); } - add!(self, "c {} ", op); + add!(self, "c {op} "); self.add_character(*value); if !simple { @@ -997,17 +1007,16 @@ impl Generator { indent!(self); for (i, state) in self.parse_table.states.iter().enumerate() { if state.is_end_of_non_terminal_extra() { - add_line!(self, "[{}] = {{(TSStateId)(-1)}},", i,); + add_line!(self, "[{i}] = {{(TSStateId)(-1)}},"); } else if state.external_lex_state_id > 0 { add_line!( self, - "[{}] = {{.lex_state = {}, .external_lex_state = {}}},", - i, + "[{i}] = {{.lex_state = {}, .external_lex_state = {}}},", state.lex_state_id, state.external_lex_state_id ); } else { - add_line!(self, "[{}] = {{.lex_state = {}}},", i, state.lex_state_id); + add_line!(self, "[{i}] = {{.lex_state = {}}},", state.lex_state_id); } } dedent!(self); @@ -1016,7 +1025,7 @@ impl Generator { } fn add_external_token_enum(&mut self) { - add_line!(self, "enum {{"); + add_line!(self, "enum ts_external_scanner_symbol_identifiers {{"); indent!(self); for i in 0..self.syntax_grammar.external_tokens.len() { add_line!( @@ -1041,11 +1050,11 @@ impl Generator { let token = &self.syntax_grammar.external_tokens[i]; let id_token = token .corresponding_internal_token - .unwrap_or(Symbol::external(i)); + .unwrap_or_else(|| Symbol::external(i)); add_line!( self, "[{}] = {},", - self.external_token_id(&token), + self.external_token_id(token), self.symbol_ids[&id_token], ); } @@ -1140,12 +1149,7 @@ impl Generator { &mut parse_table_entries, &mut next_parse_action_list_index, ); - add_line!( - self, - "[{}] = ACTIONS({}),", - self.symbol_ids[symbol], - entry_id - ); + add_line!(self, "[{}] = ACTIONS({entry_id}),", self.symbol_ids[symbol]); } dedent!(self); add_line!(self, "}},"); @@ -1160,7 +1164,7 @@ 
impl Generator { let mut index = 0; let mut small_state_indices = Vec::new(); - let mut symbols_by_value: HashMap<(usize, SymbolType), Vec> = HashMap::new(); + let mut symbols_by_value = HashMap::<(usize, SymbolType), Vec>::new(); for state in self.parse_table.states.iter().skip(self.large_state_count) { small_state_indices.push(index); symbols_by_value.clear(); @@ -1201,14 +1205,14 @@ impl Generator { (symbols.len(), *kind, *value, symbols[0]) }); - add_line!(self, "[{}] = {},", index, values_with_symbols.len()); + add_line!(self, "[{index}] = {},", values_with_symbols.len()); indent!(self); - for ((value, kind), symbols) in values_with_symbols.iter_mut() { + for ((value, kind), symbols) in &mut values_with_symbols { if *kind == SymbolType::NonTerminal { - add_line!(self, "STATE({}), {},", value, symbols.len()); + add_line!(self, "STATE({value}), {},", symbols.len()); } else { - add_line!(self, "ACTIONS({}), {},", value, symbols.len()); + add_line!(self, "ACTIONS({value}), {},", symbols.len()); } symbols.sort_unstable(); @@ -1239,8 +1243,7 @@ impl Generator { for i in self.large_state_count..self.parse_table.states.len() { add_line!( self, - "[SMALL_STATE({})] = {},", - i, + "[SMALL_STATE({i})] = {},", small_state_indices[i - self.large_state_count] ); } @@ -1249,10 +1252,10 @@ impl Generator { add_line!(self, ""); } - let mut parse_table_entries: Vec<_> = parse_table_entries + let mut parse_table_entries = parse_table_entries .into_iter() .map(|(entry, i)| (i, entry)) - .collect(); + .collect::>(); parse_table_entries.sort_by_key(|(index, _)| *index); self.add_parse_action_list(parse_table_entries); } @@ -1266,8 +1269,7 @@ impl Generator { for (i, entry) in parse_table_entries { add!( self, - " [{}] = {{.entry = {{.count = {}, .reusable = {}}}}},", - i, + " [{i}] = {{.entry = {{.count = {}, .reusable = {}}}}},", entry.actions.len(), entry.reusable ); @@ -1282,9 +1284,9 @@ impl Generator { is_repetition, } => { if is_repetition { - add!(self, "SHIFT_REPEAT({})", 
state); + add!(self, "SHIFT_REPEAT({state})"); } else { - add!(self, "SHIFT({})", state); + add!(self, "SHIFT({state})"); } } ParseAction::Reduce { @@ -1294,17 +1296,17 @@ impl Generator { production_id, .. } => { - add!(self, "REDUCE({}, {}", self.symbol_ids[&symbol], child_count); + add!(self, "REDUCE({}, {child_count}", self.symbol_ids[&symbol]); if dynamic_precedence != 0 { - add!(self, ", .dynamic_precedence = {}", dynamic_precedence); + add!(self, ", .dynamic_precedence = {dynamic_precedence}"); } if production_id != 0 { - add!(self, ", .production_id = {}", production_id); + add!(self, ", .production_id = {production_id}"); } add!(self, ")"); } } - add!(self, ",") + add!(self, ","); } add!(self, "\n"); } @@ -1315,42 +1317,43 @@ impl Generator { fn add_parser_export(&mut self) { let language_function_name = format!("tree_sitter_{}", self.language_name); - let external_scanner_name = format!("{}_external_scanner", language_function_name); + let external_scanner_name = format!("{language_function_name}_external_scanner"); add_line!(self, "#ifdef __cplusplus"); add_line!(self, r#"extern "C" {{"#); add_line!(self, "#endif"); if !self.syntax_grammar.external_tokens.is_empty() { - add_line!(self, "void *{}_create(void);", external_scanner_name); - add_line!(self, "void {}_destroy(void *);", external_scanner_name); + add_line!(self, "void *{external_scanner_name}_create(void);"); + add_line!(self, "void {external_scanner_name}_destroy(void *);"); add_line!( self, - "bool {}_scan(void *, TSLexer *, const bool *);", - external_scanner_name + "bool {external_scanner_name}_scan(void *, TSLexer *, const bool *);", ); add_line!( self, - "unsigned {}_serialize(void *, char *);", - external_scanner_name + "unsigned {external_scanner_name}_serialize(void *, char *);", ); add_line!( self, - "void {}_deserialize(void *, const char *, unsigned);", - external_scanner_name + "void {external_scanner_name}_deserialize(void *, const char *, unsigned);", ); add_line!(self, ""); } 
add_line!(self, "#ifdef _WIN32"); - add_line!(self, "#define extern __declspec(dllexport)"); + add_line!(self, "#define TS_PUBLIC __declspec(dllexport)"); + add_line!(self, "#else"); + add_line!( + self, + "#define TS_PUBLIC __attribute__((visibility(\"default\")))" + ); add_line!(self, "#endif"); add_line!(self, ""); add_line!( self, - "extern const TSLanguage *{}(void) {{", - language_function_name + "TS_PUBLIC const TSLanguage *{language_function_name}() {{", ); indent!(self); add_line!(self, "static const TSLanguage language = {{"); @@ -1410,11 +1413,11 @@ impl Generator { indent!(self); add_line!(self, "&ts_external_scanner_states[0][0],"); add_line!(self, "ts_external_scanner_symbol_map,"); - add_line!(self, "{}_create,", external_scanner_name); - add_line!(self, "{}_destroy,", external_scanner_name); - add_line!(self, "{}_scan,", external_scanner_name); - add_line!(self, "{}_serialize,", external_scanner_name); - add_line!(self, "{}_deserialize,", external_scanner_name); + add_line!(self, "{external_scanner_name}_create,"); + add_line!(self, "{external_scanner_name}_destroy,"); + add_line!(self, "{external_scanner_name}_scan,"); + add_line!(self, "{external_scanner_name}_serialize,"); + add_line!(self, "{external_scanner_name}_deserialize,"); dedent!(self); add_line!(self, "}},"); } @@ -1451,7 +1454,7 @@ impl Generator { fn get_field_map_id( &self, - flat_field_map: &Vec<(String, FieldLocation)>, + flat_field_map: Vec<(String, FieldLocation)>, flat_field_maps: &mut Vec<(usize, Vec<(String, FieldLocation)>)>, next_flat_field_map_index: &mut usize, ) -> usize { @@ -1460,8 +1463,8 @@ impl Generator { } let result = *next_flat_field_map_index; - flat_field_maps.push((result, flat_field_map.clone())); *next_flat_field_map_index += flat_field_map.len(); + flat_field_maps.push((result, flat_field_map)); result } @@ -1500,8 +1503,8 @@ impl Generator { self.symbol_ids.insert(symbol, id); } - fn field_id(&self, field_name: &String) -> String { - format!("field_{}", 
field_name) + fn field_id(&self, field_name: &str) -> String { + format!("field_{field_name}") } fn metadata_for_symbol(&self, symbol: Symbol) -> (&str, VariableType) { @@ -1525,54 +1528,93 @@ impl Generator { fn sanitize_identifier(&self, name: &str) -> String { let mut result = String::with_capacity(name.len()); for c in name.chars() { - if ('a' <= c && c <= 'z') - || ('A' <= c && c <= 'Z') - || ('0' <= c && c <= '9') - || c == '_' - { + if c.is_ascii_alphanumeric() || c == '_' { result.push(c); } else { - let replacement = match c { - '~' => "TILDE", - '`' => "BQUOTE", - '!' => "BANG", - '@' => "AT", - '#' => "POUND", - '$' => "DOLLAR", - '%' => "PERCENT", - '^' => "CARET", - '&' => "AMP", - '*' => "STAR", - '(' => "LPAREN", - ')' => "RPAREN", - '-' => "DASH", - '+' => "PLUS", - '=' => "EQ", - '{' => "LBRACE", - '}' => "RBRACE", - '[' => "LBRACK", - ']' => "RBRACK", - '\\' => "BSLASH", - '|' => "PIPE", - ':' => "COLON", - ';' => "SEMI", - '"' => "DQUOTE", - '\'' => "SQUOTE", - '<' => "LT", - '>' => "GT", - ',' => "COMMA", - '.' => "DOT", - '?' => "QMARK", - '/' => "SLASH", - '\n' => "LF", - '\r' => "CR", - '\t' => "TAB", - _ => continue, - }; - if !result.is_empty() && !result.ends_with("_") { - result.push('_'); + 'special_chars: { + let replacement = match c { + ' ' if name.len() == 1 => "SPACE", + '~' => "TILDE", + '`' => "BQUOTE", + '!' => "BANG", + '@' => "AT", + '#' => "POUND", + '$' => "DOLLAR", + '%' => "PERCENT", + '^' => "CARET", + '&' => "AMP", + '*' => "STAR", + '(' => "LPAREN", + ')' => "RPAREN", + '-' => "DASH", + '+' => "PLUS", + '=' => "EQ", + '{' => "LBRACE", + '}' => "RBRACE", + '[' => "LBRACK", + ']' => "RBRACK", + '\\' => "BSLASH", + '|' => "PIPE", + ':' => "COLON", + ';' => "SEMI", + '"' => "DQUOTE", + '\'' => "SQUOTE", + '<' => "LT", + '>' => "GT", + ',' => "COMMA", + '.' => "DOT", + '?' 
=> "QMARK", + '/' => "SLASH", + '\n' => "LF", + '\r' => "CR", + '\t' => "TAB", + '\0' => "NULL", + '\u{0001}' => "SOH", + '\u{0002}' => "STX", + '\u{0003}' => "ETX", + '\u{0004}' => "EOT", + '\u{0005}' => "ENQ", + '\u{0006}' => "ACK", + '\u{0007}' => "BEL", + '\u{0008}' => "BS", + '\u{000b}' => "VTAB", + '\u{000c}' => "FF", + '\u{000e}' => "SO", + '\u{000f}' => "SI", + '\u{0010}' => "DLE", + '\u{0011}' => "DC1", + '\u{0012}' => "DC2", + '\u{0013}' => "DC3", + '\u{0014}' => "DC4", + '\u{0015}' => "NAK", + '\u{0016}' => "SYN", + '\u{0017}' => "ETB", + '\u{0018}' => "CAN", + '\u{0019}' => "EM", + '\u{001a}' => "SUB", + '\u{001b}' => "ESC", + '\u{001c}' => "FS", + '\u{001d}' => "GS", + '\u{001e}' => "RS", + '\u{001f}' => "US", + '\u{007F}' => "DEL", + '\u{FEFF}' => "BOM", + '\u{0080}'..='\u{FFFF}' => { + result.push_str(&format!("u{:04x}", c as u32)); + break 'special_chars; + } + '\u{10000}'..='\u{10FFFF}' => { + result.push_str(&format!("U{:08x}", c as u32)); + break 'special_chars; + } + '0'..='9' | 'a'..='z' | 'A'..='Z' | '_' => unreachable!(), + ' ' => break 'special_chars, + }; + if !result.is_empty() && !result.ends_with('_') { + result.push('_'); + } + result += replacement; } - result += replacement; } } result @@ -1585,10 +1627,19 @@ impl Generator { '\"' => result += "\\\"", '?' 
=> result += "\\?", '\\' => result += "\\\\", + '\u{0007}' => result += "\\a", + '\u{0008}' => result += "\\b", + '\u{000b}' => result += "\\v", '\u{000c}' => result += "\\f", '\n' => result += "\\n", '\r' => result += "\\r", '\t' => result += "\\t", + '\0' => result += "\\0", + '\u{0001}'..='\u{001f}' => result += &format!("\\x{:02x}", c as u32), + '\u{007F}'..='\u{FFFF}' => result += &format!("\\u{:04x}", c as u32), + '\u{10000}'..='\u{10FFFF}' => { + result.push_str(&format!("\\U{:08x}", c as u32)); + } _ => result.push(c), } } @@ -1605,9 +1656,9 @@ impl Generator { '\r' => add!(self, "'\\r'"), _ => { if c == ' ' || c.is_ascii_graphic() { - add!(self, "'{}'", c) + add!(self, "'{c}'"); } else { - add!(self, "{}", c as u32) + add!(self, "{}", c as u32); } } } @@ -1632,7 +1683,8 @@ impl Generator { /// * `abi_version` - The language ABI version that should be generated. Usually /// you want Tree-sitter's current version, but right after making an ABI /// change, it may be useful to generate code with the previous ABI. 
-pub(crate) fn render_c_code( +#[allow(clippy::too_many_arguments)] +pub fn render_c_code( name: &str, parse_table: ParseTable, main_lex_table: LexTable, @@ -1643,12 +1695,10 @@ pub(crate) fn render_c_code( default_aliases: AliasMap, abi_version: usize, ) -> String { - if !(ABI_VERSION_MIN..=ABI_VERSION_MAX).contains(&abi_version) { - panic!( - "This version of Tree-sitter can only generate parsers with ABI version {} - {}, not {}", - ABI_VERSION_MIN, ABI_VERSION_MAX, abi_version - ); - } + assert!( + (ABI_VERSION_MIN..=ABI_VERSION_MAX).contains(&abi_version), + "This version of Tree-sitter can only generate parsers with ABI version {ABI_VERSION_MIN} - {ABI_VERSION_MAX}, not {abi_version}", + ); Generator { buffer: String::new(), diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/rules.rs b/third-party/tree-sitter/tree-sitter/cli/src/generate/rules.rs index 0e3ff898926..af744781f6b 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/generate/rules.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/rules.rs @@ -1,10 +1,9 @@ use super::grammars::VariableType; use smallbitvec::SmallBitVec; -use std::iter::FromIterator; use std::{collections::HashMap, fmt}; #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub(crate) enum SymbolType { +pub enum SymbolType { External, End, EndOfNonTerminalExtra, @@ -13,28 +12,29 @@ pub(crate) enum SymbolType { } #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub(crate) enum Associativity { +pub enum Associativity { Left, Right, } #[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub(crate) struct Alias { +pub struct Alias { pub value: String, pub is_named: bool, } -#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Default)] pub enum Precedence { + #[default] None, Integer(i32), Name(String), } -pub(crate) type AliasMap = HashMap; +pub type AliasMap = HashMap; #[derive(Clone, 
Debug, Default, PartialEq, Eq, Hash)] -pub(crate) struct MetadataParams { +pub struct MetadataParams { pub precedence: Precedence, pub dynamic_precedence: i32, pub associativity: Option, @@ -47,16 +47,16 @@ pub(crate) struct MetadataParams { } #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub(crate) struct Symbol { +pub struct Symbol { pub kind: SymbolType, pub index: usize, } #[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub(crate) enum Rule { +pub enum Rule { Blank, String(String), - Pattern(String), + Pattern(String, String), NamedSymbol(String), Symbol(Symbol), Choice(Vec), @@ -73,7 +73,7 @@ pub(crate) enum Rule { // index corresponding to a token, and each value representing whether or not // the token is present in the set. #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct TokenSet { +pub struct TokenSet { terminal_bits: SmallBitVec, external_bits: SmallBitVec, eof: bool, @@ -81,76 +81,77 @@ pub(crate) struct TokenSet { } impl Rule { - pub fn field(name: String, content: Rule) -> Self { + pub fn field(name: String, content: Self) -> Self { add_metadata(content, move |params| { params.field_name = Some(name); }) } - pub fn alias(content: Rule, value: String, is_named: bool) -> Self { + pub fn alias(content: Self, value: String, is_named: bool) -> Self { add_metadata(content, move |params| { - params.alias = Some(Alias { is_named, value }); + params.alias = Some(Alias { value, is_named }); }) } - pub fn token(content: Rule) -> Self { + pub fn token(content: Self) -> Self { add_metadata(content, |params| { params.is_token = true; }) } - pub fn immediate_token(content: Rule) -> Self { + pub fn immediate_token(content: Self) -> Self { add_metadata(content, |params| { params.is_token = true; params.is_main_token = true; }) } - pub fn prec(value: Precedence, content: Rule) -> Self { + pub fn prec(value: Precedence, content: Self) -> Self { add_metadata(content, |params| { params.precedence = value; }) } - pub fn prec_left(value: 
Precedence, content: Rule) -> Self { + pub fn prec_left(value: Precedence, content: Self) -> Self { add_metadata(content, |params| { params.associativity = Some(Associativity::Left); params.precedence = value; }) } - pub fn prec_right(value: Precedence, content: Rule) -> Self { + pub fn prec_right(value: Precedence, content: Self) -> Self { add_metadata(content, |params| { params.associativity = Some(Associativity::Right); params.precedence = value; }) } - pub fn prec_dynamic(value: i32, content: Rule) -> Self { + pub fn prec_dynamic(value: i32, content: Self) -> Self { add_metadata(content, |params| { params.dynamic_precedence = value; }) } - pub fn repeat(rule: Rule) -> Self { - Rule::Repeat(Box::new(rule)) + pub fn repeat(rule: Self) -> Self { + Self::Repeat(Box::new(rule)) } - pub fn choice(rules: Vec) -> Self { + pub fn choice(rules: Vec) -> Self { let mut elements = Vec::with_capacity(rules.len()); for rule in rules { choice_helper(&mut elements, rule); } - Rule::Choice(elements) + Self::Choice(elements) } - pub fn seq(rules: Vec) -> Self { - Rule::Seq(rules) + pub fn seq(rules: Vec) -> Self { + Self::Seq(rules) } } impl Alias { - pub fn kind(&self) -> VariableType { + #[must_use] + pub const fn kind(&self) -> VariableType { if self.is_named { VariableType::Named } else { @@ -160,85 +161,101 @@ impl Alias { } impl Precedence { - pub fn is_none(&self) -> bool { - matches!(self, Precedence::None) + #[must_use] + pub const fn is_none(&self) -> bool { + matches!(self, Self::None) } } #[cfg(test)] impl Rule { - pub fn terminal(index: usize) -> Self { - Rule::Symbol(Symbol::terminal(index)) + #[must_use] + pub const fn terminal(index: usize) -> Self { + Self::Symbol(Symbol::terminal(index)) } - pub fn non_terminal(index: usize) -> Self { - Rule::Symbol(Symbol::non_terminal(index)) + #[must_use] + pub const fn non_terminal(index: usize) -> Self { + Self::Symbol(Symbol::non_terminal(index)) } - pub fn external(index: usize) -> Self { - 
Rule::Symbol(Symbol::external(index)) + #[must_use] + pub const fn external(index: usize) -> Self { + Self::Symbol(Symbol::external(index)) } + #[must_use] pub fn named(name: &'static str) -> Self { - Rule::NamedSymbol(name.to_string()) + Self::NamedSymbol(name.to_string()) } + #[must_use] pub fn string(value: &'static str) -> Self { - Rule::String(value.to_string()) + Self::String(value.to_string()) } - pub fn pattern(value: &'static str) -> Self { - Rule::Pattern(value.to_string()) + #[must_use] + pub fn pattern(value: &'static str, flags: &'static str) -> Self { + Self::Pattern(value.to_string(), flags.to_string()) } } impl Symbol { + #[must_use] pub fn is_terminal(&self) -> bool { self.kind == SymbolType::Terminal } + #[must_use] pub fn is_non_terminal(&self) -> bool { self.kind == SymbolType::NonTerminal } + #[must_use] pub fn is_external(&self) -> bool { self.kind == SymbolType::External } + #[must_use] pub fn is_eof(&self) -> bool { self.kind == SymbolType::End } - pub fn non_terminal(index: usize) -> Self { - Symbol { + #[must_use] + pub const fn non_terminal(index: usize) -> Self { + Self { kind: SymbolType::NonTerminal, index, } } - pub fn terminal(index: usize) -> Self { - Symbol { + #[must_use] + pub const fn terminal(index: usize) -> Self { + Self { kind: SymbolType::Terminal, index, } } - pub fn external(index: usize) -> Self { - Symbol { + #[must_use] + pub const fn external(index: usize) -> Self { + Self { kind: SymbolType::External, index, } } - pub fn end() -> Self { - Symbol { + #[must_use] + pub const fn end() -> Self { + Self { kind: SymbolType::End, index: 0, } } - pub fn end_of_nonterminal_extra() -> Self { - Symbol { + #[must_use] + pub const fn end_of_nonterminal_extra() -> Self { + Self { kind: SymbolType::EndOfNonTerminalExtra, index: 0, } @@ -246,8 +263,9 @@ impl Symbol { } impl From for Rule { + #[must_use] fn from(symbol: Symbol) -> Self { - Rule::Symbol(symbol) + Self::Symbol(symbol) } } @@ -261,7 +279,7 @@ impl TokenSet { } } - pub 
fn iter<'a>(&'a self) -> impl Iterator + 'a { + pub fn iter(&self) -> impl Iterator + '_ { self.terminal_bits .iter() .enumerate() @@ -292,7 +310,7 @@ impl TokenSet { }) } - pub fn terminals<'a>(&'a self) -> impl Iterator + 'a { + pub fn terminals(&self) -> impl Iterator + '_ { self.terminal_bits .iter() .enumerate() @@ -361,11 +379,9 @@ impl TokenSet { }; } }; - if other.index < vec.len() { - if vec[other.index] { - vec.set(other.index, false); - return true; - } + if other.index < vec.len() && vec[other.index] { + vec.set(other.index, false); + return true; } false } @@ -377,7 +393,7 @@ impl TokenSet { && !self.external_bits.iter().any(|a| a) } - pub fn insert_all_terminals(&mut self, other: &TokenSet) -> bool { + pub fn insert_all_terminals(&mut self, other: &Self) -> bool { let mut result = false; if other.terminal_bits.len() > self.terminal_bits.len() { self.terminal_bits.resize(other.terminal_bits.len(), false); @@ -391,7 +407,7 @@ impl TokenSet { result } - fn insert_all_externals(&mut self, other: &TokenSet) -> bool { + fn insert_all_externals(&mut self, other: &Self) -> bool { let mut result = false; if other.external_bits.len() > self.external_bits.len() { self.external_bits.resize(other.external_bits.len(), false); @@ -405,7 +421,7 @@ impl TokenSet { result } - pub fn insert_all(&mut self, other: &TokenSet) -> bool { + pub fn insert_all(&mut self, other: &Self) -> bool { let mut result = false; if other.eof { result |= !self.eof; @@ -466,15 +482,9 @@ fn choice_helper(result: &mut Vec, rule: Rule) { impl fmt::Display for Precedence { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - Precedence::Integer(i) => write!(f, "{}", i), - Precedence::Name(s) => write!(f, "'{}'", s), - Precedence::None => write!(f, "none"), + Self::Integer(i) => write!(f, "{i}"), + Self::Name(s) => write!(f, "'{s}'"), + Self::None => write!(f, "none"), } } } - -impl Default for Precedence { - fn default() -> Self { - Precedence::None - } -} diff --git 
a/third-party/tree-sitter/tree-sitter/cli/src/generate/tables.rs b/third-party/tree-sitter/tree-sitter/cli/src/generate/tables.rs index 16bf185165c..3d84c541a81 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/generate/tables.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/tables.rs @@ -1,9 +1,9 @@ use super::nfa::CharacterSet; use super::rules::{Alias, Symbol, TokenSet}; use std::collections::BTreeMap; -pub(crate) type ProductionInfoId = usize; -pub(crate) type ParseStateId = usize; -pub(crate) type LexStateId = usize; +pub type ProductionInfoId = usize; +pub type ParseStateId = usize; +pub type LexStateId = usize; use std::hash::BuildHasherDefault; @@ -11,7 +11,7 @@ use indexmap::IndexMap; use rustc_hash::FxHasher; #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub(crate) enum ParseAction { +pub enum ParseAction { Accept, Shift { state: ParseStateId, @@ -28,19 +28,19 @@ pub(crate) enum ParseAction { } #[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub(crate) enum GotoAction { +pub enum GotoAction { Goto(ParseStateId), ShiftExtra, } #[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub(crate) struct ParseTableEntry { +pub struct ParseTableEntry { pub actions: Vec, pub reusable: bool, } #[derive(Clone, Debug, Default, PartialEq, Eq)] -pub(crate) struct ParseState { +pub struct ParseState { pub id: ParseStateId, pub terminal_entries: IndexMap>, pub nonterminal_entries: IndexMap>, @@ -50,19 +50,19 @@ pub(crate) struct ParseState { } #[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] -pub(crate) struct FieldLocation { +pub struct FieldLocation { pub index: usize, pub inherited: bool, } #[derive(Debug, Default, PartialEq, Eq)] -pub(crate) struct ProductionInfo { +pub struct ProductionInfo { pub alias_sequence: Vec>, pub field_map: BTreeMap>, } #[derive(Debug, PartialEq, Eq)] -pub(crate) struct ParseTable { +pub struct ParseTable { pub states: Vec, pub symbols: Vec, pub production_infos: Vec, @@ -71,25 +71,25 @@ pub(crate) 
struct ParseTable { } #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] -pub(crate) struct AdvanceAction { +pub struct AdvanceAction { pub state: LexStateId, pub in_main_token: bool, } #[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord)] -pub(crate) struct LexState { +pub struct LexState { pub accept_action: Option, pub eof_action: Option, pub advance_actions: Vec<(CharacterSet, AdvanceAction)>, } -#[derive(Debug, PartialEq, Eq)] -pub(crate) struct LexTable { +#[derive(Debug, PartialEq, Eq, Default)] +pub struct LexTable { pub states: Vec, } impl ParseTableEntry { - pub fn new() -> Self { + pub const fn new() -> Self { Self { reusable: true, actions: Vec::new(), @@ -97,19 +97,13 @@ impl ParseTableEntry { } } -impl Default for LexTable { - fn default() -> Self { - LexTable { states: Vec::new() } - } -} - impl ParseState { pub fn is_end_of_non_terminal_extra(&self) -> bool { self.terminal_entries .contains_key(&Symbol::end_of_nonterminal_extra()) } - pub fn referenced_states<'a>(&'a self) -> impl Iterator + 'a { + pub fn referenced_states(&self) -> impl Iterator + '_ { self.terminal_entries .iter() .flat_map(|(_, entry)| { @@ -129,7 +123,7 @@ impl ParseState { pub fn update_referenced_states(&mut self, mut f: F) where - F: FnMut(usize, &ParseState) -> usize, + F: FnMut(usize, &Self) -> usize, { let mut updates = Vec::new(); for (symbol, entry) in &self.terminal_entries { diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/.editorconfig b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/.editorconfig new file mode 100644 index 00000000000..d3a8b5b697f --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/.editorconfig @@ -0,0 +1,39 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true + +[*.{json,toml,yml,gyp}] +indent_style = space +indent_size = 2 + +[*.js] +indent_style = space +indent_size = 2 + +[*.rs] 
+indent_style = space +indent_size = 4 + +[*.{c,cc,h}] +indent_style = space +indent_size = 4 + +[*.{py,pyi}] +indent_style = space +indent_size = 4 + +[*.swift] +indent_style = space +indent_size = 4 + +[*.go] +indent_style = tab +indent_size = 8 + +[Makefile] +indent_style = tab +indent_size = 8 diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/PARSER_NAME.h b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/PARSER_NAME.h new file mode 100644 index 00000000000..3dbbfd100d8 --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/PARSER_NAME.h @@ -0,0 +1,16 @@ +#ifndef TREE_SITTER_UPPER_PARSER_NAME_H_ +#define TREE_SITTER_UPPER_PARSER_NAME_H_ + +typedef struct TSLanguage TSLanguage; + +#ifdef __cplusplus +extern "C" { +#endif + +const TSLanguage *tree_sitter_PARSER_NAME(void); + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_UPPER_PARSER_NAME_H_ diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/PARSER_NAME.pc.in b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/PARSER_NAME.pc.in new file mode 100644 index 00000000000..deed9fa47f6 --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/PARSER_NAME.pc.in @@ -0,0 +1,11 @@ +prefix=@PREFIX@ +libdir=@LIBDIR@ +includedir=@INCLUDEDIR@ + +Name: tree-sitter-PARSER_NAME +Description: CAMEL_PARSER_NAME grammar for tree-sitter +URL: @URL@ +Version: @VERSION@ +Requires: @REQUIRES@ +Libs: -L${libdir} @ADDITIONAL_LIBS@ -ltree-sitter-PARSER_NAME +Cflags: -I${includedir} diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/Package.swift b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/Package.swift new file mode 100644 index 00000000000..f0cc1cd19d3 --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/Package.swift @@ -0,0 +1,48 @@ +// swift-tools-version:5.3 +import PackageDescription + +let package = Package( + name: 
"TreeSitterCAMEL_PARSER_NAME", + platforms: [.macOS(.v10_13), .iOS(.v11)], + products: [ + .library(name: "TreeSitterCAMEL_PARSER_NAME", targets: ["TreeSitterCAMEL_PARSER_NAME"]), + ], + dependencies: [], + targets: [ + .target(name: "TreeSitterCAMEL_PARSER_NAME", + path: ".", + exclude: [ + "Cargo.toml", + "Makefile", + "binding.gyp", + "bindings/c", + "bindings/go", + "bindings/node", + "bindings/python", + "bindings/rust", + "prebuilds", + "grammar.js", + "package.json", + "package-lock.json", + "pyproject.toml", + "setup.py", + "test", + "examples", + ".editorconfig", + ".github", + ".gitignore", + ".gitattributes", + ".gitmodules", + ], + sources: [ + "src/parser.c", + // NOTE: if your language has an external scanner, add it here. + ], + resources: [ + .copy("queries") + ], + publicHeadersPath: "bindings/swift", + cSettings: [.headerSearchPath("src")]) + ], + cLanguageStandard: .c11 +) diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/__init__.py b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/__init__.py new file mode 100644 index 00000000000..d3796ccb059 --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/__init__.py @@ -0,0 +1,5 @@ +"CAMEL_PARSER_NAME grammar for tree-sitter" + +from ._binding import language + +__all__ = ["language"] diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/__init__.pyi b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/__init__.pyi new file mode 100644 index 00000000000..5416666fc30 --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/__init__.pyi @@ -0,0 +1 @@ +def language() -> int: ... 
diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/alloc.h b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/alloc.h new file mode 100644 index 00000000000..1f4466d75c4 --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/alloc.h @@ -0,0 +1,54 @@ +#ifndef TREE_SITTER_ALLOC_H_ +#define TREE_SITTER_ALLOC_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +// Allow clients to override allocation functions +#ifdef TREE_SITTER_REUSE_ALLOCATOR + +extern void *(*ts_current_malloc)(size_t); +extern void *(*ts_current_calloc)(size_t, size_t); +extern void *(*ts_current_realloc)(void *, size_t); +extern void (*ts_current_free)(void *); + +#ifndef ts_malloc +#define ts_malloc ts_current_malloc +#endif +#ifndef ts_calloc +#define ts_calloc ts_current_calloc +#endif +#ifndef ts_realloc +#define ts_realloc ts_current_realloc +#endif +#ifndef ts_free +#define ts_free ts_current_free +#endif + +#else + +#ifndef ts_malloc +#define ts_malloc malloc +#endif +#ifndef ts_calloc +#define ts_calloc calloc +#endif +#ifndef ts_realloc +#define ts_realloc realloc +#endif +#ifndef ts_free +#define ts_free free +#endif + +#endif + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_ALLOC_H_ diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/binding.cc b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/binding.cc deleted file mode 100644 index d68a85abba8..00000000000 --- a/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/binding.cc +++ /dev/null @@ -1,28 +0,0 @@ -#include "tree_sitter/parser.h" -#include -#include "nan.h" - -using namespace v8; - -extern "C" TSLanguage * tree_sitter_PARSER_NAME(); - -namespace { - -NAN_METHOD(New) {} - -void Init(Local exports, Local module) { - Local tpl = Nan::New(New); - tpl->SetClassName(Nan::New("Language").ToLocalChecked()); - tpl->InstanceTemplate()->SetInternalFieldCount(1); - - Local constructor 
= Nan::GetFunction(tpl).ToLocalChecked(); - Local instance = constructor->NewInstance(Nan::GetCurrentContext()).ToLocalChecked(); - Nan::SetInternalFieldPointer(instance, 0, tree_sitter_PARSER_NAME()); - - Nan::Set(instance, Nan::New("name").ToLocalChecked(), Nan::New("PARSER_NAME").ToLocalChecked()); - Nan::Set(module, Nan::New("exports").ToLocalChecked(), instance); -} - -NODE_MODULE(tree_sitter_PARSER_NAME_binding, Init) - -} // namespace diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/binding.go b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/binding.go new file mode 100644 index 00000000000..b41863c52c9 --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/binding.go @@ -0,0 +1,13 @@ +package tree_sitter_PARSER_NAME + +// #cgo CFLAGS: -std=c11 -fPIC +// #include "../../src/parser.c" +// // NOTE: if your language has an external scanner, add it here. +import "C" + +import "unsafe" + +// Get the tree-sitter Language for this grammar. 
+func Language() unsafe.Pointer { + return unsafe.Pointer(C.tree_sitter_LOWER_PARSER_NAME()) +} diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/binding.gyp b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/binding.gyp index ba86afb0cce..087e65555a6 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/binding.gyp +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/binding.gyp @@ -2,18 +2,20 @@ "targets": [ { "target_name": "tree_sitter_PARSER_NAME_binding", + "dependencies": [ + "=RUST_BINDING_VERSION" [build-dependencies] -cc = "1.0" +cc = "1.0.87" diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/gitattributes b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/gitattributes new file mode 100644 index 00000000000..ffb52abeccb --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/gitattributes @@ -0,0 +1,11 @@ +* text eol=lf + +src/*.json linguist-generated +src/parser.c linguist-generated +src/tree_sitter/* linguist-generated + +bindings/** linguist-generated +binding.gyp linguist-generated +setup.py linguist-generated +Makefile linguist-generated +Package.swift linguist-generated diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/gitignore b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/gitignore new file mode 100644 index 00000000000..27fc43f720d --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/gitignore @@ -0,0 +1,38 @@ +# Rust artifacts +Cargo.lock +target/ + +# Node artifacts +build/ +prebuilds/ +node_modules/ +*.tgz + +# Swift artifacts +.build/ + +# Go artifacts +go.sum +_obj/ + +# Python artifacts +.venv/ +dist/ +*.egg-info +*.whl + +# C artifacts +*.a +*.so +*.so.* +*.dylib +*.dll +*.pc + +# Example dirs +/examples/*/ + +# Grammar volatiles +*.wasm +*.obj +*.o diff --git 
a/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/go.mod b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/go.mod new file mode 100644 index 00000000000..00e31a44728 --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/go.mod @@ -0,0 +1,5 @@ +module github.com/tree-sitter/tree-sitter-PARSER_NAME + +go 1.22 + +require github.com/smacker/go-tree-sitter v0.0.0-20230720070738-0d0a9f78d8f8 diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/grammar.js b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/grammar.js new file mode 100644 index 00000000000..62b7cf3b9fa --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/grammar.js @@ -0,0 +1,11 @@ +/// +// @ts-check + +module.exports = grammar({ + name: "LOWER_PARSER_NAME", + + rules: { + // TODO: add the actual grammar rules + source_file: $ => "hello" + } +}); diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/index.d.ts b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/index.d.ts new file mode 100644 index 00000000000..efe259eed03 --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/index.d.ts @@ -0,0 +1,28 @@ +type BaseNode = { + type: string; + named: boolean; +}; + +type ChildNode = { + multiple: boolean; + required: boolean; + types: BaseNode[]; +}; + +type NodeInfo = + | (BaseNode & { + subtypes: BaseNode[]; + }) + | (BaseNode & { + fields: { [name: string]: ChildNode }; + children: ChildNode[]; + }); + +type Language = { + name: string; + language: unknown; + nodeTypeInfo: NodeInfo[]; +}; + +declare const language: Language; +export = language; diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/index.js b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/index.js index bc5daf7ccaf..6657bcf42de 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/index.js +++ 
b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/index.js @@ -1,18 +1,6 @@ -try { - module.exports = require("../../build/Release/tree_sitter_PARSER_NAME_binding"); -} catch (error1) { - if (error1.code !== 'MODULE_NOT_FOUND') { - throw error1; - } - try { - module.exports = require("../../build/Debug/tree_sitter_PARSER_NAME_binding"); - } catch (error2) { - if (error2.code !== 'MODULE_NOT_FOUND') { - throw error2; - } - throw error1 - } -} +const root = require("path").join(__dirname, "..", ".."); + +module.exports = require("node-gyp-build")(root); try { module.exports.nodeTypeInfo = require("../../src/node-types.json"); diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/js-binding.cc b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/js-binding.cc new file mode 100644 index 00000000000..5b167cc85e6 --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/js-binding.cc @@ -0,0 +1,20 @@ +#include + +typedef struct TSLanguage TSLanguage; + +extern "C" TSLanguage *tree_sitter_PARSER_NAME(); + +// "tree-sitter", "language" hashed with BLAKE2 +const napi_type_tag LANGUAGE_TYPE_TAG = { + 0x8AF2E5212AD58ABF, 0xD5006CAD83ABBA16 +}; + +Napi::Object Init(Napi::Env env, Napi::Object exports) { + exports["name"] = Napi::String::New(env, "PARSER_NAME"); + auto language = Napi::External::New(env, tree_sitter_PARSER_NAME()); + language.TypeTag(&LANGUAGE_TYPE_TAG); + exports["language"] = language; + return exports; +} + +NODE_API_MODULE(tree_sitter_PARSER_NAME_binding, Init) diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/lib.rs b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/lib.rs index dab87e4fca6..f5ce6a53f29 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/lib.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/lib.rs @@ -1,13 +1,15 @@ -//! 
This crate provides PARSER_NAME language support for the [tree-sitter][] parsing library. +//! This crate provides CAMEL_PARSER_NAME language support for the [tree-sitter][] parsing library. //! //! Typically, you will use the [language][language func] function to add this language to a //! tree-sitter [Parser][], and then use the parser to parse some code: //! //! ``` -//! let code = ""; +//! let code = r#" +//! "#; //! let mut parser = tree_sitter::Parser::new(); -//! parser.set_language(tree_sitter_PARSER_NAME::language()).expect("Error loading PARSER_NAME grammar"); +//! parser.set_language(&tree_sitter_PARSER_NAME::language()).expect("Error loading CAMEL_PARSER_NAME grammar"); //! let tree = parser.parse(code, None).unwrap(); +//! assert!(!tree.root_node().has_error()); //! ``` //! //! [Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html @@ -31,14 +33,14 @@ pub fn language() -> Language { /// The content of the [`node-types.json`][] file for this grammar. /// /// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types -pub const NODE_TYPES: &'static str = include_str!("../../src/node-types.json"); +pub const NODE_TYPES: &str = include_str!("../../src/node-types.json"); // Uncomment these to include any queries that this grammar contains -// pub const HIGHLIGHTS_QUERY: &'static str = include_str!("../../queries/highlights.scm"); -// pub const INJECTIONS_QUERY: &'static str = include_str!("../../queries/injections.scm"); -// pub const LOCALS_QUERY: &'static str = include_str!("../../queries/locals.scm"); -// pub const TAGS_QUERY: &'static str = include_str!("../../queries/tags.scm"); +// pub const HIGHLIGHTS_QUERY: &str = include_str!("../../queries/highlights.scm"); +// pub const INJECTIONS_QUERY: &str = include_str!("../../queries/injections.scm"); +// pub const LOCALS_QUERY: &str = include_str!("../../queries/locals.scm"); +// pub const TAGS_QUERY: &str = include_str!("../../queries/tags.scm"); 
#[cfg(test)] mod tests { @@ -46,7 +48,7 @@ mod tests { fn test_can_load_grammar() { let mut parser = tree_sitter::Parser::new(); parser - .set_language(super::language()) - .expect("Error loading PARSER_NAME language"); + .set_language(&super::language()) + .expect("Error loading CAMEL_PARSER_NAME grammar"); } } diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/makefile b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/makefile new file mode 100644 index 00000000000..522c1fad3dd --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/makefile @@ -0,0 +1,109 @@ +VERSION := 0.0.1 + +LANGUAGE_NAME := tree-sitter-PARSER_NAME + +# repository +SRC_DIR := src + +PARSER_REPO_URL := $(shell git -C $(SRC_DIR) remote get-url origin 2>/dev/null) + +ifeq ($(PARSER_URL),) + PARSER_URL := $(subst .git,,$(PARSER_REPO_URL)) +ifeq ($(shell echo $(PARSER_URL) | grep '^[a-z][-+.0-9a-z]*://'),) + PARSER_URL := $(subst :,/,$(PARSER_URL)) + PARSER_URL := $(subst git@,https://,$(PARSER_URL)) +endif +endif + +TS ?= tree-sitter + +# ABI versioning +SONAME_MAJOR := $(word 1,$(subst ., ,$(VERSION))) +SONAME_MINOR := $(word 2,$(subst ., ,$(VERSION))) + +# install directory layout +PREFIX ?= /usr/local +INCLUDEDIR ?= $(PREFIX)/include +LIBDIR ?= $(PREFIX)/lib +PCLIBDIR ?= $(LIBDIR)/pkgconfig + +# object files +OBJS := $(patsubst %.c,%.o,$(wildcard $(SRC_DIR)/*.c)) + +# flags +ARFLAGS := rcs +override CFLAGS += -I$(SRC_DIR) -std=c11 -fPIC + +# OS-specific bits +ifeq ($(OS),Windows_NT) + $(error "Windows is not supported") +else ifeq ($(shell uname),Darwin) + SOEXT = dylib + SOEXTVER_MAJOR = $(SONAME_MAJOR).dylib + SOEXTVER = $(SONAME_MAJOR).$(SONAME_MINOR).dylib + LINKSHARED := $(LINKSHARED)-dynamiclib -Wl, + ifneq ($(ADDITIONAL_LIBS),) + LINKSHARED := $(LINKSHARED)$(ADDITIONAL_LIBS), + endif + LINKSHARED := $(LINKSHARED)-install_name,$(LIBDIR)/lib$(LANGUAGE_NAME).$(SONAME_MAJOR).dylib,-rpath,@executable_path/../Frameworks +else 
+ SOEXT = so + SOEXTVER_MAJOR = so.$(SONAME_MAJOR) + SOEXTVER = so.$(SONAME_MAJOR).$(SONAME_MINOR) + LINKSHARED := $(LINKSHARED)-shared -Wl, + ifneq ($(ADDITIONAL_LIBS),) + LINKSHARED := $(LINKSHARED)$(ADDITIONAL_LIBS) + endif + LINKSHARED := $(LINKSHARED)-soname,lib$(LANGUAGE_NAME).so.$(SONAME_MAJOR) +endif +ifneq ($(filter $(shell uname),FreeBSD NetBSD DragonFly),) + PCLIBDIR := $(PREFIX)/libdata/pkgconfig +endif + +all: lib$(LANGUAGE_NAME).a lib$(LANGUAGE_NAME).$(SOEXT) $(LANGUAGE_NAME).pc + +lib$(LANGUAGE_NAME).a: $(OBJS) + $(AR) $(ARFLAGS) $@ $^ + +lib$(LANGUAGE_NAME).$(SOEXT): $(OBJS) + $(CC) $(LDFLAGS) $(LINKSHARED) $^ $(LDLIBS) -o $@ +ifneq ($(STRIP),) + $(STRIP) $@ +endif + +$(LANGUAGE_NAME).pc: bindings/c/$(LANGUAGE_NAME).pc.in + sed -e 's|@URL@|$(PARSER_URL)|' \ + -e 's|@VERSION@|$(VERSION)|' \ + -e 's|@LIBDIR@|$(LIBDIR)|' \ + -e 's|@INCLUDEDIR@|$(INCLUDEDIR)|' \ + -e 's|@REQUIRES@|$(REQUIRES)|' \ + -e 's|@ADDITIONAL_LIBS@|$(ADDITIONAL_LIBS)|' \ + -e 's|=$(PREFIX)|=$${prefix}|' \ + -e 's|@PREFIX@|$(PREFIX)|' $< > $@ + +$(SRC_DIR)/parser.c: grammar.js + $(TS) generate --no-bindings + +install: all + install -Dm644 bindings/c/$(LANGUAGE_NAME).h '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/$(LANGUAGE_NAME).h + install -Dm644 $(LANGUAGE_NAME).pc '$(DESTDIR)$(PCLIBDIR)'/$(LANGUAGE_NAME).pc + install -Dm755 lib$(LANGUAGE_NAME).a '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).a + install -m755 lib$(LANGUAGE_NAME).$(SOEXT) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER) + ln -sf lib$(LANGUAGE_NAME).$(SOEXTVER) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR) + ln -sf lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXT) + +uninstall: + $(RM) '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).a \ + '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER) \ + '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXTVER_MAJOR) \ + '$(DESTDIR)$(LIBDIR)'/lib$(LANGUAGE_NAME).$(SOEXT) \ + '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter/$(LANGUAGE_NAME).h \ + 
'$(DESTDIR)$(PCLIBDIR)'/$(LANGUAGE_NAME).pc + +clean: + $(RM) $(OBJS) $(LANGUAGE_NAME).pc lib$(LANGUAGE_NAME).a lib$(LANGUAGE_NAME).$(SOEXT) + +test: + $(TS) test + +.PHONY: all install uninstall clean test diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/package.json b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/package.json index 18598797c83..fe67099b505 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/package.json +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/package.json @@ -1,19 +1,53 @@ { "name": "tree-sitter-PARSER_NAME", "version": "0.0.1", - "description": "PARSER_NAME grammar for tree-sitter", + "description": "CAMEL_PARSER_NAME grammar for tree-sitter", + "repository": "github:tree-sitter/tree-sitter-PARSER_NAME", + "license": "MIT", "main": "bindings/node", + "types": "bindings/node", "keywords": [ + "incremental", "parsing", - "incremental" + "tree-sitter", + "LOWER_PARSER_NAME" + ], + "files": [ + "grammar.js", + "binding.gyp", + "prebuilds/**", + "bindings/node/*", + "queries/*", + "src/**" ], "dependencies": { - "nan": "^2.12.1" + "node-addon-api": "^7.1.0", + "node-gyp-build": "^4.8.0" }, "devDependencies": { + "prebuildify": "^6.0.0", "tree-sitter-cli": "^CLI_VERSION" }, + "peerDependencies": { + "tree-sitter": "^0.21.0" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + }, "scripts": { - "test": "tree-sitter test" - } + "install": "node-gyp-build", + "prebuildify": "prebuildify --napi --strip", + "build": "tree-sitter generate --no-bindings", + "build-wasm": "tree-sitter build-wasm", + "test": "tree-sitter test", + "parse": "tree-sitter parse" + }, + "tree-sitter": [ + { + "scope": "source.LOWER_PARSER_NAME", + "injection-regex": "^LOWER_PARSER_NAME$" + } + ] } diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/py-binding.c b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/py-binding.c 
new file mode 100644 index 00000000000..e2fed9b3016 --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/py-binding.c @@ -0,0 +1,27 @@ +#include + +typedef struct TSLanguage TSLanguage; + +TSLanguage *tree_sitter_LOWER_PARSER_NAME(void); + +static PyObject* _binding_language(PyObject *self, PyObject *args) { + return PyLong_FromVoidPtr(tree_sitter_LOWER_PARSER_NAME()); +} + +static PyMethodDef methods[] = { + {"language", _binding_language, METH_NOARGS, + "Get the tree-sitter language for this grammar."}, + {NULL, NULL, 0, NULL} +}; + +static struct PyModuleDef module = { + .m_base = PyModuleDef_HEAD_INIT, + .m_name = "_binding", + .m_doc = NULL, + .m_size = -1, + .m_methods = methods +}; + +PyMODINIT_FUNC PyInit__binding(void) { + return PyModule_Create(&module); +} diff --git a/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/pyproject.toml b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/pyproject.toml new file mode 100644 index 00000000000..272dbb1b695 --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/pyproject.toml @@ -0,0 +1,29 @@ +[build-system] +requires = ["setuptools>=42", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "tree-sitter-PARSER_NAME" +description = "CAMEL_PARSER_NAME grammar for tree-sitter" +version = "0.0.1" +keywords = ["incremental", "parsing", "tree-sitter", "PARSER_NAME"] +classifiers = [ + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Topic :: Software Development :: Compilers", + "Topic :: Text Processing :: Linguistic", + "Typing :: Typed" +] +requires-python = ">=3.8" +license.text = "MIT" +readme = "README.md" + +[project.urls] +Homepage = "https://github.com/tree-sitter/tree-sitter-PARSER_NAME" + +[project.optional-dependencies] +core = ["tree-sitter~=0.21"] + +[tool.cibuildwheel] +build = "cp38-*" +build-frontend = "build" diff --git 
a/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/setup.py b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/setup.py new file mode 100644 index 00000000000..e06337b63e9 --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/cli/src/generate/templates/setup.py @@ -0,0 +1,57 @@ +from os.path import isdir, join +from platform import system + +from setuptools import Extension, find_packages, setup +from setuptools.command.build import build +from wheel.bdist_wheel import bdist_wheel + + +class Build(build): + def run(self): + if isdir("queries"): + dest = join(self.build_lib, "tree_sitter_PARSER_NAME", "queries") + self.copy_tree("queries", dest) + super().run() + + +class BdistWheel(bdist_wheel): + def get_tag(self): + python, abi, platform = super().get_tag() + if python.startswith("cp"): + python, abi = "cp38", "abi3" + return python, abi, platform + + +setup( + packages=find_packages("bindings/python"), + package_dir={"": "bindings/python"}, + package_data={ + "tree_sitter_LOWER_PARSER_NAME": ["*.pyi", "py.typed"], + "tree_sitter_LOWER_PARSER_NAME.queries": ["*.scm"], + }, + ext_package="tree_sitter_LOWER_PARSER_NAME", + ext_modules=[ + Extension( + name="_binding", + sources=[ + "bindings/python/tree_sitter_LOWER_PARSER_NAME/binding.c", + "src/parser.c", + # NOTE: if your language uses an external scanner, add it here. 
+ ], + extra_compile_args=( + ["-std=c11"] if system() != 'Windows' else [] + ), + define_macros=[ + ("Py_LIMITED_API", "0x03080000"), + ("PY_SSIZE_T_CLEAN", None) + ], + include_dirs=["src"], + py_limited_api=True, + ) + ], + cmdclass={ + "build": Build, + "bdist_wheel": BdistWheel + }, + zip_safe=False +) diff --git a/third-party/tree-sitter/tree-sitter/cli/src/highlight.rs b/third-party/tree-sitter/tree-sitter/cli/src/highlight.rs index 6cf1580e9fd..e48ca4e8c37 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/highlight.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/highlight.rs @@ -1,4 +1,3 @@ -use super::util; use ansi_term::Color; use anyhow::Result; use lazy_static::lazy_static; @@ -13,7 +12,7 @@ use std::{fs, io, path, str, usize}; use tree_sitter_highlight::{HighlightConfiguration, HighlightEvent, Highlighter, HtmlRenderer}; use tree_sitter_loader::Loader; -pub const HTML_HEADER: &'static str = " +pub const HTML_HEADER: &str = " Tree-sitter Highlighting @@ -35,7 +34,7 @@ pub const HTML_HEADER: &'static str = " "; -pub const HTML_FOOTER: &'static str = " +pub const HTML_FOOTER: &str = " "; @@ -68,13 +67,14 @@ impl Theme { Ok(serde_json::from_str(&json).unwrap_or_default()) } + #[must_use] pub fn default_style(&self) -> Style { Style::default() } } impl<'de> Deserialize<'de> for Theme { - fn deserialize(deserializer: D) -> std::result::Result + fn deserialize(deserializer: D) -> std::result::Result where D: Deserializer<'de>, { @@ -144,9 +144,7 @@ impl Serialize for Theme { impl Default for Theme { fn default() -> Self { - serde_json::from_str( - r#" - { + serde_json::from_value(json!({ "attribute": {"color": 124, "italic": true}, "comment": {"color": 245, "italic": true}, "constant.builtin": {"color": 94, "bold": true}, @@ -169,9 +167,7 @@ impl Default for Theme { "type.builtin": {"color": 23, "bold": true}, "variable.builtin": {"bold": true}, "variable.parameter": {"underline": true} - } - "#, - ) + })) .unwrap() } } @@ -182,17 +178,17 @@ fn 
parse_style(style: &mut Style, json: Value) { match property_name.as_str() { "bold" => { if value == Value::Bool(true) { - style.ansi = style.ansi.bold() + style.ansi = style.ansi.bold(); } } "italic" => { if value == Value::Bool(true) { - style.ansi = style.ansi.italic() + style.ansi = style.ansi.italic(); } } "underline" => { if value == Value::Bool(true) { - style.ansi = style.ansi.underline() + style.ansi = style.ansi.underline(); } } "color" => { @@ -220,10 +216,7 @@ fn parse_style(style: &mut Style, json: Value) { fn parse_color(json: Value) -> Option { match json { - Value::Number(n) => match n.as_u64() { - Some(n) => Some(Color::Fixed(n as u8)), - _ => None, - }, + Value::Number(n) => n.as_u64().map(|n| Color::Fixed(n as u8)), Value::String(s) => match s.to_lowercase().as_str() { "black" => Some(Color::Black), "blue" => Some(Color::Blue), @@ -234,7 +227,7 @@ fn parse_color(json: Value) -> Option { "white" => Some(Color::White), "yellow" => Some(Color::Yellow), s => { - if let Some((red, green, blue)) = hex_string_to_rgb(&s) { + if let Some((red, green, blue)) = hex_string_to_rgb(s) { Some(Color::RGB(red, green, blue)) } else { None @@ -246,7 +239,7 @@ fn parse_color(json: Value) -> Option { } fn hex_string_to_rgb(s: &str) -> Option<(u8, u8, u8)> { - if s.starts_with("#") && s.len() >= 7 { + if s.starts_with('#') && s.len() >= 7 { if let (Ok(red), Ok(green), Ok(blue)) = ( u8::from_str_radix(&s[1..3], 16), u8::from_str_radix(&s[3..5], 16), @@ -281,7 +274,7 @@ fn style_to_css(style: ansi_term::Style) -> String { fn write_color(buffer: &mut String, color: Color) { if let Color::RGB(r, g, b) = &color { - write!(buffer, "color: #{:x?}{:x?}{:x?}", r, g, b).unwrap() + write!(buffer, "color: #{r:02x}{g:02x}{b:02x}").unwrap(); } else { write!( buffer, @@ -299,18 +292,14 @@ fn write_color(buffer: &mut String, color: Color) { Color::RGB(_, _, _) => unreachable!(), } ) - .unwrap() + .unwrap(); } } fn terminal_supports_truecolor() -> bool { - use std::env; - - if let 
Ok(truecolor) = env::var("COLORTERM") { + std::env::var("COLORTERM").map_or(false, |truecolor| { truecolor == "truecolor" || truecolor == "24bit" - } else { - false - } + }) } fn closest_xterm_color(red: u8, green: u8, blue: u8) -> Color { @@ -324,9 +313,9 @@ fn closest_xterm_color(red: u8, green: u8, blue: u8) -> Color { // Get the xterm color with the minimum Euclidean distance to the target color // i.e. distance = √ (r2 - r1)² + (g2 - g1)² + (b2 - b1)² let distances = colors.map(|(color_id, (r, g, b))| { - let r_delta: u32 = (max(r, red) - min(r, red)).into(); - let g_delta: u32 = (max(g, green) - min(g, green)).into(); - let b_delta: u32 = (max(b, blue) - min(b, blue)).into(); + let r_delta = (max(r, red) - min(r, red)) as u32; + let g_delta = (max(g, green) - min(g, green)) as u32; + let b_delta = (max(b, blue) - min(b, blue)) as u32; let distance = r_delta.pow(2) + g_delta.pow(2) + b_delta.pow(2); // don't need to actually take the square root for the sake of comparison (color_id, distance) @@ -385,40 +374,38 @@ pub fn html( config: &HighlightConfiguration, quiet: bool, print_time: bool, + cancellation_flag: Option<&AtomicUsize>, ) -> Result<()> { use std::io::Write; let stdout = io::stdout(); let mut stdout = stdout.lock(); let time = Instant::now(); - let cancellation_flag = util::cancel_on_stdin(); let mut highlighter = Highlighter::new(); - let events = highlighter.highlight(config, source, Some(&cancellation_flag), |string| { + let events = highlighter.highlight(config, source, cancellation_flag, |string| { loader.highlight_config_for_injection_string(string) })?; let mut renderer = HtmlRenderer::new(); renderer.render(events, source, &move |highlight| { - if let Some(css_style) = &theme.styles[highlight.0].css { - css_style.as_bytes() - } else { - "".as_bytes() - } + theme.styles[highlight.0] + .css + .as_ref() + .map_or_else(|| "".as_bytes(), |css_style| css_style.as_bytes()) })?; if !quiet { - write!(&mut stdout, "\n")?; + writeln!(&mut stdout, "
")?; for (i, line) in renderer.lines().enumerate() { - write!( + writeln!( &mut stdout, - "\n", + "", i + 1, - line )?; } - write!(&mut stdout, "
{}{}
{}{line}
\n")?; + writeln!(&mut stdout, "")?; } if print_time { @@ -433,8 +420,8 @@ mod tests { use super::*; use std::env; - const JUNGLE_GREEN: &'static str = "#26A69A"; - const DARK_CYAN: &'static str = "#00AF87"; + const JUNGLE_GREEN: &str = "#26A69A"; + const DARK_CYAN: &str = "#00AF87"; #[test] fn test_parse_style() { @@ -448,7 +435,7 @@ mod tests { env::set_var("COLORTERM", ""); parse_style(&mut style, Value::String(DARK_CYAN.to_string())); assert_eq!(style.ansi.foreground, Some(Color::Fixed(36))); - assert_eq!(style.css, Some("style=\'color: #0af87\'".to_string())); + assert_eq!(style.css, Some("style=\'color: #00af87\'".to_string())); // junglegreen is not an ANSI color and is preserved when the terminal supports it env::set_var("COLORTERM", "truecolor"); diff --git a/third-party/tree-sitter/tree-sitter/cli/src/lib.rs b/third-party/tree-sitter/tree-sitter/cli/src/lib.rs index d36417c26cc..549db7739dc 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/lib.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/lib.rs @@ -1,3 +1,5 @@ +#![doc = include_str!("../README.md")] + pub mod generate; pub mod highlight; pub mod logger; @@ -14,3 +16,7 @@ pub mod wasm; #[cfg(test)] mod tests; + +// To run compile fail tests +#[cfg(doctest)] +mod tests; diff --git a/third-party/tree-sitter/tree-sitter/cli/src/main.rs b/third-party/tree-sitter/tree-sitter/cli/src/main.rs index fb2a63270ac..c46d38d7a35 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/main.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/main.rs @@ -1,21 +1,339 @@ +use anstyle::{AnsiColor, Color, Style}; use anyhow::{anyhow, Context, Result}; -use clap::{App, AppSettings, Arg, SubCommand}; +use clap::{crate_authors, Args, Command, FromArgMatches as _, Subcommand}; use glob::glob; +use regex::Regex; +use std::collections::HashSet; use std::path::{Path, PathBuf}; use std::{env, fs, u64}; -use tree_sitter::Point; -use tree_sitter_cli::parse::ParseOutput; +use tree_sitter::{ffi, Parser, Point}; +use 
tree_sitter_cli::test::TestOptions; use tree_sitter_cli::{ - generate, highlight, logger, parse, playground, query, tags, test, test_highlight, test_tags, - util, wasm, + generate, highlight, logger, + parse::{self, ParseFileOptions, ParseOutput}, + playground, query, tags, test, test_highlight, test_tags, util, wasm, }; use tree_sitter_config::Config; +use tree_sitter_highlight::Highlighter; use tree_sitter_loader as loader; +use tree_sitter_tags::TagsContext; -const BUILD_VERSION: &'static str = env!("CARGO_PKG_VERSION"); +const BUILD_VERSION: &str = env!("CARGO_PKG_VERSION"); const BUILD_SHA: Option<&'static str> = option_env!("BUILD_SHA"); const DEFAULT_GENERATE_ABI_VERSION: usize = 14; +#[derive(Subcommand)] +#[command(about="Generates and tests parsers", author=crate_authors!("\n"), styles=get_styles())] +enum Commands { + InitConfig(InitConfig), + Generate(Generate), + Parse(Parse), + Test(Test), + Query(Query), + Highlight(Highlight), + Tags(Tags), + BuildWasm(BuildWasm), + Playground(Playground), + DumpLanguages(DumpLanguages), +} + +#[derive(Args)] +#[command(about = "Generate a default config file")] +struct InitConfig; + +#[derive(Args)] +#[command(about = "Generate a parser", alias = "gen", alias = "g")] +struct Generate { + #[arg(index = 1, help = "The path to the grammar file")] + pub grammar_path: Option, + #[arg(long, short, help = "Show debug log during generation")] + pub log: bool, + #[arg( + long = "abi", + value_name = "VERSION", + help = format!(concat!( + "Select the language ABI version to generate (default {}).\n", + "Use --abi=latest to generate the newest supported version ({}).", + ), + DEFAULT_GENERATE_ABI_VERSION, + tree_sitter::LANGUAGE_VERSION, + ) + )] + pub abi_version: Option, + #[arg(long, help = "Don't generate language bindings")] + pub no_bindings: bool, + #[arg( + long, + short = 'b', + help = "Compile all defined languages in the current dir" + )] + pub build: bool, + #[arg(long, short = '0', help = "Compile a parser in 
debug mode")] + pub debug_build: bool, + #[arg( + long, + value_name = "PATH", + help = "The path to the directory containing the parser library" + )] + pub libdir: Option, + #[arg( + long, + help = "Produce a report of the states for the given rule, use `-` to report every rule" + )] + pub report_states_for_rule: Option, + + #[arg( + long, + value_name = "EXECUTABLE", + env = "TREE_SITTER_JS_RUNTIME", + help = "The path to the JavaScript runtime to use for generating parsers" + )] + pub js_runtime: Option, +} + +#[derive(Args)] +#[command(about = "Parse files", alias = "p")] +struct Parse { + #[arg( + long = "paths", + help = "The path to a file with paths to source file(s)" + )] + pub paths_file: Option, + #[arg(num_args=1.., help = "The source file(s) to use")] + pub paths: Option>, + #[arg( + long, + help = "Select a language by the scope instead of a file extension" + )] + pub scope: Option, + #[arg(long, short = 'd', help = "Show parsing debug log")] + pub debug: bool, + #[arg(long, short = '0', help = "Compile a parser in debug mode")] + pub debug_build: bool, + #[arg( + long, + short = 'D', + help = "Produce the log.html file with debug graphs" + )] + pub debug_graph: bool, + #[arg( + long, + help = "Compile parsers to wasm instead of native dynamic libraries" + )] + pub wasm: bool, + #[arg(long = "dot", help = "Output the parse data with graphviz dot")] + pub output_dot: bool, + #[arg( + long = "xml", + short = 'x', + help = "Output the parse data in XML format" + )] + pub output_xml: bool, + #[arg(long, short, help = "Show parsing statistic")] + pub stat: bool, + #[arg(long, help = "Interrupt the parsing process by timeout (µs)")] + pub timeout: Option, + #[arg(long, short, help = "Measure execution time")] + pub time: bool, + #[arg(long, short, help = "Suppress main output")] + pub quiet: bool, + #[arg( + long, + num_args = 1.., + help = "Apply edits in the format: \"row, col delcount insert_text\"" + )] + pub edits: Option>, + #[arg(long, help = "The 
encoding of the input files")] + pub encoding: Option, + #[arg( + long, + help = "Open `log.html` in the default browser, if `--debug-graph` is supplied" + )] + pub open_log: bool, + #[arg(long, help = "The path to an alternative config.json file")] + pub config_path: Option, +} + +#[derive(Args)] +#[command(about = "Run a parser's tests", alias = "t")] +struct Test { + #[arg( + long, + short, + help = "Only run corpus test cases whose name includes the given string" + )] + pub filter: Option, + #[arg( + long, + short, + help = "Only run corpus test cases whose name matches the given regex" + )] + pub include: Option, + #[arg( + long, + short, + help = "Only run corpus test cases whose name does not match the given regex" + )] + pub exclude: Option, + #[arg( + long, + short, + help = "Update all syntax trees in corpus files with current parser output" + )] + pub update: bool, + #[arg(long, short = 'd', help = "Show parsing debug log")] + pub debug: bool, + #[arg(long, short = '0', help = "Compile a parser in debug mode")] + pub debug_build: bool, + #[arg( + long, + short = 'D', + help = "Produce the log.html file with debug graphs" + )] + pub debug_graph: bool, + #[arg( + long, + help = "Compile parsers to wasm instead of native dynamic libraries" + )] + pub wasm: bool, + #[arg( + long, + help = "Open `log.html` in the default browser, if `--debug-graph` is supplied" + )] + pub open_log: bool, + #[arg(long, help = "The path to an alternative config.json file")] + pub config_path: Option, +} + +#[derive(Args)] +#[command(about = "Search files using a syntax tree query", alias = "q")] +struct Query { + #[arg(help = "Path to a file with queries", index = 1, required = true)] + query_path: String, + #[arg(long, short, help = "Measure execution time")] + pub time: bool, + #[arg(long, short, help = "Suppress main output")] + pub quiet: bool, + #[arg( + long = "paths", + help = "The path to a file with paths to source file(s)" + )] + pub paths_file: Option, + #[arg(index 
= 2, num_args=1.., help = "The source file(s) to use")] + pub paths: Option>, + #[arg( + long, + help = "The range of byte offsets in which the query will be executed" + )] + pub byte_range: Option, + #[arg(long, help = "The range of rows in which the query will be executed")] + pub row_range: Option, + #[arg( + long, + help = "Select a language by the scope instead of a file extension" + )] + pub scope: Option, + #[arg(long, short, help = "Order by captures instead of matches")] + pub captures: bool, + #[arg(long, help = "Whether to run query tests or not")] + pub test: bool, + #[arg(long, help = "The path to an alternative config.json file")] + pub config_path: Option, +} + +#[derive(Args)] +#[command(about = "Highlight a file", alias = "hi")] +struct Highlight { + #[arg(long, short = 'H', help = "Generate highlighting as an HTML document")] + pub html: bool, + #[arg( + long, + help = "Check that highlighting captures conform strictly to standards" + )] + pub check: bool, + #[arg(long, help = "The path to a file with captures")] + pub captures_path: Option, + #[arg(long, num_args = 1.., help = "The paths to files with queries")] + pub query_paths: Option>, + #[arg( + long, + help = "Select a language by the scope instead of a file extension" + )] + pub scope: Option, + #[arg(long, short, help = "Measure execution time")] + pub time: bool, + #[arg(long, short, help = "Suppress main output")] + pub quiet: bool, + #[arg( + long = "paths", + help = "The path to a file with paths to source file(s)" + )] + pub paths_file: Option, + #[arg(num_args = 1.., help = "The source file(s) to use")] + pub paths: Option>, + #[arg(long, help = "The path to an alternative config.json file")] + pub config_path: Option, +} + +#[derive(Args)] +#[command(about = "Generate a list of tags")] +struct Tags { + #[arg( + long, + help = "Select a language by the scope instead of a file extension" + )] + pub scope: Option, + #[arg(long, short, help = "Measure execution time")] + pub time: 
bool, + #[arg(long, short, help = "Suppress main output")] + pub quiet: bool, + #[arg( + long = "paths", + help = "The path to a file with paths to source file(s)" + )] + pub paths_file: Option, + #[arg(num_args = 1.., help = "The source file(s) to use")] + pub paths: Option>, + #[arg(long, help = "The path to an alternative config.json file")] + pub config_path: Option, +} + +#[derive(Args)] +#[command(about = "Compile a parser to WASM", alias = "bw")] +struct BuildWasm { + #[arg( + long, + help = "Run emscripten via docker even if it is installed locally" + )] + pub docker: bool, + #[arg(index = 1, num_args = 1.., help = "The path to output the wasm file")] + pub path: Option, +} + +#[derive(Args)] +#[command( + about = "Start local playground for a parser in the browser", + alias = "play", + alias = "pg", + alias = "web-ui" +)] +struct Playground { + #[arg(long, short, help = "Don't open in default browser")] + pub quiet: bool, + #[arg( + long, + help = "Path to the directory containing the grammar and wasm files" + )] + pub grammar_path: Option, +} + +#[derive(Args)] +#[command(about = "Print info about all known language parsers", alias = "langs")] +struct DumpLanguages { + #[arg(long, help = "The path to an alternative config.json file")] + pub config_path: Option, +} + fn main() { let result = run(); if let Err(err) = &result { @@ -26,255 +344,43 @@ fn main() { } } if !err.to_string().is_empty() { - eprintln!("{:?}", err); + eprintln!("{err:?}"); } std::process::exit(1); } } fn run() -> Result<()> { - let version = if let Some(build_sha) = BUILD_SHA { - format!("{} ({})", BUILD_VERSION, build_sha) - } else { - BUILD_VERSION.to_string() - }; - - let debug_arg = Arg::with_name("debug") - .help("Show parsing debug log") - .long("debug") - .short("d"); - - let debug_graph_arg = Arg::with_name("debug-graph") - .help("Produce the log.html file with debug graphs") - .long("debug-graph") - .short("D"); - - let debug_build_arg = Arg::with_name("debug-build") - 
.help("Compile a parser in debug mode") - .long("debug-build") - .short("0"); - - let paths_file_arg = Arg::with_name("paths-file") - .help("The path to a file with paths to source file(s)") - .long("paths") - .takes_value(true); - - let paths_arg = Arg::with_name("paths") - .help("The source file(s) to use") - .multiple(true); - - let scope_arg = Arg::with_name("scope") - .help("Select a language by the scope instead of a file extension") - .long("scope") - .takes_value(true); - - let time_arg = Arg::with_name("time") - .help("Measure execution time") - .long("time") - .short("t"); - - let quiet_arg = Arg::with_name("quiet") - .help("Suppress main output") - .long("quiet") - .short("q"); - - let matches = App::new("tree-sitter") - .author("Max Brunsfeld ") - .about("Generates and tests parsers") - .version(version.as_str()) - .setting(AppSettings::SubcommandRequiredElseHelp) - .global_setting(AppSettings::ColoredHelp) - .global_setting(AppSettings::DeriveDisplayOrder) - .global_setting(AppSettings::DisableHelpSubcommand) - .subcommand(SubCommand::with_name("init-config").about("Generate a default config file")) - .subcommand( - SubCommand::with_name("generate") - .alias("gen") - .alias("g") - .about("Generate a parser") - .arg(Arg::with_name("grammar-path").index(1)) - .arg(Arg::with_name("log").long("log")) - .arg( - Arg::with_name("abi-version") - .long("abi") - .value_name("version") - .help(&format!( - concat!( - "Select the language ABI version to generate (default {}).\n", - "Use --abi=latest to generate the newest supported version ({}).", - ), - DEFAULT_GENERATE_ABI_VERSION, - tree_sitter::LANGUAGE_VERSION, - )), - ) - .arg(Arg::with_name("no-bindings").long("no-bindings")) - .arg( - Arg::with_name("build") - .long("build") - .short("b") - .help("Compile all defined languages in the current dir"), - ) - .arg(&debug_build_arg) - .arg( - Arg::with_name("libdir") - .long("libdir") - .takes_value(true) - .value_name("path"), - ) - .arg( - 
Arg::with_name("report-states-for-rule") - .long("report-states-for-rule") - .value_name("rule-name") - .takes_value(true), - ), - ) - .subcommand( - SubCommand::with_name("parse") - .alias("p") - .about("Parse files") - .arg(&paths_file_arg) - .arg(&paths_arg) - .arg(&scope_arg) - .arg(&debug_arg) - .arg(&debug_build_arg) - .arg(&debug_graph_arg) - .arg(Arg::with_name("output-dot").long("dot")) - .arg(Arg::with_name("output-xml").long("xml").short("x")) - .arg( - Arg::with_name("stat") - .help("Show parsing statistic") - .long("stat") - .short("s"), - ) - .arg( - Arg::with_name("timeout") - .help("Interrupt the parsing process by timeout (µs)") - .long("timeout") - .takes_value(true), - ) - .arg(&time_arg) - .arg(&quiet_arg) - .arg( - Arg::with_name("edits") - .help("Apply edits in the format: \"row,col del_count insert_text\"") - .long("edit") - .short("edit") - .takes_value(true) - .multiple(true) - .number_of_values(1), - ), - ) - .subcommand( - SubCommand::with_name("query") - .alias("q") - .about("Search files using a syntax tree query") - .arg( - Arg::with_name("query-path") - .help("Path to a file with queries") - .index(1) - .required(true), - ) - .arg(&time_arg) - .arg(&quiet_arg) - .arg(&paths_file_arg) - .arg(&paths_arg.clone().index(2)) - .arg( - Arg::with_name("byte-range") - .help("The range of byte offsets in which the query will be executed") - .long("byte-range") - .takes_value(true), - ) - .arg( - Arg::with_name("row-range") - .help("The range of rows in which the query will be executed") - .long("row-range") - .takes_value(true), - ) - .arg(&scope_arg) - .arg(Arg::with_name("captures").long("captures").short("c")) - .arg(Arg::with_name("test").long("test")), - ) - .subcommand( - SubCommand::with_name("tags") - .about("Generate a list of tags") - .arg(&scope_arg) - .arg(&time_arg) - .arg(&quiet_arg) - .arg(&paths_file_arg) - .arg(&paths_arg), - ) - .subcommand( - SubCommand::with_name("test") - .alias("t") - .about("Run a parser's tests") - .arg( 
- Arg::with_name("filter") - .long("filter") - .short("f") - .takes_value(true) - .help("Only run corpus test cases whose name includes the given string"), - ) - .arg( - Arg::with_name("update") - .long("update") - .short("u") - .help("Update all syntax trees in corpus files with current parser output"), - ) - .arg(&debug_arg) - .arg(&debug_build_arg) - .arg(&debug_graph_arg), - ) - .subcommand( - SubCommand::with_name("highlight") - .about("Highlight a file") - .arg( - Arg::with_name("html") - .help("Generate highlighting as an HTML document") - .long("html") - .short("H"), - ) - .arg(&scope_arg) - .arg(&time_arg) - .arg(&quiet_arg) - .arg(&paths_file_arg) - .arg(&paths_arg), - ) - .subcommand( - SubCommand::with_name("build-wasm") - .alias("bw") - .about("Compile a parser to WASM") - .arg( - Arg::with_name("docker") - .long("docker") - .help("Run emscripten via docker even if it is installed locally"), - ) - .arg(Arg::with_name("path").index(1).multiple(true)), - ) - .subcommand( - SubCommand::with_name("playground") - .alias("play") - .alias("pg") - .alias("web-ui") - .about("Start local playground for a parser in the browser") - .arg( - Arg::with_name("quiet") - .long("quiet") - .short("q") - .help("Don't open in default browser"), - ), + let version = BUILD_SHA.map_or_else( + || BUILD_VERSION.to_string(), + |build_sha| format!("{BUILD_VERSION} ({build_sha})"), + ); + let version: &'static str = Box::leak(version.into_boxed_str()); + + let cli = Command::new("tree-sitter") + .help_template( + "\ +{before-help}{name} {version} +{author-with-newline}{about-with-newline} +{usage-heading} {usage} + +{all-args}{after-help} +", ) - .subcommand( - SubCommand::with_name("dump-languages") - .about("Print info about all known language parsers"), - ) - .get_matches(); + .version(version) + .subcommand_required(true) + .arg_required_else_help(true) + .disable_help_subcommand(true) + .disable_colored_help(false); + let cli = Commands::augment_subcommands(cli); + + let 
command = Commands::from_arg_matches(&cli.get_matches())?; let current_dir = env::current_dir().unwrap(); - let config = Config::load()?; let mut loader = loader::Loader::new()?; - match matches.subcommand() { - ("init-config", Some(_)) => { + match command { + Commands::InitConfig(_) => { if let Ok(Some(config_path)) = Config::find_config_file() { return Err(anyhow!( "Remove your existing config file first: {}", @@ -291,171 +397,137 @@ fn run() -> Result<()> { ); } - ("generate", Some(matches)) => { - let grammar_path = matches.value_of("grammar-path"); - let debug_build = matches.is_present("debug-build"); - let build = matches.is_present("build"); - let libdir = matches.value_of("libdir"); - let report_symbol_name = matches.value_of("report-states-for-rule").or_else(|| { - if matches.is_present("report-states") { - Some("") - } else { - None - } - }); - if matches.is_present("log") { + Commands::Generate(generate_options) => { + if generate_options.log { logger::init(); } - let abi_version = - matches - .value_of("abi-version") - .map_or(DEFAULT_GENERATE_ABI_VERSION, |version| { - if version == "latest" { - tree_sitter::LANGUAGE_VERSION - } else { - version.parse().expect("invalid abi version flag") - } - }); - let generate_bindings = !matches.is_present("no-bindings"); + let abi_version = generate_options.abi_version.as_ref().map_or( + DEFAULT_GENERATE_ABI_VERSION, + |version| { + if version == "latest" { + tree_sitter::LANGUAGE_VERSION + } else { + version.parse().expect("invalid abi version flag") + } + }, + ); generate::generate_parser_in_directory( ¤t_dir, - grammar_path, + generate_options.grammar_path.as_deref(), abi_version, - generate_bindings, - report_symbol_name, + !generate_options.no_bindings, + generate_options.report_states_for_rule.as_deref(), + generate_options.js_runtime.as_deref(), )?; - if build { - if let Some(path) = libdir { + if generate_options.build { + if let Some(path) = generate_options.libdir { loader = 
loader::Loader::with_parser_lib_path(PathBuf::from(path)); } - loader.use_debug_build(debug_build); + loader.use_debug_build(generate_options.debug_build); loader.languages_at_path(¤t_dir)?; } } - ("test", Some(matches)) => { - let debug = matches.is_present("debug"); - let debug_graph = matches.is_present("debug-graph"); - let debug_build = matches.is_present("debug-build"); - let update = matches.is_present("update"); - let filter = matches.value_of("filter"); - - if debug { - // For augmenting debug logging in external scanners - env::set_var("TREE_SITTER_DEBUG", "1"); - } - - loader.use_debug_build(debug_build); - - let languages = loader.languages_at_path(¤t_dir)?; - let language = languages - .first() - .ok_or_else(|| anyhow!("No language found"))?; - let test_dir = current_dir.join("test"); - - // Run the corpus tests. Look for them at two paths: `test/corpus` and `corpus`. - let mut test_corpus_dir = test_dir.join("corpus"); - if !test_corpus_dir.is_dir() { - test_corpus_dir = current_dir.join("corpus"); - } - if test_corpus_dir.is_dir() { - test::run_tests_at_path( - *language, - &test_corpus_dir, - debug, - debug_graph, - filter, - update, - )?; - } - - // Check that all of the queries are valid. - test::check_queries_at_path(*language, ¤t_dir.join("queries"))?; - - // Run the syntax highlighting tests. 
- let test_highlight_dir = test_dir.join("highlight"); - if test_highlight_dir.is_dir() { - test_highlight::test_highlights(&loader, &test_highlight_dir)?; - } - - let test_tag_dir = test_dir.join("tags"); - if test_tag_dir.is_dir() { - test_tags::test_tags(&loader, &test_tag_dir)?; - } - } - - ("parse", Some(matches)) => { - let debug = matches.is_present("debug"); - let debug_graph = matches.is_present("debug-graph"); - let debug_build = matches.is_present("debug-build"); - - let output = if matches.is_present("output-dot") { + Commands::Parse(parse_options) => { + let config = Config::load(parse_options.config_path)?; + let output = if parse_options.output_dot { ParseOutput::Dot - } else if matches.is_present("output-xml") { + } else if parse_options.output_xml { ParseOutput::Xml - } else if matches.is_present("quiet") { + } else if parse_options.quiet { ParseOutput::Quiet } else { ParseOutput::Normal }; - let time = matches.is_present("time"); - let edits = matches - .values_of("edits") - .map_or(Vec::new(), |e| e.collect()); - let cancellation_flag = util::cancel_on_stdin(); + let encoding = if let Some(encoding) = parse_options.encoding { + match encoding.as_str() { + "utf16" => Some(ffi::TSInputEncodingUTF16), + "utf8" => Some(ffi::TSInputEncodingUTF8), + _ => return Err(anyhow!("Invalid encoding. 
Expected one of: utf8, utf16")), + } + } else { + None + }; + + let time = parse_options.time; + let edits = parse_options.edits.unwrap_or_default(); + let cancellation_flag = util::cancel_on_signal(); + let mut parser = Parser::new(); - if debug { + if parse_options.debug { // For augmenting debug logging in external scanners env::set_var("TREE_SITTER_DEBUG", "1"); } - loader.use_debug_build(debug_build); + loader.use_debug_build(parse_options.debug_build); - let timeout = matches - .value_of("timeout") - .map_or(0, |t| u64::from_str_radix(t, 10).unwrap()); + #[cfg(feature = "wasm")] + if parse_options.wasm { + let engine = tree_sitter::wasmtime::Engine::default(); + parser + .set_wasm_store(tree_sitter::WasmStore::new(engine.clone()).unwrap()) + .unwrap(); + loader.use_wasm(engine); + } - let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?; + let timeout = parse_options.timeout.unwrap_or_default(); + + let paths = collect_paths(parse_options.paths_file.as_deref(), parse_options.paths)?; let max_path_length = paths.iter().map(|p| p.chars().count()).max().unwrap_or(0); let mut has_error = false; let loader_config = config.get()?; loader.find_all_languages(&loader_config)?; - let should_track_stats = matches.is_present("stat"); + let should_track_stats = parse_options.stat; let mut stats = parse::Stats::default(); for path in paths { let path = Path::new(&path); + let language = - loader.select_language(path, ¤t_dir, matches.value_of("scope"))?; + loader.select_language(path, ¤t_dir, parse_options.scope.as_deref())?; + parser + .set_language(&language) + .context("incompatible language")?; - let this_file_errored = parse::parse_file_at_path( - language, + let opts = ParseFileOptions { + language: language.clone(), path, - &edits, + edits: &edits + .iter() + .map(std::string::String::as_str) + .collect::>(), max_path_length, output, - time, + print_time: time, timeout, - debug, - debug_graph, - Some(&cancellation_flag), - )?; + 
debug: parse_options.debug, + debug_graph: parse_options.debug_graph, + cancellation_flag: Some(&cancellation_flag), + encoding, + open_log: parse_options.open_log, + }; + + let parse_result = parse::parse_file_at_path(&mut parser, &opts)?; if should_track_stats { stats.total_parses += 1; - if !this_file_errored { + if parse_result.successful { stats.successful_parses += 1; } + if let Some(duration) = parse_result.duration { + stats.total_bytes += parse_result.bytes; + stats.total_duration += duration; + } } - has_error |= this_file_errored; + has_error |= !parse_result.successful; } if should_track_stats { - println!("{}", stats) + println!("\n{stats}"); } if has_error { @@ -463,97 +535,195 @@ fn run() -> Result<()> { } } - ("query", Some(matches)) => { - let ordered_captures = matches.values_of("captures").is_some(); - let quiet = matches.values_of("quiet").is_some(); - let time = matches.values_of("time").is_some(); - let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?; + Commands::Test(test_options) => { + let config = Config::load(test_options.config_path)?; + if test_options.debug { + // For augmenting debug logging in external scanners + env::set_var("TREE_SITTER_DEBUG", "1"); + } + + loader.use_debug_build(test_options.debug_build); + + let mut parser = Parser::new(); + + #[cfg(feature = "wasm")] + if test_options.wasm { + let engine = tree_sitter::wasmtime::Engine::default(); + parser + .set_wasm_store(tree_sitter::WasmStore::new(engine.clone()).unwrap()) + .unwrap(); + loader.use_wasm(engine); + } + + let languages = loader.languages_at_path(¤t_dir)?; + let language = &languages + .first() + .ok_or_else(|| anyhow!("No language found"))? + .0; + parser.set_language(language)?; + + let test_dir = current_dir.join("test"); + + // Run the corpus tests. Look for them in `test/corpus`. 
+ let test_corpus_dir = test_dir.join("corpus"); + if test_corpus_dir.is_dir() { + let mut opts = TestOptions { + path: test_corpus_dir, + debug: test_options.debug, + debug_graph: test_options.debug_graph, + filter: test_options.filter.as_deref(), + include: test_options.include, + exclude: test_options.exclude, + update: test_options.update, + open_log: test_options.open_log, + languages: languages.iter().map(|(l, n)| (n.as_str(), l)).collect(), + }; + + test::run_tests_at_path(&mut parser, &mut opts)?; + } + + // Check that all of the queries are valid. + test::check_queries_at_path(language, ¤t_dir.join("queries"))?; + + // Run the syntax highlighting tests. + let test_highlight_dir = test_dir.join("highlight"); + if test_highlight_dir.is_dir() { + let mut highlighter = Highlighter::new(); + highlighter.parser = parser; + test_highlight::test_highlights( + &loader, + &config.get()?, + &mut highlighter, + &test_highlight_dir, + )?; + parser = highlighter.parser; + } + + let test_tag_dir = test_dir.join("tags"); + if test_tag_dir.is_dir() { + let mut tags_context = TagsContext::new(); + tags_context.parser = parser; + test_tags::test_tags(&loader, &config.get()?, &mut tags_context, &test_tag_dir)?; + } + } + + Commands::Query(query_options) => { + let config = Config::load(query_options.config_path)?; + let paths = collect_paths(query_options.paths_file.as_deref(), query_options.paths)?; let loader_config = config.get()?; loader.find_all_languages(&loader_config)?; let language = loader.select_language( Path::new(&paths[0]), ¤t_dir, - matches.value_of("scope"), + query_options.scope.as_deref(), )?; - let query_path = Path::new(matches.value_of("query-path").unwrap()); - let byte_range = matches.value_of("byte-range").and_then(|arg| { - let mut parts = arg.split(":"); + let query_path = Path::new(&query_options.query_path); + + let byte_range = query_options.byte_range.as_ref().and_then(|range| { + let mut parts = range.split(':'); let start = 
parts.next()?.parse().ok()?; let end = parts.next().unwrap().parse().ok()?; Some(start..end) }); - let point_range = matches.value_of("row-range").and_then(|arg| { - let mut parts = arg.split(":"); + let point_range = query_options.row_range.as_ref().and_then(|range| { + let mut parts = range.split(':'); let start = parts.next()?.parse().ok()?; let end = parts.next().unwrap().parse().ok()?; Some(Point::new(start, 0)..Point::new(end, 0)) }); - let should_test = matches.is_present("test"); + query::query_files_at_paths( - language, + &language, paths, query_path, - ordered_captures, + query_options.captures, byte_range, point_range, - should_test, - quiet, - time, - )?; - } - - ("tags", Some(matches)) => { - let loader_config = config.get()?; - loader.find_all_languages(&loader_config)?; - let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?; - tags::generate_tags( - &loader, - matches.value_of("scope"), - &paths, - matches.is_present("quiet"), - matches.is_present("time"), + query_options.test, + query_options.quiet, + query_options.time, )?; } - ("highlight", Some(matches)) => { + Commands::Highlight(highlight_options) => { + let config = Config::load(highlight_options.config_path)?; let theme_config: tree_sitter_cli::highlight::ThemeConfig = config.get()?; loader.configure_highlights(&theme_config.theme.highlight_names); let loader_config = config.get()?; loader.find_all_languages(&loader_config)?; - let time = matches.is_present("time"); - let quiet = matches.is_present("quiet"); - let html_mode = quiet || matches.is_present("html"); - let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?; + let quiet = highlight_options.quiet; + let html_mode = quiet || highlight_options.html; + let paths = collect_paths( + highlight_options.paths_file.as_deref(), + highlight_options.paths, + )?; if html_mode && !quiet { println!("{}", highlight::HTML_HEADER); } - let cancellation_flag = util::cancel_on_stdin(); 
+ let cancellation_flag = util::cancel_on_signal(); - let mut lang = None; - if let Some(scope) = matches.value_of("scope") { - lang = loader.language_configuration_for_scope(scope)?; - if lang.is_none() { - return Err(anyhow!("Unknown scope '{}'", scope)); + let mut language = None; + if let Some(scope) = highlight_options.scope.as_deref() { + language = loader.language_configuration_for_scope(scope)?; + if language.is_none() { + return Err(anyhow!("Unknown scope '{scope}'")); } } for path in paths { let path = Path::new(&path); - let (language, language_config) = match lang { + let (language, language_config) = match language.clone() { Some(v) => v, - None => match loader.language_configuration_for_file_name(path)? { - Some(v) => v, - None => { - eprintln!("No language found for path {:?}", path); + None => { + if let Some(v) = loader.language_configuration_for_file_name(path)? { + v + } else { + eprintln!("{}", util::lang_not_found_for_path(path, &loader_config)); continue; } - }, + } }; - if let Some(highlight_config) = language_config.highlight_config(language)? { + if let Some(highlight_config) = language_config + .highlight_config(language, highlight_options.query_paths.as_deref())? 
+ { + if highlight_options.check { + let names = if let Some(path) = highlight_options.captures_path.as_deref() { + let path = Path::new(path); + let file = fs::read_to_string(path)?; + let capture_names = file + .lines() + .filter_map(|line| { + if line.trim().is_empty() || line.trim().starts_with(';') { + return None; + } + line.split(';').next().map(|s| s.trim().trim_matches('"')) + }) + .collect::>(); + highlight_config.nonconformant_capture_names(&capture_names) + } else { + highlight_config.nonconformant_capture_names(&HashSet::new()) + }; + if names.is_empty() { + eprintln!("All highlight captures conform to standards."); + } else { + eprintln!( + "Non-standard highlight {} detected:", + if names.len() > 1 { + "captures" + } else { + "capture" + } + ); + for name in names { + eprintln!("* {name}"); + } + } + } + let source = fs::read(path)?; if html_mode { highlight::html( @@ -562,7 +732,8 @@ fn run() -> Result<()> { &source, highlight_config, quiet, - time, + highlight_options.time, + Some(&cancellation_flag), )?; } else { highlight::ansi( @@ -570,12 +741,12 @@ fn run() -> Result<()> { &theme_config.theme, &source, highlight_config, - time, + highlight_options.time, Some(&cancellation_flag), )?; } } else { - eprintln!("No syntax highlighting config found for path {:?}", path); + eprintln!("No syntax highlighting config found for path {path:?}"); } } @@ -584,17 +755,42 @@ fn run() -> Result<()> { } } - ("build-wasm", Some(matches)) => { - let grammar_path = current_dir.join(matches.value_of("path").unwrap_or("")); - wasm::compile_language_to_wasm(&grammar_path, matches.is_present("docker"))?; + Commands::Tags(tags_options) => { + let config = Config::load(tags_options.config_path)?; + let loader_config = config.get()?; + loader.find_all_languages(&loader_config)?; + let paths = collect_paths(tags_options.paths_file.as_deref(), tags_options.paths)?; + tags::generate_tags( + &loader, + &config.get()?, + tags_options.scope.as_deref(), + &paths, + 
tags_options.quiet, + tags_options.time, + )?; + } + + Commands::BuildWasm(wasm_options) => { + let grammar_path = current_dir.join(wasm_options.path.unwrap_or_default()); + wasm::compile_language_to_wasm( + &loader, + &grammar_path, + ¤t_dir, + wasm_options.docker, + )?; } - ("playground", Some(matches)) => { - let open_in_browser = !matches.is_present("quiet"); - playground::serve(¤t_dir, open_in_browser); + Commands::Playground(playground_options) => { + let open_in_browser = !playground_options.quiet; + let grammar_path = playground_options + .grammar_path + .map(PathBuf::from) + .unwrap_or(current_dir); + playground::serve(&grammar_path, open_in_browser)?; } - ("dump-languages", Some(_)) => { + Commands::DumpLanguages(dump_options) => { + let config = Config::load(dump_options.config_path)?; let loader_config = config.get()?; loader.find_all_languages(&loader_config)?; for (configuration, language_path) in loader.get_all_language_configurations() { @@ -616,20 +812,47 @@ fn run() -> Result<()> { ); } } - - _ => unreachable!(), } Ok(()) } -fn collect_paths<'a>( - paths_file: Option<&str>, - paths: Option>, -) -> Result> { +#[must_use] +const fn get_styles() -> clap::builder::Styles { + clap::builder::Styles::styled() + .usage( + Style::new() + .bold() + .fg_color(Some(Color::Ansi(AnsiColor::Yellow))), + ) + .header( + Style::new() + .bold() + .fg_color(Some(Color::Ansi(AnsiColor::Yellow))), + ) + .literal(Style::new().fg_color(Some(Color::Ansi(AnsiColor::Green)))) + .invalid( + Style::new() + .bold() + .fg_color(Some(Color::Ansi(AnsiColor::Red))), + ) + .error( + Style::new() + .bold() + .fg_color(Some(Color::Ansi(AnsiColor::Red))), + ) + .valid( + Style::new() + .bold() + .fg_color(Some(Color::Ansi(AnsiColor::Green))), + ) + .placeholder(Style::new().fg_color(Some(Color::Ansi(AnsiColor::White)))) +} + +fn collect_paths(paths_file: Option<&str>, paths: Option>) -> Result> { if let Some(paths_file) = paths_file { return Ok(fs::read_to_string(paths_file) - 
.with_context(|| format!("Failed to read paths file {}", paths_file))? + .with_context(|| format!("Failed to read paths file {paths_file}"))? .trim() .lines() .map(String::from) @@ -642,25 +865,23 @@ fn collect_paths<'a>( let mut incorporate_path = |path: &str, positive| { if positive { result.push(path.to_string()); - } else { - if let Some(index) = result.iter().position(|p| p == path) { - result.remove(index); - } + } else if let Some(index) = result.iter().position(|p| p == path) { + result.remove(index); } }; for mut path in paths { let mut positive = true; - if path.starts_with("!") { + if path.starts_with('!') { positive = false; - path = path.trim_start_matches("!"); + path = path.trim_start_matches('!').to_string(); } - if Path::new(path).exists() { - incorporate_path(path, positive); + if Path::new(&path).exists() { + incorporate_path(&path, positive); } else { let paths = - glob(path).with_context(|| format!("Invalid glob pattern {:?}", path))?; + glob(&path).with_context(|| format!("Invalid glob pattern {path:?}"))?; for path in paths { if let Some(path) = path?.to_str() { incorporate_path(path, positive); diff --git a/third-party/tree-sitter/tree-sitter/cli/src/parse.rs b/third-party/tree-sitter/tree-sitter/cli/src/parse.rs index 3e28e51a86e..4849bda3ce5 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/parse.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/parse.rs @@ -3,9 +3,9 @@ use anyhow::{anyhow, Context, Result}; use std::io::{self, Write}; use std::path::Path; use std::sync::atomic::AtomicUsize; -use std::time::Instant; +use std::time::{Duration, Instant}; use std::{fmt, fs, usize}; -use tree_sitter::{InputEdit, Language, LogType, Parser, Point, Tree}; +use tree_sitter::{ffi, InputEdit, Language, LogType, Parser, Point, Tree}; #[derive(Debug)] pub struct Edit { @@ -18,19 +18,30 @@ pub struct Edit { pub struct Stats { pub successful_parses: usize, pub total_parses: usize, + pub total_bytes: usize, + pub total_duration: Duration, } 
impl fmt::Display for Stats { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - return writeln!(f, "Total parses: {}; successful parses: {}; failed parses: {}; success percentage: {:.2}%", - self.total_parses, - self.successful_parses, - self.total_parses - self.successful_parses, - (self.successful_parses as f64) / (self.total_parses as f64) * 100.0); + let duration_us = self.total_duration.as_micros(); + writeln!( + f, + "Total parses: {}; successful parses: {}; failed parses: {}; success percentage: {:.2}%; average speed: {} bytes/ms", + self.total_parses, + self.successful_parses, + self.total_parses - self.successful_parses, + ((self.successful_parses as f64) / (self.total_parses as f64)) * 100.0, + if duration_us != 0 { + ((self.total_bytes as u128) * 1_000) / duration_us + } else { + 0 + } + ) } } -#[derive(Copy, Clone)] +#[derive(Copy, Clone, PartialEq, Eq)] pub enum ParseOutput { Normal, Quiet, @@ -38,71 +49,105 @@ pub enum ParseOutput { Dot, } -pub fn parse_file_at_path( - language: Language, - path: &Path, - edits: &Vec<&str>, - max_path_length: usize, - output: ParseOutput, - print_time: bool, - timeout: u64, - debug: bool, - debug_graph: bool, - cancellation_flag: Option<&AtomicUsize>, -) -> Result { +pub struct ParseFileOptions<'a> { + pub language: Language, + pub path: &'a Path, + pub edits: &'a [&'a str], + pub max_path_length: usize, + pub output: ParseOutput, + pub print_time: bool, + pub timeout: u64, + pub debug: bool, + pub debug_graph: bool, + pub cancellation_flag: Option<&'a AtomicUsize>, + pub encoding: Option, + pub open_log: bool, +} + +#[derive(Copy, Clone)] +pub struct ParseResult { + pub successful: bool, + pub bytes: usize, + pub duration: Option, +} + +pub fn parse_file_at_path(parser: &mut Parser, opts: &ParseFileOptions) -> Result { let mut _log_session = None; - let mut parser = Parser::new(); - parser.set_language(language)?; - let mut source_code = - fs::read(path).with_context(|| format!("Error reading source file 
{:?}", path))?; + parser.set_language(&opts.language)?; + let mut source_code = fs::read(opts.path) + .with_context(|| format!("Error reading source file {:?}", opts.path))?; // If the `--cancel` flag was passed, then cancel the parse // when the user types a newline. - unsafe { parser.set_cancellation_flag(cancellation_flag) }; + unsafe { parser.set_cancellation_flag(opts.cancellation_flag) }; // Set a timeout based on the `--time` flag. - parser.set_timeout_micros(timeout); + parser.set_timeout_micros(opts.timeout); // Render an HTML graph if `--debug-graph` was passed - if debug_graph { - _log_session = Some(util::log_graphs(&mut parser, "log.html")?); + if opts.debug_graph { + _log_session = Some(util::log_graphs(parser, "log.html", opts.open_log)?); } // Log to stderr if `--debug` was passed - else if debug { + else if opts.debug { parser.set_logger(Some(Box::new(|log_type, message| { if log_type == LogType::Lex { - io::stderr().write(b" ").unwrap(); + io::stderr().write_all(b" ").unwrap(); } - write!(&mut io::stderr(), "{}\n", message).unwrap(); + writeln!(&mut io::stderr(), "{message}").unwrap(); }))); } let time = Instant::now(); - let tree = parser.parse(&source_code, None); + + #[inline(always)] + fn is_utf16_bom(bom_bytes: &[u8]) -> bool { + bom_bytes == [0xFF, 0xFE] || bom_bytes == [0xFE, 0xFF] + } + + let tree = match opts.encoding { + Some(encoding) if encoding == ffi::TSInputEncodingUTF16 => { + let source_code_utf16 = source_code + .chunks_exact(2) + .map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]])) + .collect::>(); + parser.parse_utf16(&source_code_utf16, None) + } + None if source_code.len() >= 2 && is_utf16_bom(&source_code[0..2]) => { + let source_code_utf16 = source_code + .chunks_exact(2) + .map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]])) + .collect::>(); + parser.parse_utf16(&source_code_utf16, None) + } + _ => parser.parse(&source_code, None), + }; + + parser.stop_printing_dot_graphs(); let stdout = io::stdout(); let mut stdout = 
stdout.lock(); if let Some(mut tree) = tree { - if debug_graph && !edits.is_empty() { + if opts.debug_graph && !opts.edits.is_empty() { println!("BEFORE:\n{}", String::from_utf8_lossy(&source_code)); } - for (i, edit) in edits.iter().enumerate() { + for (i, edit) in opts.edits.iter().enumerate() { let edit = parse_edit_flag(&source_code, edit)?; - perform_edit(&mut tree, &mut source_code, &edit); + perform_edit(&mut tree, &mut source_code, &edit)?; tree = parser.parse(&source_code, Some(&tree)).unwrap(); - if debug_graph { - println!("AFTER {}:\n{}", i, String::from_utf8_lossy(&source_code)); + if opts.debug_graph { + println!("AFTER {i}:\n{}", String::from_utf8_lossy(&source_code)); } } let duration = time.elapsed(); - let duration_ms = duration.as_secs() * 1000 + duration.subsec_nanos() as u64 / 1000000; + let duration_ms = duration.as_micros() as f64 / 1e3; let mut cursor = tree.walk(); - if matches!(output, ParseOutput::Normal) { + if opts.output == ParseOutput::Normal { let mut needs_newline = false; let mut indent_level = 0; let mut did_visit_children = false; @@ -111,7 +156,7 @@ pub fn parse_file_at_path( let is_named = node.is_named(); if did_visit_children { if is_named { - stdout.write(b")")?; + stdout.write_all(b")")?; needs_newline = true; } if cursor.goto_next_sibling() { @@ -125,15 +170,15 @@ pub fn parse_file_at_path( } else { if is_named { if needs_newline { - stdout.write(b"\n")?; + stdout.write_all(b"\n")?; } for _ in 0..indent_level { - stdout.write(b" ")?; + stdout.write_all(b" ")?; } let start = node.start_position(); let end = node.end_position(); if let Some(field_name) = cursor.field_name() { - write!(&mut stdout, "{}: ", field_name)?; + write!(&mut stdout, "{field_name}: ")?; } write!( &mut stdout, @@ -155,49 +200,77 @@ pub fn parse_file_at_path( } } cursor.reset(tree.root_node()); - println!(""); + println!(); } - if matches!(output, ParseOutput::Xml) { + if opts.output == ParseOutput::Xml { let mut needs_newline = false; let mut 
indent_level = 0; let mut did_visit_children = false; - let mut tags: Vec<&str> = Vec::new(); + let mut had_named_children = false; + let mut tags = Vec::<&str>::new(); + writeln!(&mut stdout, "")?; loop { let node = cursor.node(); let is_named = node.is_named(); if did_visit_children { if is_named { let tag = tags.pop(); - write!(&mut stdout, "\n", tag.expect("there is a tag"))?; + if had_named_children { + for _ in 0..indent_level { + stdout.write_all(b" ")?; + } + } + write!(&mut stdout, "", tag.expect("there is a tag"))?; + // we only write a line in the case where it's the last sibling + if let Some(parent) = node.parent() { + if parent.child(parent.child_count() - 1).unwrap() == node { + stdout.write_all(b"\n")?; + } + } needs_newline = true; } if cursor.goto_next_sibling() { did_visit_children = false; + had_named_children = false; } else if cursor.goto_parent() { did_visit_children = true; + had_named_children = is_named; indent_level -= 1; + if !is_named && needs_newline { + stdout.write_all(b"\n")?; + for _ in 0..indent_level { + stdout.write_all(b" ")?; + } + } } else { break; } } else { if is_named { if needs_newline { - stdout.write(b"\n")?; + stdout.write_all(b"\n")?; } for _ in 0..indent_level { - stdout.write(b" ")?; + stdout.write_all(b" ")?; } write!(&mut stdout, "<{}", node.kind())?; if let Some(field_name) = cursor.field_name() { - write!(&mut stdout, " type=\"{}\"", field_name)?; + write!(&mut stdout, " field=\"{field_name}\"")?; } + let start = node.start_position(); + let end = node.end_position(); + write!(&mut stdout, " srow=\"{}\"", start.row)?; + write!(&mut stdout, " scol=\"{}\"", start.column)?; + write!(&mut stdout, " erow=\"{}\"", end.row)?; + write!(&mut stdout, " ecol=\"{}\"", end.column)?; write!(&mut stdout, ">")?; tags.push(node.kind()); needs_newline = true; } if cursor.goto_first_child() { did_visit_children = false; + had_named_children = false; indent_level += 1; } else { did_visit_children = true; @@ -205,16 +278,27 @@ pub 
fn parse_file_at_path( let end = node.end_byte(); let value = std::str::from_utf8(&source_code[start..end]).expect("has a string"); + // if !is_named { + // for _ in 0..indent_level { + // stdout.write_all(b" ")?; + // } + // } + if !is_named && needs_newline { + stdout.write_all(b"\n")?; + for _ in 0..indent_level { + stdout.write_all(b" ")?; + } + } write!(&mut stdout, "{}", html_escape::encode_text(value))?; } } } cursor.reset(tree.root_node()); - println!(""); + println!(); } - if matches!(output, ParseOutput::Dot) { - util::print_tree_graph(&tree, "log.html").unwrap(); + if opts.output == ParseOutput::Dot { + util::print_tree_graph(&tree, "log.html", opts.open_log).unwrap(); } let mut first_error = None; @@ -224,23 +308,22 @@ pub fn parse_file_at_path( if node.is_error() || node.is_missing() { first_error = Some(node); break; - } else { - if !cursor.goto_first_child() { - break; - } + } + if !cursor.goto_first_child() { + break; } } else if !cursor.goto_next_sibling() { break; } } - if first_error.is_some() || print_time { + if first_error.is_some() || opts.print_time { write!( &mut stdout, - "{:width$}\t{} ms", - path.to_str().unwrap(), - duration_ms, - width = max_path_length + "{:width$}\t{duration_ms:>7.2} ms\t{:>6} bytes/ms", + opts.path.to_str().unwrap(), + (source_code.len() as u128 * 1_000_000) / duration.as_nanos(), + width = opts.max_path_length )?; if let Some(node) = first_error { let start = node.start_position(); @@ -253,7 +336,7 @@ pub fn parse_file_at_path( write!( &mut stdout, "MISSING \"{}\"", - node.kind().replace("\n", "\\n") + node.kind().replace('\n', "\\n") )?; } } else { @@ -265,33 +348,42 @@ pub fn parse_file_at_path( start.row, start.column, end.row, end.column )?; } - write!(&mut stdout, "\n")?; + writeln!(&mut stdout)?; } - return Ok(first_error.is_some()); - } else if print_time { + return Ok(ParseResult { + successful: first_error.is_none(), + bytes: source_code.len(), + duration: Some(duration), + }); + } + + if opts.print_time { 
let duration = time.elapsed(); - let duration_ms = duration.as_secs() * 1000 + duration.subsec_nanos() as u64 / 1000000; + let duration_ms = duration.as_micros() as f64 / 1e3; writeln!( &mut stdout, - "{:width$}\t{} ms (timed out)", - path.to_str().unwrap(), - duration_ms, - width = max_path_length + "{:width$}\t{duration_ms:>7.2} ms\t(timed out)", + opts.path.to_str().unwrap(), + width = opts.max_path_length )?; } - Ok(false) + Ok(ParseResult { + successful: false, + bytes: source_code.len(), + duration: None, + }) } -pub fn perform_edit(tree: &mut Tree, input: &mut Vec, edit: &Edit) -> InputEdit { +pub fn perform_edit(tree: &mut Tree, input: &mut Vec, edit: &Edit) -> Result { let start_byte = edit.position; let old_end_byte = edit.position + edit.deleted_length; let new_end_byte = edit.position + edit.inserted_text.len(); - let start_position = position_for_offset(input, start_byte); - let old_end_position = position_for_offset(input, old_end_byte); - input.splice(start_byte..old_end_byte, edit.inserted_text.iter().cloned()); - let new_end_position = position_for_offset(input, new_end_byte); + let start_position = position_for_offset(input, start_byte)?; + let old_end_position = position_for_offset(input, old_end_byte)?; + input.splice(start_byte..old_end_byte, edit.inserted_text.iter().copied()); + let new_end_position = position_for_offset(input, new_end_byte)?; let edit = InputEdit { start_byte, old_end_byte, @@ -301,10 +393,10 @@ pub fn perform_edit(tree: &mut Tree, input: &mut Vec, edit: &Edit) -> InputE new_end_position, }; tree.edit(&edit); - edit + Ok(edit) } -fn parse_edit_flag(source_code: &Vec, flag: &str) -> Result { +fn parse_edit_flag(source_code: &[u8], flag: &str) -> Result { let error = || { anyhow!(concat!( "Invalid edit string '{}'. 
", @@ -316,7 +408,7 @@ fn parse_edit_flag(source_code: &Vec, flag: &str) -> Result { // * edit position // * deleted length // * inserted text - let mut parts = flag.split(" "); + let mut parts = flag.split(' '); let position = parts.next().ok_or_else(error)?; let deleted_length = parts.next().ok_or_else(error)?; let inserted_text = parts.collect::>().join(" ").into_bytes(); @@ -324,19 +416,19 @@ fn parse_edit_flag(source_code: &Vec, flag: &str) -> Result { // Position can either be a byte_offset or row,column pair, separated by a comma let position = if position == "$" { source_code.len() - } else if position.contains(",") { - let mut parts = position.split(","); + } else if position.contains(',') { + let mut parts = position.split(','); let row = parts.next().ok_or_else(error)?; - let row = usize::from_str_radix(row, 10).map_err(|_| error())?; + let row = row.parse::().map_err(|_| error())?; let column = parts.next().ok_or_else(error)?; - let column = usize::from_str_radix(column, 10).map_err(|_| error())?; - offset_for_position(source_code, Point { row, column }) + let column = column.parse::().map_err(|_| error())?; + offset_for_position(source_code, Point { row, column })? } else { - usize::from_str_radix(position, 10).map_err(|_| error())? + position.parse::().map_err(|_| error())? }; // Deleted length must be a byte count. 
- let deleted_length = usize::from_str_radix(deleted_length, 10).map_err(|_| error())?; + let deleted_length = deleted_length.parse::().map_err(|_| error())?; Ok(Edit { position, @@ -345,31 +437,48 @@ fn parse_edit_flag(source_code: &Vec, flag: &str) -> Result { }) } -fn offset_for_position(input: &Vec, position: Point) -> usize { - let mut current_position = Point { row: 0, column: 0 }; - for (i, c) in input.iter().enumerate() { - if *c as char == '\n' { - current_position.row += 1; - current_position.column = 0; - } else { - current_position.column += 1; - } - if current_position > position { - return i; +pub fn offset_for_position(input: &[u8], position: Point) -> Result { + let mut row = 0; + let mut offset = 0; + let mut iter = memchr::memchr_iter(b'\n', input); + loop { + if let Some(pos) = iter.next() { + if row < position.row { + row += 1; + offset = pos; + continue; + } } + offset += 1; + break; + } + if position.row - row > 0 { + return Err(anyhow!("Failed to address a row: {}", position.row)); + } + if let Some(pos) = iter.next() { + if (pos - offset < position.column) || (input[offset] == b'\n' && position.column > 0) { + return Err(anyhow!("Failed to address a column: {}", position.column)); + }; + } else if input.len() - offset < position.column { + return Err(anyhow!("Failed to address a column over the end")); } - return input.len(); + Ok(offset + position.column) } -fn position_for_offset(input: &Vec, offset: usize) -> Point { +pub fn position_for_offset(input: &[u8], offset: usize) -> Result { + if offset > input.len() { + return Err(anyhow!("Failed to address an offset: {offset}")); + } let mut result = Point { row: 0, column: 0 }; - for c in &input[0..offset] { - if *c as char == '\n' { - result.row += 1; - result.column = 0; - } else { - result.column += 1; - } + let mut last = 0; + for pos in memchr::memchr_iter(b'\n', &input[..offset]) { + result.row += 1; + last = pos; } - result + result.column = if result.row > 0 { + offset - last - 1 + } 
else { + offset + }; + Ok(result) } diff --git a/third-party/tree-sitter/tree-sitter/cli/src/playground.html b/third-party/tree-sitter/tree-sitter/cli/src/playground.html index 22c874df137..420cd28dce4 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/playground.html +++ b/third-party/tree-sitter/tree-sitter/cli/src/playground.html @@ -3,8 +3,8 @@ tree-sitter THE_LANGUAGE_NAME - - + + @@ -29,6 +29,10 @@ +
+ (?) +
+ diff --git a/third-party/tree-sitter/tree-sitter/cli/src/playground.rs b/third-party/tree-sitter/tree-sitter/cli/src/playground.rs index a2dbef96763..34da71ad5ae 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/playground.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/playground.rs @@ -1,5 +1,5 @@ use super::wasm; -use anyhow::Context; +use anyhow::{anyhow, Context, Result}; use std::{ borrow::Cow, env, fs, @@ -8,12 +8,11 @@ use std::{ str::{self, FromStr as _}, }; use tiny_http::{Header, Response, Server}; -use webbrowser; macro_rules! optional_resource { ($name: tt, $path: tt) => { #[cfg(TREE_SITTER_EMBED_WASM_BINDING)] - fn $name(tree_sitter_dir: &Option) -> Cow<'static, [u8]> { + fn $name(tree_sitter_dir: Option<&Path>) -> Cow<'static, [u8]> { if let Some(tree_sitter_dir) = tree_sitter_dir { Cow::Owned(fs::read(tree_sitter_dir.join($path)).unwrap()) } else { @@ -22,7 +21,7 @@ macro_rules! optional_resource { } #[cfg(not(TREE_SITTER_EMBED_WASM_BINDING))] - fn $name(tree_sitter_dir: &Option) -> Cow<'static, [u8]> { + fn $name(tree_sitter_dir: Option<&Path>) -> Cow<'static, [u8]> { if let Some(tree_sitter_dir) = tree_sitter_dir { Cow::Owned(fs::read(tree_sitter_dir.join($path)).unwrap()) } else { @@ -36,44 +35,32 @@ optional_resource!(get_playground_js, "docs/assets/js/playground.js"); optional_resource!(get_lib_js, "lib/binding_web/tree-sitter.js"); optional_resource!(get_lib_wasm, "lib/binding_web/tree-sitter.wasm"); -fn get_main_html(tree_sitter_dir: &Option) -> Cow<'static, [u8]> { - if let Some(tree_sitter_dir) = tree_sitter_dir { - Cow::Owned(fs::read(tree_sitter_dir.join("cli/src/playground.html")).unwrap()) - } else { - Cow::Borrowed(include_bytes!("playground.html")) - } +fn get_main_html(tree_sitter_dir: Option<&Path>) -> Cow<'static, [u8]> { + tree_sitter_dir.map_or( + Cow::Borrowed(include_bytes!("playground.html")), + |tree_sitter_dir| { + Cow::Owned(fs::read(tree_sitter_dir.join("cli/src/playground.html")).unwrap()) + }, + ) } -pub fn 
serve(grammar_path: &Path, open_in_browser: bool) { - let server = get_server(); - let grammar_name = wasm::get_grammar_name(&grammar_path.join("src")) - .with_context(|| "Failed to get wasm filename") - .unwrap(); - let wasm_filename = format!("tree-sitter-{}.wasm", grammar_name); - let language_wasm = fs::read(grammar_path.join(&wasm_filename)) - .with_context(|| { - format!( - "Failed to read {}. Run `tree-sitter build-wasm` first.", - wasm_filename - ) - }) - .unwrap(); +pub fn serve(grammar_path: &Path, open_in_browser: bool) -> Result<()> { + let server = get_server()?; + let (grammar_name, language_wasm) = wasm::load_language_wasm_file(grammar_path)?; let url = format!("http://{}", server.server_addr()); - println!("Started playground on: {}", url); - if open_in_browser { - if let Err(_) = webbrowser::open(&url) { - eprintln!("Failed to open '{}' in a web browser", url); - } + println!("Started playground on: {url}"); + if open_in_browser && webbrowser::open(&url).is_err() { + eprintln!("Failed to open '{url}' in a web browser"); } let tree_sitter_dir = env::var("TREE_SITTER_BASE_DIR").map(PathBuf::from).ok(); - let main_html = str::from_utf8(&get_main_html(&tree_sitter_dir)) + let main_html = str::from_utf8(&get_main_html(tree_sitter_dir.as_deref())) .unwrap() .replace("THE_LANGUAGE_NAME", &grammar_name) .into_bytes(); - let playground_js = get_playground_js(&tree_sitter_dir); - let lib_js = get_lib_js(&tree_sitter_dir); - let lib_wasm = get_lib_wasm(&tree_sitter_dir); + let playground_js = get_playground_js(tree_sitter_dir.as_deref()); + let lib_js = get_lib_js(tree_sitter_dir.as_deref()); + let lib_wasm = get_lib_wasm(tree_sitter_dir.as_deref()); let html_header = Header::from_str("Content-Type: text/html").unwrap(); let js_header = Header::from_str("Content-Type: application/javascript").unwrap(); @@ -106,11 +93,15 @@ pub fn serve(grammar_path: &Path, open_in_browser: bool) { } _ => response(b"Not found", &html_header).with_status_code(404), }; - 
request.respond(res).expect("Failed to write HTTP response"); + request + .respond(res) + .with_context(|| "Failed to write HTTP response")?; } + + Ok(()) } -fn redirect<'a>(url: &'a str) -> Response<&'a [u8]> { +fn redirect(url: &str) -> Response<&[u8]> { Response::empty(302) .with_data("".as_bytes(), Some(0)) .with_header(Header::from_bytes("Location", url.as_bytes()).unwrap()) @@ -122,18 +113,24 @@ fn response<'a>(data: &'a [u8], header: &Header) -> Response<&'a [u8]> { .with_header(header.clone()) } -fn get_server() -> Server { - let addr = env::var("TREE_SITTER_PLAYGROUND_ADDR").unwrap_or("127.0.0.1".to_owned()); +fn get_server() -> Result { + let addr = env::var("TREE_SITTER_PLAYGROUND_ADDR").unwrap_or_else(|_| "127.0.0.1".to_owned()); let port = env::var("TREE_SITTER_PLAYGROUND_PORT") - .map(|v| v.parse::().expect("Invalid port specification")) + .map(|v| { + v.parse::() + .with_context(|| "Invalid port specification") + }) .ok(); let listener = match port { - Some(port) => bind_to(&*addr, port).expect("Can't bind to the specified port"), - None => { - get_listener_on_available_port(&*addr).expect("Can't find a free port to bind to it") + Some(port) => { + bind_to(&addr, port?).with_context(|| "Failed to bind to the specified port")? 
} + None => get_listener_on_available_port(&addr) + .with_context(|| "Failed to find a free port to bind to it")?, }; - Server::from_listener(listener, None).expect("Failed to start web server") + let server = + Server::from_listener(listener, None).map_err(|_| anyhow!("Failed to start web server"))?; + Ok(server) } fn get_listener_on_available_port(addr: &str) -> Option { diff --git a/third-party/tree-sitter/tree-sitter/cli/src/query.rs b/third-party/tree-sitter/tree-sitter/cli/src/query.rs index fc24cb05607..e5601a30c59 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/query.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/query.rs @@ -9,8 +9,9 @@ use std::{ }; use tree_sitter::{Language, Parser, Point, Query, QueryCursor}; +#[allow(clippy::too_many_arguments)] pub fn query_files_at_paths( - language: Language, + language: &Language, paths: Vec, query_path: &Path, ordered_captures: bool, @@ -24,7 +25,7 @@ pub fn query_files_at_paths( let mut stdout = stdout.lock(); let query_source = fs::read_to_string(query_path) - .with_context(|| format!("Error reading query file {:?}", query_path))?; + .with_context(|| format!("Error reading query file {query_path:?}"))?; let query = Query::new(language, &query_source).with_context(|| "Query compilation failed")?; let mut query_cursor = QueryCursor::new(); @@ -41,10 +42,10 @@ pub fn query_files_at_paths( for path in paths { let mut results = Vec::new(); - writeln!(&mut stdout, "{}", path)?; + writeln!(&mut stdout, "{path}")?; let source_code = - fs::read(&path).with_context(|| format!("Error reading source file {:?}", path))?; + fs::read(&path).with_context(|| format!("Error reading source file {path:?}"))?; let tree = parser.parse(&source_code, None).unwrap(); let start = Instant::now(); @@ -57,17 +58,16 @@ pub fn query_files_at_paths( if !quiet { writeln!( &mut stdout, - " pattern: {:>2}, capture: {} - {}, start: {}, end: {}, text: `{}`", + " pattern: {:>2}, capture: {} - {capture_name}, start: {}, end: {}, text: 
`{}`", mat.pattern_index, capture.index, - capture_name, capture.node.start_position(), capture.node.end_position(), capture.node.utf8_text(&source_code).unwrap_or("") )?; } results.push(query_testing::CaptureInfo { - name: capture_name.to_string(), + name: (*capture_name).to_string(), start: capture.node.start_position(), end: capture.node.end_position(), }); @@ -85,23 +85,19 @@ pub fn query_files_at_paths( if end.row == start.row { writeln!( &mut stdout, - " capture: {} - {}, start: {}, end: {}, text: `{}`", + " capture: {} - {capture_name}, start: {start}, end: {end}, text: `{}`", capture.index, - capture_name, - start, - end, capture.node.utf8_text(&source_code).unwrap_or("") )?; } else { writeln!( &mut stdout, - " capture: {}, start: {}, end: {}", - capture_name, start, end, + " capture: {capture_name}, start: {start}, end: {end}", )?; } } results.push(query_testing::CaptureInfo { - name: capture_name.to_string(), + name: (*capture_name).to_string(), start: capture.node.start_position(), end: capture.node.end_position(), }); @@ -115,7 +111,7 @@ pub fn query_files_at_paths( )?; } if should_test { - query_testing::assert_expected_captures(results, path, &mut parser, language)? 
+ query_testing::assert_expected_captures(&results, path, &mut parser, language)?; } if print_time { writeln!(&mut stdout, "{:?}", start.elapsed())?; diff --git a/third-party/tree-sitter/tree-sitter/cli/src/query_testing.rs b/third-party/tree-sitter/tree-sitter/cli/src/query_testing.rs index 9950f12f39d..a0ac260d043 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/query_testing.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/query_testing.rs @@ -18,15 +18,27 @@ pub struct CaptureInfo { #[derive(Debug, PartialEq, Eq)] pub struct Assertion { pub position: Point, + pub negative: bool, pub expected_capture_name: String, } +impl Assertion { + #[must_use] + pub fn new(row: usize, col: usize, negative: bool, expected_capture_name: String) -> Self { + Self { + position: Point::new(row, col), + negative, + expected_capture_name, + } + } +} + /// Parse the given source code, finding all of the comments that contain /// highlighting assertions. Return a vector of (position, expected highlight name) /// pairs. pub fn parse_position_comments( parser: &mut Parser, - language: Language, + language: &Language, source: &[u8], ) -> Result> { let mut result = Vec::new(); @@ -45,7 +57,7 @@ pub fn parse_position_comments( let node = cursor.node(); // Find every comment node. - if node.kind().contains("comment") { + if node.kind().to_lowercase().contains("comment") { if let Ok(text) = node.utf8_text(source) { let mut position = node.start_position(); if position.row > 0 { @@ -54,6 +66,7 @@ pub fn parse_position_comments( // to its own column. let mut has_left_caret = false; let mut has_arrow = false; + let mut negative = false; let mut arrow_end = 0; for (i, c) in text.char_indices() { arrow_end = i + 1; @@ -69,6 +82,19 @@ pub fn parse_position_comments( has_left_caret = c == '<'; } + // find any ! after arrows but before capture name + if has_arrow { + for (i, c) in text[arrow_end..].char_indices() { + if c == '!' 
{ + negative = true; + arrow_end += i + 1; + break; + } else if !c.is_whitespace() { + break; + } + } + } + // If the comment node contains an arrow and a highlight name, record the // highlight name and the position. if let (true, Some(mat)) = @@ -76,7 +102,8 @@ pub fn parse_position_comments( { assertion_ranges.push((node.start_position(), node.end_position())); result.push(Assertion { - position: position, + position, + negative, expected_capture_name: mat.as_str().to_string(), }); } @@ -99,7 +126,7 @@ pub fn parse_position_comments( // code *above* the assertion. There can be multiple lines of assertion comments, // so the positions may have to be decremented by more than one row. let mut i = 0; - for assertion in result.iter_mut() { + for assertion in &mut result { loop { let on_assertion_line = assertion_ranges[i..] .iter() @@ -124,14 +151,14 @@ pub fn parse_position_comments( } pub fn assert_expected_captures( - infos: Vec, + infos: &[CaptureInfo], path: String, parser: &mut Parser, - language: Language, + language: &Language, ) -> Result<()> { let contents = fs::read_to_string(path)?; let pairs = parse_position_comments(parser, language, contents.as_bytes())?; - for info in &infos { + for info in infos { if let Some(found) = pairs.iter().find(|p| { p.position.row == info.start.row && p.position >= info.start && p.position < info.end }) { @@ -141,7 +168,7 @@ pub fn assert_expected_captures( info.start, found.expected_capture_name, info.name - ))? 
+ ))?; } } } diff --git a/third-party/tree-sitter/tree-sitter/cli/src/tags.rs b/third-party/tree-sitter/tree-sitter/cli/src/tags.rs index 457955ddf6b..14ecef0d2a3 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/tags.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/tags.rs @@ -4,11 +4,12 @@ use std::io::{self, Write}; use std::path::Path; use std::time::Instant; use std::{fs, str}; -use tree_sitter_loader::Loader; +use tree_sitter_loader::{Config, Loader}; use tree_sitter_tags::TagsContext; pub fn generate_tags( loader: &Loader, + loader_config: &Config, scope: Option<&str>, paths: &[String], quiet: bool, @@ -18,37 +19,37 @@ pub fn generate_tags( if let Some(scope) = scope { lang = loader.language_configuration_for_scope(scope)?; if lang.is_none() { - return Err(anyhow!("Unknown scope '{}'", scope)); + return Err(anyhow!("Unknown scope '{scope}'")); } } let mut context = TagsContext::new(); - let cancellation_flag = util::cancel_on_stdin(); + let cancellation_flag = util::cancel_on_signal(); let stdout = io::stdout(); let mut stdout = stdout.lock(); for path in paths { let path = Path::new(&path); - let (language, language_config) = match lang { + let (language, language_config) = match lang.clone() { Some(v) => v, - None => match loader.language_configuration_for_file_name(path)? { - Some(v) => v, - None => { - eprintln!("No language found for path {:?}", path); + None => { + if let Some(v) = loader.language_configuration_for_file_name(path)? { + v + } else { + eprintln!("{}", util::lang_not_found_for_path(path, loader_config)); continue; } - }, + } }; if let Some(tags_config) = language_config.tags_config(language)? 
{ - let indent; - if paths.len() > 1 { + let indent = if paths.len() > 1 { if !quiet { writeln!(&mut stdout, "{}", path.to_string_lossy())?; } - indent = "\t" + "\t" } else { - indent = ""; + "" }; let source = fs::read(path)?; @@ -61,8 +62,7 @@ pub fn generate_tags( if !quiet { write!( &mut stdout, - "{}{:<10}\t | {:<8}\t{} {} - {} `{}`", - indent, + "{indent}{:<10}\t | {:<8}\t{} {} - {} `{}`", str::from_utf8(&source[tag.name_range]).unwrap_or(""), &tags_config.syntax_type_name(tag.syntax_type_id), if tag.is_definition { "def" } else { "ref" }, @@ -77,20 +77,15 @@ pub fn generate_tags( write!(&mut stdout, "\t{:?}", &docs)?; } } - writeln!(&mut stdout, "")?; + writeln!(&mut stdout)?; } } if time { - writeln!( - &mut stdout, - "{}time: {}ms", - indent, - t0.elapsed().as_millis(), - )?; + writeln!(&mut stdout, "{indent}time: {}ms", t0.elapsed().as_millis(),)?; } } else { - eprintln!("No tags config found for path {:?}", path); + eprintln!("No tags config found for path {path:?}"); } } diff --git a/third-party/tree-sitter/tree-sitter/cli/src/test.rs b/third-party/tree-sitter/tree-sitter/cli/src/test.rs index 69c4a66304a..74d8af48137 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/test.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/test.rs @@ -2,9 +2,11 @@ use super::util; use ansi_term::Colour; use anyhow::{anyhow, Context, Result}; use difference::{Changeset, Difference}; +use indoc::indoc; use lazy_static::lazy_static; use regex::bytes::{Regex as ByteRegex, RegexBuilder as ByteRegexBuilder}; use regex::Regex; +use std::collections::BTreeMap; use std::ffi::OsStr; use std::fmt::Write as FmtWrite; use std::fs; @@ -15,18 +17,28 @@ use tree_sitter::{Language, LogType, Parser, Query}; use walkdir::WalkDir; lazy_static! 
{ - static ref HEADER_REGEX: ByteRegex = - ByteRegexBuilder::new(r"^===+(?P[^=\r\n][^\r\n]*)?\r?\n(?P([^=\r\n][^\r\n]*\r?\n)+)===+(?P[^=\r\n][^\r\n]*)?\r?\n") + static ref HEADER_REGEX: ByteRegex = ByteRegexBuilder::new( + r"^(?x) + (?P(?:=+){3,}) + (?P[^=\r\n][^\r\n]*)? + \r?\n + (?P(?:[^=\r\n:][^\r\n]*\r?\n)+(?:(?:[ \t]*\r?\n)+)?) + (?P((?::(?:skip|error|fail-fast|(language|platform)\([^\r\n)]+\))\r?\n)*)) + ===+ + (?P[^=\r\n][^\r\n]*)?\r?\n" + ) + .multi_line(true) + .build() + .unwrap(); + static ref DIVIDER_REGEX: ByteRegex = + ByteRegexBuilder::new(r"^(?P(?:-+){3,})(?P[^-\r\n][^\r\n]*)?\r?\n") .multi_line(true) .build() .unwrap(); - static ref DIVIDER_REGEX: ByteRegex = ByteRegexBuilder::new(r"^---+(?P[^-\r\n][^\r\n]*)?\r?\n") - .multi_line(true) - .build() - .unwrap(); static ref COMMENT_REGEX: Regex = Regex::new(r"(?m)^\s*;.*$").unwrap(); static ref WHITESPACE_REGEX: Regex = Regex::new(r"\s+").unwrap(); static ref SEXP_FIELD_REGEX: Regex = Regex::new(r" \w+: \(").unwrap(); + static ref POINT_REGEX: Regex = Regex::new(r"\s*\[\s*\d+\s*,\s*\d+\s*\]\s*").unwrap(); } #[derive(Debug, PartialEq, Eq)] @@ -40,13 +52,25 @@ pub enum TestEntry { name: String, input: Vec, output: String, + header_delim_len: usize, + divider_delim_len: usize, has_fields: bool, + attributes: TestAttributes, }, } +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct TestAttributes { + pub skip: bool, + pub platform: bool, + pub fail_fast: bool, + pub error: bool, + pub languages: Vec>, +} + impl Default for TestEntry { fn default() -> Self { - TestEntry::Group { + Self::Group { name: String::new(), children: Vec::new(), file_path: None, @@ -54,82 +78,112 @@ impl Default for TestEntry { } } -pub fn run_tests_at_path( - language: Language, - path: &Path, - debug: bool, - debug_graph: bool, - filter: Option<&str>, - update: bool, -) -> Result<()> { - let test_entry = parse_tests(path)?; +impl Default for TestAttributes { + fn default() -> Self { + Self { + skip: false, + platform: true, + 
fail_fast: false, + error: false, + languages: vec!["".into()], + } + } +} + +pub struct TestOptions<'a> { + pub path: PathBuf, + pub debug: bool, + pub debug_graph: bool, + pub filter: Option<&'a str>, + pub include: Option, + pub exclude: Option, + pub update: bool, + pub open_log: bool, + pub languages: BTreeMap<&'a str, &'a Language>, +} + +pub fn run_tests_at_path(parser: &mut Parser, opts: &mut TestOptions) -> Result<()> { + let test_entry = parse_tests(&opts.path)?; let mut _log_session = None; - let mut parser = Parser::new(); - parser.set_language(language)?; - if debug_graph { - _log_session = Some(util::log_graphs(&mut parser, "log.html")?); - } else if debug { + if opts.debug_graph { + _log_session = Some(util::log_graphs(parser, "log.html", opts.open_log)?); + } else if opts.debug { parser.set_logger(Some(Box::new(|log_type, message| { if log_type == LogType::Lex { - io::stderr().write(b" ").unwrap(); + io::stderr().write_all(b" ").unwrap(); } - write!(&mut io::stderr(), "{}\n", message).unwrap(); + writeln!(&mut io::stderr(), "{message}").unwrap(); }))); } let mut failures = Vec::new(); let mut corrected_entries = Vec::new(); + let mut has_parse_errors = false; run_tests( - &mut parser, + parser, test_entry, - filter, + opts, 0, &mut failures, - update, &mut corrected_entries, + &mut has_parse_errors, )?; - if failures.len() > 0 { - println!(""); + parser.stop_printing_dot_graphs(); - if update { + if failures.is_empty() { + Ok(()) + } else { + println!(); + + if opts.update && !has_parse_errors { if failures.len() == 1 { - println!("1 update:\n") + println!("1 update:\n"); } else { - println!("{} updates:\n", failures.len()) + println!("{} updates:\n", failures.len()); } for (i, (name, ..)) in failures.iter().enumerate() { - println!(" {}. {}", i + 1, name); + println!(" {}. 
{name}", i + 1); } + Ok(()) } else { - if failures.len() == 1 { - println!("1 failure:") - } else { - println!("{} failures:", failures.len()) + has_parse_errors = opts.update && has_parse_errors; + + if !has_parse_errors { + if failures.len() == 1 { + println!("1 failure:"); + } else { + println!("{} failures:", failures.len()); + } } print_diff_key(); for (i, (name, actual, expected)) in failures.iter().enumerate() { - println!("\n {}. {}:", i + 1, name); - let actual = format_sexp_indented(&actual, 2); - let expected = format_sexp_indented(&expected, 2); + println!("\n {}. {name}:", i + 1); + let actual = format_sexp_indented(actual, 2); + let expected = format_sexp_indented(expected, 2); print_diff(&actual, &expected); } - Err(anyhow!("")) + + if has_parse_errors { + Err(anyhow!(indoc! {" + Some tests failed to parse with unexpected `ERROR` or `MISSING` nodes, as shown above, and cannot be updated automatically. + Either fix the grammar or manually update the tests if this is expected."})) + } else { + Err(anyhow!("")) + } } - } else { - Ok(()) } } -pub fn check_queries_at_path(language: Language, path: &Path) -> Result<()> { +pub fn check_queries_at_path(language: &Language, path: &Path) -> Result<()> { if path.exists() { for entry in WalkDir::new(path) .into_iter() - .filter_map(|e| e.ok()) + .filter_map(std::result::Result::ok) .filter(|e| { e.file_type().is_file() && e.path().extension().and_then(OsStr::to_str) == Some("scm") @@ -138,9 +192,9 @@ pub fn check_queries_at_path(language: Language, path: &Path) -> Result<()> { { let filepath = entry.file_name().to_str().unwrap_or(""); let content = fs::read_to_string(entry.path()) - .with_context(|| format!("Error reading query file {:?}", filepath))?; + .with_context(|| format!("Error reading query file {filepath:?}"))?; Query::new(language, &content) - .with_context(|| format!("Error in query file {:?}", filepath))?; + .with_context(|| format!("Error in query file {filepath:?}"))?; } } Ok(()) @@ -148,18 
+202,18 @@ pub fn check_queries_at_path(language: Language, path: &Path) -> Result<()> { pub fn print_diff_key() { println!( - "\n{} / {}", + "\ncorrect / {} / {}", Colour::Green.paint("expected"), - Colour::Red.paint("actual") + Colour::Red.paint("unexpected") ); } -pub fn print_diff(actual: &String, expected: &String) { +pub fn print_diff(actual: &str, expected: &str) { let changeset = Changeset::new(actual, expected, "\n"); for diff in &changeset.diffs { match diff { Difference::Same(part) => { - print!("{}{}", part, changeset.split); + print!("{part}{}", changeset.split); } Difference::Add(part) => { print!("{}{}", Colour::Green.paint(part), changeset.split); @@ -169,112 +223,254 @@ pub fn print_diff(actual: &String, expected: &String) { } } } - println!(""); + println!(); } fn run_tests( parser: &mut Parser, test_entry: TestEntry, - filter: Option<&str>, + opts: &mut TestOptions, mut indent_level: i32, failures: &mut Vec<(String, String, String)>, - update: bool, - corrected_entries: &mut Vec<(String, String, String)>, -) -> Result<()> { + corrected_entries: &mut Vec<(String, String, String, usize, usize)>, + has_parse_errors: &mut bool, +) -> Result { match test_entry { TestEntry::Example { name, input, output, + header_delim_len, + divider_delim_len, has_fields, + attributes, } => { - if let Some(filter) = filter { - if !name.contains(filter) { - if update { - let input = String::from_utf8(input).unwrap(); - let output = format_sexp(&output); - corrected_entries.push((name, input, output)); - } - return Ok(()); - } + print!("{}", " ".repeat(indent_level as usize)); + + if attributes.skip { + println!(" {}", Colour::Yellow.paint(&name)); + return Ok(true); } - let tree = parser.parse(&input, None).unwrap(); - let mut actual = tree.root_node().to_sexp(); - if !has_fields { - actual = strip_sexp_fields(actual); + + if !attributes.platform { + println!(" {}", Colour::Purple.paint(&name)); + return Ok(true); } - for _ in 0..indent_level { - print!(" "); - } - 
if actual == output { - println!("✓ {}", Colour::Green.paint(&name)); - if update { - let input = String::from_utf8(input).unwrap(); - let output = format_sexp(&output); - corrected_entries.push((name, input, output)); + + for (i, language_name) in attributes.languages.iter().enumerate() { + if !language_name.is_empty() { + let language = opts + .languages + .get(language_name.as_ref()) + .ok_or_else(|| anyhow!("Language not found: {language_name}"))?; + parser.set_language(language)?; } - } else { - if update { - let input = String::from_utf8(input).unwrap(); - let output = format_sexp(&actual); - corrected_entries.push((name.clone(), input, output)); - println!("✓ {}", Colour::Blue.paint(&name)); + let tree = parser.parse(&input, None).unwrap(); + + if attributes.error { + if tree.root_node().has_error() { + println!(" {}", Colour::Green.paint(&name)); + } else { + println!(" {}", Colour::Red.paint(&name)); + } + + if attributes.fail_fast { + return Ok(false); + } } else { - println!("✗ {}", Colour::Red.paint(&name)); + let mut actual = tree.root_node().to_sexp(); + if !has_fields { + actual = strip_sexp_fields(&actual); + } + + if actual == output { + println!("✓ {}", Colour::Green.paint(&name)); + if opts.update { + let input = String::from_utf8(input.clone()).unwrap(); + let output = format_sexp(&output); + corrected_entries.push(( + name.clone(), + input, + output, + header_delim_len, + divider_delim_len, + )); + } + } else { + if opts.update { + let input = String::from_utf8(input.clone()).unwrap(); + let expected_output = format_sexp(&output); + let actual_output = format_sexp(&actual); + + // Only bail early before updating if the actual is not the output, sometimes + // users want to test cases that are intended to have errors, hence why this + // check isn't shown above + if actual.contains("ERROR") || actual.contains("MISSING") { + *has_parse_errors = true; + + // keep the original `expected` output if the actual output has an error + 
corrected_entries.push(( + name.clone(), + input, + expected_output, + header_delim_len, + divider_delim_len, + )); + } else { + corrected_entries.push(( + name.clone(), + input, + actual_output, + header_delim_len, + divider_delim_len, + )); + println!("✓ {}", Colour::Blue.paint(&name)); + } + } else { + println!("✗ {}", Colour::Red.paint(&name)); + } + failures.push((name.clone(), actual, output.clone())); + + if attributes.fail_fast { + // return value of false means to fail fast + return Ok(false); + } + + if i == attributes.languages.len() - 1 { + // reset back to first language + parser.set_language(opts.languages.values().next().unwrap())?; + } + } } - failures.push((name, actual, output)); } } TestEntry::Group { name, - children, + mut children, file_path, } => { - if indent_level > 0 { - for _ in 0..indent_level { - print!(" "); + children.retain(|child| { + if let TestEntry::Example { name, .. } = child { + if let Some(filter) = opts.filter { + if !name.contains(filter) { + return false; + } + } + if let Some(include) = &opts.include { + if !include.is_match(name) { + return false; + } + } + if let Some(exclude) = &opts.exclude { + if exclude.is_match(name) { + return false; + } + } } - println!("{}:", name); + true + }); + + if children.is_empty() { + return Ok(true); + } + + if indent_level > 0 { + print!("{}", " ".repeat(indent_level as usize)); + println!("{name}:"); } let failure_count = failures.len(); indent_level += 1; for child in children { - run_tests( + if !run_tests( parser, child, - filter, + opts, indent_level, failures, - update, corrected_entries, - )?; + has_parse_errors, + )? 
{ + // fail fast + return Ok(false); + } } if let Some(file_path) = file_path { - if update && failures.len() - failure_count > 0 { + if opts.update && failures.len() - failure_count > 0 { write_tests(&file_path, corrected_entries)?; } corrected_entries.clear(); } } } - Ok(()) + Ok(true) } -fn format_sexp(sexp: &String) -> String { +fn format_sexp(sexp: &str) -> String { format_sexp_indented(sexp, 0) } -fn format_sexp_indented(sexp: &String, initial_indent_level: u32) -> String { +fn format_sexp_indented(sexp: &str, initial_indent_level: u32) -> String { let mut formatted = String::new(); + if sexp.is_empty() { + return formatted; + } + let mut indent_level = initial_indent_level; let mut has_field = false; - let mut s_iter = sexp.split(|c| c == ' ' || c == ')'); - while let Some(s) = s_iter.next() { - if s.is_empty() { + + let mut c_iter = sexp.chars().peekable(); + let mut s = String::with_capacity(sexp.len()); + let mut quote = '\0'; + let mut saw_paren = false; + let mut did_last = false; + + let mut fetch_next_str = |next: &mut String| { + next.clear(); + while let Some(c) = c_iter.next() { + if c == '\'' || c == '"' { + quote = c; + } else if c == ' ' || (c == ')' && quote != '\0') { + if let Some(next_c) = c_iter.peek() { + if *next_c == quote { + next.push(c); + next.push(*next_c); + c_iter.next(); + quote = '\0'; + continue; + } + } + break; + } + if c == ')' { + saw_paren = true; + break; + } + next.push(c); + } + + // at the end + if c_iter.peek().is_none() && next.is_empty() { + if saw_paren { + // but did we see a ) before ending? + saw_paren = false; + return Some(()); + } + if !did_last { + // but did we account for the end empty string as if we're splitting? 
+ did_last = true; + return Some(()); + } + return None; + } + Some(()) + }; + + while fetch_next_str(&mut s).is_some() { + if s.is_empty() && indent_level > 0 { // ")" indent_level -= 1; write!(formatted, ")").unwrap(); @@ -283,7 +479,7 @@ fn format_sexp_indented(sexp: &String, initial_indent_level: u32) -> String { has_field = false; } else { if indent_level > 0 { - writeln!(formatted, "").unwrap(); + writeln!(formatted).unwrap(); for _ in 0..indent_level { write!(formatted, " ").unwrap(); } @@ -292,20 +488,27 @@ fn format_sexp_indented(sexp: &String, initial_indent_level: u32) -> String { } // "(node_name" - write!(formatted, "{}", s).unwrap(); + write!(formatted, "{s}").unwrap(); // "(MISSING node_name" or "(UNEXPECTED 'x'" if s.starts_with("(MISSING") || s.starts_with("(UNEXPECTED") { - let s = s_iter.next().unwrap(); - write!(formatted, " {}", s).unwrap(); + fetch_next_str(&mut s).unwrap(); + if s.is_empty() { + while indent_level > 0 { + indent_level -= 1; + write!(formatted, ")").unwrap(); + } + } else { + write!(formatted, " {s}").unwrap(); + } } } else if s.ends_with(':') { // "field:" - writeln!(formatted, "").unwrap(); + writeln!(formatted).unwrap(); for _ in 0..indent_level { write!(formatted, " ").unwrap(); } - write!(formatted, "{} ", s).unwrap(); + write!(formatted, "{s} ").unwrap(); has_field = true; indent_level += 1; } @@ -314,27 +517,30 @@ fn format_sexp_indented(sexp: &String, initial_indent_level: u32) -> String { formatted } -fn write_tests(file_path: &Path, corrected_entries: &Vec<(String, String, String)>) -> Result<()> { +fn write_tests( + file_path: &Path, + corrected_entries: &[(String, String, String, usize, usize)], +) -> Result<()> { let mut buffer = fs::File::create(file_path)?; write_tests_to_buffer(&mut buffer, corrected_entries) } fn write_tests_to_buffer( buffer: &mut impl Write, - corrected_entries: &Vec<(String, String, String)>, + corrected_entries: &[(String, String, String, usize, usize)], ) -> Result<()> { - for (i, (name, 
input, output)) in corrected_entries.iter().enumerate() { + for (i, (name, input, output, header_delim_len, divider_delim_len)) in + corrected_entries.iter().enumerate() + { if i > 0 { - write!(buffer, "\n")?; + writeln!(buffer)?; } write!( buffer, - "{}\n{}\n{}\n{}\n{}\n\n{}\n", - "=".repeat(80), - name, - "=".repeat(80), - input, - "-".repeat(80), + "{}\n{name}\n{}\n{input}\n{}\n\n{}\n", + "=".repeat(*header_delim_len), + "=".repeat(*header_delim_len), + "-".repeat(*divider_delim_len), output.trim() )?; } @@ -351,11 +557,20 @@ pub fn parse_tests(path: &Path) -> io::Result { let mut children = Vec::new(); for entry in fs::read_dir(path)? { let entry = entry?; - let hidden = entry.file_name().to_str().unwrap_or("").starts_with("."); + let hidden = entry.file_name().to_str().unwrap_or("").starts_with('.'); if !hidden { - children.push(parse_tests(&entry.path())?); + children.push(entry.path()); } } + children.sort_by(|a, b| { + a.file_name() + .unwrap_or_default() + .cmp(b.file_name().unwrap_or_default()) + }); + let children = children + .iter() + .map(|path| parse_tests(path)) + .collect::>>()?; Ok(TestEntry::Group { name, children, @@ -363,15 +578,21 @@ pub fn parse_tests(path: &Path) -> io::Result { }) } else { let content = fs::read_to_string(path)?; - Ok(parse_test_content(name, content, Some(path.to_path_buf()))) + Ok(parse_test_content(name, &content, Some(path.to_path_buf()))) } } -pub fn strip_sexp_fields(sexp: String) -> String { - SEXP_FIELD_REGEX.replace_all(&sexp, " (").to_string() +#[must_use] +pub fn strip_sexp_fields(sexp: &str) -> String { + SEXP_FIELD_REGEX.replace_all(sexp, " (").to_string() +} + +#[must_use] +pub fn strip_points(sexp: &str) -> String { + POINT_REGEX.replace_all(sexp, "").to_string() } -fn parse_test_content(name: String, content: String, file_path: Option) -> TestEntry { +fn parse_test_content(name: String, content: &str, file_path: Option) -> TestEntry { let mut children = Vec::new(); let bytes = content.as_bytes(); let mut 
prev_name = String::new(); @@ -388,25 +609,84 @@ fn parse_test_content(name: String, content: String, file_path: Option) // Find all of the `===` test headers, which contain the test names. // Ignore any matches whose suffix does not match the first header // suffix in the file. - let header_matches = HEADER_REGEX.captures_iter(&bytes).filter_map(|c| { + let header_matches = HEADER_REGEX.captures_iter(bytes).filter_map(|c| { + let header_delim_len = c.name("equals").map_or(80, |m| m.as_bytes().len()); let suffix1 = c .name("suffix1") .map(|m| String::from_utf8_lossy(m.as_bytes())); let suffix2 = c .name("suffix2") .map(|m| String::from_utf8_lossy(m.as_bytes())); + + let (mut skip, mut platform, mut fail_fast, mut error, mut languages) = + (false, None, false, false, vec![]); + + let markers = c.name("markers").map_or("".as_bytes(), |m| m.as_bytes()); + + for marker in markers.split(|&c| c == b'\n').filter(|s| !s.is_empty()) { + let marker = str::from_utf8(marker).unwrap(); + let (marker, right) = marker.split_at(marker.find('(').unwrap_or(marker.len())); + match marker { + ":skip" => skip = true, + ":platform" => { + if let Some(platforms) = + right.strip_prefix('(').and_then(|s| s.strip_suffix(')')) + { + platform = Some( + platform.unwrap_or(false) || platforms.trim() == std::env::consts::OS, + ); + } + } + ":fail-fast" => fail_fast = true, + ":error" => error = true, + ":language" => { + if let Some(lang) = right.strip_prefix('(').and_then(|s| s.strip_suffix(')')) { + languages.push(lang.into()); + } + } + _ => {} + } + } + + // prefer skip over error, both shouldn't be set + if skip { + error = false; + } + + // add a default language if none are specified, will defer to the first language + if languages.is_empty() { + languages.push("".into()); + } + if suffix1 == first_suffix && suffix2 == first_suffix { let header_range = c.get(0).unwrap().range(); let test_name = c .name("test_name") .map(|c| String::from_utf8_lossy(c.as_bytes()).trim_end().to_string()); - 
Some((header_range, test_name)) + Some(( + header_delim_len, + header_range, + test_name, + TestAttributes { + skip, + platform: platform.unwrap_or(true), + fail_fast, + error, + languages, + }, + )) } else { None } }); - for (header_range, test_name) in header_matches.chain(Some((bytes.len()..bytes.len(), None))) { + let (mut prev_header_len, mut prev_attributes) = (80, TestAttributes::default()); + for (header_delim_len, header_range, test_name, attributes) in header_matches.chain(Some(( + 80, + bytes.len()..bytes.len(), + None, + TestAttributes::default(), + ))) { // Find the longest line of dashes following each test description. That line // separates the input from the expected output. Ignore any matches whose suffix // does not match the first suffix in the file. @@ -414,19 +694,23 @@ fn parse_test_content(name: String, content: String, file_path: Option) let divider_range = DIVIDER_REGEX .captures_iter(&bytes[prev_header_end..header_range.start]) .filter_map(|m| { + let divider_delim_len = m.name("hyphens").map_or(80, |m| m.as_bytes().len()); let suffix = m .name("suffix") .map(|m| String::from_utf8_lossy(m.as_bytes())); if suffix == first_suffix { let range = m.get(0).unwrap().range(); - Some((prev_header_end + range.start)..(prev_header_end + range.end)) + Some(( + divider_delim_len, + (prev_header_end + range.start)..(prev_header_end + range.end), + )) } else { None } }) - .max_by_key(|range| range.len()); + .max_by_key(|(_, range)| range.len()); - if let Some(divider_range) = divider_range { + if let Some((divider_delim_len, divider_range)) = divider_range { if let Ok(output) = str::from_utf8(&bytes[divider_range.end..header_range.start]) { let mut input = bytes[prev_header_end..divider_range.start].to_vec(); @@ -447,16 +731,23 @@ fn parse_test_content(name: String, content: String, file_path: Option) // fields will not be checked. 
let has_fields = SEXP_FIELD_REGEX.is_match(&output); - children.push(TestEntry::Example { + let t = TestEntry::Example { name: prev_name, input, output, + header_delim_len: prev_header_len, + divider_delim_len, has_fields, - }); + attributes: prev_attributes, + }; + + children.push(t); } } } + prev_attributes = attributes; prev_name = test_name.unwrap_or(String::new()); + prev_header_len = header_delim_len; prev_header_end = header_range.end; } TestEntry::Group { @@ -474,7 +765,7 @@ mod tests { fn test_parse_test_content_simple() { let entry = parse_test_content( "the-filename".to_string(), - r#" + r" =============== The first test =============== @@ -492,9 +783,8 @@ The second test d --- (d) - "# - .trim() - .to_string(), + " + .trim(), None, ); @@ -505,15 +795,21 @@ d children: vec![ TestEntry::Example { name: "The first test".to_string(), - input: "\na b c\n".as_bytes().to_vec(), + input: b"\na b c\n".to_vec(), output: "(a (b c))".to_string(), + header_delim_len: 15, + divider_delim_len: 3, has_fields: false, + attributes: TestAttributes::default(), }, TestEntry::Example { name: "The second test".to_string(), - input: "d".as_bytes().to_vec(), + input: b"d".to_vec(), output: "(d)".to_string(), + header_delim_len: 16, + divider_delim_len: 3, has_fields: false, + attributes: TestAttributes::default(), }, ], file_path: None, @@ -525,7 +821,7 @@ d fn test_parse_test_content_with_dashes_in_source_code() { let entry = parse_test_content( "the-filename".to_string(), - r#" + r" ================== Code with dashes ================== @@ -546,9 +842,8 @@ abc ------------------- (c (d)) - "# - .trim() - .to_string(), + " + .trim(), None, ); @@ -559,15 +854,21 @@ abc children: vec![ TestEntry::Example { name: "Code with dashes".to_string(), - input: "abc\n---\ndefg\n----\nhijkl".as_bytes().to_vec(), + input: b"abc\n---\ndefg\n----\nhijkl".to_vec(), output: "(a (b))".to_string(), + header_delim_len: 18, + divider_delim_len: 7, has_fields: false, + attributes: 
TestAttributes::default(), }, TestEntry::Example { name: "Code ending with dashes".to_string(), - input: "abc\n-----------".as_bytes().to_vec(), + input: b"abc\n-----------".to_vec(), output: "(c (d))".to_string(), + header_delim_len: 25, + divider_delim_len: 19, has_fields: false, + attributes: TestAttributes::default(), }, ], file_path: None, @@ -577,9 +878,10 @@ abc #[test] fn test_format_sexp() { + assert_eq!(format_sexp(""), ""); assert_eq!( - format_sexp(&"(a b: (c) (d) e: (f (g (h (MISSING i)))))".to_string()), - r#" + format_sexp("(a b: (c) (d) e: (f (g (h (MISSING i)))))"), + r" (a b: (c) (d) @@ -587,18 +889,39 @@ abc (g (h (MISSING i))))) -"# +" + .trim() + ); + assert_eq!(format_sexp("()"), "()"); + assert_eq!(format_sexp("(A (M (B)))"), "(A\n (M\n (B)))"); + assert_eq!(format_sexp("(A (U (B)))"), "(A\n (U\n (B)))"); + assert_eq!( + format_sexp("(program (ERROR (UNEXPECTED ' ')) (identifier))"), + r" +(program + (ERROR + (UNEXPECTED ' ')) + (identifier)) +" .trim() - .to_string() ); - assert_eq!(format_sexp(&"()".to_string()), "()".to_string()); assert_eq!( - format_sexp(&"(A (M (B)))".to_string()), - "(A\n (M\n (B)))" + format_sexp(r#"(source_file (MISSING ")"))"#), + r#" +(source_file + (MISSING ")")) + "# + .trim() ); assert_eq!( - format_sexp(&"(A (U (B)))".to_string()), - "(A\n (U\n (B)))" + format_sexp(r"(source_file (ERROR (UNEXPECTED 'f') (UNEXPECTED '+')))"), + r#" +(source_file + (ERROR + (UNEXPECTED 'f') + (UNEXPECTED '+'))) +"# + .trim() ); } @@ -610,17 +933,21 @@ abc "title 1".to_string(), "input 1".to_string(), "output 1".to_string(), + 80, + 80, ), ( "title 2".to_string(), "input 2".to_string(), "output 2".to_string(), + 80, + 80, ), ]; write_tests_to_buffer(&mut buffer, &corrected_entries).unwrap(); assert_eq!( String::from_utf8(buffer).unwrap(), - r#" + r" ================================================================================ title 1 ================================================================================ @@ -636,7 
+963,7 @@ input 2 -------------------------------------------------------------------------------- output 2 -"# +" .trim_start() .to_string() ); @@ -663,7 +990,7 @@ code --- ; Line start comment -(a +(a ; ignore this (b) ; also ignore this @@ -677,8 +1004,7 @@ code (MISSING ";") "# - .trim() - .to_string(), + .trim(), None, ); @@ -689,21 +1015,30 @@ code children: vec![ TestEntry::Example { name: "sexp with comment".to_string(), - input: "code".as_bytes().to_vec(), + input: b"code".to_vec(), output: "(a (b))".to_string(), + header_delim_len: 18, + divider_delim_len: 3, has_fields: false, + attributes: TestAttributes::default(), }, TestEntry::Example { name: "sexp with comment between".to_string(), - input: "code".as_bytes().to_vec(), + input: b"code".to_vec(), output: "(a (b))".to_string(), + header_delim_len: 18, + divider_delim_len: 3, has_fields: false, + attributes: TestAttributes::default(), }, TestEntry::Example { name: "sexp with ';'".to_string(), - input: "code".as_bytes().to_vec(), + input: b"code".to_vec(), output: "(MISSING \";\")".to_string(), + header_delim_len: 25, + divider_delim_len: 3, has_fields: false, + attributes: TestAttributes::default(), } ], file_path: None, @@ -715,7 +1050,7 @@ code fn test_parse_test_content_with_suffixes() { let entry = parse_test_content( "the-filename".to_string(), - r#" + r" ==================asdf\()[]|{}*+?^$.- First test ==================asdf\()[]|{}*+?^$.- @@ -754,17 +1089,15 @@ NOT A TEST HEADER ---asdf\()[]|{}*+?^$.- (a) - "# - .trim() - .to_string(), + " + .trim(), None, ); - let expected_input = "\n=========================\n\ + let expected_input = b"\n=========================\n\ NOT A TEST HEADER\n\ =========================\n\ -------------------------\n" - .as_bytes() .to_vec(); assert_eq!( entry, @@ -775,19 +1108,28 @@ NOT A TEST HEADER name: "First test".to_string(), input: expected_input.clone(), output: "(a)".to_string(), + header_delim_len: 18, + divider_delim_len: 3, has_fields: false, + attributes: 
TestAttributes::default(), }, TestEntry::Example { name: "Second test".to_string(), input: expected_input.clone(), output: "(a)".to_string(), + header_delim_len: 18, + divider_delim_len: 3, has_fields: false, + attributes: TestAttributes::default(), }, TestEntry::Example { name: "Test name with = symbol".to_string(), - input: expected_input.clone(), + input: expected_input, output: "(a)".to_string(), + header_delim_len: 25, + divider_delim_len: 3, has_fields: false, + attributes: TestAttributes::default(), } ], file_path: None, @@ -799,7 +1141,7 @@ NOT A TEST HEADER fn test_parse_test_content_with_newlines_in_test_names() { let entry = parse_test_content( "the-filename".to_string(), - r#" + r" =============== name with @@ -815,8 +1157,7 @@ name with === signs code with ---- --- (d) -"# - .to_string(), +", None, ); @@ -830,13 +1171,133 @@ code with ---- name: "name\nwith\nnewlines".to_string(), input: b"a".to_vec(), output: "(b)".to_string(), + header_delim_len: 15, + divider_delim_len: 3, has_fields: false, + attributes: TestAttributes::default(), }, TestEntry::Example { name: "name with === signs".to_string(), input: b"code with ----".to_vec(), output: "(d)".to_string(), + header_delim_len: 20, + divider_delim_len: 3, + has_fields: false, + attributes: TestAttributes::default(), + } + ] + } + ); + } + + #[test] + fn test_parse_test_with_markers() { + // do one with :skip, we should not see it in the entry output + + let entry = parse_test_content( + "the-filename".to_string(), + r" +===================== +Test with skip marker +:skip +===================== +a +--- +(b) +", + None, + ); + + assert_eq!( + entry, + TestEntry::Group { + name: "the-filename".to_string(), + file_path: None, + children: vec![TestEntry::Example { + name: "Test with skip marker".to_string(), + input: b"a".to_vec(), + output: "(b)".to_string(), + header_delim_len: 21, + divider_delim_len: 3, + has_fields: false, + attributes: TestAttributes { + skip: true, + platform: true, + fail_fast: 
false, + error: false, + languages: vec!["".into()] + }, + }] + } + ); + + let entry = parse_test_content( + "the-filename".to_string(), + &format!( + r" +========================= +Test with platform marker +:platform({}) +:fail-fast +========================= +a +--- +(b) + +============================= +Test with bad platform marker +:platform({}) +:language(foo) +============================= +a +--- +(b) +", + std::env::consts::OS, + if std::env::consts::OS == "linux" { + "macos" + } else { + "linux" + } + ), + None, + ); + + assert_eq!( + entry, + TestEntry::Group { + name: "the-filename".to_string(), + file_path: None, + children: vec![ + TestEntry::Example { + name: "Test with platform marker".to_string(), + input: b"a".to_vec(), + output: "(b)".to_string(), + header_delim_len: 25, + divider_delim_len: 3, + has_fields: false, + attributes: TestAttributes { + skip: false, + platform: true, + fail_fast: true, + error: false, + languages: vec!["".into()] + }, + }, + TestEntry::Example { + name: "Test with bad platform marker".to_string(), + input: b"a".to_vec(), + output: "(b)".to_string(), + header_delim_len: 29, + divider_delim_len: 3, has_fields: false, + attributes: TestAttributes { + skip: false, + platform: false, + fail_fast: false, + error: false, + languages: vec!["foo".into()] + }, } ] } diff --git a/third-party/tree-sitter/tree-sitter/cli/src/test_highlight.rs b/third-party/tree-sitter/tree-sitter/cli/src/test_highlight.rs index 2d9d536a94e..919d78643aa 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/test_highlight.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/test_highlight.rs @@ -1,11 +1,16 @@ -use crate::query_testing::{parse_position_comments, Assertion}; -use ansi_term::Colour; -use anyhow::{anyhow, Result}; use std::fs; use std::path::Path; + +use ansi_term::Colour; +use anyhow::{anyhow, Result}; use tree_sitter::Point; use tree_sitter_highlight::{Highlight, HighlightConfiguration, HighlightEvent, Highlighter}; -use 
tree_sitter_loader::Loader; +use tree_sitter_loader::{Config, Loader}; + +use super::{ + query_testing::{parse_position_comments, Assertion}, + util, +}; #[derive(Debug)] pub struct Failure { @@ -31,49 +36,91 @@ impl std::fmt::Display for Failure { if i > 0 { write!(f, ", ")?; } - write!(f, "'{}'", actual_highlight)?; + write!(f, "'{actual_highlight}'")?; } } Ok(()) } } -pub fn test_highlights(loader: &Loader, directory: &Path) -> Result<()> { +pub fn test_highlights( + loader: &Loader, + loader_config: &Config, + highlighter: &mut Highlighter, + directory: &Path, +) -> Result<()> { + println!("syntax highlighting:"); + test_highlights_indented(loader, loader_config, highlighter, directory, 2) +} + +fn test_highlights_indented( + loader: &Loader, + loader_config: &Config, + highlighter: &mut Highlighter, + directory: &Path, + indent_level: usize, +) -> Result<()> { let mut failed = false; - let mut highlighter = Highlighter::new(); - println!("syntax highlighting:"); for highlight_test_file in fs::read_dir(directory)? { let highlight_test_file = highlight_test_file?; let test_file_path = highlight_test_file.path(); let test_file_name = highlight_test_file.file_name(); - let (language, language_config) = loader - .language_configuration_for_file_name(&test_file_path)? - .ok_or_else(|| anyhow!("No language found for path {:?}", test_file_path))?; - let highlight_config = language_config - .highlight_config(language)? 
- .ok_or_else(|| anyhow!("No highlighting config found for {:?}", test_file_path))?; - match test_highlight( - &loader, - &mut highlighter, - highlight_config, - fs::read(&test_file_path)?.as_slice(), - ) { - Ok(assertion_count) => { - println!( - " ✓ {} ({} assertions)", - Colour::Green.paint(test_file_name.to_string_lossy().as_ref()), - assertion_count - ); - } - Err(e) => { - println!( - " ✗ {}", - Colour::Red.paint(test_file_name.to_string_lossy().as_ref()) - ); - println!(" {}", e); + print!( + "{indent:indent_level$}", + indent = "", + indent_level = indent_level * 2 + ); + if test_file_path.is_dir() && test_file_path.read_dir()?.next().is_some() { + println!("{}:", test_file_name.into_string().unwrap()); + if test_highlights_indented( + loader, + loader_config, + highlighter, + &test_file_path, + indent_level + 1, + ) + .is_err() + { failed = true; } + } else { + let (language, language_config) = loader + .language_configuration_for_file_name(&test_file_path)? + .ok_or_else(|| { + anyhow!( + "{}", + util::lang_not_found_for_path(test_file_path.as_path(), loader_config) + ) + })?; + let highlight_config = language_config + .highlight_config(language, None)? 
+ .ok_or_else(|| anyhow!("No highlighting config found for {test_file_path:?}"))?; + match test_highlight( + loader, + highlighter, + highlight_config, + fs::read(&test_file_path)?.as_slice(), + ) { + Ok(assertion_count) => { + println!( + "✓ {} ({assertion_count} assertions)", + Colour::Green.paint(test_file_name.to_string_lossy().as_ref()), + ); + } + Err(e) => { + println!( + "✗ {}", + Colour::Red.paint(test_file_name.to_string_lossy().as_ref()) + ); + println!( + "{indent:indent_level$} {e}", + indent = "", + indent_level = indent_level * 2 + ); + failed = true; + } + } } } @@ -84,55 +131,53 @@ pub fn test_highlights(loader: &Loader, directory: &Path) -> Result<()> { } } pub fn iterate_assertions( - assertions: &Vec, - highlights: &Vec<(Point, Point, Highlight)>, - highlight_names: &Vec, + assertions: &[Assertion], + highlights: &[(Point, Point, Highlight)], + highlight_names: &[String], ) -> Result { // Iterate through all of the highlighting assertions, checking each one against the // actual highlights. let mut i = 0; - let mut actual_highlights = Vec::<&String>::new(); + let mut actual_highlights = Vec::new(); for Assertion { position, + negative, expected_capture_name: expected_highlight, } in assertions { let mut passed = false; actual_highlights.clear(); - 'highlight_loop: loop { - // The assertions are ordered by position, so skip past all of the highlights that - // end at or before this assertion's position. - if let Some(highlight) = highlights.get(i) { - if highlight.1 <= *position { - i += 1; - continue; - } - - // Iterate through all of the highlights that start at or before this assertion's, - // position, looking for one that matches the assertion. - let mut j = i; - while let (false, Some(highlight)) = (passed, highlights.get(j)) { - if highlight.0 > *position { - break 'highlight_loop; - } + // The assertions are ordered by position, so skip past all of the highlights that + // end at or before this assertion's position. 
+ 'highlight_loop: while let Some(highlight) = highlights.get(i) { + if highlight.1 <= *position { + i += 1; + continue; + } - // If the highlight matches the assertion, this test passes. Otherwise, - // add this highlight to the list of actual highlights that span the - // assertion's position, in order to generate an error message in the event - // of a failure. - let highlight_name = &highlight_names[(highlight.2).0]; - if *highlight_name == *expected_highlight { - passed = true; - break 'highlight_loop; - } else { - actual_highlights.push(highlight_name); - } + // Iterate through all of the highlights that start at or before this assertion's, + // position, looking for one that matches the assertion. + let mut j = i; + while let (false, Some(highlight)) = (passed, highlights.get(j)) { + if highlight.0 > *position { + break 'highlight_loop; + } - j += 1; + // If the highlight matches the assertion, or if the highlight doesn't + // match the assertion but it's negative, this test passes. Otherwise, + // add this highlight to the list of actual highlights that span the + // assertion's position, in order to generate an error message in the event + // of a failure. + let highlight_name = &highlight_names[(highlight.2).0]; + if (*highlight_name == *expected_highlight) == *negative { + actual_highlights.push(highlight_name); + } else { + passed = true; + break 'highlight_loop; } - } else { - break; + + j += 1; } } @@ -160,70 +205,9 @@ pub fn test_highlight( let highlight_names = loader.highlight_names(); let highlights = get_highlight_positions(loader, highlighter, highlight_config, source)?; let assertions = - parse_position_comments(highlighter.parser(), highlight_config.language, source)?; - - iterate_assertions(&assertions, &highlights, &highlight_names)?; + parse_position_comments(highlighter.parser(), &highlight_config.language, source)?; - // Iterate through all of the highlighting assertions, checking each one against the - // actual highlights. 
- let mut i = 0; - let mut actual_highlights = Vec::<&String>::new(); - for Assertion { - position, - expected_capture_name: expected_highlight, - } in &assertions - { - let mut passed = false; - actual_highlights.clear(); - - 'highlight_loop: loop { - // The assertions are ordered by position, so skip past all of the highlights that - // end at or before this assertion's position. - if let Some(highlight) = highlights.get(i) { - if highlight.1 <= *position { - i += 1; - continue; - } - - // Iterate through all of the highlights that start at or before this assertion's, - // position, looking for one that matches the assertion. - let mut j = i; - while let (false, Some(highlight)) = (passed, highlights.get(j)) { - if highlight.0 > *position { - break 'highlight_loop; - } - - // If the highlight matches the assertion, this test passes. Otherwise, - // add this highlight to the list of actual highlights that span the - // assertion's position, in order to generate an error message in the event - // of a failure. 
- let highlight_name = &highlight_names[(highlight.2).0]; - if *highlight_name == *expected_highlight { - passed = true; - break 'highlight_loop; - } else { - actual_highlights.push(highlight_name); - } - - j += 1; - } - } else { - break; - } - } - - if !passed { - return Err(Failure { - row: position.row, - column: position.column, - expected_highlight: expected_highlight.clone(), - actual_highlights: actual_highlights.into_iter().cloned().collect(), - } - .into()); - } - } - - Ok(assertions.len()) + iterate_assertions(&assertions, &highlights, &highlight_names) } pub fn get_highlight_positions( @@ -268,7 +252,7 @@ pub fn get_highlight_positions( } } if let Some(highlight) = highlight_stack.last() { - result.push((start_position, Point::new(row, column), *highlight)) + result.push((start_position, Point::new(row, column), *highlight)); } } } diff --git a/third-party/tree-sitter/tree-sitter/cli/src/test_tags.rs b/third-party/tree-sitter/tree-sitter/cli/src/test_tags.rs index 024d094c9dd..11395e10193 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/test_tags.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/test_tags.rs @@ -1,12 +1,17 @@ -use crate::query_testing::{parse_position_comments, Assertion}; -use ansi_term::Colour; -use anyhow::{anyhow, Result}; use std::fs; use std::path::Path; + +use ansi_term::Colour; +use anyhow::{anyhow, Result}; use tree_sitter::Point; -use tree_sitter_loader::Loader; +use tree_sitter_loader::{Config, Loader}; use tree_sitter_tags::{TagsConfiguration, TagsContext}; +use super::{ + query_testing::{parse_position_comments, Assertion}, + util, +}; + #[derive(Debug)] pub struct Failure { row: usize, @@ -31,16 +36,20 @@ impl std::fmt::Display for Failure { if i > 0 { write!(f, ", ")?; } - write!(f, "'{}'", actual_tag)?; + write!(f, "'{actual_tag}'")?; } } Ok(()) } } -pub fn test_tags(loader: &Loader, directory: &Path) -> Result<()> { +pub fn test_tags( + loader: &Loader, + loader_config: &Config, + tags_context: &mut 
TagsContext, + directory: &Path, +) -> Result<()> { let mut failed = false; - let mut tags_context = TagsContext::new(); println!("tags:"); for tag_test_file in fs::read_dir(directory)? { @@ -49,20 +58,24 @@ pub fn test_tags(loader: &Loader, directory: &Path) -> Result<()> { let test_file_name = tag_test_file.file_name(); let (language, language_config) = loader .language_configuration_for_file_name(&test_file_path)? - .ok_or_else(|| anyhow!("No language found for path {:?}", test_file_path))?; + .ok_or_else(|| { + anyhow!( + "{}", + util::lang_not_found_for_path(test_file_path.as_path(), loader_config) + ) + })?; let tags_config = language_config .tags_config(language)? .ok_or_else(|| anyhow!("No tags config found for {:?}", test_file_path))?; match test_tag( - &mut tags_context, + tags_context, tags_config, fs::read(&test_file_path)?.as_slice(), ) { Ok(assertion_count) => { println!( - " ✓ {} ({} assertions)", + " ✓ {} ({assertion_count} assertions)", Colour::Green.paint(test_file_name.to_string_lossy().as_ref()), - assertion_count ); } Err(e) => { @@ -70,7 +83,7 @@ pub fn test_tags(loader: &Loader, directory: &Path) -> Result<()> { " ✗ {}", Colour::Red.paint(test_file_name.to_string_lossy().as_ref()) ); - println!(" {}", e); + println!(" {e}"); failed = true; } } @@ -89,45 +102,45 @@ pub fn test_tag( source: &[u8], ) -> Result { let tags = get_tag_positions(tags_context, tags_config, source)?; - let assertions = parse_position_comments(tags_context.parser(), tags_config.language, source)?; + let assertions = parse_position_comments(tags_context.parser(), &tags_config.language, source)?; // Iterate through all of the assertions, checking against the actual tags. 
let mut i = 0; let mut actual_tags = Vec::<&String>::new(); for Assertion { position, + negative, expected_capture_name: expected_tag, } in &assertions { let mut passed = false; - 'tag_loop: loop { - if let Some(tag) = tags.get(i) { - if tag.1 <= *position { - i += 1; - continue; + 'tag_loop: while let Some(tag) = tags.get(i) { + if tag.1 <= *position { + i += 1; + continue; + } + + // Iterate through all of the tags that start at or before this assertion's + // position, looking for one that matches the assertion + let mut j = i; + while let (false, Some(tag)) = (passed, tags.get(j)) { + if tag.0 > *position { + break 'tag_loop; } - // Iterate through all of the tags that start at or before this assertion's - // position, looking for one that matches the assertion - let mut j = i; - while let (false, Some(tag)) = (passed, tags.get(j)) { - if tag.0 > *position { - break 'tag_loop; - } - - let tag_name = &tag.2; - if *tag_name == *expected_tag { - passed = true; - break 'tag_loop; - } else { - actual_tags.push(tag_name); - } - - j += 1; + let tag_name = &tag.2; + if (*tag_name == *expected_tag) == *negative { + actual_tags.push(tag_name); + } else { + passed = true; + break 'tag_loop; + } + + j += 1; + if tag == tags.last().unwrap() { + break 'tag_loop; } - } else { - break; } } @@ -150,15 +163,15 @@ pub fn get_tag_positions( tags_config: &TagsConfiguration, source: &[u8], ) -> Result> { - let (tags_iter, _has_error) = tags_context.generate_tags(&tags_config, &source, None)?; + let (tags_iter, _has_error) = tags_context.generate_tags(tags_config, source, None)?; let tag_positions = tags_iter - .filter_map(|t| t.ok()) + .filter_map(std::result::Result::ok) .map(|tag| { let tag_postfix = tags_config.syntax_type_name(tag.syntax_type_id).to_string(); let tag_name = if tag.is_definition { - format!("definition.{}", tag_postfix) + format!("definition.{tag_postfix}") } else { - format!("reference.{}", tag_postfix) + format!("reference.{tag_postfix}") }; (tag.span.start, 
tag.span.end, tag_name) }) diff --git a/third-party/tree-sitter/tree-sitter/cli/src/tests/async_context_test.rs b/third-party/tree-sitter/tree-sitter/cli/src/tests/async_context_test.rs new file mode 100644 index 00000000000..db9bedd411d --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/cli/src/tests/async_context_test.rs @@ -0,0 +1,280 @@ +use super::helpers::fixtures::get_language; +use std::future::Future; +use std::pin::{pin, Pin}; +use std::ptr; +use std::task::{self, Context, Poll, RawWaker, RawWakerVTable, Waker}; +use tree_sitter::Parser; + +#[test] +fn test_node_in_fut() { + let (ret, pended) = tokio_like_spawn(async { + let mut parser = Parser::new(); + let language = get_language("bash"); + parser.set_language(&language).unwrap(); + + let tree = parser.parse("#", None).unwrap(); + + let root = tree.root_node(); + let root_ref = &root; + + let fut_val_fn = || async { + // eprintln!("fut_val_fn: {}", root.child(0).unwrap().kind()); + yield_now().await; + root.child(0).unwrap().kind() + }; + + yield_now().await; + + let fut_ref_fn = || async { + // eprintln!("fut_ref_fn: {}", root_ref.child(0).unwrap().kind()); + yield_now().await; + root_ref.child(0).unwrap().kind() + }; + + let f1 = fut_val_fn().await; + let f2 = fut_ref_fn().await; + assert_eq!(f1, f2); + + let fut_val = async { + // eprintln!("fut_val: {}", root.child(0).unwrap().kind()); + yield_now().await; + root.child(0).unwrap().kind() + }; + + let fut_ref = async { + // eprintln!("fut_ref: {}", root_ref.child(0).unwrap().kind()); + yield_now().await; + root_ref.child(0).unwrap().kind() + }; + + let f1 = fut_val.await; + let f2 = fut_ref.await; + assert_eq!(f1, f2); + + f1 + }) + .join(); + // eprintln!("pended: {pended:?}"); + assert_eq!(ret, "comment"); + assert_eq!(pended, 5); +} + +#[test] +fn test_node_and_cursor_ref_in_fut() { + let ((), pended) = tokio_like_spawn(async { + let mut parser = Parser::new(); + let language = get_language("c"); + parser.set_language(&language).unwrap(); + + 
let tree = parser.parse("#", None).unwrap(); + + let root = tree.root_node(); + let root_ref = &root; + + let mut cursor = tree.walk(); + let cursor_ref = &mut cursor; + + cursor_ref.goto_first_child(); + + let fut_val = async { + yield_now().await; + let _ = root.to_sexp(); + }; + + yield_now().await; + + let fut_ref = async { + yield_now().await; + let _ = root_ref.to_sexp(); + cursor_ref.goto_first_child(); + }; + + fut_val.await; + fut_ref.await; + + cursor_ref.goto_first_child(); + }) + .join(); + assert_eq!(pended, 3); +} + +#[test] +fn test_node_and_cursor_ref_in_fut_with_fut_fabrics() { + let ((), pended) = tokio_like_spawn(async { + let mut parser = Parser::new(); + let language = get_language("javascript"); + parser.set_language(&language).unwrap(); + + let tree = parser.parse("#", None).unwrap(); + + let root = tree.root_node(); + let root_ref = &root; + + let mut cursor = tree.walk(); + let cursor_ref = &mut cursor; + + cursor_ref.goto_first_child(); + + let fut_val = || async { + yield_now().await; + let _ = root.to_sexp(); + }; + + yield_now().await; + + let fut_ref = || async move { + yield_now().await; + let _ = root_ref.to_sexp(); + cursor_ref.goto_first_child(); + }; + + fut_val().await; + fut_val().await; + fut_ref().await; + }) + .join(); + assert_eq!(pended, 4); +} + +#[test] +fn test_node_and_cursor_ref_in_fut_with_inner_spawns() { + let (ret, pended) = tokio_like_spawn(async { + let mut parser = Parser::new(); + let language = get_language("rust"); + parser.set_language(&language).unwrap(); + + let tree = parser.parse("#", None).unwrap(); + + let mut cursor = tree.walk(); + let cursor_ref = &mut cursor; + + cursor_ref.goto_first_child(); + + let fut_val = || { + let tree = tree.clone(); + async move { + let root = tree.root_node(); + let mut cursor = tree.walk(); + let cursor_ref = &mut cursor; + yield_now().await; + let _ = root.to_sexp(); + cursor_ref.goto_first_child(); + } + }; + + yield_now().await; + + let fut_ref = || { + let tree = 
tree.clone(); + async move { + let root = tree.root_node(); + let root_ref = &root; + let mut cursor = tree.walk(); + let cursor_ref = &mut cursor; + yield_now().await; + let _ = root_ref.to_sexp(); + cursor_ref.goto_first_child(); + } + }; + + let ((), p1) = tokio_like_spawn(fut_val()).await.unwrap(); + let ((), p2) = tokio_like_spawn(fut_ref()).await.unwrap(); + + cursor_ref.goto_first_child(); + + fut_val().await; + fut_val().await; + fut_ref().await; + + cursor_ref.goto_first_child(); + + p1 + p2 + }) + .join(); + assert_eq!(pended, 4); + assert_eq!(ret, 2); +} + +fn tokio_like_spawn(future: T) -> JoinHandle<(T::Output, usize)> +where + T: Future + Send + 'static, + T::Output: Send + 'static, +{ + // No runtime, just noop waker + + let waker = noop_waker(); + let mut cx = task::Context::from_waker(&waker); + + let mut pending = 0; + let mut future = pin!(future); + let ret = loop { + match future.as_mut().poll(&mut cx) { + Poll::Pending => pending += 1, + Poll::Ready(r) => { + // eprintln!("ready, pended: {pending}"); + break r; + } + } + }; + JoinHandle::new((ret, pending)) +} + +async fn yield_now() { + struct SimpleYieldNow { + yielded: bool, + } + + impl Future for SimpleYieldNow { + type Output = (); + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<()> { + cx.waker().wake_by_ref(); + if self.yielded { + return Poll::Ready(()); + } + self.yielded = true; + Poll::Pending + } + } + + SimpleYieldNow { yielded: false }.await; +} + +pub fn noop_waker() -> Waker { + const VTABLE: RawWakerVTable = RawWakerVTable::new( + // Cloning just returns a new no-op raw waker + |_| RAW, + // `wake` does nothing + |_| {}, + // `wake_by_ref` does nothing + |_| {}, + // Dropping does nothing as we don't allocate anything + |_| {}, + ); + const RAW: RawWaker = RawWaker::new(ptr::null(), &VTABLE); + unsafe { Waker::from_raw(RAW) } +} + +struct JoinHandle { + data: Option, +} + +impl JoinHandle { + #[must_use] + const fn new(data: T) -> Self { + Self { data: 
Some(data) } + } + + fn join(&mut self) -> T { + self.data.take().unwrap() + } +} + +impl Future for JoinHandle { + type Output = std::result::Result; + + fn poll(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll { + let data = self.get_mut().data.take().unwrap(); + Poll::Ready(Ok(data)) + } +} diff --git a/third-party/tree-sitter/tree-sitter/cli/src/tests/corpus_test.rs b/third-party/tree-sitter/tree-sitter/cli/src/tests/corpus_test.rs index b818b2c1553..057a672f946 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/tests/corpus_test.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/tests/corpus_test.rs @@ -1,7 +1,7 @@ use super::helpers::{ allocations, edits::{get_random_edit, invert_edit}, - fixtures::{fixtures_dir, get_language, get_test_language}, + fixtures::{fixtures_dir, get_language, get_test_language, SCRATCH_BASE_DIR}, new_seed, random::Rand, scope_sequence::ScopeSequence, @@ -14,85 +14,121 @@ use crate::{ test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry}, util, }; -use proc_macro::test_with_seed; -use std::{env, fs}; +use std::{collections::HashMap, env, fs}; use tree_sitter::{LogType, Node, Parser, Point, Range, Tree}; +use tree_sitter_proc_macro::test_with_seed; #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn test_corpus_for_bash(seed: usize) { - test_language_corpus(seed, "bash"); + test_language_corpus( + "bash", + seed, + Some(&[ + // Fragile tests where edit customization changes + // lead to significant parse tree structure changes. 
+ "bash - corpus - commands - Nested Heredocs", + "bash - corpus - commands - Quoted Heredocs", + "bash - corpus - commands - Heredocs with weird characters", + ]), + None, + ); } #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn test_corpus_for_c(seed: usize) { - test_language_corpus(seed, "c"); + test_language_corpus("c", seed, None, None); } #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn test_corpus_for_cpp(seed: usize) { - test_language_corpus(seed, "cpp"); + test_language_corpus("cpp", seed, None, None); } #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn test_corpus_for_embedded_template(seed: usize) { - test_language_corpus(seed, "embedded-template"); + test_language_corpus("embedded-template", seed, None, None); } #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn test_corpus_for_go(seed: usize) { - test_language_corpus(seed, "go"); + test_language_corpus("go", seed, None, None); } #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn test_corpus_for_html(seed: usize) { - test_language_corpus(seed, "html"); + test_language_corpus("html", seed, None, None); +} + +#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] +fn test_corpus_for_java(seed: usize) { + test_language_corpus("java", seed, None, None); } #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn test_corpus_for_javascript(seed: usize) { - test_language_corpus(seed, "javascript"); + test_language_corpus("javascript", seed, None, None); } #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn test_corpus_for_json(seed: usize) { - test_language_corpus(seed, "json"); + test_language_corpus("json", seed, None, None); } #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn test_corpus_for_php(seed: usize) { - test_language_corpus(seed, "php"); + test_language_corpus("php", seed, None, Some("php")); } #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn 
test_corpus_for_python(seed: usize) { - test_language_corpus(seed, "python"); + test_language_corpus("python", seed, None, None); } #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn test_corpus_for_ruby(seed: usize) { - test_language_corpus(seed, "ruby"); + test_language_corpus("ruby", seed, None, None); } #[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] fn test_corpus_for_rust(seed: usize) { - test_language_corpus(seed, "rust"); + test_language_corpus("rust", seed, None, None); +} + +#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] +fn test_corpus_for_typescript(seed: usize) { + test_language_corpus("typescript", seed, None, Some("typescript")); } -fn test_language_corpus(start_seed: usize, language_name: &str) { +#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)] +fn test_corpus_for_tsx(seed: usize) { + test_language_corpus("typescript", seed, None, Some("tsx")); +} + +fn test_language_corpus( + language_name: &str, + start_seed: usize, + skipped: Option<&[&str]>, + subdir: Option<&str>, +) { + let subdir = subdir.unwrap_or_default(); + let grammars_dir = fixtures_dir().join("grammars"); let error_corpus_dir = fixtures_dir().join("error_corpus"); let template_corpus_dir = fixtures_dir().join("template_corpus"); - let mut corpus_dir = grammars_dir.join(language_name).join("corpus"); + let mut corpus_dir = grammars_dir.join(language_name).join(subdir).join("corpus"); if !corpus_dir.is_dir() { - corpus_dir = grammars_dir.join(language_name).join("test").join("corpus"); + corpus_dir = grammars_dir + .join(language_name) + .join(subdir) + .join("test") + .join("corpus"); } - let error_corpus_file = error_corpus_dir.join(&format!("{}_errors.txt", language_name)); - let template_corpus_file = - template_corpus_dir.join(&format!("{}_templates.txt", language_name)); + let error_corpus_file = error_corpus_dir.join(format!("{language_name}_errors.txt")); + let template_corpus_file = 
template_corpus_dir.join(format!("{language_name}_templates.txt")); let main_tests = parse_tests(&corpus_dir).unwrap(); - let error_tests = parse_tests(&error_corpus_file).unwrap_or(TestEntry::default()); - let template_tests = parse_tests(&template_corpus_file).unwrap_or(TestEntry::default()); + let error_tests = parse_tests(&error_corpus_file).unwrap_or_default(); + let template_tests = parse_tests(&template_corpus_file).unwrap_or_default(); let mut tests = flatten_tests(main_tests); tests.extend(flatten_tests(error_tests)); tests.extend(flatten_tests(template_tests).into_iter().map(|mut t| { @@ -100,35 +136,53 @@ fn test_language_corpus(start_seed: usize, language_name: &str) { t })); - let language = get_language(language_name); + let mut skipped = skipped.map(|x| x.iter().map(|x| (*x, 0)).collect::>()); + + let language_path = if subdir.is_empty() { + language_name.to_string() + } else { + format!("{language_name}/{subdir}") + }; + let language = get_language(&language_path); let mut failure_count = 0; let log_seed = env::var("TREE_SITTER_LOG_SEED").is_ok(); + let dump_edits = env::var("TREE_SITTER_DUMP_EDITS").is_ok(); + + if log_seed { + println!(" start seed: {start_seed}"); + } println!(); - for test in tests { - println!(" {} example - {}", language_name, test.name); + for (test_index, test) in tests.iter().enumerate() { + let test_name = format!("{language_name} - {}", test.name); + if let Some(skipped) = skipped.as_mut() { + if let Some(counter) = skipped.get_mut(test_name.as_str()) { + println!(" {test_index}. {test_name} - SKIPPED"); + *counter += 1; + continue; + } + } + + println!(" {test_index}. 
{test_name}"); let passed = allocations::record(|| { let mut log_session = None; let mut parser = get_parser(&mut log_session, "log.html"); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); set_included_ranges(&mut parser, &test.input, test.template_delimiters); let tree = parser.parse(&test.input, None).unwrap(); let mut actual_output = tree.root_node().to_sexp(); if !test.has_fields { - actual_output = strip_sexp_fields(actual_output); + actual_output = strip_sexp_fields(&actual_output); } if actual_output != test.output { - println!( - "Incorrect initial parse for {} - {}", - language_name, test.name, - ); + println!("Incorrect initial parse for {test_name}"); print_diff_key(); print_diff(&actual_output, &test.output); - println!(""); + println!(); return false; } @@ -141,7 +195,7 @@ fn test_language_corpus(start_seed: usize, language_name: &str) { } let mut parser = Parser::new(); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); let tree = parser.parse(&test.input, None).unwrap(); drop(parser); @@ -151,7 +205,7 @@ fn test_language_corpus(start_seed: usize, language_name: &str) { let mut rand = Rand::new(seed); let mut log_session = None; let mut parser = get_parser(&mut log_session, "log.html"); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); let mut tree = tree.clone(); let mut input = test.input.clone(); @@ -164,11 +218,20 @@ fn test_language_corpus(start_seed: usize, language_name: &str) { for _ in 0..1 + rand.unsigned(*EDIT_COUNT) { let edit = get_random_edit(&mut rand, &input); undo_stack.push(invert_edit(&input, &edit)); - perform_edit(&mut tree, &mut input, &edit); + perform_edit(&mut tree, &mut input, &edit).unwrap(); } if log_seed { - println!(" seed: {}", seed); + println!(" {test_index}.{trial:<2} seed: {seed}"); + } + + if dump_edits { + fs::write( + SCRATCH_BASE_DIR + .join(format!("edit.{seed}.{test_index}.{trial} {test_name}")), + &input, + 
) + .unwrap(); } if *LOG_GRAPH_ENABLED { @@ -187,7 +250,7 @@ fn test_language_corpus(start_seed: usize, language_name: &str) { // Undo all of the edits and re-parse again. while let Some(edit) = undo_stack.pop() { - perform_edit(&mut tree2, &mut input, &edit); + perform_edit(&mut tree2, &mut input, &edit).unwrap(); } if *LOG_GRAPH_ENABLED { eprintln!("{}\n", String::from_utf8_lossy(&input)); @@ -199,17 +262,14 @@ fn test_language_corpus(start_seed: usize, language_name: &str) { // Verify that the final tree matches the expectation from the corpus. let mut actual_output = tree3.root_node().to_sexp(); if !test.has_fields { - actual_output = strip_sexp_fields(actual_output); + actual_output = strip_sexp_fields(&actual_output); } if actual_output != test.output { - println!( - "Incorrect parse for {} - {} - seed {}", - language_name, test.name, seed - ); + println!("Incorrect parse for {test_name} - seed {seed}"); print_diff_key(); print_diff(&actual_output, &test.output); - println!(""); + println!(); return false; } @@ -230,8 +290,21 @@ fn test_language_corpus(start_seed: usize, language_name: &str) { } } - if failure_count > 0 { - panic!("{} {} corpus tests failed", failure_count, language_name); + assert!( + failure_count == 0, + "{failure_count} {language_name} corpus tests failed" + ); + + if let Some(skipped) = skipped.as_mut() { + skipped.retain(|_, v| *v == 0); + + if !skipped.is_empty() { + println!("Non matchable skip definitions:"); + for k in skipped.keys() { + println!(" {k}"); + } + panic!("Non matchable skip definitions needs to be removed"); + } } } @@ -240,7 +313,7 @@ fn test_feature_corpus_files() { let test_grammars_dir = fixtures_dir().join("test_grammars"); let mut failure_count = 0; - for entry in fs::read_dir(&test_grammars_dir).unwrap() { + for entry in fs::read_dir(test_grammars_dir).unwrap() { let entry = entry.unwrap(); if !entry.metadata().unwrap().is_dir() { continue; @@ -260,7 +333,7 @@ fn test_feature_corpus_files() { grammar_path = 
test_path.join("grammar.json"); } let error_message_path = test_path.join("expected_error.txt"); - let grammar_json = generate::load_grammar_file(&grammar_path).unwrap(); + let grammar_json = generate::load_grammar_file(&grammar_path, None).unwrap(); let generate_result = generate::generate_parser_for_grammar(&grammar_json); if error_message_path.exists() { @@ -268,7 +341,7 @@ fn test_feature_corpus_files() { continue; } - eprintln!("test language: {:?}", language_name); + eprintln!("test language: {language_name:?}"); let expected_message = fs::read_to_string(&error_message_path) .unwrap() @@ -277,24 +350,17 @@ fn test_feature_corpus_files() { let actual_message = e.to_string().replace("\r\n", "\n"); if expected_message != actual_message { eprintln!( - "Unexpected error message.\n\nExpected:\n\n{}\nActual:\n\n{}\n", - expected_message, actual_message + "Unexpected error message.\n\nExpected:\n\n{expected_message}\nActual:\n\n{actual_message}\n", ); failure_count += 1; } } else { - eprintln!( - "Expected error message but got none for test grammar '{}'", - language_name - ); + eprintln!("Expected error message but got none for test grammar '{language_name}'",); failure_count += 1; } } else { if let Err(e) = &generate_result { - eprintln!( - "Unexpected error for test grammar '{}':\n{}", - language_name, e - ); + eprintln!("Unexpected error for test grammar '{language_name}':\n{e}",); failure_count += 1; continue; } @@ -306,7 +372,7 @@ fn test_feature_corpus_files() { let tests = flatten_tests(test); if !tests.is_empty() { - eprintln!("test language: {:?}", language_name); + eprintln!("test language: {language_name:?}"); } for test in tests { @@ -315,18 +381,18 @@ fn test_feature_corpus_files() { let passed = allocations::record(|| { let mut log_session = None; let mut parser = get_parser(&mut log_session, "log.html"); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); let tree = parser.parse(&test.input, None).unwrap(); let mut 
actual_output = tree.root_node().to_sexp(); if !test.has_fields { - actual_output = strip_sexp_fields(actual_output); + actual_output = strip_sexp_fields(&actual_output); } if actual_output == test.output { true } else { print_diff_key(); print_diff(&actual_output, &test.output); - println!(""); + println!(); false } }); @@ -338,13 +404,12 @@ fn test_feature_corpus_files() { } } } - if failure_count > 0 { - panic!("{} corpus tests failed", failure_count); - } + + assert!(failure_count == 0, "{failure_count} corpus tests failed"); } -fn check_consistent_sizes(tree: &Tree, input: &Vec) { - fn check(node: Node, line_offsets: &Vec) { +fn check_consistent_sizes(tree: &Tree, input: &[u8]) { + fn check(node: Node, line_offsets: &[usize]) { let start_byte = node.start_byte(); let end_byte = node.end_byte(); let start_point = node.start_position(); @@ -391,7 +456,7 @@ fn check_consistent_sizes(tree: &Tree, input: &Vec) { let mut line_offsets = vec![0]; for (i, c) in input.iter().enumerate() { - if *c == '\n' as u8 { + if *c == b'\n' { line_offsets.push(i + 1); } } @@ -399,7 +464,7 @@ fn check_consistent_sizes(tree: &Tree, input: &Vec) { check(tree.root_node(), &line_offsets); } -fn check_changed_ranges(old_tree: &Tree, new_tree: &Tree, input: &Vec) -> Result<(), String> { +fn check_changed_ranges(old_tree: &Tree, new_tree: &Tree, input: &[u8]) -> Result<(), String> { let changed_ranges = old_tree.changed_ranges(new_tree).collect::>(); let old_scope_sequence = ScopeSequence::new(old_tree); let new_scope_sequence = ScopeSequence::new(new_tree); @@ -415,13 +480,12 @@ fn check_changed_ranges(old_tree: &Tree, new_tree: &Tree, input: &Vec) -> Re for range in &changed_ranges { if range.end_byte > byte_range.end || range.end_point > point_range.end { return Err(format!( - "changed range extends outside of the old and new trees {:?}", - range + "changed range extends outside of the old and new trees {range:?}", )); } } - old_scope_sequence.check_changes(&new_scope_sequence, &input, 
&changed_ranges) + old_scope_sequence.check_changes(&new_scope_sequence, input, &changed_ranges) } fn set_included_ranges(parser: &mut Parser, input: &[u8], delimiters: Option<(&str, &str)>) { @@ -429,7 +493,12 @@ fn set_included_ranges(parser: &mut Parser, input: &[u8], delimiters: Option<(&s let mut ranges = Vec::new(); let mut ix = 0; while ix < input.len() { - let Some(mut start_ix) = input[ix..].windows(2).position(|win| win == start.as_bytes()) else { break }; + let Some(mut start_ix) = input[ix..] + .windows(2) + .position(|win| win == start.as_bytes()) + else { + break; + }; start_ix += ix + start.len(); let end_ix = input[start_ix..] .windows(2) @@ -469,13 +538,13 @@ fn get_parser(session: &mut Option, log_filename: &str) -> Par if *LOG_ENABLED { parser.set_logger(Some(Box::new(|log_type, msg| { if log_type == LogType::Lex { - eprintln!(" {}", msg); + eprintln!(" {msg}"); } else { - eprintln!("{}", msg); + eprintln!("{msg}"); } }))); } else if *LOG_GRAPH_ENABLED { - *session = Some(util::log_graphs(&mut parser, log_filename).unwrap()); + *session = Some(util::log_graphs(&mut parser, log_filename, false).unwrap()); } parser @@ -497,6 +566,7 @@ fn flatten_tests(test: TestEntry) -> Vec { input, output, has_fields, + .. 
} => { if !prefix.is_empty() { name.insert_str(0, " - "); diff --git a/third-party/tree-sitter/tree-sitter/cli/src/tests/detect_language.rs b/third-party/tree-sitter/tree-sitter/cli/src/tests/detect_language.rs new file mode 100644 index 00000000000..c4f41185f01 --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/cli/src/tests/detect_language.rs @@ -0,0 +1,134 @@ +use crate::tests::helpers::fixtures::scratch_dir; + +use std::path::Path; +use tree_sitter_loader::Loader; + +#[test] +fn detect_language_by_first_line_regex() { + let strace_dir = tree_sitter_dir( + r#"{ + "name": "tree-sitter-strace", + "version": "0.0.1", + "tree-sitter": [ + { + "scope": "source.strace", + "file-types": [ + "strace" + ], + "first-line-regex": "[0-9:.]* *execve" + } + ] +} +"#, + "strace", + ); + + let mut loader = Loader::with_parser_lib_path(scratch_dir().to_path_buf()); + let config = loader + .find_language_configurations_at_path(strace_dir.path(), false) + .unwrap(); + + // this is just to validate that we can read the package.json correctly + assert_eq!(config[0].scope.as_ref().unwrap(), "source.strace"); + + let file_name = strace_dir.path().join("strace.log"); + std::fs::write(&file_name, "execve\nworld").unwrap(); + assert_eq!( + get_lang_scope(&loader, &file_name), + Some("source.strace".into()) + ); + + let file_name = strace_dir.path().join("strace.log"); + std::fs::write(&file_name, "447845 execve\nworld").unwrap(); + assert_eq!( + get_lang_scope(&loader, &file_name), + Some("source.strace".into()) + ); + + let file_name = strace_dir.path().join("strace.log"); + std::fs::write(&file_name, "hello\nexecve").unwrap(); + assert!(get_lang_scope(&loader, &file_name).is_none()); + + let file_name = strace_dir.path().join("strace.log"); + std::fs::write(&file_name, "").unwrap(); + assert!(get_lang_scope(&loader, &file_name).is_none()); + + let dummy_dir = tree_sitter_dir( + r#"{ + "name": "tree-sitter-dummy", + "version": "0.0.1", + "tree-sitter": [ + { + "scope": 
"source.dummy", + "file-types": [ + "dummy" + ] + } + ] +} +"#, + "dummy", + ); + + // file-type takes precedence over first-line-regex + loader + .find_language_configurations_at_path(dummy_dir.path(), false) + .unwrap(); + let file_name = dummy_dir.path().join("strace.dummy"); + std::fs::write(&file_name, "execve").unwrap(); + assert_eq!( + get_lang_scope(&loader, &file_name), + Some("source.dummy".into()) + ); +} + +fn tree_sitter_dir(package_json: &str, name: &str) -> tempfile::TempDir { + let temp_dir = tempfile::tempdir().unwrap(); + std::fs::write(temp_dir.path().join("package.json"), package_json).unwrap(); + std::fs::create_dir(temp_dir.path().join("src")).unwrap(); + std::fs::create_dir(temp_dir.path().join("src/tree_sitter")).unwrap(); + std::fs::write( + temp_dir.path().join("src/grammar.json"), + format!(r#"{{"name":"{name}"}}"#), + ) + .unwrap(); + std::fs::write( + temp_dir.path().join("src/parser.c"), + format!( + r##" + #include "tree_sitter/parser.h" + #ifdef _WIN32 + #define TS_PUBLIC __declspec(dllexport) + #else + #define TS_PUBLIC __attribute__((visibility("default"))) + #endif + TS_PUBLIC const TSLanguage *tree_sitter_{name}() {{}} + "## + ), + ) + .unwrap(); + std::fs::write( + temp_dir.path().join("src/tree_sitter/parser.h"), + include_str!("../../../lib/src/parser.h"), + ) + .unwrap(); + temp_dir +} + +// if we manage to get the language scope, it means we correctly detected the file-type +fn get_lang_scope(loader: &Loader, file_name: &Path) -> Option { + loader + .language_configuration_for_file_name(file_name) + .ok() + .and_then(|config| { + if let Some((_, config)) = config { + config.scope.clone() + } else if let Ok(Some((_, config))) = + loader.language_configuration_for_first_line_regex(file_name) + { + config.scope.clone() + } else { + None + } + }) +} diff --git a/third-party/tree-sitter/tree-sitter/cli/src/tests/helpers/allocations.rs b/third-party/tree-sitter/tree-sitter/cli/src/tests/helpers/allocations.rs index 
9a514014532..7e640741dd8 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/tests/helpers/allocations.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/tests/helpers/allocations.rs @@ -2,7 +2,7 @@ use std::{ collections::HashMap, os::raw::c_void, sync::{ - atomic::{AtomicBool, AtomicU64, Ordering::SeqCst}, + atomic::{AtomicBool, AtomicUsize, Ordering::SeqCst}, Mutex, }, }; @@ -25,12 +25,12 @@ unsafe impl Sync for Allocation {} #[derive(Default)] struct AllocationRecorder { enabled: AtomicBool, - allocation_count: AtomicU64, - outstanding_allocations: Mutex>, + allocation_count: AtomicUsize, + outstanding_allocations: Mutex>, } thread_local! { - static RECORDER: AllocationRecorder = Default::default(); + static RECORDER: AllocationRecorder = AllocationRecorder::default(); } extern "C" { @@ -60,12 +60,10 @@ pub fn record(f: impl FnOnce() -> T) -> T { .map(|e| e.1) .collect::>() }); - if !outstanding_allocation_indices.is_empty() { - panic!( - "Leaked allocation indices: {:?}", - outstanding_allocation_indices - ); - } + assert!( + outstanding_allocation_indices.is_empty(), + "Leaked allocation indices: {outstanding_allocation_indices:?}" + ); value } @@ -83,6 +81,7 @@ fn record_alloc(ptr: *mut c_void) { } fn record_dealloc(ptr: *mut c_void) { + assert!(!ptr.is_null(), "Zero pointer deallocation!"); RECORDER.with(|recorder| { if recorder.enabled.load(SeqCst) { recorder @@ -107,9 +106,13 @@ unsafe extern "C" fn ts_record_calloc(count: usize, size: usize) -> *mut c_void } unsafe extern "C" fn ts_record_realloc(ptr: *mut c_void, size: usize) -> *mut c_void { - record_dealloc(ptr); let result = realloc(ptr, size); - record_alloc(result); + if ptr.is_null() { + record_alloc(result); + } else if ptr != result { + record_dealloc(ptr); + record_alloc(result); + } result } diff --git a/third-party/tree-sitter/tree-sitter/cli/src/tests/helpers/dirs.rs b/third-party/tree-sitter/tree-sitter/cli/src/tests/helpers/dirs.rs index 4bf345d8034..4d1c49820d5 100644 --- 
a/third-party/tree-sitter/tree-sitter/cli/src/tests/helpers/dirs.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/tests/helpers/dirs.rs @@ -1,11 +1,47 @@ lazy_static! { - static ref ROOT_DIR: PathBuf = PathBuf::from(env!("CARGO_MANIFEST_DIR")).parent().unwrap().to_owned(); - static ref FIXTURES_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures"); - static ref HEADER_DIR: PathBuf = ROOT_DIR.join("lib").join("include"); - static ref GRAMMARS_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures").join("grammars"); - static ref SCRATCH_DIR: PathBuf = { + pub static ref ROOT_DIR: PathBuf = PathBuf::from(env!("CARGO_MANIFEST_DIR")).parent().unwrap().to_owned(); + pub static ref FIXTURES_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures"); + pub static ref HEADER_DIR: PathBuf = ROOT_DIR.join("lib").join("include"); + pub static ref GRAMMARS_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures").join("grammars"); + pub static ref SCRATCH_BASE_DIR: PathBuf = { let result = ROOT_DIR.join("target").join("scratch"); fs::create_dir_all(&result).unwrap(); result }; + pub static ref WASM_DIR: PathBuf = ROOT_DIR.join("target").join("release"); + pub static ref SCRATCH_DIR: PathBuf = { + // https://doc.rust-lang.org/reference/conditional-compilation.html + let vendor = if cfg!(target_vendor = "apple") { + "apple" + } else if cfg!(target_vendor = "fortanix") { + "fortanix" + } else if cfg!(target_vendor = "pc") { + "pc" + } else { + "unknown" + }; + let env = if cfg!(target_env = "gnu") { + "gnu" + } else if cfg!(target_env = "msvc") { + "msvc" + } else if cfg!(target_env = "musl") { + "musl" + } else if cfg!(target_env = "sgx") { + "sgx" + } else { + "unknown" + }; + let endian = if cfg!(target_endian = "little") { + "little" + } else if cfg!(target_endian = "big") { + "big" + } else { + "unknown" + }; + + let machine = format!("{}-{}-{vendor}-{env}-{endian}", std::env::consts::ARCH, std::env::consts::OS); + let result = SCRATCH_BASE_DIR.join(machine); + 
fs::create_dir_all(&result).unwrap(); + result + }; } diff --git a/third-party/tree-sitter/tree-sitter/cli/src/tests/helpers/edits.rs b/third-party/tree-sitter/tree-sitter/cli/src/tests/helpers/edits.rs index 4b07485c3f9..11fd659c372 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/tests/helpers/edits.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/tests/helpers/edits.rs @@ -5,12 +5,13 @@ use std::str; #[derive(Debug)] pub struct ReadRecorder<'a> { - content: &'a Vec, + content: &'a [u8], indices_read: Vec, } impl<'a> ReadRecorder<'a> { - pub fn new(content: &'a Vec) -> Self { + #[must_use] + pub const fn new(content: &'a [u8]) -> Self { Self { content, indices_read: Vec::new(), @@ -31,7 +32,7 @@ impl<'a> ReadRecorder<'a> { pub fn strings_read(&self) -> Vec<&'a str> { let mut result = Vec::new(); let mut last_range: Option> = None; - for index in self.indices_read.iter() { + for index in &self.indices_read { if let Some(ref mut range) = &mut last_range { if range.end == *index { range.end += 1; @@ -44,13 +45,13 @@ impl<'a> ReadRecorder<'a> { } } if let Some(range) = last_range { - result.push(str::from_utf8(&self.content[range.clone()]).unwrap()); + result.push(str::from_utf8(&self.content[range]).unwrap()); } result } } -pub fn invert_edit(input: &Vec, edit: &Edit) -> Edit { +pub fn invert_edit(input: &[u8], edit: &Edit) -> Edit { let position = edit.position; let removed_content = &input[position..(position + edit.deleted_length)]; Edit { @@ -60,7 +61,7 @@ pub fn invert_edit(input: &Vec, edit: &Edit) -> Edit { } } -pub fn get_random_edit(rand: &mut Rand, input: &Vec) -> Edit { +pub fn get_random_edit(rand: &mut Rand, input: &[u8]) -> Edit { let choice = rand.unsigned(10); if choice < 2 { // Insert text at end diff --git a/third-party/tree-sitter/tree-sitter/cli/src/tests/helpers/fixtures.rs b/third-party/tree-sitter/tree-sitter/cli/src/tests/helpers/fixtures.rs index 7d04b24a1ca..bad016c4838 100644 --- 
a/third-party/tree-sitter/tree-sitter/cli/src/tests/helpers/fixtures.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/tests/helpers/fixtures.rs @@ -1,6 +1,7 @@ +use anyhow::Context; use lazy_static::lazy_static; -use std::fs; use std::path::{Path, PathBuf}; +use std::{env, fs}; use tree_sitter::Language; use tree_sitter_highlight::HighlightConfiguration; use tree_sitter_loader::Loader; @@ -9,20 +10,34 @@ use tree_sitter_tags::TagsConfiguration; include!("./dirs.rs"); lazy_static! { - static ref TEST_LOADER: Loader = Loader::with_parser_lib_path(SCRATCH_DIR.clone()); + static ref TEST_LOADER: Loader = { + let mut loader = Loader::with_parser_lib_path(SCRATCH_DIR.clone()); + if env::var("TREE_SITTER_GRAMMAR_DEBUG").is_ok() { + loader.use_debug_build(true); + } + loader + }; } -pub fn test_loader<'a>() -> &'a Loader { - &*TEST_LOADER +pub fn test_loader() -> &'static Loader { + &TEST_LOADER } -pub fn fixtures_dir<'a>() -> &'static Path { +pub fn fixtures_dir() -> &'static Path { &FIXTURES_DIR } +pub fn scratch_dir() -> &'static Path { + &SCRATCH_DIR +} + pub fn get_language(name: &str) -> Language { TEST_LOADER - .load_language_at_path(&GRAMMARS_DIR.join(name).join("src"), &HEADER_DIR) + .load_language_at_path( + &GRAMMARS_DIR.join(name).join("src"), + &[&HEADER_DIR, &GRAMMARS_DIR.join(name).join("src")], + None, + ) .unwrap() } @@ -38,20 +53,20 @@ pub fn get_highlight_config( let language = get_language(language_name); let queries_path = get_language_queries_path(language_name); let highlights_query = fs::read_to_string(queries_path.join("highlights.scm")).unwrap(); - let injections_query = if let Some(injection_query_filename) = injection_query_filename { - fs::read_to_string(queries_path.join(injection_query_filename)).unwrap() - } else { - String::new() - }; - let locals_query = fs::read_to_string(queries_path.join("locals.scm")).unwrap_or(String::new()); + let injections_query = + injection_query_filename.map_or_else(String::new, |injection_query_filename| { 
+ fs::read_to_string(queries_path.join(injection_query_filename)).unwrap() + }); + let locals_query = fs::read_to_string(queries_path.join("locals.scm")).unwrap_or_default(); let mut result = HighlightConfiguration::new( language, + language_name, &highlights_query, &injections_query, &locals_query, ) .unwrap(); - result.configure(&highlight_names); + result.configure(highlight_names); result } @@ -59,33 +74,74 @@ pub fn get_tags_config(language_name: &str) -> TagsConfiguration { let language = get_language(language_name); let queries_path = get_language_queries_path(language_name); let tags_query = fs::read_to_string(queries_path.join("tags.scm")).unwrap(); - let locals_query = fs::read_to_string(queries_path.join("locals.scm")).unwrap_or(String::new()); + let locals_query = fs::read_to_string(queries_path.join("locals.scm")).unwrap_or_default(); TagsConfiguration::new(language, &tags_query, &locals_query).unwrap() } pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) -> Language { - let parser_c_path = SCRATCH_DIR.join(&format!("{}-parser.c", name)); - if !fs::read_to_string(&parser_c_path) - .map(|content| content == parser_code) - .unwrap_or(false) - { - fs::write(&parser_c_path, parser_code).unwrap(); + let src_dir = scratch_dir().join("src").join(name); + fs::create_dir_all(&src_dir).unwrap(); + + let parser_path = src_dir.join("parser.c"); + if !fs::read_to_string(&parser_path).map_or(false, |content| content == parser_code) { + fs::write(&parser_path, parser_code).unwrap(); } - let scanner_path = path.and_then(|p| { - let result = p.join("scanner.c"); - if result.exists() { - Some(result) + + let scanner_path = if let Some(path) = path { + let scanner_path = path.join("scanner.c"); + if scanner_path.exists() { + let scanner_code = fs::read_to_string(&scanner_path).unwrap(); + let scanner_copy_path = src_dir.join("scanner.c"); + if !fs::read_to_string(&scanner_copy_path) + .map_or(false, |content| content == scanner_code) + { + 
fs::write(&scanner_copy_path, scanner_code).unwrap(); + } + Some(scanner_copy_path) } else { None } - }); + } else { + None + }; + + let header_path = src_dir.join("tree_sitter"); + fs::create_dir_all(&header_path).unwrap(); + + fs::write(header_path.join("alloc.h"), tree_sitter::PARSER_HEADER) + .with_context(|| { + format!( + "Failed to write {:?}", + header_path.join("alloc.h").file_name().unwrap() + ) + }) + .unwrap(); + + fs::write(header_path.join("array.h"), tree_sitter::PARSER_HEADER) + .with_context(|| { + format!( + "Failed to write {:?}", + header_path.join("array.h").file_name().unwrap() + ) + }) + .unwrap(); + + fs::write(header_path.join("parser.h"), tree_sitter::PARSER_HEADER) + .with_context(|| { + format!( + "Failed to write {:?}", + header_path.join("parser.h").file_name().unwrap() + ) + }) + .unwrap(); + + let paths_to_check = if let Some(scanner_path) = &scanner_path { + vec![parser_path, scanner_path.clone()] + } else { + vec![parser_path] + }; + TEST_LOADER - .load_language_from_sources(name, &HEADER_DIR, &parser_c_path, &scanner_path) + .load_language_at_path_with_name(&src_dir, &[&HEADER_DIR], name, Some(&paths_to_check)) .unwrap() } - -pub fn get_test_grammar(name: &str) -> (String, Option) { - let dir = fixtures_dir().join("test_grammars").join(name); - let grammar = fs::read_to_string(&dir.join("grammar.json")).unwrap(); - (grammar, Some(dir)) -} diff --git a/third-party/tree-sitter/tree-sitter/cli/src/tests/helpers/mod.rs b/third-party/tree-sitter/tree-sitter/cli/src/tests/helpers/mod.rs index 54df8809282..35e4dc86866 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/tests/helpers/mod.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/tests/helpers/mod.rs @@ -26,7 +26,7 @@ fn int_env_var(name: &'static str) -> Option { env::var(name).ok().and_then(|e| e.parse().ok()) } -pub(crate) fn new_seed() -> usize { +pub fn new_seed() -> usize { int_env_var("TREE_SITTER_SEED").unwrap_or_else(|| { let mut rng = rand::thread_rng(); 
rng.gen::() diff --git a/third-party/tree-sitter/tree-sitter/cli/src/tests/helpers/query_helpers.rs b/third-party/tree-sitter/tree-sitter/cli/src/tests/helpers/query_helpers.rs index 78ae559ccb9..608d491465f 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/tests/helpers/query_helpers.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/tests/helpers/query_helpers.rs @@ -1,6 +1,8 @@ use rand::prelude::Rng; use std::{cmp::Ordering, fmt::Write, ops::Range}; -use tree_sitter::{Node, Point, Tree, TreeCursor}; +use tree_sitter::{ + Language, Node, Parser, Point, Query, QueryCapture, QueryCursor, QueryMatch, Tree, TreeCursor, +}; #[derive(Debug)] pub struct Pattern { @@ -17,7 +19,7 @@ pub struct Match<'a, 'tree> { pub last_node: Option>, } -const CAPTURE_NAMES: &'static [&'static str] = &[ +const CAPTURE_NAMES: &[&str] = &[ "one", "two", "three", "four", "five", "six", "seven", "eight", ]; @@ -55,12 +57,11 @@ impl Pattern { children: roots, }; - if pattern.children.len() == 1 { - pattern = pattern.children.pop().unwrap(); - } + if pattern.children.len() == 1 || // In a parenthesized list of sibling patterns, the first // sibling can't be an anonymous `_` wildcard. 
- else if pattern.children[0].kind == Some("_") && !pattern.children[0].named { + (pattern.children[0].kind == Some("_") && !pattern.children[0].named) + { pattern = pattern.children.pop().unwrap(); } // In a parenthesized list of sibling patterns, the first @@ -121,22 +122,16 @@ impl Pattern { } } - pub fn to_string(&self) -> String { - let mut result = String::new(); - self.write_to_string(&mut result, 0); - result - } - fn write_to_string(&self, string: &mut String, indent: usize) { if let Some(field) = self.field { - write!(string, "{}: ", field).unwrap(); + write!(string, "{field}: ").unwrap(); } if self.named { string.push('('); let mut has_contents = false; if let Some(kind) = &self.kind { - write!(string, "{}", kind).unwrap(); + write!(string, "{kind}").unwrap(); has_contents = true; } for child in &self.children { @@ -152,11 +147,11 @@ impl Pattern { } else if self.kind == Some("_") { string.push('_'); } else { - write!(string, "\"{}\"", self.kind.unwrap().replace("\"", "\\\"")).unwrap(); + write!(string, "\"{}\"", self.kind.unwrap().replace('\"', "\\\"")).unwrap(); } if let Some(capture) = &self.capture { - write!(string, " @{}", capture).unwrap(); + write!(string, " @{capture}").unwrap(); } } @@ -212,11 +207,10 @@ impl Pattern { // Create a match for the current node. 
let mat = Match { - captures: if let Some(name) = &self.capture { - vec![(name.as_str(), node)] - } else { - Vec::new() - }, + captures: self + .capture + .as_ref() + .map_or_else(Vec::new, |name| vec![(name.as_str(), node)]), last_node: Some(node), }; @@ -244,7 +238,7 @@ impl Pattern { new_match_states.push((*pattern_index + 1, combined_match)); } else { let mut existing = false; - for existing_match in finished_matches.iter_mut() { + for existing_match in &mut finished_matches { if existing_match.captures == combined_match.captures { if child_pattern.capture.is_some() { existing_match.last_node = combined_match.last_node; @@ -269,6 +263,14 @@ impl Pattern { } } +impl std::fmt::Display for Pattern { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut result = String::new(); + self.write_to_string(&mut result, 0); + write!(f, "{result}") + } +} + impl<'a, 'tree> PartialOrd for Match<'a, 'tree> { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) @@ -304,3 +306,56 @@ fn compare_depth_first(a: Node, b: Node) -> Ordering { let b = b.byte_range(); a.start.cmp(&b.start).then_with(|| b.end.cmp(&a.end)) } + +pub fn assert_query_matches( + language: &Language, + query: &Query, + source: &str, + expected: &[(usize, Vec<(&str, &str)>)], +) { + let mut parser = Parser::new(); + parser.set_language(language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + let mut cursor = QueryCursor::new(); + let matches = cursor.matches(query, tree.root_node(), source.as_bytes()); + pretty_assertions::assert_eq!(collect_matches(matches, query, source), expected); + pretty_assertions::assert_eq!(cursor.did_exceed_match_limit(), false); +} + +pub fn collect_matches<'a>( + matches: impl Iterator>, + query: &'a Query, + source: &'a str, +) -> Vec<(usize, Vec<(&'a str, &'a str)>)> { + matches + .map(|m| { + ( + m.pattern_index, + format_captures(m.captures.iter().copied(), query, source), + ) + }) + .collect() +} + +pub fn 
collect_captures<'a>( + captures: impl Iterator, usize)>, + query: &'a Query, + source: &'a str, +) -> Vec<(&'a str, &'a str)> { + format_captures(captures.map(|(m, i)| m.captures[i]), query, source) +} + +fn format_captures<'a>( + captures: impl Iterator>, + query: &'a Query, + source: &'a str, +) -> Vec<(&'a str, &'a str)> { + captures + .map(|capture| { + ( + query.capture_names()[capture.index as usize], + capture.node.utf8_text(source.as_bytes()).unwrap(), + ) + }) + .collect() +} diff --git a/third-party/tree-sitter/tree-sitter/cli/src/tests/helpers/random.rs b/third-party/tree-sitter/tree-sitter/cli/src/tests/helpers/random.rs index 77c347d6bdc..bac088905f0 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/tests/helpers/random.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/tests/helpers/random.rs @@ -11,7 +11,7 @@ pub struct Rand(StdRng); impl Rand { pub fn new(seed: usize) -> Self { - Rand(StdRng::seed_from_u64(seed as u64)) + Self(StdRng::seed_from_u64(seed as u64)) } pub fn unsigned(&mut self, max: usize) -> usize { @@ -24,9 +24,9 @@ impl Rand { for i in 0..word_count { if i > 0 { if self.unsigned(5) == 0 { - result.push('\n' as u8); + result.push(b'\n'); } else { - result.push(' ' as u8); + result.push(b' '); } } if self.unsigned(3) == 0 { @@ -34,7 +34,7 @@ impl Rand { result.push(OPERATORS[index] as u8); } else { for _ in 0..self.unsigned(8) { - result.push(self.0.sample(Alphanumeric) as u8); + result.push(self.0.sample(Alphanumeric)); } } } diff --git a/third-party/tree-sitter/tree-sitter/cli/src/tests/helpers/scope_sequence.rs b/third-party/tree-sitter/tree-sitter/cli/src/tests/helpers/scope_sequence.rs index 4521833cde8..436455d4a12 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/tests/helpers/scope_sequence.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/tests/helpers/scope_sequence.rs @@ -7,7 +7,7 @@ type ScopeStack = Vec<&'static str>; impl ScopeSequence { pub fn new(tree: &Tree) -> Self { - let mut result = 
ScopeSequence(Vec::new()); + let mut result = Self(Vec::new()); let mut scope_stack = Vec::new(); let mut cursor = tree.walk(); @@ -40,9 +40,9 @@ impl ScopeSequence { pub fn check_changes( &self, - other: &ScopeSequence, - text: &Vec, - known_changed_ranges: &Vec, + other: &Self, + text: &[u8], + known_changed_ranges: &[Range], ) -> Result<(), String> { let mut position = Point { row: 0, column: 0 }; for i in 0..(self.0.len().max(other.0.len())) { @@ -54,7 +54,7 @@ impl ScopeSequence { .find(|range| range.start_point <= position && position < range.end_point); if containing_range.is_none() { let line = &text[(i - position.column)..] - .split(|c| *c == '\n' as u8) + .split(|c| *c == b'\n') .next() .unwrap(); return Err(format!( @@ -78,7 +78,7 @@ impl ScopeSequence { } } - if text[i] == '\n' as u8 { + if text[i] == b'\n' { position.row += 1; position.column = 0; } else { diff --git a/third-party/tree-sitter/tree-sitter/cli/src/tests/highlight_test.rs b/third-party/tree-sitter/tree-sitter/cli/src/tests/highlight_test.rs index e0b356d284b..77a95d7dbca 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/tests/highlight_test.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/tests/highlight_test.rs @@ -24,6 +24,7 @@ lazy_static! { get_highlight_config("rust", Some("injections.scm"), &HIGHLIGHT_NAMES); static ref HIGHLIGHT_NAMES: Vec = [ "attribute", + "boolean", "carriage-return", "comment", "constant", @@ -48,12 +49,12 @@ lazy_static! { "variable", ] .iter() - .cloned() + .copied() .map(String::from) .collect(); static ref HTML_ATTRS: Vec = HIGHLIGHT_NAMES .iter() - .map(|s| format!("class={}", s)) + .map(|s| format!("class={s}")) .collect(); } @@ -61,7 +62,7 @@ lazy_static! 
{ fn test_highlighting_javascript() { let source = "const a = function(b) { return b + c; }"; assert_eq!( - &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(), + &to_token_vector(source, &JS_HIGHLIGHT).unwrap(), &[vec![ ("const", vec!["keyword"]), (" ", vec![]), @@ -71,14 +72,14 @@ fn test_highlighting_javascript() { (" ", vec![]), ("function", vec!["keyword"]), ("(", vec!["punctuation.bracket"]), - ("b", vec!["variable.parameter"]), + ("b", vec!["variable"]), (")", vec!["punctuation.bracket"]), (" ", vec![]), ("{", vec!["punctuation.bracket"]), (" ", vec![]), ("return", vec!["keyword"]), (" ", vec![]), - ("b", vec!["variable.parameter"]), + ("b", vec!["variable"]), (" ", vec![]), ("+", vec!["operator"]), (" ", vec![]), @@ -92,7 +93,7 @@ fn test_highlighting_javascript() { #[test] fn test_highlighting_injected_html_in_javascript() { - let source = vec!["const s = html `
${a < b}
`;"].join("\n"); + let source = ["const s = html `
${a < b}
`;"].join("\n"); assert_eq!( &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(), @@ -156,7 +157,7 @@ fn test_highlighting_injected_javascript_in_html_mini() { #[test] fn test_highlighting_injected_javascript_in_html() { - let source = vec![ + let source = [ "", " "].join("\n"); + let source = ["
<% foo() %>
"].join("\n"); assert_eq!( &to_token_vector(&source, &EJS_HIGHLIGHT).unwrap(), @@ -376,7 +378,7 @@ fn test_highlighting_ejs_with_html_and_javascript() { fn test_highlighting_javascript_with_jsdoc() { // Regression test: the middle comment has no highlights. This should not prevent // later injections from highlighting properly. - let source = vec!["a /* @see a */ b; /* nothing */ c; /* @see b */"].join("\n"); + let source = ["a /* @see a */ b; /* nothing */ c; /* @see b */"].join("\n"); assert_eq!( &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(), @@ -404,7 +406,7 @@ fn test_highlighting_javascript_with_jsdoc() { #[test] fn test_highlighting_with_content_children_included() { - let source = vec!["assert!(", " a.b.c() < D::e::()", ");"].join("\n"); + let source = ["assert!(", " a.b.c() < D::e::()", ");"].join("\n"); assert_eq!( &to_token_vector(&source, &RUST_HIGHLIGHT).unwrap(), @@ -482,7 +484,7 @@ fn test_highlighting_cancellation() { #[test] fn test_highlighting_via_c_api() { - let highlights = vec![ + let highlights = [ "class=tag\0", "class=function\0", "class=string\0", @@ -490,74 +492,86 @@ fn test_highlighting_via_c_api() { ]; let highlight_names = highlights .iter() - .map(|h| h["class=".len()..].as_ptr() as *const c_char) + .map(|h| h["class=".len()..].as_ptr().cast::()) .collect::>(); let highlight_attrs = highlights .iter() - .map(|h| h.as_bytes().as_ptr() as *const c_char) + .map(|h| h.as_bytes().as_ptr().cast::()) .collect::>(); - let highlighter = c::ts_highlighter_new( - &highlight_names[0] as *const *const c_char, - &highlight_attrs[0] as *const *const c_char, - highlights.len() as u32, - ); + let highlighter = unsafe { + c::ts_highlighter_new( + std::ptr::addr_of!(highlight_names[0]), + std::ptr::addr_of!(highlight_attrs[0]), + highlights.len() as u32, + ) + }; let source_code = c_string(""); let js_scope = c_string("source.js"); let js_injection_regex = c_string("^javascript"); let language = get_language("javascript"); + let lang_name = 
c_string("javascript"); let queries = get_language_queries_path("javascript"); let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap(); let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap(); let locals_query = fs::read_to_string(queries.join("locals.scm")).unwrap(); - c::ts_highlighter_add_language( - highlighter, - js_scope.as_ptr(), - js_injection_regex.as_ptr(), - language, - highlights_query.as_ptr() as *const c_char, - injections_query.as_ptr() as *const c_char, - locals_query.as_ptr() as *const c_char, - highlights_query.len() as u32, - injections_query.len() as u32, - locals_query.len() as u32, - ); + unsafe { + c::ts_highlighter_add_language( + highlighter, + lang_name.as_ptr(), + js_scope.as_ptr(), + js_injection_regex.as_ptr(), + language, + highlights_query.as_ptr().cast::(), + injections_query.as_ptr().cast::(), + locals_query.as_ptr().cast::(), + highlights_query.len() as u32, + injections_query.len() as u32, + locals_query.len() as u32, + ); + } let html_scope = c_string("text.html.basic"); let html_injection_regex = c_string("^html"); let language = get_language("html"); + let lang_name = c_string("html"); let queries = get_language_queries_path("html"); let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap(); let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap(); - c::ts_highlighter_add_language( - highlighter, - html_scope.as_ptr(), - html_injection_regex.as_ptr(), - language, - highlights_query.as_ptr() as *const c_char, - injections_query.as_ptr() as *const c_char, - ptr::null(), - highlights_query.len() as u32, - injections_query.len() as u32, - 0, - ); + unsafe { + c::ts_highlighter_add_language( + highlighter, + lang_name.as_ptr(), + html_scope.as_ptr(), + html_injection_regex.as_ptr(), + language, + highlights_query.as_ptr().cast::(), + injections_query.as_ptr().cast::(), + ptr::null(), + highlights_query.len() as u32, + 
injections_query.len() as u32, + 0, + ); + } let buffer = c::ts_highlight_buffer_new(); - c::ts_highlighter_highlight( - highlighter, - html_scope.as_ptr(), - source_code.as_ptr(), - source_code.as_bytes().len() as u32, - buffer, - ptr::null_mut(), - ); + unsafe { + c::ts_highlighter_highlight( + highlighter, + html_scope.as_ptr(), + source_code.as_ptr(), + source_code.as_bytes().len() as u32, + buffer, + ptr::null_mut(), + ); + } - let output_bytes = c::ts_highlight_buffer_content(buffer); - let output_line_offsets = c::ts_highlight_buffer_line_offsets(buffer); - let output_len = c::ts_highlight_buffer_len(buffer); - let output_line_count = c::ts_highlight_buffer_line_count(buffer); + let output_bytes = unsafe { c::ts_highlight_buffer_content(buffer) }; + let output_line_offsets = unsafe { c::ts_highlight_buffer_line_offsets(buffer) }; + let output_len = unsafe { c::ts_highlight_buffer_len(buffer) }; + let output_line_count = unsafe { c::ts_highlight_buffer_line_count(buffer) }; let output_bytes = unsafe { slice::from_raw_parts(output_bytes, output_len as usize) }; let output_line_offsets = @@ -568,8 +582,7 @@ fn test_highlighting_via_c_api() { let line_start = output_line_offsets[i] as usize; let line_end = output_line_offsets .get(i + 1) - .map(|x| *x as usize) - .unwrap_or(output_bytes.len()); + .map_or(output_bytes.len(), |x| *x as usize); lines.push(str::from_utf8(&output_bytes[line_start..line_end]).unwrap()); } @@ -583,8 +596,69 @@ fn test_highlighting_via_c_api() { ] ); - c::ts_highlighter_delete(highlighter); - c::ts_highlight_buffer_delete(buffer); + unsafe { + c::ts_highlighter_delete(highlighter); + c::ts_highlight_buffer_delete(buffer); + } +} + +#[test] +fn test_highlighting_with_all_captures_applied() { + let source = "fn main(a: u32, b: u32) -> { let c = a + b; }"; + let language = get_language("rust"); + let highlights_query = indoc::indoc! 
{" + [ + \"fn\" + \"let\" + ] @keyword + (identifier) @variable + (function_item name: (identifier) @function) + (parameter pattern: (identifier) @variable.parameter) + (primitive_type) @type.builtin + \"=\" @operator + [ \"->\" \":\" \";\" ] @punctuation.delimiter + [ \"{\" \"}\" \"(\" \")\" ] @punctuation.bracket + "}; + let mut rust_highlight_reverse = + HighlightConfiguration::new(language, "rust", highlights_query, "", "").unwrap(); + rust_highlight_reverse.configure(&HIGHLIGHT_NAMES); + + assert_eq!( + &to_token_vector(source, &rust_highlight_reverse).unwrap(), + &[[ + ("fn", vec!["keyword"]), + (" ", vec![]), + ("main", vec!["function"]), + ("(", vec!["punctuation.bracket"]), + ("a", vec!["variable.parameter"]), + (":", vec!["punctuation.delimiter"]), + (" ", vec![]), + ("u32", vec!["type.builtin"]), + (", ", vec![]), + ("b", vec!["variable.parameter"]), + (":", vec!["punctuation.delimiter"]), + (" ", vec![]), + ("u32", vec!["type.builtin"]), + (")", vec!["punctuation.bracket"]), + (" ", vec![]), + ("->", vec!["punctuation.delimiter"]), + (" ", vec![]), + ("{", vec!["punctuation.bracket"]), + (" ", vec![]), + ("let", vec!["keyword"]), + (" ", vec![]), + ("c", vec!["variable"]), + (" ", vec![]), + ("=", vec!["operator"]), + (" ", vec![]), + ("a", vec!["variable"]), + (" + ", vec![]), + ("b", vec!["variable"]), + (";", vec!["punctuation.delimiter"]), + (" ", vec![]), + ("}", vec!["punctuation.bracket"]) + ]], + ); } #[test] @@ -641,9 +715,13 @@ fn to_html<'a>( renderer .render(events, src, &|highlight| HTML_ATTRS[highlight.0].as_bytes()) .unwrap(); - Ok(renderer.lines().map(|s| s.to_string()).collect()) + Ok(renderer + .lines() + .map(std::string::ToString::to_string) + .collect()) } +#[allow(clippy::type_complexity)] fn to_token_vector<'a>( src: &'a str, language_config: &'a HighlightConfiguration, @@ -667,20 +745,20 @@ fn to_token_vector<'a>( } HighlightEvent::Source { start, end } => { let s = str::from_utf8(&src[start..end]).unwrap(); - for (i, l) in 
s.split("\n").enumerate() { + for (i, l) in s.split('\n').enumerate() { let l = l.trim_end_matches('\r'); if i > 0 { lines.push(line); line = Vec::new(); } - if l.len() > 0 { + if !l.is_empty() { line.push((l, highlights.clone())); } } } } } - if line.len() > 0 { + if !line.is_empty() { lines.push(line); } Ok(lines) diff --git a/third-party/tree-sitter/tree-sitter/cli/src/tests/language_test.rs b/third-party/tree-sitter/tree-sitter/cli/src/tests/language_test.rs new file mode 100644 index 00000000000..5528c77c7c1 --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/cli/src/tests/language_test.rs @@ -0,0 +1,64 @@ +use super::helpers::fixtures::get_language; +use tree_sitter::Parser; + +#[test] +fn test_lookahead_iterator() { + let mut parser = Parser::new(); + let language = get_language("rust"); + parser.set_language(&language).unwrap(); + + let tree = parser.parse("struct Stuff {}", None).unwrap(); + + let mut cursor = tree.walk(); + + assert!(cursor.goto_first_child()); // struct + assert!(cursor.goto_first_child()); // struct keyword + + let next_state = cursor.node().next_parse_state(); + assert_ne!(next_state, 0); + assert_eq!( + next_state, + language.next_state(cursor.node().parse_state(), cursor.node().grammar_id()) + ); + assert!((next_state as usize) < language.parse_state_count()); + assert!(cursor.goto_next_sibling()); // type_identifier + assert_eq!(next_state, cursor.node().parse_state()); + assert_eq!(cursor.node().grammar_name(), "identifier"); + assert_ne!(cursor.node().grammar_id(), cursor.node().kind_id()); + + let expected_symbols = ["//", "/*", "identifier", "line_comment", "block_comment"]; + let mut lookahead = language.lookahead_iterator(next_state).unwrap(); + assert_eq!(*lookahead.language(), language); + assert!(lookahead.iter_names().eq(expected_symbols)); + + lookahead.reset_state(next_state); + assert!(lookahead.iter_names().eq(expected_symbols)); + + lookahead.reset(&language, next_state); + assert!(lookahead + .map(|s| 
language.node_kind_for_id(s).unwrap()) + .eq(expected_symbols)); +} + +#[test] +fn test_lookahead_iterator_modifiable_only_by_mut() { + let mut parser = Parser::new(); + let language = get_language("rust"); + parser.set_language(&language).unwrap(); + + let tree = parser.parse("struct Stuff {}", None).unwrap(); + + let mut cursor = tree.walk(); + + assert!(cursor.goto_first_child()); // struct + assert!(cursor.goto_first_child()); // struct keyword + + let next_state = cursor.node().next_parse_state(); + assert_ne!(next_state, 0); + + let mut lookahead = language.lookahead_iterator(next_state).unwrap(); + let _ = lookahead.next(); + + let mut names = lookahead.iter_names(); + let _ = names.next(); +} diff --git a/third-party/tree-sitter/tree-sitter/cli/src/tests/mod.rs b/third-party/tree-sitter/tree-sitter/cli/src/tests/mod.rs index 1b804450b24..596bc8d179b 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/tests/mod.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/tests/mod.rs @@ -1,11 +1,19 @@ +mod async_context_test; mod corpus_test; +mod detect_language; mod helpers; mod highlight_test; +mod language_test; mod node_test; +mod parser_hang_test; mod parser_test; mod pathological_test; mod query_test; mod tags_test; mod test_highlight_test; mod test_tags_test; +mod text_provider_test; mod tree_test; + +#[cfg(feature = "wasm")] +mod wasm_language_test; diff --git a/third-party/tree-sitter/tree-sitter/cli/src/tests/node_test.rs b/third-party/tree-sitter/tree-sitter/cli/src/tests/node_test.rs index 6d5ed61d2a7..7fdb0069f2b 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/tests/node_test.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/tests/node_test.rs @@ -6,7 +6,7 @@ use crate::parse::perform_edit; use std::fs; use tree_sitter::{Node, Parser, Point, Tree}; -const JSON_EXAMPLE: &'static str = r#" +const JSON_EXAMPLE: &str = r#" [ 123, @@ -17,7 +17,7 @@ const JSON_EXAMPLE: &'static str = r#" ] "#; -const GRAMMAR_WITH_ALIASES_AND_EXTRAS: &'static 
str = r#"{ +const GRAMMAR_WITH_ALIASES_AND_EXTRAS: &str = r#"{ "name": "aliases_and_extras", "extras": [ @@ -60,8 +60,8 @@ fn test_node_child() { assert_eq!(array_node.kind(), "array"); assert_eq!(array_node.named_child_count(), 3); - assert_eq!(array_node.start_byte(), JSON_EXAMPLE.find("[").unwrap()); - assert_eq!(array_node.end_byte(), JSON_EXAMPLE.find("]").unwrap() + 1); + assert_eq!(array_node.start_byte(), JSON_EXAMPLE.find('[').unwrap()); + assert_eq!(array_node.end_byte(), JSON_EXAMPLE.find(']').unwrap() + 1); assert_eq!(array_node.start_position(), Point::new(2, 0)); assert_eq!(array_node.end_position(), Point::new(8, 1)); assert_eq!(array_node.child_count(), 7); @@ -82,13 +82,13 @@ fn test_node_child() { assert_eq!(object_node.kind(), "object"); assert_eq!(right_bracket_node.kind(), "]"); - assert_eq!(left_bracket_node.is_named(), false); - assert_eq!(number_node.is_named(), true); - assert_eq!(comma_node1.is_named(), false); - assert_eq!(false_node.is_named(), true); - assert_eq!(comma_node2.is_named(), false); - assert_eq!(object_node.is_named(), true); - assert_eq!(right_bracket_node.is_named(), false); + assert!(!left_bracket_node.is_named()); + assert!(number_node.is_named()); + assert!(!comma_node1.is_named()); + assert!(false_node.is_named()); + assert!(!comma_node2.is_named()); + assert!(object_node.is_named()); + assert!(!right_bracket_node.is_named()); assert_eq!(number_node.start_byte(), JSON_EXAMPLE.find("123").unwrap()); assert_eq!( @@ -106,7 +106,7 @@ fn test_node_child() { assert_eq!(false_node.start_position(), Point::new(4, 2)); assert_eq!(false_node.end_position(), Point::new(4, 7)); - assert_eq!(object_node.start_byte(), JSON_EXAMPLE.find("{").unwrap()); + assert_eq!(object_node.start_byte(), JSON_EXAMPLE.find('{').unwrap()); assert_eq!(object_node.start_position(), Point::new(5, 2)); assert_eq!(object_node.end_position(), Point::new(7, 3)); @@ -119,9 +119,9 @@ fn test_node_child() { assert_eq!(pair_node.kind(), "pair"); 
assert_eq!(right_brace_node.kind(), "}"); - assert_eq!(left_brace_node.is_named(), false); - assert_eq!(pair_node.is_named(), true); - assert_eq!(right_brace_node.is_named(), false); + assert!(!left_brace_node.is_named()); + assert!(pair_node.is_named()); + assert!(!right_brace_node.is_named()); assert_eq!(pair_node.start_byte(), JSON_EXAMPLE.find("\"x\"").unwrap()); assert_eq!(pair_node.end_byte(), JSON_EXAMPLE.find("null").unwrap() + 4); @@ -137,9 +137,9 @@ fn test_node_child() { assert_eq!(colon_node.kind(), ":"); assert_eq!(null_node.kind(), "null"); - assert_eq!(string_node.is_named(), true); - assert_eq!(colon_node.is_named(), false); - assert_eq!(null_node.is_named(), true); + assert!(string_node.is_named()); + assert!(!colon_node.is_named()); + assert!(null_node.is_named()); assert_eq!( string_node.start_byte(), @@ -202,7 +202,7 @@ fn test_node_children() { #[test] fn test_node_children_by_field_name() { let mut parser = Parser::new(); - parser.set_language(get_language("python")).unwrap(); + parser.set_language(&get_language("python")).unwrap(); let source = " if one: a() @@ -230,7 +230,7 @@ fn test_node_children_by_field_name() { #[test] fn test_node_parent_of_child_by_field_name() { let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); let tree = parser.parse("foo(a().b[0].c.d.e())", None).unwrap(); let call_node = tree .root_node() @@ -251,13 +251,15 @@ fn test_node_parent_of_child_by_field_name() { #[test] fn test_node_field_name_for_child() { let mut parser = Parser::new(); - parser.set_language(get_language("c")).unwrap(); - let tree = parser.parse("x + y;", None).unwrap(); + parser.set_language(&get_language("c")).unwrap(); + let tree = parser.parse("int w = x + y;", None).unwrap(); let translation_unit_node = tree.root_node(); - let binary_expression_node = translation_unit_node - .named_child(0) + let declaration_node = 
translation_unit_node.named_child(0).unwrap(); + + let binary_expression_node = declaration_node + .child_by_field_name("declarator") .unwrap() - .named_child(0) + .child_by_field_name("value") .unwrap(); assert_eq!(binary_expression_node.field_name_for_child(0), Some("left")); @@ -276,7 +278,7 @@ fn test_node_field_name_for_child() { #[test] fn test_node_child_by_field_name_with_extra_hidden_children() { let mut parser = Parser::new(); - parser.set_language(get_language("python")).unwrap(); + parser.set_language(&get_language("python")).unwrap(); // In the Python grammar, some fields are applied to `suite` nodes, // which consist of an invisible `indent` token followed by a block. @@ -319,7 +321,7 @@ fn test_node_named_child() { assert_eq!(false_node.end_position(), Point::new(4, 7)); assert_eq!(object_node.kind(), "object"); - assert_eq!(object_node.start_byte(), JSON_EXAMPLE.find("{").unwrap()); + assert_eq!(object_node.start_byte(), JSON_EXAMPLE.find('{').unwrap()); assert_eq!(object_node.start_position(), Point::new(5, 2)); assert_eq!(object_node.end_position(), Point::new(7, 3)); @@ -371,7 +373,7 @@ fn test_node_named_child_with_aliases_and_extras() { let mut parser = Parser::new(); parser - .set_language(get_test_language(&parser_name, &parser_code, None)) + .set_language(&get_test_language(&parser_name, &parser_code, None)) .unwrap(); let tree = parser.parse("b ... b ... 
c", None).unwrap(); @@ -385,13 +387,55 @@ fn test_node_named_child_with_aliases_and_extras() { assert_eq!(root.named_child(4).unwrap().kind(), "C"); } +#[test] +fn test_node_descendant_count() { + let tree = parse_json_example(); + let value_node = tree.root_node(); + let all_nodes = get_all_nodes(&tree); + + assert_eq!(value_node.descendant_count(), all_nodes.len()); + + let mut cursor = value_node.walk(); + for (i, node) in all_nodes.iter().enumerate() { + cursor.goto_descendant(i); + assert_eq!(cursor.node(), *node, "index {i}"); + } + + for (i, node) in all_nodes.iter().enumerate().rev() { + cursor.goto_descendant(i); + assert_eq!(cursor.node(), *node, "rev index {i}"); + } +} + +#[test] +fn test_descendant_count_single_node_tree() { + let mut parser = Parser::new(); + parser + .set_language(&get_language("embedded-template")) + .unwrap(); + let tree = parser.parse("hello", None).unwrap(); + + let nodes = get_all_nodes(&tree); + assert_eq!(nodes.len(), 2); + assert_eq!(tree.root_node().descendant_count(), 2); + + let mut cursor = tree.root_node().walk(); + + cursor.goto_descendant(0); + assert_eq!(cursor.depth(), 0); + assert_eq!(cursor.node(), nodes[0]); + cursor.goto_descendant(1); + assert_eq!(cursor.depth(), 1); + assert_eq!(cursor.node(), nodes[1]); +} + #[test] fn test_node_descendant_for_range() { let tree = parse_json_example(); - let array_node = tree.root_node().child(0).unwrap(); + let array_node = tree.root_node(); // Leaf node exactly matches the given bounds - byte query - let colon_index = JSON_EXAMPLE.find(":").unwrap(); + let colon_index = JSON_EXAMPLE.find(':').unwrap(); let colon_node = array_node .descendant_for_byte_range(colon_index, colon_index + 1) .unwrap(); @@ -412,7 +456,7 @@ fn test_node_descendant_for_range() { assert_eq!(colon_node.end_position(), Point::new(6, 8)); // The given point is between two adjacent leaf nodes - byte query - let colon_index = JSON_EXAMPLE.find(":").unwrap(); + let colon_index = 
JSON_EXAMPLE.find(':').unwrap(); let colon_node = array_node .descendant_for_byte_range(colon_index, colon_index) .unwrap(); @@ -506,10 +550,10 @@ fn test_node_edit() { for _ in 0..10 { let mut nodes_before = get_all_nodes(&tree); - let edit = get_random_edit(&mut rand, &mut code); + let edit = get_random_edit(&mut rand, &code); let mut tree2 = tree.clone(); - let edit = perform_edit(&mut tree2, &mut code, &edit); - for node in nodes_before.iter_mut() { + let edit = perform_edit(&mut tree2, &mut code, &edit).unwrap(); + for node in &mut nodes_before { node.edit(&edit); } @@ -532,7 +576,7 @@ fn test_node_edit() { #[test] fn test_root_node_with_offset() { let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); let tree = parser.parse(" if (a) b", None).unwrap(); let node = tree.root_node_with_offset(6, Point::new(2, 2)); @@ -560,7 +604,7 @@ fn test_root_node_with_offset() { #[test] fn test_node_is_extra() { let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); let tree = parser.parse("foo(/* hi */);", None).unwrap(); let root_node = tree.root_node(); @@ -575,7 +619,7 @@ fn test_node_is_extra() { #[test] fn test_node_sexp() { let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); let tree = parser.parse("if (a) b", None).unwrap(); let root_node = tree.root_node(); let if_node = root_node.descendant_for_byte_range(0, 0).unwrap(); @@ -664,7 +708,7 @@ fn test_node_field_names() { let mut parser = Parser::new(); let language = get_test_language(&parser_name, &parser_code, None); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); let tree = parser .parse("child-0 child-1 child-2 child-3 child-4", None) @@ -734,7 +778,7 @@ fn 
test_node_field_calls_in_language_without_fields() { let mut parser = Parser::new(); let language = get_test_language(&parser_name, &parser_code, None); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); let tree = parser.parse("b c d", None).unwrap(); @@ -744,7 +788,7 @@ fn test_node_field_calls_in_language_without_fields() { let mut cursor = root_node.walk(); assert_eq!(cursor.field_name(), None); - assert_eq!(cursor.goto_first_child(), true); + assert!(cursor.goto_first_child()); assert_eq!(cursor.field_name(), None); } @@ -752,7 +796,7 @@ fn test_node_field_calls_in_language_without_fields() { fn test_node_is_named_but_aliased_as_anonymous() { let (parser_name, parser_code) = generate_parser_for_grammar( &fs::read_to_string( - &fixtures_dir() + fixtures_dir() .join("test_grammars") .join("named_rule_aliased_as_anonymous") .join("grammar.json"), @@ -763,7 +807,7 @@ fn test_node_is_named_but_aliased_as_anonymous() { let mut parser = Parser::new(); let language = get_test_language(&parser_name, &parser_code, None); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); let tree = parser.parse("B C B", None).unwrap(); @@ -782,7 +826,7 @@ fn test_node_is_named_but_aliased_as_anonymous() { #[test] fn test_node_numeric_symbols_respect_simple_aliases() { let mut parser = Parser::new(); - parser.set_language(get_language("python")).unwrap(); + parser.set_language(&get_language("python")).unwrap(); // Example 1: // Python argument lists can contain "splat" arguments, which are not allowed within @@ -813,7 +857,7 @@ fn test_node_numeric_symbols_respect_simple_aliases() { // Ruby handles the unary (negative) and binary (minus) `-` operators using two different // tokens. One or more of these is an external token that's aliased as `-`. Their numeric // kind ids should match. 
- parser.set_language(get_language("ruby")).unwrap(); + parser.set_language(&get_language("ruby")).unwrap(); let tree = parser.parse("-a - b", None).unwrap(); let root = tree.root_node(); assert_eq!( @@ -841,22 +885,22 @@ fn get_all_nodes(tree: &Tree) -> Vec { let mut visited_children = false; let mut cursor = tree.walk(); loop { - result.push(cursor.node()); - if !visited_children && cursor.goto_first_child() { - continue; + if !visited_children { + result.push(cursor.node()); + if !cursor.goto_first_child() { + visited_children = true; + } } else if cursor.goto_next_sibling() { visited_children = false; - } else if cursor.goto_parent() { - visited_children = true; - } else { + } else if !cursor.goto_parent() { break; } } - return result; + result } fn parse_json_example() -> Tree { let mut parser = Parser::new(); - parser.set_language(get_language("json")).unwrap(); + parser.set_language(&get_language("json")).unwrap(); parser.parse(JSON_EXAMPLE, None).unwrap() } diff --git a/third-party/tree-sitter/tree-sitter/cli/src/tests/parser_hang_test.rs b/third-party/tree-sitter/tree-sitter/cli/src/tests/parser_hang_test.rs new file mode 100644 index 00000000000..0c742d805b6 --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/cli/src/tests/parser_hang_test.rs @@ -0,0 +1,104 @@ +// For some reasons `Command::spawn` doesn't work in CI env for many exotic arches. +#![cfg(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing)))] + +use crate::{ + generate::{generate_parser_for_grammar, load_grammar_file}, + tests::helpers::fixtures::{fixtures_dir, get_test_language}, +}; +use std::{ + env::VarError, + process::{Command, Stdio}, +}; +use tree_sitter::Parser; + +// The `sanitizing` cfg is required to don't run tests under specific sunitizer +// because they don't work well with subprocesses _(it's an assumption)_. 
+// +// Below are two alternative examples of how to disable tests for some arches +// if a way with excluding the whole mod from compilation wouldn't work well. +// +// XXX: Also may be it makes sense to keep such tests as ignored by default +// to omit surprises and enable them on CI by passing an extra option explicitly: +// +// > cargo test -- --include-ignored +// +// #[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing)))] +// #[cfg_attr(not(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing))), ignore)] +// +#[test] +fn test_grammar_that_should_hang_and_not_segfault() { + let parent_sleep_millis = 1000; + let test_name = "test_grammar_that_should_hang_and_not_segfault"; + let test_var = "CARGO_HANG_TEST"; + + eprintln!(" {test_name}"); + + let tests_exec_path = std::env::args() + .next() + .expect("Failed get get tests executable path"); + + match std::env::var(test_var) { + Ok(v) if v == test_name => { + eprintln!(" child process id {}", std::process::id()); + hang_test(); + } + + Err(VarError::NotPresent) => { + eprintln!(" parent process id {}", std::process::id()); + if true { + let mut command = Command::new(tests_exec_path); + command.arg(test_name).env(test_var, test_name); + if std::env::args().any(|x| x == "--nocapture") { + command.arg("--nocapture"); + } else { + command.stdout(Stdio::null()).stderr(Stdio::null()); + } + match command.spawn() { + Ok(mut child) => { + std::thread::sleep(std::time::Duration::from_millis(parent_sleep_millis)); + match child.try_wait() { + Ok(Some(status)) if status.success() => { + panic!("Child wasn't hang and exited successfully") + } + Ok(Some(status)) => panic!( + "Child wasn't hang and exited with status code: {:?}", + status.code() + ), + _ => (), + } + if let Err(e) = child.kill() { + eprintln!( + "Failed to kill hang test sub process id: {}, error: {e}", + child.id() + ); + } + } + Err(e) => panic!("{e}"), + } + } + } + + Err(e) => panic!("Env var error: {e}"), + _ => 
unreachable!(), + } + + fn hang_test() { + let test_grammar_dir = fixtures_dir() + .join("test_grammars") + .join("get_col_should_hang_not_crash"); + + let grammar_json = load_grammar_file(&test_grammar_dir.join("grammar.js"), None).unwrap(); + let (parser_name, parser_code) = + generate_parser_for_grammar(grammar_json.as_str()).unwrap(); + + let language = + get_test_language(&parser_name, &parser_code, Some(test_grammar_dir.as_path())); + + let mut parser = Parser::new(); + parser.set_language(&language).unwrap(); + + let code_that_should_hang = "\nHello"; + + parser.parse(code_that_should_hang, None).unwrap(); + } +} diff --git a/third-party/tree-sitter/tree-sitter/cli/src/tests/parser_test.rs b/third-party/tree-sitter/tree-sitter/cli/src/tests/parser_test.rs index 78c6cda4c34..b57981b6a55 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/tests/parser_test.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/tests/parser_test.rs @@ -1,24 +1,25 @@ use super::helpers::{ allocations, - edits::invert_edit, - edits::ReadRecorder, - fixtures::{get_language, get_test_grammar, get_test_language}, + edits::{invert_edit, ReadRecorder}, + fixtures::{get_language, get_test_language}, }; use crate::{ generate::generate_parser_for_grammar, parse::{perform_edit, Edit}, + tests::helpers::fixtures::fixtures_dir, }; -use proc_macro::retry; use std::{ + fs, sync::atomic::{AtomicUsize, Ordering}, thread, time, }; use tree_sitter::{IncludedRangesError, InputEdit, LogType, Parser, Point, Range}; +use tree_sitter_proc_macro::retry; #[test] fn test_parsing_simple_string() { let mut parser = Parser::new(); - parser.set_language(get_language("rust")).unwrap(); + parser.set_language(&get_language("rust")).unwrap(); let tree = parser .parse( @@ -49,7 +50,7 @@ fn test_parsing_simple_string() { #[test] fn test_parsing_with_logging() { let mut parser = Parser::new(); - parser.set_language(get_language("rust")).unwrap(); + parser.set_language(&get_language("rust")).unwrap(); let mut 
messages = Vec::new(); parser.set_logger(Some(Box::new(|log_type, message| { @@ -90,7 +91,7 @@ fn test_parsing_with_debug_graph_enabled() { let has_zero_indexed_row = |s: &str| s.contains("position: 0,"); let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); let mut debug_graph_file = tempfile::tempfile().unwrap(); parser.print_dot_graphs(&debug_graph_file); @@ -103,8 +104,7 @@ fn test_parsing_with_debug_graph_enabled() { for line in log_reader { assert!( !has_zero_indexed_row(&line), - "Graph log output includes zero-indexed row: {}", - line + "Graph log output includes zero-indexed row: {line}", ); } } @@ -112,7 +112,7 @@ fn test_parsing_with_debug_graph_enabled() { #[test] fn test_parsing_with_custom_utf8_input() { let mut parser = Parser::new(); - parser.set_language(get_language("rust")).unwrap(); + parser.set_language(&get_language("rust")).unwrap(); let lines = &["pub fn foo() {", " 1", "}"]; @@ -125,7 +125,7 @@ fn test_parsing_with_custom_utf8_input() { if column < lines[row].as_bytes().len() { &lines[row].as_bytes()[column..] 
} else { - "\n".as_bytes() + b"\n" } } else { &[] @@ -148,19 +148,19 @@ fn test_parsing_with_custom_utf8_input() { ) ); assert_eq!(root.kind(), "source_file"); - assert_eq!(root.has_error(), false); + assert!(!root.has_error()); assert_eq!(root.child(0).unwrap().kind(), "function_item"); } #[test] fn test_parsing_with_custom_utf16_input() { let mut parser = Parser::new(); - parser.set_language(get_language("rust")).unwrap(); + parser.set_language(&get_language("rust")).unwrap(); - let lines: Vec> = ["pub fn foo() {", " 1", "}"] + let lines = ["pub fn foo() {", " 1", "}"] .iter() - .map(|s| s.encode_utf16().collect()) - .collect(); + .map(|s| s.encode_utf16().collect::>()) + .collect::>(); let tree = parser .parse_utf16_with( @@ -187,14 +187,14 @@ fn test_parsing_with_custom_utf16_input() { "(source_file (function_item (visibility_modifier) name: (identifier) parameters: (parameters) body: (block (integer_literal))))" ); assert_eq!(root.kind(), "source_file"); - assert_eq!(root.has_error(), false); + assert!(!root.has_error()); assert_eq!(root.child(0).unwrap().kind(), "function_item"); } #[test] fn test_parsing_with_callback_returning_owned_strings() { let mut parser = Parser::new(); - parser.set_language(get_language("rust")).unwrap(); + parser.set_language(&get_language("rust")).unwrap(); let text = b"pub fn foo() { 1 }"; @@ -215,7 +215,7 @@ fn test_parsing_with_callback_returning_owned_strings() { #[test] fn test_parsing_text_with_byte_order_mark() { let mut parser = Parser::new(); - parser.set_language(get_language("rust")).unwrap(); + parser.set_language(&get_language("rust")).unwrap(); // Parse UTF16 text with a BOM let tree = parser @@ -274,15 +274,18 @@ fn test_parsing_text_with_byte_order_mark() { #[test] fn test_parsing_invalid_chars_at_eof() { let mut parser = Parser::new(); - parser.set_language(get_language("json")).unwrap(); + parser.set_language(&get_language("json")).unwrap(); let tree = parser.parse(b"\xdf", None).unwrap(); - 
assert_eq!(tree.root_node().to_sexp(), "(ERROR (UNEXPECTED INVALID))"); + assert_eq!( + tree.root_node().to_sexp(), + "(document (ERROR (UNEXPECTED INVALID)))" + ); } #[test] fn test_parsing_unexpected_null_characters_within_source() { let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); let tree = parser.parse(b"var \0 something;", None).unwrap(); assert_eq!( tree.root_node().to_sexp(), @@ -293,7 +296,7 @@ fn test_parsing_unexpected_null_characters_within_source() { #[test] fn test_parsing_ends_when_input_callback_returns_empty() { let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); let mut i = 0; let source = b"abcdefghijklmnoqrs"; let tree = parser @@ -317,7 +320,7 @@ fn test_parsing_ends_when_input_callback_returns_empty() { #[test] fn test_parsing_after_editing_beginning_of_code() { let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); let mut code = b"123 + 456 * (10 + x);".to_vec(); let mut tree = parser.parse(&code, None).unwrap(); @@ -339,7 +342,8 @@ fn test_parsing_after_editing_beginning_of_code() { deleted_length: 0, inserted_text: b" || 5".to_vec(), }, - ); + ) + .unwrap(); let mut recorder = ReadRecorder::new(&code); let tree = parser @@ -364,7 +368,7 @@ fn test_parsing_after_editing_beginning_of_code() { #[test] fn test_parsing_after_editing_end_of_code() { let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); let mut code = b"x * (100 + abc);".to_vec(); let mut tree = parser.parse(&code, None).unwrap(); @@ -386,7 +390,8 @@ fn test_parsing_after_editing_end_of_code() { deleted_length: 0, inserted_text: b".d".to_vec(), }, - ); + ) + .unwrap(); let mut recorder 
= ReadRecorder::new(&code); let tree = parser @@ -411,7 +416,7 @@ fn test_parsing_after_editing_end_of_code() { #[test] fn test_parsing_empty_file_with_reused_tree() { let mut parser = Parser::new(); - parser.set_language(get_language("rust")).unwrap(); + parser.set_language(&get_language("rust")).unwrap(); let tree = parser.parse("", None); parser.parse("", tree.as_ref()); @@ -422,16 +427,15 @@ fn test_parsing_empty_file_with_reused_tree() { #[test] fn test_parsing_after_editing_tree_that_depends_on_column_values() { - let (grammar, path) = get_test_grammar("uses_current_column"); + let dir = fixtures_dir() + .join("test_grammars") + .join("uses_current_column"); + let grammar = fs::read_to_string(dir.join("grammar.json")).unwrap(); let (grammar_name, parser_code) = generate_parser_for_grammar(&grammar).unwrap(); let mut parser = Parser::new(); parser - .set_language(get_test_language( - &grammar_name, - &parser_code, - path.as_ref().map(AsRef::as_ref), - )) + .set_language(&get_test_language(&grammar_name, &parser_code, Some(&dir))) .unwrap(); let mut code = b" @@ -461,7 +465,8 @@ h + i deleted_length: 0, inserted_text: b"1234".to_vec(), }, - ); + ) + .unwrap(); assert_eq!( code, @@ -500,13 +505,13 @@ h + i #[test] fn test_parsing_after_detecting_error_in_the_middle_of_a_string_token() { let mut parser = Parser::new(); - parser.set_language(get_language("python")).unwrap(); + parser.set_language(&get_language("python")).unwrap(); let mut source = b"a = b, 'c, d'".to_vec(); let tree = parser.parse(&source, None).unwrap(); assert_eq!( tree.root_node().to_sexp(), - "(module (expression_statement (assignment left: (identifier) right: (expression_list (identifier) (string string_content: (string_content))))))" + "(module (expression_statement (assignment left: (identifier) right: (expression_list (identifier) (string (string_start) (string_content) (string_end))))))" ); // Delete a suffix of the source code, starting in the middle of the string @@ -525,12 +530,12 @@ 
fn test_parsing_after_detecting_error_in_the_middle_of_a_string_token() { let undo = invert_edit(&source, &edit); let mut tree2 = tree.clone(); - perform_edit(&mut tree2, &mut source, &edit); + perform_edit(&mut tree2, &mut source, &edit).unwrap(); tree2 = parser.parse(&source, Some(&tree2)).unwrap(); assert!(tree2.root_node().has_error()); let mut tree3 = tree2.clone(); - perform_edit(&mut tree3, &mut source, &undo); + perform_edit(&mut tree3, &mut source, &undo).unwrap(); tree3 = parser.parse(&source, Some(&tree3)).unwrap(); assert_eq!(tree3.root_node().to_sexp(), tree.root_node().to_sexp(),); } @@ -544,7 +549,7 @@ fn test_parsing_on_multiple_threads() { let this_file_source = include_str!("parser_test.rs"); let mut parser = Parser::new(); - parser.set_language(get_language("rust")).unwrap(); + parser.set_language(&get_language("rust")).unwrap(); let tree = parser.parse(this_file_source, None).unwrap(); let mut parse_threads = Vec::new(); @@ -572,7 +577,7 @@ fn test_parsing_on_multiple_threads() { // Reparse using the old tree as a starting point. let mut parser = Parser::new(); - parser.set_language(get_language("rust")).unwrap(); + parser.set_language(&get_language("rust")).unwrap(); parser.parse(&prepended_source, Some(&tree_clone)).unwrap() })); } @@ -593,7 +598,7 @@ fn test_parsing_cancelled_by_another_thread() { let cancellation_flag = std::sync::Arc::new(AtomicUsize::new(0)); let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); unsafe { parser.set_cancellation_flag(Some(&cancellation_flag)) }; // Long input - parsing succeeds @@ -642,7 +647,7 @@ fn test_parsing_cancelled_by_another_thread() { #[retry(10)] fn test_parsing_with_a_timeout() { let mut parser = Parser::new(); - parser.set_language(get_language("json")).unwrap(); + parser.set_language(&get_language("json")).unwrap(); // Parse an infinitely-long array, but pause after 1ms of processing. 
parser.set_timeout_micros(1000); @@ -681,14 +686,10 @@ fn test_parsing_with_a_timeout() { parser.set_timeout_micros(0); let tree = parser .parse_with( - &mut |offset, _| { - if offset > 5000 { - "".as_bytes() - } else if offset == 5000 { - "]".as_bytes() - } else { - ",0".as_bytes() - } + &mut |offset, _| match offset { + 5001.. => "".as_bytes(), + 5000 => "]".as_bytes(), + _ => ",0".as_bytes(), }, None, ) @@ -697,9 +698,10 @@ fn test_parsing_with_a_timeout() { } #[test] +#[retry(10)] fn test_parsing_with_a_timeout_and_a_reset() { let mut parser = Parser::new(); - parser.set_language(get_language("json")).unwrap(); + parser.set_language(&get_language("json")).unwrap(); parser.set_timeout_micros(5); let tree = parser.parse( @@ -752,10 +754,11 @@ fn test_parsing_with_a_timeout_and_a_reset() { } #[test] +#[retry(10)] fn test_parsing_with_a_timeout_and_implicit_reset() { allocations::record(|| { let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); parser.set_timeout_micros(5); let tree = parser.parse( @@ -766,7 +769,7 @@ fn test_parsing_with_a_timeout_and_implicit_reset() { // Changing the parser's language implicitly resets, discarding // the previous partial parse. 
- parser.set_language(get_language("json")).unwrap(); + parser.set_language(&get_language("json")).unwrap(); parser.set_timeout_micros(0); let tree = parser.parse( "[null, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]", @@ -785,10 +788,11 @@ fn test_parsing_with_a_timeout_and_implicit_reset() { } #[test] +#[retry(10)] fn test_parsing_with_timeout_and_no_completion() { allocations::record(|| { let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); parser.set_timeout_micros(5); let tree = parser.parse( @@ -808,7 +812,7 @@ fn test_parsing_with_one_included_range() { let source_code = "hi"; let mut parser = Parser::new(); - parser.set_language(get_language("html")).unwrap(); + parser.set_language(&get_language("html")).unwrap(); let html_tree = parser.parse(source_code, None).unwrap(); let script_content_node = html_tree.root_node().child(1).unwrap().child(1).unwrap(); assert_eq!(script_content_node.kind(), "raw_text"); @@ -816,7 +820,7 @@ fn test_parsing_with_one_included_range() { parser .set_included_ranges(&[script_content_node.range()]) .unwrap(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); let js_tree = parser.parse(source_code, None).unwrap(); assert_eq!( @@ -824,7 +828,7 @@ fn test_parsing_with_one_included_range() { concat!( "(program (expression_statement (call_expression ", "function: (member_expression object: (identifier) property: (property_identifier)) ", - "arguments: (arguments (string)))))", + "arguments: (arguments (string (string_fragment))))))", ) ); assert_eq!( @@ -839,23 +843,23 @@ fn test_parsing_with_multiple_included_ranges() { let source_code = "html `
Hello, ${name.toUpperCase()}, it's ${now()}.
`"; let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); let js_tree = parser.parse(source_code, None).unwrap(); let template_string_node = js_tree .root_node() .descendant_for_byte_range( - source_code.find("
").unwrap(), - source_code.find("Hello").unwrap(), + source_code.find("`<").unwrap(), + source_code.find(">`").unwrap(), ) .unwrap(); assert_eq!(template_string_node.kind(), "template_string"); let open_quote_node = template_string_node.child(0).unwrap(); - let interpolation_node1 = template_string_node.child(1).unwrap(); - let interpolation_node2 = template_string_node.child(2).unwrap(); - let close_quote_node = template_string_node.child(3).unwrap(); + let interpolation_node1 = template_string_node.child(2).unwrap(); + let interpolation_node2 = template_string_node.child(4).unwrap(); + let close_quote_node = template_string_node.child(6).unwrap(); - parser.set_language(get_language("html")).unwrap(); + parser.set_language(&get_language("html")).unwrap(); let html_ranges = &[ Range { start_byte: open_quote_node.end_byte(), @@ -882,7 +886,7 @@ fn test_parsing_with_multiple_included_ranges() { assert_eq!( html_tree.root_node().to_sexp(), concat!( - "(fragment (element", + "(document (element", " (start_tag (tag_name))", " (text)", " (element (start_tag (tag_name)) (end_tag (tag_name)))", @@ -934,7 +938,7 @@ fn test_parsing_with_included_range_containing_mismatched_positions() { let source_code = "
test
{_ignore_this_part_}"; let mut parser = Parser::new(); - parser.set_language(get_language("html")).unwrap(); + parser.set_language(&get_language("html")).unwrap(); let end_byte = source_code.find("{_ignore_this_part_").unwrap(); @@ -961,7 +965,7 @@ fn test_parsing_with_included_range_containing_mismatched_positions() { assert_eq!( html_tree.root_node().to_sexp(), - "(fragment (element (start_tag (tag_name)) (text) (end_tag (tag_name))))" + "(document (element (start_tag (tag_name)) (text) (end_tag (tag_name))))" ); } @@ -1009,13 +1013,17 @@ fn test_parsing_error_in_invalid_included_ranges() { #[test] fn test_parsing_utf16_code_with_errors_at_the_end_of_an_included_range() { let source_code = ""; - let utf16_source_code: Vec = source_code.as_bytes().iter().map(|c| *c as u16).collect(); + let utf16_source_code = source_code + .as_bytes() + .iter() + .map(|c| u16::from(*c)) + .collect::>(); let start_byte = 2 * source_code.find("a.").unwrap(); let end_byte = 2 * source_code.find("").unwrap(); let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); parser .set_included_ranges(&[Range { start_byte, @@ -1037,7 +1045,7 @@ fn test_parsing_with_external_scanner_that_uses_included_range_boundaries() { let range2_end_byte = range2_start_byte + " d() ".len(); let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); parser .set_included_ranges(&[ Range { @@ -1081,7 +1089,7 @@ fn test_parsing_with_a_newly_excluded_range() { // Parse HTML including the template directive, which will cause an error let mut parser = Parser::new(); - parser.set_language(get_language("html")).unwrap(); + parser.set_language(&get_language("html")).unwrap(); let mut first_tree = parser .parse_with(&mut chunked_input(&source_code, 3), None) .unwrap(); @@ -1126,7 +1134,7 @@ fn 
test_parsing_with_a_newly_excluded_range() { assert_eq!( tree.root_node().to_sexp(), concat!( - "(fragment (text) (element", + "(document (text) (element", " (start_tag (tag_name))", " (element (start_tag (tag_name)) (end_tag (tag_name)))", " (end_tag (tag_name))))" @@ -1168,12 +1176,12 @@ fn test_parsing_with_a_newly_included_range() { // Parse only the first code directive as JavaScript let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); parser .set_included_ranges(&[simple_range(range1_start, range1_end)]) .unwrap(); let tree = parser - .parse_with(&mut chunked_input(&source_code, 3), None) + .parse_with(&mut chunked_input(source_code, 3), None) .unwrap(); assert_eq!( tree.root_node().to_sexp(), @@ -1192,7 +1200,7 @@ fn test_parsing_with_a_newly_included_range() { ]) .unwrap(); let tree2 = parser - .parse_with(&mut chunked_input(&source_code, 3), Some(&tree)) + .parse_with(&mut chunked_input(source_code, 3), Some(&tree)) .unwrap(); assert_eq!( tree2.root_node().to_sexp(), @@ -1216,7 +1224,7 @@ fn test_parsing_with_a_newly_included_range() { simple_range(range3_start, range3_end), ]) .unwrap(); - let tree3 = parser.parse(&source_code, Some(&tree)).unwrap(); + let tree3 = parser.parse(source_code, Some(&tree)).unwrap(); assert_eq!( tree3.root_node().to_sexp(), concat!( @@ -1260,7 +1268,7 @@ fn test_parsing_with_included_ranges_and_missing_tokens() { let mut parser = Parser::new(); parser - .set_language(get_test_language(&parser_name, &parser_code, None)) + .set_language(&get_test_language(&parser_name, &parser_code, None)) .unwrap(); // There's a missing `a` token at the beginning of the code. 
It must be inserted @@ -1293,7 +1301,119 @@ fn test_parsing_with_included_ranges_and_missing_tokens() { assert_eq!(root.child(3).unwrap().start_byte(), 4); } -fn simple_range(start: usize, end: usize) -> Range { +#[test] +fn test_grammars_that_can_hang_on_eof() { + let (parser_name, parser_code) = generate_parser_for_grammar( + r#" + { + "name": "test_single_null_char_regex", + "rules": { + "source_file": { + "type": "SEQ", + "members": [ + { "type": "STRING", "value": "\"" }, + { "type": "PATTERN", "value": "[\\x00]*" }, + { "type": "STRING", "value": "\"" } + ] + } + }, + "extras": [ { "type": "PATTERN", "value": "\\s" } ] + } + "#, + ) + .unwrap(); + + let mut parser = Parser::new(); + parser + .set_language(&get_test_language(&parser_name, &parser_code, None)) + .unwrap(); + parser.parse("\"", None).unwrap(); + + let (parser_name, parser_code) = generate_parser_for_grammar( + r#" + { + "name": "test_null_char_with_next_char_regex", + "rules": { + "source_file": { + "type": "SEQ", + "members": [ + { "type": "STRING", "value": "\"" }, + { "type": "PATTERN", "value": "[\\x00-\\x01]*" }, + { "type": "STRING", "value": "\"" } + ] + } + }, + "extras": [ { "type": "PATTERN", "value": "\\s" } ] + } + "#, + ) + .unwrap(); + + parser + .set_language(&get_test_language(&parser_name, &parser_code, None)) + .unwrap(); + parser.parse("\"", None).unwrap(); + + let (parser_name, parser_code) = generate_parser_for_grammar( + r#" + { + "name": "test_null_char_with_range_regex", + "rules": { + "source_file": { + "type": "SEQ", + "members": [ + { "type": "STRING", "value": "\"" }, + { "type": "PATTERN", "value": "[\\x00-\\x7F]*" }, + { "type": "STRING", "value": "\"" } + ] + } + }, + "extras": [ { "type": "PATTERN", "value": "\\s" } ] + } + "#, + ) + .unwrap(); + + parser + .set_language(&get_test_language(&parser_name, &parser_code, None)) + .unwrap(); + parser.parse("\"", None).unwrap(); +} + +#[test] +fn test_parse_stack_recursive_merge_error_cost_calculation_bug() { + let 
source_code = r#" +fn main() { + if n == 1 { + } else if n == 2 { + } else { + } +} + +let y = if x == 5 { 10 } else { 15 }; + +if foo && bar {} + +if foo && bar || baz {} +"#; + + let mut parser = Parser::new(); + parser.set_language(&get_language("rust")).unwrap(); + + let mut tree = parser.parse(source_code, None).unwrap(); + + let edit = Edit { + position: 60, + deleted_length: 63, + inserted_text: Vec::new(), + }; + let mut input = source_code.as_bytes().to_vec(); + perform_edit(&mut tree, &mut input, &edit).unwrap(); + + parser.parse(&input, Some(&tree)).unwrap(); +} + +const fn simple_range(start: usize, end: usize) -> Range { Range { start_byte: start, end_byte: end, diff --git a/third-party/tree-sitter/tree-sitter/cli/src/tests/pathological_test.rs b/third-party/tree-sitter/tree-sitter/cli/src/tests/pathological_test.rs index ec10884c31f..7e6dad16fe1 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/tests/pathological_test.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/tests/pathological_test.rs @@ -8,7 +8,7 @@ fn test_pathological_example_1() { allocations::record(|| { let mut parser = Parser::new(); - parser.set_language(get_language(language)).unwrap(); + parser.set_language(&get_language(language)).unwrap(); parser.parse(source, None).unwrap(); }); } diff --git a/third-party/tree-sitter/tree-sitter/cli/src/tests/proc_macro/Cargo.toml b/third-party/tree-sitter/tree-sitter/cli/src/tests/proc_macro/Cargo.toml index e6900d10e76..ade4d61682e 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/tests/proc_macro/Cargo.toml +++ b/third-party/tree-sitter/tree-sitter/cli/src/tests/proc_macro/Cargo.toml @@ -1,15 +1,15 @@ [package] -name = "proc_macro" -version = "0.1.0" -edition = "2021" -publish = false +name = "tree-sitter-tests-proc-macro" +version = "0.0.0" +edition.workspace = true rust-version.workspace = true +publish = false [lib] proc-macro = true [dependencies] -proc-macro2 = "1" -quote = "1" +proc-macro2 = "1.0.78" +quote = "1.0.35" 
rand = "0.8.5" -syn = { version = "1", features = ["full"] } +syn = { version = "2.0.52", features = ["full"] } diff --git a/third-party/tree-sitter/tree-sitter/cli/src/tests/proc_macro/src/lib.rs b/third-party/tree-sitter/tree-sitter/cli/src/tests/proc_macro/src/lib.rs index d831a75ba74..3079047eb46 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/tests/proc_macro/src/lib.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/tests/proc_macro/src/lib.rs @@ -81,9 +81,9 @@ pub fn test_with_seed(args: TokenStream, input: TokenStream) -> TokenStream { retry.replace(LitInt::new("0", Span::mixed_site())); } - Ok(Args { - retry: retry.expect("`retry` parameter is requred"), - seed: seed.expect("`initial_seed` parameter is required"), + Ok(Self { + retry: retry.expect("`retry` parameter is required"), + seed: seed.expect("`seed` parameter is required"), seed_fn, }) } @@ -101,8 +101,6 @@ pub fn test_with_seed(args: TokenStream, input: TokenStream) -> TokenStream { let attrs = func.attrs.clone(); let name = func.sig.ident.clone(); - // dbg!(quote::ToTokens::into_token_stream(&func)); - TokenStream::from(quote! 
{ #[test] #(#attrs),* diff --git a/third-party/tree-sitter/tree-sitter/cli/src/tests/query_test.rs b/third-party/tree-sitter/tree-sitter/cli/src/tests/query_test.rs index 7d01c26e658..e2c3fd82111 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/tests/query_test.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/tests/query_test.rs @@ -1,15 +1,20 @@ use super::helpers::{ allocations, - fixtures::get_language, - query_helpers::{Match, Pattern}, + fixtures::{get_language, get_test_language}, + query_helpers::{assert_query_matches, Match, Pattern}, ITERATION_COUNT, }; +use crate::{ + generate::generate_parser_for_grammar, + tests::helpers::query_helpers::{collect_captures, collect_matches}, +}; +use indoc::indoc; use lazy_static::lazy_static; use rand::{prelude::StdRng, SeedableRng}; use std::{env, fmt::Write}; use tree_sitter::{ - CaptureQuantifier, Language, Node, Parser, Point, Query, QueryCapture, QueryCursor, QueryError, - QueryErrorKind, QueryMatch, QueryPredicate, QueryPredicateArg, QueryProperty, + CaptureQuantifier, Language, Node, Parser, Point, Query, QueryCursor, QueryError, + QueryErrorKind, QueryPredicate, QueryPredicateArg, QueryProperty, }; use unindent::Unindent; @@ -22,16 +27,16 @@ fn test_query_errors_on_invalid_syntax() { allocations::record(|| { let language = get_language("javascript"); - assert!(Query::new(language, "(if_statement)").is_ok()); + assert!(Query::new(&language, "(if_statement)").is_ok()); assert!(Query::new( - language, + &language, "(if_statement condition:(parenthesized_expression (identifier)))" ) .is_ok()); // Mismatched parens assert_eq!( - Query::new(language, "(if_statement").unwrap_err().message, + Query::new(&language, "(if_statement").unwrap_err().message, [ "(if_statement", // " ^", @@ -39,7 +44,7 @@ fn test_query_errors_on_invalid_syntax() { .join("\n") ); assert_eq!( - Query::new(language, "; comment 1\n; comment 2\n (if_statement))") + Query::new(&language, "; comment 1\n; comment 2\n (if_statement))") 
.unwrap_err() .message, [ @@ -52,7 +57,7 @@ fn test_query_errors_on_invalid_syntax() { // Return an error at the *beginning* of a bare identifier not followed a colon. // If there's a colon but no pattern, return an error at the end of the colon. assert_eq!( - Query::new(language, "(if_statement identifier)") + Query::new(&language, "(if_statement identifier)") .unwrap_err() .message, [ @@ -62,7 +67,7 @@ fn test_query_errors_on_invalid_syntax() { .join("\n") ); assert_eq!( - Query::new(language, "(if_statement condition:)") + Query::new(&language, "(if_statement condition:)") .unwrap_err() .message, [ @@ -74,19 +79,19 @@ fn test_query_errors_on_invalid_syntax() { // Return an error at the beginning of an unterminated string. assert_eq!( - Query::new(language, r#"(identifier) "h "#) + Query::new(&language, r#"(identifier) "h "#) .unwrap_err() .message, [ r#"(identifier) "h "#, // - r#" ^"#, + r" ^", ] .join("\n") ); // Empty tree pattern assert_eq!( - Query::new(language, r#"((identifier) ()"#) + Query::new(&language, r"((identifier) ()") .unwrap_err() .message, [ @@ -98,7 +103,7 @@ fn test_query_errors_on_invalid_syntax() { // Empty alternation assert_eq!( - Query::new(language, r#"((identifier) [])"#) + Query::new(&language, r"((identifier) [])") .unwrap_err() .message, [ @@ -110,7 +115,7 @@ fn test_query_errors_on_invalid_syntax() { // Unclosed sibling expression with predicate assert_eq!( - Query::new(language, r#"((identifier) (#a)"#) + Query::new(&language, r"((identifier) (#a)") .unwrap_err() .message, [ @@ -122,37 +127,37 @@ fn test_query_errors_on_invalid_syntax() { // Unclosed predicate assert_eq!( - Query::new(language, r#"((identifier) @x (#eq? @x a"#) + Query::new(&language, r"((identifier) @x (#eq? @x a") .unwrap_err() .message, [ - r#"((identifier) @x (#eq? @x a"#, - r#" ^"#, + r"((identifier) @x (#eq? 
@x a", + r" ^", ] .join("\n") ); // Need at least one child node for a child anchor assert_eq!( - Query::new(language, r#"(statement_block .)"#) + Query::new(&language, r"(statement_block .)") .unwrap_err() .message, [ // - r#"(statement_block .)"#, - r#" ^"# + r"(statement_block .)", + r" ^" ] .join("\n") ); // Need a field name after a negated field operator assert_eq!( - Query::new(language, r#"(statement_block ! (if_statement))"#) + Query::new(&language, r"(statement_block ! (if_statement))") .unwrap_err() .message, [ - r#"(statement_block ! (if_statement))"#, - r#" ^"# + r"(statement_block ! (if_statement))", + r" ^" ] .join("\n") ); @@ -160,12 +165,12 @@ fn test_query_errors_on_invalid_syntax() { // Unclosed alternation within a tree // tree-sitter/tree-sitter/issues/968 assert_eq!( - Query::new(get_language("c"), r#"(parameter_list [ ")" @foo)"#) + Query::new(&get_language("c"), r#"(parameter_list [ ")" @foo)"#) .unwrap_err() .message, [ r#"(parameter_list [ ")" @foo)"#, - r#" ^"# + r" ^" ] .join("\n") ); @@ -174,14 +179,14 @@ fn test_query_errors_on_invalid_syntax() { // tree-sitter/tree-sitter/issues/1436 assert_eq!( Query::new( - get_language("python"), - r#"[(unary_operator (_) @operand) (not_operator (_) @operand]"# + &get_language("python"), + r"[(unary_operator (_) @operand) (not_operator (_) @operand]" ) .unwrap_err() .message, [ - r#"[(unary_operator (_) @operand) (not_operator (_) @operand]"#, - r#" ^"# + r"[(unary_operator (_) @operand) (not_operator (_) @operand]", + r" ^" ] .join("\n") ); @@ -194,7 +199,7 @@ fn test_query_errors_on_invalid_symbols() { let language = get_language("javascript"); assert_eq!( - Query::new(language, "(clas)").unwrap_err(), + Query::new(&language, "(clas)").unwrap_err(), QueryError { row: 0, offset: 1, @@ -204,7 +209,7 @@ fn test_query_errors_on_invalid_symbols() { } ); assert_eq!( - Query::new(language, "(if_statement (arrayyyyy))").unwrap_err(), + Query::new(&language, "(if_statement (arrayyyyy))").unwrap_err(), 
QueryError { row: 0, offset: 15, @@ -214,7 +219,7 @@ fn test_query_errors_on_invalid_symbols() { }, ); assert_eq!( - Query::new(language, "(if_statement condition: (non_existent3))").unwrap_err(), + Query::new(&language, "(if_statement condition: (non_existent3))").unwrap_err(), QueryError { row: 0, offset: 26, @@ -224,7 +229,7 @@ fn test_query_errors_on_invalid_symbols() { }, ); assert_eq!( - Query::new(language, "(if_statement condit: (identifier))").unwrap_err(), + Query::new(&language, "(if_statement condit: (identifier))").unwrap_err(), QueryError { row: 0, offset: 14, @@ -234,7 +239,7 @@ fn test_query_errors_on_invalid_symbols() { }, ); assert_eq!( - Query::new(language, "(if_statement conditioning: (identifier))").unwrap_err(), + Query::new(&language, "(if_statement conditioning: (identifier))").unwrap_err(), QueryError { row: 0, offset: 14, @@ -244,7 +249,7 @@ fn test_query_errors_on_invalid_symbols() { } ); assert_eq!( - Query::new(language, "(if_statement !alternativ)").unwrap_err(), + Query::new(&language, "(if_statement !alternativ)").unwrap_err(), QueryError { row: 0, offset: 15, @@ -254,7 +259,7 @@ fn test_query_errors_on_invalid_symbols() { } ); assert_eq!( - Query::new(language, "(if_statement !alternatives)").unwrap_err(), + Query::new(&language, "(if_statement !alternatives)").unwrap_err(), QueryError { row: 0, offset: 15, @@ -272,7 +277,7 @@ fn test_query_errors_on_invalid_predicates() { let language = get_language("javascript"); assert_eq!( - Query::new(language, "((identifier) @id (@id))").unwrap_err(), + Query::new(&language, "((identifier) @id (@id))").unwrap_err(), QueryError { kind: QueryErrorKind::Syntax, row: 0, @@ -286,7 +291,7 @@ fn test_query_errors_on_invalid_predicates() { } ); assert_eq!( - Query::new(language, "((identifier) @id (#eq? @id))").unwrap_err(), + Query::new(&language, "((identifier) @id (#eq? 
@id))").unwrap_err(), QueryError { kind: QueryErrorKind::Predicate, row: 0, @@ -297,7 +302,7 @@ fn test_query_errors_on_invalid_predicates() { } ); assert_eq!( - Query::new(language, "((identifier) @id (#eq? @id @ok))").unwrap_err(), + Query::new(&language, "((identifier) @id (#eq? @id @ok))").unwrap_err(), QueryError { kind: QueryErrorKind::Capture, row: 0, @@ -317,29 +322,29 @@ fn test_query_errors_on_impossible_patterns() { allocations::record(|| { assert_eq!( Query::new( - js_lang, - "(binary_expression left: (identifier) left: (identifier))" + &js_lang, + "(binary_expression left: (expression (identifier)) left: (expression (identifier)))" ), Err(QueryError { kind: QueryErrorKind::Structure, row: 0, - offset: 38, - column: 38, + offset: 51, + column: 51, message: [ - "(binary_expression left: (identifier) left: (identifier))", - " ^" + "(binary_expression left: (expression (identifier)) left: (expression (identifier)))", + " ^", ] .join("\n"), }) ); Query::new( - js_lang, + &js_lang, "(function_declaration name: (identifier) (statement_block))", ) .unwrap(); assert_eq!( - Query::new(js_lang, "(function_declaration name: (statement_block))"), + Query::new(&js_lang, "(function_declaration name: (statement_block))"), Err(QueryError { kind: QueryErrorKind::Structure, row: 0, @@ -353,9 +358,9 @@ fn test_query_errors_on_impossible_patterns() { }) ); - Query::new(rb_lang, "(call receiver:(call))").unwrap(); + Query::new(&rb_lang, "(call receiver:(call))").unwrap(); assert_eq!( - Query::new(rb_lang, "(call receiver:(binary))"), + Query::new(&rb_lang, "(call receiver:(binary))"), Err(QueryError { kind: QueryErrorKind::Structure, row: 0, @@ -370,9 +375,9 @@ fn test_query_errors_on_impossible_patterns() { ); Query::new( - js_lang, + &js_lang, "[ - (function (identifier)) + (function_expression (identifier)) (function_declaration (identifier)) (generator_function_declaration (identifier)) ]", @@ -380,9 +385,9 @@ fn test_query_errors_on_impossible_patterns() { .unwrap(); 
assert_eq!( Query::new( - js_lang, + &js_lang, "[ - (function (identifier)) + (function_expression (identifier)) (function_declaration (object)) (generator_function_declaration (identifier)) ]", @@ -390,7 +395,7 @@ fn test_query_errors_on_impossible_patterns() { Err(QueryError { kind: QueryErrorKind::Structure, row: 2, - offset: 88, + offset: 99, column: 42, message: [ " (function_declaration (object))", // @@ -401,7 +406,7 @@ fn test_query_errors_on_impossible_patterns() { ); assert_eq!( - Query::new(js_lang, "(identifier (identifier))",), + Query::new(&js_lang, "(identifier (identifier))",), Err(QueryError { kind: QueryErrorKind::Structure, row: 0, @@ -415,7 +420,7 @@ fn test_query_errors_on_impossible_patterns() { }) ); assert_eq!( - Query::new(js_lang, "(true (true))",), + Query::new(&js_lang, "(true (true))",), Err(QueryError { kind: QueryErrorKind::Structure, row: 0, @@ -430,21 +435,21 @@ fn test_query_errors_on_impossible_patterns() { ); Query::new( - js_lang, + &js_lang, "(if_statement - condition: (parenthesized_expression (_expression) @cond))", + condition: (parenthesized_expression (expression) @cond))", ) .unwrap(); assert_eq!( - Query::new(js_lang, "(if_statement condition: (_expression))",), + Query::new(&js_lang, "(if_statement condition: (expression))"), Err(QueryError { kind: QueryErrorKind::Structure, row: 0, offset: 14, column: 14, message: [ - "(if_statement condition: (_expression))", // + "(if_statement condition: (expression))", // " ^", ] .join("\n") @@ -456,12 +461,12 @@ fn test_query_errors_on_impossible_patterns() { #[test] fn test_query_verifies_possible_patterns_with_aliased_parent_nodes() { allocations::record(|| { - let ruby = get_language("ruby"); + let language = get_language("ruby"); - Query::new(ruby, "(destructured_parameter (identifier))").unwrap(); + Query::new(&language, "(destructured_parameter (identifier))").unwrap(); assert_eq!( - Query::new(ruby, "(destructured_parameter (string))",), + Query::new(&language, 
"(destructured_parameter (string))",), Err(QueryError { kind: QueryErrorKind::Structure, row: 0, @@ -482,13 +487,13 @@ fn test_query_matches_with_simple_pattern() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, "(function_declaration name: (identifier) @fn-name)", ) .unwrap(); assert_query_matches( - language, + &language, &query, "function one() { two(); function three() {} }", &[ @@ -504,7 +509,7 @@ fn test_query_matches_with_multiple_on_same_root() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, "(class_declaration name: (identifier) @the-class-name (class_body @@ -514,7 +519,7 @@ fn test_query_matches_with_multiple_on_same_root() { .unwrap(); assert_query_matches( - language, + &language, &query, " class Person { @@ -550,7 +555,7 @@ fn test_query_matches_with_multiple_patterns_different_roots() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, " (function_declaration name:(identifier) @fn-def) (call_expression function:(identifier) @fn-ref) @@ -559,7 +564,7 @@ fn test_query_matches_with_multiple_patterns_different_roots() { .unwrap(); assert_query_matches( - language, + &language, &query, " function f1() { @@ -580,11 +585,11 @@ fn test_query_matches_with_multiple_patterns_same_root() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, " (pair key: (property_identifier) @method-def - value: (function)) + value: (function_expression)) (pair key: (property_identifier) @method-def @@ -594,7 +599,7 @@ fn test_query_matches_with_multiple_patterns_same_root() { .unwrap(); assert_query_matches( - language, + &language, &query, " a = { @@ -615,7 +620,7 @@ fn test_query_matches_with_nesting_and_no_fields() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( 
- language, + &language, " (array (array @@ -626,7 +631,7 @@ fn test_query_matches_with_nesting_and_no_fields() { .unwrap(); assert_query_matches( - language, + &language, &query, " [[a]]; @@ -650,10 +655,10 @@ fn test_query_matches_with_nesting_and_no_fields() { fn test_query_matches_with_many_results() { allocations::record(|| { let language = get_language("javascript"); - let query = Query::new(language, "(array (identifier) @element)").unwrap(); + let query = Query::new(&language, "(array (identifier) @element)").unwrap(); assert_query_matches( - language, + &language, &query, &"[hello];\n".repeat(50), &vec![(0, vec![("element", "hello")]); 50], @@ -666,7 +671,7 @@ fn test_query_matches_with_many_overlapping_results() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, r#" (call_expression function: (member_expression @@ -691,7 +696,7 @@ fn test_query_matches_with_many_overlapping_results() { source += &"\n .foo(bar(BAZ))".repeat(count); assert_query_matches( - language, + &language, &query, &source, &[ @@ -713,7 +718,7 @@ fn test_query_matches_capturing_error_nodes() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, " (ERROR (identifier) @the-error-identifier) @the-error ", @@ -721,7 +726,7 @@ fn test_query_matches_capturing_error_nodes() { .unwrap(); assert_query_matches( - language, + &language, &query, "function a(b,, c, d :e:) {}", &[(0, vec![("the-error", ":e:"), ("the-error-identifier", "e")])], @@ -734,7 +739,7 @@ fn test_query_matches_with_extra_children() { allocations::record(|| { let language = get_language("ruby"); let query = Query::new( - language, + &language, " (program(comment) @top_level_comment) (argument_list (heredoc_body) @heredoc_in_args) @@ -743,7 +748,7 @@ fn test_query_matches_with_extra_children() { .unwrap(); assert_query_matches( - language, + &language, &query, " # top-level @@ -777,7 +782,7 @@ fn 
test_query_matches_with_named_wildcard() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, " (return_statement (_) @the-return-value) (binary_expression operator: _ @the-operator) @@ -788,7 +793,7 @@ fn test_query_matches_with_named_wildcard() { let source = "return a + b - c;"; let mut parser = Parser::new(); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); @@ -809,7 +814,7 @@ fn test_query_matches_with_wildcard_at_the_root() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, " (_ (comment) @doc @@ -821,14 +826,14 @@ fn test_query_matches_with_wildcard_at_the_root() { .unwrap(); assert_query_matches( - language, + &language, &query, "/* one */ var x; /* two */ function y() {} /* three */ class Z {}", &[(0, vec![("doc", "/* two */"), ("name", "y")])], ); let query = Query::new( - language, + &language, " (_ (string) @a) (_ (number) @b) @@ -839,7 +844,7 @@ fn test_query_matches_with_wildcard_at_the_root() { .unwrap(); assert_query_matches( - language, + &language, &query, "['hi', x(true), {y: false}]", &[ @@ -851,6 +856,33 @@ fn test_query_matches_with_wildcard_at_the_root() { }); } +#[test] +fn test_query_matches_with_wildcard_within_wildcard() { + allocations::record(|| { + let language = get_language("javascript"); + let query = Query::new( + &language, + " + (_ (_) @child) @parent + ", + ) + .unwrap(); + + assert_query_matches( + &language, + &query, + "/* a */ b; c;", + &[ + (0, vec![("parent", "/* a */ b; c;"), ("child", "/* a */")]), + (0, vec![("parent", "/* a */ b; c;"), ("child", "b;")]), + (0, vec![("parent", "b;"), ("child", "b")]), + (0, vec![("parent", "/* a */ b; c;"), ("child", "c;")]), + (0, vec![("parent", "c;"), 
("child", "c")]), + ], + ); + }); +} + #[test] fn test_query_matches_with_immediate_siblings() { allocations::record(|| { @@ -864,7 +896,7 @@ fn test_query_matches_with_immediate_siblings() { // 2. Between two child nodes in a pattern, it specifies that there cannot be any // named siblings between those two child snodes. let query = Query::new( - language, + &language, " (dotted_name (identifier) @parent @@ -881,7 +913,7 @@ fn test_query_matches_with_immediate_siblings() { .unwrap(); assert_query_matches( - language, + &language, &query, "import a.b.c.d; return [w, [1, y], z]", &[ @@ -895,7 +927,7 @@ fn test_query_matches_with_immediate_siblings() { ); let query = Query::new( - language, + &language, " (block . (_) @first-stmt) (block (_) @stmt) @@ -905,7 +937,7 @@ fn test_query_matches_with_immediate_siblings() { .unwrap(); assert_query_matches( - language, + &language, &query, " if a: @@ -935,7 +967,7 @@ fn test_query_matches_with_last_named_child() { allocations::record(|| { let language = get_language("c"); let query = Query::new( - language, + &language, "(compound_statement (_) (_) @@ -944,7 +976,7 @@ fn test_query_matches_with_last_named_child() { ) .unwrap(); assert_query_matches( - language, + &language, &query, " void one() { a; b; c; } @@ -961,7 +993,7 @@ fn test_query_matches_with_negated_fields() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, " (import_specifier !alias @@ -994,7 +1026,7 @@ fn test_query_matches_with_negated_fields() { ) .unwrap(); assert_query_matches( - language, + &language, &query, " import {a as b, c} from 'p1'; @@ -1025,9 +1057,9 @@ fn test_query_matches_with_negated_fields() { fn test_query_matches_with_field_at_root() { allocations::record(|| { let language = get_language("javascript"); - let query = Query::new(language, "name: (identifier) @name").unwrap(); + let query = Query::new(&language, "name: (identifier) @name").unwrap(); assert_query_matches( - 
language, + &language, &query, " a(); @@ -1045,7 +1077,7 @@ fn test_query_matches_with_repeated_leaf_nodes() { let language = get_language("javascript"); let query = Query::new( - language, + &language, " ( (comment)+ @doc @@ -1065,7 +1097,7 @@ fn test_query_matches_with_repeated_leaf_nodes() { .unwrap(); assert_query_matches( - language, + &language, &query, " // one @@ -1106,14 +1138,14 @@ fn test_query_matches_with_repeated_leaf_nodes() { fn test_query_matches_with_optional_nodes_inside_of_repetitions() { allocations::record(|| { let language = get_language("javascript"); - let query = Query::new(language, r#"(array (","? (number) @num)+)"#).unwrap(); + let query = Query::new(&language, r#"(array (","? (number) @num)+)"#).unwrap(); assert_query_matches( - language, + &language, &query, - r#" + r" var a = [1, 2, 3, 4] - "#, + ", &[( 0, vec![("num", "1"), ("num", "2"), ("num", "3"), ("num", "4")], @@ -1127,17 +1159,17 @@ fn test_query_matches_with_top_level_repetitions() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, - r#" + &language, + r" (comment)+ @doc - "#, + ", ) .unwrap(); assert_query_matches( - language, + &language, &query, - r#" + r" // a // b // c @@ -1145,7 +1177,7 @@ fn test_query_matches_with_top_level_repetitions() { d() // e - "#, + ", &[ (0, vec![("doc", "// a"), ("doc", "// b"), ("doc", "// c")]), (0, vec![("doc", "// e")]), @@ -1158,17 +1190,26 @@ fn test_query_matches_with_top_level_repetitions() { fn test_query_matches_with_non_terminal_repetitions_within_root() { allocations::record(|| { let language = get_language("javascript"); - let query = Query::new(language, "(_ (expression_statement (identifier) @id)+)").unwrap(); + let query = Query::new(&language, "(_ (expression_statement (identifier) @id)+)").unwrap(); assert_query_matches( - language, + &language, &query, - r#" + r" + function f() { + d; + e; + f; + g; + } a; b; c; - "#, - &[(0, vec![("id", "a"), ("id", "b"), ("id", 
"c")])], + ", + &[ + (0, vec![("id", "d"), ("id", "e"), ("id", "f"), ("id", "g")]), + (0, vec![("id", "a"), ("id", "b"), ("id", "c")]), + ], ); }); } @@ -1178,7 +1219,7 @@ fn test_query_matches_with_nested_repetitions() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, r#" (variable_declaration (","? (variable_declarator name: (identifier) @x))+)+ @@ -1187,15 +1228,15 @@ fn test_query_matches_with_nested_repetitions() { .unwrap(); assert_query_matches( - language, + &language, &query, - r#" + r" var a = b, c, d var e, f // more var g - "#, + ", &[ ( 0, @@ -1215,8 +1256,8 @@ fn test_query_matches_with_multiple_repetition_patterns_that_intersect_other_pat // When this query sees a comment, it must keep track of several potential // matches: up to two for each pattern that begins with a comment. let query = Query::new( - language, - r#" + &language, + r" (call_expression function: (member_expression property: (property_identifier) @name)) @ref.method @@ -1229,7 +1270,7 @@ fn test_query_matches_with_multiple_repetition_patterns_that_intersect_other_pat ((comment)* @doc (method_definition)) (comment) @comment - "#, + ", ) .unwrap(); @@ -1242,7 +1283,7 @@ fn test_query_matches_with_multiple_repetition_patterns_that_intersect_other_pat ); assert_query_matches( - language, + &language, &query, &source, &vec![(7, vec![("comment", "// the comment")]); 64] @@ -1262,7 +1303,7 @@ fn test_query_matches_with_trailing_repetitions_of_last_child() { let language = get_language("javascript"); let query = Query::new( - language, + &language, " (unary_expression (primary_expression)+ @operand) ", @@ -1270,7 +1311,7 @@ fn test_query_matches_with_trailing_repetitions_of_last_child() { .unwrap(); assert_query_matches( - language, + &language, &query, " a = typeof (!b && ~c); @@ -1290,7 +1331,7 @@ fn test_query_matches_with_leading_zero_or_more_repeated_leaf_nodes() { let language = get_language("javascript"); let query = 
Query::new( - language, + &language, " ( (comment)* @doc @@ -1303,7 +1344,7 @@ fn test_query_matches_with_leading_zero_or_more_repeated_leaf_nodes() { .unwrap(); assert_query_matches( - language, + &language, &query, " function a() { @@ -1343,7 +1384,7 @@ fn test_query_matches_with_trailing_optional_nodes() { let language = get_language("javascript"); let query = Query::new( - language, + &language, " (class_declaration name: (identifier) @class @@ -1353,10 +1394,15 @@ fn test_query_matches_with_trailing_optional_nodes() { ) .unwrap(); - assert_query_matches(language, &query, "class A {}", &[(0, vec![("class", "A")])]); + assert_query_matches( + &language, + &query, + "class A {}", + &[(0, vec![("class", "A")])], + ); assert_query_matches( - language, + &language, &query, " class A {} @@ -1379,7 +1425,7 @@ fn test_query_matches_with_nested_optional_nodes() { // A function call, optionally containing a function call, which optionally contains a number let query = Query::new( - language, + &language, " (call_expression function: (identifier) @outer-fn @@ -1393,13 +1439,13 @@ fn test_query_matches_with_nested_optional_nodes() { .unwrap(); assert_query_matches( - language, + &language, &query, - r#" + r" a(b, c(), d(null, 1, 2)) e() f(g()) - "#, + ", &[ (0, vec![("outer-fn", "a"), ("inner-fn", "c")]), (0, vec![("outer-fn", "c")]), @@ -1419,7 +1465,7 @@ fn test_query_matches_with_repeated_internal_nodes() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, " (_ (method_definition @@ -1430,7 +1476,7 @@ fn test_query_matches_with_repeated_internal_nodes() { .unwrap(); assert_query_matches( - language, + &language, &query, " class A { @@ -1441,7 +1487,7 @@ fn test_query_matches_with_repeated_internal_nodes() { ", &[(0, vec![("deco", "c"), ("deco", "d"), ("name", "e")])], ); - }) + }); } #[test] @@ -1449,17 +1495,17 @@ fn test_query_matches_with_simple_alternatives() { allocations::record(|| { let language = 
get_language("javascript"); let query = Query::new( - language, + &language, " (pair key: [(property_identifier) (string)] @key - value: [(function) @val1 (arrow_function) @val2]) + value: [(function_expression) @val1 (arrow_function) @val2]) ", ) .unwrap(); assert_query_matches( - language, + &language, &query, " a = { @@ -1480,7 +1526,7 @@ fn test_query_matches_with_simple_alternatives() { (0, vec![("key", "'l'"), ("val1", "function m() {}")]), ], ); - }) + }); } #[test] @@ -1488,7 +1534,7 @@ fn test_query_matches_with_alternatives_in_repetitions() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, r#" (array [(identifier) (string)] @el @@ -1503,7 +1549,7 @@ fn test_query_matches_with_alternatives_in_repetitions() { .unwrap(); assert_query_matches( - language, + &language, &query, " a = [b, 'c', d, 1, e, 'f', 'g', h]; @@ -1516,7 +1562,7 @@ fn test_query_matches_with_alternatives_in_repetitions() { ), ], ); - }) + }); } #[test] @@ -1524,7 +1570,7 @@ fn test_query_matches_with_alternatives_at_root() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, r#" [ "if" @@ -1538,7 +1584,7 @@ fn test_query_matches_with_alternatives_at_root() { .unwrap(); assert_query_matches( - language, + &language, &query, " function a(b, c, d) { @@ -1557,7 +1603,7 @@ fn test_query_matches_with_alternatives_at_root() { (0, vec![("keyword", "throw")]), ], ); - }) + }); } #[test] @@ -1565,19 +1611,19 @@ fn test_query_matches_with_alternatives_under_fields() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, - r#" + &language, + r" (assignment_expression left: [ (identifier) @variable (member_expression property: (property_identifier) @variable) ]) - "#, + ", ) .unwrap(); assert_query_matches( - language, + &language, &query, " a = b; @@ -1603,10 +1649,10 @@ fn 
test_query_matches_in_language_with_simple_aliases() { // HTML uses different tokens to track start tags names, end // tag names, script tag names, and style tag names. All of // these tokens are aliased to `tag_name`. - let query = Query::new(language, "(tag_name) @tag").unwrap(); + let query = Query::new(&language, "(tag_name) @tag").unwrap(); assert_query_matches( - language, + &language, &query, "
@@ -1633,7 +1679,7 @@ fn test_query_matches_with_different_tokens_with_the_same_string_value() { // and one with higher precedence for generics. let language = get_language("rust"); let query = Query::new( - language, + &language, r#" "<" @less ">" @greater @@ -1642,7 +1688,7 @@ fn test_query_matches_with_different_tokens_with_the_same_string_value() { .unwrap(); assert_query_matches( - language, + &language, &query, "const A: B = d < e || f > g;", &[ @@ -1660,7 +1706,7 @@ fn test_query_matches_with_too_many_permutations_to_track() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, " (array (identifier) @pre (identifier) @post) ", @@ -1672,7 +1718,7 @@ fn test_query_matches_with_too_many_permutations_to_track() { source.push_str("];"); let mut parser = Parser::new(); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); let tree = parser.parse(&source, None).unwrap(); let mut cursor = QueryCursor::new(); cursor.set_match_limit(32); @@ -1685,7 +1731,7 @@ fn test_query_matches_with_too_many_permutations_to_track() { collect_matches(matches, &query, source.as_str())[0], (0, vec![("pre", "hello"), ("post", "hello")]), ); - assert_eq!(cursor.did_exceed_match_limit(), true); + assert!(cursor.did_exceed_match_limit()); }); } @@ -1694,7 +1740,7 @@ fn test_query_sibling_patterns_dont_match_children_of_an_error() { allocations::record(|| { let language = get_language("rust"); let query = Query::new( - language, + &language, r#" ("{" @open "}" @close) @@ -1733,8 +1779,8 @@ fn test_query_sibling_patterns_dont_match_children_of_an_error() { "; let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); assert_eq!( 
@@ -1754,7 +1800,7 @@ fn test_query_matches_with_alternatives_and_too_many_permutations_to_track() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, " ( (comment) @doc @@ -1774,7 +1820,7 @@ fn test_query_matches_with_alternatives_and_too_many_permutations_to_track() { let source = "/* hi */ a.b(); ".repeat(50); let mut parser = Parser::new(); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); let tree = parser.parse(&source, None).unwrap(); let mut cursor = QueryCursor::new(); cursor.set_match_limit(32); @@ -1784,7 +1830,54 @@ fn test_query_matches_with_alternatives_and_too_many_permutations_to_track() { collect_matches(matches, &query, source.as_str()), vec![(1, vec![("method", "b")]); 50], ); - assert_eq!(cursor.did_exceed_match_limit(), true); + assert!(cursor.did_exceed_match_limit()); + }); +} + +#[test] +fn test_repetitions_before_with_alternatives() { + allocations::record(|| { + let language = get_language("rust"); + let query = Query::new( + &language, + r" + ( + (line_comment)* @comment + . 
+ [ + (struct_item name: (_) @name) + (function_item name: (_) @name) + (enum_item name: (_) @name) + (impl_item type: (_) @name) + ] + ) + ", + ) + .unwrap(); + + assert_query_matches( + &language, + &query, + r" + // a + // b + fn c() {} + + // d + // e + impl F {} + ", + &[ + ( + 0, + vec![("comment", "// a"), ("comment", "// b"), ("name", "c")], + ), + ( + 0, + vec![("comment", "// d"), ("comment", "// e"), ("name", "F")], + ), + ], + ); }); } @@ -1793,7 +1886,7 @@ fn test_query_matches_with_anonymous_tokens() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, r#" ";" @punctuation "&&" @operator @@ -1803,7 +1896,7 @@ fn test_query_matches_with_anonymous_tokens() { .unwrap(); assert_query_matches( - language, + &language, &query, r#"foo(a && "b");"#, &[ @@ -1821,8 +1914,8 @@ fn test_query_matches_with_supertypes() { allocations::record(|| { let language = get_language("python"); let query = Query::new( - language, - r#" + &language, + r" (argument_list (expression) @arg) (keyword_argument @@ -1832,12 +1925,12 @@ fn test_query_matches_with_supertypes() { left: (identifier) @var_def) (primary_expression/identifier) @var_ref - "#, + ", ) .unwrap(); assert_query_matches( - language, + &language, &query, " a = b.c( @@ -1859,16 +1952,17 @@ fn test_query_matches_with_supertypes() { } #[test] +#[allow(clippy::reversed_empty_ranges)] fn test_query_matches_within_byte_range() { allocations::record(|| { let language = get_language("javascript"); - let query = Query::new(language, "(identifier) @element").unwrap(); + let query = Query::new(&language, "(identifier) @element").unwrap(); let source = "[a, b, c, d, e, f, g]"; let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); @@ -1917,7 +2011,7 @@ fn 
test_query_matches_within_byte_range() { fn test_query_matches_within_point_range() { allocations::record(|| { let language = get_language("javascript"); - let query = Query::new(language, "(identifier) @element").unwrap(); + let query = Query::new(&language, "(identifier) @element").unwrap(); let source = " [ @@ -1932,7 +2026,7 @@ fn test_query_matches_within_point_range() { .unindent(); let mut parser = Parser::new(); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); let tree = parser.parse(&source, None).unwrap(); let mut cursor = QueryCursor::new(); @@ -1983,7 +2077,7 @@ fn test_query_captures_within_byte_range() { allocations::record(|| { let language = get_language("c"); let query = Query::new( - language, + &language, " (call_expression function: (identifier) @function @@ -1997,8 +2091,8 @@ fn test_query_captures_within_byte_range() { let source = r#"DEFUN ("safe-length", Fsafe_length, Ssafe_length, 1, 1, 0)"#; let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let captures = @@ -2017,12 +2111,79 @@ fn test_query_captures_within_byte_range() { }); } +#[test] +fn test_query_cursor_next_capture_with_byte_range() { + allocations::record(|| { + let language = get_language("python"); + let query = Query::new( + &language, + "(function_definition name: (identifier) @function) + (attribute attribute: (identifier) @property) + ((identifier) @variable)", + ) + .unwrap(); + + let source = "def func():\n foo.bar.baz()\n"; + // ^ ^ ^ ^ + // byte_pos 0 12 17 27 + // point_pos (0,0) (1,0) (1,5) (1,15) + + let mut parser = Parser::new(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + + let mut cursor = QueryCursor::new(); + let captures = + cursor + .set_byte_range(12..17) + 
.captures(&query, tree.root_node(), source.as_bytes()); + + assert_eq!( + collect_captures(captures, &query, source), + &[("variable", "foo"),] + ); + }); +} + +#[test] +fn test_query_cursor_next_capture_with_point_range() { + allocations::record(|| { + let language = get_language("python"); + let query = Query::new( + &language, + "(function_definition name: (identifier) @function) + (attribute attribute: (identifier) @property) + ((identifier) @variable)", + ) + .unwrap(); + + let source = "def func():\n foo.bar.baz()\n"; + // ^ ^ ^ ^ + // byte_pos 0 12 17 27 + // point_pos (0,0) (1,0) (1,5) (1,15) + + let mut parser = Parser::new(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + + let mut cursor = QueryCursor::new(); + let captures = cursor + .set_point_range(Point::new(1, 0)..Point::new(1, 5)) + .captures(&query, tree.root_node(), source.as_bytes()); + + assert_eq!( + collect_captures(captures, &query, source), + &[("variable", "foo"),] + ); + }); +} + #[test] fn test_query_matches_with_unrooted_patterns_intersecting_byte_range() { allocations::record(|| { let language = get_language("rust"); let query = Query::new( - language, + &language, r#" ("{" @left "}" @right) ("<" @left ">" @right) @@ -2033,8 +2194,8 @@ fn test_query_matches_with_unrooted_patterns_intersecting_byte_range() { let source = "mod a { fn a(f: B) { g(f) } }"; let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); // within the type parameter list @@ -2076,7 +2237,7 @@ fn test_query_matches_with_wildcard_at_root_intersecting_byte_range() { allocations::record(|| { let language = get_language("python"); let query = Query::new( - language, + &language, " [ (_ body: (block)) @@ -2097,7 +2258,7 @@ fn 
test_query_matches_with_wildcard_at_root_intersecting_byte_range() { .trim(); let mut parser = Parser::new(); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); @@ -2138,7 +2299,7 @@ fn test_query_captures_within_byte_range_assigned_after_iterating() { allocations::record(|| { let language = get_language("rust"); let query = Query::new( - language, + &language, r#" (function_item name: (identifier) @fn_name) @@ -2171,17 +2332,17 @@ fn test_query_captures_within_byte_range_assigned_after_iterating() { "; let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let mut captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); // Retrieve some captures let mut results = Vec::new(); for (mat, capture_ix) in captures.by_ref().take(5) { - let capture = mat.captures[capture_ix as usize]; + let capture = mat.captures[capture_ix]; results.push(( - query.capture_names()[capture.index as usize].as_str(), + query.capture_names()[capture.index as usize], &source[capture.node.byte_range()], )); } @@ -2202,9 +2363,9 @@ fn test_query_captures_within_byte_range_assigned_after_iterating() { results.clear(); captures.set_byte_range(source.find("Ok").unwrap()..source.len()); for (mat, capture_ix) in captures { - let capture = mat.captures[capture_ix as usize]; + let capture = mat.captures[capture_ix]; results.push(( - query.capture_names()[capture.index as usize].as_str(), + query.capture_names()[capture.index as usize], &source[capture.node.byte_range()], )); } @@ -2224,7 +2385,7 @@ fn test_query_matches_within_range_of_long_repetition() { allocations::record(|| { let language = get_language("rust"); let query = Query::new( - language, + &language, " 
(function_item name: (identifier) @fn-name) ", @@ -2251,7 +2412,7 @@ fn test_query_matches_within_range_of_long_repetition() { let mut parser = Parser::new(); let mut cursor = QueryCursor::new(); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); let tree = parser.parse(&source, None).unwrap(); let matches = cursor @@ -2275,14 +2436,14 @@ fn test_query_matches_different_queries_same_cursor() { allocations::record(|| { let language = get_language("javascript"); let query1 = Query::new( - language, + &language, " (array (identifier) @id1) ", ) .unwrap(); let query2 = Query::new( - language, + &language, " (array (identifier) @id1) (pair (identifier) @id2) @@ -2290,7 +2451,7 @@ fn test_query_matches_different_queries_same_cursor() { ) .unwrap(); let query3 = Query::new( - language, + &language, " (array (identifier) @id1) (pair (identifier) @id2) @@ -2304,8 +2465,8 @@ fn test_query_matches_different_queries_same_cursor() { let mut parser = Parser::new(); let mut cursor = QueryCursor::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let matches = cursor.matches(&query1, tree.root_node(), source.as_bytes()); assert_eq!( @@ -2336,7 +2497,7 @@ fn test_query_matches_with_multiple_captures_on_a_node() { allocations::record(|| { let language = get_language("javascript"); let mut query = Query::new( - language, + &language, "(function_declaration (identifier) @name1 @name2 @name3 (statement_block) @body1 @body2)", @@ -2347,8 +2508,8 @@ fn test_query_matches_with_multiple_captures_on_a_node() { let mut parser = Parser::new(); let mut cursor = QueryCursor::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let matches = cursor.matches(&query, tree.root_node(), 
source.as_bytes()); assert_eq!( @@ -2389,7 +2550,7 @@ fn test_query_matches_with_captured_wildcard_at_root() { allocations::record(|| { let language = get_language("python"); let query = Query::new( - language, + &language, " ; captured wildcard at the root (_ [ @@ -2435,8 +2596,8 @@ fn test_query_matches_with_captured_wildcard_at_root() { let mut parser = Parser::new(); let mut cursor = QueryCursor::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let match_capture_names_and_rows = cursor .matches(&query, tree.root_node(), source.as_bytes()) @@ -2445,7 +2606,7 @@ fn test_query_matches_with_captured_wildcard_at_root() { .iter() .map(|c| { ( - query.capture_names()[c.index as usize].as_str(), + query.capture_names()[c.index as usize], c.node.kind(), c.node.start_position().row, ) @@ -2467,7 +2628,7 @@ fn test_query_matches_with_captured_wildcard_at_root() { vec![("stmt", "try_statement", 7), ("block", "block", 12)], vec![("stmt", "while_statement", 1), ("block", "block", 14)], ] - ) + ); }); } @@ -2476,16 +2637,16 @@ fn test_query_matches_with_no_captures() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, - r#" + &language, + r" (identifier) (string) @s - "#, + ", ) .unwrap(); assert_query_matches( - language, + &language, &query, " a = 'hi'; @@ -2506,13 +2667,13 @@ fn test_query_matches_with_repeated_fields() { allocations::record(|| { let language = get_language("c"); let query = Query::new( - language, + &language, "(field_declaration declarator: (field_identifier) @field)", ) .unwrap(); assert_query_matches( - language, + &language, &query, " struct S { @@ -2533,7 +2694,7 @@ fn test_query_matches_with_deeply_nested_patterns_with_fields() { allocations::record(|| { let language = get_language("python"); let query = Query::new( - language, + &language, " (call 
function: (_) @func @@ -2560,7 +2721,7 @@ fn test_query_matches_with_deeply_nested_patterns_with_fields() { .unwrap(); assert_query_matches( - language, + &language, &query, " a(1).b(2).c(3).d(4).e(5).f(6).g(7).h(8) @@ -2628,7 +2789,7 @@ fn test_query_matches_with_indefinite_step_containing_no_captures() { // https://github.com/tree-sitter/tree-sitter/issues/937 let language = get_language("c"); let query = Query::new( - language, + &language, "(struct_specifier name: (type_identifier) @name body: (field_declaration_list @@ -2638,7 +2799,7 @@ fn test_query_matches_with_indefinite_step_containing_no_captures() { .unwrap(); assert_query_matches( - language, + &language, &query, " struct LacksUnionField { @@ -2671,16 +2832,16 @@ fn test_query_captures_basic() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, r#" (pair key: _ @method.def - (function + (function_expression name: (identifier) @method.alias)) (variable_declarator name: _ @function.def - value: (function + value: (function_expression name: (identifier) @function.alias)) ":" @delimiter @@ -2701,8 +2862,8 @@ fn test_query_captures_basic() { "; let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); @@ -2746,7 +2907,7 @@ fn test_query_captures_with_text_conditions() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, r#" ((identifier) @constant (#match? @constant "^[A-Z]{2,}$")) @@ -2757,6 +2918,14 @@ fn test_query_captures_with_text_conditions() { ((identifier) @function.builtin (#eq? @function.builtin "require")) + ((identifier) @variable.builtin + (#any-of? 
@variable.builtin + "arguments" + "module" + "console" + "window" + "document")) + ((identifier) @variable (#not-match? @variable "^(lambda|load)$")) "#, @@ -2770,11 +2939,14 @@ fn test_query_captures_with_text_conditions() { lambda const ab = require('./ab'); new Cd(EF); + document; + module; + console; "; let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); @@ -2791,6 +2963,12 @@ fn test_query_captures_with_text_conditions() { ("constant", "EF"), ("constructor", "EF"), ("variable", "EF"), + ("variable.builtin", "document"), + ("variable", "document"), + ("variable.builtin", "module"), + ("variable", "module"), + ("variable.builtin", "console"), + ("variable", "console"), ], ); }); @@ -2802,8 +2980,8 @@ fn test_query_captures_with_predicates() { let language = get_language("javascript"); let query = Query::new( - language, - r#" + &language, + r" ((call_expression (identifier) @foo) (#set! name something) (#set! cool) @@ -2811,7 +2989,7 @@ fn test_query_captures_with_predicates() { ((property_identifier) @bar (#is? cool) - (#is-not? name something))"#, + (#is-not? 
name something))", ) .unwrap(); @@ -2829,7 +3007,8 @@ fn test_query_captures_with_predicates() { args: vec![ QueryPredicateArg::Capture(0), QueryPredicateArg::String("omg".to_string().into_boxed_str()), - ], + ] + .into_boxed_slice(), },] ); assert_eq!(query.property_settings(1), &[]); @@ -2841,6 +3020,26 @@ fn test_query_captures_with_predicates() { (QueryProperty::new("name", Some("something"), None), false), ] ); + + let source = "const a = window.b"; + let mut parser = Parser::new(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + + let query = Query::new( + &language, + r#"((identifier) @variable.builtin + (#match? @variable.builtin "^(arguments|module|console|window|document)$") + (#is-not? local)) + "#, + ) + .unwrap(); + + let mut cursor = QueryCursor::new(); + let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); + let matches = collect_matches(matches, &query, source); + + assert_eq!(matches, &[(0, vec![("variable.builtin", "window")])]); }); } @@ -2854,7 +3053,7 @@ fn test_query_captures_with_quoted_predicate_args() { // * escaped double quotes with \* // * literal backslashes with \\ let query = Query::new( - language, + &language, r#" ((call_expression (identifier) @foo) (#set! 
one "\"something\ngreat\"")) @@ -2896,14 +3095,14 @@ fn test_query_captures_with_duplicates() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, - r#" + &language, + r" (variable_declarator name: (identifier) @function - value: (function)) + value: (function_expression)) (identifier) @variable - "#, + ", ) .unwrap(); @@ -2912,8 +3111,8 @@ fn test_query_captures_with_duplicates() { "; let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); @@ -2931,7 +3130,7 @@ fn test_query_captures_with_many_nested_results_without_fields() { // Search for key-value pairs whose values are anonymous functions. let query = Query::new( - language, + &language, r#" (pair key: _ @method-def @@ -2951,12 +3150,12 @@ fn test_query_captures_with_many_nested_results_without_fields() { let method_count = 50; let mut source = "x = { y: {\n".to_owned(); for i in 0..method_count { - writeln!(&mut source, " method{}: $ => null,", i).unwrap(); + writeln!(&mut source, " method{i}: $ => null,").unwrap(); } source.push_str("}};\n"); let mut parser = Parser::new(); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); let tree = parser.parse(&source, None).unwrap(); let mut cursor = QueryCursor::new(); @@ -2994,15 +3193,15 @@ fn test_query_captures_with_many_nested_results_with_fields() { // Search expressions like `a ? a.b : null` let query = Query::new( - language, - r#" + &language, + r" ((ternary_expression condition: (identifier) @left consequence: (member_expression object: (identifier) @right) alternative: (null)) (#eq? 
@left @right)) - "#, + ", ) .unwrap(); @@ -3011,12 +3210,12 @@ fn test_query_captures_with_many_nested_results_with_fields() { let count = 50; let mut source = "a ? {".to_owned(); for i in 0..count { - writeln!(&mut source, " x: y{} ? y{}.z : null,", i, i).unwrap(); + writeln!(&mut source, " x: y{i} ? y{i}.z : null,").unwrap(); } source.push_str("} : null;\n"); let mut parser = Parser::new(); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); let tree = parser.parse(&source, None).unwrap(); let mut cursor = QueryCursor::new(); @@ -3075,8 +3274,8 @@ fn test_query_captures_with_too_many_nested_results() { // captured, but before the final `template_string` is found, those matches must // be buffered, in order to prevent captures from being returned out-of-order. let query = Query::new( - language, - r#" + &language, + r" ;; easy 👇 (call_expression function: (member_expression @@ -3087,7 +3286,7 @@ fn test_query_captures_with_too_many_nested_results() { function: (member_expression property: (property_identifier) @template-tag) arguments: (template_string)) @template-call - "#, + ", ) .unwrap(); @@ -3114,12 +3313,12 @@ fn test_query_captures_with_too_many_nested_results() { .trim(); let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); cursor.set_match_limit(32); let captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); - let captures = collect_captures(captures, &query, &source); + let captures = collect_captures(captures, &query, source); assert_eq!( &captures[0..4], @@ -3151,7 +3350,7 @@ fn test_query_captures_with_definite_pattern_containing_many_nested_matches() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, r#" (array "[" @l-bracket @@ 
-3177,18 +3376,17 @@ fn test_query_captures_with_definite_pattern_containing_many_nested_matches() { "; let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); assert_eq!( collect_captures(captures, &query, source), - [("l-bracket", "[")] - .iter() + std::iter::once(&("l-bracket", "[")) .chain([("dot", "."); 40].iter()) - .chain([("r-bracket", "]")].iter()) - .cloned() + .chain(std::iter::once(&("r-bracket", "]"))) + .copied() .collect::>(), ); }); @@ -3199,12 +3397,12 @@ fn test_query_captures_ordered_by_both_start_and_end_positions() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, - r#" + &language, + r" (call_expression) @call (member_expression) @member (identifier) @variable - "#, + ", ) .unwrap(); @@ -3213,8 +3411,8 @@ fn test_query_captures_ordered_by_both_start_and_end_positions() { "; let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); @@ -3239,13 +3437,13 @@ fn test_query_captures_with_matches_removed() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, - r#" + &language, + r" (binary_expression left: (identifier) @left operator: _ @op right: (identifier) @right) - "#, + ", ) .unwrap(); @@ -3254,8 +3452,8 @@ fn test_query_captures_with_matches_removed() { "; let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + 
parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let mut captured_strings = Vec::new(); @@ -3283,7 +3481,7 @@ fn test_query_captures_with_matches_removed_before_they_finish() { // namespace_import node always has "*", "as" and then an identifier // for children, so captures will be emitted eagerly for this pattern. let query = Query::new( - language, + &language, r#" (namespace_import "*" @star @@ -3298,8 +3496,8 @@ fn test_query_captures_with_matches_removed_before_they_finish() { "; let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let mut captured_strings = Vec::new(); @@ -3325,10 +3523,10 @@ fn test_query_captures_and_matches_iterators_are_fused() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, - r#" + &language, + r" (comment) @comment - "#, + ", ) .unwrap(); @@ -3340,8 +3538,8 @@ fn test_query_captures_and_matches_iterators_are_fused() { "; let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let mut captures = cursor.captures(&query, tree.root_node(), source.as_bytes()); @@ -3368,7 +3566,7 @@ fn test_query_text_callback_returns_chunks() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, r#" ((identifier) @leading_upper (#match? 
@leading_upper "^[A-Z][A-Z_]*[a-z]")) @@ -3414,8 +3612,8 @@ fn test_query_text_callback_returns_chunks() { ); let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let captures = cursor.captures(&query, tree.root_node(), |node: Node| { chunks_in_range(node.byte_range()) @@ -3467,7 +3665,7 @@ fn test_query_start_byte_for_pattern() { source += patterns_2; source += patterns_3; - let query = Query::new(language, &source).unwrap(); + let query = Query::new(&language, &source).unwrap(); assert_eq!(query.start_byte_for_pattern(0), 0); assert_eq!(query.start_byte_for_pattern(5), patterns_1.len()); @@ -3482,7 +3680,7 @@ fn test_query_capture_names() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, r#" (if_statement condition: (parenthesized_expression (binary_expression @@ -3499,12 +3697,7 @@ fn test_query_capture_names() { assert_eq!( query.capture_names(), - &[ - "left-operand".to_string(), - "right-operand".to_string(), - "body".to_string(), - "loop-condition".to_string(), - ] + ["left-operand", "right-operand", "body", "loop-condition"] ); }); } @@ -3512,13 +3705,13 @@ fn test_query_capture_names() { #[test] fn test_query_lifetime_is_separate_from_nodes_lifetime() { allocations::record(|| { - let query = r#"(call_expression) @call"#; + let query = r"(call_expression) @call"; let source = "a(1); b(2);"; let language = get_language("javascript"); let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(&source, None).unwrap(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); fn take_first_node_from_captures<'tree>( source: &str, @@ -3528,7 +3721,7 @@ fn test_query_lifetime_is_separate_from_nodes_lifetime() { // 
Following 2 lines are redundant but needed to demonstrate // more understandable compiler error message let language = get_language("javascript"); - let query = Query::new(language, query).unwrap(); + let query = Query::new(&language, query).unwrap(); let mut cursor = QueryCursor::new(); let node = cursor .matches(&query, node, source.as_bytes()) @@ -3548,7 +3741,7 @@ fn test_query_lifetime_is_separate_from_nodes_lifetime() { node: Node<'tree>, ) -> Node<'tree> { let language = get_language("javascript"); - let query = Query::new(language, query).unwrap(); + let query = Query::new(&language, query).unwrap(); let mut cursor = QueryCursor::new(); let node = cursor .captures(&query, node, source.as_bytes()) @@ -3569,7 +3762,7 @@ fn test_query_lifetime_is_separate_from_nodes_lifetime() { fn test_query_with_no_patterns() { allocations::record(|| { let language = get_language("javascript"); - let query = Query::new(language, "").unwrap(); + let query = Query::new(&language, "").unwrap(); assert!(query.capture_names().is_empty()); assert_eq!(query.pattern_count(), 0); }); @@ -3580,7 +3773,7 @@ fn test_query_comments() { allocations::record(|| { let language = get_language("javascript"); let query = Query::new( - language, + &language, " ; this is my first comment ; i have two comments here @@ -3593,7 +3786,7 @@ fn test_query_comments() { let source = "function one() { }"; let mut parser = Parser::new(); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); @@ -3609,7 +3802,7 @@ fn test_query_disable_pattern() { allocations::record(|| { let language = get_language("javascript"); let mut query = Query::new( - language, + &language, " (function_declaration name: (identifier) @name) @@ -3629,7 +3822,7 @@ fn test_query_disable_pattern() { let source = "class A { constructor() {} } function 
b() { return 1; }"; let mut parser = Parser::new(); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); let tree = parser.parse(source, None).unwrap(); let mut cursor = QueryCursor::new(); let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); @@ -3648,7 +3841,7 @@ fn test_query_alternative_predicate_prefix() { allocations::record(|| { let language = get_language("c"); let query = Query::new( - language, + &language, r#" ((call_expression function: (identifier) @keyword @@ -3668,7 +3861,7 @@ fn test_query_alternative_predicate_prefix() { } "#; assert_query_matches( - language, + &language, &query, source, &[(0, vec![("keyword", "DEFUN"), ("function", "\"identity\"")])], @@ -3683,7 +3876,7 @@ fn test_query_random() { allocations::record(|| { let language = get_language("rust"); let mut parser = Parser::new(); - parser.set_language(language).unwrap(); + parser.set_language(&language).unwrap(); let mut cursor = QueryCursor::new(); cursor.set_match_limit(64); @@ -3704,7 +3897,7 @@ fn test_query_random() { let pattern = pattern_ast.to_string(); let expected_matches = pattern_ast.matches_in_tree(&test_tree); - let query = match Query::new(language, &pattern) { + let query = match Query::new(&language, &pattern) { Ok(query) => query, Err(e) => { panic!("failed to build query for pattern {pattern} - {e}. 
seed: {seed}"); @@ -3721,7 +3914,7 @@ fn test_query_random() { captures: mat .captures .iter() - .map(|c| (query.capture_names()[c.index as usize].as_str(), c.node)) + .map(|c| (query.capture_names()[c.index as usize], c.node)) .collect::>(), }) .collect::>(); @@ -3753,7 +3946,7 @@ fn test_query_is_pattern_guaranteed_at_step() { Row { description: "no guaranteed steps", language: get_language("python"), - pattern: r#"(expression_statement (string))"#, + pattern: r"(expression_statement (string))", results_by_substring: &[("expression_statement", false), ("string", false)], }, Row { @@ -3831,17 +4024,17 @@ fn test_query_is_pattern_guaranteed_at_step() { Row { description: "a guaranteed step with a field", language: get_language("javascript"), - pattern: r#"(binary_expression left: (identifier) right: (_))"#, + pattern: r"(binary_expression left: (expression) right: (_))", results_by_substring: &[ ("binary_expression", false), - ("(identifier)", false), + ("(expression)", false), ("(_)", true), ], }, Row { description: "multiple guaranteed steps with fields", language: get_language("javascript"), - pattern: r#"(function_declaration name: (identifier) body: (statement_block))"#, + pattern: r"(function_declaration name: (identifier) body: (statement_block))", results_by_substring: &[ ("function_declaration", false), ("identifier", true), @@ -3881,12 +4074,12 @@ fn test_query_is_pattern_guaranteed_at_step() { Row { description: "nesting, no guaranteed steps", language: get_language("javascript"), - pattern: r#" + pattern: r" (call_expression function: (member_expression property: (property_identifier) @template-tag) arguments: (template_string)) @template-call - "#, + ", results_by_substring: &[("property_identifier", false), ("template_string", false)], }, Row { @@ -3901,7 +4094,7 @@ fn test_query_is_pattern_guaranteed_at_step() { "#, results_by_substring: &[ ("identifier", false), - ("property_identifier", true), + ("property_identifier", false), ("[", true), ], }, @@ 
-3925,15 +4118,15 @@ fn test_query_is_pattern_guaranteed_at_step() { Row { description: "alternation where one branch has guaranteed steps", language: get_language("javascript"), - pattern: r#" + pattern: r" [ (unary_expression (identifier)) (call_expression function: (_) arguments: (_)) - (binary_expression right:(call_expression)) + (binary_expression right: (call_expression)) ] - "#, + ", results_by_substring: &[ ("identifier", false), ("right:", false), @@ -3978,53 +4171,56 @@ fn test_query_is_pattern_guaranteed_at_step() { Row { description: "hidden nodes that have several fields", language: get_language("java"), - pattern: r#" + pattern: r" (method_declaration name: (identifier)) - "#, + ", results_by_substring: &[("name:", true)], }, Row { description: "top-level non-terminal extra nodes", language: get_language("ruby"), - pattern: r#" + pattern: r" (heredoc_body (interpolation) (heredoc_end) @end) - "#, + ", results_by_substring: &[ ("(heredoc_body", false), ("(interpolation)", false), ("(heredoc_end)", true), ], }, - Row { - description: "multiple extra nodes", - language: get_language("rust"), - pattern: r#" - (call_expression - (line_comment) @a - (line_comment) @b - (arguments)) - "#, - results_by_substring: &[ - ("(line_comment) @a", false), - ("(line_comment) @b", false), - ("(arguments)", true), - ], - }, + // TODO: figure out why line comments, an extra, are no longer allowed *anywhere* + // likely culprits are the fact that it's no longer a token itself or that it uses an + // external token + // Row { + // description: "multiple extra nodes", + // language: get_language("rust"), + // pattern: r" + // (call_expression + // (line_comment) @a + // (line_comment) @b + // (arguments)) + // ", + // results_by_substring: &[ + // ("(line_comment) @a", false), + // ("(line_comment) @b", false), + // ("(arguments)", true), + // ], + // }, ]; allocations::record(|| { - eprintln!(""); + eprintln!(); - for row in rows.iter() { + for row in rows { if let 
Some(filter) = EXAMPLE_FILTER.as_ref() { if !row.description.contains(filter.as_str()) { continue; } } eprintln!(" query example: {:?}", row.description); - let query = Query::new(row.language, row.pattern).unwrap(); + let query = Query::new(&row.language, row.pattern).unwrap(); for (substring, is_definite) in row.results_by_substring { let offset = row.pattern.find(substring).unwrap(); assert_eq!( @@ -4038,7 +4234,7 @@ fn test_query_is_pattern_guaranteed_at_step() { .join(" "), substring, is_definite, - ) + ); } } }); @@ -4055,12 +4251,12 @@ fn test_query_is_pattern_rooted() { let rows = [ Row { description: "simple token", - pattern: r#"(identifier)"#, + pattern: r"(identifier)", is_rooted: true, }, Row { description: "simple non-terminal", - pattern: r#"(function_definition name: (identifier))"#, + pattern: r"(function_definition name: (identifier))", is_rooted: true, }, Row { @@ -4070,11 +4266,11 @@ fn test_query_is_pattern_rooted() { }, Row { description: "alternative of many non-terminals", - pattern: r#"[ + pattern: r"[ (function_definition name: (identifier)) (class_definition name: (identifier)) (block) - ]"#, + ]", is_rooted: true, }, Row { @@ -4084,7 +4280,7 @@ fn test_query_is_pattern_rooted() { }, Row { description: "top-level repetition", - pattern: r#"(comment)*"#, + pattern: r"(comment)*", is_rooted: false, }, Row { @@ -4099,18 +4295,18 @@ fn test_query_is_pattern_rooted() { }, Row { description: "alternative where one option has a top-level repetition", - pattern: r#"[ + pattern: r"[ (block) (class_definition) (comment)* (function_definition) - ]"#, + ]", is_rooted: false, }, ]; allocations::record(|| { - eprintln!(""); + eprintln!(); let language = get_language("python"); for row in &rows { @@ -4120,7 +4316,7 @@ fn test_query_is_pattern_rooted() { } } eprintln!(" query example: {:?}", row.description); - let query = Query::new(language, row.pattern).unwrap(); + let query = Query::new(&language, row.pattern).unwrap(); assert_eq!( 
query.is_pattern_rooted(0), row.is_rooted, @@ -4130,7 +4326,7 @@ fn test_query_is_pattern_rooted() { .split_ascii_whitespace() .collect::>() .join(" "), - ) + ); } }); } @@ -4147,25 +4343,25 @@ fn test_query_is_pattern_non_local() { let rows = [ Row { description: "simple token", - pattern: r#"(identifier)"#, + pattern: r"(identifier)", language: get_language("python"), is_non_local: false, }, Row { description: "siblings that can occur in an argument list", - pattern: r#"((identifier) (identifier))"#, + pattern: r"((identifier) (identifier))", language: get_language("python"), is_non_local: true, }, Row { description: "siblings that can occur in a statement block", - pattern: r#"((return_statement) (return_statement))"#, + pattern: r"((return_statement) (return_statement))", language: get_language("python"), is_non_local: true, }, Row { description: "siblings that can occur in a source file", - pattern: r#"((function_definition) (class_definition))"#, + pattern: r"((function_definition) (class_definition))", language: get_language("python"), is_non_local: true, }, @@ -4183,32 +4379,32 @@ fn test_query_is_pattern_non_local() { }, Row { description: "siblings that can occur in a class body, wildcard root", - pattern: r#"(_ (method_definition) (method_definition)) @foo"#, + pattern: r"(_ (method_definition) (method_definition)) @foo", language: get_language("javascript"), is_non_local: true, }, Row { description: "top-level repetitions that can occur in a class body", - pattern: r#"(method_definition)+ @foo"#, + pattern: r"(method_definition)+ @foo", language: get_language("javascript"), is_non_local: true, }, Row { description: "top-level repetitions that can occur in a statement block", - pattern: r#"(return_statement)+ @foo"#, + pattern: r"(return_statement)+ @foo", language: get_language("javascript"), is_non_local: true, }, Row { description: "rooted pattern that can occur in a statement block", - pattern: r#"(return_statement) @foo"#, + pattern: 
r"(return_statement) @foo", language: get_language("javascript"), is_non_local: false, }, ]; allocations::record(|| { - eprintln!(""); + eprintln!(); for row in &rows { if let Some(filter) = EXAMPLE_FILTER.as_ref() { @@ -4217,7 +4413,7 @@ fn test_query_is_pattern_non_local() { } } eprintln!(" query example: {:?}", row.description); - let query = Query::new(row.language, row.pattern).unwrap(); + let query = Query::new(&row.language, row.pattern).unwrap(); assert_eq!( query.is_pattern_non_local(0), row.is_non_local, @@ -4227,7 +4423,7 @@ fn test_query_is_pattern_non_local() { .split_ascii_whitespace() .collect::>() .join(" "), - ) + ); } }); } @@ -4246,17 +4442,17 @@ fn test_capture_quantifiers() { Row { description: "Top level capture", language: get_language("python"), - pattern: r#" + pattern: r" (module) @mod - "#, + ", capture_quantifiers: &[(0, "mod", CaptureQuantifier::One)], }, Row { description: "Nested list capture capture", language: get_language("javascript"), - pattern: r#" + pattern: r" (array (_)* @elems) @array - "#, + ", capture_quantifiers: &[ (0, "array", CaptureQuantifier::One), (0, "elems", CaptureQuantifier::ZeroOrMore), @@ -4265,9 +4461,9 @@ fn test_capture_quantifiers() { Row { description: "Nested non-empty list capture capture", language: get_language("javascript"), - pattern: r#" + pattern: r" (array (_)+ @elems) @array - "#, + ", capture_quantifiers: &[ (0, "array", CaptureQuantifier::One), (0, "elems", CaptureQuantifier::OneOrMore), @@ -4277,9 +4473,9 @@ fn test_capture_quantifiers() { Row { description: "capture nested in optional pattern", language: get_language("javascript"), - pattern: r#" + pattern: r" (array (call_expression (arguments (_) @arg))? 
@call) @array - "#, + ", capture_quantifiers: &[ (0, "array", CaptureQuantifier::One), (0, "call", CaptureQuantifier::ZeroOrOne), @@ -4289,9 +4485,9 @@ fn test_capture_quantifiers() { Row { description: "optional capture nested in non-empty list pattern", language: get_language("javascript"), - pattern: r#" + pattern: r" (array (call_expression (arguments (_)? @arg))+ @call) @array - "#, + ", capture_quantifiers: &[ (0, "array", CaptureQuantifier::One), (0, "call", CaptureQuantifier::OneOrMore), @@ -4301,9 +4497,9 @@ fn test_capture_quantifiers() { Row { description: "non-empty list capture nested in optional pattern", language: get_language("javascript"), - pattern: r#" + pattern: r" (array (call_expression (arguments (_)+ @args))? @call) @array - "#, + ", capture_quantifiers: &[ (0, "array", CaptureQuantifier::One), (0, "call", CaptureQuantifier::ZeroOrOne), @@ -4314,19 +4510,19 @@ fn test_capture_quantifiers() { Row { description: "capture is the same in all alternatives", language: get_language("javascript"), - pattern: r#"[ + pattern: r"[ (function_declaration name:(identifier) @name) (call_expression function:(identifier) @name) - ]"#, + ]", capture_quantifiers: &[(0, "name", CaptureQuantifier::One)], }, Row { description: "capture appears in some alternatives", language: get_language("javascript"), - pattern: r#"[ + pattern: r"[ (function_declaration name:(identifier) @name) - (function) - ] @fun"#, + (function_expression) + ] @fun", capture_quantifiers: &[ (0, "fun", CaptureQuantifier::One), (0, "name", CaptureQuantifier::ZeroOrOne), @@ -4335,10 +4531,10 @@ fn test_capture_quantifiers() { Row { description: "capture has different quantifiers in alternatives", language: get_language("javascript"), - pattern: r#"[ - (call_expression arguments:(arguments (_)+ @args)) - (new_expression arguments:(arguments (_)? @args)) - ] @call"#, + pattern: r"[ + (call_expression arguments: (arguments (_)+ @args)) + (new_expression arguments: (arguments (_)? 
@args)) + ] @call", capture_quantifiers: &[ (0, "call", CaptureQuantifier::One), (0, "args", CaptureQuantifier::ZeroOrMore), @@ -4348,9 +4544,9 @@ fn test_capture_quantifiers() { Row { description: "siblings have different captures with different quantifiers", language: get_language("javascript"), - pattern: r#" + pattern: r" (call_expression (arguments (identifier)? @self (_)* @args)) @call - "#, + ", capture_quantifiers: &[ (0, "call", CaptureQuantifier::One), (0, "self", CaptureQuantifier::ZeroOrOne), @@ -4360,9 +4556,9 @@ fn test_capture_quantifiers() { Row { description: "siblings have same capture with different quantifiers", language: get_language("javascript"), - pattern: r#" + pattern: r" (call_expression (arguments (identifier) @args (_)* @args)) @call - "#, + ", capture_quantifiers: &[ (0, "call", CaptureQuantifier::One), (0, "args", CaptureQuantifier::OneOrMore), @@ -4372,7 +4568,7 @@ fn test_capture_quantifiers() { Row { description: "combined nesting, alternatives, and siblings", language: get_language("javascript"), - pattern: r#" + pattern: r" (array (call_expression (arguments [ @@ -4381,7 +4577,7 @@ fn test_capture_quantifiers() { ]) )+ @call ) @array - "#, + ", capture_quantifiers: &[ (0, "array", CaptureQuantifier::One), (0, "call", CaptureQuantifier::OneOrMore), @@ -4393,12 +4589,12 @@ fn test_capture_quantifiers() { Row { description: "multiple patterns", language: get_language("javascript"), - pattern: r#" + pattern: r" (function_declaration name: (identifier) @x) (statement_identifier) @y (property_identifier)+ @z (array (identifier)* @x) - "#, + ", capture_quantifiers: &[ // x (0, "x", CaptureQuantifier::One), @@ -4420,7 +4616,7 @@ fn test_capture_quantifiers() { Row { description: "multiple alternatives", language: get_language("javascript"), - pattern: r#" + pattern: r" [ (array (identifier) @x) (function_declaration name: (identifier)+ @x) @@ -4429,7 +4625,7 @@ fn test_capture_quantifiers() { (array (identifier) @x) (function_declaration 
name: (identifier)+ @x) ] - "#, + ", capture_quantifiers: &[ (0, "x", CaptureQuantifier::OneOrMore), (1, "x", CaptureQuantifier::OneOrMore), @@ -4438,16 +4634,16 @@ fn test_capture_quantifiers() { ]; allocations::record(|| { - eprintln!(""); + eprintln!(); - for row in rows.iter() { + for row in rows { if let Some(filter) = EXAMPLE_FILTER.as_ref() { if !row.description.contains(filter.as_str()) { continue; } } eprintln!(" query example: {:?}", row.description); - let query = Query::new(row.language, row.pattern).unwrap(); + let query = Query::new(&row.language, row.pattern).unwrap(); for (pattern, capture, expected_quantifier) in row.capture_quantifiers { let index = query.capture_index_for_name(capture).unwrap(); let actual_quantifier = query.capture_quantifiers(*pattern)[index as usize]; @@ -4463,61 +4659,448 @@ fn test_capture_quantifiers() { capture, *expected_quantifier, actual_quantifier, - ) + ); } } }); } -fn assert_query_matches( - language: Language, - query: &Query, - source: &str, - expected: &[(usize, Vec<(&str, &str)>)], -) { +#[test] +fn test_query_quantified_captures() { + struct Row { + description: &'static str, + language: Language, + code: &'static str, + pattern: &'static str, + captures: &'static [(&'static str, &'static str)], + } + + // #[rustfmt::skip] + let rows = &[ + Row { + description: "doc comments where all must match the prefix", + language: get_language("c"), + code: indoc! {" + /// foo + /// bar + /// baz + + void main() {} + + /// qux + /// quux + // quuz + "}, + pattern: r#" + ((comment)+ @comment.documentation + (#match? @comment.documentation "^///")) + "#, + captures: &[ + ("comment.documentation", "/// foo"), + ("comment.documentation", "/// bar"), + ("comment.documentation", "/// baz"), + ], + }, + Row { + description: "doc comments where one must match the prefix", + language: get_language("c"), + code: indoc! 
{" + /// foo + /// bar + /// baz + + void main() {} + + /// qux + /// quux + // quuz + "}, + pattern: r#" + ((comment)+ @comment.documentation + (#any-match? @comment.documentation "^///")) + "#, + captures: &[ + ("comment.documentation", "/// foo"), + ("comment.documentation", "/// bar"), + ("comment.documentation", "/// baz"), + ("comment.documentation", "/// qux"), + ("comment.documentation", "/// quux"), + ("comment.documentation", "// quuz"), + ], + }, + ]; + + allocations::record(|| { + for row in rows { + eprintln!(" quantified query example: {:?}", row.description); + + let mut parser = Parser::new(); + parser.set_language(&row.language).unwrap(); + let tree = parser.parse(row.code, None).unwrap(); + + let query = Query::new(&row.language, row.pattern).unwrap(); + + let mut cursor = QueryCursor::new(); + let matches = cursor.captures(&query, tree.root_node(), row.code.as_bytes()); + + assert_eq!(collect_captures(matches, &query, row.code), row.captures); + } + }); +} + +#[test] +fn test_query_max_start_depth() { + struct Row { + description: &'static str, + pattern: &'static str, + depth: u32, + matches: &'static [(usize, &'static [(&'static str, &'static str)])], + } + + let source = indoc! 
{" + if (a1 && a2) { + if (b1 && b2) { } + if (c) { } + } + if (d) { + if (e1 && e2) { } + if (f) { } + } + "}; + + #[rustfmt::skip] + let rows = &[ + Row { + description: "depth 0: match translation unit", + depth: 0, + pattern: r" + (translation_unit) @capture + ", + matches: &[ + (0, &[("capture", "if (a1 && a2) {\n if (b1 && b2) { }\n if (c) { }\n}\nif (d) {\n if (e1 && e2) { }\n if (f) { }\n}\n")]), + ] + }, + Row { + description: "depth 0: match none", + depth: 0, + pattern: r" + (if_statement) @capture + ", + matches: &[] + }, + Row { + description: "depth 1: match 2 if statements at the top level", + depth: 1, + pattern: r" + (if_statement) @capture + ", + matches : &[ + (0, &[("capture", "if (a1 && a2) {\n if (b1 && b2) { }\n if (c) { }\n}")]), + (0, &[("capture", "if (d) {\n if (e1 && e2) { }\n if (f) { }\n}")]), + ] + }, + Row { + description: "depth 1 with deep pattern: match the only the first if statement", + depth: 1, + pattern: r" + (if_statement + condition: (parenthesized_expression + (binary_expression) + ) + ) @capture + ", + matches: &[ + (0, &[("capture", "if (a1 && a2) {\n if (b1 && b2) { }\n if (c) { }\n}")]), + ] + }, + Row { + description: "depth 3 with deep pattern: match all if statements with a binexpr condition", + depth: 3, + pattern: r" + (if_statement + condition: (parenthesized_expression + (binary_expression) + ) + ) @capture + ", + matches: &[ + (0, &[("capture", "if (a1 && a2) {\n if (b1 && b2) { }\n if (c) { }\n}")]), + (0, &[("capture", "if (b1 && b2) { }")]), + (0, &[("capture", "if (e1 && e2) { }")]), + ] + }, + ]; + + allocations::record(|| { + let language = get_language("c"); + let mut parser = Parser::new(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + let mut cursor = QueryCursor::new(); + + for row in rows { + eprintln!(" query example: {:?}", row.description); + + let query = Query::new(&language, row.pattern).unwrap(); + cursor.set_max_start_depth(Some(row.depth)); + 
+ let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); + let expected = row + .matches + .iter() + .map(|x| (x.0, x.1.to_vec())) + .collect::>(); + + assert_eq!(collect_matches(matches, &query, source), expected); + } + }); +} + +#[test] +fn test_query_error_does_not_oob() { + let language = get_language("javascript"); + + assert_eq!( + Query::new(&language, "(clas").unwrap_err(), + QueryError { + row: 0, + offset: 1, + column: 1, + kind: QueryErrorKind::NodeType, + message: "clas".to_string() + } + ); +} + +#[test] +fn test_consecutive_zero_or_modifiers() { + let language = get_language("javascript"); let mut parser = Parser::new(); - parser.set_language(language).unwrap(); - let tree = parser.parse(source, None).unwrap(); - let mut cursor = QueryCursor::new(); - let matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); - assert_eq!(collect_matches(matches, &query, source), expected); - assert_eq!(cursor.did_exceed_match_limit(), false); + parser.set_language(&language).unwrap(); + + let zero_source = ""; + let three_source = "/**/ /**/ /**/"; + + let zero_tree = parser.parse(zero_source, None).unwrap(); + let three_tree = parser.parse(three_source, None).unwrap(); + + let tests = [ + "(comment)*** @capture", + "(comment)??? @capture", + "(comment)*?* @capture", + "(comment)?*? 
@capture", + ]; + + for test in tests { + let query = Query::new(&language, test).unwrap(); + + let mut cursor = QueryCursor::new(); + let mut matches = cursor.matches(&query, zero_tree.root_node(), zero_source.as_bytes()); + assert!(matches.next().is_some()); + + let mut cursor = QueryCursor::new(); + let matches = cursor.matches(&query, three_tree.root_node(), three_source.as_bytes()); + + let mut len_3 = false; + let mut len_1 = false; + + for m in matches { + if m.captures.len() == 3 { + len_3 = true; + } + if m.captures.len() == 1 { + len_1 = true; + } + } + + assert_eq!(len_3, test.contains('*')); + assert_eq!(len_1, test.contains("???")); + } } -fn collect_matches<'a>( - matches: impl Iterator>, - query: &'a Query, - source: &'a str, -) -> Vec<(usize, Vec<(&'a str, &'a str)>)> { - matches - .map(|m| { - ( - m.pattern_index, - format_captures(m.captures.iter().cloned(), query, source), - ) - }) - .collect() +#[test] +fn test_query_max_start_depth_more() { + struct Row { + depth: u32, + matches: &'static [(usize, &'static [(&'static str, &'static str)])], + } + + let source = indoc! 
{" + { + { } + { + { } + } + } + "}; + + #[rustfmt::skip] + let rows = &[ + Row { + depth: 0, + matches: &[ + (0, &[("capture", "{\n { }\n {\n { }\n }\n}")]) + ] + }, + Row { + depth: 1, + matches: &[ + (0, &[("capture", "{\n { }\n {\n { }\n }\n}")]), + (0, &[("capture", "{ }")]), + (0, &[("capture", "{\n { }\n }")]) + ] + }, + Row { + depth: 2, + matches: &[ + (0, &[("capture", "{\n { }\n {\n { }\n }\n}")]), + (0, &[("capture", "{ }")]), + (0, &[("capture", "{\n { }\n }")]), + (0, &[("capture", "{ }")]), + ] + }, + ]; + + allocations::record(|| { + let language = get_language("c"); + let mut parser = Parser::new(); + parser.set_language(&language).unwrap(); + let tree = parser.parse(source, None).unwrap(); + let mut cursor = QueryCursor::new(); + let query = Query::new(&language, "(compound_statement) @capture").unwrap(); + + let mut matches = cursor.matches(&query, tree.root_node(), source.as_bytes()); + let node = matches.next().unwrap().captures[0].node; + assert_eq!(node.kind(), "compound_statement"); + + for row in rows { + eprintln!(" depth: {}", row.depth); + + cursor.set_max_start_depth(Some(row.depth)); + + let matches = cursor.matches(&query, node, source.as_bytes()); + let expected = row + .matches + .iter() + .map(|x| (x.0, x.1.to_vec())) + .collect::>(); + + assert_eq!(collect_matches(matches, &query, source), expected); + } + }); } -fn collect_captures<'a>( - captures: impl Iterator, usize)>, - query: &'a Query, - source: &'a str, -) -> Vec<(&'a str, &'a str)> { - format_captures(captures.map(|(m, i)| m.captures[i]), query, source) +#[test] +fn test_grammar_with_aliased_literal_query() { + // module.exports = grammar({ + // name: 'test', + // + // rules: { + // source: $ => repeat(choice($.compound_statement, $.expansion)), + // + // compound_statement: $ => seq(alias(token(prec(-1, '}')), '}')), + // + // expansion: $ => seq('}'), + // }, + // }); + let (parser_name, parser_code) = generate_parser_for_grammar( + r#" + { + "name": "test", + "rules": 
{ + "source": { + "type": "REPEAT", + "content": { + "type": "CHOICE", + "members": [ + { + "type": "SYMBOL", + "name": "compound_statement" + }, + { + "type": "SYMBOL", + "name": "expansion" + } + ] + } + }, + "compound_statement": { + "type": "SEQ", + "members": [ + { + "type": "ALIAS", + "content": { + "type": "TOKEN", + "content": { + "type": "PREC", + "value": -1, + "content": { + "type": "STRING", + "value": "}" + } + } + }, + "named": false, + "value": "}" + } + ] + }, + "expansion": { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "}" + } + ] + } + } + } + "#, + ) + .unwrap(); + + let language = get_test_language(&parser_name, &parser_code, None); + + let query = Query::new( + &language, + r#" + (compound_statement "}" @bracket1) + (expansion "}" @bracket2) + "#, + ); + + assert!(query.is_ok()); } -fn format_captures<'a>( - captures: impl Iterator>, - query: &'a Query, - source: &'a str, -) -> Vec<(&'a str, &'a str)> { - captures - .map(|capture| { - ( - query.capture_names()[capture.index as usize].as_str(), - capture.node.utf8_text(source.as_bytes()).unwrap(), - ) - }) - .collect() +#[test] +fn test_query_with_first_child_in_group_is_anchor() { + let language = get_language("c"); + let source_code = r"void fun(int a, char b, int c) { };"; + let query = r#" + (parameter_list + . + ((parameter_declaration) @constant + (#match? 
@constant "^int")))"#; + let query = Query::new(&language, query).unwrap(); + assert_query_matches( + &language, + &query, + source_code, + &[(0, vec![("constant", "int a")])], + ); +} + +// This test needs be executed with UBSAN enabled to check for regressions: +// ``` +// UBSAN_OPTIONS="halt_on_error=1" \ +// CFLAGS="-fsanitize=undefined" \ +// RUSTFLAGS="-lubsan" \ +// cargo test --target $(rustc -vV | sed -nr 's/^host: //p') -- --test-threads 1 +// ``` +#[test] +fn test_query_compiler_oob_access() { + let language = get_language("java"); + // UBSAN should not report any OOB access + assert!(Query::new(&language, "(package_declaration _ (_) @name _)").is_ok()); } diff --git a/third-party/tree-sitter/tree-sitter/cli/src/tests/tags_test.rs b/third-party/tree-sitter/tree-sitter/cli/src/tests/tags_test.rs index 07e5d1de88f..6139f73283b 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/tests/tags_test.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/tests/tags_test.rs @@ -9,7 +9,7 @@ use std::{ use tree_sitter::Point; use tree_sitter_tags::{c_lib as c, Error, TagsConfiguration, TagsContext}; -const PYTHON_TAG_QUERY: &'static str = r#" +const PYTHON_TAG_QUERY: &str = r#" ( (function_definition name: (identifier) @name @@ -39,7 +39,7 @@ const PYTHON_TAG_QUERY: &'static str = r#" attribute: (identifier) @name)) @reference.call "#; -const JS_TAG_QUERY: &'static str = r#" +const JS_TAG_QUERY: &str = r#" ( (comment)* @doc . (class_declaration @@ -68,7 +68,7 @@ const JS_TAG_QUERY: &'static str = r#" function: (identifier) @name) @reference.call "#; -const RUBY_TAG_QUERY: &'static str = r#" +const RUBY_TAG_QUERY: &str = r" (method name: (_) @name) @definition.method @@ -79,7 +79,7 @@ const RUBY_TAG_QUERY: &'static str = r#" ((identifier) @name @reference.call (#is-not? 
local)) -"#; +"; #[test] fn test_tags_python() { @@ -132,7 +132,7 @@ fn test_tags_python() { fn test_tags_javascript() { let language = get_language("javascript"); let tags_config = TagsConfiguration::new(language, JS_TAG_QUERY, "").unwrap(); - let source = br#" + let source = br" // hi // Data about a customer. @@ -150,7 +150,7 @@ fn test_tags_javascript() { class Agent { } - "#; + "; let mut tag_context = TagsContext::new(); let tags = tag_context @@ -305,10 +305,10 @@ fn test_tags_with_parse_error() { let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap(); let mut tag_context = TagsContext::new(); - let source = br#" + let source = br" class Fine: pass class Bad - "#; + "; let (tags, failed) = tag_context .generate_tags(&tags_config, source, None) @@ -359,25 +359,29 @@ fn test_tags_via_c_api() { ); let c_scope_name = CString::new(scope_name).unwrap(); - let result = c::ts_tagger_add_language( - tagger, - c_scope_name.as_ptr(), - language, - JS_TAG_QUERY.as_ptr(), - ptr::null(), - JS_TAG_QUERY.len() as u32, - 0, - ); + let result = unsafe { + c::ts_tagger_add_language( + tagger, + c_scope_name.as_ptr(), + language, + JS_TAG_QUERY.as_ptr(), + ptr::null(), + JS_TAG_QUERY.len() as u32, + 0, + ) + }; assert_eq!(result, c::TSTagsError::Ok); - let result = c::ts_tagger_tag( - tagger, - c_scope_name.as_ptr(), - source_code.as_ptr(), - source_code.len() as u32, - buffer, - ptr::null(), - ); + let result = unsafe { + c::ts_tagger_tag( + tagger, + c_scope_name.as_ptr(), + source_code.as_ptr(), + source_code.len() as u32, + buffer, + ptr::null(), + ) + }; assert_eq!(result, c::TSTagsError::Ok); let tags = unsafe { slice::from_raw_parts( @@ -387,20 +391,20 @@ fn test_tags_via_c_api() { }; let docs = str::from_utf8(unsafe { slice::from_raw_parts( - c::ts_tags_buffer_docs(buffer) as *const u8, + c::ts_tags_buffer_docs(buffer).cast::(), c::ts_tags_buffer_docs_len(buffer) as usize, ) }) .unwrap(); - let syntax_types: Vec<&str> = unsafe { + let 
syntax_types = unsafe { let mut len: u32 = 0; let ptr = c::ts_tagger_syntax_kinds_for_scope_name(tagger, c_scope_name.as_ptr(), &mut len); slice::from_raw_parts(ptr, len as usize) .iter() .map(|i| CStr::from_ptr(*i).to_str().unwrap()) - .collect() + .collect::>() }; assert_eq!( @@ -419,8 +423,10 @@ fn test_tags_via_c_api() { ] ); - c::ts_tags_buffer_delete(buffer); - c::ts_tagger_delete(tagger); + unsafe { + c::ts_tags_buffer_delete(buffer); + c::ts_tagger_delete(tagger); + } }); } diff --git a/third-party/tree-sitter/tree-sitter/cli/src/tests/test_highlight_test.rs b/third-party/tree-sitter/tree-sitter/cli/src/tests/test_highlight_test.rs index af2c15c5535..92ac76d7ef9 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/tests/test_highlight_test.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/tests/test_highlight_test.rs @@ -12,7 +12,7 @@ fn test_highlight_test_with_basic_test() { Some("injections.scm"), &[ "function".to_string(), - "variable.parameter".to_string(), + "variable".to_string(), "keyword".to_string(), ], ); @@ -22,28 +22,21 @@ fn test_highlight_test_with_basic_test() { " // ^ function", " // ^ keyword", " return d + e;", - " // ^ variable.parameter", + " // ^ variable", + " // ^ !variable", "};", ] .join("\n"); let assertions = - parse_position_comments(&mut Parser::new(), language, source.as_bytes()).unwrap(); + parse_position_comments(&mut Parser::new(), &language, source.as_bytes()).unwrap(); assert_eq!( assertions, &[ - Assertion { - position: Point::new(1, 5), - expected_capture_name: "function".to_string() - }, - Assertion { - position: Point::new(1, 11), - expected_capture_name: "keyword".to_string() - }, - Assertion { - position: Point::new(4, 9), - expected_capture_name: "variable.parameter".to_string() - }, + Assertion::new(1, 5, false, String::from("function")), + Assertion::new(1, 11, false, String::from("keyword")), + Assertion::new(4, 9, false, String::from("variable")), + Assertion::new(4, 11, true, String::from("variable")), 
] ); @@ -60,6 +53,7 @@ fn test_highlight_test_with_basic_test() { (Point::new(1, 19), Point::new(1, 20), Highlight(1)), // "d" (Point::new(4, 2), Point::new(4, 8), Highlight(2)), // "return" (Point::new(4, 9), Point::new(4, 10), Highlight(1)), // "d" + (Point::new(4, 13), Point::new(4, 14), Highlight(1)), // "e" ] ); } diff --git a/third-party/tree-sitter/tree-sitter/cli/src/tests/test_tags_test.rs b/third-party/tree-sitter/tree-sitter/cli/src/tests/test_tags_test.rs index 61f98abd299..3efba9edfa6 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/tests/test_tags_test.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/tests/test_tags_test.rs @@ -16,28 +16,21 @@ fn test_tags_test_with_basic_test() { " # ^ reference.call", " return d(e)", " # ^ reference.call", + " # ^ !variable.parameter", "", ] .join("\n"); let assertions = - parse_position_comments(&mut Parser::new(), language, source.as_bytes()).unwrap(); + parse_position_comments(&mut Parser::new(), &language, source.as_bytes()).unwrap(); assert_eq!( assertions, &[ - Assertion { - position: Point::new(1, 4), - expected_capture_name: "definition.function".to_string(), - }, - Assertion { - position: Point::new(3, 9), - expected_capture_name: "reference.call".to_string(), - }, - Assertion { - position: Point::new(5, 11), - expected_capture_name: "reference.call".to_string(), - }, + Assertion::new(1, 4, false, String::from("definition.function")), + Assertion::new(3, 9, false, String::from("reference.call")), + Assertion::new(5, 11, false, String::from("reference.call")), + Assertion::new(5, 13, true, String::from("variable.parameter")), ] ); @@ -62,5 +55,5 @@ fn test_tags_test_with_basic_test() { "reference.call".to_string() ), ] - ) + ); } diff --git a/third-party/tree-sitter/tree-sitter/cli/src/tests/text_provider_test.rs b/third-party/tree-sitter/tree-sitter/cli/src/tests/text_provider_test.rs new file mode 100644 index 00000000000..b0b702431d9 --- /dev/null +++ 
b/third-party/tree-sitter/tree-sitter/cli/src/tests/text_provider_test.rs @@ -0,0 +1,172 @@ +use std::{iter, sync::Arc}; + +use crate::tests::helpers::fixtures::get_language; +use tree_sitter::{Language, Node, Parser, Point, Query, QueryCursor, TextProvider, Tree}; + +fn parse_text(text: impl AsRef<[u8]>) -> (Tree, Language) { + let language = get_language("c"); + let mut parser = Parser::new(); + parser.set_language(&language).unwrap(); + (parser.parse(text, None).unwrap(), language) +} + +fn parse_text_with(callback: &mut F) -> (Tree, Language) +where + T: AsRef<[u8]>, + F: FnMut(usize, Point) -> T, +{ + let language = get_language("c"); + let mut parser = Parser::new(); + parser.set_language(&language).unwrap(); + let tree = parser.parse_with(callback, None).unwrap(); + // eprintln!("{}", tree.clone().root_node().to_sexp()); + assert_eq!("comment", tree.root_node().child(0).unwrap().kind()); + (tree, language) +} + +fn tree_query>(tree: &Tree, text: impl TextProvider, language: &Language) { + let query = Query::new(language, "((comment) @c (#eq? 
@c \"// comment\"))").unwrap(); + let mut cursor = QueryCursor::new(); + let mut captures = cursor.captures(&query, tree.root_node(), text); + let (match_, idx) = captures.next().unwrap(); + let capture = match_.captures[idx]; + assert_eq!(capture.index as usize, idx); + assert_eq!("comment", capture.node.kind()); +} + +fn check_parsing>( + parser_text: impl AsRef<[u8]>, + text_provider: impl TextProvider, +) { + let (tree, language) = parse_text(parser_text); + tree_query(&tree, text_provider, &language); +} + +fn check_parsing_callback>( + parser_callback: &mut F, + text_provider: impl TextProvider, +) where + T: AsRef<[u8]>, + F: FnMut(usize, Point) -> T, +{ + let (tree, language) = parse_text_with(parser_callback); + tree_query(&tree, text_provider, &language); +} + +#[test] +fn test_text_provider_for_str_slice() { + let text: &str = "// comment"; + + check_parsing(text, text.as_bytes()); + check_parsing(text.as_bytes(), text.as_bytes()); +} + +#[test] +fn test_text_provider_for_string() { + let text: String = "// comment".to_owned(); + + check_parsing(text.clone(), text.as_bytes()); + check_parsing(text.as_bytes(), text.as_bytes()); + check_parsing(<_ as AsRef<[u8]>>::as_ref(&text), text.as_bytes()); +} + +#[test] +fn test_text_provider_for_box_of_str_slice() { + let text = "// comment".to_owned().into_boxed_str(); + + check_parsing(text.as_bytes(), text.as_bytes()); + check_parsing(<_ as AsRef>::as_ref(&text), text.as_bytes()); + check_parsing(text.as_ref(), text.as_ref().as_bytes()); + check_parsing(text.as_ref(), text.as_bytes()); +} + +#[test] +fn test_text_provider_for_box_of_bytes_slice() { + let text = "// comment".to_owned().into_boxed_str().into_boxed_bytes(); + + check_parsing(text.as_ref(), text.as_ref()); + check_parsing(text.as_ref(), &*text); + check_parsing(&*text, &*text); +} + +#[test] +fn test_text_provider_for_vec_of_bytes() { + let text = "// comment".to_owned().into_bytes(); + + check_parsing(&*text, &*text); +} + +#[test] +fn 
test_text_provider_for_arc_of_bytes_slice() { + let text: Arc<[u8]> = Arc::from("// comment".to_owned().into_bytes()); + + check_parsing(&*text, &*text); + check_parsing(text.as_ref(), text.as_ref()); + check_parsing(text.clone(), text.as_ref()); +} + +#[test] +fn test_text_provider_callback_with_str_slice() { + let text: &str = "// comment"; + + check_parsing(text, |_node: Node<'_>| iter::once(text)); + check_parsing_callback( + &mut |offset, _point| { + (offset < text.len()) + .then_some(text.as_bytes()) + .unwrap_or_default() + }, + |_node: Node<'_>| iter::once(text), + ); +} + +#[test] +fn test_text_provider_callback_with_owned_string_slice() { + let text: &str = "// comment"; + + check_parsing_callback( + &mut |offset, _point| { + (offset < text.len()) + .then_some(text.as_bytes()) + .unwrap_or_default() + }, + |_node: Node<'_>| { + let slice: String = text.to_owned(); + iter::once(slice) + }, + ); +} + +#[test] +fn test_text_provider_callback_with_owned_bytes_vec_slice() { + let text: &str = "// comment"; + + check_parsing_callback( + &mut |offset, _point| { + (offset < text.len()) + .then_some(text.as_bytes()) + .unwrap_or_default() + }, + |_node: Node<'_>| { + let slice = text.to_owned().into_bytes(); + iter::once(slice) + }, + ); +} + +#[test] +fn test_text_provider_callback_with_owned_arc_of_bytes_slice() { + let text: &str = "// comment"; + + check_parsing_callback( + &mut |offset, _point| { + (offset < text.len()) + .then_some(text.as_bytes()) + .unwrap_or_default() + }, + |_node: Node<'_>| { + let slice: Arc<[u8]> = text.to_owned().into_bytes().into(); + iter::once(slice) + }, + ); +} diff --git a/third-party/tree-sitter/tree-sitter/cli/src/tests/tree_test.rs b/third-party/tree-sitter/tree-sitter/cli/src/tests/tree_test.rs index be0c4ff1dcf..f498c5fcfb6 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/tests/tree_test.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/tests/tree_test.rs @@ -7,7 +7,7 @@ use tree_sitter::{InputEdit, Parser, 
Point, Range, Tree}; #[test] fn test_tree_edit() { let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); let tree = parser.parse(" abc !== def", None).unwrap(); assert_eq!( @@ -44,7 +44,7 @@ fn test_tree_edit() { } // edit starting in the tree's padding but extending into its content: - // shrink the content to compenstate for the expanded padding. + // shrink the content to compensate for the expanded padding. { let mut tree = tree.clone(); tree.edit(&InputEdit { @@ -207,7 +207,7 @@ fn test_tree_edit() { // replacement that starts in whitespace and extends beyond the end of the tree: // shift the token's start position and empty out its content. { - let mut tree = tree.clone(); + let mut tree = tree; tree.edit(&InputEdit { start_byte: 6, old_end_byte: 90, @@ -235,7 +235,7 @@ fn test_tree_edit() { #[test] fn test_tree_edit_with_included_ranges() { let mut parser = Parser::new(); - parser.set_language(get_language("html")).unwrap(); + parser.set_language(&get_language("html")).unwrap(); let source = "
<% if a %>a<% else %>b<% end %>
"; @@ -300,13 +300,13 @@ fn test_tree_edit_with_included_ranges() { #[test] fn test_tree_cursor() { let mut parser = Parser::new(); - parser.set_language(get_language("rust")).unwrap(); + parser.set_language(&get_language("rust")).unwrap(); let tree = parser .parse( " struct Stuff { - a: A; + a: A, b: Option, } ", @@ -322,21 +322,103 @@ fn test_tree_cursor() { assert!(cursor.goto_first_child()); assert_eq!(cursor.node().kind(), "struct"); - assert_eq!(cursor.node().is_named(), false); + assert!(!cursor.node().is_named()); assert!(cursor.goto_next_sibling()); assert_eq!(cursor.node().kind(), "type_identifier"); - assert_eq!(cursor.node().is_named(), true); + assert!(cursor.node().is_named()); assert!(cursor.goto_next_sibling()); assert_eq!(cursor.node().kind(), "field_declaration_list"); - assert_eq!(cursor.node().is_named(), true); + assert!(cursor.node().is_named()); + + assert!(cursor.goto_last_child()); + assert_eq!(cursor.node().kind(), "}"); + assert!(!cursor.node().is_named()); + assert_eq!(cursor.node().start_position(), Point { row: 4, column: 16 }); + + assert!(cursor.goto_previous_sibling()); + assert_eq!(cursor.node().kind(), ","); + assert!(!cursor.node().is_named()); + assert_eq!(cursor.node().start_position(), Point { row: 3, column: 32 }); + + assert!(cursor.goto_previous_sibling()); + assert_eq!(cursor.node().kind(), "field_declaration"); + assert!(cursor.node().is_named()); + assert_eq!(cursor.node().start_position(), Point { row: 3, column: 20 }); + + assert!(cursor.goto_previous_sibling()); + assert_eq!(cursor.node().kind(), ","); + assert!(!cursor.node().is_named()); + assert_eq!(cursor.node().start_position(), Point { row: 2, column: 24 }); + + assert!(cursor.goto_previous_sibling()); + assert_eq!(cursor.node().kind(), "field_declaration"); + assert!(cursor.node().is_named()); + assert_eq!(cursor.node().start_position(), Point { row: 2, column: 20 }); + + assert!(cursor.goto_previous_sibling()); + assert_eq!(cursor.node().kind(), "{"); + 
assert!(!cursor.node().is_named()); + assert_eq!(cursor.node().start_position(), Point { row: 1, column: 29 }); + + let mut copy = tree.walk(); + copy.reset_to(&cursor); + + assert_eq!(copy.node().kind(), "{"); + assert!(!copy.node().is_named()); + + assert!(copy.goto_parent()); + assert_eq!(copy.node().kind(), "field_declaration_list"); + assert!(copy.node().is_named()); + + assert!(copy.goto_parent()); + assert_eq!(copy.node().kind(), "struct_item"); +} + +#[test] +fn test_tree_cursor_previous_sibling() { + let mut parser = Parser::new(); + parser.set_language(&get_language("rust")).unwrap(); + + let text = " + // Hi there + // This is fun! + // Another one! +"; + let tree = parser.parse(text, None).unwrap(); + + let mut cursor = tree.walk(); + assert_eq!(cursor.node().kind(), "source_file"); + + assert!(cursor.goto_last_child()); + assert_eq!(cursor.node().kind(), "line_comment"); + assert_eq!( + cursor.node().utf8_text(text.as_bytes()).unwrap(), + "// Another one!" + ); + + assert!(cursor.goto_previous_sibling()); + assert_eq!(cursor.node().kind(), "line_comment"); + assert_eq!( + cursor.node().utf8_text(text.as_bytes()).unwrap(), + "// This is fun!" 
+ ); + + assert!(cursor.goto_previous_sibling()); + assert_eq!(cursor.node().kind(), "line_comment"); + assert_eq!( + cursor.node().utf8_text(text.as_bytes()).unwrap(), + "// Hi there" + ); + + assert!(!cursor.goto_previous_sibling()); } #[test] fn test_tree_cursor_fields() { let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); let tree = parser .parse("function /*1*/ bar /*2*/ () {}", None) @@ -373,7 +455,7 @@ fn test_tree_cursor_fields() { #[test] fn test_tree_cursor_child_for_point() { let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); let source = &" [ one, @@ -480,7 +562,7 @@ fn test_tree_cursor_child_for_point() { #[test] fn test_tree_node_equality() { let mut parser = Parser::new(); - parser.set_language(get_language("rust")).unwrap(); + parser.set_language(&get_language("rust")).unwrap(); let tree = parser.parse("struct A {}", None).unwrap(); let node1 = tree.root_node(); let node2 = tree.root_node(); @@ -494,7 +576,7 @@ fn test_get_changed_ranges() { let source_code = b"{a: null};\n".to_vec(); let mut parser = Parser::new(); - parser.set_language(get_language("javascript")).unwrap(); + parser.set_language(&get_language("javascript")).unwrap(); let tree = parser.parse(&source_code, None).unwrap(); assert_eq!( @@ -514,11 +596,11 @@ fn test_get_changed_ranges() { inserted_text: b"othing".to_vec(), }; let inverse_edit = invert_edit(&source_code, &edit); - let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, edit); + let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &edit); assert_eq!(ranges, vec![range_of(&source_code, "nothing")]); // Replace `nothing` with `null` - that token has changed syntax - let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, inverse_edit); + let ranges = 
get_changed_ranges(&mut parser, &mut tree, &mut source_code, &inverse_edit); assert_eq!(ranges, vec![range_of(&source_code, "null")]); } @@ -534,11 +616,11 @@ fn test_get_changed_ranges() { inserted_text: b"\n".to_vec(), }; let inverse_edit = invert_edit(&source_code, &edit); - let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, edit); + let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &edit); assert_eq!(ranges, vec![]); // Remove leading newline - no changed ranges - let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, inverse_edit); + let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &inverse_edit); assert_eq!(ranges, vec![]); } @@ -554,7 +636,7 @@ fn test_get_changed_ranges() { inserted_text: b", b: false".to_vec(), }; let inverse_edit1 = invert_edit(&source_code, &edit1); - let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, edit1); + let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &edit1); assert_eq!(ranges, vec![range_of(&source_code, ", b: false")]); let edit2 = Edit { @@ -563,21 +645,21 @@ fn test_get_changed_ranges() { inserted_text: b", c: 1".to_vec(), }; let inverse_edit2 = invert_edit(&source_code, &edit2); - let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, edit2); + let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &edit2); assert_eq!(ranges, vec![range_of(&source_code, ", c: 1")]); // Remove the middle pair - let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, inverse_edit2); + let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &inverse_edit2); assert_eq!(ranges, vec![]); // Remove the second pair - let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, inverse_edit1); + let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &inverse_edit1); assert_eq!(ranges, vec![]); } // Wrapping 
elements in larger expressions { - let mut tree = tree.clone(); + let mut tree = tree; let mut source_code = source_code.clone(); // Replace `null` with the binary expression `b === null` @@ -587,23 +669,20 @@ fn test_get_changed_ranges() { inserted_text: b"b === ".to_vec(), }; let inverse_edit1 = invert_edit(&source_code, &edit1); - let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, edit1); + let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &edit1); assert_eq!(ranges, vec![range_of(&source_code, "b === null")]); // Undo - let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, inverse_edit1); + let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, &inverse_edit1); assert_eq!(ranges, vec![range_of(&source_code, "null")]); } } -fn index_of(text: &Vec, substring: &str) -> usize { - str::from_utf8(text.as_slice()) - .unwrap() - .find(substring) - .unwrap() +fn index_of(text: &[u8], substring: &str) -> usize { + str::from_utf8(text).unwrap().find(substring).unwrap() } -fn range_of(text: &Vec, substring: &str) -> Range { +fn range_of(text: &[u8], substring: &str) -> Range { let start_byte = index_of(text, substring); let end_byte = start_byte + substring.as_bytes().len(); Range { @@ -618,9 +697,9 @@ fn get_changed_ranges( parser: &mut Parser, tree: &mut Tree, source_code: &mut Vec, - edit: Edit, + edit: &Edit, ) -> Vec { - perform_edit(tree, source_code, &edit); + perform_edit(tree, source_code, edit).unwrap(); let new_tree = parser.parse(&source_code, Some(tree)).unwrap(); let result = tree.changed_ranges(&new_tree).collect(); *tree = new_tree; diff --git a/third-party/tree-sitter/tree-sitter/cli/src/tests/wasm_language_test.rs b/third-party/tree-sitter/tree-sitter/cli/src/tests/wasm_language_test.rs new file mode 100644 index 00000000000..52a170ce464 --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/cli/src/tests/wasm_language_test.rs @@ -0,0 +1,160 @@ +use 
crate::tests::helpers::{allocations, fixtures::WASM_DIR}; +use lazy_static::lazy_static; +use std::fs; +use tree_sitter::{ + wasmtime::Engine, Parser, Query, QueryCursor, WasmError, WasmErrorKind, WasmStore, +}; + +lazy_static! { + static ref ENGINE: Engine = Engine::default(); +} + +#[test] +fn test_wasm_stdlib_symbols() { + let symbols = tree_sitter::wasm_stdlib_symbols().collect::>(); + assert_eq!( + symbols, + { + let mut symbols = symbols.clone(); + symbols.sort_unstable(); + symbols + }, + "symbols aren't sorted" + ); + + assert!(symbols.contains(&"malloc")); + assert!(symbols.contains(&"free")); + assert!(symbols.contains(&"memset")); + assert!(symbols.contains(&"memcpy")); +} + +#[test] +fn test_load_wasm_language() { + allocations::record(|| { + let mut store = WasmStore::new(ENGINE.clone()).unwrap(); + let mut parser = Parser::new(); + + let wasm_cpp = fs::read(WASM_DIR.join("tree-sitter-cpp.wasm")).unwrap(); + let wasm_rs = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap(); + let wasm_rb = fs::read(WASM_DIR.join("tree-sitter-ruby.wasm")).unwrap(); + let wasm_typescript = fs::read(WASM_DIR.join("tree-sitter-typescript.wasm")).unwrap(); + + let language_rust = store.load_language("rust", &wasm_rs).unwrap(); + let language_cpp = store.load_language("cpp", &wasm_cpp).unwrap(); + let language_ruby = store.load_language("ruby", &wasm_rb).unwrap(); + let language_typescript = store.load_language("typescript", &wasm_typescript).unwrap(); + parser.set_wasm_store(store).unwrap(); + + let mut parser2 = Parser::new(); + parser2 + .set_wasm_store(WasmStore::new(ENGINE.clone()).unwrap()) + .unwrap(); + let mut query_cursor = QueryCursor::new(); + + for mut parser in [parser, parser2] { + for _ in 0..2 { + let query_rust = Query::new(&language_rust, "(const_item) @foo").unwrap(); + let query_typescript = + Query::new(&language_typescript, "(class_declaration) @foo").unwrap(); + + parser.set_language(&language_cpp).unwrap(); + let tree = parser.parse("A c = 
d();", None).unwrap(); + assert_eq!( + tree.root_node().to_sexp(), + "(translation_unit (declaration type: (template_type name: (type_identifier) arguments: (template_argument_list (type_descriptor type: (type_identifier)))) declarator: (init_declarator declarator: (identifier) value: (call_expression function: (identifier) arguments: (argument_list)))))" + ); + + parser.set_language(&language_rust).unwrap(); + let source = "const A: B = c();"; + let tree = parser.parse(source, None).unwrap(); + assert_eq!( + tree.root_node().to_sexp(), + "(source_file (const_item name: (identifier) type: (type_identifier) value: (call_expression function: (identifier) arguments: (arguments))))" + ); + assert_eq!( + query_cursor + .matches(&query_rust, tree.root_node(), source.as_bytes()) + .count(), + 1 + ); + + parser.set_language(&language_ruby).unwrap(); + let tree = parser.parse("class A; end", None).unwrap(); + assert_eq!( + tree.root_node().to_sexp(), + "(program (class name: (constant)))" + ); + + parser.set_language(&language_typescript).unwrap(); + let tree = parser.parse("class A {}", None).unwrap(); + assert_eq!( + tree.root_node().to_sexp(), + "(program (class_declaration name: (type_identifier) body: (class_body)))" + ); + assert_eq!( + query_cursor + .matches(&query_typescript, tree.root_node(), source.as_bytes()) + .count(), + 1 + ); + } + } + }); +} + +#[test] +fn test_load_and_reload_wasm_language() { + allocations::record(|| { + let mut store = WasmStore::new(ENGINE.clone()).unwrap(); + + let wasm_rust = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap(); + let wasm_typescript = fs::read(WASM_DIR.join("tree-sitter-typescript.wasm")).unwrap(); + + let language_rust = store.load_language("rust", &wasm_rust).unwrap(); + let language_typescript = store.load_language("typescript", &wasm_typescript).unwrap(); + assert_eq!(store.language_count(), 2); + + // When a language is dropped, stores can release their instances of that language. 
+ drop(language_rust); + assert_eq!(store.language_count(), 1); + + let language_rust = store.load_language("rust", &wasm_rust).unwrap(); + assert_eq!(store.language_count(), 2); + + drop(language_rust); + drop(language_typescript); + assert_eq!(store.language_count(), 0); + }); +} + +#[test] +fn test_load_wasm_errors() { + allocations::record(|| { + let mut store = WasmStore::new(ENGINE.clone()).unwrap(); + let wasm = fs::read(WASM_DIR.join("tree-sitter-rust.wasm")).unwrap(); + + let bad_wasm = &wasm[1..]; + assert_eq!( + store.load_language("rust", bad_wasm).unwrap_err(), + WasmError { + kind: WasmErrorKind::Parse, + message: "failed to parse dylink section of wasm module".into(), + } + ); + + assert_eq!( + store.load_language("not_rust", &wasm).unwrap_err(), + WasmError { + kind: WasmErrorKind::Instantiate, + message: "module did not contain language function: tree_sitter_not_rust".into(), + } + ); + + let mut bad_wasm = wasm.clone(); + bad_wasm[300..500].iter_mut().for_each(|b| *b = 0); + assert_eq!( + store.load_language("rust", &bad_wasm).unwrap_err().kind, + WasmErrorKind::Compile, + ); + }); +} diff --git a/third-party/tree-sitter/tree-sitter/cli/src/util.rs b/third-party/tree-sitter/tree-sitter/cli/src/util.rs index d180cd54300..fd4f4699ddb 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/util.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/util.rs @@ -1,18 +1,18 @@ -use anyhow::Result; -use std::io; -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::sync::Arc; -use std::thread; -use tree_sitter::{Parser, Tree}; +use std::{ + path::{Path, PathBuf}, + process::{Child, ChildStdin, Command, Stdio}, + sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, + }, +}; -#[cfg(unix)] -use anyhow::{anyhow, Context}; -#[cfg(unix)] -use std::path::PathBuf; -#[cfg(unix)] -use std::process::{Child, ChildStdin, Command, Stdio}; +use anyhow::{anyhow, Context, Result}; +use indoc::indoc; +use tree_sitter::{Parser, Tree}; +use tree_sitter_config::Config; +use 
tree_sitter_loader::Config as LoaderConfig; -#[cfg(unix)] const HTML_HEADER: &[u8] = b" @@ -22,62 +22,72 @@ svg { width: 100%; } "; -pub fn cancel_on_stdin() -> Arc { +#[must_use] +pub fn lang_not_found_for_path(path: &Path, loader_config: &LoaderConfig) -> String { + let path = path.display(); + format!( + indoc! {" + No language found for path `{}` + + If a language should be associated with this file extension, please ensure the path to `{}` is inside one of the following directories as specified by your 'config.json':\n\n{}\n + If the directory that contains the relevant grammar for `{}` is not listed above, please add the directory to the list of directories in your config file, {} + "}, + path, + path, + loader_config + .parser_directories + .iter() + .enumerate() + .map(|(i, d)| format!(" {}. {}", i + 1, d.display())) + .collect::>() + .join(" \n"), + path, + if let Ok(Some(config_path)) = Config::find_config_file() { + format!("located at {}", config_path.display()) + } else { + String::from("which you need to create by running `tree-sitter init-config`") + } + ) +} + +#[must_use] +pub fn cancel_on_signal() -> Arc { let result = Arc::new(AtomicUsize::new(0)); - if atty::is(atty::Stream::Stdin) { - thread::spawn({ - let flag = result.clone(); - move || { - let mut line = String::new(); - io::stdin().read_line(&mut line).unwrap(); - flag.store(1, Ordering::Relaxed); - } - }); - } + ctrlc::set_handler({ + let flag = result.clone(); + move || { + flag.store(1, Ordering::Relaxed); + } + }) + .expect("Error setting Ctrl-C handler"); result } -#[cfg(windows)] -pub struct LogSession; - -#[cfg(unix)] pub struct LogSession { path: PathBuf, dot_process: Option, dot_process_stdin: Option, + open_log: bool, } -#[cfg(windows)] -pub fn print_tree_graph(_tree: &Tree, _path: &str) -> Result<()> { - Ok(()) -} - -#[cfg(windows)] -pub fn log_graphs(_parser: &mut Parser, _path: &str) -> Result { - Ok(LogSession) -} - -#[cfg(unix)] -pub fn print_tree_graph(tree: &Tree, path: 
&str) -> Result<()> { - let session = LogSession::new(path)?; +pub fn print_tree_graph(tree: &Tree, path: &str, quiet: bool) -> Result<()> { + let session = LogSession::new(path, quiet)?; tree.print_dot_graph(session.dot_process_stdin.as_ref().unwrap()); Ok(()) } -#[cfg(unix)] -pub fn log_graphs(parser: &mut Parser, path: &str) -> Result { - let session = LogSession::new(path)?; +pub fn log_graphs(parser: &mut Parser, path: &str, open_log: bool) -> Result { + let session = LogSession::new(path, open_log)?; parser.print_dot_graphs(session.dot_process_stdin.as_ref().unwrap()); Ok(session) } -#[cfg(unix)] impl LogSession { - fn new(path: &str) -> Result { + fn new(path: &str, open_log: bool) -> Result { use std::io::Write; let mut dot_file = std::fs::File::create(path)?; - dot_file.write(HTML_HEADER)?; + dot_file.write_all(HTML_HEADER)?; let mut dot_process = Command::new("dot") .arg("-Tsvg") .stdin(Stdio::piped()) @@ -94,11 +104,11 @@ impl LogSession { path: PathBuf::from(path), dot_process: Some(dot_process), dot_process_stdin: Some(dot_stdin), + open_log, }) } } -#[cfg(unix)] impl Drop for LogSession { fn drop(&mut self) { use std::fs; @@ -106,10 +116,8 @@ impl Drop for LogSession { drop(self.dot_process_stdin.take().unwrap()); let output = self.dot_process.take().unwrap().wait_with_output().unwrap(); if output.status.success() { - if cfg!(target_os = "macos") - && fs::metadata(&self.path).unwrap().len() > HTML_HEADER.len() as u64 - { - Command::new("open").arg(&self.path).output().unwrap(); + if self.open_log && fs::metadata(&self.path).unwrap().len() > HTML_HEADER.len() as u64 { + webbrowser::open(&self.path.to_string_lossy()).unwrap(); } } else { eprintln!( diff --git a/third-party/tree-sitter/tree-sitter/cli/src/wasm.rs b/third-party/tree-sitter/tree-sitter/cli/src/wasm.rs index 467fef7139a..11dbb5c574e 100644 --- a/third-party/tree-sitter/tree-sitter/cli/src/wasm.rs +++ b/third-party/tree-sitter/tree-sitter/cli/src/wasm.rs @@ -1,130 +1,102 @@ use 
super::generate::parse_grammar::GrammarJSON; use anyhow::{anyhow, Context, Result}; -use std::ffi::{OsStr, OsString}; -use std::fs; -use std::path::Path; -use std::process::Command; -use which::which; - -const EMSCRIPTEN_TAG: &'static str = concat!("emscripten/emsdk:", env!("EMSCRIPTEN_VERSION")); +use std::{fs, path::Path}; +use tree_sitter::wasm_stdlib_symbols; +use tree_sitter_loader::Loader; +use wasmparser::Parser; + +pub fn load_language_wasm_file(language_dir: &Path) -> Result<(String, Vec)> { + let grammar_name = get_grammar_name(language_dir) + .with_context(|| "Failed to get wasm filename") + .unwrap(); + let wasm_filename = format!("tree-sitter-{grammar_name}.wasm"); + let contents = fs::read(language_dir.join(&wasm_filename)).with_context(|| { + format!("Failed to read {wasm_filename}. Run `tree-sitter build-wasm` first.",) + })?; + Ok((grammar_name, contents)) +} -pub fn get_grammar_name(src_dir: &Path) -> Result { +pub fn get_grammar_name(language_dir: &Path) -> Result { + let src_dir = language_dir.join("src"); let grammar_json_path = src_dir.join("grammar.json"); let grammar_json = fs::read_to_string(&grammar_json_path) - .with_context(|| format!("Failed to read grammar file {:?}", grammar_json_path))?; + .with_context(|| format!("Failed to read grammar file {grammar_json_path:?}"))?; let grammar: GrammarJSON = serde_json::from_str(&grammar_json) - .with_context(|| format!("Failed to parse grammar file {:?}", grammar_json_path))?; + .with_context(|| format!("Failed to parse grammar file {grammar_json_path:?}"))?; Ok(grammar.name) } -pub fn compile_language_to_wasm(language_dir: &Path, force_docker: bool) -> Result<()> { - let src_dir = language_dir.join("src"); - let grammar_name = get_grammar_name(&src_dir)?; - let output_filename = format!("tree-sitter-{}.wasm", grammar_name); - - let emcc_bin = if cfg!(windows) { "emcc.bat" } else { "emcc" }; - let emcc_path = which(emcc_bin) - .ok() - .and_then(|p| Command::new(&p).output().and(Ok(p)).ok()); - - 
let mut command; - if !force_docker && emcc_path.is_some() { - command = Command::new(emcc_path.unwrap()); - command.current_dir(&language_dir); - } else if Command::new("docker").output().is_ok() { - command = Command::new("docker"); - command.args(&["run", "--rm"]); - - // Mount the parser directory as a volume - let mut volume_string; - if let (Some(parent), Some(filename)) = (language_dir.parent(), language_dir.file_name()) { - volume_string = OsString::from(parent); - volume_string.push(":/src:Z"); - command.arg("--workdir"); - command.arg(&Path::new("/src").join(filename)); - } else { - volume_string = OsString::from(language_dir); - volume_string.push(":/src:Z"); - command.args(&["--workdir", "/src"]); - } - - command.args(&[OsStr::new("--volume"), &volume_string]); - - // Get the current user id so that files created in the docker container will have - // the same owner. - if cfg!(unix) { - let user_id_output = Command::new("id") - .arg("-u") - .output() - .with_context(|| "Failed to get get current user id")?; - let user_id = String::from_utf8_lossy(&user_id_output.stdout); - let user_id = user_id.trim(); - command.args(&["--user", user_id]); - } - - // Run `emcc` in a container using the `emscripten-slim` image - command.args(&[EMSCRIPTEN_TAG, "emcc"]); - } else { - if force_docker { - return Err(anyhow!( - "You must have docker on your PATH to run this command with --docker" - )); - } - return Err(anyhow!( - "You must have either emcc or docker on your PATH to run this command" - )); - } - - command.args(&[ - "-o", +pub fn compile_language_to_wasm( + loader: &Loader, + language_dir: &Path, + output_dir: &Path, + force_docker: bool, +) -> Result<()> { + let grammar_name = get_grammar_name(language_dir)?; + let output_filename = output_dir.join(format!("tree-sitter-{grammar_name}.wasm")); + let src_path = language_dir.join("src"); + let scanner_path = loader.get_scanner_path(&src_path); + loader.compile_parser_to_wasm( + &grammar_name, + &src_path, + 
scanner_path + .as_ref() + .and_then(|p| Some(Path::new(p.file_name()?))), &output_filename, - "-Os", - "-s", - "WASM=1", - "-s", - "SIDE_MODULE=1", - "-s", - "TOTAL_MEMORY=33554432", - "-s", - "NODEJS_CATCH_EXIT=0", - "-s", - "NODEJS_CATCH_REJECTION=0", - "-s", - &format!("EXPORTED_FUNCTIONS=[\"_tree_sitter_{}\"]", grammar_name), - "-fno-exceptions", - "-I", - "src", - ]); - - let src = Path::new("src"); - let parser_c_path = src.join("parser.c"); - let scanner_c_path = src.join("scanner.c"); - let scanner_cc_path = src.join("scanner.cc"); - let scanner_cpp_path = src.join("scanner.cpp"); - - if language_dir.join(&scanner_cc_path).exists() { - command.arg("-xc++").arg(&scanner_cc_path); - } else if language_dir.join(&scanner_cpp_path).exists() { - command.arg("-xc++").arg(&scanner_cpp_path); - } else if language_dir.join(&scanner_c_path).exists() { - command.arg(&scanner_c_path); + force_docker, + )?; + + // Exit with an error if the external scanner uses symbols from the + // C or C++ standard libraries that aren't available to wasm parsers. + let stdlib_symbols = wasm_stdlib_symbols().collect::>(); + let dylink_symbols = [ + "__indirect_function_table", + "__memory_base", + "__stack_pointer", + "__table_base", + "__table_base", + "memory", + ]; + let builtin_symbols = [ + "__assert_fail", + "__cxa_atexit", + "abort", + "emscripten_notify_memory_growth", + "proc_exit", + ]; + + let mut missing_symbols = Vec::new(); + let wasm_bytes = fs::read(&output_filename)?; + let parser = Parser::new(0); + for payload in parser.parse_all(&wasm_bytes) { + if let wasmparser::Payload::ImportSection(imports) = payload? 
{ + for import in imports { + let import = import?.name; + if !builtin_symbols.contains(&import) + && !stdlib_symbols.contains(&import) + && !dylink_symbols.contains(&import) + { + missing_symbols.push(import); + } + } + } } - command.arg(&parser_c_path); - - let output = command - .output() - .with_context(|| "Failed to run emcc command")?; - if !output.status.success() { - return Err(anyhow!( - "emcc command failed - {}", - String::from_utf8_lossy(&output.stderr) - )); + if !missing_symbols.is_empty() { + Err(anyhow!( + concat!( + "This external scanner uses a symbol that isn't available to wasm parsers.\n", + "\n", + "Missing symbols:\n", + " {}\n", + "\n", + "Available symbols:\n", + " {}", + ), + missing_symbols.join("\n "), + stdlib_symbols.join("\n ") + ))?; } - // Move the created `.wasm` file into the current working directory. - fs::rename(&language_dir.join(&output_filename), &output_filename) - .with_context(|| format!("Couldn't find output file {:?}", output_filename))?; - Ok(()) } diff --git a/third-party/tree-sitter/tree-sitter/docs/Gemfile.lock b/third-party/tree-sitter/tree-sitter/docs/Gemfile.lock index 3b2801bef5b..47aca534d8c 100644 --- a/third-party/tree-sitter/tree-sitter/docs/Gemfile.lock +++ b/third-party/tree-sitter/tree-sitter/docs/Gemfile.lock @@ -1,22 +1,32 @@ GEM remote: https://rubygems.org/ specs: - activesupport (7.0.4.3) + activesupport (7.1.3) + base64 + bigdecimal concurrent-ruby (~> 1.0, >= 1.0.2) + connection_pool (>= 2.2.5) + drb i18n (>= 1.6, < 2) minitest (>= 5.1) + mutex_m tzinfo (~> 2.0) addressable (2.8.1) public_suffix (>= 2.0.2, < 6.0) + base64 (0.2.0) + bigdecimal (3.1.6) coffee-script (2.4.1) coffee-script-source execjs coffee-script-source (1.11.1) colorator (1.1.0) - commonmarker (0.23.8) - concurrent-ruby (1.2.2) + commonmarker (0.23.10) + concurrent-ruby (1.2.3) + connection_pool (2.4.1) dnsruby (1.61.9) simpleidn (~> 0.1) + drb (2.2.0) + ruby2_keywords em-websocket (0.5.3) eventmachine (>= 0.12.9) http_parser.rb 
(~> 0) @@ -86,7 +96,7 @@ GEM activesupport (>= 2) nokogiri (>= 1.4) http_parser.rb (0.8.0) - i18n (1.12.0) + i18n (1.14.1) concurrent-ruby (~> 1.0) jekyll (3.9.3) addressable (~> 2.4) @@ -209,8 +219,9 @@ GEM jekyll (>= 3.5, < 5.0) jekyll-feed (~> 0.9) jekyll-seo-tag (~> 2.1) - minitest (5.18.0) - nokogiri (1.14.2-x86_64-linux) + minitest (5.21.2) + mutex_m (0.2.0) + nokogiri (1.16.2-x86_64-linux) racc (~> 1.4) octokit (4.25.1) faraday (>= 1, < 3) @@ -218,7 +229,7 @@ GEM pathutil (0.16.2) forwardable-extended (~> 2.6) public_suffix (4.0.7) - racc (1.6.2) + racc (1.7.3) rb-fsevent (0.11.2) rb-inotify (0.10.1) ffi (~> 1.0) @@ -250,7 +261,7 @@ GEM webrick (1.8.1) PLATFORMS - ruby + x86_64-linux DEPENDENCIES github-pages diff --git a/third-party/tree-sitter/tree-sitter/docs/assets/css/style.scss b/third-party/tree-sitter/tree-sitter/docs/assets/css/style.scss index 2b7a018b7ad..b838211fbca 100644 --- a/third-party/tree-sitter/tree-sitter/docs/assets/css/style.scss +++ b/third-party/tree-sitter/tree-sitter/docs/assets/css/style.scss @@ -33,6 +33,16 @@ a[href^="http"]:after { padding: $padding 0; } +#main-content code:not(pre code, a code) { + color: #c7254e; + font-size: 0.9em; + background-color: #f8f8f8; + border: 1px solid #eaeaea; + border-radius: 3px; + margin: 0 2px; + padding: 0 5px; +} + #sidebar { position: fixed; background: white; @@ -162,6 +172,10 @@ a > span { .CodeMirror div.CodeMirror-cursor { border-left: 3px solid red; } + + h4#about { + margin: 10ex 0 0 0; + } } #output-container { diff --git a/third-party/tree-sitter/tree-sitter/docs/assets/js/playground.js b/third-party/tree-sitter/tree-sitter/docs/assets/js/playground.js index c14bf0f5139..62bff3620c6 100644 --- a/third-party/tree-sitter/tree-sitter/docs/assets/js/playground.js +++ b/third-party/tree-sitter/tree-sitter/docs/assets/js/playground.js @@ -175,7 +175,7 @@ let tree; const start = cursor.startPosition; const end = cursor.endPosition; const id = cursor.nodeId; - let fieldName = 
cursor.currentFieldName(); + let fieldName = cursor.currentFieldName; if (fieldName) { fieldName += ': '; } else { diff --git a/third-party/tree-sitter/tree-sitter/docs/index.md b/third-party/tree-sitter/tree-sitter/docs/index.md index 2e3b59ed62f..5ea312a407c 100644 --- a/third-party/tree-sitter/tree-sitter/docs/index.md +++ b/third-party/tree-sitter/tree-sitter/docs/index.md @@ -15,35 +15,43 @@ Tree-sitter is a parser generator tool and an incremental parsing library. It ca There are currently bindings that allow Tree-sitter to be used from the following languages: +* [C#](https://github.com/tree-sitter/csharp-tree-sitter) * [Go](https://github.com/smacker/go-tree-sitter) +* [Guile](https://github.com/Z572/guile-ts) * [Haskell](https://github.com/tree-sitter/haskell-tree-sitter) +* [Java](https://github.com/serenadeai/java-tree-sitter) +* [Java](https://github.com/bonede/tree-sitter-ng) +* [Java (Android)](https://github.com/AndroidIDEOfficial/android-tree-sitter) * [JavaScript (Node.js)](https://github.com/tree-sitter/node-tree-sitter) * [JavaScript (Wasm)](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web) +* [Kotlin](https://github.com/oxisto/kotlintree) * [Lua](https://github.com/euclidianAce/ltreesitter) * [OCaml](https://github.com/returntocorp/ocaml-tree-sitter-core) +* [Odin](https://github.com/laytan/odin-tree-sitter) * [Perl](https://metacpan.org/pod/Text::Treesitter) * [Python](https://github.com/tree-sitter/py-tree-sitter) -* [Ruby](https://github.com/tree-sitter/ruby-tree-sitter) +* [Ruby](https://github.com/Faveod/ruby-tree-sitter) * [Ruby](https://github.com/calicoday/ruby-tree-sitter-ffi) * [Rust](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust) * [Swift](https://github.com/ChimeHQ/SwiftTreeSitter) -* [Kotlin](https://github.com/oxisto/kotlintree) -* [Java](https://github.com/serenadeai/java-tree-sitter) ### Parsers * [Ada](https://github.com/briot/tree-sitter-ada) * 
[Agda](https://github.com/tree-sitter/tree-sitter-agda) * [Apex](https://github.com/aheber/tree-sitter-sfapex) +* [ApexCode](https://github.com/jsuarez-chipiron/tree-sitter-apex) +* [AWS Event Rule](https://github.com/3p3r/tree-sitter-eventrule) * [Bash](https://github.com/tree-sitter/tree-sitter-bash) * [Beancount](https://github.com/zwpaper/tree-sitter-beancount) * [Cap'n Proto](https://github.com/amaanq/tree-sitter-capnp) * [C](https://github.com/tree-sitter/tree-sitter-c) * [C++](https://github.com/tree-sitter/tree-sitter-cpp) * [C#](https://github.com/tree-sitter/tree-sitter-c-sharp) +* [CEL](https://github.com/bufbuild/tree-sitter-cel) * [Clojure](https://github.com/sogaiu/tree-sitter-clojure) * [CMake](https://github.com/uyha/tree-sitter-cmake) -* [Comment](https://github.com/stsewd/tree-sitter-comment) +* [COBOL](https://github.com/yutaro-sakamoto/tree-sitter-cobol) * [Common Lisp](https://github.com/theHamsta/tree-sitter-commonlisp) * [CSS](https://github.com/tree-sitter/tree-sitter-css) * [CUDA](https://github.com/theHamsta/tree-sitter-cuda) @@ -68,45 +76,55 @@ There are currently bindings that allow Tree-sitter to be used from the followin * [Go](https://github.com/tree-sitter/tree-sitter-go) * [Go mod](https://github.com/camdencheek/tree-sitter-go-mod) * [Go work](https://github.com/omertuc/tree-sitter-go-work) -* [Graphql](https://github.com/bkegley/tree-sitter-graphql) +* [GraphQL](https://github.com/bkegley/tree-sitter-graphql) * [Hack](https://github.com/slackhq/tree-sitter-hack) * [Haskell](https://github.com/tree-sitter/tree-sitter-haskell) * [HCL](https://github.com/MichaHoffmann/tree-sitter-hcl) * [HTML](https://github.com/tree-sitter/tree-sitter-html) +* [ISPC](https://github.com/fab4100/tree-sitter-ispc) * [Java](https://github.com/tree-sitter/tree-sitter-java) * [JavaScript](https://github.com/tree-sitter/tree-sitter-javascript) * [jq](https://github.com/flurie/tree-sitter-jq) -* [JSON5](https://github.com/Joakker/tree-sitter-json5) * 
[JSON](https://github.com/tree-sitter/tree-sitter-json) +* [JSON5](https://github.com/Joakker/tree-sitter-json5) * [Julia](https://github.com/tree-sitter/tree-sitter-julia) +* [Just](https://github.com/IndianBoy42/tree-sitter-just) * [Kotlin](https://github.com/fwcd/tree-sitter-kotlin) * [LALRPOP](https://github.com/traxys/tree-sitter-lalrpop) -* [Latex](https://github.com/latex-lsp/tree-sitter-latex) +* [LaTeX](https://github.com/latex-lsp/tree-sitter-latex) * [Lean](https://github.com/Julian/tree-sitter-lean) * [LLVM](https://github.com/benwilliamgraham/tree-sitter-llvm) * [LLVM MachineIR](https://github.com/Flakebi/tree-sitter-llvm-mir) +* [LLVM MLIR](https://github.com/artagnon/tree-sitter-mlir) * [LLVM TableGen](https://github.com/Flakebi/tree-sitter-tablegen) -* [Lua](https://github.com/Azganoth/tree-sitter-lua) +* [Lua](https://github.com/MunifTanjim/tree-sitter-lua) +* [Magik](https://github.com/krn-robin/tree-sitter-magik) * [Make](https://github.com/alemuller/tree-sitter-make) * [Markdown](https://github.com/ikatyang/tree-sitter-markdown) * [Markdown](https://github.com/MDeiml/tree-sitter-markdown) * [Meson](https://github.com/Decodetalkers/tree-sitter-meson) * [Meson](https://github.com/staysail/tree-sitter-meson) -* [Motorola 68000 Assembly](https://github.com/grahambates/tree-sitter-m68k) +* [Motorola 68000 assembly](https://github.com/grahambates/tree-sitter-m68k) +* [Nim](https://github.com/alaviss/tree-sitter-nim) * [Nix](https://github.com/cstrahan/tree-sitter-nix) +* [Noir](https://github.com/hhamud/tree-sitter-noir) * [Objective-C](https://github.com/jiyee/tree-sitter-objc) * [OCaml](https://github.com/tree-sitter/tree-sitter-ocaml) +* [Odin](https://github.com/amaanq/tree-sitter-odin) +* [Ohm](https://github.com/novusnota/tree-sitter-ohm) * [Org](https://github.com/milisims/tree-sitter-org) +* [P4](https://github.com/ace-design/tree-sitter-p4) * [Pascal](https://github.com/Isopod/tree-sitter-pascal) * 
[Perl](https://github.com/ganezdragon/tree-sitter-perl) * [Perl](https://github.com/tree-sitter-perl/tree-sitter-perl) * [Perl POD](https://github.com/tree-sitter-perl/tree-sitter-pod) * [PHP](https://github.com/tree-sitter/tree-sitter-php) * [Portable Game Notation](https://github.com/rolandwalker/tree-sitter-pgn) -* [PowerShell](https://github.com/PowerShell/tree-sitter-PowerShell) +* [PowerShell](https://github.com/airbus-cert/tree-sitter-powershell) * [Protocol Buffers](https://github.com/mitchellh/tree-sitter-proto) * [Python](https://github.com/tree-sitter/tree-sitter-python) * [QML](https://github.com/yuja/tree-sitter-qmljs) +* [QuakeC](https://github.com/vkazanov/tree-sitter-quakec) * [Racket](https://github.com/6cdh/tree-sitter-racket) * [Rasi](https://github.com/Fymyte/tree-sitter-rasi) * [re2c](https://github.com/alemuller/tree-sitter-re2c) @@ -114,36 +132,46 @@ There are currently bindings that allow Tree-sitter to be used from the followin * [Rego](https://github.com/FallenAngel97/tree-sitter-rego) * [reStructuredText](https://github.com/stsewd/tree-sitter-rst) * [R](https://github.com/r-lib/tree-sitter-r) +* [Robot](https://github.com/Hubro/tree-sitter-robot) * [Ruby](https://github.com/tree-sitter/tree-sitter-ruby) * [Rust](https://github.com/tree-sitter/tree-sitter-rust) * [Scala](https://github.com/tree-sitter/tree-sitter-scala) * [Scheme](https://github.com/6cdh/tree-sitter-scheme) -* [Scss](https://github.com/serenadeai/tree-sitter-scss) +* [SCSS](https://github.com/serenadeai/tree-sitter-scss) * [S-expressions](https://github.com/AbstractMachinesLab/tree-sitter-sexp) * [Smali](https://github.com/amaanq/tree-sitter-smali) * [Smali](https://git.sr.ht/~yotam/tree-sitter-smali) -* [Sourcepawn](https://github.com/nilshelmig/tree-sitter-sourcepawn) +* [SourcePawn](https://github.com/nilshelmig/tree-sitter-sourcepawn) * [SPARQL](https://github.com/BonaBeavis/tree-sitter-sparql) * [SQL - BigQuery](https://github.com/takegue/tree-sitter-sql-bigquery) +* 
[SQL - General](https://github.com/DerekStride/tree-sitter-sql) * [SQL - PostgreSQL](https://github.com/m-novikov/tree-sitter-sql) * [SQL - SQLite](https://github.com/dhcmrlchtdj/tree-sitter-sqlite) * [SSH](https://github.com/metio/tree-sitter-ssh-client-config) +* [Supercollider](https://github.com/madskjeldgaard/tree-sitter-supercollider) * [Svelte](https://github.com/Himujjal/tree-sitter-svelte) * [Swift](https://github.com/alex-pinkus/tree-sitter-swift) * [SystemRDL](https://github.com/SystemRDL/tree-sitter-systemrdl) +* [Tact](https://github.com/tact-lang/tree-sitter-tact) * [Thrift](https://github.com/duskmoon314/tree-sitter-thrift) +* ["TODO:" comments](https://github.com/stsewd/tree-sitter-comment) * [TOML](https://github.com/ikatyang/tree-sitter-toml) * [Tree-sitter Query](https://github.com/nvim-treesitter/tree-sitter-query) * [Turtle](https://github.com/BonaBeavis/tree-sitter-turtle) +* [Twig](https://github.com/kaermorchen/tree-sitter-twig) * [Twig](https://github.com/gbprod/tree-sitter-twig) * [TypeScript](https://github.com/tree-sitter/tree-sitter-typescript) +* [Ungrammar](https://github.com/Philipp-M/tree-sitter-ungrammar) +* [USD](https://github.com/ColinKennedy/tree-sitter-usd) * [Verilog](https://github.com/tree-sitter/tree-sitter-verilog) * [VHDL](https://github.com/alemuller/tree-sitter-vhdl) * [Vue](https://github.com/ikatyang/tree-sitter-vue) -* [WASM](https://github.com/wasm-lsp/tree-sitter-wasm) -* [WGSL WebGPU Shading Language](https://github.com/mehmetoguzderin/tree-sitter-wgsl) +* [Wasm](https://github.com/wasm-lsp/tree-sitter-wasm) +* [WDL](https://github.com/jdidion/tree-sitter-wdl) +* [WGSL (WebGPU Shading Language)](https://github.com/mehmetoguzderin/tree-sitter-wgsl) * [YAML](https://github.com/ikatyang/tree-sitter-yaml) * [YANG](https://github.com/Hubro/tree-sitter-yang) +* [Yuck](https://github.com/Philipp-M/tree-sitter-yuck) * [Zig](https://github.com/maxxnino/tree-sitter-zig) ### Talks on Tree-sitter @@ -156,9 +184,9 @@ There 
are currently bindings that allow Tree-sitter to be used from the followin The design of Tree-sitter was greatly influenced by the following research papers: -- [Practical Algorithms for Incremental Software Development Environments](https://www2.eecs.berkeley.edu/Pubs/TechRpts/1997/CSD-97-946.pdf) -- [Context Aware Scanning for Parsing Extensible Languages](https://www-users.cse.umn.edu/~evw/pubs/vanwyk07gpce/vanwyk07gpce.pdf) -- [Efficient and Flexible Incremental Parsing](http://harmonia.cs.berkeley.edu/papers/twagner-parsing.pdf) -- [Incremental Analysis of Real Programming Languages](http://harmonia.cs.berkeley.edu/papers/twagner-glr.pdf) -- [Error Detection and Recovery in LR Parsers](http://what-when-how.com/compiler-writing/bottom-up-parsing-compiler-writing-part-13) -- [Error Recovery for LR Parsers](https://apps.dtic.mil/sti/pdfs/ADA043470.pdf) +* [Practical Algorithms for Incremental Software Development Environments](https://www2.eecs.berkeley.edu/Pubs/TechRpts/1997/CSD-97-946.pdf) +* [Context Aware Scanning for Parsing Extensible Languages](https://www-users.cse.umn.edu/~evw/pubs/vanwyk07gpce/vanwyk07gpce.pdf) +* [Efficient and Flexible Incremental Parsing](https://harmonia.cs.berkeley.edu/papers/twagner-parsing.pdf) +* [Incremental Analysis of Real Programming Languages](https://harmonia.cs.berkeley.edu/papers/twagner-glr.pdf) +* [Error Detection and Recovery in LR Parsers](https://what-when-how.com/compiler-writing/bottom-up-parsing-compiler-writing-part-13) +* [Error Recovery for LR Parsers](https://apps.dtic.mil/sti/pdfs/ADA043470.pdf) diff --git a/third-party/tree-sitter/tree-sitter/docs/section-2-using-parsers.md b/third-party/tree-sitter/tree-sitter/docs/section-2-using-parsers.md index ea32f4f57b5..2b5e04b885a 100644 --- a/third-party/tree-sitter/tree-sitter/docs/section-2-using-parsers.md +++ b/third-party/tree-sitter/tree-sitter/docs/section-2-using-parsers.md @@ -51,7 +51,7 @@ Here's an example of a simple C program that uses the Tree-sitter 
[JSON parser]( // Declare the `tree_sitter_json` function, which is // implemented by the `tree-sitter-json` library. -TSLanguage *tree_sitter_json(); +const TSLanguage *tree_sitter_json(void); int main() { // Create a parser. @@ -137,7 +137,7 @@ TSTree *ts_parser_parse( ); ``` -The `TSInput` structure lets you to provide your own function for reading a chunk of text at a given byte offset and row/column position. The function can return text encoded in either UTF8 or UTF16. This interface allows you to efficiently parse text that is stored in your own data structure. +The `TSInput` structure lets you provide your own function for reading a chunk of text at a given byte offset and row/column position. The function can return text encoded in either UTF8 or UTF16. This interface allows you to efficiently parse text that is stored in your own data structure. ```c typedef struct { @@ -290,7 +290,7 @@ This `ts_node_edit` function is _only_ needed in the case where you have retriev ### Multi-language Documents -Sometimes, different parts of a file may be written in different languages. For example, templating languages like [EJS](http://ejs.co) and [ERB](https://ruby-doc.org/stdlib-2.5.1/libdoc/erb/rdoc/ERB.html) allow you to generate HTML by writing a mixture of HTML and another language like JavaScript or Ruby. +Sometimes, different parts of a file may be written in different languages. For example, templating languages like [EJS](https://ejs.co) and [ERB](https://ruby-doc.org/stdlib-2.5.1/libdoc/erb/rdoc/ERB.html) allow you to generate HTML by writing a mixture of HTML and another language like JavaScript or Ruby. Tree-sitter handles these types of documents by allowing you to create a syntax tree based on the text in certain _ranges_ of a file. @@ -326,13 +326,13 @@ Conceptually, it can be represented by three syntax trees with overlapping range #include // These functions are each implemented in their own repo. 
-const TSLanguage *tree_sitter_embedded_template(); -const TSLanguage *tree_sitter_html(); -const TSLanguage *tree_sitter_ruby(); +const TSLanguage *tree_sitter_embedded_template(void); +const TSLanguage *tree_sitter_html(void); +const TSLanguage *tree_sitter_ruby(void); int main(int argc, const char **argv) { const char *text = argv[1]; - unsigned len = strlen(src); + unsigned len = strlen(text); // Parse the entire text as ERB. TSParser *parser = ts_parser_new(); @@ -410,6 +410,12 @@ Internally, copying a syntax tree just entails incrementing an atomic reference You can access every node in a syntax tree using the `TSNode` APIs [described above](#retrieving-nodes), but if you need to access a large number of nodes, the fastest way to do so is with a _tree cursor_. A cursor is a stateful object that allows you to walk a syntax tree with maximum efficiency. +Note that the given input node is considered the root of the cursor, and the +cursor cannot walk outside this node, so going to the parent or any sibling +of the root node will return `false`. This has no unexpected effects if the given +input node is the actual `root` node of the tree, but is something to keep in mind +when using nodes that are not the `root` node. + You can initialize a cursor from any node: ```c @@ -442,13 +448,13 @@ Many code analysis tasks involve searching for patterns in syntax trees. Tree-si A _query_ consists of one or more _patterns_, where each pattern is an [S-expression](https://en.wikipedia.org/wiki/S-expression) that matches a certain set of nodes in a syntax tree. The expression to match a given node consists of a pair of parentheses containing two things: the node's type, and optionally, a series of other S-expressions that match the node's children. For example, this pattern would match any `binary_expression` node whose children are both `number_literal` nodes: -``` scheme +```scheme (binary_expression (number_literal) (number_literal)) ``` Children can also be omitted. 
For example, this would match any `binary_expression` where at least _one_ of child is a `string_literal` node: -``` scheme +```scheme (binary_expression (string_literal)) ``` @@ -456,7 +462,7 @@ Children can also be omitted. For example, this would match any `binary_expressi In general, it's a good idea to make patterns more specific by specifying [field names](#node-field-names) associated with child nodes. You do this by prefixing a child pattern with a field name followed by a colon. For example, this pattern would match an `assignment_expression` node where the `left` child is a `member_expression` whose `object` is a `call_expression`. -``` scheme +```scheme (assignment_expression left: (member_expression object: (call_expression))) @@ -464,9 +470,9 @@ In general, it's a good idea to make patterns more specific by specifying [field #### Negated Fields -You can also constrain a pattern so that it only matches nodes that *lack* a certain field. To do this, add a field name prefixed by a `!` within the parent pattern. For example, this pattern would match a class declaration with no type parameters: +You can also constrain a pattern so that it only matches nodes that _lack_ a certain field. To do this, add a field name prefixed by a `!` within the parent pattern. For example, this pattern would match a class declaration with no type parameters: -``` scheme +```scheme (class_declaration name: (identifier) @class_name !type_parameters) @@ -476,7 +482,7 @@ You can also constrain a pattern so that it only matches nodes that *lack* a cer The parenthesized syntax for writing nodes only applies to [named nodes](#named-vs-anonymous-nodes). To match specific anonymous nodes, you write their name between double quotes. 
For example, this pattern would match any `binary_expression` where the operator is `!=` and the right side is `null`: -``` scheme +```scheme (binary_expression operator: "!=" right: (null)) @@ -488,7 +494,7 @@ When matching patterns, you may want to process specific nodes within the patter For example, this pattern would match any assignment of a `function` to an `identifier`, and it would associate the name `the-function-name` with the identifier: -``` scheme +```scheme (assignment_expression left: (identifier) @the-function-name right: (function)) @@ -496,7 +502,7 @@ For example, this pattern would match any assignment of a `function` to an `iden And this pattern would match all method definitions, associating the name `the-method-name` with the method name, `the-class-name` with the containing class name: -``` scheme +```scheme (class_declaration name: (identifier) @the-class-name body: (class_body @@ -510,13 +516,13 @@ You can match a repeating sequence of sibling nodes using the postfix `+` and `* For example, this pattern would match a sequence of one or more comments: -``` scheme +```scheme (comment)+ ``` This pattern would match a class declaration, capturing all of the decorators if any were present: -``` scheme +```scheme (class_declaration (decorator)* @the-decorator name: (identifier) @the-name) @@ -524,7 +530,7 @@ This pattern would match a class declaration, capturing all of the decorators if You can also mark a node as optional using the `?` operator. For example, this pattern would match all function calls, capturing a string argument if one was present: -``` scheme +```scheme (call_expression function: (identifier) @the-function arguments: (arguments (string)? @the-string-arg)) @@ -534,7 +540,7 @@ You can also mark a node as optional using the `?` operator. For example, this p You can also use parentheses for grouping a sequence of _sibling_ nodes. 
For example, this pattern would match a comment followed by a function declaration: -``` scheme +```scheme ( (comment) (function_declaration) @@ -543,7 +549,7 @@ You can also use parentheses for grouping a sequence of _sibling_ nodes. For exa Any of the quantification operators mentioned above (`+`, `*`, and `?`) can also be applied to groups. For example, this pattern would match a comma-separated series of numbers: -``` scheme +```scheme ( (number) ("," (number))* @@ -558,7 +564,7 @@ This is similar to _character classes_ from regular expressions (`[abc]` matches For example, this pattern would match a call to either a variable or an object property. In the case of a variable, capture it as `@function`, and in the case of a property, capture it as `@method`: -``` scheme +```scheme (call_expression function: [ (identifier) @function @@ -569,7 +575,7 @@ In the case of a variable, capture it as `@function`, and in the case of a prope This pattern would match a set of possible keyword tokens, capturing them as `@keyword`: -``` scheme +```scheme [ "break" "delete" @@ -592,7 +598,7 @@ and `_` will match any named or anonymous node. For example, this pattern would match any node inside a call: -``` scheme +```scheme (call (_) @call.inner) ``` @@ -602,7 +608,7 @@ The anchor operator, `.`, is used to constrain the ways in which child patterns When `.` is placed before the _first_ child within a parent pattern, the child will only match when it is the first named node in the parent. For example, the below pattern matches a given `array` node at most once, assigning the `@the-element` capture to the first `identifier` node in the parent `array`: -``` scheme +```scheme (array . (identifier) @the-element) ``` @@ -610,13 +616,13 @@ Without this anchor, the pattern would match once for every identifier in the ar Similarly, an anchor placed after a pattern's _last_ child will cause that child pattern to only match nodes that are the last named child of their parent. 
The below pattern matches only nodes that are the last named child within a `block`. -``` scheme +```scheme (block (_) @last-expression .) ``` Finally, an anchor _between_ two child patterns will cause the patterns to only match nodes that are immediate siblings. The pattern below, given a long dotted name like `a.b.c.d`, will only match pairs of consecutive identifiers: `a, b`, `b, c`, and `c, d`. -``` scheme +```scheme (dotted_name (identifier) @prev-id . @@ -629,20 +635,38 @@ The restrictions placed on a pattern by an anchor operator ignore anonymous node #### Predicates -You can also specify arbitrary metadata and conditions associated with a pattern by adding _predicate_ S-expressions anywhere within your pattern. Predicate S-expressions start with a _predicate name_ beginning with a `#` character. After that, they can contain an arbitrary number of `@`-prefixed capture names or strings. +You can also specify arbitrary metadata and conditions associated with a pattern +by adding _predicate_ S-expressions anywhere within your pattern. Predicate S-expressions +start with a _predicate name_ beginning with a `#` character. After that, they can +contain an arbitrary number of `@`-prefixed capture names or strings. -For example, this pattern would match identifier whose names is written in `SCREAMING_SNAKE_CASE`: +Tree-Sitter's CLI supports the following predicates by default: -``` scheme -( - (identifier) @constant - (#match? @constant "^[A-Z][A-Z_]+") -) +##### eq?, not-eq?, any-eq?, any-not-eq? + +This family of predicates allows you to match against a single capture or string +value. + +The first argument must be a capture, but the second can be either a capture to +compare the two captures' text, or a string to compare first capture's text +against. + +The base predicate is "#eq?", but its complement "#not-eq?" can be used to _not_ +match a value. + +Consider the following example targeting C: + +```scheme +((identifier) @variable.builtin + (#eq? 
@variable.builtin "self")) ``` -And this pattern would match key-value pairs where the `value` is an identifier with the same name as the key: +This pattern would match any identifier that is `self`. + +And this pattern would match key-value pairs where the `value` is an identifier +with the same name as the key: -``` scheme +```scheme ( (pair key: (property_identifier) @key-name @@ -651,7 +675,87 @@ And this pattern would match key-value pairs where the `value` is an identifier ) ``` -_Note_ - Predicates are not handled directly by the Tree-sitter C library. They are just exposed in a structured form so that higher-level code can perform the filtering. However, higher-level bindings to Tree-sitter like [the Rust crate](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust) or the [WebAssembly binding](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web) implement a few common predicates like `#eq?` and `#match?`. +The prefix "any-" is meant for use with quantified captures. Here's +an example finding a segment of empty comments + +```scheme +((comment)+ @comment.empty + (#any-eq? @comment.empty "//")) +``` + +Note that "#any-eq?" will match a quantified capture if +_any_ of the nodes match the predicate, while by default a quantified capture +will only match if _all_ the nodes match the predicate. + +##### match?, not-match?, any-match?, any-not-match? + +These predicates are similar to the eq? predicates, but they use regular expressions +to match against the capture's text. + +The first argument must be a capture, and the second must be a string containing +a regular expression. + +For example, this pattern would match identifier whose name is written in `SCREAMING_SNAKE_CASE`: + +```scheme +((identifier) @constant + (#match? @constant "^[A-Z][A-Z_]+")) +``` + +Here's an example finding potential documentation comments in C + +```scheme +((comment)+ @comment.documentation + (#match? 
@comment.documentation "^///\s+.*")) +``` + +Here's another example finding Cgo comments to potentially inject with C + +```scheme +((comment)+ @injection.content + . + (import_declaration + (import_spec path: (interpreted_string_literal) @_import_c)) + (#eq? @_import_c "\"C\"") + (#match? @injection.content "^//")) +``` + +##### any-of?, not-any-of? + +The "any-of?" predicate allows you to match a capture against multiple strings, +and will match if the capture's text is equal to any of the strings. + +Consider this example that targets JavaScript: + +```scheme +((identifier) @variable.builtin + (#any-of? @variable.builtin + "arguments" + "module" + "console" + "window" + "document")) +``` + +This will match any of the builtin variables in JavaScript. + +_Note_ — Predicates are not handled directly by the Tree-sitter C library. +They are just exposed in a structured form so that higher-level code can perform +the filtering. However, higher-level bindings to Tree-sitter like +[the Rust Crate](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_rust) +or the [WebAssembly binding](https://github.com/tree-sitter/tree-sitter/tree/master/lib/binding_web) +do implement a few common predicates like the `#eq?`, `#match?`, and `#any-of?` +predicates explained above. 
+ +To recap about the predicates Tree-Sitter's bindings support: + +- `#eq?` checks for a direct match against a capture or string +- `#match?` checks for a match against a regular expression +- `#any-of?` checks for a match against a list of strings +- Adding `not-` to the beginning of any of these predicates will negate the match +- By default, a quantified capture will only match if _all_ of the nodes match the predicate +- Adding `any-` before the `eq` or `match` predicates will instead match if any of the nodes match the predicate + ### The Query API diff --git a/third-party/tree-sitter/tree-sitter/docs/section-3-creating-parsers.md b/third-party/tree-sitter/tree-sitter/docs/section-3-creating-parsers.md index 5677292f0cd..33976442b96 100644 --- a/third-party/tree-sitter/tree-sitter/docs/section-3-creating-parsers.md +++ b/third-party/tree-sitter/tree-sitter/docs/section-3-creating-parsers.md @@ -5,7 +5,7 @@ permalink: creating-parsers # Creating parsers -Developing Tree-sitter grammars can have a difficult learning curve, but once you get the hang of it, it can be fun and even zen-like. This document will help get you to get started and to develop a useful mental model. +Developing Tree-sitter grammars can have a difficult learning curve, but once you get the hang of it, it can be fun and even zen-like. This document will help you to get started and to develop a useful mental model. ## Getting Started @@ -20,7 +20,7 @@ In order to develop a Tree-sitter parser, there are two dependencies that you ne To create a Tree-sitter parser, you need to use [the `tree-sitter` CLI][tree-sitter-cli]. You can install the CLI in a few different ways: -* Build the `tree-sitter-cli` [Rust crate][crate] from source using [`cargo`][cargo], the Rust package manager. This works on any platform. See [the contributing docs](/docs/section-5-contributing.md#developing-tree-sitter) for more information. 
+* Build the `tree-sitter-cli` [Rust crate][crate] from source using [`cargo`][cargo], the Rust package manager. This works on any platform. See [the contributing docs](./contributing#developing-tree-sitter) for more information. * Install the `tree-sitter-cli` [Node.js module][node-module] using [`npm`][npm], the Node package manager. This approach is fast, but it only works on certain platforms, because it relies on pre-built binaries. * Download a binary for your platform from [the latest GitHub release][releases], and put it into a directory on your `PATH`. @@ -46,7 +46,7 @@ npm install --save nan npm install --save-dev tree-sitter-cli ``` -The last command will install the CLI into the `node_modules` folder in your working directory. An executable program called `tree-sitter` will be created inside of `node_modules/.bin/`. You may want to follow the Node.js convention of adding that folder to your your `PATH` so that you can easily run this program when working in this directory. +The last command will install the CLI into the `node_modules` folder in your working directory. An executable program called `tree-sitter` will be created inside of `node_modules/.bin/`. You may want to follow the Node.js convention of adding that folder to your `PATH` so that you can easily run this program when working in this directory. ```sh # In your shell profile script @@ -80,7 +80,9 @@ You can test this parser by creating a source file with the contents "hello" and echo 'hello' > example-file tree-sitter parse example-file ``` + Alternatively, in Windows PowerShell: + ```pwsh "hello" | Out-File example-file -Encoding utf8 tree-sitter parse example-file @@ -88,7 +90,7 @@ tree-sitter parse example-file This should print the following: -``` +```text (source_file [0, 0] - [1, 0]) ``` @@ -102,14 +104,47 @@ Let's go over all of the functionality of the `tree-sitter` command line tool. The most important command you'll use is `tree-sitter generate`.
This command reads the `grammar.js` file in your current working directory and creates a file called `src/parser.c`, which implements the parser. After making changes to your grammar, just run `tree-sitter generate` again. -The first time you run `tree-sitter generate`, it will also generate a few other files: +The first time you run `tree-sitter generate`, it will also generate a few other files for bindings for the following languages: + +#### C/C++ + +* `Makefile` - This file tells `make` how to compile your language. +* `bindings/c/tree-sitter-language.h` - This file provides the C interface of your language. +* `bindings/c/tree-sitter-language.pc` - This file provides pkg-config metadata about your language's C library. +* `src/tree_sitter/parser.h` - This file provides some basic C definitions that are used in your generated `parser.c` file. +* `src/tree_sitter/alloc.h` - This file provides some memory allocation macros that are to be used in your external scanner, if you have one. +* `src/tree_sitter/array.h` - This file provides some array macros that are to be used in your external scanner, if you have one. + +#### Go + +* `bindings/go/binding.go` - This file wraps your language in a Go module. +* `bindings/go/binding_test.go` - This file contains a test for the Go package. + +#### Node * `binding.gyp` - This file tells Node.js how to compile your language. * `bindings/node/index.js` - This is the file that Node.js initially loads when using your language. -* `bindings/node/binding.cc` - This file wraps your language in a JavaScript object when used in Node.js. +* `bindings/node/binding.cc` - This file wraps your language in a JavaScript module for Node.js. + +#### Python + +* `pyproject.toml` - This file is the manifest of the Python package. +* `setup.py` - This file tells Python how to compile your language. +* `bindings/python/binding.c` - This file wraps your language in a Python module. 
+* `bindings/python/tree_sitter_language/__init__.py` - This file tells Python how to load your language. +* `bindings/python/tree_sitter_language/__init__.pyi` - This file provides type hints for your parser when used in Python. +* `bindings/python/tree_sitter_language/py.typed` - This file provides type hints for your parser when used in Python. + +#### Rust + +* `Cargo.toml` - This file is the manifest of the Rust package. * `bindings/rust/lib.rs` - This file wraps your language in a Rust crate when used in Rust. * `bindings/rust/build.rs` - This file wraps the building process for the Rust crate. -* `src/tree_sitter/parser.h` - This file provides some basic C definitions that are used in your generated `parser.c` file. + +#### Swift + +* `Package.swift` - This file tells Swift how to compile your language. +* `bindings/swift/TreeSitterLanguage/language.h` - This file wraps your language in a Swift module when used in Swift. If there is an ambiguity or *local ambiguity* in your grammar, Tree-sitter will detect it during parser generation, and it will exit with a `Unresolved conflict` error message. See below for more information on these errors. @@ -117,11 +152,11 @@ If there is an ambiguity or *local ambiguity* in your grammar, Tree-sitter will The `tree-sitter test` command allows you to easily test that your parser is working correctly. -For each rule that you add to the grammar, you should first create a *test* that describes how the syntax trees should look when parsing that rule. These tests are written using specially-formatted text files in the `corpus/` or `test/corpus/` directories within your parser's root folder. +For each rule that you add to the grammar, you should first create a *test* that describes how the syntax trees should look when parsing that rule. These tests are written using specially-formatted text files in the `test/corpus/` directory within your parser's root folder. 
For example, you might have a file called `test/corpus/statements.txt` that contains a series of entries like this: -``` +```text ================== Return statements ================== @@ -147,7 +182,7 @@ func x() int { The expected output section can also *optionally* show the [*field names*][field-names-section] associated with each child node. To include field names in your tests, you write a node's field name followed by a colon, before the node itself in the S-expression: -``` +```text (source_file (function_definition name: (identifier) @@ -159,7 +194,7 @@ func x() int { * If your language's syntax conflicts with the `===` and `---` test separators, you can optionally add an arbitrary identical suffix (in the below example, `|||`) to disambiguate them: -``` +```text ==================||| Basic module ==================||| @@ -179,13 +214,73 @@ increment(n) == n + 1 These tests are important. They serve as the parser's API documentation, and they can be run every time you change the grammar to verify that everything still parses correctly. -By default, the `tree-sitter test` command runs all of the tests in your `corpus` or `test/corpus/` folder. To run a particular test, you can use the `-f` flag: +By default, the `tree-sitter test` command runs all of the tests in your `test/corpus/` folder. To run a particular test, you can use the `-f` flag: ```sh tree-sitter test -f 'Return statements' ``` -The recommendation is to be comprehensive in adding tests. If it's a visible node, add it to a test file in your `corpus` directory. It's typically a good idea to test all of the permutations of each language construct. This increases test coverage, but doubly acquaints readers with a way to examine expected outputs and understand the "edges" of a language. +The recommendation is to be comprehensive in adding tests. If it's a visible node, add it to a test file in your `test/corpus` directory. 
It's typically a good idea to test all of the permutations of each language construct. This increases test coverage, but doubly acquaints readers with a way to examine expected outputs and understand the "edges" of a language. + +#### Attributes + +Tests can be annotated with a few `attributes`. Attributes must be put in the header, below the test name, and start with a `:`. +A couple of attributes also take in a parameter, which requires the use of parentheses. + +**Note**: If you'd like to supply multiple parameters, e.g. to run tests on multiple platforms or to test multiple languages, you can repeat the attribute on a new line. + +The following attributes are available: + +- `:skip` — This attribute will skip the test when running `tree-sitter test`. + This is useful when you want to temporarily disable running a test without deleting it. +- `:error` — This attribute will assert that the parse tree contains an error. It's useful to just validate that a certain input is invalid without displaying the whole parse tree, as such you should omit the parse tree below the `---` line. +- `:fail-fast` — This attribute will stop testing additional tests if the test marked with this attribute fails. +- `:language(LANG)` — This attribute will run the tests using the parser for the specified language. This is useful for multi-parser repos, such as XML and DTD, or Typescript and TSX. The default parser will be the first entry in the `tree-sitter` field in the root `package.json`, so having a way to pick a second or even third parser is useful. +- `:platform(PLATFORM)` — This attribute specifies the platform on which the test should run. It is useful to test platform-specific behavior (e.g. Windows newlines are different from Unix). This attribute must match up with Rust's [`std::env::consts::OS`](https://doc.rust-lang.org/std/env/consts/constant.OS.html).
+ +Examples using attributes: + +```text +========================= +Test that will be skipped +:skip +========================= + +int main() {} + +------------------------- + +==================================== +Test that will run on Linux or macOS + +:platform(linux) +:platform(macos) +==================================== + +int main() {} + +------------------------------------ + +======================================================================== +Test that expects an error, and will fail fast if there's no parse error +:fail-fast +:error +======================================================================== + +int main ( {} + +------------------------------------------------------------------------ + +================================================= +Test that will parse with both Typescript and TSX +:language(typescript) +:language(tsx) +================================================= + +console.log('Hello, world!'); + +------------------------------------------------- +``` #### Automatic Compilation @@ -199,7 +294,7 @@ The `tree-sitter test` command will *also* run any syntax highlighting tests in You can run your parser on an arbitrary file using `tree-sitter parse`. This will print the resulting the syntax tree, including nodes' ranges and field names, like this: -``` +```text (source_file [0, 0] - [3, 0] (function_declaration [0, 0] - [2, 1] name: (identifier [0, 5] - [0, 9]) @@ -227,6 +322,20 @@ The following is a complete list of built-in functions you can use in your `gram * **Symbols (the `$` object)** - Every grammar rule is written as a JavaScript function that takes a parameter conventionally called `$`. The syntax `$.identifier` is how you refer to another grammar symbol within a rule. Names starting with `$.MISSING` or `$.UNEXPECTED` should be avoided as they have special meaning for the `tree-sitter test` command. 
* **String and Regex literals** - The terminal symbols in a grammar are described using JavaScript strings and regular expressions. Of course during parsing, Tree-sitter does not actually use JavaScript's regex engine to evaluate these regexes; it generates its own regex-matching logic as part of each parser. Regex literals are just used as a convenient way of writing regular expressions in your grammar. +* **Regex Limitations** - Currently, only a subset of the Regex engine is actually +supported. This is due to certain features like lookahead and lookaround assertions +not feasible to use in an LR(1) grammar, as well as certain flags being unnecessary +for tree-sitter. However, plenty of features are supported by default: + + * Character classes + * Character ranges + * Character sets + * Quantifiers + * Alternation + * Grouping + * Unicode character escapes + * Unicode property escapes + * **Sequences : `seq(rule1, rule2, ...)`** - This function creates a rule that matches any number of other rules, one after another. It is analogous to simply writing multiple symbols next to each other in [EBNF notation][ebnf]. * **Alternatives : `choice(rule1, rule2, ...)`** - This function creates a rule that matches *one* of a set of possible rules. The order of the arguments does not matter. This is analogous to the `|` (pipe) operator in EBNF notation. * **Repetitions : `repeat(rule)`** - This function creates a rule that matches *zero-or-more* occurrences of a given rule. It is analogous to the `{x}` (curly brace) syntax in EBNF notation. @@ -236,7 +345,15 @@ The following is a complete list of built-in functions you can use in your `gram * **Left Associativity : `prec.left([number], rule)`** - This function marks the given rule as left-associative (and optionally applies a numerical precedence). When an LR(1) conflict arises in which all of the rules have the same numerical precedence, Tree-sitter will consult the rules' associativity. 
If there is a left-associative rule, Tree-sitter will prefer matching a rule that ends *earlier*. This works similarly to [associativity directives][yacc-prec] in Yacc grammars. * **Right Associativity : `prec.right([number], rule)`** - This function is like `prec.left`, but it instructs Tree-sitter to prefer matching a rule that ends *later*. * **Dynamic Precedence : `prec.dynamic(number, rule)`** - This function is similar to `prec`, but the given numerical precedence is applied at *runtime* instead of at parser generation time. This is only necessary when handling a conflict dynamically using the `conflicts` field in the grammar, and when there is a genuine *ambiguity*: multiple rules correctly match a given piece of code. In that event, Tree-sitter compares the total dynamic precedence associated with each rule, and selects the one with the highest total. This is similar to [dynamic precedence directives][bison-dprec] in Bison grammars. -* **Tokens : `token(rule)`** - This function marks the given rule as producing only a single token. Tree-sitter's default is to treat each String or RegExp literal in the grammar as a separate token. Each token is matched separately by the lexer and returned as its own leaf node in the tree. The `token` function allows you to express a complex rule using the functions described above (rather than as a single regular expression) but still have Tree-sitter treat it as a single token. +* **Tokens : `token(rule)`** - This function marks the given rule as producing only +a single token. Tree-sitter's default is to treat each String or RegExp literal +in the grammar as a separate token. Each token is matched separately by the lexer +and returned as its own leaf node in the tree. The `token` function allows you to +express a complex rule using the functions described above (rather than as a single +regular expression) but still have Tree-sitter treat it as a single token. 
+The token function will only accept terminal rules, so `token($.foo)` will not work. +You can think of it as a shortcut for squashing complex rules of strings or regexes +down to a single token. * **Immediate Tokens : `token.immediate(rule)`** - Usually, whitespace (and any other extras, such as comments) is optional before each token. This function means that the token will only match if there is no whitespace. * **Aliases : `alias(rule, name)`** - This function causes the given rule to *appear* with an alternative name in the syntax tree. If `name` is a *symbol*, as in `alias($.foo, $.bar)`, then the aliased rule will *appear* as a [named node][named-vs-anonymous-nodes-section] called `bar`. And if `name` is a *string literal*, as in `alias($.foo, 'bar')`, then the aliased rule will appear as an [anonymous node][named-vs-anonymous-nodes-section], as if the rule had been written as the simple string. * **Field Names : `field(name, rule)`** - This function assigns a *field name* to the child node(s) matched by the given rule. In the resulting syntax tree, you can then use that field name to access specific children. @@ -251,7 +368,6 @@ In addition to the `name` and `rules` fields, grammars have a few other optional * **`word`** - the name of a token that will match keywords for the purpose of the [keyword extraction](#keyword-extraction) optimization. * **`supertypes`** an array of hidden rule names which should be considered to be 'supertypes' in the generated [*node types* file][static-node-types]. - ## Writing the Grammar Writing a grammar requires creativity. There are an infinite number of CFGs (context-free grammars) that can be used to describe any given language. 
In order to produce a good Tree-sitter parser, you need to create a grammar with two important properties: @@ -363,7 +479,7 @@ With this structure in place, you can now freely decide what part of the grammar After developing the *type* sublanguage a bit further, you might decide to switch to working on *statements* or *expressions* instead. It's often useful to check your progress by trying to parse some real code using `tree-sitter parse`. -**And remember to add tests for each rule in your `corpus` folder!** +**And remember to add tests for each rule in your `test/corpus` folder!** ### Structuring Rules Well @@ -375,7 +491,7 @@ return x + y; According to the specification, this line is a `ReturnStatement`, the fragment `x + y` is an `AdditiveExpression`, and `x` and `y` are both `IdentifierReferences`. The relationship between these constructs is captured by a complex series of production rules: -``` +```text ReturnStatement -> 'return' Expression Expression -> AssignmentExpression AssignmentExpression -> ConditionalExpression @@ -432,7 +548,7 @@ To produce a readable syntax tree, we'd like to model JavaScript expressions usi Of course, this flat structure is highly ambiguous. If we try to generate a parser, Tree-sitter gives us an error message: -``` +```text Error: Unresolved conflict for symbol sequence: '-' _expression • '*' … @@ -468,7 +584,7 @@ For an expression like `-a * b`, it's not clear whether the `-` operator applies Applying a higher precedence in `unary_expression` fixes that conflict, but there is still another conflict: -``` +```text Error: Unresolved conflict for symbol sequence: _expression '*' _expression • '*' … @@ -526,29 +642,21 @@ Tree-sitter's parsing process is divided into two phases: parsing (which is desc Grammars often contain multiple tokens that can match the same characters. For example, a grammar might contain the tokens (`"if"` and `/[a-z]+/`). Tree-sitter differentiates between these conflicting tokens in a few ways. -1. 
**External Scanning** - If your grammar has an external scanner and one or more tokens in your `externals` array are valid at the current location, your external scanner will always be called first to determine whether those tokens are present. +1. **Context-aware Lexing** - Tree-sitter performs lexing on-demand, during the parsing process. At any given position in a source document, the lexer only tries to recognize tokens that are *valid* at that position in the document. -1. **Context-Aware Lexing** - Tree-sitter performs lexing on-demand, during the parsing process. At any given position in a source document, the lexer only tries to recognize tokens that are *valid* at that position in the document. +2. **Lexical Precedence** - When the precedence functions described [above](#the-grammar-dsl) are used *within* the `token` function, the given explicit precedence values serve as instructions to the lexer. If there are two valid tokens that match the characters at a given position in the document, Tree-sitter will select the one with the higher precedence. -1. **Earliest Starting Position** - Tree-sitter will prefer tokens with an earlier starting position. This is most often seen with very permissive regular expressions similar to `/.*/`, which are greedy and will consume as much text as possible. In this example the regex would consume all text until hitting a newline - even if text on that line could be interpreted as a different token. +3. **Match Length** - If multiple valid tokens with the same precedence match the characters at a given position in a document, Tree-sitter will select the token that matches the [longest sequence of characters][longest-match]. -1. **Explicit Lexical Precedence** - When the precedence functions described [above](#the-grammar-dsl) are used within the `token` function, the given precedence values serve as instructions to the lexer. 
If there are two valid tokens that match the characters at a given position in the document, Tree-sitter will select the one with the higher precedence. +4. **Match Specificity** - If there are two valid tokens with the same precedence and which both match the same number of characters, Tree-sitter will prefer a token that is specified in the grammar as a `String` over a token specified as a `RegExp`. -1. **Match Length** - If multiple valid tokens with the same precedence match the characters at a given position in a document, Tree-sitter will select the token that matches the [longest sequence of characters][longest-match]. +5. **Rule Order** - If none of the above criteria can be used to select one token over another, Tree-sitter will prefer the token that appears earlier in the grammar. -1. **Match Specificity** - If there are two valid tokens with the same precedence and which both match the same number of characters, Tree-sitter will prefer a token that is specified in the grammar as a `String` over a token specified as a `RegExp`. - -1. **Rule Order** - If none of the above criteria can be used to select one token over another, Tree-sitter will prefer the token that appears earlier in the grammar. +If there is an external scanner it may have [an additional impact](#other-external-scanner-details) over regular tokens defined in the grammar. ### Lexical Precedence vs. Parse Precedence -One common mistake involves not distinguishing lexical precedence from parse precedence. -Parse precedence determines which rule is chosen to interpret a given sequence of tokens. -Lexical precedence determines which token is chosen to interpret a given section of text. -It is a lower-level operation that is done first. -The above list fully capture tree-sitter's lexical precedence rules, and you will probably refer back to this section of the documentation more often than any other. -Most of the time when you really get stuck, you're dealing with a lexical precedence problem. 
-Pay particular attention to the difference in meaning between using `prec` inside the `token` function versus outside of it. +One common mistake involves not distinguishing *lexical precedence* from *parse precedence*. Parse precedence determines which rule is chosen to interpret a given sequence of tokens. *Lexical precedence* determines which token is chosen to interpret at a given position of text and it is a lower-level operation that is done first. The above list fully captures Tree-sitter's lexical precedence rules, and you will probably refer back to this section of the documentation more often than any other. Most of the time when you really get stuck, you're dealing with a lexical precedence problem. Pay particular attention to the difference in meaning between using `prec` inside of the `token` function versus outside of it. The *lexical precedence* syntax is `token(prec(N, ...))`. ### Keywords @@ -587,7 +695,7 @@ grammar({ ), binary_expression: $ => choice( - prec.left(1, seq($._expression, 'instanceof', $._expression) + prec.left(1, seq($._expression, 'instanceof', $._expression)) // ... ), @@ -608,6 +716,7 @@ Aside from improving error detection, keyword extraction also has performance be ### External Scanners Many languages have some tokens whose structure is impossible or inconvenient to describe with a regular expression. Some examples: + * [Indent and dedent][indent-tokens] tokens in Python * [Heredocs][heredoc] in Bash and Ruby * [Percent strings][percent-string] in Ruby @@ -632,10 +741,19 @@ grammar({ Then, add another C or C++ source file to your project. Currently, its path must be `src/scanner.c` or `src/scanner.cc` for the CLI to recognize it. Be sure to add this file to the `sources` section of your `binding.gyp` file so that it will be included when your project is compiled by Node.js and uncomment the appropriate block in your `bindings/rust/build.rs` file so that it will be included in your Rust crate. 
+> **Note** +> +> C++ scanners are now deprecated and will be removed in the near future. +> While it is currently possible to write an external scanner in C++, it can be difficult +> to get working cross-platform and introduces extra requirements; therefore it +> is *greatly* preferred to use C. + In this new source file, define an [`enum`][enum] type containing the names of all of your external tokens. The ordering of this enum must match the order in your grammar's `externals` array; the actual names do not matter. ```c -#include +#include "tree_sitter/parser.h" +#include "tree_sitter/alloc.h" +#include "tree_sitter/array.h" enum TokenType { INDENT, @@ -656,7 +774,6 @@ void * tree_sitter_my_language_external_scanner_create() { This function should create your scanner object. It will only be called once anytime your language is set on a parser. Often, you will want to allocate memory on the heap and return a pointer to it. If your external scanner doesn't need to maintain any state, it's ok to return `NULL`. - #### Destroy ```c @@ -716,13 +833,13 @@ This function is responsible for recognizing external tokens. It should return ` * **`void (*advance)(TSLexer *, bool skip)`** - A function for advancing to the next character. If you pass `true` for the second argument, the current character will be treated as whitespace; whitespace won't be included in the text range associated with tokens emitted by the external scanner. * **`void (*mark_end)(TSLexer *)`** - A function for marking the end of the recognized token. This allows matching tokens that require multiple characters of lookahead. By default (if you don't call `mark_end`), any character that you moved past using the `advance` function will be included in the size of the token. But once you call `mark_end`, then any later calls to `advance` will *not* increase the size of the returned token. You can call `mark_end` multiple times to increase the size of the token. 
* **`uint32_t (*get_column)(TSLexer *)`** - A function for querying the current column position of the lexer. It returns the number of codepoints since the start of the current line. The codepoint position is recalculated on every call to this function by reading from the start of the line. -* **`bool (*is_at_included_range_start)(const TSLexer *)`** - A function for checking whether the parser has just skipped some characters in the document. When parsing an embedded document using the `ts_parser_set_included_ranges` function (described in the [multi-language document section][multi-language-section]), your scanner may want to apply some special behavior when moving to a disjoint part of the document. For example, in [EJS documents][ejs], the JavaScript parser uses this function to enable inserting automatic semicolon tokens in between the code directives, delimited by `<%` and `%>`. +* **`bool (*is_at_included_range_start)(const TSLexer *)`** - A function for checking whether the parser has just skipped some characters in the document. When parsing an embedded document using the `ts_parser_set_included_ranges` function (described in the [multi-language document section][multi-language-section]), the scanner may want to apply some special behavior when moving to a disjoint part of the document. For example, in [EJS documents][ejs], the JavaScript parser uses this function to enable inserting automatic semicolon tokens in between the code directives, delimited by `<%` and `%>`. * **`bool (*eof)(const TSLexer *)`** - A function for determining whether the lexer is at the end of the file. The value of `lookahead` will be `0` at the end of a file, but this function should be used instead of checking for that value because the `0` or "NUL" value is also a valid character that could be present in the file being parsed. -The third argument to the `scan` function is an array of booleans that indicates which of your external tokens are currently expected by the parser. 
You should only look for a given token if it is valid according to this array. At the same time, you cannot backtrack, so you may need to combine certain pieces of logic. +The third argument to the `scan` function is an array of booleans that indicates which of your external tokens are currently expected by the parser. You should only look for a given token if it is valid according to this array. At the same time, you cannot backtrack, so you may need to combine certain pieces of logic. ```c -if (valid_symbols[INDENT] || valid_symbol[DEDENT]) { +if (valid_symbols[INDENT] || valid_symbols[DEDENT]) { // ... logic that is common to both `INDENT` and `DEDENT` @@ -736,23 +853,124 @@ if (valid_symbols[INDENT] || valid_symbol[DEDENT]) { } ```
+ +For example, assuming you wanted to allocate 100 bytes for your scanner, you'd do so like the following example: + +```c +#include "tree_sitter/parser.h" +#include "tree_sitter/alloc.h" + +// ... + +void* tree_sitter_my_language_external_scanner_create() { + return ts_calloc(100, 1); // or ts_malloc(100) +} + +// ... + +``` + +##### Arrays + +If you need to use array-like types in your scanner, such as tracking a stack of indentations or tags, you should use the array macros from `tree_sitter/array.h`. + +There are quite a few of them provided for you, but here's how you could get started tracking some indentations. Check out the header itself for more detailed documentation. + +**NOTE**: Do not use any of the array functions or macros that are prefixed with an underscore and have comments saying that it is not what you are looking for. +These are internal functions used as helpers by other macros that are public. They are not meant to be used directly, nor are they what you want. + +```c +#include "tree_sitter/parser.h" +#include "tree_sitter/array.h" + +enum TokenType { + INDENT, + DEDENT, + NEWLINE, + STRING, +} + +// Create the array in your create function + +void* tree_sitter_my_language_external_scanner_create() { + return ts_calloc(1, sizeof(Array(int))); + + // or if you want to zero out the memory yourself + + Array(int) *stack = ts_malloc(sizeof(Array(int))); + array_init(&stack); + return stack; +} + +bool tree_sitter_my_language_external_scanner_scan( + void *payload, + TSLexer *lexer, + const bool *valid_symbols +) { + Array(int) *stack = payload; + if (valid_symbols[INDENT]) { + array_push(stack, lexer->get_column(lexer)); + lexer->result_symbol = INDENT; + return true; + } + if (valid_symbols[DEDENT]) { + array_pop(stack); // this returns the popped element by value, but we don't need it + lexer->result_symbol = DEDENT; + return true; + } + + // we can also use an array on the stack to keep track of a string + + Array(char) next_string = array_new(); + + if 
(valid_symbols[STRING] && lexer->lookahead == '"') { + lexer->advance(lexer, false); + while (lexer->lookahead != '"' && lexer->lookahead != '\n' && !lexer->eof(lexer)) { + array_push(&next_string, lexer->lookahead); + lexer->advance(lexer, false); + } + + // assume we have some arbitrary constraint of not having more than 100 characters in a string + if (lexer->lookahead == '"' && next_string.size <= 100) { + lexer->advance(lexer, false); + lexer->result_symbol = STRING; + return true; + } + } + + return false; +} + +``` + #### Other External Scanner Details -If a token in your `externals` array is valid at the current position in the parse, your external scanner will be called first before anything else is done. -This means your external scanner functions as a powerful override of tree-sitter's lexing behavior, and can be used to solve problems that can't be cracked with ordinary lexical, parse, or dynamic precedence. +If a token in the `externals` array is valid at a given position in the parse, the external scanner will be called first before anything else is done. This means the external scanner functions as a powerful override of Tree-sitter's lexing behavior, and can be used to solve problems that can't be cracked with ordinary lexical, parse, or dynamic precedence. + +If a syntax error is encountered during regular parsing, Tree-sitter's first action during error recovery will be to call the external scanner's `scan` function with all tokens marked valid. The scanner should detect this case and handle it appropriately. One simple method of detection is to add an unused token to the end of the `externals` array, for example `externals: $ => [$.token1, $.token2, $.error_sentinel]`, then check whether that token is marked valid to determine whether Tree-sitter is in error correction mode. 
+ +If you put terminal keywords in the `externals` array, for example `externals: $ => ['if', 'then', 'else']`, then any time those terminals are present in the grammar they will be tokenized by the external scanner. It is similar to writing `externals: [$.if_keyword, $.then_keyword, $.else_keyword]` then using `alias($.if_keyword, 'if')` in the grammar. -If a syntax error is encountered during regular parsing, tree-sitter's first action during error recovery will be to call your external scanner's `scan` function with all tokens marked valid. -Your scanner should detect this case and handle it appropriately. -One simple method of detection is to add an unused token to the end of your `externals` array, for example `externals: $ => [$.token1, $.token2, $.error_sentinel]`, then check whether that token is marked valid to determine whether tree-sitter is in error correction mode. +If the `externals` array uses literal keywords, then lexing works in two steps: the external scanner will be called first, and if it sets a resulting token and returns `true`, the token is considered recognized and Tree-sitter moves on to the next token. But the external scanner may return `false`, in which case Tree-sitter falls back to the internal lexing mechanism. -If you put terminal keywords in your `externals` array, for example `externals: $ => ['if', 'then', 'else']`, then any time those terminals are present in your grammar they will be tokenized by your external scanner. -It is equivalent to writing `externals: [$.if_keyword, $.then_keyword, $.else_keyword]` then using `alias($.if_keyword, 'if')` in your grammar.
+In case some keywords are defined in the `externals` array in a rule-referencing form like `$.if_keyword`, and there is no additional definition of that rule in the grammar rules, e.g., `if_keyword: $ => 'if'`, then falling back to the internal lexer isn't possible because Tree-sitter doesn't know the actual keyword, and it is fully the external scanner's responsibility to recognize such tokens. External scanners are a common cause of infinite loops. Be very careful when emitting zero-width tokens from your external scanner, and if you consume characters in a loop be sure to use the `eof` function to check whether you are at the end of the file. [ambiguous-grammar]: https://en.wikipedia.org/wiki/Ambiguous_grammar -[antlr]: http://www.antlr.org/ +[antlr]: https://www.antlr.org [bison-dprec]: https://www.gnu.org/software/bison/manual/html_node/Generalized-LR-Parsing.html [bison]: https://en.wikipedia.org/wiki/GNU_bison [c-linkage]: https://en.cppreference.com/w/cpp/language/language_linkage diff --git a/third-party/tree-sitter/tree-sitter/docs/section-4-syntax-highlighting.md b/third-party/tree-sitter/tree-sitter/docs/section-4-syntax-highlighting.md index a6e5d74c8fd..818172fd2ed 100644 --- a/third-party/tree-sitter/tree-sitter/docs/section-4-syntax-highlighting.md +++ b/third-party/tree-sitter/tree-sitter/docs/section-4-syntax-highlighting.md @@ -9,8 +9,6 @@ Syntax highlighting is a very common feature in applications that deal with code This document explains how the Tree-sitter syntax highlighting system works, using the command line interface. If you are using `tree-sitter-highlight` library (either from C or from Rust), all of these concepts are still applicable, but the configuration data is provided using in-memory objects, rather than files.
-**Note - If you are working on syntax highlighting in the [Atom](https://atom.io/) text editor, you should consult [the grammar-creation page](https://flight-manual.atom.io/hacking-atom/sections/creating-a-grammar/) of the Atom Flight Manual, *not* this document. Atom currently uses a different syntax highlighting system that is also based on Tree-sitter, but is older than the one described here.** - ## Overview All of the files needed to highlight a given language are normally included in the same git repository as the Tree-sitter grammar for that language (for example, [`tree-sitter-javascript`](https://github.com/tree-sitter/tree-sitter-javascript), [`tree-sitter-ruby`](https://github.com/tree-sitter/tree-sitter-ruby)). In order to run syntax highlighting from the command-line, three types of files are needed: @@ -27,9 +25,9 @@ The Tree-sitter CLI automatically creates two directories in your home folder. These directories are created in the "normal" place for your platform: -- On Linux, `~/.config/tree-sitter` and `~/.cache/tree-sitter` -- On Mac, `~/Library/Application Support/tree-sitter` and `~/Library/Caches/tree-sitter` -- On Windows, `C:\Users\[username]\AppData\Roaming\tree-sitter` and `C:\Users\[username]\AppData\Local\tree-sitter` +* On Linux, `~/.config/tree-sitter` and `~/.cache/tree-sitter` +* On Mac, `~/Library/Application Support/tree-sitter` and `~/Library/Caches/tree-sitter` +* On Windows, `C:\Users\[username]\AppData\Roaming\tree-sitter` and `C:\Users\[username]\AppData\Local\tree-sitter` The CLI will work if there's no config file present, falling back on default values for each configuration option. To create a config file that you can edit, run this command: @@ -63,6 +61,7 @@ In your config file, the `"theme"` value is an object whose keys are dot-separat #### Highlight Names A theme can contain multiple keys that share a common subsequence. 
Examples: + * `variable` and `variable.parameter` * `function`, `function.builtin`, and `function.method` @@ -93,6 +92,11 @@ These keys specify basic information about the parser: * `path` (optional) - A relative path from the directory containing `package.json` to another directory containing the `src/` folder, which contains the actual generated parser. The default value is `"."` (so that `src/` is in the same folder as `package.json`), and this very rarely needs to be overridden. +* `external-files` (optional) - A list of relative paths from the root dir of a +parser to files that should be checked for modifications during recompilation. +This is useful during development to have changes to other files besides scanner.c +be picked up by the cli. + ### Language Detection These keys help to decide whether the language applies to a given file: @@ -160,7 +164,7 @@ func increment(a int) int { With this syntax tree: -``` +```scheme (source_file (function_declaration name: (identifier) @@ -180,6 +184,7 @@ With this syntax tree: #### Example Query Suppose we wanted to render this code with the following colors: + * keywords `func` and `return` in purple * function `increment` in blue * type `int` in green @@ -187,7 +192,7 @@ Suppose we wanted to render this code with the following colors: We can assign each of these categories a *highlight name* using a query like this: -``` +```scheme ; highlights.scm "func" @keyword @@ -254,7 +259,7 @@ list = [item] With this syntax tree: -``` +```scheme (program (method name: (identifier) @@ -297,7 +302,7 @@ There are several different types of names within this method: Let's write some queries that let us clearly distinguish between these types of names. First, set up the highlighting query, as described in the previous section. 
We'll assign distinct colors to method calls, method definitions, and formal parameters: -``` +```scheme ; highlights.scm (call method: (identifier) @function.method) @@ -314,7 +319,7 @@ Let's write some queries that let us clearly distinguish between these types of Then, we'll set up a local variable query to keep track of the variables and scopes. Here, we're indicating that methods and blocks create local *scopes*, parameters and assignments create *definitions*, and other identifiers should be considered *references*: -``` +```scheme ; locals.scm (method) @local.scope @@ -347,6 +352,7 @@ Running `tree-sitter highlight` on this ruby file would produce output like this ### Language Injection Some source files contain code written in multiple different languages. Examples include: + * HTML files, which can contain JavaScript inside of ` diff --git a/third-party/tree-sitter/tree-sitter/docs/section-8-code-navigation-systems.md b/third-party/tree-sitter/tree-sitter/docs/section-8-code-navigation-systems.md index a1b6a280b3a..04346e465d1 100644 --- a/third-party/tree-sitter/tree-sitter/docs/section-8-code-navigation-systems.md +++ b/third-party/tree-sitter/tree-sitter/docs/section-8-code-navigation-systems.md @@ -9,7 +9,7 @@ Tree-sitter can be used in conjunction with its [tree query language](https://tr ## Tagging and captures -*Tagging* is the act of identifying the entities that can be named in a program. We use Tree-sitter queries to find those entities. Having found them, you use a syntax capture to label the entity and its name. +_Tagging_ is the act of identifying the entities that can be named in a program. We use Tree-sitter queries to find those entities. Having found them, you use a syntax capture to label the entity and its name. The essence of a given tag lies in two pieces of data: the _role_ of the entity that is matched (i.e. whether it is a definition or a reference) and the _kind_ of that entity, which describes how the entity is used (i.e. 
whether it's a class definition, function call, variable reference, and so on). Our convention is to use a syntax capture following the `@role.kind` capture name format, and another inner capture, always called `@name`, that pulls out the name of a given identifier. @@ -19,14 +19,14 @@ You may optionally include a capture named `@doc` to bind a docstring. For conve This [query](https://github.com/tree-sitter/tree-sitter-python/blob/78c4e9b6b2f08e1be23b541ffced47b15e2972ad/queries/tags.scm#L4-L5) recognizes Python function definitions and captures their declared name. The `function_definition` syntax node is defined in the [Python Tree-sitter grammar](https://github.com/tree-sitter/tree-sitter-python/blob/78c4e9b6b2f08e1be23b541ffced47b15e2972ad/grammar.js#L354). -``` scheme +```scheme (function_definition name: (identifier) @name) @definition.function ``` A more sophisticated query can be found in the [JavaScript Tree-sitter repository](https://github.com/tree-sitter/tree-sitter-javascript/blob/fdeb68ac8d2bd5a78b943528bb68ceda3aade2eb/queries/tags.scm#L63-L70): -``` scheme +```scheme (assignment_expression left: [ (identifier) @name @@ -39,7 +39,7 @@ A more sophisticated query can be found in the [JavaScript Tree-sitter repositor An even more sophisticated query is in the [Ruby Tree-sitter repository](https://github.com/tree-sitter/tree-sitter-ruby/blob/1ebfdb288842dae5a9233e2509a135949023dd82/queries/tags.scm#L24-L43), which uses built-in functions to strip the Ruby comment character (`#`) from the docstrings associated with a class or singleton-class declaration, then selects only the docstrings adjacent to the node matched as `@definition.class`. -``` scheme +```scheme ( (comment)* @doc . @@ -79,7 +79,7 @@ The below table describes a standard vocabulary for kinds and roles during the t You can use the `tree-sitter tags` command to test out a tags query file, passing as arguments one or more files to tag. 
We can run this tool from within the Tree-sitter Ruby repository, over code in a file called `test.rb`: -``` ruby +```ruby module Foo class Bar # won't be included @@ -93,7 +93,7 @@ end Invoking `tree-sitter tags test.rb` produces the following console output, representing matched entities' name, role, location, first line, and docstring: -``` +```text test.rb Foo | module def (0, 7) - (0, 10) `module Foo` Bar | class def (1, 8) - (1, 11) `class Bar` diff --git a/third-party/tree-sitter/tree-sitter/highlight/Cargo.toml b/third-party/tree-sitter/tree-sitter/highlight/Cargo.toml index e85ced8e7c2..694f5064e1f 100644 --- a/third-party/tree-sitter/tree-sitter/highlight/Cargo.toml +++ b/third-party/tree-sitter/tree-sitter/highlight/Cargo.toml @@ -1,26 +1,26 @@ [package] name = "tree-sitter-highlight" +version.workspace = true description = "Library for performing syntax highlighting with Tree-sitter" -version = "0.20.1" authors = [ "Max Brunsfeld ", "Tim Clem ", ] -license = "MIT" +edition.workspace = true +rust-version.workspace = true readme = "README.md" -edition = "2018" +homepage.workspace = true +repository.workspace = true +license.workspace = true keywords = ["incremental", "parsing", "syntax", "highlighting"] categories = ["parsing", "text-editors"] -repository = "https://github.com/tree-sitter/tree-sitter" -rust-version.workspace = true [lib] crate-type = ["lib", "staticlib"] [dependencies] -regex = "1" -thiserror = "1.0" +lazy_static.workspace = true +regex.workspace = true +thiserror.workspace = true -[dependencies.tree-sitter] -version = "0.20" -path = "../lib" +tree-sitter.workspace = true diff --git a/third-party/tree-sitter/tree-sitter/highlight/README.md b/third-party/tree-sitter/tree-sitter/highlight/README.md index e8a5d063a18..982e510a60d 100644 --- a/third-party/tree-sitter/tree-sitter/highlight/README.md +++ b/third-party/tree-sitter/tree-sitter/highlight/README.md @@ -1,22 +1,25 @@ -# `tree-sitter-highlight` +# Tree-sitter Highlight 
-[![Crates.io](https://img.shields.io/crates/v/tree-sitter-highlight.svg)](https://crates.io/crates/tree-sitter-highlight) +[![crates.io badge]][crates.io] -### Usage +[crates.io]: https://crates.io/crates/tree-sitter-highlight +[crates.io badge]: https://img.shields.io/crates/v/tree-sitter-highlight.svg?color=%23B48723 -Add this crate, and the language-specific crates for whichever languages you want to parse, to your `Cargo.toml`: +## Usage + +Add this crate, and the language-specific crates for whichever languages you want +to parse, to your `Cargo.toml`: ```toml [dependencies] -tree-sitter-highlight = "0.19" -tree-sitter-html = "0.19" -tree-sitter-javascript = "0.19" +tree-sitter-highlight = "^0.21.0" +tree-sitter-javascript = "0.20.3" ``` Define the list of highlight names that you will recognize: ```rust -let highlight_names = &[ +let highlight_names = [ "attribute", "constant", "function.builtin", @@ -38,34 +41,29 @@ let highlight_names = &[ ]; ``` -Create a highlighter. You need one of these for each thread that you're using for syntax highlighting: +Create a highlighter. 
You need one of these for each thread that you're using for +syntax highlighting: ```rust use tree_sitter_highlight::Highlighter; -let highlighter = Highlighter::new(); +let mut highlighter = Highlighter::new(); ``` -Load some highlighting queries from the `queries` directory of some language repositories: +Load some highlighting queries from the `queries` directory of the language repository: ```rust use tree_sitter_highlight::HighlightConfiguration; -let html_language = unsafe { tree_sitter_html() }; -let javascript_language = unsafe { tree_sitter_javascript() }; - -let html_config = HighlightConfiguration::new( - tree_sitter_html::language(), - tree_sitter_html::HIGHLIGHTS_QUERY, - tree_sitter_html::INJECTIONS_QUERY, - "", -).unwrap(); +let javascript_language = tree_sitter_javascript::language(); -let javascript_config = HighlightConfiguration::new( - tree_sitter_javascript::language(), - tree_sitter_javascript::HIGHLIGHTS_QUERY, - tree_sitter_javascript::INJECTIONS_QUERY, - tree_sitter_javascript::LCOALS_QUERY, +let mut javascript_config = HighlightConfiguration::new( + javascript_language, + "javascript", + tree_sitter_javascript::HIGHLIGHT_QUERY, + tree_sitter_javascript::INJECTION_QUERY, + tree_sitter_javascript::LOCALS_QUERY, + false, ).unwrap(); ``` @@ -102,4 +100,6 @@ for event in highlights { } ``` -The last parameter to `highlight` is a *language injection* callback. This allows other languages to be retrieved when Tree-sitter detects an embedded document (for example, a piece of JavaScript code inside of a `script` tag within HTML). +The last parameter to `highlight` is a _language injection_ callback. This allows +other languages to be retrieved when Tree-sitter detects an embedded document +(for example, a piece of JavaScript code inside a `script` tag within HTML). 
diff --git a/third-party/tree-sitter/tree-sitter/highlight/include/tree_sitter/highlight.h b/third-party/tree-sitter/tree-sitter/highlight/include/tree_sitter/highlight.h index 496faea4f8a..5db458c14e1 100644 --- a/third-party/tree-sitter/tree-sitter/highlight/include/tree_sitter/highlight.h +++ b/third-party/tree-sitter/tree-sitter/highlight/include/tree_sitter/highlight.h @@ -48,7 +48,7 @@ TSHighlightError ts_highlighter_add_language( const char *locals_query, uint32_t highlight_query_len, uint32_t injection_query_len, - uint32_t locals_query_len + uint32_t locals_query_len, ); // Compute syntax highlighting for a given document. You must first diff --git a/third-party/tree-sitter/tree-sitter/highlight/src/c_lib.rs b/third-party/tree-sitter/tree-sitter/highlight/src/c_lib.rs index d48a180c119..6b4d1cf8cb9 100644 --- a/third-party/tree-sitter/tree-sitter/highlight/src/c_lib.rs +++ b/third-party/tree-sitter/tree-sitter/highlight/src/c_lib.rs @@ -29,25 +29,30 @@ pub enum ErrorCode { InvalidUtf8, InvalidRegex, InvalidQuery, + InvalidLanguageName, } +/// Create a new [`TSHighlighter`] instance. +/// +/// # Safety +/// +/// The caller must ensure that the `highlight_names` and `attribute_strings` arrays are valid for +/// the lifetime of the returned [`TSHighlighter`] instance, and are non-null. 
#[no_mangle] -pub extern "C" fn ts_highlighter_new( +pub unsafe extern "C" fn ts_highlighter_new( highlight_names: *const *const c_char, attribute_strings: *const *const c_char, highlight_count: u32, ) -> *mut TSHighlighter { - let highlight_names = - unsafe { slice::from_raw_parts(highlight_names, highlight_count as usize) }; - let attribute_strings = - unsafe { slice::from_raw_parts(attribute_strings, highlight_count as usize) }; + let highlight_names = slice::from_raw_parts(highlight_names, highlight_count as usize); + let attribute_strings = slice::from_raw_parts(attribute_strings, highlight_count as usize); let highlight_names = highlight_names - .into_iter() - .map(|s| unsafe { CStr::from_ptr(*s).to_string_lossy().to_string() }) + .iter() + .map(|s| CStr::from_ptr(*s).to_string_lossy().to_string()) .collect::>(); let attribute_strings = attribute_strings - .into_iter() - .map(|s| unsafe { CStr::from_ptr(*s).to_bytes() }) + .iter() + .map(|s| CStr::from_ptr(*s).to_bytes()) .collect(); let carriage_return_index = highlight_names.iter().position(|s| s == "carriage-return"); Box::into_raw(Box::new(TSHighlighter { @@ -58,9 +63,21 @@ pub extern "C" fn ts_highlighter_new( })) } +/// Add a language to a [`TSHighlighter`] instance. +/// +/// Returns an [`ErrorCode`] indicating whether the language was added successfully or not. +/// +/// # Safety +/// +/// `this` must be non-null and must be a valid pointer to a [`TSHighlighter`] instance +/// created by [`ts_highlighter_new`]. +/// +/// The caller must ensure that any `*const c_char` (C-style string) parameters are valid for the lifetime of +/// the [`TSHighlighter`] instance, and are non-null. 
#[no_mangle] -pub extern "C" fn ts_highlighter_add_language( +pub unsafe extern "C" fn ts_highlighter_add_language( this: *mut TSHighlighter, + language_name: *const c_char, scope_name: *const c_char, injection_regex: *const c_char, language: Language, @@ -73,7 +90,7 @@ pub extern "C" fn ts_highlighter_add_language( ) -> ErrorCode { let f = move || { let this = unwrap_mut_ptr(this); - let scope_name = unsafe { CStr::from_ptr(scope_name) }; + let scope_name = CStr::from_ptr(scope_name); let scope_name = scope_name .to_str() .or(Err(ErrorCode::InvalidUtf8))? @@ -81,38 +98,44 @@ pub extern "C" fn ts_highlighter_add_language( let injection_regex = if injection_regex.is_null() { None } else { - let pattern = unsafe { CStr::from_ptr(injection_regex) }; + let pattern = CStr::from_ptr(injection_regex); let pattern = pattern.to_str().or(Err(ErrorCode::InvalidUtf8))?; Some(Regex::new(pattern).or(Err(ErrorCode::InvalidRegex))?) }; - let highlight_query = unsafe { - slice::from_raw_parts(highlight_query as *const u8, highlight_query_len as usize) - }; + let highlight_query = + slice::from_raw_parts(highlight_query.cast::(), highlight_query_len as usize); + let highlight_query = str::from_utf8(highlight_query).or(Err(ErrorCode::InvalidUtf8))?; let injection_query = if injection_query_len > 0 { - let query = unsafe { - slice::from_raw_parts(injection_query as *const u8, injection_query_len as usize) - }; + let query = + slice::from_raw_parts(injection_query.cast::(), injection_query_len as usize); str::from_utf8(query).or(Err(ErrorCode::InvalidUtf8))? } else { "" }; let locals_query = if locals_query_len > 0 { - let query = unsafe { - slice::from_raw_parts(locals_query as *const u8, locals_query_len as usize) - }; + let query = slice::from_raw_parts(locals_query.cast::(), locals_query_len as usize); str::from_utf8(query).or(Err(ErrorCode::InvalidUtf8))? 
} else { "" }; - let mut config = - HighlightConfiguration::new(language, highlight_query, injection_query, locals_query) - .or(Err(ErrorCode::InvalidQuery))?; - config.configure(&this.highlight_names.as_slice()); + let lang = CStr::from_ptr(language_name) + .to_str() + .or(Err(ErrorCode::InvalidLanguageName))?; + + let mut config = HighlightConfiguration::new( + language, + lang, + highlight_query, + injection_query, + locals_query, + ) + .or(Err(ErrorCode::InvalidQuery))?; + config.configure(this.highlight_names.as_slice()); this.languages.insert(scope_name, (injection_regex, config)); Ok(()) @@ -132,42 +155,102 @@ pub extern "C" fn ts_highlight_buffer_new() -> *mut TSHighlightBuffer { })) } +/// Deletes a [`TSHighlighter`] instance. +/// +/// # Safety +/// +/// `this` must be non-null and must be a valid pointer to a [`TSHighlighter`] instance +/// created by [`ts_highlighter_new`]. +/// +/// It cannot be used after this function is called. #[no_mangle] -pub extern "C" fn ts_highlighter_delete(this: *mut TSHighlighter) { - drop(unsafe { Box::from_raw(this) }) +pub unsafe extern "C" fn ts_highlighter_delete(this: *mut TSHighlighter) { + drop(Box::from_raw(this)); } +/// Deletes a [`TSHighlightBuffer`] instance. +/// +/// # Safety +/// +/// `this` must be non-null and must be a valid pointer to a [`TSHighlightBuffer`] instance +/// created by [`ts_highlight_buffer_new`] +/// +/// It cannot be used after this function is called. #[no_mangle] -pub extern "C" fn ts_highlight_buffer_delete(this: *mut TSHighlightBuffer) { - drop(unsafe { Box::from_raw(this) }) +pub unsafe extern "C" fn ts_highlight_buffer_delete(this: *mut TSHighlightBuffer) { + drop(Box::from_raw(this)); } +/// Get the HTML content of a [`TSHighlightBuffer`] instance as a raw pointer. +/// +/// # Safety +/// +/// `this` must be non-null and must be a valid pointer to a [`TSHighlightBuffer`] instance +/// created by [`ts_highlight_buffer_new`]. 
+/// +/// The returned pointer, a C-style string, must not outlive the [`TSHighlightBuffer`] instance, else the +/// data will point to garbage. +/// +/// To get the length of the HTML content, use [`ts_highlight_buffer_len`]. #[no_mangle] -pub extern "C" fn ts_highlight_buffer_content(this: *const TSHighlightBuffer) -> *const u8 { +pub unsafe extern "C" fn ts_highlight_buffer_content(this: *const TSHighlightBuffer) -> *const u8 { let this = unwrap_ptr(this); this.renderer.html.as_slice().as_ptr() } +/// Get the line offsets of a [`TSHighlightBuffer`] instance as a C-style array. +/// +/// # Safety +/// +/// `this` must be non-null and must be a valid pointer to a [`TSHighlightBuffer`] instance +/// created by [`ts_highlight_buffer_new`]. +/// +/// The returned pointer, a C-style array of [`u32`]s, must not outlive the [`TSHighlightBuffer`] instance, else the +/// data will point to garbage. +/// +/// To get the length of the array, use [`ts_highlight_buffer_line_count`]. #[no_mangle] -pub extern "C" fn ts_highlight_buffer_line_offsets(this: *const TSHighlightBuffer) -> *const u32 { +pub unsafe extern "C" fn ts_highlight_buffer_line_offsets( + this: *const TSHighlightBuffer, +) -> *const u32 { let this = unwrap_ptr(this); this.renderer.line_offsets.as_slice().as_ptr() } +/// Get the length of the HTML content of a [`TSHighlightBuffer`] instance. +/// +/// # Safety +/// +/// `this` must be non-null and must be a valid pointer to a [`TSHighlightBuffer`] instance +/// created by [`ts_highlight_buffer_new`]. #[no_mangle] -pub extern "C" fn ts_highlight_buffer_len(this: *const TSHighlightBuffer) -> u32 { +pub unsafe extern "C" fn ts_highlight_buffer_len(this: *const TSHighlightBuffer) -> u32 { let this = unwrap_ptr(this); this.renderer.html.len() as u32 } +/// Get the number of lines in a [`TSHighlightBuffer`] instance. 
+/// +/// # Safety +/// +/// `this` must be non-null and must be a valid pointer to a [`TSHighlightBuffer`] instance +/// created by [`ts_highlight_buffer_new`]. #[no_mangle] -pub extern "C" fn ts_highlight_buffer_line_count(this: *const TSHighlightBuffer) -> u32 { +pub unsafe extern "C" fn ts_highlight_buffer_line_count(this: *const TSHighlightBuffer) -> u32 { let this = unwrap_ptr(this); this.renderer.line_offsets.len() as u32 } +/// Highlight a string of source code. +/// +/// # Safety +/// +/// The caller must ensure that `scope_name`, `source_code`, `output`, and `cancellation_flag` are valid for +/// the lifetime of the [`TSHighlighter`] instance, and are non-null. +/// +/// `this` must be a non-null pointer to a [`TSHighlighter`] instance created by [`ts_highlighter_new`] #[no_mangle] -pub extern "C" fn ts_highlighter_highlight( +pub unsafe extern "C" fn ts_highlighter_highlight( this: *const TSHighlighter, scope_name: *const c_char, source_code: *const c_char, @@ -177,10 +260,9 @@ pub extern "C" fn ts_highlighter_highlight( ) -> ErrorCode { let this = unwrap_ptr(this); let output = unwrap_mut_ptr(output); - let scope_name = unwrap(unsafe { CStr::from_ptr(scope_name).to_str() }); - let source_code = - unsafe { slice::from_raw_parts(source_code as *const u8, source_code_len as usize) }; - let cancellation_flag = unsafe { cancellation_flag.as_ref() }; + let scope_name = unwrap(CStr::from_ptr(scope_name).to_str()); + let source_code = slice::from_raw_parts(source_code.cast::(), source_code_len as usize); + let cancellation_flag = cancellation_flag.as_ref(); this.highlight(source_code, scope_name, output, cancellation_flag) } @@ -225,15 +307,8 @@ impl TSHighlighter { .renderer .render(highlights, source_code, &|s| self.attribute_strings[s.0]); match result { - Err(Error::Cancelled) => { - return ErrorCode::Timeout; - } - Err(Error::InvalidLanguage) => { - return ErrorCode::InvalidLanguage; - } - Err(Error::Unknown) => { - return ErrorCode::Timeout; - } + 
Err(Error::Cancelled | Error::Unknown) => ErrorCode::Timeout, + Err(Error::InvalidLanguage) => ErrorCode::InvalidLanguage, Ok(()) => ErrorCode::Ok, } } else { @@ -242,15 +317,15 @@ impl TSHighlighter { } } -fn unwrap_ptr<'a, T>(result: *const T) -> &'a T { - unsafe { result.as_ref() }.unwrap_or_else(|| { +unsafe fn unwrap_ptr<'a, T>(result: *const T) -> &'a T { + result.as_ref().unwrap_or_else(|| { eprintln!("{}:{} - pointer must not be null", file!(), line!()); abort(); }) } -fn unwrap_mut_ptr<'a, T>(result: *mut T) -> &'a mut T { - unsafe { result.as_mut() }.unwrap_or_else(|| { +unsafe fn unwrap_mut_ptr<'a, T>(result: *mut T) -> &'a mut T { + result.as_mut().unwrap_or_else(|| { eprintln!("{}:{} - pointer must not be null", file!(), line!()); abort(); }) @@ -258,7 +333,7 @@ fn unwrap_mut_ptr<'a, T>(result: *mut T) -> &'a mut T { fn unwrap(result: Result) -> T { result.unwrap_or_else(|error| { - eprintln!("tree-sitter highlight error: {}", error); + eprintln!("tree-sitter highlight error: {error}"); abort(); }) } diff --git a/third-party/tree-sitter/tree-sitter/highlight/src/lib.rs b/third-party/tree-sitter/tree-sitter/highlight/src/lib.rs index 8a79c624f2b..22c0bc8603f 100644 --- a/third-party/tree-sitter/tree-sitter/highlight/src/lib.rs +++ b/third-party/tree-sitter/tree-sitter/highlight/src/lib.rs @@ -1,7 +1,10 @@ +#![doc = include_str!("../README.md")] + pub mod c_lib; -pub mod util; pub use c_lib as c; +use lazy_static::lazy_static; +use std::collections::HashSet; use std::sync::atomic::{AtomicUsize, Ordering}; use std::{iter, mem, ops, str, usize}; use thiserror::Error; @@ -14,6 +17,65 @@ const CANCELLATION_CHECK_INTERVAL: usize = 100; const BUFFER_HTML_RESERVE_CAPACITY: usize = 10 * 1024; const BUFFER_LINES_RESERVE_CAPACITY: usize = 1000; +lazy_static! 
{ + static ref STANDARD_CAPTURE_NAMES: HashSet<&'static str> = vec![ + "attribute", + "boolean", + "carriage-return", + "comment", + "comment.documentation", + "constant", + "constant.builtin", + "constructor", + "constructor.builtin", + "embedded", + "error", + "escape", + "function", + "function.builtin", + "keyword", + "markup", + "markup.bold", + "markup.heading", + "markup.italic", + "markup.link", + "markup.link.url", + "markup.list", + "markup.list.checked", + "markup.list.numbered", + "markup.list.unchecked", + "markup.list.unnumbered", + "markup.quote", + "markup.raw", + "markup.raw.block", + "markup.raw.inline", + "markup.strikethrough", + "module", + "number", + "operator", + "property", + "property.builtin", + "punctuation", + "punctuation.bracket", + "punctuation.delimiter", + "punctuation.special", + "string", + "string.escape", + "string.regexp", + "string.special", + "string.special.symbol", + "tag", + "type", + "type.builtin", + "variable", + "variable.builtin", + "variable.member", + "variable.parameter", + ] + .into_iter() + .collect(); +} + /// Indicates which highlight should be applied to a region of source code. #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub struct Highlight(pub usize); @@ -42,6 +104,7 @@ pub enum HighlightEvent { /// This struct is immutable and can be shared between threads. pub struct HighlightConfiguration { pub language: Language, + pub language_name: String, pub query: Query, combined_injections_query: Option, locals_pattern_index: usize, @@ -62,7 +125,7 @@ pub struct HighlightConfiguration { /// syntax highlighting calls. A separate highlighter is needed for each thread that /// is performing highlighting. 
pub struct Highlighter { - parser: Parser, + pub parser: Parser, cursors: Vec, } @@ -92,6 +155,7 @@ where F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a, { source: &'a [u8], + language_name: &'a str, byte_offset: usize, highlighter: &'a mut Highlighter, injection_callback: F, @@ -105,7 +169,7 @@ where struct HighlightIterLayer<'a> { _tree: Tree, cursor: QueryCursor, - captures: iter::Peekable>, + captures: iter::Peekable>, config: &'a HighlightConfiguration, highlight_end_stack: Vec, scope_stack: Vec>, @@ -113,9 +177,16 @@ struct HighlightIterLayer<'a> { depth: usize, } +impl Default for Highlighter { + fn default() -> Self { + Self::new() + } +} + impl Highlighter { + #[must_use] pub fn new() -> Self { - Highlighter { + Self { parser: Parser::new(), cursors: Vec::new(), } @@ -135,6 +206,7 @@ impl Highlighter { ) -> Result> + 'a, Error> { let layers = HighlightIterLayer::new( source, + None, self, cancellation_flag, &mut injection_callback, @@ -150,12 +222,13 @@ impl Highlighter { assert_ne!(layers.len(), 0); let mut result = HighlightIter { source, + language_name: &config.language_name, byte_offset: 0, injection_callback, cancellation_flag, highlighter: self, iter_count: 0, - layers: layers, + layers, next_event: None, last_highlight_range: None, }; @@ -181,6 +254,7 @@ impl HighlightConfiguration { /// Returns a `HighlightConfiguration` that can then be used with the `highlight` method. pub fn new( language: Language, + name: impl Into, highlights_query: &str, injection_query: &str, locals_query: &str, @@ -195,7 +269,7 @@ impl HighlightConfiguration { // Construct a single query by concatenating the three query strings, but record the // range of pattern indices that belong to each individual string. 
- let mut query = Query::new(language, &query_source)?; + let mut query = Query::new(&language, &query_source)?; let mut locals_pattern_index = 0; let mut highlights_pattern_index = 0; for i in 0..(query.pattern_count()) { @@ -212,7 +286,7 @@ impl HighlightConfiguration { // Construct a separate query just for dealing with the 'combined injections'. // Disable the combined injection patterns in the main query. - let mut combined_injections_query = Query::new(language, injection_query)?; + let mut combined_injections_query = Query::new(&language, injection_query)?; let mut has_combined_queries = false; for pattern_index in 0..locals_pattern_index { let settings = query.property_settings(pattern_index); @@ -249,7 +323,7 @@ impl HighlightConfiguration { let mut local_scope_capture_index = None; for (i, name) in query.capture_names().iter().enumerate() { let i = Some(i as u32); - match name.as_str() { + match *name { "injection.content" => injection_content_capture_index = i, "injection.language" => injection_language_capture_index = i, "local.definition" => local_def_capture_index = i, @@ -261,8 +335,9 @@ impl HighlightConfiguration { } let highlight_indices = vec![None; query.capture_names().len()]; - Ok(HighlightConfiguration { + Ok(Self { language, + language_name: name.into(), query, combined_injections_query, locals_pattern_index, @@ -279,7 +354,8 @@ impl HighlightConfiguration { } /// Get a slice containing all of the highlight names used in the configuration. 
- pub fn names(&self) -> &[String] { + #[must_use] + pub const fn names(&self) -> &[&str] { self.query.capture_names() } @@ -303,7 +379,7 @@ impl HighlightConfiguration { let mut best_index = None; let mut best_match_len = 0; - for (i, recognized_name) in recognized_names.into_iter().enumerate() { + for (i, recognized_name) in recognized_names.iter().enumerate() { let mut len = 0; let mut matches = true; for part in recognized_name.as_ref().split('.') { @@ -321,6 +397,23 @@ impl HighlightConfiguration { best_index.map(Highlight) })); } + + // Return the list of this configuration's capture names that are neither present in the + // list of predefined 'canonical' names nor start with an underscore (denoting 'private' captures + // used as part of capture internals). + #[must_use] + pub fn nonconformant_capture_names(&self, capture_names: &HashSet<&str>) -> Vec<&str> { + let capture_names = if capture_names.is_empty() { + &*STANDARD_CAPTURE_NAMES + } else { + capture_names + }; + self.names() + .iter() + .filter(|&n| !(n.starts_with('_') || capture_names.contains(n))) + .copied() + .collect() + } } impl<'a> HighlightIterLayer<'a> { @@ -329,8 +422,10 @@ impl<'a> HighlightIterLayer<'a> { /// In the even that the new layer contains "combined injections" (injections where multiple /// disjoint ranges are parsed as one syntax tree), these will be eagerly processed and /// added to the returned vector. 
+ #[allow(clippy::too_many_arguments)] fn new Option<&'a HighlightConfiguration> + 'a>( source: &'a [u8], + parent_name: Option<&str>, highlighter: &mut Highlighter, cancellation_flag: Option<&'a AtomicUsize>, injection_callback: &mut F, @@ -344,7 +439,7 @@ impl<'a> HighlightIterLayer<'a> { if highlighter.parser.set_included_ranges(&ranges).is_ok() { highlighter .parser - .set_language(config.language) + .set_language(&config.language) .map_err(|_| Error::InvalidLanguage)?; unsafe { highlighter.parser.set_cancellation_flag(cancellation_flag) }; @@ -353,7 +448,7 @@ impl<'a> HighlightIterLayer<'a> { .parse(source, None) .ok_or(Error::Cancelled)?; unsafe { highlighter.parser.set_cancellation_flag(None) }; - let mut cursor = highlighter.cursors.pop().unwrap_or(QueryCursor::new()); + let mut cursor = highlighter.cursors.pop().unwrap_or_default(); // Process combined injections. if let Some(combined_injections_query) = &config.combined_injections_query { @@ -363,8 +458,13 @@ impl<'a> HighlightIterLayer<'a> { cursor.matches(combined_injections_query, tree.root_node(), source); for mat in matches { let entry = &mut injections_by_pattern_index[mat.pattern_index]; - let (language_name, content_node, include_children) = - injection_for_match(config, combined_injections_query, &mat, source); + let (language_name, content_node, include_children) = injection_for_match( + config, + parent_name, + combined_injections_query, + &mat, + source, + ); if language_name.is_some() { entry.0 = language_name; } @@ -418,12 +518,12 @@ impl<'a> HighlightIterLayer<'a> { if queue.is_empty() { break; - } else { - let (next_config, next_depth, next_ranges) = queue.remove(0); - config = next_config; - depth = next_depth; - ranges = next_ranges; } + + let (next_config, next_depth, next_ranges) = queue.remove(0); + config = next_config; + depth = next_depth; + ranges = next_ranges; } Ok(result) @@ -449,7 +549,7 @@ impl<'a> HighlightIterLayer<'a> { let mut parent_range = parent_range_iter .next() 
.expect("Layers should only be constructed with non-empty ranges vectors"); - for node in nodes.iter() { + for node in nodes { let mut preceding_range = Range { start_byte: 0, start_point: Point::new(0, 0), @@ -472,7 +572,7 @@ impl<'a> HighlightIterLayer<'a> { Some(child.range()) } }) - .chain([following_range].iter().cloned()) + .chain(std::iter::once(following_range)) { let mut range = Range { start_byte: preceding_range.end_byte, @@ -532,7 +632,7 @@ impl<'a> HighlightIterLayer<'a> { .captures .peek() .map(|(m, i)| m.captures[*i].node.start_byte()); - let next_end = self.highlight_end_stack.last().cloned(); + let next_end = self.highlight_end_stack.last().copied(); match (next_start, next_end) { (Some(start), Some(end)) => { if start < end { @@ -589,10 +689,9 @@ where self.layers[0..(i + 1)].rotate_left(1); } break; - } else { - let layer = self.layers.remove(0); - self.highlighter.cursors.push(layer.cursor); } + let layer = self.layers.remove(0); + self.highlighter.cursors.push(layer.cursor); } } @@ -664,7 +763,7 @@ where // If any previous highlight ends before this node starts, then before // processing this capture, emit the source code up until the end of the // previous highlight, and an end event for that highlight. - if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { + if let Some(end_byte) = layer.highlight_end_stack.last().copied() { if end_byte <= range.start { layer.highlight_end_stack.pop(); return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); @@ -673,20 +772,26 @@ where } // If there are no more captures, then emit any remaining highlight end events. // And if there are none of those, then just advance to the end of the document. 
- else if let Some(end_byte) = layer.highlight_end_stack.last().cloned() { - layer.highlight_end_stack.pop(); - return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); - } else { + else { + if let Some(end_byte) = layer.highlight_end_stack.last().copied() { + layer.highlight_end_stack.pop(); + return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd)); + } return self.emit_event(self.source.len(), None); - }; + } let (mut match_, capture_index) = layer.captures.next().unwrap(); let mut capture = match_.captures[capture_index]; // If this capture represents an injection, then process the injection. if match_.pattern_index < layer.config.locals_pattern_index { - let (language_name, content_node, include_children) = - injection_for_match(&layer.config, &layer.config.query, &match_, &self.source); + let (language_name, content_node, include_children) = injection_for_match( + layer.config, + Some(self.language_name), + &layer.config.query, + &match_, + self.source, + ); // Explicitly remove this match so that none of its other captures will remain // in the stream of captures. @@ -704,6 +809,7 @@ where if !ranges.is_empty() { match HighlightIterLayer::new( self.source, + Some(self.language_name), self.highlighter, self.cancellation_flag, &mut self.injection_callback, @@ -746,12 +852,9 @@ where local_defs: Vec::new(), }; for prop in layer.config.query.property_settings(match_.pattern_index) { - match prop.key.as_ref() { - "local.scope-inherits" => { - scope.inherits = - prop.value.as_ref().map_or(true, |r| r.as_ref() == "true"); - } - _ => {} + if prop.key.as_ref() == "local.scope-inherits" { + scope.inherits = + prop.value.as_ref().map_or(true, |r| r.as_ref() == "true"); } } layer.scope_stack.push(scope); @@ -782,26 +885,24 @@ where } // If the node represents a reference, then try to find the corresponding // definition in the scope stack. 
- else if Some(capture.index) == layer.config.local_ref_capture_index { - if definition_highlight.is_none() { - definition_highlight = None; - if let Ok(name) = str::from_utf8(&self.source[range.clone()]) { - for scope in layer.scope_stack.iter().rev() { - if let Some(highlight) = - scope.local_defs.iter().rev().find_map(|def| { - if def.name == name && range.start >= def.value_range.end { - Some(def.highlight) - } else { - None - } - }) - { - reference_highlight = highlight; - break; - } - if !scope.inherits { - break; + else if Some(capture.index) == layer.config.local_ref_capture_index + && definition_highlight.is_none() + { + definition_highlight = None; + if let Ok(name) = str::from_utf8(&self.source[range.clone()]) { + for scope in layer.scope_stack.iter().rev() { + if let Some(highlight) = scope.local_defs.iter().rev().find_map(|def| { + if def.name == name && range.start >= def.value_range.end { + Some(def.highlight) + } else { + None } + }) { + reference_highlight = highlight; + break; + } + if !scope.inherits { + break; } } } @@ -831,34 +932,26 @@ where } } - // If the current node was found to be a local variable, then skip over any - // highlighting patterns that are disabled for local variables. - if definition_highlight.is_some() || reference_highlight.is_some() { - while layer.config.non_local_variable_patterns[match_.pattern_index] { - match_.remove(); - if let Some((next_match, next_capture_index)) = layer.captures.peek() { - let next_capture = next_match.captures[*next_capture_index]; - if next_capture.node == capture.node { - capture = next_capture; - match_ = layer.captures.next().unwrap().0; - continue; - } - } - - self.sort_layers(); - continue 'main; - } - } - - // Once a highlighting pattern is found for the current node, skip over - // any later highlighting patterns that also match this node. 
Captures - // for a given node are ordered by pattern index, so these subsequent + // Once a highlighting pattern is found for the current node, keep iterating over + // any later highlighting patterns that also match this node and set the match to it. + // Captures for a given node are ordered by pattern index, so these subsequent // captures are guaranteed to be for highlighting, not injections or // local variables. while let Some((next_match, next_capture_index)) = layer.captures.peek() { let next_capture = next_match.captures[*next_capture_index]; if next_capture.node == capture.node { - layer.captures.next(); + let following_match = layer.captures.next().unwrap().0; + // If the current node was found to be a local variable, then ignore + // the following match if it's a highlighting pattern that is disabled + // for local variables. + if (definition_highlight.is_some() || reference_highlight.is_some()) + && layer.config.non_local_variable_patterns[following_match.pattern_index] + { + continue; + } + match_.remove(); + capture = next_capture; + match_ = following_match; } else { break; } @@ -885,9 +978,16 @@ where } } +impl Default for HtmlRenderer { + fn default() -> Self { + Self::new() + } +} + impl HtmlRenderer { + #[must_use] pub fn new() -> Self { - let mut result = HtmlRenderer { + let mut result = Self { html: Vec::with_capacity(BUFFER_HTML_RESERVE_CAPACITY), line_offsets: Vec::with_capacity(BUFFER_LINES_RESERVE_CAPACITY), carriage_return_highlight: None, @@ -987,10 +1087,21 @@ impl HtmlRenderer { self.html.extend(b""); } - fn add_text<'a, F>(&mut self, src: &[u8], highlights: &Vec, attribute_callback: &F) + fn add_text<'a, F>(&mut self, src: &[u8], highlights: &[Highlight], attribute_callback: &F) where F: Fn(Highlight) -> &'a [u8], { + pub const fn html_escape(c: u8) -> Option<&'static [u8]> { + match c as char { + '>' => Some(b">"), + '<' => Some(b"<"), + '&' => Some(b"&"), + '\'' => Some(b"'"), + '"' => Some(b"""), + _ => None, + } + } + let mut 
last_char_was_cr = false; for c in LossyUtf8::new(src).flat_map(|p| p.bytes()) { // Don't render carriage return characters, but allow lone carriage returns (not @@ -1014,7 +1125,7 @@ impl HtmlRenderer { highlights .iter() .for_each(|scope| self.start_highlight(*scope, attribute_callback)); - } else if let Some(escape) = util::html_escape(c) { + } else if let Some(escape) = html_escape(c) { self.html.extend_from_slice(escape); } else { self.html.push(c); @@ -1024,7 +1135,8 @@ impl HtmlRenderer { } fn injection_for_match<'a>( - config: &HighlightConfiguration, + config: &'a HighlightConfiguration, + parent_name: Option<&'a str>, query: &'a Query, query_match: &QueryMatch<'a, 'a>, source: &'a [u8], @@ -1034,6 +1146,7 @@ fn injection_for_match<'a>( let mut language_name = None; let mut content_node = None; + for capture in query_match.captures { let index = Some(capture.index); if index == language_capture_index { @@ -1051,7 +1164,25 @@ fn injection_for_match<'a>( // that sets the injection.language key. "injection.language" => { if language_name.is_none() { - language_name = prop.value.as_ref().map(|s| s.as_ref()) + language_name = prop.value.as_ref().map(std::convert::AsRef::as_ref); + } + } + + // Setting the `injection.self` key can be used to specify that the + // language name should be the same as the language of the current + // layer. 
+ "injection.self" => { + if language_name.is_none() { + language_name = Some(config.language_name.as_str()); + } + } + + // Setting the `injection.parent` key can be used to specify that + // the language name should be the same as the language of the + // parent layer + "injection.parent" => { + if language_name.is_none() { + language_name = parent_name; } } diff --git a/third-party/tree-sitter/tree-sitter/highlight/src/util.rs b/third-party/tree-sitter/tree-sitter/highlight/src/util.rs deleted file mode 100644 index 29adb13b11f..00000000000 --- a/third-party/tree-sitter/tree-sitter/highlight/src/util.rs +++ /dev/null @@ -1,10 +0,0 @@ -pub fn html_escape(c: u8) -> Option<&'static [u8]> { - match c as char { - '>' => Some(b">"), - '<' => Some(b"<"), - '&' => Some(b"&"), - '\'' => Some(b"'"), - '"' => Some(b"""), - _ => None, - } -} diff --git a/third-party/tree-sitter/tree-sitter/lib/Cargo.toml b/third-party/tree-sitter/tree-sitter/lib/Cargo.toml index 39e07916157..b40939f7abf 100644 --- a/third-party/tree-sitter/tree-sitter/lib/Cargo.toml +++ b/third-party/tree-sitter/tree-sitter/lib/Cargo.toml @@ -1,33 +1,51 @@ [package] name = "tree-sitter" +version.workspace = true description = "Rust bindings to the Tree-sitter parsing library" -version = "0.20.10" -authors = ["Max Brunsfeld "] -edition = "2021" -license = "MIT" +authors.workspace = true +edition.workspace = true +rust-version.workspace = true readme = "binding_rust/README.md" -keywords = ["incremental", "parsing"] +homepage.workspace = true +repository.workspace = true +license.workspace = true +keywords.workspace = true categories = ["api-bindings", "parsing", "text-editors"] -repository = "https://github.com/tree-sitter/tree-sitter" -rust-version.workspace = true build = "binding_rust/build.rs" +links = "tree-sitter" include = [ "/binding_rust/*", "/Cargo.toml", - "/include/*", "/src/*.h", "/src/*.c", "/src/unicode/*", + "/src/wasm/*", + "/include/tree_sitter/api.h", ] +[features] +wasm = ["wasmtime", 
"wasmtime-c-api"] + [dependencies] -lazy_static = { version = "1.2.0", optional = true } -regex = "1" +regex.workspace = true + +[dependencies.wasmtime] +version = "18.0.1" +optional = true +default-features = false +features = ["cranelift"] + +[dependencies.wasmtime-c-api] +version = "18.0.1" +optional = true +package = "wasmtime-c-api-impl" +default-features = false [build-dependencies] -cc = "^1.0.58" +bindgen = { version = "0.69.4", optional = true } +cc.workspace = true [lib] path = "binding_rust/lib.rs" diff --git a/third-party/tree-sitter/tree-sitter/lib/README.md b/third-party/tree-sitter/tree-sitter/lib/README.md index 82ebc5a54fa..231fe2abbb7 100644 --- a/third-party/tree-sitter/tree-sitter/lib/README.md +++ b/third-party/tree-sitter/tree-sitter/lib/README.md @@ -1,5 +1,4 @@ -Subdirectories --------------- +## Subdirectories * [`src`](./src) - C source code for the Tree-sitter library * [`include`](./include) - C headers for the Tree-sitter library diff --git a/third-party/tree-sitter/tree-sitter/lib/binding_rust/README.md b/third-party/tree-sitter/tree-sitter/lib/binding_rust/README.md index dffe76619c9..b75e0122027 100644 --- a/third-party/tree-sitter/tree-sitter/lib/binding_rust/README.md +++ b/third-party/tree-sitter/tree-sitter/lib/binding_rust/README.md @@ -1,10 +1,13 @@ # Rust Tree-sitter -[![Crates.io](https://img.shields.io/crates/v/tree-sitter.svg)](https://crates.io/crates/tree-sitter) +[![crates.io badge]][crates.io] + +[crates.io]: https://crates.io/crates/tree-sitter +[crates.io badge]: https://img.shields.io/crates/v/tree-sitter.svg?color=%23B48723 Rust bindings to the [Tree-sitter][] parsing library. -### Basic Usage +## Basic Usage First, create a parser: @@ -14,22 +17,6 @@ use tree_sitter::{Parser, Language}; let mut parser = Parser::new(); ``` -Tree-sitter languages consist of generated C code. 
To make sure they're properly compiled and linked, you can create a [build script](https://doc.rust-lang.org/cargo/reference/build-scripts.html) like the following (assuming `tree-sitter-javascript` is in your root directory): - -```rust -use std::path::PathBuf; - -fn main() { - let dir: PathBuf = ["tree-sitter-javascript", "src"].iter().collect(); - - cc::Build::new() - .include(&dir) - .file(dir.join("parser.c")) - .file(dir.join("scanner.c")) - .compile("tree-sitter-javascript"); -} -``` - Add the `cc` crate to your `Cargo.toml` under `[build-dependencies]`: ```toml @@ -37,15 +24,18 @@ Add the `cc` crate to your `Cargo.toml` under `[build-dependencies]`: cc="*" ``` -To then use languages from rust, you must declare them as `extern "C"` functions and invoke them with `unsafe`. Then you can assign them to the parser. +Then, add a language as a dependency: -```rust -extern "C" { fn tree_sitter_c() -> Language; } -extern "C" { fn tree_sitter_rust() -> Language; } -extern "C" { fn tree_sitter_javascript() -> Language; } +```toml +[dependencies] +tree-sitter = "0.21.0" +tree-sitter-rust = "0.20.4" +``` + +To then use a language, you assign them to the parser. -let language = unsafe { tree_sitter_rust() }; -parser.set_language(language).unwrap(); +```rust +parser.set_language(tree_sitter_rust::language()).expect("Error loading Rust grammar"); ``` Now you can parse source code: @@ -62,12 +52,13 @@ assert_eq!(root_node.end_position().column, 12); ### Editing -Once you have a syntax tree, you can update it when your source code changes. Passing in the previous edited tree makes `parse` run much more quickly: +Once you have a syntax tree, you can update it when your source code changes. 
+Passing in the previous edited tree makes `parse` run much more quickly: ```rust let new_source_code = "fn test(a: u32) {}" -tree.edit(InputEdit { +tree.edit(&InputEdit { start_byte: 8, old_end_byte: 8, new_end_byte: 14, @@ -81,7 +72,8 @@ let new_tree = parser.parse(new_source_code, Some(&tree)); ### Text Input -The source code to parse can be provided either as a string, a slice, a vector, or as a function that returns a slice. The text can be encoded as either UTF8 or UTF16: +The source code to parse can be provided either as a string, a slice, a vector, +or as a function that returns a slice. The text can be encoded as either UTF8 or UTF16: ```rust // Store some source code in an array of lines. @@ -100,7 +92,7 @@ let tree = parser.parse_with(&mut |_byte: u32, position: Point| -> &[u8] { if column < lines[row].as_bytes().len() { &lines[row].as_bytes()[column..] } else { - "\n".as_bytes() + b"\n" } } else { &[] diff --git a/third-party/tree-sitter/tree-sitter/lib/binding_rust/bindings.rs b/third-party/tree-sitter/tree-sitter/lib/binding_rust/bindings.rs index be117f83892..227d142d092 100644 --- a/third-party/tree-sitter/tree-sitter/lib/binding_rust/bindings.rs +++ b/third-party/tree-sitter/tree-sitter/lib/binding_rust/bindings.rs @@ -1,38 +1,46 @@ -/* automatically generated by rust-bindgen 0.59.2 */ +/* automatically generated by rust-bindgen 0.69.4 */ +pub const TREE_SITTER_LANGUAGE_VERSION: u32 = 14; +pub const TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION: u32 = 13; +pub type TSStateId = u16; pub type TSSymbol = u16; pub type TSFieldId = u16; #[repr(C)] -#[derive(Debug, Copy, Clone)] +#[derive(Debug)] pub struct TSLanguage { _unused: [u8; 0], } #[repr(C)] -#[derive(Debug, Copy, Clone)] +#[derive(Debug)] pub struct TSParser { _unused: [u8; 0], } #[repr(C)] -#[derive(Debug, Copy, Clone)] +#[derive(Debug)] pub struct TSTree { _unused: [u8; 0], } #[repr(C)] -#[derive(Debug, Copy, Clone)] +#[derive(Debug)] pub struct TSQuery { _unused: [u8; 0], } #[repr(C)] 
-#[derive(Debug, Copy, Clone)] +#[derive(Debug)] pub struct TSQueryCursor { _unused: [u8; 0], } -pub const TSInputEncoding_TSInputEncodingUTF8: TSInputEncoding = 0; -pub const TSInputEncoding_TSInputEncodingUTF16: TSInputEncoding = 1; +#[repr(C)] +#[derive(Debug)] +pub struct TSLookaheadIterator { + _unused: [u8; 0], +} +pub const TSInputEncodingUTF8: TSInputEncoding = 0; +pub const TSInputEncodingUTF16: TSInputEncoding = 1; pub type TSInputEncoding = ::std::os::raw::c_uint; -pub const TSSymbolType_TSSymbolTypeRegular: TSSymbolType = 0; -pub const TSSymbolType_TSSymbolTypeAnonymous: TSSymbolType = 1; -pub const TSSymbolType_TSSymbolTypeAuxiliary: TSSymbolType = 2; +pub const TSSymbolTypeRegular: TSSymbolType = 0; +pub const TSSymbolTypeAnonymous: TSSymbolType = 1; +pub const TSSymbolTypeAuxiliary: TSSymbolType = 2; pub type TSSymbolType = ::std::os::raw::c_uint; #[repr(C)] #[derive(Debug, Copy, Clone)] @@ -49,7 +57,7 @@ pub struct TSRange { pub end_byte: u32, } #[repr(C)] -#[derive(Debug, Copy, Clone)] +#[derive(Debug)] pub struct TSInput { pub payload: *mut ::std::os::raw::c_void, pub read: ::std::option::Option< @@ -62,18 +70,18 @@ pub struct TSInput { >, pub encoding: TSInputEncoding, } -pub const TSLogType_TSLogTypeParse: TSLogType = 0; -pub const TSLogType_TSLogTypeLex: TSLogType = 1; +pub const TSLogTypeParse: TSLogType = 0; +pub const TSLogTypeLex: TSLogType = 1; pub type TSLogType = ::std::os::raw::c_uint; #[repr(C)] -#[derive(Debug, Copy, Clone)] +#[derive(Debug)] pub struct TSLogger { pub payload: *mut ::std::os::raw::c_void, pub log: ::std::option::Option< unsafe extern "C" fn( payload: *mut ::std::os::raw::c_void, - arg1: TSLogType, - arg2: *const ::std::os::raw::c_char, + log_type: TSLogType, + buffer: *const ::std::os::raw::c_char, ), >, } @@ -102,42 +110,42 @@ pub struct TSTreeCursor { pub context: [u32; 2usize], } #[repr(C)] -#[derive(Debug, Copy, Clone)] +#[derive(Debug)] pub struct TSQueryCapture { pub node: TSNode, pub index: u32, } -pub const 
TSQuantifier_TSQuantifierZero: TSQuantifier = 0; -pub const TSQuantifier_TSQuantifierZeroOrOne: TSQuantifier = 1; -pub const TSQuantifier_TSQuantifierZeroOrMore: TSQuantifier = 2; -pub const TSQuantifier_TSQuantifierOne: TSQuantifier = 3; -pub const TSQuantifier_TSQuantifierOneOrMore: TSQuantifier = 4; +pub const TSQuantifierZero: TSQuantifier = 0; +pub const TSQuantifierZeroOrOne: TSQuantifier = 1; +pub const TSQuantifierZeroOrMore: TSQuantifier = 2; +pub const TSQuantifierOne: TSQuantifier = 3; +pub const TSQuantifierOneOrMore: TSQuantifier = 4; pub type TSQuantifier = ::std::os::raw::c_uint; #[repr(C)] -#[derive(Debug, Copy, Clone)] +#[derive(Debug)] pub struct TSQueryMatch { pub id: u32, pub pattern_index: u16, pub capture_count: u16, pub captures: *const TSQueryCapture, } -pub const TSQueryPredicateStepType_TSQueryPredicateStepTypeDone: TSQueryPredicateStepType = 0; -pub const TSQueryPredicateStepType_TSQueryPredicateStepTypeCapture: TSQueryPredicateStepType = 1; -pub const TSQueryPredicateStepType_TSQueryPredicateStepTypeString: TSQueryPredicateStepType = 2; +pub const TSQueryPredicateStepTypeDone: TSQueryPredicateStepType = 0; +pub const TSQueryPredicateStepTypeCapture: TSQueryPredicateStepType = 1; +pub const TSQueryPredicateStepTypeString: TSQueryPredicateStepType = 2; pub type TSQueryPredicateStepType = ::std::os::raw::c_uint; #[repr(C)] -#[derive(Debug, Copy, Clone)] +#[derive(Debug)] pub struct TSQueryPredicateStep { pub type_: TSQueryPredicateStepType, pub value_id: u32, } -pub const TSQueryError_TSQueryErrorNone: TSQueryError = 0; -pub const TSQueryError_TSQueryErrorSyntax: TSQueryError = 1; -pub const TSQueryError_TSQueryErrorNodeType: TSQueryError = 2; -pub const TSQueryError_TSQueryErrorField: TSQueryError = 3; -pub const TSQueryError_TSQueryErrorCapture: TSQueryError = 4; -pub const TSQueryError_TSQueryErrorStructure: TSQueryError = 5; -pub const TSQueryError_TSQueryErrorLanguage: TSQueryError = 6; +pub const TSQueryErrorNone: TSQueryError = 0; 
+pub const TSQueryErrorSyntax: TSQueryError = 1; +pub const TSQueryErrorNodeType: TSQueryError = 2; +pub const TSQueryErrorField: TSQueryError = 3; +pub const TSQueryErrorCapture: TSQueryError = 4; +pub const TSQueryErrorStructure: TSQueryError = 5; +pub const TSQueryErrorLanguage: TSQueryError = 6; pub type TSQueryError = ::std::os::raw::c_uint; extern "C" { #[doc = " Create a new parser."] @@ -145,94 +153,30 @@ extern "C" { } extern "C" { #[doc = " Delete the parser, freeing all of the memory that it used."] - pub fn ts_parser_delete(parser: *mut TSParser); -} -extern "C" { - #[doc = " Set the language that the parser should use for parsing."] - #[doc = ""] - #[doc = " Returns a boolean indicating whether or not the language was successfully"] - #[doc = " assigned. True means assignment succeeded. False means there was a version"] - #[doc = " mismatch: the language was generated with an incompatible version of the"] - #[doc = " Tree-sitter CLI. Check the language's version using `ts_language_version`"] - #[doc = " and compare it to this library's `TREE_SITTER_LANGUAGE_VERSION` and"] - #[doc = " `TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION` constants."] - pub fn ts_parser_set_language(self_: *mut TSParser, language: *const TSLanguage) -> bool; + pub fn ts_parser_delete(self_: *mut TSParser); } extern "C" { #[doc = " Get the parser's current language."] pub fn ts_parser_language(self_: *const TSParser) -> *const TSLanguage; } extern "C" { - #[doc = " Set the ranges of text that the parser should include when parsing."] - #[doc = ""] - #[doc = " By default, the parser will always include entire documents. This function"] - #[doc = " allows you to parse only a *portion* of a document but still return a syntax"] - #[doc = " tree whose ranges match up with the document as a whole. You can also pass"] - #[doc = " multiple disjoint ranges."] - #[doc = ""] - #[doc = " The second and third parameters specify the location and length of an array"] - #[doc = " of ranges. 
The parser does *not* take ownership of these ranges; it copies"] - #[doc = " the data, so it doesn't matter how these ranges are allocated."] - #[doc = ""] - #[doc = " If `length` is zero, then the entire document will be parsed. Otherwise,"] - #[doc = " the given ranges must be ordered from earliest to latest in the document,"] - #[doc = " and they must not overlap. That is, the following must hold for all"] - #[doc = " `i` < `length - 1`: ranges[i].end_byte <= ranges[i + 1].start_byte"] - #[doc = ""] - #[doc = " If this requirement is not satisfied, the operation will fail, the ranges"] - #[doc = " will not be assigned, and this function will return `false`. On success,"] - #[doc = " this function returns `true`"] + #[doc = " Set the language that the parser should use for parsing.\n\n Returns a boolean indicating whether or not the language was successfully\n assigned. True means assignment succeeded. False means there was a version\n mismatch: the language was generated with an incompatible version of the\n Tree-sitter CLI. Check the language's version using [`ts_language_version`]\n and compare it to this library's [`TREE_SITTER_LANGUAGE_VERSION`] and\n [`TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION`] constants."] + pub fn ts_parser_set_language(self_: *mut TSParser, language: *const TSLanguage) -> bool; +} +extern "C" { + #[doc = " Set the ranges of text that the parser should include when parsing.\n\n By default, the parser will always include entire documents. This function\n allows you to parse only a *portion* of a document but still return a syntax\n tree whose ranges match up with the document as a whole. You can also pass\n multiple disjoint ranges.\n\n The second and third parameters specify the location and length of an array\n of ranges. The parser does *not* take ownership of these ranges; it copies\n the data, so it doesn't matter how these ranges are allocated.\n\n If `count` is zero, then the entire document will be parsed. 
Otherwise,\n the given ranges must be ordered from earliest to latest in the document,\n and they must not overlap. That is, the following must hold for all:\n\n `i < count - 1`: `ranges[i].end_byte <= ranges[i + 1].start_byte`\n\n If this requirement is not satisfied, the operation will fail, the ranges\n will not be assigned, and this function will return `false`. On success,\n this function returns `true`"] pub fn ts_parser_set_included_ranges( self_: *mut TSParser, ranges: *const TSRange, - length: u32, + count: u32, ) -> bool; } extern "C" { - #[doc = " Get the ranges of text that the parser will include when parsing."] - #[doc = ""] - #[doc = " The returned pointer is owned by the parser. The caller should not free it"] - #[doc = " or write to it. The length of the array will be written to the given"] - #[doc = " `length` pointer."] - pub fn ts_parser_included_ranges(self_: *const TSParser, length: *mut u32) -> *const TSRange; -} -extern "C" { - #[doc = " Use the parser to parse some source code and create a syntax tree."] - #[doc = ""] - #[doc = " If you are parsing this document for the first time, pass `NULL` for the"] - #[doc = " `old_tree` parameter. Otherwise, if you have already parsed an earlier"] - #[doc = " version of this document and the document has since been edited, pass the"] - #[doc = " previous syntax tree so that the unchanged parts of it can be reused."] - #[doc = " This will save time and memory. For this to work correctly, you must have"] - #[doc = " already edited the old syntax tree using the `ts_tree_edit` function in a"] - #[doc = " way that exactly matches the source code changes."] - #[doc = ""] - #[doc = " The `TSInput` parameter lets you specify how to read the text. It has the"] - #[doc = " following three fields:"] - #[doc = " 1. `read`: A function to retrieve a chunk of text at a given byte offset"] - #[doc = " and (row, column) position. 
The function should return a pointer to the"] - #[doc = " text and write its length to the `bytes_read` pointer. The parser does"] - #[doc = " not take ownership of this buffer; it just borrows it until it has"] - #[doc = " finished reading it. The function should write a zero value to the"] - #[doc = " `bytes_read` pointer to indicate the end of the document."] - #[doc = " 2. `payload`: An arbitrary pointer that will be passed to each invocation"] - #[doc = " of the `read` function."] - #[doc = " 3. `encoding`: An indication of how the text is encoded. Either"] - #[doc = " `TSInputEncodingUTF8` or `TSInputEncodingUTF16`."] - #[doc = ""] - #[doc = " This function returns a syntax tree on success, and `NULL` on failure. There"] - #[doc = " are three possible reasons for failure:"] - #[doc = " 1. The parser does not have a language assigned. Check for this using the"] - #[doc = "`ts_parser_language` function."] - #[doc = " 2. Parsing was cancelled due to a timeout that was set by an earlier call to"] - #[doc = " the `ts_parser_set_timeout_micros` function. You can resume parsing from"] - #[doc = " where the parser left out by calling `ts_parser_parse` again with the"] - #[doc = " same arguments. Or you can start parsing from scratch by first calling"] - #[doc = " `ts_parser_reset`."] - #[doc = " 3. Parsing was cancelled using a cancellation flag that was set by an"] - #[doc = " earlier call to `ts_parser_set_cancellation_flag`. You can resume parsing"] - #[doc = " from where the parser left out by calling `ts_parser_parse` again with"] - #[doc = " the same arguments."] + #[doc = " Get the ranges of text that the parser will include when parsing.\n\n The returned pointer is owned by the parser. The caller should not free it\n or write to it. 
The length of the array will be written to the given\n `count` pointer."] + pub fn ts_parser_included_ranges(self_: *const TSParser, count: *mut u32) -> *const TSRange; +} +extern "C" { + #[doc = " Use the parser to parse some source code and create a syntax tree.\n\n If you are parsing this document for the first time, pass `NULL` for the\n `old_tree` parameter. Otherwise, if you have already parsed an earlier\n version of this document and the document has since been edited, pass the\n previous syntax tree so that the unchanged parts of it can be reused.\n This will save time and memory. For this to work correctly, you must have\n already edited the old syntax tree using the [`ts_tree_edit`] function in a\n way that exactly matches the source code changes.\n\n The [`TSInput`] parameter lets you specify how to read the text. It has the\n following three fields:\n 1. [`read`]: A function to retrieve a chunk of text at a given byte offset\n and (row, column) position. The function should return a pointer to the\n text and write its length to the [`bytes_read`] pointer. The parser does\n not take ownership of this buffer; it just borrows it until it has\n finished reading it. The function should write a zero value to the\n [`bytes_read`] pointer to indicate the end of the document.\n 2. [`payload`]: An arbitrary pointer that will be passed to each invocation\n of the [`read`] function.\n 3. [`encoding`]: An indication of how the text is encoded. Either\n `TSInputEncodingUTF8` or `TSInputEncodingUTF16`.\n\n This function returns a syntax tree on success, and `NULL` on failure. There\n are three possible reasons for failure:\n 1. The parser does not have a language assigned. Check for this using the\n[`ts_parser_language`] function.\n 2. Parsing was cancelled due to a timeout that was set by an earlier call to\n the [`ts_parser_set_timeout_micros`] function. 
You can resume parsing from\n where the parser left out by calling [`ts_parser_parse`] again with the\n same arguments. Or you can start parsing from scratch by first calling\n [`ts_parser_reset`].\n 3. Parsing was cancelled using a cancellation flag that was set by an\n earlier call to [`ts_parser_set_cancellation_flag`]. You can resume parsing\n from where the parser left out by calling [`ts_parser_parse`] again with\n the same arguments.\n\n [`read`]: TSInput::read\n [`payload`]: TSInput::payload\n [`encoding`]: TSInput::encoding\n [`bytes_read`]: TSInput::read"] pub fn ts_parser_parse( self_: *mut TSParser, old_tree: *const TSTree, @@ -240,10 +184,7 @@ extern "C" { ) -> *mut TSTree; } extern "C" { - #[doc = " Use the parser to parse some source code stored in one contiguous buffer."] - #[doc = " The first two parameters are the same as in the `ts_parser_parse` function"] - #[doc = " above. The second two parameters indicate the location of the buffer and its"] - #[doc = " length in bytes."] + #[doc = " Use the parser to parse some source code stored in one contiguous buffer.\n The first two parameters are the same as in the [`ts_parser_parse`] function\n above. The second two parameters indicate the location of the buffer and its\n length in bytes."] pub fn ts_parser_parse_string( self_: *mut TSParser, old_tree: *const TSTree, @@ -252,10 +193,7 @@ extern "C" { ) -> *mut TSTree; } extern "C" { - #[doc = " Use the parser to parse some source code stored in one contiguous buffer with"] - #[doc = " a given encoding. The first four parameters work the same as in the"] - #[doc = " `ts_parser_parse_string` method above. The final parameter indicates whether"] - #[doc = " the text is encoded as UTF8 or UTF16."] + #[doc = " Use the parser to parse some source code stored in one contiguous buffer with\n a given encoding. The first four parameters work the same as in the\n [`ts_parser_parse_string`] method above. 
The final parameter indicates whether\n the text is encoded as UTF8 or UTF16."] pub fn ts_parser_parse_string_encoding( self_: *mut TSParser, old_tree: *const TSTree, @@ -265,33 +203,19 @@ extern "C" { ) -> *mut TSTree; } extern "C" { - #[doc = " Instruct the parser to start the next parse from the beginning."] - #[doc = ""] - #[doc = " If the parser previously failed because of a timeout or a cancellation, then"] - #[doc = " by default, it will resume where it left off on the next call to"] - #[doc = " `ts_parser_parse` or other parsing functions. If you don't want to resume,"] - #[doc = " and instead intend to use this parser to parse some other document, you must"] - #[doc = " call `ts_parser_reset` first."] + #[doc = " Instruct the parser to start the next parse from the beginning.\n\n If the parser previously failed because of a timeout or a cancellation, then\n by default, it will resume where it left off on the next call to\n [`ts_parser_parse`] or other parsing functions. If you don't want to resume,\n and instead intend to use this parser to parse some other document, you must\n call [`ts_parser_reset`] first."] pub fn ts_parser_reset(self_: *mut TSParser); } extern "C" { - #[doc = " Set the maximum duration in microseconds that parsing should be allowed to"] - #[doc = " take before halting."] - #[doc = ""] - #[doc = " If parsing takes longer than this, it will halt early, returning NULL."] - #[doc = " See `ts_parser_parse` for more information."] - pub fn ts_parser_set_timeout_micros(self_: *mut TSParser, timeout: u64); + #[doc = " Set the maximum duration in microseconds that parsing should be allowed to\n take before halting.\n\n If parsing takes longer than this, it will halt early, returning NULL.\n See [`ts_parser_parse`] for more information."] + pub fn ts_parser_set_timeout_micros(self_: *mut TSParser, timeout_micros: u64); } extern "C" { #[doc = " Get the duration in microseconds that parsing is allowed to take."] pub fn 
ts_parser_timeout_micros(self_: *const TSParser) -> u64; } extern "C" { - #[doc = " Set the parser's current cancellation flag pointer."] - #[doc = ""] - #[doc = " If a non-null pointer is assigned, then the parser will periodically read"] - #[doc = " from this pointer during parsing. If it reads a non-zero value, it will"] - #[doc = " halt early, returning NULL. See `ts_parser_parse` for more information."] + #[doc = " Set the parser's current cancellation flag pointer.\n\n If a non-null pointer is assigned, then the parser will periodically read\n from this pointer during parsing. If it reads a non-zero value, it will\n halt early, returning NULL. See [`ts_parser_parse`] for more information."] pub fn ts_parser_set_cancellation_flag(self_: *mut TSParser, flag: *const usize); } extern "C" { @@ -299,11 +223,7 @@ extern "C" { pub fn ts_parser_cancellation_flag(self_: *const TSParser) -> *const usize; } extern "C" { - #[doc = " Set the logger that a parser should use during parsing."] - #[doc = ""] - #[doc = " The parser does not take ownership over the logger payload. If a logger was"] - #[doc = " previously assigned, the caller is responsible for releasing any memory"] - #[doc = " owned by the previous logger."] + #[doc = " Set the logger that a parser should use during parsing.\n\n The parser does not take ownership over the logger payload. If a logger was\n previously assigned, the caller is responsible for releasing any memory\n owned by the previous logger."] pub fn ts_parser_set_logger(self_: *mut TSParser, logger: TSLogger); } extern "C" { @@ -311,17 +231,11 @@ extern "C" { pub fn ts_parser_logger(self_: *const TSParser) -> TSLogger; } extern "C" { - #[doc = " Set the file descriptor to which the parser should write debugging graphs"] - #[doc = " during parsing. The graphs are formatted in the DOT language. You may want"] - #[doc = " to pipe these graphs directly to a `dot(1)` process in order to generate"] - #[doc = " SVG output. 
You can turn off this logging by passing a negative number."] - pub fn ts_parser_print_dot_graphs(self_: *mut TSParser, file: ::std::os::raw::c_int); + #[doc = " Set the file descriptor to which the parser should write debugging graphs\n during parsing. The graphs are formatted in the DOT language. You may want\n to pipe these graphs directly to a `dot(1)` process in order to generate\n SVG output. You can turn off this logging by passing a negative number."] + pub fn ts_parser_print_dot_graphs(self_: *mut TSParser, fd: ::std::os::raw::c_int); } extern "C" { - #[doc = " Create a shallow copy of the syntax tree. This is very fast."] - #[doc = ""] - #[doc = " You need to copy a syntax tree in order to use it on more than one thread at"] - #[doc = " a time, as syntax trees are not thread safe."] + #[doc = " Create a shallow copy of the syntax tree. This is very fast.\n\n You need to copy a syntax tree in order to use it on more than one thread at\n a time, as syntax trees are not thread safe."] pub fn ts_tree_copy(self_: *const TSTree) -> *mut TSTree; } extern "C" { @@ -333,45 +247,27 @@ extern "C" { pub fn ts_tree_root_node(self_: *const TSTree) -> TSNode; } extern "C" { - #[doc = " Get the root node of the syntax tree, but with its position"] - #[doc = " shifted forward by the given offset."] + #[doc = " Get the root node of the syntax tree, but with its position\n shifted forward by the given offset."] pub fn ts_tree_root_node_with_offset( self_: *const TSTree, offset_bytes: u32, - offset_point: TSPoint, + offset_extent: TSPoint, ) -> TSNode; } extern "C" { #[doc = " Get the language that was used to parse the syntax tree."] - pub fn ts_tree_language(arg1: *const TSTree) -> *const TSLanguage; + pub fn ts_tree_language(self_: *const TSTree) -> *const TSLanguage; } extern "C" { - #[doc = " Get the array of included ranges that was used to parse the syntax tree."] - #[doc = ""] - #[doc = " The returned pointer must be freed by the caller."] - pub fn 
ts_tree_included_ranges(arg1: *const TSTree, length: *mut u32) -> *mut TSRange; + #[doc = " Get the array of included ranges that was used to parse the syntax tree.\n\n The returned pointer must be freed by the caller."] + pub fn ts_tree_included_ranges(self_: *const TSTree, length: *mut u32) -> *mut TSRange; } extern "C" { - #[doc = " Edit the syntax tree to keep it in sync with source code that has been"] - #[doc = " edited."] - #[doc = ""] - #[doc = " You must describe the edit both in terms of byte offsets and in terms of"] - #[doc = " (row, column) coordinates."] + #[doc = " Edit the syntax tree to keep it in sync with source code that has been\n edited.\n\n You must describe the edit both in terms of byte offsets and in terms of\n (row, column) coordinates."] pub fn ts_tree_edit(self_: *mut TSTree, edit: *const TSInputEdit); } extern "C" { - #[doc = " Compare an old edited syntax tree to a new syntax tree representing the same"] - #[doc = " document, returning an array of ranges whose syntactic structure has changed."] - #[doc = ""] - #[doc = " For this to work correctly, the old syntax tree must have been edited such"] - #[doc = " that its ranges match up to the new tree. Generally, you'll want to call"] - #[doc = " this function right after calling one of the `ts_parser_parse` functions."] - #[doc = " You need to pass the old tree that was passed to parse, as well as the new"] - #[doc = " tree that was returned from that function."] - #[doc = ""] - #[doc = " The returned array is allocated using `malloc` and the caller is responsible"] - #[doc = " for freeing it using `free`. The length of the array will be written to the"] - #[doc = " given `length` pointer."] + #[doc = " Compare an old edited syntax tree to a new syntax tree representing the same\n document, returning an array of ranges whose syntactic structure has changed.\n\n For this to work correctly, the old syntax tree must have been edited such\n that its ranges match up to the new tree. 
Generally, you'll want to call\n this function right after calling one of the [`ts_parser_parse`] functions.\n You need to pass the old tree that was passed to parse, as well as the new\n tree that was returned from that function.\n\n The returned array is allocated using `malloc` and the caller is responsible\n for freeing it using `free`. The length of the array will be written to the\n given `length` pointer."] pub fn ts_tree_get_changed_ranges( old_tree: *const TSTree, new_tree: *const TSTree, @@ -380,253 +276,259 @@ extern "C" { } extern "C" { #[doc = " Write a DOT graph describing the syntax tree to the given file."] - pub fn ts_tree_print_dot_graph(arg1: *const TSTree, file_descriptor: ::std::os::raw::c_int); + pub fn ts_tree_print_dot_graph(self_: *const TSTree, file_descriptor: ::std::os::raw::c_int); } extern "C" { #[doc = " Get the node's type as a null-terminated string."] - pub fn ts_node_type(arg1: TSNode) -> *const ::std::os::raw::c_char; + pub fn ts_node_type(self_: TSNode) -> *const ::std::os::raw::c_char; } extern "C" { #[doc = " Get the node's type as a numerical id."] - pub fn ts_node_symbol(arg1: TSNode) -> TSSymbol; + pub fn ts_node_symbol(self_: TSNode) -> TSSymbol; +} +extern "C" { + #[doc = " Get the node's language."] + pub fn ts_node_language(self_: TSNode) -> *const TSLanguage; +} +extern "C" { + #[doc = " Get the node's type as it appears in the grammar ignoring aliases as a\n null-terminated string."] + pub fn ts_node_grammar_type(self_: TSNode) -> *const ::std::os::raw::c_char; +} +extern "C" { + #[doc = " Get the node's type as a numerical id as it appears in the grammar ignoring\n aliases. 
This should be used in [`ts_language_next_state`] instead of\n [`ts_node_symbol`]."] + pub fn ts_node_grammar_symbol(self_: TSNode) -> TSSymbol; } extern "C" { #[doc = " Get the node's start byte."] - pub fn ts_node_start_byte(arg1: TSNode) -> u32; + pub fn ts_node_start_byte(self_: TSNode) -> u32; } extern "C" { #[doc = " Get the node's start position in terms of rows and columns."] - pub fn ts_node_start_point(arg1: TSNode) -> TSPoint; + pub fn ts_node_start_point(self_: TSNode) -> TSPoint; } extern "C" { #[doc = " Get the node's end byte."] - pub fn ts_node_end_byte(arg1: TSNode) -> u32; + pub fn ts_node_end_byte(self_: TSNode) -> u32; } extern "C" { #[doc = " Get the node's end position in terms of rows and columns."] - pub fn ts_node_end_point(arg1: TSNode) -> TSPoint; + pub fn ts_node_end_point(self_: TSNode) -> TSPoint; } extern "C" { - #[doc = " Get an S-expression representing the node as a string."] - #[doc = ""] - #[doc = " This string is allocated with `malloc` and the caller is responsible for"] - #[doc = " freeing it using `free`."] - pub fn ts_node_string(arg1: TSNode) -> *mut ::std::os::raw::c_char; + #[doc = " Get an S-expression representing the node as a string.\n\n This string is allocated with `malloc` and the caller is responsible for\n freeing it using `free`."] + pub fn ts_node_string(self_: TSNode) -> *mut ::std::os::raw::c_char; } extern "C" { - #[doc = " Check if the node is null. Functions like `ts_node_child` and"] - #[doc = " `ts_node_next_sibling` will return a null node to indicate that no such node"] - #[doc = " was found."] - pub fn ts_node_is_null(arg1: TSNode) -> bool; + #[doc = " Check if the node is null. Functions like [`ts_node_child`] and\n [`ts_node_next_sibling`] will return a null node to indicate that no such node\n was found."] + pub fn ts_node_is_null(self_: TSNode) -> bool; } extern "C" { - #[doc = " Check if the node is *named*. 
Named nodes correspond to named rules in the"] - #[doc = " grammar, whereas *anonymous* nodes correspond to string literals in the"] - #[doc = " grammar."] - pub fn ts_node_is_named(arg1: TSNode) -> bool; + #[doc = " Check if the node is *named*. Named nodes correspond to named rules in the\n grammar, whereas *anonymous* nodes correspond to string literals in the\n grammar."] + pub fn ts_node_is_named(self_: TSNode) -> bool; } extern "C" { - #[doc = " Check if the node is *missing*. Missing nodes are inserted by the parser in"] - #[doc = " order to recover from certain kinds of syntax errors."] - pub fn ts_node_is_missing(arg1: TSNode) -> bool; + #[doc = " Check if the node is *missing*. Missing nodes are inserted by the parser in\n order to recover from certain kinds of syntax errors."] + pub fn ts_node_is_missing(self_: TSNode) -> bool; } extern "C" { - #[doc = " Check if the node is *extra*. Extra nodes represent things like comments,"] - #[doc = " which are not required the grammar, but can appear anywhere."] - pub fn ts_node_is_extra(arg1: TSNode) -> bool; + #[doc = " Check if the node is *extra*. 
Extra nodes represent things like comments,\n which are not required the grammar, but can appear anywhere."] + pub fn ts_node_is_extra(self_: TSNode) -> bool; } extern "C" { #[doc = " Check if a syntax node has been edited."] - pub fn ts_node_has_changes(arg1: TSNode) -> bool; + pub fn ts_node_has_changes(self_: TSNode) -> bool; } extern "C" { #[doc = " Check if the node is a syntax error or contains any syntax errors."] - pub fn ts_node_has_error(arg1: TSNode) -> bool; + pub fn ts_node_has_error(self_: TSNode) -> bool; +} +extern "C" { + #[doc = " Check if the node is a syntax error."] + pub fn ts_node_is_error(self_: TSNode) -> bool; +} +extern "C" { + #[doc = " Get this node's parse state."] + pub fn ts_node_parse_state(self_: TSNode) -> TSStateId; +} +extern "C" { + #[doc = " Get the parse state after this node."] + pub fn ts_node_next_parse_state(self_: TSNode) -> TSStateId; } extern "C" { #[doc = " Get the node's immediate parent."] - pub fn ts_node_parent(arg1: TSNode) -> TSNode; + pub fn ts_node_parent(self_: TSNode) -> TSNode; } extern "C" { - #[doc = " Get the node's child at the given index, where zero represents the first"] - #[doc = " child."] - pub fn ts_node_child(arg1: TSNode, arg2: u32) -> TSNode; + #[doc = " Get the node's child at the given index, where zero represents the first\n child."] + pub fn ts_node_child(self_: TSNode, child_index: u32) -> TSNode; } extern "C" { - #[doc = " Get the field name for node's child at the given index, where zero represents"] - #[doc = " the first child. Returns NULL, if no field is found."] - pub fn ts_node_field_name_for_child(arg1: TSNode, arg2: u32) -> *const ::std::os::raw::c_char; + #[doc = " Get the field name for node's child at the given index, where zero represents\n the first child. 
Returns NULL, if no field is found."] + pub fn ts_node_field_name_for_child( + self_: TSNode, + child_index: u32, + ) -> *const ::std::os::raw::c_char; } extern "C" { #[doc = " Get the node's number of children."] - pub fn ts_node_child_count(arg1: TSNode) -> u32; + pub fn ts_node_child_count(self_: TSNode) -> u32; } extern "C" { - #[doc = " Get the node's *named* child at the given index."] - #[doc = ""] - #[doc = " See also `ts_node_is_named`."] - pub fn ts_node_named_child(arg1: TSNode, arg2: u32) -> TSNode; + #[doc = " Get the node's *named* child at the given index.\n\n See also [`ts_node_is_named`]."] + pub fn ts_node_named_child(self_: TSNode, child_index: u32) -> TSNode; } extern "C" { - #[doc = " Get the node's number of *named* children."] - #[doc = ""] - #[doc = " See also `ts_node_is_named`."] - pub fn ts_node_named_child_count(arg1: TSNode) -> u32; + #[doc = " Get the node's number of *named* children.\n\n See also [`ts_node_is_named`]."] + pub fn ts_node_named_child_count(self_: TSNode) -> u32; } extern "C" { #[doc = " Get the node's child with the given field name."] pub fn ts_node_child_by_field_name( self_: TSNode, - field_name: *const ::std::os::raw::c_char, - field_name_length: u32, + name: *const ::std::os::raw::c_char, + name_length: u32, ) -> TSNode; } extern "C" { - #[doc = " Get the node's child with the given numerical field id."] - #[doc = ""] - #[doc = " You can convert a field name to an id using the"] - #[doc = " `ts_language_field_id_for_name` function."] - pub fn ts_node_child_by_field_id(arg1: TSNode, arg2: TSFieldId) -> TSNode; + #[doc = " Get the node's child with the given numerical field id.\n\n You can convert a field name to an id using the\n [`ts_language_field_id_for_name`] function."] + pub fn ts_node_child_by_field_id(self_: TSNode, field_id: TSFieldId) -> TSNode; } extern "C" { #[doc = " Get the node's next / previous sibling."] - pub fn ts_node_next_sibling(arg1: TSNode) -> TSNode; + pub fn ts_node_next_sibling(self_: 
TSNode) -> TSNode; } extern "C" { - pub fn ts_node_prev_sibling(arg1: TSNode) -> TSNode; + pub fn ts_node_prev_sibling(self_: TSNode) -> TSNode; } extern "C" { #[doc = " Get the node's next / previous *named* sibling."] - pub fn ts_node_next_named_sibling(arg1: TSNode) -> TSNode; + pub fn ts_node_next_named_sibling(self_: TSNode) -> TSNode; } extern "C" { - pub fn ts_node_prev_named_sibling(arg1: TSNode) -> TSNode; + pub fn ts_node_prev_named_sibling(self_: TSNode) -> TSNode; } extern "C" { #[doc = " Get the node's first child that extends beyond the given byte offset."] - pub fn ts_node_first_child_for_byte(arg1: TSNode, arg2: u32) -> TSNode; + pub fn ts_node_first_child_for_byte(self_: TSNode, byte: u32) -> TSNode; } extern "C" { #[doc = " Get the node's first named child that extends beyond the given byte offset."] - pub fn ts_node_first_named_child_for_byte(arg1: TSNode, arg2: u32) -> TSNode; + pub fn ts_node_first_named_child_for_byte(self_: TSNode, byte: u32) -> TSNode; } extern "C" { - #[doc = " Get the smallest node within this node that spans the given range of bytes"] - #[doc = " or (row, column) positions."] - pub fn ts_node_descendant_for_byte_range(arg1: TSNode, arg2: u32, arg3: u32) -> TSNode; + #[doc = " Get the node's number of descendants, including one for the node itself."] + pub fn ts_node_descendant_count(self_: TSNode) -> u32; } extern "C" { - pub fn ts_node_descendant_for_point_range(arg1: TSNode, arg2: TSPoint, arg3: TSPoint) - -> TSNode; + #[doc = " Get the smallest node within this node that spans the given range of bytes\n or (row, column) positions."] + pub fn ts_node_descendant_for_byte_range(self_: TSNode, start: u32, end: u32) -> TSNode; } extern "C" { - #[doc = " Get the smallest named node within this node that spans the given range of"] - #[doc = " bytes or (row, column) positions."] - pub fn ts_node_named_descendant_for_byte_range(arg1: TSNode, arg2: u32, arg3: u32) -> TSNode; + pub fn ts_node_descendant_for_point_range( + self_: 
TSNode, + start: TSPoint, + end: TSPoint, + ) -> TSNode; +} +extern "C" { + #[doc = " Get the smallest named node within this node that spans the given range of\n bytes or (row, column) positions."] + pub fn ts_node_named_descendant_for_byte_range(self_: TSNode, start: u32, end: u32) -> TSNode; } extern "C" { pub fn ts_node_named_descendant_for_point_range( - arg1: TSNode, - arg2: TSPoint, - arg3: TSPoint, + self_: TSNode, + start: TSPoint, + end: TSPoint, ) -> TSNode; } extern "C" { - #[doc = " Edit the node to keep it in-sync with source code that has been edited."] - #[doc = ""] - #[doc = " This function is only rarely needed. When you edit a syntax tree with the"] - #[doc = " `ts_tree_edit` function, all of the nodes that you retrieve from the tree"] - #[doc = " afterward will already reflect the edit. You only need to use `ts_node_edit`"] - #[doc = " when you have a `TSNode` instance that you want to keep and continue to use"] - #[doc = " after an edit."] - pub fn ts_node_edit(arg1: *mut TSNode, arg2: *const TSInputEdit); + #[doc = " Edit the node to keep it in-sync with source code that has been edited.\n\n This function is only rarely needed. When you edit a syntax tree with the\n [`ts_tree_edit`] function, all of the nodes that you retrieve from the tree\n afterward will already reflect the edit. You only need to use [`ts_node_edit`]\n when you have a [`TSNode`] instance that you want to keep and continue to use\n after an edit."] + pub fn ts_node_edit(self_: *mut TSNode, edit: *const TSInputEdit); } extern "C" { #[doc = " Check if two nodes are identical."] - pub fn ts_node_eq(arg1: TSNode, arg2: TSNode) -> bool; + pub fn ts_node_eq(self_: TSNode, other: TSNode) -> bool; } extern "C" { - #[doc = " Create a new tree cursor starting from the given node."] - #[doc = ""] - #[doc = " A tree cursor allows you to walk a syntax tree more efficiently than is"] - #[doc = " possible using the `TSNode` functions. 
It is a mutable object that is always"] - #[doc = " on a certain syntax node, and can be moved imperatively to different nodes."] - pub fn ts_tree_cursor_new(arg1: TSNode) -> TSTreeCursor; + #[doc = " Create a new tree cursor starting from the given node.\n\n A tree cursor allows you to walk a syntax tree more efficiently than is\n possible using the [`TSNode`] functions. It is a mutable object that is always\n on a certain syntax node, and can be moved imperatively to different nodes."] + pub fn ts_tree_cursor_new(node: TSNode) -> TSTreeCursor; } extern "C" { #[doc = " Delete a tree cursor, freeing all of the memory that it used."] - pub fn ts_tree_cursor_delete(arg1: *mut TSTreeCursor); + pub fn ts_tree_cursor_delete(self_: *mut TSTreeCursor); } extern "C" { #[doc = " Re-initialize a tree cursor to start at a different node."] - pub fn ts_tree_cursor_reset(arg1: *mut TSTreeCursor, arg2: TSNode); + pub fn ts_tree_cursor_reset(self_: *mut TSTreeCursor, node: TSNode); +} +extern "C" { + #[doc = " Re-initialize a tree cursor to the same position as another cursor.\n\n Unlike [`ts_tree_cursor_reset`], this will not lose parent information and\n allows reusing already created cursors."] + pub fn ts_tree_cursor_reset_to(dst: *mut TSTreeCursor, src: *const TSTreeCursor); } extern "C" { #[doc = " Get the tree cursor's current node."] - pub fn ts_tree_cursor_current_node(arg1: *const TSTreeCursor) -> TSNode; + pub fn ts_tree_cursor_current_node(self_: *const TSTreeCursor) -> TSNode; } extern "C" { - #[doc = " Get the field name of the tree cursor's current node."] - #[doc = ""] - #[doc = " This returns `NULL` if the current node doesn't have a field."] - #[doc = " See also `ts_node_child_by_field_name`."] + #[doc = " Get the field name of the tree cursor's current node.\n\n This returns `NULL` if the current node doesn't have a field.\n See also [`ts_node_child_by_field_name`]."] pub fn ts_tree_cursor_current_field_name( - arg1: *const TSTreeCursor, + self_: *const 
TSTreeCursor, ) -> *const ::std::os::raw::c_char; } extern "C" { - #[doc = " Get the field id of the tree cursor's current node."] - #[doc = ""] - #[doc = " This returns zero if the current node doesn't have a field."] - #[doc = " See also `ts_node_child_by_field_id`, `ts_language_field_id_for_name`."] - pub fn ts_tree_cursor_current_field_id(arg1: *const TSTreeCursor) -> TSFieldId; + #[doc = " Get the field id of the tree cursor's current node.\n\n This returns zero if the current node doesn't have a field.\n See also [`ts_node_child_by_field_id`], [`ts_language_field_id_for_name`]."] + pub fn ts_tree_cursor_current_field_id(self_: *const TSTreeCursor) -> TSFieldId; +} +extern "C" { + #[doc = " Move the cursor to the parent of its current node.\n\n This returns `true` if the cursor successfully moved, and returns `false`\n if there was no parent node (the cursor was already on the root node)."] + pub fn ts_tree_cursor_goto_parent(self_: *mut TSTreeCursor) -> bool; +} +extern "C" { + #[doc = " Move the cursor to the next sibling of its current node.\n\n This returns `true` if the cursor successfully moved, and returns `false`\n if there was no next sibling node."] + pub fn ts_tree_cursor_goto_next_sibling(self_: *mut TSTreeCursor) -> bool; +} +extern "C" { + #[doc = " Move the cursor to the previous sibling of its current node.\n\n This returns `true` if the cursor successfully moved, and returns `false` if\n there was no previous sibling node.\n\n Note, that this function may be slower than\n [`ts_tree_cursor_goto_next_sibling`] due to how node positions are stored. 
In\n the worst case, this will need to iterate through all the children upto the\n previous sibling node to recalculate its position."] + pub fn ts_tree_cursor_goto_previous_sibling(self_: *mut TSTreeCursor) -> bool; } extern "C" { - #[doc = " Move the cursor to the parent of its current node."] - #[doc = ""] - #[doc = " This returns `true` if the cursor successfully moved, and returns `false`"] - #[doc = " if there was no parent node (the cursor was already on the root node)."] - pub fn ts_tree_cursor_goto_parent(arg1: *mut TSTreeCursor) -> bool; + #[doc = " Move the cursor to the first child of its current node.\n\n This returns `true` if the cursor successfully moved, and returns `false`\n if there were no children."] + pub fn ts_tree_cursor_goto_first_child(self_: *mut TSTreeCursor) -> bool; } extern "C" { - #[doc = " Move the cursor to the next sibling of its current node."] - #[doc = ""] - #[doc = " This returns `true` if the cursor successfully moved, and returns `false`"] - #[doc = " if there was no next sibling node."] - pub fn ts_tree_cursor_goto_next_sibling(arg1: *mut TSTreeCursor) -> bool; + #[doc = " Move the cursor to the last child of its current node.\n\n This returns `true` if the cursor successfully moved, and returns `false` if\n there were no children.\n\n Note that this function may be slower than [`ts_tree_cursor_goto_first_child`]\n because it needs to iterate through all the children to compute the child's\n position."] + pub fn ts_tree_cursor_goto_last_child(self_: *mut TSTreeCursor) -> bool; } extern "C" { - #[doc = " Move the cursor to the first child of its current node."] - #[doc = ""] - #[doc = " This returns `true` if the cursor successfully moved, and returns `false`"] - #[doc = " if there were no children."] - pub fn ts_tree_cursor_goto_first_child(arg1: *mut TSTreeCursor) -> bool; + #[doc = " Move the cursor to the node that is the nth descendant of\n the original node that the cursor was constructed with, where\n zero represents 
the original node itself."] + pub fn ts_tree_cursor_goto_descendant(self_: *mut TSTreeCursor, goal_descendant_index: u32); } extern "C" { - #[doc = " Move the cursor to the first child of its current node that extends beyond"] - #[doc = " the given byte offset or point."] - #[doc = ""] - #[doc = " This returns the index of the child node if one was found, and returns -1"] - #[doc = " if no such child was found."] - pub fn ts_tree_cursor_goto_first_child_for_byte(arg1: *mut TSTreeCursor, arg2: u32) -> i64; + #[doc = " Get the index of the cursor's current node out of all of the\n descendants of the original node that the cursor was constructed with."] + pub fn ts_tree_cursor_current_descendant_index(self_: *const TSTreeCursor) -> u32; } extern "C" { - pub fn ts_tree_cursor_goto_first_child_for_point(arg1: *mut TSTreeCursor, arg2: TSPoint) - -> i64; + #[doc = " Get the depth of the cursor's current node relative to the original\n node that the cursor was constructed with."] + pub fn ts_tree_cursor_current_depth(self_: *const TSTreeCursor) -> u32; } extern "C" { - pub fn ts_tree_cursor_copy(arg1: *const TSTreeCursor) -> TSTreeCursor; + #[doc = " Move the cursor to the first child of its current node that extends beyond\n the given byte offset or point.\n\n This returns the index of the child node if one was found, and returns -1\n if no such child was found."] + pub fn ts_tree_cursor_goto_first_child_for_byte( + self_: *mut TSTreeCursor, + goal_byte: u32, + ) -> i64; } extern "C" { - #[doc = " Create a new query from a string containing one or more S-expression"] - #[doc = " patterns. The query is associated with a particular language, and can"] - #[doc = " only be run on syntax nodes parsed with that language."] - #[doc = ""] - #[doc = " If all of the given patterns are valid, this returns a `TSQuery`."] - #[doc = " If a pattern is invalid, this returns `NULL`, and provides two pieces"] - #[doc = " of information about the problem:"] - #[doc = " 1. 
The byte offset of the error is written to the `error_offset` parameter."] - #[doc = " 2. The type of error is written to the `error_type` parameter."] + pub fn ts_tree_cursor_goto_first_child_for_point( + self_: *mut TSTreeCursor, + goal_point: TSPoint, + ) -> i64; +} +extern "C" { + pub fn ts_tree_cursor_copy(cursor: *const TSTreeCursor) -> TSTreeCursor; +} +extern "C" { + #[doc = " Create a new query from a string containing one or more S-expression\n patterns. The query is associated with a particular language, and can\n only be run on syntax nodes parsed with that language.\n\n If all of the given patterns are valid, this returns a [`TSQuery`].\n If a pattern is invalid, this returns `NULL`, and provides two pieces\n of information about the problem:\n 1. The byte offset of the error is written to the `error_offset` parameter.\n 2. The type of error is written to the `error_type` parameter."] pub fn ts_query_new( language: *const TSLanguage, source: *const ::std::os::raw::c_char, @@ -637,187 +539,152 @@ extern "C" { } extern "C" { #[doc = " Delete a query, freeing all of the memory that it used."] - pub fn ts_query_delete(arg1: *mut TSQuery); + pub fn ts_query_delete(self_: *mut TSQuery); } extern "C" { #[doc = " Get the number of patterns, captures, or string literals in the query."] - pub fn ts_query_pattern_count(arg1: *const TSQuery) -> u32; + pub fn ts_query_pattern_count(self_: *const TSQuery) -> u32; } extern "C" { - pub fn ts_query_capture_count(arg1: *const TSQuery) -> u32; + pub fn ts_query_capture_count(self_: *const TSQuery) -> u32; } extern "C" { - pub fn ts_query_string_count(arg1: *const TSQuery) -> u32; + pub fn ts_query_string_count(self_: *const TSQuery) -> u32; } extern "C" { - #[doc = " Get the byte offset where the given pattern starts in the query's source."] - #[doc = ""] - #[doc = " This can be useful when combining queries by concatenating their source"] - #[doc = " code strings."] - pub fn ts_query_start_byte_for_pattern(arg1: *const 
TSQuery, arg2: u32) -> u32; + #[doc = " Get the byte offset where the given pattern starts in the query's source.\n\n This can be useful when combining queries by concatenating their source\n code strings."] + pub fn ts_query_start_byte_for_pattern(self_: *const TSQuery, pattern_index: u32) -> u32; } extern "C" { - #[doc = " Get all of the predicates for the given pattern in the query."] - #[doc = ""] - #[doc = " The predicates are represented as a single array of steps. There are three"] - #[doc = " types of steps in this array, which correspond to the three legal values for"] - #[doc = " the `type` field:"] - #[doc = " - `TSQueryPredicateStepTypeCapture` - Steps with this type represent names"] - #[doc = " of captures. Their `value_id` can be used with the"] - #[doc = " `ts_query_capture_name_for_id` function to obtain the name of the capture."] - #[doc = " - `TSQueryPredicateStepTypeString` - Steps with this type represent literal"] - #[doc = " strings. Their `value_id` can be used with the"] - #[doc = " `ts_query_string_value_for_id` function to obtain their string value."] - #[doc = " - `TSQueryPredicateStepTypeDone` - Steps with this type are *sentinels*"] - #[doc = " that represent the end of an individual predicate. If a pattern has two"] - #[doc = " predicates, then there will be two steps with this `type` in the array."] + #[doc = " Get all of the predicates for the given pattern in the query.\n\n The predicates are represented as a single array of steps. There are three\n types of steps in this array, which correspond to the three legal values for\n the `type` field:\n - `TSQueryPredicateStepTypeCapture` - Steps with this type represent names\n of captures. Their `value_id` can be used with the\n [`ts_query_capture_name_for_id`] function to obtain the name of the capture.\n - `TSQueryPredicateStepTypeString` - Steps with this type represent literal\n strings. 
Their `value_id` can be used with the\n [`ts_query_string_value_for_id`] function to obtain their string value.\n - `TSQueryPredicateStepTypeDone` - Steps with this type are *sentinels*\n that represent the end of an individual predicate. If a pattern has two\n predicates, then there will be two steps with this `type` in the array."] pub fn ts_query_predicates_for_pattern( self_: *const TSQuery, pattern_index: u32, - length: *mut u32, + step_count: *mut u32, ) -> *const TSQueryPredicateStep; } extern "C" { - pub fn ts_query_is_pattern_non_local(self_: *const TSQuery, pattern_index: u32) -> bool; + pub fn ts_query_is_pattern_rooted(self_: *const TSQuery, pattern_index: u32) -> bool; } extern "C" { - pub fn ts_query_is_pattern_rooted(self_: *const TSQuery, pattern_index: u32) -> bool; + pub fn ts_query_is_pattern_non_local(self_: *const TSQuery, pattern_index: u32) -> bool; } extern "C" { pub fn ts_query_is_pattern_guaranteed_at_step(self_: *const TSQuery, byte_offset: u32) -> bool; } extern "C" { - #[doc = " Get the name and length of one of the query's captures, or one of the"] - #[doc = " query's string literals. Each capture and string is associated with a"] - #[doc = " numeric id based on the order that it appeared in the query's source."] + #[doc = " Get the name and length of one of the query's captures, or one of the\n query's string literals. Each capture and string is associated with a\n numeric id based on the order that it appeared in the query's source."] pub fn ts_query_capture_name_for_id( - arg1: *const TSQuery, - id: u32, + self_: *const TSQuery, + index: u32, length: *mut u32, ) -> *const ::std::os::raw::c_char; } extern "C" { - #[doc = " Get the quantifier of the query's captures. Each capture is * associated"] - #[doc = " with a numeric id based on the order that it appeared in the query's source."] + #[doc = " Get the quantifier of the query's captures. 
Each capture is * associated\n with a numeric id based on the order that it appeared in the query's source."] pub fn ts_query_capture_quantifier_for_id( - arg1: *const TSQuery, - pattern_id: u32, - capture_id: u32, + self_: *const TSQuery, + pattern_index: u32, + capture_index: u32, ) -> TSQuantifier; } extern "C" { pub fn ts_query_string_value_for_id( - arg1: *const TSQuery, - id: u32, + self_: *const TSQuery, + index: u32, length: *mut u32, ) -> *const ::std::os::raw::c_char; } extern "C" { - #[doc = " Disable a certain capture within a query."] - #[doc = ""] - #[doc = " This prevents the capture from being returned in matches, and also avoids"] - #[doc = " any resource usage associated with recording the capture. Currently, there"] - #[doc = " is no way to undo this."] + #[doc = " Disable a certain capture within a query.\n\n This prevents the capture from being returned in matches, and also avoids\n any resource usage associated with recording the capture. Currently, there\n is no way to undo this."] pub fn ts_query_disable_capture( - arg1: *mut TSQuery, - arg2: *const ::std::os::raw::c_char, - arg3: u32, + self_: *mut TSQuery, + name: *const ::std::os::raw::c_char, + length: u32, ); } extern "C" { - #[doc = " Disable a certain pattern within a query."] - #[doc = ""] - #[doc = " This prevents the pattern from matching and removes most of the overhead"] - #[doc = " associated with the pattern. Currently, there is no way to undo this."] - pub fn ts_query_disable_pattern(arg1: *mut TSQuery, arg2: u32); -} -extern "C" { - #[doc = " Create a new cursor for executing a given query."] - #[doc = ""] - #[doc = " The cursor stores the state that is needed to iteratively search"] - #[doc = " for matches. To use the query cursor, first call `ts_query_cursor_exec`"] - #[doc = " to start running a given query on a given syntax node. Then, there are"] - #[doc = " two options for consuming the results of the query:"] - #[doc = " 1. 
Repeatedly call `ts_query_cursor_next_match` to iterate over all of the"] - #[doc = " *matches* in the order that they were found. Each match contains the"] - #[doc = " index of the pattern that matched, and an array of captures. Because"] - #[doc = " multiple patterns can match the same set of nodes, one match may contain"] - #[doc = " captures that appear *before* some of the captures from a previous match."] - #[doc = " 2. Repeatedly call `ts_query_cursor_next_capture` to iterate over all of the"] - #[doc = " individual *captures* in the order that they appear. This is useful if"] - #[doc = " don't care about which pattern matched, and just want a single ordered"] - #[doc = " sequence of captures."] - #[doc = ""] - #[doc = " If you don't care about consuming all of the results, you can stop calling"] - #[doc = " `ts_query_cursor_next_match` or `ts_query_cursor_next_capture` at any point."] - #[doc = " You can then start executing another query on another node by calling"] - #[doc = " `ts_query_cursor_exec` again."] + #[doc = " Disable a certain pattern within a query.\n\n This prevents the pattern from matching and removes most of the overhead\n associated with the pattern. Currently, there is no way to undo this."] + pub fn ts_query_disable_pattern(self_: *mut TSQuery, pattern_index: u32); +} +extern "C" { + #[doc = " Create a new cursor for executing a given query.\n\n The cursor stores the state that is needed to iteratively search\n for matches. To use the query cursor, first call [`ts_query_cursor_exec`]\n to start running a given query on a given syntax node. Then, there are\n two options for consuming the results of the query:\n 1. Repeatedly call [`ts_query_cursor_next_match`] to iterate over all of the\n *matches* in the order that they were found. Each match contains the\n index of the pattern that matched, and an array of captures. 
Because\n multiple patterns can match the same set of nodes, one match may contain\n captures that appear *before* some of the captures from a previous match.\n 2. Repeatedly call [`ts_query_cursor_next_capture`] to iterate over all of the\n individual *captures* in the order that they appear. This is useful if\n don't care about which pattern matched, and just want a single ordered\n sequence of captures.\n\n If you don't care about consuming all of the results, you can stop calling\n [`ts_query_cursor_next_match`] or [`ts_query_cursor_next_capture`] at any point.\n You can then start executing another query on another node by calling\n [`ts_query_cursor_exec`] again."] pub fn ts_query_cursor_new() -> *mut TSQueryCursor; } extern "C" { #[doc = " Delete a query cursor, freeing all of the memory that it used."] - pub fn ts_query_cursor_delete(arg1: *mut TSQueryCursor); + pub fn ts_query_cursor_delete(self_: *mut TSQueryCursor); } extern "C" { #[doc = " Start running a given query on a given node."] - pub fn ts_query_cursor_exec(arg1: *mut TSQueryCursor, arg2: *const TSQuery, arg3: TSNode); + pub fn ts_query_cursor_exec(self_: *mut TSQueryCursor, query: *const TSQuery, node: TSNode); } extern "C" { - #[doc = " Manage the maximum number of in-progress matches allowed by this query"] - #[doc = " cursor."] - #[doc = ""] - #[doc = " Query cursors have an optional maximum capacity for storing lists of"] - #[doc = " in-progress captures. If this capacity is exceeded, then the"] - #[doc = " earliest-starting match will silently be dropped to make room for further"] - #[doc = " matches. 
This maximum capacity is optional — by default, query cursors allow"] - #[doc = " any number of pending matches, dynamically allocating new space for them as"] - #[doc = " needed as the query is executed."] - pub fn ts_query_cursor_did_exceed_match_limit(arg1: *const TSQueryCursor) -> bool; + #[doc = " Manage the maximum number of in-progress matches allowed by this query\n cursor.\n\n Query cursors have an optional maximum capacity for storing lists of\n in-progress captures. If this capacity is exceeded, then the\n earliest-starting match will silently be dropped to make room for further\n matches. This maximum capacity is optional — by default, query cursors allow\n any number of pending matches, dynamically allocating new space for them as\n needed as the query is executed."] + pub fn ts_query_cursor_did_exceed_match_limit(self_: *const TSQueryCursor) -> bool; } extern "C" { - pub fn ts_query_cursor_match_limit(arg1: *const TSQueryCursor) -> u32; + pub fn ts_query_cursor_match_limit(self_: *const TSQueryCursor) -> u32; } extern "C" { - pub fn ts_query_cursor_set_match_limit(arg1: *mut TSQueryCursor, arg2: u32); + pub fn ts_query_cursor_set_match_limit(self_: *mut TSQueryCursor, limit: u32); } extern "C" { - #[doc = " Set the range of bytes or (row, column) positions in which the query"] - #[doc = " will be executed."] - pub fn ts_query_cursor_set_byte_range(arg1: *mut TSQueryCursor, arg2: u32, arg3: u32); + #[doc = " Set the range of bytes or (row, column) positions in which the query\n will be executed."] + pub fn ts_query_cursor_set_byte_range( + self_: *mut TSQueryCursor, + start_byte: u32, + end_byte: u32, + ); } extern "C" { - pub fn ts_query_cursor_set_point_range(arg1: *mut TSQueryCursor, arg2: TSPoint, arg3: TSPoint); + pub fn ts_query_cursor_set_point_range( + self_: *mut TSQueryCursor, + start_point: TSPoint, + end_point: TSPoint, + ); } extern "C" { - #[doc = " Advance to the next match of the currently running query."] - #[doc = ""] - #[doc = " If 
there is a match, write it to `*match` and return `true`."] - #[doc = " Otherwise, return `false`."] - pub fn ts_query_cursor_next_match(arg1: *mut TSQueryCursor, match_: *mut TSQueryMatch) -> bool; + #[doc = " Advance to the next match of the currently running query.\n\n If there is a match, write it to `*match` and return `true`.\n Otherwise, return `false`."] + pub fn ts_query_cursor_next_match(self_: *mut TSQueryCursor, match_: *mut TSQueryMatch) + -> bool; } extern "C" { - pub fn ts_query_cursor_remove_match(arg1: *mut TSQueryCursor, id: u32); + pub fn ts_query_cursor_remove_match(self_: *mut TSQueryCursor, match_id: u32); } extern "C" { - #[doc = " Advance to the next capture of the currently running query."] - #[doc = ""] - #[doc = " If there is a capture, write its match to `*match` and its index within"] - #[doc = " the matche's capture list to `*capture_index`. Otherwise, return `false`."] + #[doc = " Advance to the next capture of the currently running query.\n\n If there is a capture, write its match to `*match` and its index within\n the matche's capture list to `*capture_index`. Otherwise, return `false`."] pub fn ts_query_cursor_next_capture( - arg1: *mut TSQueryCursor, + self_: *mut TSQueryCursor, match_: *mut TSQueryMatch, capture_index: *mut u32, ) -> bool; } +extern "C" { + #[doc = " Set the maximum start depth for a query cursor.\n\n This prevents cursors from exploring children nodes at a certain depth.\n Note if a pattern includes many children, then they will still be checked.\n\n The zero max start depth value can be used as a special behavior and\n it helps to destructure a subtree by staying on a node and using captures\n for interested parts. 
Note that the zero max start depth only limit a search\n depth for a pattern's root node but other nodes that are parts of the pattern\n may be searched at any depth what defined by the pattern structure.\n\n Set to `UINT32_MAX` to remove the maximum start depth."] + pub fn ts_query_cursor_set_max_start_depth(self_: *mut TSQueryCursor, max_start_depth: u32); +} +extern "C" { + #[doc = " Get another reference to the given language."] + pub fn ts_language_copy(self_: *const TSLanguage) -> *const TSLanguage; +} +extern "C" { + #[doc = " Free any dynamically-allocated resources for this language, if\n this is the last reference."] + pub fn ts_language_delete(self_: *const TSLanguage); +} extern "C" { #[doc = " Get the number of distinct node types in the language."] - pub fn ts_language_symbol_count(arg1: *const TSLanguage) -> u32; + pub fn ts_language_symbol_count(self_: *const TSLanguage) -> u32; +} +extern "C" { + #[doc = " Get the number of valid states in this language."] + pub fn ts_language_state_count(self_: *const TSLanguage) -> u32; } extern "C" { #[doc = " Get a node type string for the given numerical id."] pub fn ts_language_symbol_name( - arg1: *const TSLanguage, - arg2: TSSymbol, + self_: *const TSLanguage, + symbol: TSSymbol, ) -> *const ::std::os::raw::c_char; } extern "C" { @@ -831,53 +698,145 @@ extern "C" { } extern "C" { #[doc = " Get the number of distinct field names in the language."] - pub fn ts_language_field_count(arg1: *const TSLanguage) -> u32; + pub fn ts_language_field_count(self_: *const TSLanguage) -> u32; } extern "C" { #[doc = " Get the field name string for the given numerical id."] pub fn ts_language_field_name_for_id( - arg1: *const TSLanguage, - arg2: TSFieldId, + self_: *const TSLanguage, + id: TSFieldId, ) -> *const ::std::os::raw::c_char; } extern "C" { #[doc = " Get the numerical id for the given field name string."] pub fn ts_language_field_id_for_name( - arg1: *const TSLanguage, - arg2: *const ::std::os::raw::c_char, - arg3: 
u32, + self_: *const TSLanguage, + name: *const ::std::os::raw::c_char, + name_length: u32, ) -> TSFieldId; } extern "C" { - #[doc = " Check whether the given node type id belongs to named nodes, anonymous nodes,"] - #[doc = " or a hidden nodes."] - #[doc = ""] - #[doc = " See also `ts_node_is_named`. Hidden nodes are never returned from the API."] - pub fn ts_language_symbol_type(arg1: *const TSLanguage, arg2: TSSymbol) -> TSSymbolType; -} -extern "C" { - #[doc = " Get the ABI version number for this language. This version number is used"] - #[doc = " to ensure that languages were generated by a compatible version of"] - #[doc = " Tree-sitter."] - #[doc = ""] - #[doc = " See also `ts_parser_set_language`."] - pub fn ts_language_version(arg1: *const TSLanguage) -> u32; -} -extern "C" { - #[doc = " Set the allocation functions used by the library."] - #[doc = ""] - #[doc = " By default, Tree-sitter uses the standard libc allocation functions,"] - #[doc = " but aborts the process when an allocation fails. This function lets"] - #[doc = " you supply alternative allocation functions at runtime."] - #[doc = ""] - #[doc = " If you pass `NULL` for any parameter, Tree-sitter will switch back to"] - #[doc = " its default implementation of that function."] - #[doc = ""] - #[doc = " If you call this function after the library has already been used, then"] - #[doc = " you must ensure that either:"] - #[doc = " 1. All the existing objects have been freed."] - #[doc = " 2. The new allocator shares its state with the old one, so it is capable"] - #[doc = " of freeing memory that was allocated by the old allocator."] + #[doc = " Check whether the given node type id belongs to named nodes, anonymous nodes,\n or a hidden nodes.\n\n See also [`ts_node_is_named`]. Hidden nodes are never returned from the API."] + pub fn ts_language_symbol_type(self_: *const TSLanguage, symbol: TSSymbol) -> TSSymbolType; +} +extern "C" { + #[doc = " Get the ABI version number for this language. 
This version number is used\n to ensure that languages were generated by a compatible version of\n Tree-sitter.\n\n See also [`ts_parser_set_language`]."] + pub fn ts_language_version(self_: *const TSLanguage) -> u32; +} +extern "C" { + #[doc = " Get the next parse state. Combine this with lookahead iterators to generate\n completion suggestions or valid symbols in error nodes. Use\n [`ts_node_grammar_symbol`] for valid symbols."] + pub fn ts_language_next_state( + self_: *const TSLanguage, + state: TSStateId, + symbol: TSSymbol, + ) -> TSStateId; +} +extern "C" { + #[doc = " Create a new lookahead iterator for the given language and parse state.\n\n This returns `NULL` if state is invalid for the language.\n\n Repeatedly using [`ts_lookahead_iterator_next`] and\n [`ts_lookahead_iterator_current_symbol`] will generate valid symbols in the\n given parse state. Newly created lookahead iterators will contain the `ERROR`\n symbol.\n\n Lookahead iterators can be useful to generate suggestions and improve syntax\n error diagnostics. To get symbols valid in an ERROR node, use the lookahead\n iterator on its first leaf node state. 
For `MISSING` nodes, a lookahead\n iterator created on the previous non-extra leaf node may be appropriate."] + pub fn ts_lookahead_iterator_new( + self_: *const TSLanguage, + state: TSStateId, + ) -> *mut TSLookaheadIterator; +} +extern "C" { + #[doc = " Delete a lookahead iterator freeing all the memory used."] + pub fn ts_lookahead_iterator_delete(self_: *mut TSLookaheadIterator); +} +extern "C" { + #[doc = " Reset the lookahead iterator to another state.\n\n This returns `true` if the iterator was reset to the given state and `false`\n otherwise."] + pub fn ts_lookahead_iterator_reset_state( + self_: *mut TSLookaheadIterator, + state: TSStateId, + ) -> bool; +} +extern "C" { + #[doc = " Reset the lookahead iterator.\n\n This returns `true` if the language was set successfully and `false`\n otherwise."] + pub fn ts_lookahead_iterator_reset( + self_: *mut TSLookaheadIterator, + language: *const TSLanguage, + state: TSStateId, + ) -> bool; +} +extern "C" { + #[doc = " Get the current language of the lookahead iterator."] + pub fn ts_lookahead_iterator_language(self_: *const TSLookaheadIterator) -> *const TSLanguage; +} +extern "C" { + #[doc = " Advance the lookahead iterator to the next symbol.\n\n This returns `true` if there is a new symbol and `false` otherwise."] + pub fn ts_lookahead_iterator_next(self_: *mut TSLookaheadIterator) -> bool; +} +extern "C" { + #[doc = " Get the current symbol of the lookahead iterator;"] + pub fn ts_lookahead_iterator_current_symbol(self_: *const TSLookaheadIterator) -> TSSymbol; +} +extern "C" { + #[doc = " Get the current symbol type of the lookahead iterator as a null terminated\n string."] + pub fn ts_lookahead_iterator_current_symbol_name( + self_: *const TSLookaheadIterator, + ) -> *const ::std::os::raw::c_char; +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct wasm_engine_t { + _unused: [u8; 0], +} +pub type TSWasmEngine = wasm_engine_t; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSWasmStore { + 
_unused: [u8; 0], +} +pub const TSWasmErrorKindNone: TSWasmErrorKind = 0; +pub const TSWasmErrorKindParse: TSWasmErrorKind = 1; +pub const TSWasmErrorKindCompile: TSWasmErrorKind = 2; +pub const TSWasmErrorKindInstantiate: TSWasmErrorKind = 3; +pub const TSWasmErrorKindAllocate: TSWasmErrorKind = 4; +pub type TSWasmErrorKind = ::std::os::raw::c_uint; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSWasmError { + pub kind: TSWasmErrorKind, + pub message: *mut ::std::os::raw::c_char, +} +extern "C" { + #[doc = " Create a Wasm store."] + pub fn ts_wasm_store_new( + engine: *mut TSWasmEngine, + error: *mut TSWasmError, + ) -> *mut TSWasmStore; +} +extern "C" { + #[doc = " Free the memory associated with the given Wasm store."] + pub fn ts_wasm_store_delete(arg1: *mut TSWasmStore); +} +extern "C" { + #[doc = " Create a language from a buffer of Wasm. The resulting language behaves\n like any other Tree-sitter language, except that in order to use it with\n a parser, that parser must have a Wasm store. Note that the language\n can be used with any Wasm store, it doesn't need to be the same store that\n was used to originally load it."] + pub fn ts_wasm_store_load_language( + arg1: *mut TSWasmStore, + name: *const ::std::os::raw::c_char, + wasm: *const ::std::os::raw::c_char, + wasm_len: u32, + error: *mut TSWasmError, + ) -> *const TSLanguage; +} +extern "C" { + #[doc = " Get the number of languages instantiated in the given wasm store."] + pub fn ts_wasm_store_language_count(arg1: *const TSWasmStore) -> usize; +} +extern "C" { + #[doc = " Check if the language came from a Wasm module. If so, then in order to use\n this language with a Parser, that parser must have a Wasm store assigned."] + pub fn ts_language_is_wasm(arg1: *const TSLanguage) -> bool; +} +extern "C" { + #[doc = " Assign the given Wasm store to the parser. 
A parser must have a Wasm store\n in order to use Wasm languages."] + pub fn ts_parser_set_wasm_store(arg1: *mut TSParser, arg2: *mut TSWasmStore); +} +extern "C" { + #[doc = " Remove the parser's current Wasm store and return it. This returns NULL if\n the parser doesn't have a Wasm store."] + pub fn ts_parser_take_wasm_store(arg1: *mut TSParser) -> *mut TSWasmStore; +} +extern "C" { + #[doc = " Set the allocation functions used by the library.\n\n By default, Tree-sitter uses the standard libc allocation functions,\n but aborts the process when an allocation fails. This function lets\n you supply alternative allocation functions at runtime.\n\n If you pass `NULL` for any parameter, Tree-sitter will switch back to\n its default implementation of that function.\n\n If you call this function after the library has already been used, then\n you must ensure that either:\n 1. All the existing objects have been freed.\n 2. The new allocator shares its state with the old one, so it is capable\n of freeing memory that was allocated by the old allocator."] pub fn ts_set_allocator( new_malloc: ::std::option::Option< unsafe extern "C" fn(arg1: usize) -> *mut ::std::os::raw::c_void, @@ -894,6 +853,3 @@ extern "C" { new_free: ::std::option::Option, ); } - -pub const TREE_SITTER_LANGUAGE_VERSION: usize = 14; -pub const TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION: usize = 13; diff --git a/third-party/tree-sitter/tree-sitter/lib/binding_rust/build.rs b/third-party/tree-sitter/tree-sitter/lib/binding_rust/build.rs index 5798cde3fa6..285448974f5 100644 --- a/third-party/tree-sitter/tree-sitter/lib/binding_rust/build.rs +++ b/third-party/tree-sitter/tree-sitter/lib/binding_rust/build.rs @@ -2,6 +2,8 @@ use std::path::{Path, PathBuf}; use std::{env, fs}; fn main() { + let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap()); + println!("cargo:rerun-if-env-changed=TREE_SITTER_STATIC_ANALYSIS"); if env::var("TREE_SITTER_STATIC_ANALYSIS").is_ok() { if let (Some(clang_path), 
Some(scan_build_path)) = (which("clang"), which("scan-build")) { @@ -9,28 +11,90 @@ fn main() { let scan_build_path = scan_build_path.to_str().unwrap(); env::set_var( "CC", - &format!( - "{} -analyze-headers --use-analyzer={} cc", - scan_build_path, clang_path - ), + format!("{scan_build_path} -analyze-headers --use-analyzer={clang_path} cc",), ); } } - let src_path = Path::new("src"); + #[cfg(feature = "bindgen")] + generate_bindings(&out_dir); + + fs::copy( + "src/wasm/stdlib-symbols.txt", + out_dir.join("stdlib-symbols.txt"), + ) + .unwrap(); + + let mut config = cc::Build::new(); + + println!("cargo:rerun-if-env-changed=CARGO_FEATURE_WASM"); + if env::var("CARGO_FEATURE_WASM").is_ok() { + config + .define("TREE_SITTER_FEATURE_WASM", "") + .define("static_assert(...)", "") + .include(env::var("DEP_WASMTIME_C_API_INCLUDE").unwrap()) + .include(env::var("DEP_WASMTIME_C_API_WASM_INCLUDE").unwrap()); + } + + let manifest_path = Path::new(env!("CARGO_MANIFEST_DIR")); + let include_path = manifest_path.join("include"); + let src_path = manifest_path.join("src"); + let wasm_path = src_path.join("wasm"); for entry in fs::read_dir(&src_path).unwrap() { let entry = entry.unwrap(); let path = src_path.join(entry.file_name()); println!("cargo:rerun-if-changed={}", path.to_str().unwrap()); } - cc::Build::new() - .flag_if_supported("-std=c99") + config + .flag_if_supported("-std=c11") + .flag_if_supported("-fvisibility=hidden") + .flag_if_supported("-Wshadow") .flag_if_supported("-Wno-unused-parameter") - .include(src_path) - .include("include") + .include(&src_path) + .include(&wasm_path) + .include(&include_path) .file(src_path.join("lib.c")) .compile("tree-sitter"); + + println!("cargo:include={}", include_path.display()); +} + +#[cfg(feature = "bindgen")] +fn generate_bindings(out_dir: &Path) { + const HEADER_PATH: &str = "include/tree_sitter/api.h"; + + println!("cargo:rerun-if-changed={HEADER_PATH}"); + + let no_copy = [ + "TSInput", + "TSLanguage", + "TSLogger", + 
"TSLookaheadIterator", + "TSParser", + "TSTree", + "TSQuery", + "TSQueryCursor", + "TSQueryCapture", + "TSQueryMatch", + "TSQueryPredicateStep", + ]; + + let bindings = bindgen::Builder::default() + .header(HEADER_PATH) + .layout_tests(false) + .allowlist_type("^TS.*") + .allowlist_function("^ts_.*") + .allowlist_var("^TREE_SITTER.*") + .no_copy(no_copy.join("|")) + .prepend_enum_name(false) + .generate() + .expect("Failed to generate bindings"); + + let bindings_rs = out_dir.join("bindings.rs"); + bindings + .write_to_file(&bindings_rs) + .unwrap_or_else(|_| panic!("Failed to write bindings into path: {bindings_rs:?}")); } fn which(exe_name: impl AsRef) -> Option { diff --git a/third-party/tree-sitter/tree-sitter/lib/binding_rust/ffi.rs b/third-party/tree-sitter/tree-sitter/lib/binding_rust/ffi.rs index 685ed765580..23b9e33fe8e 100644 --- a/third-party/tree-sitter/tree-sitter/lib/binding_rust/ffi.rs +++ b/third-party/tree-sitter/tree-sitter/lib/binding_rust/ffi.rs @@ -2,8 +2,174 @@ #![allow(non_upper_case_globals)] #![allow(non_camel_case_types)] +#[cfg(feature = "bindgen")] +include!(concat!(env!("OUT_DIR"), "/bindings.rs")); + +#[cfg(not(feature = "bindgen"))] include!("./bindings.rs"); +#[cfg(unix)] +extern "C" { + pub(crate) fn _ts_dup(fd: std::os::raw::c_int) -> std::os::raw::c_int; +} + +#[cfg(windows)] extern "C" { - pub(crate) fn dup(fd: std::os::raw::c_int) -> std::os::raw::c_int; + pub(crate) fn _ts_dup(handle: *mut std::os::raw::c_void) -> std::os::raw::c_int; +} + +use crate::{ + Language, LookaheadIterator, Node, Parser, Query, QueryCursor, QueryError, Tree, TreeCursor, +}; +use std::{marker::PhantomData, mem::ManuallyDrop, ptr::NonNull, str}; + +impl Language { + /// Reconstructs a [`Language`] from a raw pointer. + /// + /// # Safety + /// + /// `ptr` must be non-null. 
+ #[must_use] + pub const unsafe fn from_raw(ptr: *const TSLanguage) -> Self { + Self(ptr) + } + + /// Consumes the [`Language`], returning a raw pointer to the underlying C structure. + #[must_use] + pub fn into_raw(self) -> *const TSLanguage { + ManuallyDrop::new(self).0 + } +} + +impl Parser { + /// Reconstructs a [`Parser`] from a raw pointer. + /// + /// # Safety + /// + /// `ptr` must be non-null. + #[must_use] + pub const unsafe fn from_raw(ptr: *mut TSParser) -> Self { + Self(NonNull::new_unchecked(ptr)) + } + + /// Consumes the [`Parser`], returning a raw pointer to the underlying C structure. + /// + /// # Safety + /// + /// It's a caller responsibility to adjust parser's state + /// like disable logging or dot graphs printing if this + /// may cause issues like use after free. + #[must_use] + pub fn into_raw(self) -> *mut TSParser { + ManuallyDrop::new(self).0.as_ptr() + } +} + +impl Tree { + /// Reconstructs a [`Tree`] from a raw pointer. + /// + /// # Safety + /// + /// `ptr` must be non-null. + #[must_use] + pub const unsafe fn from_raw(ptr: *mut TSTree) -> Self { + Self(NonNull::new_unchecked(ptr)) + } + + /// Consumes the [`Tree`], returning a raw pointer to the underlying C structure. + #[must_use] + pub fn into_raw(self) -> *mut TSTree { + ManuallyDrop::new(self).0.as_ptr() + } +} + +impl<'tree> Node<'tree> { + /// Reconstructs a [`Node`] from a raw pointer. + /// + /// # Safety + /// + /// `ptr` must be non-null. + #[must_use] + pub const unsafe fn from_raw(raw: TSNode) -> Node<'tree> { + Self(raw, PhantomData) + } + + /// Consumes the [`Node`], returning a raw pointer to the underlying C structure. + #[must_use] + pub fn into_raw(self) -> TSNode { + ManuallyDrop::new(self).0 + } +} + +impl<'a> TreeCursor<'a> { + /// Reconstructs a [`TreeCursor`] from a raw pointer. + /// + /// # Safety + /// + /// `ptr` must be non-null. 
+ #[must_use] + pub const unsafe fn from_raw(raw: TSTreeCursor) -> TreeCursor<'a> { + Self(raw, PhantomData) + } + + /// Consumes the [`TreeCursor`], returning a raw pointer to the underlying C structure. + #[must_use] + pub fn into_raw(self) -> TSTreeCursor { + ManuallyDrop::new(self).0 + } +} + +impl Query { + /// Reconstructs a [`Query`] from a raw pointer. + /// + /// # Safety + /// + /// `ptr` must be non-null. + pub unsafe fn from_raw(ptr: *mut TSQuery, source: &str) -> Result { + Self::from_raw_parts(ptr, source) + } + + /// Consumes the [`Query`], returning a raw pointer to the underlying C structure. + #[must_use] + pub fn into_raw(self) -> *mut TSQuery { + ManuallyDrop::new(self).ptr.as_ptr() + } +} + +impl QueryCursor { + /// Reconstructs a [`QueryCursor`] from a raw pointer. + /// + /// # Safety + /// + /// `ptr` must be non-null. + #[must_use] + pub const unsafe fn from_raw(ptr: *mut TSQueryCursor) -> Self { + Self { + ptr: NonNull::new_unchecked(ptr), + } + } + + /// Consumes the [`QueryCursor`], returning a raw pointer to the underlying C structure. + #[must_use] + pub fn into_raw(self) -> *mut TSQueryCursor { + ManuallyDrop::new(self).ptr.as_ptr() + } +} + +impl LookaheadIterator { + /// Reconstructs a [`LookaheadIterator`] from a raw pointer. + /// + /// # Safety + /// + /// `ptr` must be non-null. + #[must_use] + pub const unsafe fn from_raw(ptr: *mut TSLookaheadIterator) -> Self { + Self(NonNull::new_unchecked(ptr)) + } + + /// Consumes the [`LookaheadIterator`], returning a raw pointer to the underlying C structure. 
+ #[must_use] + pub fn into_raw(self) -> *mut TSLookaheadIterator { + ManuallyDrop::new(self).0.as_ptr() + } } diff --git a/third-party/tree-sitter/tree-sitter/lib/binding_rust/lib.rs b/third-party/tree-sitter/tree-sitter/lib/binding_rust/lib.rs index 579bf8e2ede..0ec9a8a013f 100644 --- a/third-party/tree-sitter/tree-sitter/lib/binding_rust/lib.rs +++ b/third-party/tree-sitter/tree-sitter/lib/binding_rust/lib.rs @@ -1,8 +1,12 @@ -mod ffi; +#![doc = include_str!("./README.md")] + +pub mod ffi; mod util; #[cfg(unix)] use std::os::unix::io::AsRawFd; +#[cfg(windows)] +use std::os::windows::io::AsRawHandle; use std::{ char, error, @@ -10,7 +14,8 @@ use std::{ fmt, hash, iter, marker::PhantomData, mem::MaybeUninit, - ops, + num::NonZeroU16, + ops::{self, Deref}, os::raw::{c_char, c_void}, ptr::{self, NonNull}, slice, str, @@ -18,6 +23,11 @@ use std::{ u16, }; +#[cfg(feature = "wasm")] +mod wasm_language; +#[cfg(feature = "wasm")] +pub use wasm_language::*; + /// The latest ABI version that is supported by the current version of the /// library. /// @@ -26,22 +36,26 @@ use std::{ /// The Tree-sitter library is generally backwards-compatible with languages /// generated using older CLI versions, but is not forwards-compatible. #[doc(alias = "TREE_SITTER_LANGUAGE_VERSION")] -pub const LANGUAGE_VERSION: usize = ffi::TREE_SITTER_LANGUAGE_VERSION; +pub const LANGUAGE_VERSION: usize = ffi::TREE_SITTER_LANGUAGE_VERSION as usize; /// The earliest ABI version that is supported by the current version of the /// library. 
#[doc(alias = "TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION")] -pub const MIN_COMPATIBLE_LANGUAGE_VERSION: usize = ffi::TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION; +pub const MIN_COMPATIBLE_LANGUAGE_VERSION: usize = + ffi::TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION as usize; -pub const PARSER_HEADER: &'static str = include_str!("../include/tree_sitter/parser.h"); +pub const ARRAY_HEADER: &str = include_str!("../src/array.h"); +pub const PARSER_HEADER: &str = include_str!("../src/parser.h"); /// An opaque object that defines how to parse a particular language. The code for each /// `Language` is generated by the Tree-sitter CLI. #[doc(alias = "TSLanguage")] -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[derive(Debug, PartialEq, Eq, Hash)] #[repr(transparent)] pub struct Language(*const ffi::TSLanguage); +pub struct LanguageRef<'a>(*const ffi::TSLanguage, PhantomData<&'a ()>); + /// A tree that represents the syntactic structure of a source code file. #[doc(alias = "TSTree")] pub struct Tree(NonNull); @@ -57,7 +71,7 @@ pub struct Point { /// A range of positions in a multi-line text document, both in terms of bytes and of /// rows and columns. -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] pub struct Range { pub start_byte: usize, pub end_byte: usize, @@ -76,16 +90,21 @@ pub struct InputEdit { pub new_end_position: Point, } -/// A single node within a syntax `Tree`. +/// A single node within a syntax [`Tree`]. #[doc(alias = "TSNode")] #[derive(Clone, Copy)] #[repr(transparent)] -pub struct Node<'a>(ffi::TSNode, PhantomData<&'a ()>); +pub struct Node<'tree>(ffi::TSNode, PhantomData<&'tree ()>); -/// A stateful object that this is used to produce a `Tree` based on some source code. +/// A stateful object that this is used to produce a [`Tree`] based on some source code. 
#[doc(alias = "TSParser")] pub struct Parser(NonNull); +/// A stateful object that is used to look up symbols valid in a specific parse state +#[doc(alias = "TSLookaheadIterator")] +pub struct LookaheadIterator(NonNull); +struct LookaheadNamesIterator<'a>(&'a mut LookaheadIterator); + /// A type of log message. #[derive(Debug, PartialEq, Eq)] pub enum LogType { @@ -93,24 +112,27 @@ pub enum LogType { Lex, } +type FieldId = NonZeroU16; + /// A callback that receives log messages during parser. type Logger<'a> = Box; -/// A stateful object for walking a syntax `Tree` efficiently. +/// A stateful object for walking a syntax [`Tree`] efficiently. #[doc(alias = "TSTreeCursor")] -pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>); +pub struct TreeCursor<'cursor>(ffi::TSTreeCursor, PhantomData<&'cursor ()>); /// A set of patterns that match nodes in a syntax tree. #[doc(alias = "TSQuery")] #[derive(Debug)] +#[allow(clippy::type_complexity)] pub struct Query { ptr: NonNull, - capture_names: Vec, - capture_quantifiers: Vec>, - text_predicates: Vec>, - property_settings: Vec>, - property_predicates: Vec>, - general_predicates: Vec>, + capture_names: Box<[&'static str]>, + capture_quantifiers: Box<[Box<[CaptureQuantifier]>]>, + text_predicates: Box<[Box<[TextPredicateCapture]>]>, + property_settings: Box<[Box<[QueryProperty]>]>, + property_predicates: Box<[Box<[(QueryProperty, bool)]>]>, + general_predicates: Box<[Box<[QueryPredicate]>]>, } /// A quantifier for captures @@ -126,23 +148,23 @@ pub enum CaptureQuantifier { impl From for CaptureQuantifier { fn from(value: ffi::TSQuantifier) -> Self { match value { - ffi::TSQuantifier_TSQuantifierZero => CaptureQuantifier::Zero, - ffi::TSQuantifier_TSQuantifierZeroOrOne => CaptureQuantifier::ZeroOrOne, - ffi::TSQuantifier_TSQuantifierZeroOrMore => CaptureQuantifier::ZeroOrMore, - ffi::TSQuantifier_TSQuantifierOne => CaptureQuantifier::One, - ffi::TSQuantifier_TSQuantifierOneOrMore => CaptureQuantifier::OneOrMore, - 
_ => panic!("Unrecognized quantifier: {}", value), + ffi::TSQuantifierZero => Self::Zero, + ffi::TSQuantifierZeroOrOne => Self::ZeroOrOne, + ffi::TSQuantifierZeroOrMore => Self::ZeroOrMore, + ffi::TSQuantifierOne => Self::One, + ffi::TSQuantifierOneOrMore => Self::OneOrMore, + _ => panic!("Unrecognized quantifier: {value}"), } } } -/// A stateful object for executing a `Query` on a syntax `Tree`. +/// A stateful object for executing a [`Query`] on a syntax [`Tree`]. #[doc(alias = "TSQueryCursor")] pub struct QueryCursor { ptr: NonNull, } -/// A key-value pair associated with a particular pattern in a `Query`. +/// A key-value pair associated with a particular pattern in a [`Query`]. #[derive(Debug, PartialEq, Eq)] pub struct QueryProperty { pub key: Box, @@ -156,14 +178,14 @@ pub enum QueryPredicateArg { String(Box), } -/// A key-value pair associated with a particular pattern in a `Query`. +/// A key-value pair associated with a particular pattern in a [`Query`]. #[derive(Debug, PartialEq, Eq)] pub struct QueryPredicate { pub operator: Box, - pub args: Vec, + pub args: Box<[QueryPredicateArg]>, } -/// A match of a `Query` to a particular set of `Node`s. +/// A match of a [`Query`] to a particular set of [`Node`]s. pub struct QueryMatch<'cursor, 'tree> { pub pattern_index: usize, pub captures: &'cursor [QueryCapture<'tree>], @@ -171,50 +193,53 @@ pub struct QueryMatch<'cursor, 'tree> { cursor: *mut ffi::TSQueryCursor, } -/// A sequence of `QueryMatch`es associated with a given `QueryCursor`. -pub struct QueryMatches<'a, 'tree: 'a, T: TextProvider<'a>> { +/// A sequence of [`QueryMatch`]es associated with a given [`QueryCursor`]. 
+pub struct QueryMatches<'query, 'cursor, T: TextProvider, I: AsRef<[u8]>> { ptr: *mut ffi::TSQueryCursor, - query: &'a Query, + query: &'query Query, text_provider: T, buffer1: Vec, buffer2: Vec, - _tree: PhantomData<&'tree ()>, + _phantom: PhantomData<(&'cursor (), I)>, } -/// A sequence of `QueryCapture`s associated with a given `QueryCursor`. -pub struct QueryCaptures<'a, 'tree: 'a, T: TextProvider<'a>> { +/// A sequence of [`QueryCapture`]s associated with a given [`QueryCursor`]. +pub struct QueryCaptures<'query, 'cursor, T: TextProvider, I: AsRef<[u8]>> { ptr: *mut ffi::TSQueryCursor, - query: &'a Query, + query: &'query Query, text_provider: T, buffer1: Vec, buffer2: Vec, - _tree: PhantomData<&'tree ()>, + _phantom: PhantomData<(&'cursor (), I)>, } -pub trait TextProvider<'a> { - type I: Iterator + 'a; +pub trait TextProvider +where + I: AsRef<[u8]>, +{ + type I: Iterator; fn text(&mut self, node: Node) -> Self::I; } -/// A particular `Node` that has been captured with a particular name within a `Query`. +/// A particular [`Node`] that has been captured with a particular name within a [`Query`]. #[derive(Clone, Copy, Debug)] #[repr(C)] -pub struct QueryCapture<'a> { - pub node: Node<'a>, +pub struct QueryCapture<'tree> { + pub node: Node<'tree>, pub index: u32, } -/// An error that occurred when trying to assign an incompatible `Language` to a `Parser`. +/// An error that occurred when trying to assign an incompatible [`Language`] to a [`Parser`]. #[derive(Debug, PartialEq, Eq)] pub struct LanguageError { version: usize, } -/// An error that occurred in `Parser::set_included_ranges`. +/// An error that occurred in [`Parser::set_included_ranges`]. #[derive(Debug, PartialEq, Eq)] pub struct IncludedRangesError(pub usize); -/// An error that occurred when trying to create a `Query`. +/// An error that occurred when trying to create a [`Query`]. 
#[derive(Debug, PartialEq, Eq)] pub struct QueryError { pub row: usize, @@ -236,10 +261,16 @@ pub enum QueryErrorKind { } #[derive(Debug)] -enum TextPredicate { - CaptureEqString(u32, String, bool), - CaptureEqCapture(u32, u32, bool), - CaptureMatchString(u32, regex::bytes::Regex, bool), +/// The first item is the capture index +/// The next is capture specific, depending on what item is expected +/// The first bool is if the capture is positive +/// The last item is a bool signifying whether or not it's meant to match +/// any or all captures +enum TextPredicateCapture { + EqString(u32, Box, bool, bool), + EqCapture(u32, u32, bool, bool), + MatchString(u32, regex::bytes::Regex, bool, bool), + AnyString(u32, Box<[Box]>, bool), } // TODO: Remove this struct at at some point. If `core::str::lossy::Utf8Lossy` @@ -251,36 +282,43 @@ pub struct LossyUtf8<'a> { impl Language { /// Get the ABI version number that indicates which version of the Tree-sitter CLI - /// that was used to generate this `Language`. + /// that was used to generate this [`Language`]. #[doc(alias = "ts_language_version")] + #[must_use] pub fn version(&self) -> usize { unsafe { ffi::ts_language_version(self.0) as usize } } /// Get the number of distinct node types in this language. #[doc(alias = "ts_language_symbol_count")] + #[must_use] pub fn node_kind_count(&self) -> usize { unsafe { ffi::ts_language_symbol_count(self.0) as usize } } + /// Get the number of valid states in this language. + #[doc(alias = "ts_language_state_count")] + #[must_use] + pub fn parse_state_count(&self) -> usize { + unsafe { ffi::ts_language_state_count(self.0) as usize } + } + /// Get the name of the node kind for the given numerical id. 
#[doc(alias = "ts_language_symbol_name")] + #[must_use] pub fn node_kind_for_id(&self, id: u16) -> Option<&'static str> { let ptr = unsafe { ffi::ts_language_symbol_name(self.0, id) }; - if ptr.is_null() { - None - } else { - Some(unsafe { CStr::from_ptr(ptr) }.to_str().unwrap()) - } + (!ptr.is_null()).then(|| unsafe { CStr::from_ptr(ptr) }.to_str().unwrap()) } /// Get the numeric id for the given node kind. #[doc(alias = "ts_language_symbol_for_name")] + #[must_use] pub fn id_for_node_kind(&self, kind: &str, named: bool) -> u16 { unsafe { ffi::ts_language_symbol_for_name( self.0, - kind.as_bytes().as_ptr() as *const c_char, + kind.as_bytes().as_ptr().cast::(), kind.len() as u32, named, ) @@ -289,59 +327,115 @@ impl Language { /// Check if the node type for the given numerical id is named (as opposed /// to an anonymous node type). + #[must_use] pub fn node_kind_is_named(&self, id: u16) -> bool { - unsafe { ffi::ts_language_symbol_type(self.0, id) == ffi::TSSymbolType_TSSymbolTypeRegular } + unsafe { ffi::ts_language_symbol_type(self.0, id) == ffi::TSSymbolTypeRegular } } #[doc(alias = "ts_language_symbol_type")] + #[must_use] pub fn node_kind_is_visible(&self, id: u16) -> bool { - unsafe { - ffi::ts_language_symbol_type(self.0, id) <= ffi::TSSymbolType_TSSymbolTypeAnonymous - } + unsafe { ffi::ts_language_symbol_type(self.0, id) <= ffi::TSSymbolTypeAnonymous } } /// Get the number of distinct field names in this language. #[doc(alias = "ts_language_field_count")] + #[must_use] pub fn field_count(&self) -> usize { unsafe { ffi::ts_language_field_count(self.0) as usize } } /// Get the field names for the given numerical id. 
#[doc(alias = "ts_language_field_name_for_id")] + #[must_use] pub fn field_name_for_id(&self, field_id: u16) -> Option<&'static str> { let ptr = unsafe { ffi::ts_language_field_name_for_id(self.0, field_id) }; - if ptr.is_null() { - None - } else { - Some(unsafe { CStr::from_ptr(ptr) }.to_str().unwrap()) - } + (!ptr.is_null()).then(|| unsafe { CStr::from_ptr(ptr) }.to_str().unwrap()) } /// Get the numerical id for the given field name. #[doc(alias = "ts_language_field_id_for_name")] - pub fn field_id_for_name(&self, field_name: impl AsRef<[u8]>) -> Option { + #[must_use] + pub fn field_id_for_name(&self, field_name: impl AsRef<[u8]>) -> Option { let field_name = field_name.as_ref(); let id = unsafe { ffi::ts_language_field_id_for_name( self.0, - field_name.as_ptr() as *const c_char, + field_name.as_ptr().cast::(), field_name.len() as u32, ) }; - if id == 0 { - None - } else { - Some(id) - } + FieldId::new(id) + } + + /// Get the next parse state. Combine this with + /// [`lookahead_iterator`](Language::lookahead_iterator) to + /// generate completion suggestions or valid symbols in error nodes. + /// + /// Example: + /// ``` + /// let state = language.next_state(node.parse_state(), node.grammar_id()); + /// ``` + #[doc(alias = "ts_language_next_state")] + #[must_use] + pub fn next_state(&self, state: u16, id: u16) -> u16 { + unsafe { ffi::ts_language_next_state(self.0, state, id) } + } + + /// Create a new lookahead iterator for this language and parse state. + /// + /// This returns `None` if state is invalid for this language. + /// + /// Iterating [`LookaheadIterator`] will yield valid symbols in the given + /// parse state. Newly created lookahead iterators will return the `ERROR` + /// symbol from [`LookaheadIterator::current_symbol`]. + /// + /// Lookahead iterators can be useful to generate suggestions and improve + /// syntax error diagnostics. To get symbols valid in an ERROR node, use the + /// lookahead iterator on its first leaf node state. 
For `MISSING` nodes, a + /// lookahead iterator created on the previous non-extra leaf node may be + /// appropriate. + #[doc(alias = "ts_lookahead_iterator_new")] + #[must_use] + pub fn lookahead_iterator(&self, state: u16) -> Option { + let ptr = unsafe { ffi::ts_lookahead_iterator_new(self.0, state) }; + (!ptr.is_null()).then(|| unsafe { LookaheadIterator::from_raw(ptr) }) + } +} + +impl Clone for Language { + fn clone(&self) -> Self { + unsafe { Self(ffi::ts_language_copy(self.0)) } + } +} + +impl Drop for Language { + fn drop(&mut self) { + unsafe { ffi::ts_language_delete(self.0) } + } +} + +impl<'a> Deref for LanguageRef<'a> { + type Target = Language; + + fn deref(&self) -> &Self::Target { + unsafe { &*(std::ptr::addr_of!(self.0).cast::()) } + } +} + +impl Default for Parser { + fn default() -> Self { + Self::new() } } impl Parser { /// Create a new parser. - pub fn new() -> Parser { + #[must_use] + pub fn new() -> Self { unsafe { let parser = ffi::ts_parser_new(); - Parser(NonNull::new_unchecked(parser)) + Self(NonNull::new_unchecked(parser)) } } @@ -350,38 +444,36 @@ impl Parser { /// Returns a Result indicating whether or not the language was successfully /// assigned. True means assignment succeeded. False means there was a version /// mismatch: the language was generated with an incompatible version of the - /// Tree-sitter CLI. Check the language's version using [Language::version] - /// and compare it to this library's [LANGUAGE_VERSION](LANGUAGE_VERSION) and - /// [MIN_COMPATIBLE_LANGUAGE_VERSION](MIN_COMPATIBLE_LANGUAGE_VERSION) constants. + /// Tree-sitter CLI. Check the language's version using [`Language::version`] + /// and compare it to this library's [`LANGUAGE_VERSION`](LANGUAGE_VERSION) and + /// [`MIN_COMPATIBLE_LANGUAGE_VERSION`](MIN_COMPATIBLE_LANGUAGE_VERSION) constants. 
#[doc(alias = "ts_parser_set_language")] - pub fn set_language(&mut self, language: Language) -> Result<(), LanguageError> { + pub fn set_language(&mut self, language: &Language) -> Result<(), LanguageError> { let version = language.version(); - if version < MIN_COMPATIBLE_LANGUAGE_VERSION || version > LANGUAGE_VERSION { - Err(LanguageError { version }) - } else { + if (MIN_COMPATIBLE_LANGUAGE_VERSION..=LANGUAGE_VERSION).contains(&version) { unsafe { ffi::ts_parser_set_language(self.0.as_ptr(), language.0); } Ok(()) + } else { + Err(LanguageError { version }) } } /// Get the parser's current language. #[doc(alias = "ts_parser_language")] + #[must_use] pub fn language(&self) -> Option { let ptr = unsafe { ffi::ts_parser_language(self.0.as_ptr()) }; - if ptr.is_null() { - None - } else { - Some(Language(ptr)) - } + (!ptr.is_null()).then(|| Language(ptr)) } /// Get the parser's current logger. #[doc(alias = "ts_parser_logger")] + #[must_use] pub fn logger(&self) -> Option<&Logger> { let logger = unsafe { ffi::ts_parser_logger(self.0.as_ptr()) }; - unsafe { (logger.payload as *mut Logger).as_ref() } + unsafe { logger.payload.cast::().as_ref() } } /// Set the logging callback that a parser should use during parsing. 
@@ -389,7 +481,7 @@ impl Parser { pub fn set_logger(&mut self, logger: Option) { let prev_logger = unsafe { ffi::ts_parser_logger(self.0.as_ptr()) }; if !prev_logger.payload.is_null() { - drop(unsafe { Box::from_raw(prev_logger.payload as *mut Logger) }); + drop(unsafe { Box::from_raw(prev_logger.payload.cast::()) }); } let c_logger; @@ -401,9 +493,9 @@ impl Parser { c_log_type: ffi::TSLogType, c_message: *const c_char, ) { - let callback = (payload as *mut Logger).as_mut().unwrap(); + let callback = payload.cast::().as_mut().unwrap(); if let Ok(message) = CStr::from_ptr(c_message).to_str() { - let log_type = if c_log_type == ffi::TSLogType_TSLogTypeParse { + let log_type = if c_log_type == ffi::TSLogTypeParse { LogType::Parse } else { LogType::Lex @@ -415,7 +507,7 @@ impl Parser { let raw_container = Box::into_raw(container); c_logger = ffi::TSLogger { - payload: raw_container as *mut c_void, + payload: raw_container.cast::(), log: Some(log), }; } else { @@ -432,11 +524,27 @@ impl Parser { /// during parsing. The graphs are formatted in the DOT language. You may want /// to pipe these graphs directly to a `dot(1)` process in order to generate /// SVG output. - #[cfg(unix)] #[doc(alias = "ts_parser_print_dot_graphs")] - pub fn print_dot_graphs(&mut self, file: &impl AsRawFd) { - let fd = file.as_raw_fd(); - unsafe { ffi::ts_parser_print_dot_graphs(self.0.as_ptr(), ffi::dup(fd)) } + pub fn print_dot_graphs( + &mut self, + #[cfg(not(windows))] file: &impl AsRawFd, + #[cfg(windows)] file: &impl AsRawHandle, + ) { + #[cfg(not(windows))] + { + let fd = file.as_raw_fd(); + unsafe { + ffi::ts_parser_print_dot_graphs(self.0.as_ptr(), ffi::_ts_dup(fd)); + } + } + + #[cfg(windows)] + { + let handle = file.as_raw_handle(); + unsafe { + ffi::ts_parser_print_dot_graphs(self.0.as_ptr(), ffi::_ts_dup(handle)); + } + } } /// Stop the parser from printing debugging graphs while parsing. 
@@ -452,18 +560,18 @@ impl Parser { /// * `old_tree` A previous syntax tree parsed from the same document. /// If the text of the document has changed since `old_tree` was /// created, then you must edit `old_tree` to match the new text using - /// [Tree::edit]. + /// [`Tree::edit`]. /// - /// Returns a [Tree] if parsing succeeded, or `None` if: - /// * The parser has not yet had a language assigned with [Parser::set_language] - /// * The timeout set with [Parser::set_timeout_micros] expired - /// * The cancellation flag set with [Parser::set_cancellation_flag] was flipped + /// Returns a [`Tree`] if parsing succeeded, or `None` if: + /// * The parser has not yet had a language assigned with [`Parser::set_language`] + /// * The timeout set with [`Parser::set_timeout_micros`] expired + /// * The cancellation flag set with [`Parser::set_cancellation_flag`] was flipped #[doc(alias = "ts_parser_parse")] pub fn parse(&mut self, text: impl AsRef<[u8]>, old_tree: Option<&Tree>) -> Option { let bytes = text.as_ref(); let len = bytes.len(); self.parse_with( - &mut |i, _| if i < len { &bytes[i..] } else { &[] }, + &mut |i, _| (i < len).then(|| &bytes[i..]).unwrap_or_default(), old_tree, ) } @@ -475,7 +583,7 @@ impl Parser { /// * `old_tree` A previous syntax tree parsed from the same document. /// If the text of the document has changed since `old_tree` was /// created, then you must edit `old_tree` to match the new text using - /// [Tree::edit]. + /// [`Tree::edit`]. pub fn parse_utf16( &mut self, input: impl AsRef<[u16]>, @@ -484,7 +592,7 @@ impl Parser { let code_points = input.as_ref(); let len = code_points.len(); self.parse_utf16_with( - &mut |i, _| if i < len { &code_points[i..] } else { &[] }, + &mut |i, _| (i < len).then(|| &code_points[i..]).unwrap_or_default(), old_tree, ) } @@ -499,8 +607,8 @@ impl Parser { /// * `old_tree` A previous syntax tree parsed from the same document. 
/// If the text of the document has changed since `old_tree` was /// created, then you must edit `old_tree` to match the new text using - /// [Tree::edit]. - pub fn parse_with<'a, T: AsRef<[u8]>, F: FnMut(usize, Point) -> T>( + /// [`Tree::edit`]. + pub fn parse_with, F: FnMut(usize, Point) -> T>( &mut self, callback: &mut F, old_tree: Option<&Tree>, @@ -513,23 +621,23 @@ impl Parser { let mut payload: (&mut F, Option) = (callback, None); // This C function is passed to Tree-sitter as the input callback. - unsafe extern "C" fn read<'a, T: AsRef<[u8]>, F: FnMut(usize, Point) -> T>( + unsafe extern "C" fn read, F: FnMut(usize, Point) -> T>( payload: *mut c_void, byte_offset: u32, position: ffi::TSPoint, bytes_read: *mut u32, ) -> *const c_char { - let (callback, text) = (payload as *mut (&mut F, Option)).as_mut().unwrap(); + let (callback, text) = payload.cast::<(&mut F, Option)>().as_mut().unwrap(); *text = Some(callback(byte_offset as usize, position.into())); let slice = text.as_ref().unwrap().as_ref(); *bytes_read = slice.len() as u32; - return slice.as_ptr() as *const c_char; + slice.as_ptr().cast::() } let c_input = ffi::TSInput { - payload: &mut payload as *mut (&mut F, Option) as *mut c_void, + payload: std::ptr::addr_of_mut!(payload).cast::(), read: Some(read::), - encoding: ffi::TSInputEncoding_TSInputEncodingUTF8, + encoding: ffi::TSInputEncodingUTF8, }; let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0.as_ptr()); @@ -549,8 +657,8 @@ impl Parser { /// * `old_tree` A previous syntax tree parsed from the same document. /// If the text of the document has changed since `old_tree` was /// created, then you must edit `old_tree` to match the new text using - /// [Tree::edit]. - pub fn parse_utf16_with<'a, T: AsRef<[u16]>, F: FnMut(usize, Point) -> T>( + /// [`Tree::edit`]. 
+ pub fn parse_utf16_with, F: FnMut(usize, Point) -> T>( &mut self, callback: &mut F, old_tree: Option<&Tree>, @@ -563,13 +671,13 @@ impl Parser { let mut payload: (&mut F, Option) = (callback, None); // This C function is passed to Tree-sitter as the input callback. - unsafe extern "C" fn read<'a, T: AsRef<[u16]>, F: FnMut(usize, Point) -> T>( + unsafe extern "C" fn read, F: FnMut(usize, Point) -> T>( payload: *mut c_void, byte_offset: u32, position: ffi::TSPoint, bytes_read: *mut u32, ) -> *const c_char { - let (callback, text) = (payload as *mut (&mut F, Option)).as_mut().unwrap(); + let (callback, text) = payload.cast::<(&mut F, Option)>().as_mut().unwrap(); *text = Some(callback( (byte_offset / 2) as usize, Point { @@ -579,13 +687,13 @@ impl Parser { )); let slice = text.as_ref().unwrap().as_ref(); *bytes_read = slice.len() as u32 * 2; - slice.as_ptr() as *const c_char + slice.as_ptr().cast::() } let c_input = ffi::TSInput { - payload: &mut payload as *mut (&mut F, Option) as *mut c_void, + payload: std::ptr::addr_of_mut!(payload).cast::(), read: Some(read::), - encoding: ffi::TSInputEncoding_TSInputEncodingUTF16, + encoding: ffi::TSInputEncodingUTF16, }; let c_old_tree = old_tree.map_or(ptr::null_mut(), |t| t.0.as_ptr()); @@ -597,10 +705,10 @@ impl Parser { /// Instruct the parser to start the next parse from the beginning. /// - /// If the parser previously failed because of a timeout or a cancellation, then - /// by default, it will resume where it left off on the next call to `parse` or - /// other parsing functions. If you don't want to resume, and instead intend to - /// use this parser to parse some other document, you must call `reset` first. + /// If the parser previously failed because of a timeout or a cancellation, then by default, it + /// will resume where it left off on the next call to [`parse`](Parser::parse) or other parsing + /// functions. 
If you don't want to resume, and instead intend to use this parser to parse some + /// other document, you must call `reset` first. #[doc(alias = "ts_parser_reset")] pub fn reset(&mut self) { unsafe { ffi::ts_parser_reset(self.0.as_ptr()) } @@ -608,8 +716,9 @@ impl Parser { /// Get the duration in microseconds that parsing is allowed to take. /// - /// This is set via [set_timeout_micros](Parser::set_timeout_micros). + /// This is set via [`set_timeout_micros`](Parser::set_timeout_micros). #[doc(alias = "ts_parser_timeout_micros")] + #[must_use] pub fn timeout_micros(&self) -> u64 { unsafe { ffi::ts_parser_timeout_micros(self.0.as_ptr()) } } @@ -618,7 +727,7 @@ impl Parser { /// take before halting. /// /// If parsing takes longer than this, it will halt early, returning `None`. - /// See `parse` for more information. + /// See [`parse`](Parser::parse) for more information. #[doc(alias = "ts_parser_set_timeout_micros")] pub fn set_timeout_micros(&mut self, timeout_micros: u64) { unsafe { ffi::ts_parser_set_timeout_micros(self.0.as_ptr(), timeout_micros) } @@ -638,15 +747,15 @@ impl Parser { /// ```text /// ranges[i].end_byte <= ranges[i + 1].start_byte /// ``` - /// If this requirement is not satisfied, method will return IncludedRangesError + /// If this requirement is not satisfied, method will return [`IncludedRangesError`] /// error with an offset in the passed ranges slice pointing to a first incorrect range. 
#[doc(alias = "ts_parser_set_included_ranges")] - pub fn set_included_ranges<'a>( - &mut self, - ranges: &'a [Range], - ) -> Result<(), IncludedRangesError> { - let ts_ranges: Vec = - ranges.iter().cloned().map(|range| range.into()).collect(); + pub fn set_included_ranges(&mut self, ranges: &[Range]) -> Result<(), IncludedRangesError> { + let ts_ranges = ranges + .iter() + .copied() + .map(std::convert::Into::into) + .collect::>(); let result = unsafe { ffi::ts_parser_set_included_ranges( self.0.as_ptr(), @@ -670,22 +779,33 @@ impl Parser { } /// Get the parser's current cancellation flag pointer. + /// + /// # Safety + /// + /// It uses FFI #[doc(alias = "ts_parser_cancellation_flag")] + #[must_use] pub unsafe fn cancellation_flag(&self) -> Option<&AtomicUsize> { - (ffi::ts_parser_cancellation_flag(self.0.as_ptr()) as *const AtomicUsize).as_ref() + ffi::ts_parser_cancellation_flag(self.0.as_ptr()) + .cast::() + .as_ref() } /// Set the parser's current cancellation flag pointer. /// /// If a pointer is assigned, then the parser will periodically read from /// this pointer during parsing. If it reads a non-zero value, it will halt early, - /// returning `None`. See [parse](Parser::parse) for more information. + /// returning `None`. See [`parse`](Parser::parse) for more information. + /// + /// # Safety + /// + /// It uses FFI #[doc(alias = "ts_parser_set_cancellation_flag")] pub unsafe fn set_cancellation_flag(&mut self, flag: Option<&AtomicUsize>) { if let Some(flag) = flag { ffi::ts_parser_set_cancellation_flag( self.0.as_ptr(), - flag as *const AtomicUsize as *const usize, + (flag as *const AtomicUsize).cast::(), ); } else { ffi::ts_parser_set_cancellation_flag(self.0.as_ptr(), ptr::null()); @@ -704,6 +824,7 @@ impl Drop for Parser { impl Tree { /// Get the root node of the syntax tree. 
#[doc(alias = "ts_tree_root_node")] + #[must_use] pub fn root_node(&self) -> Node { Node::new(unsafe { ffi::ts_tree_root_node(self.0.as_ptr()) }).unwrap() } @@ -711,6 +832,7 @@ impl Tree { /// Get the root node of the syntax tree, but with its position shifted /// forward by the given offset. #[doc(alias = "ts_tree_root_node_with_offset")] + #[must_use] pub fn root_node_with_offset(&self, offset_bytes: usize, offset_extent: Point) -> Node { Node::new(unsafe { ffi::ts_tree_root_node_with_offset( @@ -724,8 +846,12 @@ impl Tree { /// Get the language that was used to parse the syntax tree. #[doc(alias = "ts_tree_language")] - pub fn language(&self) -> Language { - Language(unsafe { ffi::ts_tree_language(self.0.as_ptr()) }) + #[must_use] + pub fn language(&self) -> LanguageRef { + LanguageRef( + unsafe { ffi::ts_tree_language(self.0.as_ptr()) }, + PhantomData, + ) } /// Edit the syntax tree to keep it in sync with source code that has been @@ -739,7 +865,8 @@ impl Tree { unsafe { ffi::ts_tree_edit(self.0.as_ptr(), &edit) }; } - /// Create a new [TreeCursor] starting from the root of the tree. + /// Create a new [`TreeCursor`] starting from the root of the tree. + #[must_use] pub fn walk(&self) -> TreeCursor { self.root_node().walk() } @@ -749,29 +876,35 @@ impl Tree { /// /// For this to work correctly, this syntax tree must have been edited such that its /// ranges match up to the new tree. Generally, you'll want to call this method right - /// after calling one of the [Parser::parse] functions. Call it on the old tree that + /// after calling one of the [`Parser::parse`] functions. Call it on the old tree that /// was passed to parse, and pass the new tree that was returned from `parse`. 
#[doc(alias = "ts_tree_get_changed_ranges")] - pub fn changed_ranges(&self, other: &Tree) -> impl ExactSizeIterator { + #[must_use] + pub fn changed_ranges(&self, other: &Self) -> impl ExactSizeIterator { let mut count = 0u32; unsafe { let ptr = ffi::ts_tree_get_changed_ranges( self.0.as_ptr(), other.0.as_ptr(), - &mut count as *mut u32, + std::ptr::addr_of_mut!(count), ); - util::CBufferIter::new(ptr, count as usize).map(|r| r.into()) + util::CBufferIter::new(ptr, count as usize).map(std::convert::Into::into) } } /// Get the included ranges that were used to parse the syntax tree. + #[must_use] pub fn included_ranges(&self) -> Vec { let mut count = 0u32; unsafe { - let ptr = ffi::ts_tree_included_ranges(self.0.as_ptr(), &mut count as *mut u32); + let ptr = ffi::ts_tree_included_ranges(self.0.as_ptr(), std::ptr::addr_of_mut!(count)); let ranges = slice::from_raw_parts(ptr, count as usize); - let result = ranges.iter().copied().map(|range| range.into()).collect(); - (FREE_FN)(ptr as *mut c_void); + let result = ranges + .iter() + .copied() + .map(std::convert::Into::into) + .collect(); + (FREE_FN)(ptr.cast::()); result } } @@ -779,11 +912,23 @@ impl Tree { /// Print a graph of the tree to the given file descriptor. /// The graph is formatted in the DOT language. You may want to pipe this graph /// directly to a `dot(1)` process in order to generate SVG output. 
- #[cfg(unix)] #[doc(alias = "ts_tree_print_dot_graph")] - pub fn print_dot_graph(&self, file: &impl AsRawFd) { - let fd = file.as_raw_fd(); - unsafe { ffi::ts_tree_print_dot_graph(self.0.as_ptr(), fd) } + pub fn print_dot_graph( + &self, + #[cfg(unix)] file: &impl AsRawFd, + #[cfg(windows)] file: &impl AsRawHandle, + ) { + #[cfg(unix)] + { + let fd = file.as_raw_fd(); + unsafe { ffi::ts_tree_print_dot_graph(self.0.as_ptr(), fd) } + } + + #[cfg(windows)] + { + let handle = file.as_raw_handle(); + unsafe { ffi::ts_tree_print_dot_graph(self.0.as_ptr(), handle as i32) } + } } } @@ -800,18 +945,14 @@ impl Drop for Tree { } impl Clone for Tree { - fn clone(&self) -> Tree { - unsafe { Tree(NonNull::new_unchecked(ffi::ts_tree_copy(self.0.as_ptr()))) } + fn clone(&self) -> Self { + unsafe { Self(NonNull::new_unchecked(ffi::ts_tree_copy(self.0.as_ptr()))) } } } impl<'tree> Node<'tree> { fn new(node: ffi::TSNode) -> Option { - if node.id.is_null() { - None - } else { - Some(Node(node, PhantomData)) - } + (!node.id.is_null()).then_some(Node(node, PhantomData)) } /// Get a numeric id for this node that is unique. @@ -820,28 +961,50 @@ impl<'tree> Node<'tree> { /// a new tree is created based on an older tree, and a node from the old /// tree is reused in the process, then that node will have the same id in /// both trees. + #[must_use] pub fn id(&self) -> usize { self.0.id as usize } /// Get this node's type as a numerical id. #[doc(alias = "ts_node_symbol")] + #[must_use] pub fn kind_id(&self) -> u16 { unsafe { ffi::ts_node_symbol(self.0) } } + /// Get the node's type as a numerical id as it appears in the grammar + /// ignoring aliases. + #[doc(alias = "ts_node_grammar_symbol")] + #[must_use] + pub fn grammar_id(&self) -> u16 { + unsafe { ffi::ts_node_grammar_symbol(self.0) } + } + /// Get this node's type as a string. 
#[doc(alias = "ts_node_type")] + #[must_use] pub fn kind(&self) -> &'static str { unsafe { CStr::from_ptr(ffi::ts_node_type(self.0)) } .to_str() .unwrap() } - /// Get the [Language] that was used to parse this node's syntax tree. - #[doc(alias = "ts_tree_language")] - pub fn language(&self) -> Language { - Language(unsafe { ffi::ts_tree_language(self.0.tree) }) + /// Get this node's symbol name as it appears in the grammar ignoring + /// aliases as a string. + #[doc(alias = "ts_node_grammar_type")] + #[must_use] + pub fn grammar_name(&self) -> &'static str { + unsafe { CStr::from_ptr(ffi::ts_node_grammar_type(self.0)) } + .to_str() + .unwrap() + } + + /// Get the [`Language`] that was used to parse this node's syntax tree. + #[doc(alias = "ts_node_language")] + #[must_use] + pub fn language(&self) -> LanguageRef { + LanguageRef(unsafe { ffi::ts_node_language(self.0) }, PhantomData) } /// Check if this node is *named*. @@ -849,6 +1012,7 @@ impl<'tree> Node<'tree> { /// Named nodes correspond to named rules in the grammar, whereas *anonymous* nodes /// correspond to string literals in the grammar. #[doc(alias = "ts_node_is_named")] + #[must_use] pub fn is_named(&self) -> bool { unsafe { ffi::ts_node_is_named(self.0) } } @@ -858,12 +1022,14 @@ impl<'tree> Node<'tree> { /// Extra nodes represent things like comments, which are not required the grammar, /// but can appear anywhere. #[doc(alias = "ts_node_is_extra")] + #[must_use] pub fn is_extra(&self) -> bool { unsafe { ffi::ts_node_is_extra(self.0) } } /// Check if this node has been edited. #[doc(alias = "ts_node_has_changes")] + #[must_use] pub fn has_changes(&self) -> bool { unsafe { ffi::ts_node_has_changes(self.0) } } @@ -871,6 +1037,7 @@ impl<'tree> Node<'tree> { /// Check if this node represents a syntax error or contains any syntax errors anywhere /// within it. 
#[doc(alias = "ts_node_has_error")] + #[must_use] pub fn has_error(&self) -> bool { unsafe { ffi::ts_node_has_error(self.0) } } @@ -879,8 +1046,24 @@ impl<'tree> Node<'tree> { /// /// Syntax errors represent parts of the code that could not be incorporated into a /// valid syntax tree. + #[doc(alias = "ts_node_is_error")] + #[must_use] pub fn is_error(&self) -> bool { - self.kind_id() == u16::MAX + unsafe { ffi::ts_node_is_error(self.0) } + } + + /// Get this node's parse state. + #[doc(alias = "ts_node_parse_state")] + #[must_use] + pub fn parse_state(&self) -> u16 { + unsafe { ffi::ts_node_parse_state(self.0) } + } + + /// Get the parse state after this node. + #[doc(alias = "ts_node_next_parse_state")] + #[must_use] + pub fn next_parse_state(&self) -> u16 { + unsafe { ffi::ts_node_next_parse_state(self.0) } } /// Check if this node is *missing*. @@ -888,29 +1071,34 @@ impl<'tree> Node<'tree> { /// Missing nodes are inserted by the parser in order to recover from certain kinds of /// syntax errors. #[doc(alias = "ts_node_is_missing")] + #[must_use] pub fn is_missing(&self) -> bool { unsafe { ffi::ts_node_is_missing(self.0) } } /// Get the byte offsets where this node starts. #[doc(alias = "ts_node_start_byte")] + #[must_use] pub fn start_byte(&self) -> usize { unsafe { ffi::ts_node_start_byte(self.0) as usize } } /// Get the byte offsets where this node end. #[doc(alias = "ts_node_end_byte")] + #[must_use] pub fn end_byte(&self) -> usize { unsafe { ffi::ts_node_end_byte(self.0) as usize } } /// Get the byte range of source code that this node represents. + #[must_use] pub fn byte_range(&self) -> std::ops::Range { self.start_byte()..self.end_byte() } /// Get the range of source code that this node represents, both in terms of raw bytes /// and of row/column coordinates. + #[must_use] pub fn range(&self) -> Range { Range { start_byte: self.start_byte(), @@ -922,6 +1110,7 @@ impl<'tree> Node<'tree> { /// Get this node's start position in terms of rows and columns. 
#[doc(alias = "ts_node_start_point")] + #[must_use] pub fn start_position(&self) -> Point { let result = unsafe { ffi::ts_node_start_point(self.0) }; result.into() @@ -929,6 +1118,7 @@ impl<'tree> Node<'tree> { /// Get this node's end position in terms of rows and columns. #[doc(alias = "ts_node_end_point")] + #[must_use] pub fn end_position(&self) -> Point { let result = unsafe { ffi::ts_node_end_point(self.0) }; result.into() @@ -937,35 +1127,39 @@ impl<'tree> Node<'tree> { /// Get the node's child at the given index, where zero represents the first /// child. /// - /// This method is fairly fast, but its cost is technically log(i), so you - /// if you might be iterating over a long list of children, you should use - /// [Node::children] instead. + /// This method is fairly fast, but its cost is technically log(i), so if + /// you might be iterating over a long list of children, you should use + /// [`Node::children`] instead. #[doc(alias = "ts_node_child")] + #[must_use] pub fn child(&self, i: usize) -> Option { Self::new(unsafe { ffi::ts_node_child(self.0, i as u32) }) } /// Get this node's number of children. #[doc(alias = "ts_node_child_count")] + #[must_use] pub fn child_count(&self) -> usize { unsafe { ffi::ts_node_child_count(self.0) as usize } } /// Get this node's *named* child at the given index. /// - /// See also [Node::is_named]. - /// This method is fairly fast, but its cost is technically log(i), so you - /// if you might be iterating over a long list of children, you should use - /// [Node::named_children] instead. + /// See also [`Node::is_named`]. + /// This method is fairly fast, but its cost is technically log(i), so if + /// you might be iterating over a long list of children, you should use + /// [`Node::named_children`] instead. 
#[doc(alias = "ts_node_named_child")] - pub fn named_child<'a>(&'a self, i: usize) -> Option { + #[must_use] + pub fn named_child(&self, i: usize) -> Option { Self::new(unsafe { ffi::ts_node_named_child(self.0, i as u32) }) } /// Get this node's number of *named* children. /// - /// See also [Node::is_named]. + /// See also [`Node::is_named`]. #[doc(alias = "ts_node_named_child_count")] + #[must_use] pub fn named_child_count(&self) -> usize { unsafe { ffi::ts_node_named_child_count(self.0) as usize } } @@ -973,14 +1167,15 @@ impl<'tree> Node<'tree> { /// Get the first child with the given field name. /// /// If multiple children may have the same field name, access them using - /// [children_by_field_name](Node::children_by_field_name) + /// [`children_by_field_name`](Node::children_by_field_name) #[doc(alias = "ts_node_child_by_field_name")] + #[must_use] pub fn child_by_field_name(&self, field_name: impl AsRef<[u8]>) -> Option { let field_name = field_name.as_ref(); Self::new(unsafe { ffi::ts_node_child_by_field_name( self.0, - field_name.as_ptr() as *const c_char, + field_name.as_ptr().cast::(), field_name.len() as u32, ) }) @@ -988,42 +1183,40 @@ impl<'tree> Node<'tree> { /// Get this node's child with the given numerical field id. /// - /// See also [child_by_field_name](Node::child_by_field_name). You can convert a field name to - /// an id using [Language::field_id_for_name]. + /// See also [`child_by_field_name`](Node::child_by_field_name). You can convert a field name to + /// an id using [`Language::field_id_for_name`]. #[doc(alias = "ts_node_child_by_field_id")] + #[must_use] pub fn child_by_field_id(&self, field_id: u16) -> Option { Self::new(unsafe { ffi::ts_node_child_by_field_id(self.0, field_id) }) } /// Get the field name of this node's child at the given index. 
#[doc(alias = "ts_node_field_name_for_child")] + #[must_use] pub fn field_name_for_child(&self, child_index: u32) -> Option<&'static str> { unsafe { let ptr = ffi::ts_node_field_name_for_child(self.0, child_index); - if ptr.is_null() { - None - } else { - Some(CStr::from_ptr(ptr).to_str().unwrap()) - } + (!ptr.is_null()).then(|| CStr::from_ptr(ptr).to_str().unwrap()) } } /// Iterate over this node's children. /// - /// A [TreeCursor] is used to retrieve the children efficiently. Obtain - /// a [TreeCursor] by calling [Tree::walk] or [Node::walk]. To avoid unnecessary + /// A [`TreeCursor`] is used to retrieve the children efficiently. Obtain + /// a [`TreeCursor`] by calling [`Tree::walk`] or [`Node::walk`]. To avoid unnecessary /// allocations, you should reuse the same cursor for subsequent calls to /// this method. /// - /// If you're walking the tree recursively, you may want to use the `TreeCursor` + /// If you're walking the tree recursively, you may want to use the [`TreeCursor`] /// APIs directly instead. - pub fn children<'a>( + pub fn children<'cursor>( &self, - cursor: &'a mut TreeCursor<'tree>, - ) -> impl ExactSizeIterator> + 'a { + cursor: &'cursor mut TreeCursor<'tree>, + ) -> impl ExactSizeIterator> + 'cursor { cursor.reset(*self); cursor.goto_first_child(); - (0..self.child_count()).into_iter().map(move |_| { + (0..self.child_count()).map(move |_| { let result = cursor.node(); cursor.goto_next_sibling(); result @@ -1032,14 +1225,14 @@ impl<'tree> Node<'tree> { /// Iterate over this node's named children. /// - /// See also [Node::children]. - pub fn named_children<'a>( + /// See also [`Node::children`]. 
+ pub fn named_children<'cursor>( &self, - cursor: &'a mut TreeCursor<'tree>, - ) -> impl ExactSizeIterator> + 'a { + cursor: &'cursor mut TreeCursor<'tree>, + ) -> impl ExactSizeIterator> + 'cursor { cursor.reset(*self); cursor.goto_first_child(); - (0..self.named_child_count()).into_iter().map(move |_| { + (0..self.named_child_count()).map(move |_| { while !cursor.node().is_named() { if !cursor.goto_next_sibling() { break; @@ -1053,29 +1246,48 @@ impl<'tree> Node<'tree> { /// Iterate over this node's children with a given field name. /// - /// See also [Node::children]. - pub fn children_by_field_name<'a>( + /// See also [`Node::children`]. + pub fn children_by_field_name<'cursor>( &self, field_name: &str, - cursor: &'a mut TreeCursor<'tree>, - ) -> impl Iterator> + 'a { + cursor: &'cursor mut TreeCursor<'tree>, + ) -> impl Iterator> + 'cursor { let field_id = self.language().field_id_for_name(field_name); - self.children_by_field_id(field_id.unwrap_or(0), cursor) + let mut done = field_id.is_none(); + if !done { + cursor.reset(*self); + cursor.goto_first_child(); + } + iter::from_fn(move || { + if !done { + while cursor.field_id() != field_id { + if !cursor.goto_next_sibling() { + return None; + } + } + let result = cursor.node(); + if !cursor.goto_next_sibling() { + done = true; + } + return Some(result); + } + None + }) } /// Iterate over this node's children with a given field id. /// - /// See also [Node::children_by_field_name]. - pub fn children_by_field_id<'a>( + /// See also [`Node::children_by_field_name`]. 
+ pub fn children_by_field_id<'cursor>( &self, - field_id: u16, - cursor: &'a mut TreeCursor<'tree>, - ) -> impl Iterator> + 'a { + field_id: FieldId, + cursor: &'cursor mut TreeCursor<'tree>, + ) -> impl Iterator> + 'cursor { cursor.reset(*self); cursor.goto_first_child(); let mut done = false; iter::from_fn(move || { - while !done { + if !done { while cursor.field_id() != Some(field_id) { if !cursor.goto_next_sibling() { return None; @@ -1093,36 +1305,49 @@ impl<'tree> Node<'tree> { /// Get this node's immediate parent. #[doc(alias = "ts_node_parent")] + #[must_use] pub fn parent(&self) -> Option { Self::new(unsafe { ffi::ts_node_parent(self.0) }) } /// Get this node's next sibling. #[doc(alias = "ts_node_next_sibling")] + #[must_use] pub fn next_sibling(&self) -> Option { Self::new(unsafe { ffi::ts_node_next_sibling(self.0) }) } /// Get this node's previous sibling. #[doc(alias = "ts_node_prev_sibling")] + #[must_use] pub fn prev_sibling(&self) -> Option { Self::new(unsafe { ffi::ts_node_prev_sibling(self.0) }) } /// Get this node's next named sibling. #[doc(alias = "ts_node_next_named_sibling")] + #[must_use] pub fn next_named_sibling(&self) -> Option { Self::new(unsafe { ffi::ts_node_next_named_sibling(self.0) }) } /// Get this node's previous named sibling. #[doc(alias = "ts_node_prev_named_sibling")] + #[must_use] pub fn prev_named_sibling(&self) -> Option { Self::new(unsafe { ffi::ts_node_prev_named_sibling(self.0) }) } + /// Get the node's number of descendants, including one for the node itself. + #[doc(alias = "ts_node_descendant_count")] + #[must_use] + pub fn descendant_count(&self) -> usize { + unsafe { ffi::ts_node_descendant_count(self.0) as usize } + } + /// Get the smallest node within this node that spans the given range. 
#[doc(alias = "ts_node_descendant_for_byte_range")] + #[must_use] pub fn descendant_for_byte_range(&self, start: usize, end: usize) -> Option { Self::new(unsafe { ffi::ts_node_descendant_for_byte_range(self.0, start as u32, end as u32) @@ -1131,6 +1356,7 @@ impl<'tree> Node<'tree> { /// Get the smallest named node within this node that spans the given range. #[doc(alias = "ts_node_named_descendant_for_byte_range")] + #[must_use] pub fn named_descendant_for_byte_range(&self, start: usize, end: usize) -> Option { Self::new(unsafe { ffi::ts_node_named_descendant_for_byte_range(self.0, start as u32, end as u32) @@ -1139,6 +1365,7 @@ impl<'tree> Node<'tree> { /// Get the smallest node within this node that spans the given range. #[doc(alias = "ts_node_descendant_for_point_range")] + #[must_use] pub fn descendant_for_point_range(&self, start: Point, end: Point) -> Option { Self::new(unsafe { ffi::ts_node_descendant_for_point_range(self.0, start.into(), end.into()) @@ -1147,6 +1374,7 @@ impl<'tree> Node<'tree> { /// Get the smallest named node within this node that spans the given range. 
#[doc(alias = "ts_node_named_descendant_for_point_range")] + #[must_use] pub fn named_descendant_for_point_range(&self, start: Point, end: Point) -> Option { Self::new(unsafe { ffi::ts_node_named_descendant_for_point_range(self.0, start.into(), end.into()) @@ -1154,13 +1382,14 @@ impl<'tree> Node<'tree> { } #[doc(alias = "ts_node_string")] + #[must_use] pub fn to_sexp(&self) -> String { let c_string = unsafe { ffi::ts_node_string(self.0) }; let result = unsafe { CStr::from_ptr(c_string) } .to_str() .unwrap() .to_string(); - unsafe { (FREE_FN)(c_string as *mut c_void) }; + unsafe { (FREE_FN)(c_string.cast::()) }; result } @@ -1168,12 +1397,14 @@ impl<'tree> Node<'tree> { str::from_utf8(&source[self.start_byte()..self.end_byte()]) } + #[must_use] pub fn utf16_text<'a>(&self, source: &'a [u16]) -> &'a [u16] { - &source.as_ref()[self.start_byte()..self.end_byte()] + &source[self.start_byte()..self.end_byte()] } - /// Create a new [TreeCursor] starting from this node. + /// Create a new [`TreeCursor`] starting from this node. #[doc(alias = "ts_tree_cursor_new")] + #[must_use] pub fn walk(&self) -> TreeCursor<'tree> { TreeCursor(unsafe { ffi::ts_tree_cursor_new(self.0) }, PhantomData) } @@ -1181,26 +1412,26 @@ impl<'tree> Node<'tree> { /// Edit this node to keep it in-sync with source code that has been edited. /// /// This function is only rarely needed. When you edit a syntax tree with the - /// [Tree::edit] method, all of the nodes that you retrieve from the tree - /// afterward will already reflect the edit. You only need to use [Node::edit] - /// when you have a specific [Node] instance that you want to keep and continue + /// [`Tree::edit`] method, all of the nodes that you retrieve from the tree + /// afterward will already reflect the edit. You only need to use [`Node::edit`] + /// when you have a specific [`Node`] instance that you want to keep and continue /// to use after an edit. 
#[doc(alias = "ts_node_edit")] pub fn edit(&mut self, edit: &InputEdit) { let edit = edit.into(); - unsafe { ffi::ts_node_edit(&mut self.0 as *mut ffi::TSNode, &edit) } + unsafe { ffi::ts_node_edit(std::ptr::addr_of_mut!(self.0), &edit) } } } -impl<'a> PartialEq for Node<'a> { +impl PartialEq for Node<'_> { fn eq(&self, other: &Self) -> bool { self.0.id == other.0.id } } -impl<'a> Eq for Node<'a> {} +impl Eq for Node<'_> {} -impl<'a> hash::Hash for Node<'a> { +impl hash::Hash for Node<'_> { fn hash(&self, state: &mut H) { self.0.id.hash(state); self.0.context[0].hash(state); @@ -1210,7 +1441,7 @@ impl<'a> hash::Hash for Node<'a> { } } -impl<'a> fmt::Debug for Node<'a> { +impl fmt::Debug for Node<'_> { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { write!( f, @@ -1222,10 +1453,11 @@ impl<'a> fmt::Debug for Node<'a> { } } -impl<'a> TreeCursor<'a> { - /// Get the tree cursor's current [Node]. +impl<'cursor> TreeCursor<'cursor> { + /// Get the tree cursor's current [`Node`]. #[doc(alias = "ts_tree_cursor_current_node")] - pub fn node(&self) -> Node<'a> { + #[must_use] + pub fn node(&self) -> Node<'cursor> { Node( unsafe { ffi::ts_tree_cursor_current_node(&self.0) }, PhantomData, @@ -1234,39 +1466,61 @@ impl<'a> TreeCursor<'a> { /// Get the numerical field id of this tree cursor's current node. /// - /// See also [field_name](TreeCursor::field_name). + /// See also [`field_name`](TreeCursor::field_name). #[doc(alias = "ts_tree_cursor_current_field_id")] - pub fn field_id(&self) -> Option { - unsafe { - let id = ffi::ts_tree_cursor_current_field_id(&self.0); - if id == 0 { - None - } else { - Some(id) - } - } + #[must_use] + pub fn field_id(&self) -> Option { + let id = unsafe { ffi::ts_tree_cursor_current_field_id(&self.0) }; + FieldId::new(id) } /// Get the field name of this tree cursor's current node. 
#[doc(alias = "ts_tree_cursor_current_field_name")] + #[must_use] pub fn field_name(&self) -> Option<&'static str> { unsafe { let ptr = ffi::ts_tree_cursor_current_field_name(&self.0); - if ptr.is_null() { - None - } else { - Some(CStr::from_ptr(ptr).to_str().unwrap()) - } + (!ptr.is_null()).then(|| CStr::from_ptr(ptr).to_str().unwrap()) } } + /// Get the numerical field id of this tree cursor's current node. + /// + /// See also [`field_name`](TreeCursor::field_name). + #[doc(alias = "ts_tree_cursor_current_depth")] + #[must_use] + pub fn depth(&self) -> u32 { + unsafe { ffi::ts_tree_cursor_current_depth(&self.0) } + } + + /// Get the index of the cursor's current node out of all of the + /// descendants of the original node that the cursor was constructed with + #[doc(alias = "ts_tree_cursor_current_descendant_index")] + #[must_use] + pub fn descendant_index(&self) -> usize { + unsafe { ffi::ts_tree_cursor_current_descendant_index(&self.0) as usize } + } + /// Move this cursor to the first child of its current node. /// /// This returns `true` if the cursor successfully moved, and returns `false` /// if there were no children. #[doc(alias = "ts_tree_cursor_goto_first_child")] pub fn goto_first_child(&mut self) -> bool { - return unsafe { ffi::ts_tree_cursor_goto_first_child(&mut self.0) }; + unsafe { ffi::ts_tree_cursor_goto_first_child(&mut self.0) } + } + + /// Move this cursor to the last child of its current node. + /// + /// This returns `true` if the cursor successfully moved, and returns + /// `false` if there were no children. + /// + /// Note that this function may be slower than + /// [`goto_first_child`](TreeCursor::goto_first_child) because it needs to + /// iterate through all the children to compute the child's position. + #[doc(alias = "ts_tree_cursor_goto_last_child")] + pub fn goto_last_child(&mut self) -> bool { + unsafe { ffi::ts_tree_cursor_goto_last_child(&mut self.0) } } /// Move this cursor to the parent of its current node. 
@@ -1275,7 +1529,7 @@ impl<'a> TreeCursor<'a> { /// if there was no parent node (the cursor was already on the root node). #[doc(alias = "ts_tree_cursor_goto_parent")] pub fn goto_parent(&mut self) -> bool { - return unsafe { ffi::ts_tree_cursor_goto_parent(&mut self.0) }; + unsafe { ffi::ts_tree_cursor_goto_parent(&mut self.0) } } /// Move this cursor to the next sibling of its current node. @@ -1284,7 +1538,30 @@ impl<'a> TreeCursor<'a> { /// if there was no next sibling node. #[doc(alias = "ts_tree_cursor_goto_next_sibling")] pub fn goto_next_sibling(&mut self) -> bool { - return unsafe { ffi::ts_tree_cursor_goto_next_sibling(&mut self.0) }; + unsafe { ffi::ts_tree_cursor_goto_next_sibling(&mut self.0) } + } + + /// Move the cursor to the node that is the nth descendant of + /// the original node that the cursor was constructed with, where + /// zero represents the original node itself. + #[doc(alias = "ts_tree_cursor_goto_descendant")] + pub fn goto_descendant(&mut self, descendant_index: usize) { + unsafe { ffi::ts_tree_cursor_goto_descendant(&mut self.0, descendant_index as u32) } + } + + /// Move this cursor to the previous sibling of its current node. + /// + /// This returns `true` if the cursor successfully moved, and returns + /// `false` if there was no previous sibling node. + /// + /// Note, that this function may be slower than + /// [`goto_next_sibling`](TreeCursor::goto_next_sibling) due to how node + /// positions are stored. In the worst case, this will need to iterate + /// through all the children upto the previous sibling node to recalculate + /// its position. 
+ #[doc(alias = "ts_tree_cursor_goto_previous_sibling")] + pub fn goto_previous_sibling(&mut self) -> bool { + unsafe { ffi::ts_tree_cursor_goto_previous_sibling(&mut self.0) } } /// Move this cursor to the first child of its current node that extends beyond @@ -1296,11 +1573,7 @@ impl<'a> TreeCursor<'a> { pub fn goto_first_child_for_byte(&mut self, index: usize) -> Option { let result = unsafe { ffi::ts_tree_cursor_goto_first_child_for_byte(&mut self.0, index as u32) }; - if result < 0 { - None - } else { - Some(result as usize) - } + (result >= 0).then_some(result as usize) } /// Move this cursor to the first child of its current node that extends beyond @@ -1312,32 +1585,119 @@ impl<'a> TreeCursor<'a> { pub fn goto_first_child_for_point(&mut self, point: Point) -> Option { let result = unsafe { ffi::ts_tree_cursor_goto_first_child_for_point(&mut self.0, point.into()) }; - if result < 0 { - None - } else { - Some(result as usize) - } + (result >= 0).then_some(result as usize) } /// Re-initialize this tree cursor to start at a different node. #[doc(alias = "ts_tree_cursor_reset")] - pub fn reset(&mut self, node: Node<'a>) { + pub fn reset(&mut self, node: Node<'cursor>) { unsafe { ffi::ts_tree_cursor_reset(&mut self.0, node.0) }; } + + /// Re-initialize a tree cursor to the same position as another cursor. + /// + /// Unlike [`reset`](TreeCursor::reset), this will not lose parent information and + /// allows reusing already created cursors. 
+ #[doc(alias = "ts_tree_cursor_reset_to")] + pub fn reset_to(&mut self, cursor: &TreeCursor<'cursor>) { + unsafe { ffi::ts_tree_cursor_reset_to(&mut self.0, &cursor.0) }; + } } -impl<'a> Clone for TreeCursor<'a> { +impl Clone for TreeCursor<'_> { fn clone(&self) -> Self { TreeCursor(unsafe { ffi::ts_tree_cursor_copy(&self.0) }, PhantomData) } } -impl<'a> Drop for TreeCursor<'a> { +impl Drop for TreeCursor<'_> { fn drop(&mut self) { unsafe { ffi::ts_tree_cursor_delete(&mut self.0) } } } +impl LookaheadIterator { + /// Get the current language of the lookahead iterator. + #[doc(alias = "ts_lookahead_iterator_language")] + #[must_use] + pub fn language(&self) -> LanguageRef<'_> { + LanguageRef( + unsafe { ffi::ts_lookahead_iterator_language(self.0.as_ptr()) }, + PhantomData, + ) + } + + /// Get the current symbol of the lookahead iterator. + #[doc(alias = "ts_lookahead_iterator_current_symbol")] + #[must_use] + pub fn current_symbol(&self) -> u16 { + unsafe { ffi::ts_lookahead_iterator_current_symbol(self.0.as_ptr()) } + } + + /// Get the current symbol name of the lookahead iterator. + #[doc(alias = "ts_lookahead_iterator_current_symbol_name")] + #[must_use] + pub fn current_symbol_name(&self) -> &'static str { + unsafe { + CStr::from_ptr(ffi::ts_lookahead_iterator_current_symbol_name( + self.0.as_ptr(), + )) + .to_str() + .unwrap() + } + } + + /// Reset the lookahead iterator. + /// + /// This returns `true` if the language was set successfully and `false` + /// otherwise. + #[doc(alias = "ts_lookahead_iterator_reset")] + pub fn reset(&mut self, language: &Language, state: u16) -> bool { + unsafe { ffi::ts_lookahead_iterator_reset(self.0.as_ptr(), language.0, state) } + } + + /// Reset the lookahead iterator to another state. + /// + /// This returns `true` if the iterator was reset to the given state and `false` + /// otherwise. 
+ #[doc(alias = "ts_lookahead_iterator_reset_state")] + pub fn reset_state(&mut self, state: u16) -> bool { + unsafe { ffi::ts_lookahead_iterator_reset_state(self.0.as_ptr(), state) } + } + + /// Iterate symbol names. + pub fn iter_names(&mut self) -> impl Iterator + '_ { + LookaheadNamesIterator(self) + } +} + +impl Iterator for LookaheadNamesIterator<'_> { + type Item = &'static str; + + #[doc(alias = "ts_lookahead_iterator_next")] + fn next(&mut self) -> Option { + unsafe { ffi::ts_lookahead_iterator_next(self.0 .0.as_ptr()) } + .then(|| self.0.current_symbol_name()) + } +} + +impl Iterator for LookaheadIterator { + type Item = u16; + + #[doc(alias = "ts_lookahead_iterator_next")] + fn next(&mut self) -> Option { + // the first symbol is always `0` so we can safely skip it + unsafe { ffi::ts_lookahead_iterator_next(self.0.as_ptr()) }.then(|| self.current_symbol()) + } +} + +impl Drop for LookaheadIterator { + #[doc(alias = "ts_lookahead_iterator_delete")] + fn drop(&mut self) { + unsafe { ffi::ts_lookahead_iterator_delete(self.0.as_ptr()) } + } +} + impl Query { /// Create a new query from a string containing one or more S-expression /// patterns. @@ -1345,7 +1705,7 @@ impl Query { /// The query is associated with a particular language, and can only be run /// on syntax nodes parsed with that language. References to Queries can be /// shared between multiple threads. 
- pub fn new(language: Language, source: &str) -> Result { + pub fn new(language: &Language, source: &str) -> Result { let mut error_offset = 0u32; let mut error_type: ffi::TSQueryError = 0; let bytes = source.as_bytes(); @@ -1354,16 +1714,16 @@ impl Query { let ptr = unsafe { ffi::ts_query_new( language.0, - bytes.as_ptr() as *const c_char, + bytes.as_ptr().cast::(), bytes.len() as u32, - &mut error_offset as *mut u32, - &mut error_type as *mut ffi::TSQueryError, + std::ptr::addr_of_mut!(error_offset), + std::ptr::addr_of_mut!(error_type), ) }; // On failure, build an error based on the error code and offset. if ptr.is_null() { - if error_type == ffi::TSQueryError_TSQueryErrorLanguage { + if error_type == ffi::TSQueryErrorLanguage { return Err(QueryError { row: 0, column: 0, @@ -1380,7 +1740,7 @@ impl Query { let mut line_start = 0; let mut row = 0; let mut line_containing_error = None; - for line in source.split("\n") { + for line in source.split('\n') { let line_end = line_start + line.len() + 1; if line_end > offset { line_containing_error = Some(line); @@ -1395,31 +1755,28 @@ impl Query { let message; match error_type { // Error types that report names - ffi::TSQueryError_TSQueryErrorNodeType - | ffi::TSQueryError_TSQueryErrorField - | ffi::TSQueryError_TSQueryErrorCapture => { + ffi::TSQueryErrorNodeType | ffi::TSQueryErrorField | ffi::TSQueryErrorCapture => { let suffix = source.split_at(offset).1; let end_offset = suffix .find(|c| !char::is_alphanumeric(c) && c != '_' && c != '-') - .unwrap_or(source.len()); + .unwrap_or(suffix.len()); message = suffix.split_at(end_offset).0.to_string(); kind = match error_type { - ffi::TSQueryError_TSQueryErrorNodeType => QueryErrorKind::NodeType, - ffi::TSQueryError_TSQueryErrorField => QueryErrorKind::Field, - ffi::TSQueryError_TSQueryErrorCapture => QueryErrorKind::Capture, + ffi::TSQueryErrorNodeType => QueryErrorKind::NodeType, + ffi::TSQueryErrorField => QueryErrorKind::Field, + ffi::TSQueryErrorCapture => 
QueryErrorKind::Capture, _ => unreachable!(), }; } // Error types that report positions _ => { - message = if let Some(line) = line_containing_error { - line.to_string() + "\n" + &" ".repeat(offset - line_start) + "^" - } else { - "Unexpected EOF".to_string() - }; + message = line_containing_error.map_or_else( + || "Unexpected EOF".to_string(), + |line| line.to_string() + "\n" + &" ".repeat(offset - line_start) + "^", + ); kind = match error_type { - ffi::TSQueryError_TSQueryErrorStructure => QueryErrorKind::Structure, + ffi::TSQueryErrorStructure => QueryErrorKind::Structure, _ => QueryErrorKind::Syntax, }; } @@ -1429,33 +1786,47 @@ impl Query { row, column, offset, - kind, message, + kind, }); } - let string_count = unsafe { ffi::ts_query_string_count(ptr) }; - let capture_count = unsafe { ffi::ts_query_capture_count(ptr) }; - let pattern_count = unsafe { ffi::ts_query_pattern_count(ptr) as usize }; - let mut result = Query { - ptr: unsafe { NonNull::new_unchecked(ptr) }, - capture_names: Vec::with_capacity(capture_count as usize), - capture_quantifiers: Vec::with_capacity(pattern_count as usize), - text_predicates: Vec::with_capacity(pattern_count), - property_predicates: Vec::with_capacity(pattern_count), - property_settings: Vec::with_capacity(pattern_count), - general_predicates: Vec::with_capacity(pattern_count), + unsafe { Self::from_raw_parts(ptr, source) } + } + + #[doc(hidden)] + unsafe fn from_raw_parts(ptr: *mut ffi::TSQuery, source: &str) -> Result { + let ptr = { + struct TSQueryDrop(*mut ffi::TSQuery); + impl Drop for TSQueryDrop { + fn drop(&mut self) { + unsafe { ffi::ts_query_delete(self.0) } + } + } + TSQueryDrop(ptr) }; + let string_count = unsafe { ffi::ts_query_string_count(ptr.0) }; + let capture_count = unsafe { ffi::ts_query_capture_count(ptr.0) }; + let pattern_count = unsafe { ffi::ts_query_pattern_count(ptr.0) as usize }; + + let mut capture_names = Vec::with_capacity(capture_count as usize); + let mut capture_quantifiers_vec = 
Vec::with_capacity(pattern_count as usize); + let mut text_predicates_vec = Vec::with_capacity(pattern_count); + let mut property_predicates_vec = Vec::with_capacity(pattern_count); + let mut property_settings_vec = Vec::with_capacity(pattern_count); + let mut general_predicates_vec = Vec::with_capacity(pattern_count); + // Build a vector of strings to store the capture names. for i in 0..capture_count { unsafe { let mut length = 0u32; let name = - ffi::ts_query_capture_name_for_id(ptr, i, &mut length as *mut u32) as *const u8; + ffi::ts_query_capture_name_for_id(ptr.0, i, std::ptr::addr_of_mut!(length)) + .cast::(); let name = slice::from_raw_parts(name, length as usize); let name = str::from_utf8_unchecked(name); - result.capture_names.push(name.to_string()); + capture_names.push(name); } } @@ -1464,11 +1835,11 @@ impl Query { let mut capture_quantifiers = Vec::with_capacity(capture_count as usize); for j in 0..capture_count { unsafe { - let quantifier = ffi::ts_query_capture_quantifier_for_id(ptr, i as u32, j); + let quantifier = ffi::ts_query_capture_quantifier_for_id(ptr.0, i as u32, j); capture_quantifiers.push(quantifier.into()); } } - result.capture_quantifiers.push(capture_quantifiers); + capture_quantifiers_vec.push(capture_quantifiers.into()); } // Build a vector of strings to represent literal values used in predicates. 
@@ -1476,11 +1847,11 @@ impl Query { .map(|i| unsafe { let mut length = 0u32; let value = - ffi::ts_query_string_value_for_id(ptr, i as u32, &mut length as *mut u32) - as *const u8; + ffi::ts_query_string_value_for_id(ptr.0, i, std::ptr::addr_of_mut!(length)) + .cast::(); let value = slice::from_raw_parts(value, length as usize); let value = str::from_utf8_unchecked(value); - value.to_string() + value }) .collect::>(); @@ -1488,49 +1859,51 @@ impl Query { for i in 0..pattern_count { let predicate_steps = unsafe { let mut length = 0u32; - let raw_predicates = - ffi::ts_query_predicates_for_pattern(ptr, i as u32, &mut length as *mut u32); - if length > 0 { - slice::from_raw_parts(raw_predicates, length as usize) - } else { - &[] - } + let raw_predicates = ffi::ts_query_predicates_for_pattern( + ptr.0, + i as u32, + std::ptr::addr_of_mut!(length), + ); + (length > 0) + .then(|| slice::from_raw_parts(raw_predicates, length as usize)) + .unwrap_or_default() }; - let byte_offset = unsafe { ffi::ts_query_start_byte_for_pattern(ptr, i as u32) }; + let byte_offset = unsafe { ffi::ts_query_start_byte_for_pattern(ptr.0, i as u32) }; let row = source .char_indices() .take_while(|(i, _)| *i < byte_offset as usize) .filter(|(_, c)| *c == '\n') .count(); - let type_done = ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeDone; - let type_capture = ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeCapture; - let type_string = ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeString; + use ffi::TSQueryPredicateStepType as T; + const TYPE_DONE: T = ffi::TSQueryPredicateStepTypeDone; + const TYPE_CAPTURE: T = ffi::TSQueryPredicateStepTypeCapture; + const TYPE_STRING: T = ffi::TSQueryPredicateStepTypeString; let mut text_predicates = Vec::new(); let mut property_predicates = Vec::new(); let mut property_settings = Vec::new(); let mut general_predicates = Vec::new(); - for p in predicate_steps.split(|s| s.type_ == type_done) { + for p in predicate_steps.split(|s| s.type_ == 
TYPE_DONE) { if p.is_empty() { continue; } - if p[0].type_ != type_string { + if p[0].type_ != TYPE_STRING { return Err(predicate_error( row, format!( "Expected predicate to start with a function name. Got @{}.", - result.capture_names[p[0].value_id as usize], + capture_names[p[0].value_id as usize], ), )); } // Build a predicate for each of the known predicate function names. - let operator_name = &string_values[p[0].value_id as usize]; - match operator_name.as_str() { - "eq?" | "not-eq?" => { + let operator_name = string_values[p[0].value_id as usize]; + match operator_name { + "eq?" | "not-eq?" | "any-eq?" | "any-not-eq?" => { if p.len() != 3 { return Err(predicate_error( row, @@ -1540,64 +1913,78 @@ impl Query { ), )); } - if p[1].type_ != type_capture { + if p[1].type_ != TYPE_CAPTURE { return Err(predicate_error(row, format!( "First argument to #eq? predicate must be a capture name. Got literal \"{}\".", string_values[p[1].value_id as usize], ))); } - let is_positive = operator_name == "eq?"; - text_predicates.push(if p[2].type_ == type_capture { - TextPredicate::CaptureEqCapture( + let is_positive = operator_name == "eq?" || operator_name == "any-eq?"; + let match_all = match operator_name { + "eq?" | "not-eq?" => true, + "any-eq?" | "any-not-eq?" => false, + _ => unreachable!(), + }; + text_predicates.push(if p[2].type_ == TYPE_CAPTURE { + TextPredicateCapture::EqCapture( p[1].value_id, p[2].value_id, is_positive, + match_all, ) } else { - TextPredicate::CaptureEqString( + TextPredicateCapture::EqString( p[1].value_id, - string_values[p[2].value_id as usize].clone(), + string_values[p[2].value_id as usize].to_string().into(), is_positive, + match_all, ) }); } - "match?" | "not-match?" => { + "match?" | "not-match?" | "any-match?" | "any-not-match?" => { if p.len() != 3 { return Err(predicate_error(row, format!( "Wrong number of arguments to #match? predicate. 
Expected 2, got {}.", p.len() - 1 ))); } - if p[1].type_ != type_capture { + if p[1].type_ != TYPE_CAPTURE { return Err(predicate_error(row, format!( "First argument to #match? predicate must be a capture name. Got literal \"{}\".", string_values[p[1].value_id as usize], ))); } - if p[2].type_ == type_capture { + if p[2].type_ == TYPE_CAPTURE { return Err(predicate_error(row, format!( "Second argument to #match? predicate must be a literal. Got capture @{}.", - result.capture_names[p[2].value_id as usize], + capture_names[p[2].value_id as usize], ))); } - let is_positive = operator_name == "match?"; + let is_positive = + operator_name == "match?" || operator_name == "any-match?"; + let match_all = match operator_name { + "match?" | "not-match?" => true, + "any-match?" | "any-not-match?" => false, + _ => unreachable!(), + }; let regex = &string_values[p[2].value_id as usize]; - text_predicates.push(TextPredicate::CaptureMatchString( + text_predicates.push(TextPredicateCapture::MatchString( p[1].value_id, regex::bytes::Regex::new(regex).map_err(|_| { - predicate_error(row, format!("Invalid regex '{}'", regex)) + predicate_error(row, format!("Invalid regex '{regex}'")) })?, is_positive, + match_all, )); } "set!" => property_settings.push(Self::parse_property( row, - &operator_name, - &result.capture_names, + operator_name, + &capture_names, &string_values, &p[1..], )?), @@ -1605,24 +1992,60 @@ impl Query { "is?" | "is-not?" => property_predicates.push(( Self::parse_property( row, - &operator_name, - &result.capture_names, + operator_name, + &capture_names, &string_values, &p[1..], )?, operator_name == "is?", )), + "any-of?" | "not-any-of?" => { + if p.len() < 2 { + return Err(predicate_error(row, format!( + "Wrong number of arguments to #any-of? predicate. Expected at least 1, got {}.", + p.len() - 1 + ))); + } + if p[1].type_ != TYPE_CAPTURE { + return Err(predicate_error(row, format!( + "First argument to #any-of? predicate must be a capture name. 
Got literal \"{}\".", + string_values[p[1].value_id as usize], + ))); + } + + let is_positive = operator_name == "any-of?"; + let mut values = Vec::new(); + for arg in &p[2..] { + if arg.type_ == TYPE_CAPTURE { + return Err(predicate_error(row, format!( + "Arguments to #any-of? predicate must be literals. Got capture @{}.", + capture_names[arg.value_id as usize], + ))); + } + values.push(string_values[arg.value_id as usize]); + } + text_predicates.push(TextPredicateCapture::AnyString( + p[1].value_id, + values + .iter() + .map(|x| (*x).to_string().into()) + .collect::>() + .into(), + is_positive, + )); + } + _ => general_predicates.push(QueryPredicate { - operator: operator_name.clone().into_boxed_str(), + operator: operator_name.to_string().into(), args: p[1..] .iter() .map(|a| { - if a.type_ == type_capture { + if a.type_ == TYPE_CAPTURE { QueryPredicateArg::Capture(a.value_id) } else { QueryPredicateArg::String( - string_values[a.value_id as usize].clone().into_boxed_str(), + string_values[a.value_id as usize].to_string().into(), ) } }) @@ -1631,32 +2054,36 @@ impl Query { } } - result - .text_predicates - .push(text_predicates.into_boxed_slice()); - result - .property_predicates - .push(property_predicates.into_boxed_slice()); - result - .property_settings - .push(property_settings.into_boxed_slice()); - result - .general_predicates - .push(general_predicates.into_boxed_slice()); + text_predicates_vec.push(text_predicates.into()); + property_predicates_vec.push(property_predicates.into()); + property_settings_vec.push(property_settings.into()); + general_predicates_vec.push(general_predicates.into()); } + + let result = Self { + ptr: unsafe { NonNull::new_unchecked(ptr.0) }, + capture_names: capture_names.into(), + capture_quantifiers: capture_quantifiers_vec.into(), + text_predicates: text_predicates_vec.into(), + property_predicates: property_predicates_vec.into(), + property_settings: property_settings_vec.into(), + general_predicates: 
general_predicates_vec.into(), + }; + + std::mem::forget(ptr); + Ok(result) } /// Get the byte offset where the given pattern starts in the query's source. #[doc(alias = "ts_query_start_byte_for_pattern")] + #[must_use] pub fn start_byte_for_pattern(&self, pattern_index: usize) -> usize { - if pattern_index >= self.text_predicates.len() { - panic!( - "Pattern index is {} but the pattern count is {}", - pattern_index, - self.text_predicates.len(), - ); - } + assert!( + pattern_index < self.text_predicates.len(), + "Pattern index is {pattern_index} but the pattern count is {}", + self.text_predicates.len(), + ); unsafe { ffi::ts_query_start_byte_for_pattern(self.ptr.as_ptr(), pattern_index as u32) as usize } @@ -1664,39 +2091,45 @@ impl Query { /// Get the number of patterns in the query. #[doc(alias = "ts_query_pattern_count")] + #[must_use] pub fn pattern_count(&self) -> usize { unsafe { ffi::ts_query_pattern_count(self.ptr.as_ptr()) as usize } } /// Get the names of the captures used in the query. - pub fn capture_names(&self) -> &[String] { + #[must_use] + pub const fn capture_names(&self) -> &[&str] { &self.capture_names } /// Get the quantifiers of the captures used in the query. - pub fn capture_quantifiers(&self, index: usize) -> &[CaptureQuantifier] { + #[must_use] + pub const fn capture_quantifiers(&self, index: usize) -> &[CaptureQuantifier] { &self.capture_quantifiers[index] } /// Get the index for a given capture name. + #[must_use] pub fn capture_index_for_name(&self, name: &str) -> Option { self.capture_names .iter() - .position(|n| n == name) + .position(|n| *n == name) .map(|ix| ix as u32) } /// Get the properties that are checked for the given pattern index. /// /// This includes predicates with the operators `is?` and `is-not?`. 
- pub fn property_predicates(&self, index: usize) -> &[(QueryProperty, bool)] { + #[must_use] + pub const fn property_predicates(&self, index: usize) -> &[(QueryProperty, bool)] { &self.property_predicates[index] } /// Get the properties that are set for the given pattern index. /// /// This includes predicates with the operator `set!`. - pub fn property_settings(&self, index: usize) -> &[QueryProperty] { + #[must_use] + pub const fn property_settings(&self, index: usize) -> &[QueryProperty] { &self.property_settings[index] } @@ -1707,7 +2140,8 @@ impl Query { /// * `eq?` and `not-eq?` /// * `is?` and `is-not?` /// * `set!` - pub fn general_predicates(&self, index: usize) -> &[QueryPredicate] { + #[must_use] + pub const fn general_predicates(&self, index: usize) -> &[QueryPredicate] { &self.general_predicates[index] } @@ -1720,7 +2154,7 @@ impl Query { unsafe { ffi::ts_query_disable_capture( self.ptr.as_ptr(), - name.as_bytes().as_ptr() as *const c_char, + name.as_bytes().as_ptr().cast::(), name.len() as u32, ); } @@ -1737,12 +2171,14 @@ impl Query { /// Check if a given pattern within a query has a single root node. #[doc(alias = "ts_query_is_pattern_rooted")] + #[must_use] pub fn is_pattern_rooted(&self, index: usize) -> bool { unsafe { ffi::ts_query_is_pattern_rooted(self.ptr.as_ptr(), index as u32) } } /// Check if a given pattern within a query has a single root node. #[doc(alias = "ts_query_is_pattern_non_local")] + #[must_use] pub fn is_pattern_non_local(&self, index: usize) -> bool { unsafe { ffi::ts_query_is_pattern_non_local(self.ptr.as_ptr(), index as u32) } } @@ -1752,6 +2188,7 @@ impl Query { /// A query step is 'definite' if its parent pattern will be guaranteed to match /// successfully once it reaches the step. 
#[doc(alias = "ts_query_is_pattern_guaranteed_at_step")] + #[must_use] pub fn is_pattern_guaranteed_at_step(&self, byte_offset: usize) -> bool { unsafe { ffi::ts_query_is_pattern_guaranteed_at_step(self.ptr.as_ptr(), byte_offset as u32) @@ -1761,16 +2198,15 @@ impl Query { fn parse_property( row: usize, function_name: &str, - capture_names: &[String], - string_values: &[String], + capture_names: &[&str], + string_values: &[&str], args: &[ffi::TSQueryPredicateStep], ) -> Result { - if args.len() == 0 || args.len() > 3 { + if args.is_empty() || args.len() > 3 { return Err(predicate_error( row, format!( - "Wrong number of arguments to {} predicate. Expected 1 to 3, got {}.", - function_name, + "Wrong number of arguments to {function_name} predicate. Expected 1 to 3, got {}.", args.len(), ), )); @@ -1781,13 +2217,13 @@ impl Query { let mut value = None; for arg in args { - if arg.type_ == ffi::TSQueryPredicateStepType_TSQueryPredicateStepTypeCapture { + if arg.type_ == ffi::TSQueryPredicateStepTypeCapture { if capture_id.is_some() { return Err(predicate_error( row, format!( - "Invalid arguments to {} predicate. Unexpected second capture name @{}", - function_name, capture_names[arg.value_id as usize] + "Invalid arguments to {function_name} predicate. Unexpected second capture name @{}", + capture_names[arg.value_id as usize] ), )); } @@ -1795,13 +2231,13 @@ impl Query { } else if key.is_none() { key = Some(&string_values[arg.value_id as usize]); } else if value.is_none() { - value = Some(string_values[arg.value_id as usize].as_str()); + value = Some(string_values[arg.value_id as usize]); } else { return Err(predicate_error( row, format!( - "Invalid arguments to {} predicate. Unexpected third argument @{}", - function_name, string_values[arg.value_id as usize] + "Invalid arguments to {function_name} predicate. 
Unexpected third argument @{}", + string_values[arg.value_id as usize] ), )); } @@ -1810,30 +2246,35 @@ impl Query { if let Some(key) = key { Ok(QueryProperty::new(key, value, capture_id)) } else { - return Err(predicate_error( + Err(predicate_error( row, - format!( - "Invalid arguments to {} predicate. Missing key argument", - function_name, - ), - )); + format!("Invalid arguments to {function_name} predicate. Missing key argument",), + )) } } } +impl Default for QueryCursor { + fn default() -> Self { + Self::new() + } +} + impl QueryCursor { /// Create a new cursor for executing a given query. /// /// The cursor stores the state that is needed to iteratively search for matches. #[doc(alias = "ts_query_cursor_new")] + #[must_use] pub fn new() -> Self { - QueryCursor { + Self { ptr: unsafe { NonNull::new_unchecked(ffi::ts_query_cursor_new()) }, } } /// Return the maximum number of in-progress matches for this cursor. #[doc(alias = "ts_query_cursor_match_limit")] + #[must_use] pub fn match_limit(&self) -> u32 { unsafe { ffi::ts_query_cursor_match_limit(self.ptr.as_ptr()) } } @@ -1850,6 +2291,7 @@ impl QueryCursor { /// Check if, on its last execution, this cursor exceeded its maximum number of /// in-progress matches. #[doc(alias = "ts_query_cursor_did_exceed_match_limit")] + #[must_use] pub fn did_exceed_match_limit(&self) -> bool { unsafe { ffi::ts_query_cursor_did_exceed_match_limit(self.ptr.as_ptr()) } } @@ -1860,21 +2302,21 @@ impl QueryCursor { /// Because multiple patterns can match the same set of nodes, one match may contain /// captures that appear *before* some of the captures from a previous match. 
#[doc(alias = "ts_query_cursor_exec")] - pub fn matches<'a, 'tree: 'a, T: TextProvider<'a> + 'a>( - &'a mut self, - query: &'a Query, + pub fn matches<'query, 'tree, T: TextProvider, I: AsRef<[u8]>>( + &mut self, + query: &'query Query, node: Node<'tree>, text_provider: T, - ) -> QueryMatches<'a, 'tree, T> { + ) -> QueryMatches<'query, 'tree, T, I> { let ptr = self.ptr.as_ptr(); unsafe { ffi::ts_query_cursor_exec(ptr, query.ptr.as_ptr(), node.0) }; QueryMatches { ptr, query, text_provider, - buffer1: Default::default(), - buffer2: Default::default(), - _tree: PhantomData, + buffer1: Vec::default(), + buffer2: Vec::default(), + _phantom: PhantomData, } } @@ -1883,21 +2325,21 @@ impl QueryCursor { /// This is useful if you don't care about which pattern matched, and just want a single, /// ordered sequence of captures. #[doc(alias = "ts_query_cursor_exec")] - pub fn captures<'a, 'tree: 'a, T: TextProvider<'a> + 'a>( - &'a mut self, - query: &'a Query, + pub fn captures<'query, 'tree, T: TextProvider, I: AsRef<[u8]>>( + &mut self, + query: &'query Query, node: Node<'tree>, text_provider: T, - ) -> QueryCaptures<'a, 'tree, T> { + ) -> QueryCaptures<'query, 'tree, T, I> { let ptr = self.ptr.as_ptr(); - unsafe { ffi::ts_query_cursor_exec(self.ptr.as_ptr(), query.ptr.as_ptr(), node.0) }; + unsafe { ffi::ts_query_cursor_exec(ptr, query.ptr.as_ptr(), node.0) }; QueryCaptures { ptr, query, text_provider, - buffer1: Default::default(), - buffer2: Default::default(), - _tree: PhantomData, + buffer1: Vec::default(), + buffer2: Vec::default(), + _phantom: PhantomData, } } @@ -1926,10 +2368,34 @@ impl QueryCursor { } self } + + /// Set the maximum start depth for a query cursor. + /// + /// This prevents cursors from exploring children nodes at a certain depth. + /// Note if a pattern includes many children, then they will still be checked. 
+ /// + /// The zero max start depth value can be used as a special behavior and + /// it helps to destructure a subtree by staying on a node and using captures + /// for interested parts. Note that the zero max start depth only limit a search + /// depth for a pattern's root node but other nodes that are parts of the pattern + /// may be searched at any depth what defined by the pattern structure. + /// + /// Set to `None` to remove the maximum start depth. + #[doc(alias = "ts_query_cursor_set_max_start_depth")] + pub fn set_max_start_depth(&mut self, max_start_depth: Option) -> &mut Self { + unsafe { + ffi::ts_query_cursor_set_max_start_depth( + self.ptr.as_ptr(), + max_start_depth.unwrap_or(u32::MAX), + ); + } + self + } } -impl<'a, 'tree> QueryMatch<'a, 'tree> { - pub fn id(&self) -> u32 { +impl<'tree> QueryMatch<'_, 'tree> { + #[must_use] + pub const fn id(&self) -> u32 { self.id } @@ -1942,116 +2408,153 @@ impl<'a, 'tree> QueryMatch<'a, 'tree> { &self, capture_ix: u32, ) -> impl Iterator> + '_ { - self.captures.iter().filter_map(move |capture| { - if capture.index == capture_ix { - Some(capture.node) - } else { - None - } - }) + self.captures + .iter() + .filter_map(move |capture| (capture.index == capture_ix).then_some(capture.node)) } - fn new(m: ffi::TSQueryMatch, cursor: *mut ffi::TSQueryCursor) -> Self { + fn new(m: &ffi::TSQueryMatch, cursor: *mut ffi::TSQueryCursor) -> Self { QueryMatch { cursor, id: m.id, pattern_index: m.pattern_index as usize, - captures: if m.capture_count > 0 { - unsafe { + captures: (m.capture_count > 0) + .then(|| unsafe { slice::from_raw_parts( - m.captures as *const QueryCapture<'tree>, + m.captures.cast::>(), m.capture_count as usize, ) - } - } else { - &[] - }, + }) + .unwrap_or_default(), } } - fn satisfies_text_predicates( + fn satisfies_text_predicates>( &self, query: &Query, buffer1: &mut Vec, buffer2: &mut Vec, - text_provider: &mut impl TextProvider<'a>, + text_provider: &mut impl TextProvider, ) -> bool { - fn 
get_text<'a, 'b: 'a, I: Iterator>( + struct NodeText<'a, T> { buffer: &'a mut Vec, - mut chunks: I, - ) -> &'a [u8] { - let first_chunk = chunks.next().unwrap_or(&[]); - if let Some(next_chunk) = chunks.next() { - buffer.clear(); - buffer.extend_from_slice(first_chunk); - buffer.extend_from_slice(next_chunk); - for chunk in chunks { - buffer.extend_from_slice(chunk); + first_chunk: Option, + } + impl<'a, T: AsRef<[u8]>> NodeText<'a, T> { + fn new(buffer: &'a mut Vec) -> Self { + Self { + buffer, + first_chunk: None, + } + } + + fn get_text(&mut self, chunks: &mut impl Iterator) -> &[u8] { + self.first_chunk = chunks.next(); + if let Some(next_chunk) = chunks.next() { + self.buffer.clear(); + self.buffer + .extend_from_slice(self.first_chunk.as_ref().unwrap().as_ref()); + self.buffer.extend_from_slice(next_chunk.as_ref()); + for chunk in chunks { + self.buffer.extend_from_slice(chunk.as_ref()); + } + self.buffer.as_slice() + } else if let Some(ref first_chunk) = self.first_chunk { + first_chunk.as_ref() + } else { + Default::default() } - buffer.as_slice() - } else { - first_chunk } } + let mut node_text1 = NodeText::new(buffer1); + let mut node_text2 = NodeText::new(buffer2); + query.text_predicates[self.pattern_index] .iter() .all(|predicate| match predicate { - TextPredicate::CaptureEqCapture(i, j, is_positive) => { - let node1 = self.nodes_for_capture_index(*i).next(); - let node2 = self.nodes_for_capture_index(*j).next(); - match (node1, node2) { - (Some(node1), Some(node2)) => { - let text1 = get_text(buffer1, text_provider.text(node1)); - let text2 = get_text(buffer2, text_provider.text(node2)); - (text1 == text2) == *is_positive + TextPredicateCapture::EqCapture(i, j, is_positive, match_all_nodes) => { + let mut nodes_1 = self.nodes_for_capture_index(*i); + let mut nodes_2 = self.nodes_for_capture_index(*j); + while let (Some(node1), Some(node2)) = (nodes_1.next(), nodes_2.next()) { + let mut text1 = text_provider.text(node1); + let mut text2 = 
text_provider.text(node2); + let text1 = node_text1.get_text(&mut text1); + let text2 = node_text2.get_text(&mut text2); + if (text1 == text2) != *is_positive && *match_all_nodes { + return false; + } + if (text1 == text2) == *is_positive && !*match_all_nodes { + return true; + } + } + nodes_1.next().is_none() && nodes_2.next().is_none() + } + TextPredicateCapture::EqString(i, s, is_positive, match_all_nodes) => { + let nodes = self.nodes_for_capture_index(*i); + for node in nodes { + let mut text = text_provider.text(node); + let text = node_text1.get_text(&mut text); + if (text == s.as_bytes()) != *is_positive && *match_all_nodes { + return false; + } + if (text == s.as_bytes()) == *is_positive && !*match_all_nodes { + return true; } - _ => true, } + true } - TextPredicate::CaptureEqString(i, s, is_positive) => { - let node = self.nodes_for_capture_index(*i).next(); - match node { - Some(node) => { - let text = get_text(buffer1, text_provider.text(node)); - (text == s.as_bytes()) == *is_positive + TextPredicateCapture::MatchString(i, r, is_positive, match_all_nodes) => { + let nodes = self.nodes_for_capture_index(*i); + for node in nodes { + let mut text = text_provider.text(node); + let text = node_text1.get_text(&mut text); + if (r.is_match(text)) != *is_positive && *match_all_nodes { + return false; + } + if (r.is_match(text)) == *is_positive && !*match_all_nodes { + return true; } - None => true, } + true } - TextPredicate::CaptureMatchString(i, r, is_positive) => { - let node = self.nodes_for_capture_index(*i).next(); - match node { - Some(node) => { - let text = get_text(buffer1, text_provider.text(node)); - r.is_match(text) == *is_positive + TextPredicateCapture::AnyString(i, v, is_positive) => { + let nodes = self.nodes_for_capture_index(*i); + for node in nodes { + let mut text = text_provider.text(node); + let text = node_text1.get_text(&mut text); + if (v.iter().any(|s| text == s.as_bytes())) != *is_positive { + return false; } - None => true, } + true 
} }) } } impl QueryProperty { + #[must_use] pub fn new(key: &str, value: Option<&str>, capture_id: Option) -> Self { - QueryProperty { + Self { capture_id, - key: key.to_string().into_boxed_str(), - value: value.map(|s| s.to_string().into_boxed_str()), + key: key.to_string().into(), + value: value.map(|s| s.to_string().into()), } } } -impl<'a, 'tree, T: TextProvider<'a>> Iterator for QueryMatches<'a, 'tree, T> { - type Item = QueryMatch<'a, 'tree>; +impl<'query, 'tree: 'query, T: TextProvider, I: AsRef<[u8]>> Iterator + for QueryMatches<'query, 'tree, T, I> +{ + type Item = QueryMatch<'query, 'tree>; fn next(&mut self) -> Option { unsafe { loop { let mut m = MaybeUninit::::uninit(); if ffi::ts_query_cursor_next_match(self.ptr, m.as_mut_ptr()) { - let result = QueryMatch::new(m.assume_init(), self.ptr); + let result = QueryMatch::new(&m.assume_init(), self.ptr); if result.satisfies_text_predicates( self.query, &mut self.buffer1, @@ -2068,8 +2571,10 @@ impl<'a, 'tree, T: TextProvider<'a>> Iterator for QueryMatches<'a, 'tree, T> { } } -impl<'a, 'tree, T: TextProvider<'a>> Iterator for QueryCaptures<'a, 'tree, T> { - type Item = (QueryMatch<'a, 'tree>, usize); +impl<'query, 'tree: 'query, T: TextProvider, I: AsRef<[u8]>> Iterator + for QueryCaptures<'query, 'tree, T, I> +{ + type Item = (QueryMatch<'query, 'tree>, usize); fn next(&mut self) -> Option { unsafe { @@ -2079,9 +2584,9 @@ impl<'a, 'tree, T: TextProvider<'a>> Iterator for QueryCaptures<'a, 'tree, T> { if ffi::ts_query_cursor_next_capture( self.ptr, m.as_mut_ptr(), - &mut capture_index as *mut u32, + std::ptr::addr_of_mut!(capture_index), ) { - let result = QueryMatch::new(m.assume_init(), self.ptr); + let result = QueryMatch::new(&m.assume_init(), self.ptr); if result.satisfies_text_predicates( self.query, &mut self.buffer1, @@ -2089,9 +2594,8 @@ impl<'a, 'tree, T: TextProvider<'a>> Iterator for QueryCaptures<'a, 'tree, T> { &mut self.text_provider, ) { return Some((result, capture_index as usize)); - } else 
{ - result.remove(); } + result.remove(); } else { return None; } @@ -2100,7 +2604,7 @@ impl<'a, 'tree, T: TextProvider<'a>> Iterator for QueryCaptures<'a, 'tree, T> { } } -impl<'a, 'tree, T: TextProvider<'a>> QueryMatches<'a, 'tree, T> { +impl, I: AsRef<[u8]>> QueryMatches<'_, '_, T, I> { #[doc(alias = "ts_query_cursor_set_byte_range")] pub fn set_byte_range(&mut self, range: ops::Range) { unsafe { @@ -2116,7 +2620,7 @@ impl<'a, 'tree, T: TextProvider<'a>> QueryMatches<'a, 'tree, T> { } } -impl<'a, 'tree, T: TextProvider<'a>> QueryCaptures<'a, 'tree, T> { +impl, I: AsRef<[u8]>> QueryCaptures<'_, '_, T, I> { #[doc(alias = "ts_query_cursor_set_byte_range")] pub fn set_byte_range(&mut self, range: ops::Range) { unsafe { @@ -2132,7 +2636,7 @@ impl<'a, 'tree, T: TextProvider<'a>> QueryCaptures<'a, 'tree, T> { } } -impl<'cursor, 'tree> fmt::Debug for QueryMatch<'cursor, 'tree> { +impl fmt::Debug for QueryMatch<'_, '_> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!( f, @@ -2142,19 +2646,20 @@ impl<'cursor, 'tree> fmt::Debug for QueryMatch<'cursor, 'tree> { } } -impl<'a, F, I> TextProvider<'a> for F +impl TextProvider for F where - F: FnMut(Node) -> I, - I: Iterator + 'a, + F: FnMut(Node) -> R, + R: Iterator, + I: AsRef<[u8]>, { - type I = I; + type I = R; fn text(&mut self, node: Node) -> Self::I { (self)(node) } } -impl<'a> TextProvider<'a> for &'a [u8] { +impl<'a> TextProvider<&'a [u8]> for &'a [u8] { type I = iter::Once<&'a [u8]>; fn text(&mut self, node: Node) -> Self::I { @@ -2181,8 +2686,9 @@ impl Drop for QueryCursor { } impl Point { - pub fn new(row: usize, column: usize) -> Self { - Point { row, column } + #[must_use] + pub const fn new(row: usize, column: usize) -> Self { + Self { row, column } } } @@ -2192,11 +2698,11 @@ impl fmt::Display for Point { } } -impl Into for Point { - fn into(self) -> ffi::TSPoint { - ffi::TSPoint { - row: self.row as u32, - column: self.column as u32, +impl From for ffi::TSPoint { + fn from(val: Point) -> Self { + 
Self { + row: val.row as u32, + column: val.column as u32, } } } @@ -2210,13 +2716,13 @@ impl From for Point { } } -impl Into for Range { - fn into(self) -> ffi::TSRange { - ffi::TSRange { - start_byte: self.start_byte as u32, - end_byte: self.end_byte as u32, - start_point: self.start_point.into(), - end_point: self.end_point.into(), +impl From for ffi::TSRange { + fn from(val: Range) -> Self { + Self { + start_byte: val.start_byte as u32, + end_byte: val.end_byte as u32, + start_point: val.start_point.into(), + end_point: val.end_point.into(), } } } @@ -2232,21 +2738,22 @@ impl From for Range { } } -impl<'a> Into for &'a InputEdit { - fn into(self) -> ffi::TSInputEdit { - ffi::TSInputEdit { - start_byte: self.start_byte as u32, - old_end_byte: self.old_end_byte as u32, - new_end_byte: self.new_end_byte as u32, - start_point: self.start_position.into(), - old_end_point: self.old_end_position.into(), - new_end_point: self.new_end_position.into(), +impl From<&'_ InputEdit> for ffi::TSInputEdit { + fn from(val: &'_ InputEdit) -> Self { + Self { + start_byte: val.start_byte as u32, + old_end_byte: val.old_end_byte as u32, + new_end_byte: val.new_end_byte as u32, + start_point: val.start_position.into(), + old_end_point: val.old_end_position.into(), + new_end_point: val.new_end_position.into(), } } } impl<'a> LossyUtf8<'a> { - pub fn new(bytes: &'a [u8]) -> Self { + #[must_use] + pub const fn new(bytes: &'a [u8]) -> Self { LossyUtf8 { bytes, in_replacement: false, @@ -2267,7 +2774,7 @@ impl<'a> Iterator for LossyUtf8<'a> { } match std::str::from_utf8(self.bytes) { Ok(valid) => { - self.bytes = &[]; + self.bytes = Default::default(); Some(valid) } Err(error) => { @@ -2291,7 +2798,8 @@ impl<'a> Iterator for LossyUtf8<'a> { } } -fn predicate_error(row: usize, message: String) -> QueryError { +#[must_use] +const fn predicate_error(row: usize, message: String) -> QueryError { QueryError { kind: QueryErrorKind::Predicate, row, @@ -2328,7 +2836,9 @@ impl fmt::Display for 
QueryError { QueryErrorKind::Syntax => "Invalid syntax:\n", QueryErrorKind::Language => "", }; - if msg.len() > 0 { + if msg.is_empty() { + write!(f, "{}", self.message) + } else { write!( f, "Query error at {}:{}. {}{}", @@ -2337,18 +2847,29 @@ impl fmt::Display for QueryError { msg, self.message ) - } else { - write!(f, "{}", self.message) } } } +pub fn wasm_stdlib_symbols() -> impl Iterator { + const WASM_STDLIB_SYMBOLS: &str = include_str!(concat!(env!("OUT_DIR"), "/stdlib-symbols.txt")); + + WASM_STDLIB_SYMBOLS + .lines() + .map(|s| s.trim_matches(|c| c == '"' || c == ',')) +} + extern "C" { fn free(ptr: *mut c_void); } static mut FREE_FN: unsafe extern "C" fn(ptr: *mut c_void) = free; +/// Sets the memory allocation functions that the core library should use. +/// +/// # Safety +/// +/// This function uses FFI and mutates a static global. #[doc(alias = "ts_set_allocator")] pub unsafe fn set_allocator( new_malloc: Option *mut c_void>, @@ -2365,12 +2886,28 @@ impl error::Error for LanguageError {} impl error::Error for QueryError {} unsafe impl Send for Language {} -unsafe impl Send for Parser {} -unsafe impl Send for Query {} -unsafe impl Send for QueryCursor {} -unsafe impl Send for Tree {} unsafe impl Sync for Language {} + +unsafe impl Send for Node<'_> {} +unsafe impl Sync for Node<'_> {} + +unsafe impl Send for LookaheadIterator {} +unsafe impl Sync for LookaheadIterator {} + +unsafe impl Send for LookaheadNamesIterator<'_> {} +unsafe impl Sync for LookaheadNamesIterator<'_> {} + +unsafe impl Send for Parser {} unsafe impl Sync for Parser {} + +unsafe impl Send for Query {} unsafe impl Sync for Query {} + +unsafe impl Send for QueryCursor {} unsafe impl Sync for QueryCursor {} + +unsafe impl Send for Tree {} unsafe impl Sync for Tree {} + +unsafe impl Send for TreeCursor<'_> {} +unsafe impl Sync for TreeCursor<'_> {} diff --git a/third-party/tree-sitter/tree-sitter/lib/binding_rust/util.rs b/third-party/tree-sitter/tree-sitter/lib/binding_rust/util.rs 
index 5eda71f4223..69e5ec7c81b 100644 --- a/third-party/tree-sitter/tree-sitter/lib/binding_rust/util.rs +++ b/third-party/tree-sitter/tree-sitter/lib/binding_rust/util.rs @@ -9,7 +9,7 @@ pub struct CBufferIter { } impl CBufferIter { - pub unsafe fn new(ptr: *mut T, count: usize) -> Self { + pub const unsafe fn new(ptr: *mut T, count: usize) -> Self { Self { ptr, count, i: 0 } } } @@ -23,7 +23,7 @@ impl Iterator for CBufferIter { None } else { self.i += 1; - Some(unsafe { *self.ptr.offset(i as isize) }) + Some(unsafe { *self.ptr.add(i) }) } } @@ -37,6 +37,8 @@ impl ExactSizeIterator for CBufferIter {} impl Drop for CBufferIter { fn drop(&mut self) { - unsafe { (FREE_FN)(self.ptr as *mut c_void) }; + if !self.ptr.is_null() { + unsafe { (FREE_FN)(self.ptr.cast::()) }; + } } } diff --git a/third-party/tree-sitter/tree-sitter/lib/binding_rust/wasm_language.rs b/third-party/tree-sitter/tree-sitter/lib/binding_rust/wasm_language.rs new file mode 100644 index 00000000000..2d8a32d26df --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/lib/binding_rust/wasm_language.rs @@ -0,0 +1,141 @@ +use crate::{ffi, Language, LanguageError, Parser, FREE_FN}; +use std::{ + error, + ffi::{CStr, CString}, + fmt, + mem::{self, MaybeUninit}, + os::raw::c_char, +}; +pub use wasmtime; + +// Force Cargo to include wasmtime-c-api as a dependency of this crate, +// even though it is only used by the C code. 
+#[allow(unused)] +fn _use_wasmtime() { + wasmtime_c_api::wasm_engine_new(); +} + +#[repr(C)] +#[derive(Clone)] +#[allow(non_camel_case_types)] +pub struct wasm_engine_t { + pub(crate) engine: wasmtime::Engine, +} + +pub struct WasmStore(*mut ffi::TSWasmStore); + +#[derive(Debug, PartialEq, Eq)] +pub struct WasmError { + pub kind: WasmErrorKind, + pub message: String, +} + +#[derive(Debug, PartialEq, Eq)] +pub enum WasmErrorKind { + Parse, + Compile, + Instantiate, + Other, +} + +impl WasmStore { + pub fn new(engine: wasmtime::Engine) -> Result { + unsafe { + let mut error = MaybeUninit::::uninit(); + let engine = Box::new(wasm_engine_t { engine }); + let store = ffi::ts_wasm_store_new( + (Box::leak(engine) as *mut wasm_engine_t).cast(), + error.as_mut_ptr(), + ); + if store.is_null() { + Err(WasmError::new(error.assume_init())) + } else { + Ok(Self(store)) + } + } + } + + pub fn load_language(&mut self, name: &str, bytes: &[u8]) -> Result { + let name = CString::new(name).unwrap(); + unsafe { + let mut error = MaybeUninit::::uninit(); + let language = ffi::ts_wasm_store_load_language( + self.0, + name.as_ptr(), + bytes.as_ptr().cast::(), + bytes.len() as u32, + error.as_mut_ptr(), + ); + if language.is_null() { + Err(WasmError::new(error.assume_init())) + } else { + Ok(Language(language)) + } + } + } + + #[must_use] + pub fn language_count(&self) -> usize { + unsafe { ffi::ts_wasm_store_language_count(self.0) } + } +} + +impl WasmError { + unsafe fn new(error: ffi::TSWasmError) -> Self { + let message = CStr::from_ptr(error.message).to_str().unwrap().to_string(); + (FREE_FN)(error.message.cast()); + Self { + kind: match error.kind { + ffi::TSWasmErrorKindParse => WasmErrorKind::Parse, + ffi::TSWasmErrorKindCompile => WasmErrorKind::Compile, + ffi::TSWasmErrorKindInstantiate => WasmErrorKind::Instantiate, + _ => WasmErrorKind::Other, + }, + message, + } + } +} + +impl Language { + #[must_use] + pub fn is_wasm(&self) -> bool { + unsafe { 
ffi::ts_language_is_wasm(self.0) } + } +} + +impl Parser { + pub fn set_wasm_store(&mut self, store: WasmStore) -> Result<(), LanguageError> { + unsafe { ffi::ts_parser_set_wasm_store(self.0.as_ptr(), store.0) }; + mem::forget(store); + Ok(()) + } + + pub fn take_wasm_store(&mut self) -> Option { + let ptr = unsafe { ffi::ts_parser_take_wasm_store(self.0.as_ptr()) }; + if ptr.is_null() { + None + } else { + Some(WasmStore(ptr)) + } + } +} + +impl Drop for WasmStore { + fn drop(&mut self) { + unsafe { ffi::ts_wasm_store_delete(self.0) }; + } +} + +impl fmt::Display for WasmError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let kind = match self.kind { + WasmErrorKind::Parse => "Failed to parse wasm", + WasmErrorKind::Compile => "Failed to compile wasm", + WasmErrorKind::Instantiate => "Failed to instantiate wasm module", + WasmErrorKind::Other => "Unknown error", + }; + write!(f, "{kind}: {}", self.message) + } +} + +impl error::Error for WasmError {} diff --git a/third-party/tree-sitter/tree-sitter/lib/binding_web/.eslintrc.js b/third-party/tree-sitter/tree-sitter/lib/binding_web/.eslintrc.js new file mode 100644 index 00000000000..38709eb8ece --- /dev/null +++ b/third-party/tree-sitter/tree-sitter/lib/binding_web/.eslintrc.js @@ -0,0 +1,22 @@ +module.exports = { + 'env': { + 'commonjs': true, + 'es2021': true, + }, + 'extends': 'google', + 'overrides': [ + ], + 'parserOptions': { + 'ecmaVersion': 'latest', + 'sourceType': 'module', + }, + 'rules': { + 'indent': ['error', 2, {'SwitchCase': 1}], + 'max-len': [ + 'error', + {'code': 120, 'ignoreComments': true, 'ignoreUrls': true, 'ignoreStrings': true, 'ignoreTemplateLiterals': true}, + ], + 'require-jsdoc': 0, + 'new-cap': 0, + }, +}; diff --git a/third-party/tree-sitter/tree-sitter/lib/binding_web/README.md b/third-party/tree-sitter/tree-sitter/lib/binding_web/README.md index a75cd9f01ff..f9f621a6ba0 100644 --- a/third-party/tree-sitter/tree-sitter/lib/binding_web/README.md +++ 
b/third-party/tree-sitter/tree-sitter/lib/binding_web/README.md @@ -1,14 +1,18 @@ -Web Tree-sitter -=============== +# Web Tree-sitter + +[![npmjs.com badge]][npmjs.com] + +[npmjs.com]: https://www.npmjs.org/package/web-tree-sitter +[npmjs.com badge]: https://img.shields.io/npm/v/web-tree-sitter.svg?color=%23BF4A4A WebAssembly bindings to the [Tree-sitter](https://github.com/tree-sitter/tree-sitter) parsing library. ### Setup -You can download the the `tree-sitter.js` and `tree-sitter.wasm` files from [the latest GitHub release](https://github.com/tree-sitter/tree-sitter/releases/latest) and load them using a standalone script: +You can download the `tree-sitter.js` and `tree-sitter.wasm` files from [the latest GitHub release](https://github.com/tree-sitter/tree-sitter/releases/latest) and load them using a standalone script: ```html -