diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/AnalyzerGuru.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/AnalyzerGuru.java index a8396f7fe92..49457fd16ef 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/AnalyzerGuru.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/AnalyzerGuru.java @@ -89,6 +89,7 @@ import org.opengrok.indexer.analysis.kotlin.KotlinAnalyzerFactory; import org.opengrok.indexer.analysis.lisp.LispAnalyzerFactory; import org.opengrok.indexer.analysis.lua.LuaAnalyzerFactory; +import org.opengrok.indexer.analysis.ocaml.OCamlAnalyzerFactory; import org.opengrok.indexer.analysis.pascal.PascalAnalyzerFactory; import org.opengrok.indexer.analysis.perl.PerlAnalyzerFactory; import org.opengrok.indexer.analysis.php.PhpAnalyzerFactory; @@ -298,6 +299,7 @@ public class AnalyzerGuru { new HaskellAnalyzerFactory(), new GolangAnalyzerFactory(), new LuaAnalyzerFactory(), + new OCamlAnalyzerFactory(), new PascalAnalyzerFactory(), new AdaAnalyzerFactory(), new RubyAnalyzerFactory(), diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/ocaml/Consts.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/ocaml/Consts.java new file mode 100644 index 00000000000..09f63e52038 --- /dev/null +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/ocaml/Consts.java @@ -0,0 +1,101 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * See LICENSE.txt included in this distribution for the specific + * language governing permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at LICENSE.txt. 
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Portions Copyright (c) 2017, 2020, Chris Fraire .
+ */
+package org.opengrok.indexer.analysis.ocaml;
+
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * Represents a container for the set of OCaml keywords (never indexed as symbols).
+ */
+public class Consts {
+
+    static final Set<String> kwd = new HashSet<>();
+
+    /* From parsing/lexer.mll of OCaml 5.3.0. */
+    static {
+        kwd.add("and");
+        kwd.add("as");
+        kwd.add("assert");
+        kwd.add("begin");
+        kwd.add("class");
+        kwd.add("constraint");
+        kwd.add("do");
+        kwd.add("done");
+        kwd.add("downto");
+        kwd.add("effect");
+        kwd.add("else");
+        kwd.add("end");
+        kwd.add("exception");
+        kwd.add("external");
+        kwd.add("false");
+        kwd.add("for");
+        kwd.add("fun");
+        kwd.add("function");
+        kwd.add("functor");
+        kwd.add("if");
+        kwd.add("in");
+        kwd.add("include");
+        kwd.add("inherit");
+        kwd.add("initializer");
+        kwd.add("lazy");
+        kwd.add("let");
+        kwd.add("match");
+        kwd.add("method");
+        kwd.add("module");
+        kwd.add("mutable");
+        kwd.add("new");
+        kwd.add("nonrec");
+        kwd.add("object");
+        kwd.add("of");
+        kwd.add("open");
+        kwd.add("or");
+        // N.b. "parser" is commented out in lexer.mll, so it is an ordinary identifier.
+        kwd.add("private");
+        kwd.add("rec");
+        kwd.add("sig");
+        kwd.add("struct");
+        kwd.add("then");
+        kwd.add("to");
+        kwd.add("true");
+        kwd.add("try");
+        kwd.add("type");
+        kwd.add("val");
+        kwd.add("virtual");
+        kwd.add("when");
+        kwd.add("while");
+        kwd.add("with");
+        kwd.add("lor");
+        kwd.add("lxor");
+        kwd.add("mod");
+        kwd.add("land");
+        kwd.add("lsl");
+        kwd.add("lsr");
+        kwd.add("asr");
+    }
+
+    /** Private to enforce static.
*/ + private Consts() { + } +} diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/ocaml/OCamlAnalyzer.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/ocaml/OCamlAnalyzer.java new file mode 100644 index 00000000000..fa14987c1a0 --- /dev/null +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/ocaml/OCamlAnalyzer.java @@ -0,0 +1,76 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * See LICENSE.txt included in this distribution for the specific + * language governing permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at LICENSE.txt. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. + * Portions Copyright (c) 2017, 2020, Chris Fraire . + */ +package org.opengrok.indexer.analysis.ocaml; + +import org.opengrok.indexer.analysis.AbstractAnalyzer; +import org.opengrok.indexer.analysis.FileAnalyzerFactory; +import org.opengrok.indexer.analysis.JFlexTokenizer; +import org.opengrok.indexer.analysis.JFlexXref; +import org.opengrok.indexer.analysis.plain.AbstractSourceCodeAnalyzer; + +import java.io.Reader; + +/** + * Represents an analyzer for the OCaml language. + */ +@SuppressWarnings("java:S110") +public class OCamlAnalyzer extends AbstractSourceCodeAnalyzer { + + /** + * Creates a new instance of {@link OCamlAnalyzer}. 
+ * @param factory instance + */ + protected OCamlAnalyzer(FileAnalyzerFactory factory) { + super(factory, () -> new JFlexTokenizer(new OCamlSymbolTokenizer( + AbstractAnalyzer.DUMMY_READER))); + } + + /** + * @return {@code "ocaml"} + */ + @Override + public String getCtagsLang() { + return "ocaml"; + } + + /** + * Gets a version number to be used to tag processed documents so that + * re-analysis can be re-done later if a stored version number is different + * from the current implementation. + * @return 20250403_00 + */ + @Override + protected int getSpecializedVersionNo() { + return 20250403_00; // Edit comment above too! + } + + /** + * Creates a wrapped {@link OCamlXref} instance. + * @return a defined instance + */ + @Override + protected JFlexXref newXref(Reader reader) { + return new JFlexXref(new OCamlXref(reader)); + } +} diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/ocaml/OCamlAnalyzerFactory.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/ocaml/OCamlAnalyzerFactory.java new file mode 100644 index 00000000000..4934eed483e --- /dev/null +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/ocaml/OCamlAnalyzerFactory.java @@ -0,0 +1,56 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * See LICENSE.txt included in this distribution for the specific + * language governing permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at LICENSE.txt. 
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Portions Copyright (c) 2017, 2020, Chris Fraire .
+ */
+package org.opengrok.indexer.analysis.ocaml;
+
+import org.opengrok.indexer.analysis.AbstractAnalyzer.Genre;
+import org.opengrok.indexer.analysis.FileAnalyzer;
+import org.opengrok.indexer.analysis.FileAnalyzerFactory;
+
+/**
+ * Represents a factory to create {@link OCamlAnalyzer} instances.
+ */
+public class OCamlAnalyzerFactory extends FileAnalyzerFactory {
+
+    private static final String NAME = "OCaml";
+
+    private static final String[] SUFFIXES = {"ML", "MLI"};
+
+    /**
+     * Initializes a factory instance to associate the file extensions ".ml"
+     * and ".mli" with {@link OCamlAnalyzer}.
+     */
+    public OCamlAnalyzerFactory() {
+        super(null, null, SUFFIXES, null, null, "text/plain", Genre.PLAIN,
+                NAME, true);
+    }
+
+    /**
+     * Creates a new {@link OCamlAnalyzer} instance bound to this factory.
+     * @return a defined instance
+     */
+    @Override
+    protected FileAnalyzer newAnalyzer() {
+        return new OCamlAnalyzer(this);
+    }
+}
diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/ocaml/OCamlLexer.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/ocaml/OCamlLexer.java
new file mode 100644
index 00000000000..3b04d06f215
--- /dev/null
+++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/ocaml/OCamlLexer.java
@@ -0,0 +1,59 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ * + * See LICENSE.txt included in this distribution for the specific + * language governing permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at LICENSE.txt. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. + * Portions Copyright (c) 2017, 2020, Chris Fraire . + */ +package org.opengrok.indexer.analysis.ocaml; + +import org.opengrok.indexer.analysis.JFlexJointLexer; +import org.opengrok.indexer.analysis.JFlexSymbolMatcher; +import org.opengrok.indexer.analysis.Resettable; + +/** + * Represents an abstract base class for OCaml lexers. + */ +@SuppressWarnings("Duplicates") +abstract class OCamlLexer extends JFlexSymbolMatcher + implements JFlexJointLexer, Resettable { + + /** + * Calls {@link #phLOC()} if the yystate is not COMMENT or SCOMMENT. + */ + public void chkLOC() { + if (yystate() != COMMENT() && yystate() != SCOMMENT()) { + phLOC(); + } + } + + /** + * Subclasses must override to get the constant value created by JFlex to + * represent COMMENT. + */ + @SuppressWarnings("java:S100") + abstract int COMMENT(); + + /** + * Subclasses must override to get the constant value created by JFlex to + * represent SCOMMENT. 
+     */
+    @SuppressWarnings("java:S100")
+    abstract int SCOMMENT();
+}
diff --git a/opengrok-indexer/src/main/jflex/analysis/ocaml/OCaml.lexh b/opengrok-indexer/src/main/jflex/analysis/ocaml/OCaml.lexh
new file mode 100644
index 00000000000..f276a68db02
--- /dev/null
+++ b/opengrok-indexer/src/main/jflex/analysis/ocaml/OCaml.lexh
@@ -0,0 +1,95 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * See LICENSE.txt included in this distribution for the specific
+ * language governing permissions and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at LICENSE.txt.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, Chris Fraire .
+ *
+ * Copyright (c) Simon Peyton Jones.
+ * Copyright (c) Simon Marlow.
+ * The authors and publisher intend this Report to belong to the entire Haskell
+ * community, and grant permission to copy and distribute it for any purpose,
+ * provided that it is reproduced in its entirety, including this Notice.
+ * Modified versions of this Report may also be copied and distributed for any
+ * purpose, provided that the modified version is clearly presented as such,
+ * and that it does not claim to be a definition of the language Haskell 2010.
+ */
+
+Identifier = ({varid} | {conid} | {pvconid})
+/*
+ * varid → (small {small | large | digit | ' })⟨reservedid⟩
+ *     ; N.b. "except {reservedid}" is excluded from OpenGrok's varid definition
+ */
+varid = {small} ({small} | {large} | {digit} | [\'])*
+/*
+ * conid → large {small | large | digit | ' }
+ */
+conid = {large} ({small} | {large} | {digit} | [\'])*
+/*
+ * polymorphic variant
+ * pvconid → `large {small | large | digit | ' }
+ */
+pvconid = [\`] {large} ({small} | {large} | {digit} | [\'])*
+/*
+ * small → ascSmall | uniSmall | _
+ * ascSmall → a | b | … | z
+ */
+small = [a-z_]
+/*
+ * large → ascLarge | uniLarge
+ * ascLarge → A | B | … | Z
+ */
+large = [A-Z]
+/*
+ * digit → ascDigit | uniDigit
+ * ascDigit → 0 | 1 | … | 9
+ * uniDigit → any Unicode decimal digit
+ * octit → 0 | 1 | … | 7
+ * hexit → digit | A | … | F | a | … | f
+ */
+digit = [0-9]
+octit = [0-7]
+hexit = [0-9A-Fa-f]
+
+Number = ({integer} | {float})
+/*
+ * decimal → digit{digit}
+ * octal → octit{octit}
+ * hexadecimal → hexit{hexit}
+ */
+decimal = {digit}({digit} | _)*
+octal = {octit}({octit} | _)*
+hexadecimal = {hexit}({hexit} | _)*
+/*
+ * integer → (decimal
+ *          | 0o octal | 0O octal
+ *          | 0x hexadecimal | 0X hexadecimal
+ *          | 0b binary | 0B binary) [l | L | n]
+ */
+integer = ({decimal} | [0][oO]{octal} | [0][xX]{hexadecimal} | [0][bB][01][01_]*) [lLn]?
+/*
+ * float → decimal . decimal [exponent]
+ *     | decimal exponent
+ */
+float = ({decimal} [\.] {decimal} {exponent}? |
+    {decimal} {exponent})
+/*
+ * exponent → (e | E) [+ | -] decimal
+ */
+exponent = [eE] [\+\-]? {decimal}
diff --git a/opengrok-indexer/src/main/jflex/analysis/ocaml/OCamlSymbolTokenizer.lex b/opengrok-indexer/src/main/jflex/analysis/ocaml/OCamlSymbolTokenizer.lex
new file mode 100644
index 00000000000..1df48ef67af
--- /dev/null
+++ b/opengrok-indexer/src/main/jflex/analysis/ocaml/OCamlSymbolTokenizer.lex
@@ -0,0 +1,101 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * See LICENSE.txt included in this distribution for the specific
+ * language governing permissions and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at LICENSE.txt.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2015, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Portions Copyright (c) 2017, Chris Fraire .
+ */
+
+/*
+ * Get OCaml symbols
+ */
+
+package org.opengrok.indexer.analysis.ocaml;
+
+import java.io.IOException;
+import org.opengrok.indexer.analysis.JFlexSymbolMatcher;
+
+/**
+ * @author Harry Pan
+ */
+%%
+%public
+%class OCamlSymbolTokenizer
+%extends JFlexSymbolMatcher
+%unicode
+%int
+%include ../CommonLexer.lexh
+%char
+%{
+    private int nestedComment;
+
+    public void reset() {
+        super.reset();
+        nestedComment = 0;
+    }
+%}
+
+%state STRING CHAR BCOMMENT
+
+%include ../Common.lexh
+%include OCaml.lexh
+%%
+
+<YYINITIAL> {
+    {Identifier} {
+        String id = yytext();
+        if (!Consts.kwd.contains(id)) {
+            onSymbolMatched(id, yychar);
+            return yystate();
+        }
+    }
+    {Number}    {}
+    \"    { yybegin(STRING); }
+    \' / ([^\\\'\r\n] | \\ ([^\r\n] | {digit}{3} | x{hexit}{2} | o{octit}{3})) \'    { yybegin(CHAR); }
+}
+
+<STRING> {
+    \\[\"\\]    {}
+    \"    { yybegin(YYINITIAL); }
+}
+
+<CHAR> { // only entered when a whole char literal follows, so a type variable's prime ('a) never lands here; a prime inside an identifier is handled above
+    \\[\'\\]    {}
+    \'    { yybegin(YYINITIAL); }
+}
+
+<YYINITIAL, BCOMMENT> {
+    "(*"    {
+        if (nestedComment++ == 0) {
+            yybegin(BCOMMENT);
+        }
+    }
+}
+
+<BCOMMENT> {
+    "*)"    {
+        if (--nestedComment == 0) {
+            yybegin(YYINITIAL);
+        }
+    }
+}
+
+// fallback
+{WhspChar}+ |
+[^] {}
diff --git a/opengrok-indexer/src/main/jflex/analysis/ocaml/OCamlXref.lex b/opengrok-indexer/src/main/jflex/analysis/ocaml/OCamlXref.lex
new file mode 100644
index 00000000000..1b274d1963f
--- /dev/null
+++ 
b/opengrok-indexer/src/main/jflex/analysis/ocaml/OCamlXref.lex
@@ -0,0 +1,181 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * See LICENSE.txt included in this distribution for the specific
+ * language governing permissions and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at LICENSE.txt.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2015, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Portions Copyright (c) 2017, Chris Fraire .
+ */
+
+/*
+ * Cross reference an OCaml file
+ */
+
+package org.opengrok.indexer.analysis.ocaml;
+
+import java.io.IOException;
+import org.opengrok.indexer.analysis.JFlexSymbolMatcher;
+import org.opengrok.indexer.web.HtmlConsts;
+
+/**
+ * @author Harry Pan
+ */
+%%
+%public
+%class OCamlXref
+%extends JFlexSymbolMatcher
+%unicode
+%int
+%char
+%include ../CommonLexer.lexh
+%include ../CommonXref.lexh
+%{
+    private int nestedComment;
+
+    @Override
+    public void reset() {
+        super.reset();
+        nestedComment = 0;
+    }
+
+    @Override
+    public void yypop() throws IOException {
+        onDisjointSpanChanged(null, yychar);
+        super.yypop();
+    }
+
+    protected void chkLOC() {
+        switch (yystate()) {
+            case BCOMMENT:
+                break;
+            default:
+                phLOC();
+                break;
+        }
+    }
+%}
+
+%state STRING CHAR BCOMMENT
+
+%include ../Common.lexh
+%include ../CommonURI.lexh
+%include ../CommonPath.lexh
+%include OCaml.lexh
+%%
+<YYINITIAL> {
+    {Identifier} {
+        chkLOC();
+        String id = yytext();
+        onFilteredSymbolMatched(id, yychar, Consts.kwd);
+    }
+    {Number}    {
+        chkLOC();
+        onDisjointSpanChanged(HtmlConsts.NUMBER_CLASS, yychar);
+        onNonSymbolMatched(yytext(), yychar);
+        onDisjointSpanChanged(null, yychar);
+    }
+    \"    {
+        chkLOC();
+        yypush(STRING);
+        onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar);
+        onNonSymbolMatched(yytext(), yychar);
+    }
+    \' / ([^\\\'\r\n] | \\ ([^\r\n] | {digit}{3} | x{hexit}{2} | o{octit}{3})) \'    {
+        chkLOC();
+        yypush(CHAR);
+        onDisjointSpanChanged(HtmlConsts.STRING_CLASS, yychar);
+        onNonSymbolMatched(yytext(), yychar);
+    }
+}
+
+<STRING> {
+    \\[\"\\]    { chkLOC(); onNonSymbolMatched(yytext(), yychar); }
+    \"    {
+        chkLOC();
+        onNonSymbolMatched(yytext(), yychar);
+        yypop();
+    }
+    /*
+     * An OCaml string literal may span several lines: a backslash at the
+     * end of a line skips the newline and the leading blanks of the next
+     * line, and an unescaped newline is simply part of the string. N.b.
+     * OpenGrok does not explicitly recognize the line continuation, but
+     * since a STRING must end in a non-escaped quotation mark, just
+     * allow STRINGs to be multi-line regardless of syntax.
+     */
+}
+
+<CHAR> { // we don't need to consider the case where prime is part of an identifier since it is handled above
+    \\[\'\\]    { chkLOC(); onNonSymbolMatched(yytext(), yychar); }
+    \'    {
+        chkLOC();
+        onNonSymbolMatched(yytext(), yychar);
+        yypop();
+    }
+    /*
+     * N.b. CHAR is only entered when a complete literal follows, so the
+     * lone apostrophe of a type variable ('a) is emitted as plain text.
+     */
+}
+
+<YYINITIAL, BCOMMENT> {
+    "(*"    {
+        if (nestedComment++ == 0) {
+            yypush(BCOMMENT);
+            onDisjointSpanChanged(HtmlConsts.COMMENT_CLASS, yychar);
+        }
+        onNonSymbolMatched(yytext(), yychar);
+    }
+}
+
+<BCOMMENT> {
+    "*)"    {
+        onNonSymbolMatched(yytext(), yychar);
+        if (--nestedComment == 0) {
+            yypop();
+        }
+    }
+}
+
+{WhspChar}*{EOL}    { onEndOfLineMatched(yytext(), yychar); }
+[[\s]--[\n]]        { onNonSymbolMatched(yytext(), yychar); }
+[^\n]               { chkLOC(); onNonSymbolMatched(yytext(), yychar); }
+
+<STRING, BCOMMENT> {
+    {FPath}    {
+        chkLOC();
+        onPathlikeMatched(yytext(), '/', false, yychar);
+    }
+    {FNameChar}+ "@" {FNameChar}+ "." {FNameChar}+    {
+        chkLOC();
+        onEmailAddressMatched(yytext(), yychar);
+    }
+}
+
+<STRING> {
+    {BrowseableURI}    {
+        chkLOC();
+        onUriMatched(yytext(), yychar);
+    }
+}
+
+<BCOMMENT> {
+    {BrowseableURI} \}?    {
+        onUriMatched(yytext(), yychar);
+    }
+}