From ac365ff6255d952d4552e71e4d5cac1189022ae8 Mon Sep 17 00:00:00 2001 From: Knut Wannheden Date: Tue, 24 Sep 2024 12:53:59 +0200 Subject: [PATCH] Adjustments to parser API (#112) * Adjustments to parser API The ParserInput source is now a `Buffer`, which I think better reflects what we need. Also added some initial setup for the `JavaScriptParser`. * Set `allowJs` compiler option * Add `.gitignore` file * Add some more exception handling * Declare visitor as top-level function instead * Some more fixes to parser * Remove logging * Add Parser.Builder * Some more initial work on parser * Implement `Space.format()` * Implement AST-based alternative to `Space.format()` * Add `isTree()` * Split up tests * Merge main --- openrewrite/.gitignore | 4 +- openrewrite/jest.config.js | 7 +- openrewrite/src/core/parser.ts | 26 +-- openrewrite/src/core/tree.ts | 8 +- openrewrite/src/java/tree/support_types.ts | 71 ++++++ openrewrite/src/javascript/parser.ts | 213 +++++++++++++++--- .../src/javascript/tree/support_types.ts | 1 + openrewrite/src/yaml/tree/support_types.ts | 1 + openrewrite/test/core/tree.test.ts | 7 +- openrewrite/test/java/tree.test.ts | 23 ++ openrewrite/test/javascript/parser.test.ts | 18 ++ 11 files changed, 317 insertions(+), 62 deletions(-) create mode 100644 openrewrite/test/java/tree.test.ts create mode 100644 openrewrite/test/javascript/parser.test.ts diff --git a/openrewrite/.gitignore b/openrewrite/.gitignore index 22224618..99a9c1fa 100644 --- a/openrewrite/.gitignore +++ b/openrewrite/.gitignore @@ -15,4 +15,6 @@ yarn-error.log* .env .env.test .env.production -.env.*.local \ No newline at end of file +.env.*.local + +tsconfig.build.tsbuildinfo diff --git a/openrewrite/jest.config.js b/openrewrite/jest.config.js index 578f1289..3793e390 100644 --- a/openrewrite/jest.config.js +++ b/openrewrite/jest.config.js @@ -5,14 +5,9 @@ module.exports = { moduleFileExtensions: ['ts', 'tsx', 'js', 'jsx', 'json', 'node'], transform: { '^.+\\.tsx?$': ['ts-jest', { - 
tsconfig: 'tsconfig.json', // Adjust if your tsconfig file is named or located differently + tsconfig: 'tsconfig.test.json', // Adjust if your tsconfig file is named or located differently }], }, testMatch: ['**/__tests__/**/*.+(ts|tsx|js)', '**/?(*.)+(spec|test).+(ts|tsx|js)'], collectCoverageFrom: ['src/**/*.{ts,tsx}', '!src/**/*.d.ts'], - globals: { - 'ts-jest': { - tsconfig: 'tsconfig.test.json' - } - } }; \ No newline at end of file diff --git a/openrewrite/src/core/parser.ts b/openrewrite/src/core/parser.ts index 98bfc379..ad0cce3a 100644 --- a/openrewrite/src/core/parser.ts +++ b/openrewrite/src/core/parser.ts @@ -7,13 +7,13 @@ export class ParserInput { private readonly _path: string; private readonly _fileAttributes: FileAttributes | null; private readonly _synthetic: boolean; - private readonly _source: () => fs.ReadStream; + private readonly _source: () => Buffer; constructor( path: string, fileAttributes: FileAttributes | null, synthetic: boolean, - source: () => fs.ReadStream + source: () => Buffer ) { this._path = path; this._fileAttributes = fileAttributes; @@ -33,7 +33,7 @@ export class ParserInput { return this._synthetic; } - get source(): () => fs.ReadStream { + get source(): () => Buffer { return this._source; } } @@ -50,18 +50,18 @@ export abstract class Parser { abstract sourcePathFromSourceText(prefix: string, sourceCode: string): string; parse( - sourceFiles: Iterable, + sourceFilesPaths: Iterable, relativeTo: string | null, ctx: ExecutionContext ): Iterable { const inputs: ParserInput[] = []; - for (const path of sourceFiles) { + for (const path of sourceFilesPaths) { inputs.push( new ParserInput( path, null, false, - () => fs.createReadStream(path) + () => fs.readFileSync(path) ) ); } @@ -79,13 +79,7 @@ export abstract class Parser { path, null, true, - () => { - // FIXME handling of streams - const stream = new fs.ReadStream(null!); - stream.push(source); - stream.push(null); - return stream; - } + () => Buffer.from(source) ); }); return 
this.parseInputs(inputs, null, ctx); @@ -115,7 +109,7 @@ export abstract class Parser { } export namespace Parser { - abstract class Builder { + export abstract class Builder { protected _sourceFileType: any; get sourceFileType(): any { @@ -126,7 +120,7 @@ export namespace Parser { } } -function requirePrintEqualsInput( +export function requirePrintEqualsInput( parser: Parser, sourceFile: SourceFile, parserInput: ParserInput, @@ -136,7 +130,7 @@ function requirePrintEqualsInput( const required = ctx.getMessage(ExecutionContext.REQUIRE_PRINT_EQUALS_INPUT, true); if (required && !sourceFile.printEqualsInput(parserInput, ctx)) { const diff = Result.diff( - parserInput.source().read().toString(), + parserInput.source().toString(), sourceFile.printAll(), parserInput.path ); diff --git a/openrewrite/src/core/tree.ts b/openrewrite/src/core/tree.ts index 3701bb56..0b5892eb 100644 --- a/openrewrite/src/core/tree.ts +++ b/openrewrite/src/core/tree.ts @@ -18,6 +18,10 @@ export interface Tree { accept(v: TreeVisitor, p: P): R | null; } +export function isTree(tree: any): tree is Tree { + return !!tree?.constructor?.isTree || !!tree?.isTree; /* type guard: null, undefined or constructor-less input yields false instead of throwing */ +} + export abstract class TreeVisitor { private _cursor: Cursor; private _visitCount: number = 0; @@ -352,6 +356,7 @@ type AbstractConstructor = abstract new (...args: any[]) => T; export function SourceFileMixin>(Base: TBase) { abstract class SourceFileMixed extends Base implements SourceFile { + static isTree = true; static isSourceFile = true; abstract get sourcePath(): string; @@ -479,6 +484,7 @@ export abstract class PrinterFactory { @LstType("org.openrewrite.tree.ParseError") export class ParseError implements SourceFile { + static isTree = true; static isParseError = true; static isSourceFile = true; @@ -530,7 +536,7 @@ export class ParseError implements SourceFile { parser.getCharset(ctx), false, null, - input.source().read().toString(), + input.source().toString(), erroneous ); } diff --git 
a/openrewrite/src/java/tree/support_types.ts b/openrewrite/src/java/tree/support_types.ts index 7355741f..158f6fb6 100644 --- a/openrewrite/src/java/tree/support_types.ts +++ b/openrewrite/src/java/tree/support_types.ts @@ -32,6 +32,7 @@ export function isJava(tree: any): tree is J { export function JMixin>(Base: TBase) { abstract class JMixed extends Base implements J { + static isTree = true; static isJava = true; abstract get prefix(): Space; @@ -107,10 +108,80 @@ export class Space { } else if (whitespace == ' ') { return Space.SINGLE_SPACE; } + // FIXME add flyweights } return new Space(comments, whitespace); } + static format(formatting: string, beginIndex: number, toIndex: number): Space { + if (beginIndex == toIndex) { + return Space.EMPTY; + } else if (toIndex == beginIndex + 1 && formatting[beginIndex] === ' ') { + return Space.SINGLE_SPACE; + } + + let comments: Comment[] = []; + let whitespaceStart = beginIndex; + let commentStart = -1; + let commentEnd = -1; + let suffixStart = -1; + let i = beginIndex; + + // Step 1: Process leading whitespace + while (i < toIndex && (formatting[i] === ' ' || formatting[i] === '\t' || formatting[i] === '\n' || formatting[i] === '\r')) { + i++; + } + let whitespaceEnd = i; // Capture end of leading whitespace + + // Step 2: Parse comments + while (i < toIndex) { + const char = formatting[i]; + + // Handle single-line comment (//) + if (char === '/' && i + 1 < toIndex && formatting[i + 1] === '/') { + commentStart = i + 2; // Skip the "//" + i += 2; + while (i < toIndex && formatting[i] !== '\n' && formatting[i] !== '\r') { + i++; // Continue until end of line or end of input + } + commentEnd = i; + suffixStart = i; // Capture newline as suffix + while (i < toIndex && (formatting[i] === '\n' || formatting[i] === '\r')) { + i++; + } + const commentText = formatting.slice(commentStart, commentEnd); + const suffix = formatting.slice(suffixStart, i); + comments.push(new TextComment(false, commentText, suffix, 
Markers.EMPTY)); + + // Handle multi-line comment (/* ... */) + } else if (char === '/' && i + 1 < toIndex && formatting[i + 1] === '*') { + commentStart = i + 2; // Skip the "/*" + i += 2; + while (i + 1 < toIndex && !(formatting[i] === '*' && formatting[i + 1] === '/')) { + i++; // Continue until "*/" or end of input + } + commentEnd = i; // Position before */ + i += 2; // Skip the closing "*/" + + suffixStart = i; + while (i < toIndex && (formatting[i] === ' ' || formatting[i] === '\t' || formatting[i] === '\n' || formatting[i] === '\r')) { + i++; // Capture any trailing whitespace after comment + } + const commentText = formatting.slice(commentStart, commentEnd); + const suffix = formatting.slice(suffixStart, i); + comments.push(new TextComment(true, commentText, suffix, Markers.EMPTY)); + } else { + i++; // Skip non-comment characters + } + } + + // Step 3: Extract leading whitespace + const whitespace = whitespaceEnd > whitespaceStart ? formatting.slice(whitespaceStart, whitespaceEnd) : null; + + // Step 4: Return a Space object with accumulated whitespace and comments + return Space.build(comments, whitespace); + } + public constructor(comments: Comment[], whitespace: string | null) { this._comments = comments; this._whitespace = whitespace; diff --git a/openrewrite/src/javascript/parser.ts b/openrewrite/src/javascript/parser.ts index 6075f3f7..c1737552 100644 --- a/openrewrite/src/javascript/parser.ts +++ b/openrewrite/src/javascript/parser.ts @@ -1,25 +1,65 @@ +import * as ts from 'typescript'; +import * as J from '../java/tree'; +import {Comment, JRightPadded, Space, TextComment} from '../java/tree'; import * as JS from './tree'; -import {ExecutionContext, Markers, Parser, ParserInput, randomId, SourceFile} from "../core"; -import * as J from "../java/tree"; -import {ClassDeclaration, Space, Unknown} from "../java/tree"; -import * as ts from "typescript"; -import Source = Unknown.Source; +import {ExecutionContext, Markers, ParseError, Parser, ParserInput, 
randomId, SourceFile} from "../core"; export class JavaScriptParser extends Parser { - parseInputs(sources: Iterable, relativeTo: string | null, ctx: ExecutionContext): Iterable { - return [new JS.CompilationUnit( - randomId(), - Space.EMPTY, - Markers.EMPTY, - "file.ts", - null, - null, - false, - null, - [], - [], - Space.EMPTY - )]; + + parseInputs(inputs: Iterable, relativeTo: string | null, ctx: ExecutionContext): Iterable { + const inputsArray = Array.from(inputs); + const compilerOptions: ts.CompilerOptions = { + target: ts.ScriptTarget.Latest, + module: ts.ModuleKind.CommonJS, + // strict: true, + allowJs: true + }; + const host = ts.createCompilerHost(compilerOptions); + host.getSourceFile = (fileName, languageVersion) => { + if (fileName.endsWith('lib.d.ts')) { + // For default library files like lib.d.ts + const libFilePath = ts.getDefaultLibFilePath(compilerOptions); + const libSource = ts.sys.readFile(libFilePath); + return libSource + ? ts.createSourceFile(fileName, libSource, languageVersion, true) + : undefined; + } + + let sourceText = inputsArray.find(i => i.path === fileName)?.source().toString('utf8')!; + return sourceText ? ts.createSourceFile(fileName, sourceText, languageVersion, true) : undefined; + } + + const program = ts.createProgram(Array.from(inputsArray, i => i.path), compilerOptions, host); + const typeChecker = program.getTypeChecker(); + + const result = []; + for (let input of inputsArray) { + const sourceFile = program.getSourceFile(input.path); + if (sourceFile) { + try { + result.push(new JavaScriptParserVisitor(sourceFile, typeChecker).visit(sourceFile) as SourceFile); + } catch (error) { + result.push(ParseError.build( + this, + input, + relativeTo, + ctx, + error instanceof Error ? 
error : new Error("Parser threw unknown error: " + error), + null + )); + } + } else { + result.push(ParseError.build( + this, + input, + relativeTo, + ctx, + new Error("Parser returned undefined"), + null + )); + } + } + return result; } accept(path: string): boolean { @@ -29,24 +69,83 @@ export class JavaScriptParser extends Parser { sourcePathFromSourceText(prefix: string, sourceCode: string): string { return prefix + "/source.js"; } + + static builder(): JavaScriptParser.Builder { + return new JavaScriptParser.Builder(); + } +} + +export namespace JavaScriptParser { + export class Builder extends Parser.Builder { + build(): JavaScriptParser { + return new JavaScriptParser(); + } + } } export class JavaScriptParserVisitor { - visit(sf: ts.SourceFile) { - return new JS.CompilationUnit(randomId(), Space.EMPTY, Markers.EMPTY, sf.fileName, null, null, false, null, [], [], Space.EMPTY); + constructor(private readonly sourceFile: ts.SourceFile, private readonly typeChecker: ts.TypeChecker) { } - #visit0(node: ts.Node) : T { + visit(node: ts.Node): any { const member = this[(`visit${ts.SyntaxKind[node.kind]}` as keyof JavaScriptParserVisitor)]; if (typeof member === 'function') { - return member(node as any) as any as T; + return member.bind(this)(node as any); /* bind first: the visitXxx handlers dereference `this` (prefix, rightPaddedList, visitUnknown) and would see undefined if called detached */ } else { - return this.visitUnknown(node) as any as T; + return this.visitUnknown(node); + } + } + + private prefix(node: ts.Node) { + if (node.getLeadingTriviaWidth(this.sourceFile) == 0) { + return Space.EMPTY; } + return prefixFromNode(node, this.sourceFile); + // return Space.format(this.sourceFile.text, node.getFullStart(), node.getFullStart() + node.getLeadingTriviaWidth()); + } + + visitSourceFile(node: ts.SourceFile): JS.CompilationUnit { + return new JS.CompilationUnit( + randomId(), + this.prefix(node), + Markers.EMPTY, + this.sourceFile.fileName, + null, + null, + false, + null, + [], + this.rightPaddedList(node.statements), + Space.EMPTY + ); } visitUnknown(node: ts.Node) { - return new J.Unknown(randomId(), 
Space.EMPTY, Markers.EMPTY, new Source(randomId(), Space.EMPTY, Markers.EMPTY, node.getText())); + return new J.Unknown( + randomId(), + Space.EMPTY, + Markers.EMPTY, + new J.Unknown.Source( + randomId(), + Space.EMPTY, + Markers.EMPTY, + node.getFullText() + ) + ); + } + + private mapModifiers(node: ts.VariableStatement) { + return []; + } + + private rightPaddedList(nodes: ts.NodeArray) { + return nodes.map((s) => { + return new JRightPadded( + this.visit(s) as T, + Space.EMPTY, + Markers.EMPTY + ); + }); } visitClassDeclaration(node: ts.ClassDeclaration) { @@ -405,12 +504,25 @@ export class JavaScriptParserVisitor { return this.visitUnknown(node); } - visitVariableStatement(node: ts.VariableStatement) { - return this.visitUnknown(node); + visitVariableStatement(node: ts.VariableStatement): J.VariableDeclarations { + return new J.VariableDeclarations( + randomId(), + this.prefix(node), + Markers.EMPTY, + [], + this.mapModifiers(node), + null, + null, + [], + this.rightPaddedList(node.declarationList.declarations) + ) } - visitExpressionStatement(node: ts.ExpressionStatement) { - return this.visitUnknown(node); + visitExpressionStatement(node: ts.ExpressionStatement): JS.ExpressionStatement { + return new JS.ExpressionStatement( + randomId(), + this.visit(node.expression) as J.Expression + ) } visitIfStatement(node: ts.IfStatement) { @@ -653,10 +765,6 @@ export class JavaScriptParserVisitor { return this.visitUnknown(node); } - visitSourceFile(node: ts.SourceFile) { - return this.visitUnknown(node); - } - visitBundle(node: ts.Bundle) { return this.visitUnknown(node); } @@ -856,4 +964,41 @@ export class JavaScriptParserVisitor { visitSyntheticReferenceExpression(node: ts.Node) { return this.visitUnknown(node); } -} \ No newline at end of file +} + +function prefixFromNode(node: ts.Node, sourceFile: ts.SourceFile): Space { + const comments: Comment[] = []; + const text = sourceFile.getFullText(); + const nodeStart = node.getFullStart(); + + let leadingWhitespacePos 
= node.getStart(); + + // Step 1: Use forEachLeadingCommentRange to extract comments + ts.forEachLeadingCommentRange(text, nodeStart, (pos, end, kind) => { + leadingWhitespacePos = Math.min(leadingWhitespacePos, pos); + + const isMultiline = kind === ts.SyntaxKind.MultiLineCommentTrivia; + const commentStart = pos + 2; // Skip `/*` or `//` (both openers are two characters wide) + const commentEnd = isMultiline ? end - 2 : end; // Exclude closing `*/` or nothing for `//` + + // Step 2: Capture suffix (whitespace after the comment) + let suffixEnd = end; + while (suffixEnd < text.length && (text[suffixEnd] === ' ' || text[suffixEnd] === '\t' || text[suffixEnd] === '\n' || text[suffixEnd] === '\r')) { + suffixEnd++; + } + + const commentBody = text.slice(commentStart, commentEnd); // Extract comment body + const suffix = text.slice(end, suffixEnd); // Extract suffix (whitespace after comment) + + comments.push(new TextComment(isMultiline, commentBody, suffix, Markers.EMPTY)); + }); + + // Step 3: Extract leading whitespace (before the first comment) + let whitespace = ''; + if (leadingWhitespacePos > node.getFullStart()) { + whitespace = text.slice(node.getFullStart(), leadingWhitespacePos); + } + + // Step 4: Return the Space object with comments and leading whitespace + return new Space(comments, whitespace.length > 0 ? 
whitespace : null); +} diff --git a/openrewrite/src/javascript/tree/support_types.ts b/openrewrite/src/javascript/tree/support_types.ts index 78cd077b..37f7ce33 100644 --- a/openrewrite/src/javascript/tree/support_types.ts +++ b/openrewrite/src/javascript/tree/support_types.ts @@ -28,6 +28,7 @@ export function isJavaScript(tree: any): tree is JS { export function JSMixin>(Base: TBase) { abstract class JSMixed extends Base implements JS { + static isTree = true; static isJavaScript = true; abstract get prefix(): Space; diff --git a/openrewrite/src/yaml/tree/support_types.ts b/openrewrite/src/yaml/tree/support_types.ts index 4ce28a81..cb189cbb 100644 --- a/openrewrite/src/yaml/tree/support_types.ts +++ b/openrewrite/src/yaml/tree/support_types.ts @@ -25,6 +25,7 @@ export function isYaml(tree: any): tree is Yaml { export function YamlMixin>(Base: TBase) { abstract class YamlMixed extends Base implements Yaml { + static isTree = true; static isYaml = true; abstract get id(): UUID; diff --git a/openrewrite/test/core/tree.test.ts b/openrewrite/test/core/tree.test.ts index 463939d3..745e16e8 100644 --- a/openrewrite/test/core/tree.test.ts +++ b/openrewrite/test/core/tree.test.ts @@ -1,8 +1,7 @@ -import {Cursor, Markers, randomId} from '../../src/core'; -import {isSourceFile} from "typescript"; -import {Document, Documents, isYaml} from "../../src/yaml"; +import {Cursor, Markers, randomId, isSourceFile} from '../../src/core'; +import {Document, Documents, isYaml} from "../../src/yaml/tree"; -describe('tree utils', () => { +describe('utils', () => { test('new random ID', () => { expect(randomId()).toBeDefined(); }); diff --git a/openrewrite/test/java/tree.test.ts b/openrewrite/test/java/tree.test.ts new file mode 100644 index 00000000..5c5ed023 --- /dev/null +++ b/openrewrite/test/java/tree.test.ts @@ -0,0 +1,23 @@ +import {Space} from "../../src/java/tree"; + +describe('Space parsing', () => { + test('parse space', () => { + let str = ' /* c1*/ /*c2 */ '; + 
console.log(Space.format(str, 0, str.length)); + }); + + test('parse space 2', () => { + let str = ' // c1 \n // c2\n//c3'; + console.log(Space.format(str, 0, str.length)); + }); + + test('parse empty space', () => { + let str = ''; + console.log(Space.format(str, 0, str.length)); + }); + + test('parse single space', () => { + let str = ' '; + console.log(Space.format(str, 0, str.length)); + }); +}); diff --git a/openrewrite/test/javascript/parser.test.ts b/openrewrite/test/javascript/parser.test.ts new file mode 100644 index 00000000..4a9b04a9 --- /dev/null +++ b/openrewrite/test/javascript/parser.test.ts @@ -0,0 +1,18 @@ +import {InMemoryExecutionContext, ParserInput} from '../../src/core'; +import {JavaScriptParser} from "../../src/javascript"; + +describe('JavaScriptParser', () => { + test('parseInputs', () => { + const parser = JavaScriptParser.builder().build(); + const [sourceFile] = parser.parseInputs([new ParserInput('foo.ts', null, true, () => Buffer.from('1', 'utf8'))], null, new InMemoryExecutionContext()); + console.log(sourceFile); + }); + + test('parseStrings', () => { + const parser = JavaScriptParser.builder().build(); + const [sourceFile] = parser.parseStrings(` + const c = 1; + /* c1*/ /*c2 */const d = 1;`); + console.log(sourceFile); + }); +});