Fix: Prevent Reconstruction of Stub Functions in the Output (#1178)

* refactor: refine side effect triggers
* feat-fix: parameter recovery if empty
* feat-fix: type for parameter reconstruct
* refactor: trim parse data to prepare
flowr-analysis · Nov 28, 2024 · d26c4a4 · d26c4a4 · github-actions · Nov 28, 2024
1 parent 4313a35
commit d26c4a4
Show file tree

Hide file tree

Showing 5 changed files with 10 additions and 6 deletions.
diff --git a/src/dataflow/environments/default-builtin-config.ts b/src/dataflow/environments/default-builtin-config.ts
@@ -83,7 +83,7 @@ export const DefaultBuiltinConfig: BuiltInDefinitions = [
 			/* downloader and installer functions (R, devtools, BiocManager) */
 			'library.dynam', 'install.packages','install', 'install_github', 'install_gitlab', 'install_bitbucket', 'install_url', 'install_git', 'install_svn', 'install_local', 'install_version', 'update_packages',
 			/* weird env attachments */
-			'attach', 'detach', 'unname', 'rm', 'remove'
+			'attach', 'unname'
 		],
 		processor:       'builtin:default',
 		config:          { hasUnknownSideEffects: true },

diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-expression-list.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-expression-list.ts
@@ -80,7 +80,7 @@ function updateSideEffectsForCalledFunctions(calledEnvs: {
 	for(const { functionCall, called } of calledEnvs) {
 		const callDependencies = nextGraph.getVertex(functionCall, true)?.controlDependencies;
 		for(const calledFn of called) {
-			guard(calledFn.tag === VertexType.FunctionDefinition, 'called function must call a function definition');
+			guard(calledFn.tag === VertexType.FunctionDefinition, 'called function must be a function definition');
 			// only merge the environments they have in common
 			let environment = calledFn.environment;
 			while(environment.level > inputEnvironment.level) {

diff --git a/src/r-bridge/lang-4.x/ast/parser/json/format.ts b/src/r-bridge/lang-4.x/ast/parser/json/format.ts
@@ -49,9 +49,9 @@ type ParsedDataRow = [line1: number, col1: number, line2: number, col2: number,
 export function prepareParsedData(data: string): CsvEntry[] {
 	let json: unknown;
 	try {
-		json = JSON.parse(`[${data}]`);
+		json = JSON.parse(`[${data.trim()}]`);
 	} catch(e) {
-		throw new Error(`Failed to parse data ${data}: ${(e as Error)?.message}`);
+		throw new Error(`Failed to parse data [${data}]: ${(e as Error)?.message}`);
 	}
 	guard(Array.isArray(json), () => `Expected ${data} to be an array but was not`);
 

diff --git a/src/reconstruct/reconstruct.ts b/src/reconstruct/reconstruct.ts
@@ -300,14 +300,14 @@ function reconstructArgument(argument: RArgument<ParentInformation>, name: Code
 }
 
 
-function reconstructParameter(parameter: RParameter<ParentInformation>, name: Code, defaultValue: unknown, configuration: ReconstructionConfiguration): Code {
+function reconstructParameter(parameter: RParameter<ParentInformation>, name: Code, defaultValue: Code | undefined, configuration: ReconstructionConfiguration): Code {
 	if(isSelected(configuration, parameter)) {
 		return plain(getLexeme(parameter));
 	}
 	if(parameter.defaultValue !== undefined && name.length > 0) {
 		return plain(`${getLexeme(parameter.name)}=${getLexeme(parameter.defaultValue)}`);
 	} else if(parameter.defaultValue !== undefined && name.length === 0) {
-		return plain(getLexeme(parameter.defaultValue));
+		return defaultValue ?? [];
 	} else {
 		return name;
 	}

diff --git a/test/functionality/slicing/static-program-slices/calls.test.ts b/test/functionality/slicing/static-program-slices/calls.test.ts
@@ -538,6 +538,10 @@ y` /* the formatting here seems wild, why five spaces */, { expectedOutput: '[1]
 			shell, 'c <- 3\nc(1, 2, 3)', ['2@c'], 'c(1, 2, 3)');
 	});
 	describe('Failures in Practice', () => {
+		describe('empty functions', () => {
+			assertSliced(label('Empty Function in Reconstruct', ['function-definitions']), shell,
+				'x <- 2\nfoo <- function(n, x = 3) { print(x) }\nprint(x)', ['3@x'], 'x <- 2\nx');
+		});
 		describe('Inverted Caller', () => {
 			assertSliced(label('Call from Higher', ['function-calls', 'lexicographic-scope']),
 				shell, 'create <- function() function() 3\ng <- create()\nc <- g()', ['3@c'], 'create <- function() function() 3\ng <- create()\nc <- g()');
Benchmark suite	Current: `d26c4a4`	Previous: `2380113`	Ratio
`Retrieve AST from R code`	`241.54267363636362` ms (`102.47070825847955`)	`238.99386745454547` ms (`99.5397110373604`)	`1.01`
`Normalize R AST`	`19.595258227272726` ms (`38.46921161866718`)	`17.344255954545453` ms (`30.98539214696861`)	`1.13`
`Produce dataflow information`	`60.852574045454546` ms (`132.99066276329603`)	`60.94850918181818` ms (`127.79646053550617`)	`1.00`
`Total per-file`	`839.1214398636364` ms (`1517.8413090509673`)	`839.1842053181819` ms (`1529.9296327924758`)	`1.00`
`Static slicing`	`2.1179048013224766` ms (`1.3227806262588866`)	`2.0548017594197465` ms (`1.212534826826947`)	`1.03`
`Reconstruct code`	`0.2298629893028825` ms (`0.16987624243462746`)	`0.23833344722067284` ms (`0.18520720238626878`)	`0.96`
`Total per-slice`	`2.361774895831661` ms (`1.3814644523944883`)	`2.306734662937843` ms (`1.2769510302954954`)	`1.02`
`failed to reconstruct/re-parse`	`0` #	`0` #	`1`
`times hit threshold`	`0` #	`0` #	`1`
`reduction (characters)`	`0.7891949660994808` #	`0.7869360165281424` #	`1.00`
`reduction (normalized tokens)`	`0.7665650684287274` #	`0.7639690077689504` #	`1.00`
`memory (df-graph)`	`95.46617542613636` KiB (`244.77619956879823`)	`95.46617542613636` KiB (`244.77619956879823`)	`1`
Benchmark suite	Current: `d26c4a4`	Previous: `2380113`	Ratio
`Retrieve AST from R code`	`240.40030814` ms (`43.90909242163245`)	`246.34857636` ms (`46.05736358259125`)	`0.98`
`Normalize R AST`	`18.69034436` ms (`13.880553801068945`)	`19.12205878` ms (`14.425168152928613`)	`0.98`
`Produce dataflow information`	`72.74484428` ms (`68.53708869817343`)	`74.45471131999999` ms (`70.57749655074137`)	`0.98`
`Total per-file`	`7595.8029607` ms (`28662.605191020873`)	`7726.72160172` ms (`28528.07523908248`)	`0.98`
`Static slicing`	`15.686619407051408` ms (`43.989529671374186`)	`15.999780665624812` ms (`43.68226004466487`)	`0.98`
`Reconstruct code`	`0.24974162672409403` ms (`0.14734808326631418`)	`0.2820169656616924` ms (`0.15417410861831182`)	`0.89`
`Total per-slice`	`15.944086063957226` ms (`44.02309438740676`)	`16.29007209329986` ms (`43.71440479393139`)	`0.98`
`failed to reconstruct/re-parse`	`0` #	`0` #	`1`
`times hit threshold`	`0` #	`0` #	`1`
`reduction (characters)`	`0.8762109251198998` #	`0.8712997340230448` #	`1.01`
`reduction (normalized tokens)`	`0.819994064355517` #	`0.8102441553774778` #	`1.01`
`memory (df-graph)`	`99.526015625` KiB (`113.60201607005874`)	`99.4425` KiB (`113.62933451202426`)	`1.00`