-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTypeEvidenceGatherer.cs
381 lines (293 loc) · 17.1 KB
/
TypeEvidenceGatherer.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.CSharp;
using Microsoft.CodeAnalysis.CSharp.Syntax;
using Microsoft.CodeAnalysis.FindSymbols;
namespace Quack.Analysis
{
class TypeEvidenceGatherer
{
// Deserialization call site being analyzed (supplies the semantic model, project and start node)
private readonly DeserializationCall deserCall;
// Sink for diagnostic output produced while gathering evidence
private readonly Logger logger;
// Rules used to extract type evidence from individual syntax nodes
private readonly TypeEvidenceRuleSet ruleSet;
// Evidence produced by the most recent call to GatherTypeEvidence()
private List<TypeEvidence> gatheredEvidence;
// Bookkeeping list of syntax nodes for the traversal; always initialised so it is never null
private List<SyntaxNode> visitedNodes;

/* Creates a gatherer for a single deserialization call.
   deserCall: the call site to analyze; its semantic model is used to build the rule set.
   logger:    receives all diagnostic output.
   Throws ArgumentNullException when either argument is null. */
public TypeEvidenceGatherer(DeserializationCall deserCall, Logger logger)
{
    // Fail fast with a descriptive exception instead of a NullReferenceException on deserCall.model
    System.ArgumentNullException.ThrowIfNull(deserCall);
    System.ArgumentNullException.ThrowIfNull(logger);

    this.deserCall = deserCall;
    this.logger = logger;
    this.ruleSet = new TypeEvidenceRuleSet(deserCall.model, logger);
    this.gatheredEvidence = [];
    this.visitedNodes = [];
}
/* Looks up every reference to the given symbol in the solution of the analyzed
   project and returns the syntax node found at each reference location. */
private List<SyntaxNode> FindSymbolUsers(ISymbol variableSymbol)
{
    // Query all references, then flatten them into one list of reference locations
    var locations = SymbolFinder
        .FindReferencesAsync(variableSymbol, deserCall.project.Solution)
        .Result
        .SelectMany(referenced => referenced.Locations)
        .ToList();
    logger.Info($"Found {locations.Count} references to variable {variableSymbol.Name}");

    var users = new List<SyntaxNode>(locations.Count);
    foreach (var reference in locations)
    {
        var where = reference.Location;
        var tree = where.SourceTree;
        // Line positions are zero-based, so add one for display
        logger.Info($"Reference at {tree.FilePath}:{where.GetLineSpan().StartLinePosition.Line + 1}");

        // Convert location to node and add to list
        // TODO could do more complex dataflow analysis (i.e. exclude previous nodes in the same block)
        var userNode = tree.GetRoot().FindNode(where.SourceSpan);
        logger.Info($"User Node: {MiscUtils.FirstNLines(userNode.ToString(), 3)}");

        var isExpectedKind = userNode.IsKind(SyntaxKind.IdentifierName)
            || userNode.IsKind(SyntaxKind.Argument);
        logger.Assert(isExpectedKind, $"Unhandled reference node kind in UserNodes(): {userNode.Kind()}");

        users.Add(userNode);
    }
    return users;
}
/* For a given node, retrieve all nodes which 'use' that node.
   Returns a pair (userNodes, memberUsageNodes):
     - userNodes:        nodes that should be analyzed next with the current evidence
     - memberUsageNodes: the parent of `node` when `node` is a member access that resolves to a field
   Returns (null, null) when the users cannot be tracked (for example the value is passed
   to a method whose declaration is not available), and two empty lists when `node` or its
   parent is null. Throws NotSupportedException for node kinds that have no case yet. */
/* TODO build out with more cases--currently adding case-by-case as they appear
   in test program */
// NOTE(review): FindSymbolUsers() searches the whole solution, while symbols are resolved through
// deserCall.model; presumably that model only covers one syntax tree -- confirm nodes from other files work.
private (List<SyntaxNode>?, List<SyntaxNode>?) UserNodes(SyntaxNode node)
{
    if (node == null || node.Parent == null)
    {
        logger.Info("UserNodes() reached null node or parent");
        return ([], []);
    }
    var parent = node.Parent;
    var nodeList = new List<SyntaxNode>();
    var memberUsageNodes = new List<SyntaxNode>();
    var nodeKind = node.Kind();
    logger.Info("Finding user nodes for node of kind " + nodeKind);
    switch (nodeKind)
    {
        // An argument node should add the parameter node from the method definition
        case SyntaxKind.Argument:
            logger.Info("Handling argument node");
            var paramNode = FindParameterForArgument((ArgumentSyntax)node);
            if (paramNode == null)
            {
                // The receiving parameter could not be located, so the value is untrackable from here
                return (null, null);
            }
            nodeList.Add(paramNode);
            break;
        // TODO move localdeclarationstatement and variabledeclaration to common anonymous case if possible
        case SyntaxKind.LocalDeclarationStatement:
            logger.Info("Handling local declaration statement node");
            nodeList.Add(parent); // TODO make sure this is required
            break;
        case SyntaxKind.VariableDeclaration:
            logger.Info("Handling variable declaration node");
            nodeList.Add(parent); // TODO make sure this is required
            break;
        // A variable declarator should add all users of the declared variable
        case SyntaxKind.VariableDeclarator:
            logger.Info("Handling variable declarator node");
            var declaratorSymbol = deserCall.model.GetDeclaredSymbol(node);
            if (declaratorSymbol == null)
            {
                logger.Warn("Could not resolve symbol for variable declarator, so cannot restrict type further");
                return (null, null);
            }
            nodeList.AddRange(FindSymbolUsers(declaratorSymbol));
            nodeList.Add(parent); // TODO make sure this is required
            break;
        // A parameter should add all users of the parameter inside the method
        case SyntaxKind.Parameter:
            var parameterSymbol = deserCall.model.GetDeclaredSymbol(node);
            if (parameterSymbol == null)
            {
                logger.Warn("Could not resolve symbol for parameter, so cannot restrict type further");
                return (null, null);
            }
            nodeList.AddRange(FindSymbolUsers(parameterSymbol));
            break;
        // An assignment should add all other users of the variable being assigned
        case SyntaxKind.SimpleAssignmentExpression:
            logger.Info("Handling assignment node");
            // Variable being assigned
            var lhs = ((AssignmentExpressionSyntax)node).Left;
            // TODO For now, can only handle simple assignments
            logger.Assert(lhs.IsKind(SyntaxKind.IdentifierName), "Unhandled LHS kind in UserNodes(): " + lhs.Kind());
            var variableSymbol = deserCall.model.GetSymbolInfo(lhs).Symbol;
            if (variableSymbol == null)
            {
                logger.Warn("Could not resolve symbol for assignment target, so cannot restrict type further");
                return (null, null);
            }
            foreach (var userNode in FindSymbolUsers(variableSymbol))
            {
                // The assignment target itself is not a user of the assigned value
                if (userNode != lhs)
                {
                    nodeList.Add(userNode);
                }
            }
            nodeList.Add(parent); // TODO make sure this is required
            break;
        // TODO could do member analysis here? Probably not, probably wait until allowList finished
        // then convert each possible object to an object graph
        // TODO handle pointer member access expression
        case SyntaxKind.SimpleMemberAccessExpression:
            // In this case, just add the 'parent.member' node to the member usage list
            logger.Info("Handling member access node");
            var memberNode = (MemberAccessExpressionSyntax)node;
            logger.Info("Member name:" + memberNode.Name);
            logger.Info("Operator Token: " + memberNode.OperatorToken);
            logger.Assert(memberNode.OperatorToken.IsKind(SyntaxKind.DotToken), "Unhandled member access operator in UserNodes(): " + memberNode.OperatorToken.Kind());
            // Check if member is a field or property (the symbol is null when it cannot be resolved)
            var memberSymbol = deserCall.model.GetSymbolInfo(memberNode).Symbol;
            if (memberSymbol != null && memberSymbol.Kind == SymbolKind.Field)
            {
                // If kind is field, we want to follow it so add to member usage
                memberUsageNodes.Add(parent);
            }
            else
            {
                // Otherwise, we can skip it (TODO this probably needs more thought)
                // TODO properties definitely need to be handled more carefully
                logger.Info("Member is not a field, no users to add");
            }
            break;
        /* For 'anonymous' nodes that just return a value, the only user is parent (TODO some of these
           might need more logic) */
        case SyntaxKind.InvocationExpression:
        case SyntaxKind.IdentifierName:
        case SyntaxKind.ExpressionStatement:
        case SyntaxKind.Block:
        case SyntaxKind.EqualsValueClause:
        case SyntaxKind.CastExpression:
        case SyntaxKind.ParenthesizedExpression:
            nodeList.Add(parent);
            break;
        /* For non-expression cases that dead-end, don't add any users */
        case SyntaxKind.MethodDeclaration:
            break;
        default:
            logger.Error("Unhandled node kind in UserNodes(): " + nodeKind);
            throw new System.NotSupportedException("Unhandled node kind in UserNodes(): " + nodeKind);
    }
    return (nodeList, memberUsageNodes);
}

/* Finds the parameter declaration that receives the given call argument.
   Returns null (after logging a warning) whenever the parameter cannot be located:
   the argument is not part of a method invocation, the invoked method cannot be resolved,
   no method declaration is available in source, or no parameter matches the argument. */
private SyntaxNode? FindParameterForArgument(ArgumentSyntax argNode)
{
    // Only plain invocations `Method(a, b)` are understood; indexers, object creation etc. are not
    if (argNode.Parent is not ArgumentListSyntax argNodeList
        || argNodeList.Parent is not InvocationExpressionSyntax methodNode)
    {
        logger.Warn("Argument is not part of a method invocation, so cannot restrict type further");
        return null;
    }

    // Get the index of the argument
    var argIndex = argNodeList.Arguments.IndexOf(argNode);
    logger.Info("Argument index: " + argIndex);

    // Find the method symbol; it is null when the call cannot be bound to a single method
    var methodSymbol = deserCall.model.GetSymbolInfo(methodNode).Symbol;
    if (methodSymbol == null)
    {
        logger.Warn("Could not resolve invoked method, so cannot restrict type further");
        return null;
    }
    logger.Info("Caller method: " + methodSymbol.ToString());

    // Find the method declaration
    var syntaxReferences = methodSymbol.DeclaringSyntaxReferences;
    if (syntaxReferences.Length == 0)
    {
        logger.Warn("Could not find method declaration (may be external method), so cannot restrict type further");
        return null;
    }
    MethodDeclarationSyntax? declaration = null;
    foreach (var syntaxReference in syntaxReferences)
    {
        declaration = syntaxReference.GetSyntax() as MethodDeclarationSyntax;
        if (declaration != null)
        {
            break;
        }
    }
    if (declaration == null)
    {
        logger.Warn("Declaration of invoked method is not an ordinary method declaration, so cannot restrict type further");
        return null;
    }

    var parameters = declaration.ParameterList.Parameters;

    // A named argument (`name: value`) binds by name, not by position
    if (argNode.NameColon != null)
    {
        var argName = argNode.NameColon.Name.Identifier.ValueText;
        foreach (var parameter in parameters)
        {
            if (parameter.Identifier.ValueText == argName)
            {
                logger.Info("Found parameter node");
                return parameter;
            }
        }
        logger.Warn("Could not find parameter named " + argName + ", so cannot restrict type further");
        return null;
    }

    // TODO extension methods invoked as `receiver.Method(arg)` declare an extra leading `this`
    // parameter, so positions may be shifted by one there -- confirm and handle
    // Positional argument: guard against more arguments than declared parameters (e.g. params arrays)
    if (argIndex < 0 || argIndex >= parameters.Count)
    {
        logger.Warn("Argument index " + argIndex + " has no matching parameter, so cannot restrict type further");
        return null;
    }
    logger.Info("Found parameter node");
    return parameters[argIndex];
}
// TODO create a class to hold return object?
/* Recursively gathers the type evidence reachable from `node`.
   node:         syntax node whose users are followed.
   baseEvidence: evidence already known for the value flowing through `node`; never modified.
   Returns the (de-duplicated) base evidence plus all evidence found on the users of `node`.
   If the users of `node` cannot be tracked, returns a single UntrackableTypeEvidence wrapping
   the base evidence and the evidence extracted from `node` itself. */
private List<TypeEvidence>? AnalyzeNode(SyntaxNode node, List<TypeEvidence> baseEvidence)
{
    logger.Info("Analyzing node: " + MiscUtils.FirstNLines(node.ToString(), 3));

    // visitedNodes holds the nodes on the current recursion path. Data flow can be cyclic
    // (e.g. `x = (T)x`, or a recursive call passing its own parameter), which would otherwise
    // recurse until the stack overflows.
    visitedNodes ??= new List<SyntaxNode>();
    if (visitedNodes.Contains(node))
    {
        logger.Info("Node is already being analyzed, not following it again");
        // Nothing new can be learned on this path; the base evidence still holds
        return baseEvidence.Distinct().ToList();
    }

    visitedNodes.Add(node);
    try
    {
        return AnalyzeNodeUsers(node, baseEvidence);
    }
    finally
    {
        // Recursion is strictly nested, so this node is the last entry on the path
        visitedNodes.RemoveAt(visitedNodes.Count - 1);
    }
}

/* Extracts evidence from `node` itself and merges in the evidence of every user of `node`. */
private List<TypeEvidence> AnalyzeNodeUsers(SyntaxNode node, List<TypeEvidence> baseEvidence)
{
    // Create a list of evidence for this node
    var allowList = new List<TypeEvidence>();
    var nextBaseEvidence = baseEvidence;
    // Extract evidence from the toplevel node
    var initialEvidence = ruleSet.ExtractEvidence(node);
    // If node provides evidence, we can safely replace base evidence
    // This is where 'narrowing' happens, because we no longer consider
    // previous base evidence
    if (initialEvidence != null && initialEvidence.Count > 0)
    {
        nextBaseEvidence = initialEvidence;
    }

    // Find each user node of the present node
    var (rootUserNodes, memberNodes) = UserNodes(node);

    // If any user node is untrackable (i.e. external method), return
    // an untrackable version of the base evidence
    if (rootUserNodes == null)
    {
        logger.Info("At least one user node could not be analyzed, cannot resetrict type further");
        // Combine into a fresh list: baseEvidence is shared with the caller and with sibling
        // user nodes, so it must not be modified here
        var untrackedEvidence = new List<TypeEvidence>(baseEvidence);
        if (initialEvidence != null)
        {
            untrackedEvidence.AddRange(initialEvidence.Except(untrackedEvidence));
        }
        return [new UntrackableTypeEvidence(node, untrackedEvidence, "Untrackable user node")];
    }

    allowList.AddRange(baseEvidence.Except(allowList));

    if (memberNodes != null)
    {
        foreach (var memberNode in memberNodes)
        {
            // TODO empty base evidence might not be right here? it should populate with initial evidence
            var memberAllowList = AnalyzeNode(memberNode, []);
            // For now, we don't track member-use evidence separately since
            // it all gets combined in the final binder anyways (but might be nice to separate for debugging)
            if (memberAllowList != null)
            {
                allowList.AddRange(memberAllowList.Except(allowList));
            }
        }
    }

    // Analyze each user node
    foreach (var userNode in rootUserNodes)
    {
        // Each user node starts with the base evidence (i.e. its type is definitely base evidence or a subclass of it)
        var userEvidence = AnalyzeNode(userNode, nextBaseEvidence);
        if (userEvidence != null)
        {
            allowList.AddRange(userEvidence.Except(allowList));
        }
    }
    return allowList;
}
/* Gathers type evidence for the deserialization call this gatherer was created for.
   Starts at the call's syntax node, logs every piece of evidence found, stores the
   result in gatheredEvidence and returns it. Never returns null. */
public List<TypeEvidence> GatherTypeEvidence()
{
    logger.Info("Gathering type evidence");
    // Follow the deserialization call node, starting without any base evidence.
    // AnalyzeNode is declared as possibly returning null, so fall back to an empty list.
    gatheredEvidence = AnalyzeNode(deserCall.node, []) ?? new List<TypeEvidence>();
    logger.Info("Finished gathering evidence");
    foreach (var evidence in gatheredEvidence)
    {
        logger.Plain(LogLevel.Info, evidence.ToString() + "\n");
    }
    return gatheredEvidence;
}
}
}