Skip to content

Commit

Permalink
add lint for duplicate feature under a statement (#2573)
Browse files Browse the repository at this point in the history
* add lint for duplicate feature under a statement

* add support for more scopes

* fix format for duplicate feature lint

* fix false positives for duplicate features lint

* remove unused code and comments

Signed-off-by: vibhatsu <[email protected]>

* refactor duplicate feature lint to use yaml parser

Signed-off-by: vibhatsu <[email protected]>

* update CHANGELOG

Signed-off-by: vibhatsu <[email protected]>

* clarify for using rule definition

Signed-off-by: vibhatsu <[email protected]>

* update CHANGELOG

Signed-off-by: vibhatsu <[email protected]>

* refactor duplicate feature lint to improve key generation and tracking of line numbers

Signed-off-by: vibhatsu <[email protected]>

---------

Signed-off-by: vibhatsu <[email protected]>
  • Loading branch information
v1bh475u authored Jan 31, 2025
1 parent 91d0d8c commit 2798d60
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 1 deletion.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
## master (unreleased)

### New Features

- add lint for detecting duplicate features in capa-rules #2250 @v1bh475u
- add span-of-calls scope to match features across a sliding window of API calls within a thread @williballenthin #2532
- add lint to catch rules that depend on other rules with impossible scope @williballenthin #2124

Expand Down
84 changes: 84 additions & 0 deletions scripts/lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import argparse
import itertools
import posixpath
from typing import Any, Dict, List
from pathlib import Path
from dataclasses import field, dataclass

Expand Down Expand Up @@ -597,6 +598,88 @@ def rec(statement):
return self.violation


class DuplicateFeatureUnderStatement(Lint):
    """Lint that flags a feature listed more than once under the same statement.

    The check walks the raw YAML of the rule definition: every list of
    features is scanned on its own, and nested statements are visited
    recursively. Two entries count as duplicates when all of their
    key/value pairs (including e.g. a `description`) render to the same text.
    """

    name = "rule contains a duplicate features"
    recommendation = "remove the duplicate features"
    recommendation_template = '\n\tduplicate line: "{:s}"\t: line numbers: {:s}'
    violation = False

    # Keys that introduce a nested list of features instead of being a feature.
    # They are compared for equality: a substring test would misclassify
    # feature keys such as "import", "export", "format" (contain "or"),
    # "operand[0].number" (contains "and") or "function-name" (contains
    # "function") as statements and silently skip their duplicates.
    _STATEMENT_KEYS = frozenset(
        {
            "or",
            "and",
            "not",
            "optional",
            "some",
            "basic block",
            "function",
            "instruction",
            "call",
        }
    )
    # Suffix shared by the counted statements, e.g. "2 or more".
    _N_OR_MORE_SUFFIX = " or more"

    def check_rule(self, ctx: Context, rule: Rule) -> bool:
        """Return True when any feature list in `rule` holds a repeated entry.

        Side effects: resets and then sets `self.violation`, and rebuilds
        `self.recommendation` with one `recommendation_template` line per
        duplicated feature (the rendered feature and its sorted line numbers).
        """
        self.violation = False
        self.recommendation = ""

        # rule.statement discards the duplicate features by default so
        # need to use the rule definition to check for duplicates
        data = rule._get_ruamel_yaml_parser().load(rule.definition)

        def get_line_number(node: Any) -> int:
            """Return the 1-based line of a parsed YAML node, or 0 if unknown."""
            lc = getattr(node, "lc", None)
            if lc and hasattr(lc, "line"):
                # the parser's line counter is zero-based
                return lc.line + 1
            return 0

        def is_statement(key: Any) -> bool:
            """Tell whether a mapping key names a statement (not a feature)."""
            if not isinstance(key, str):
                return False
            # the suffix check generalizes over the 'n or more' statements
            return key in self._STATEMENT_KEYS or key.endswith(self._N_OR_MORE_SUFFIX)

        def get_feature_key(feature_dict: Dict[str, Any]) -> str:
            """Render a feature mapping as a single comparable line."""
            # need this for generating key for multi-lined feature
            # for example, - string: /dbghelp\.dll/i
            #                description: WindBG
            return "- " + ", ".join(f"{key}: {value}" for key, value in feature_dict.items())

        def find_duplicates(features: Any) -> None:
            """Record duplicates within one feature list, then recurse into children."""
            if not isinstance(features, list):
                return

            # rendered feature -> line numbers where it appears in this list
            seen_features: Dict[str, List[int]] = {}
            for item in features:
                if not isinstance(item, dict):
                    continue

                nested = [value for key, value in item.items() if is_statement(key)]
                if nested:
                    # a statement is not itself a feature: only check its children
                    for children in nested:
                        find_duplicates(children)
                    continue

                seen_features.setdefault(get_feature_key(item), []).append(get_line_number(item))

            for feature_key, line_numbers in seen_features.items():
                if len(line_numbers) > 1:
                    self.violation = True
                    self.recommendation += self.recommendation_template.format(
                        feature_key, ", ".join(str(line) for line in sorted(line_numbers))
                    )

        find_duplicates(data["rule"].get("features", []))

        return self.violation


class UnusualMetaField(Lint):
name = "unusual meta field"
recommendation = "Remove the meta field"
Expand Down Expand Up @@ -916,6 +999,7 @@ def rec(statement):
OrStatementWithAlwaysTrueChild(),
NotNotUnderAnd(),
OptionalNotUnderAnd(),
DuplicateFeatureUnderStatement(),
RuleDependencyScopeMismatch(),
)

Expand Down

0 comments on commit 2798d60

Please sign in to comment.