Skip to content

Commit 6e5eaae

Browse files
fqriousFadl
and
Fadl
authored
add content-check output to data file #119 (#121)
Co-authored-by: Fadl <[email protected]>
1 parent 6f92066 commit 6e5eaae

File tree

1 file changed

+30
-23
lines changed

1 file changed

+30
-23
lines changed

txt2stix/txt2stix.py

+30 -23
Original file line number | Diff line number | Diff line change
@@ -136,11 +136,6 @@ def parse_args():
136136

137137
inf_arg = parser.add_argument("--input_file", "--input-file", required=True, help="The file to be converted. Must be .txt", type=Path)
138138
parser.add_argument("--ai_check_content", required=False, type=parse_model, help="Use an AI model to check wether the content of the file contains threat intelligence. Paticularly useful to weed out vendor marketing.")
139-
if (args := parser.parse_known_args()[0]) and args.ai_check_content:
140-
model : BaseAIExtractor = args.ai_check_content
141-
value = model.check_content(args.input_file.read_text())
142-
print("check-content output:", value.model_dump_json())
143-
exit(0)
144139
name_arg = parser.add_argument("--name", required=True, help="Name of the file, max 124 chars", default="stix-out")
145140
parser.add_argument("--created", required=False, default=datetime.now(), help="Allow user to optionally pass --created time in input, which will hardcode the time used in created times")
146141
parser.add_argument("--ai_settings_extractions", required=False, type=parse_model, help="(required if AI extraction enabled): passed in format provider:model e.g. openai:gpt4o. Can pass more than one value to get extractions from multiple providers.", metavar="provider[:model]", nargs='+', default=[parse_model('openai')])
@@ -266,29 +261,40 @@ def main():
266261
preprocessed_text = remove_links(input_text, args.ignore_image_refs, args.ignore_link_refs)
267262
load_env()
268263

264+
should_extract = True
265+
content_check_output = None
266+
267+
if args.ai_check_content:
268+
logging.info("checking content")
269+
model : BaseAIExtractor = args.ai_check_content
270+
content_check_output = model.check_content(args.input_file.read_text())
271+
should_extract = content_check_output.describes_incident
272+
logging.info("=== ai-check-content output ====")
273+
logging.info(content_check_output.model_dump_json())
274+
269275

270276
bundler = txt2stixBundler(args.name, args.use_identity, args.tlp_level, input_text, args.confidence, args.all_extractors, args.labels, created=args.created, report_id=args.report_id, external_references=args.external_refs)
271277
log_notes(sys.argv, "Config")
272278
convo_str = None
273279

274280
# ai_extractor_session = args.ai_model[0](args.ai_model[1])
275-
if args.use_extractions.get("ai"):
276-
validate_token_count(int(os.environ["INPUT_TOKEN_LIMIT"]), preprocessed_text, args.ai_settings_extractions)
277-
if args.relationship_mode == "ai":
278-
validate_token_count(int(os.environ["INPUT_TOKEN_LIMIT"]), preprocessed_text, [args.ai_settings_relationships])
279-
280-
all_extracts = extract_all(bundler, args.use_extractions, preprocessed_text, ai_extractors=args.ai_settings_extractions, ignore_extraction_boundary=args.ignore_extraction_boundary)
281-
extracted_relationships = None
282-
if args.relationship_mode == "ai" and sum(map(lambda x: len(x), all_extracts.values())):
283-
extracted_relationships = extract_relationships_with_ai(bundler, preprocessed_text, all_extracts, args.ai_settings_relationships)
284-
285-
# convo_str = ai_extractor_session.get_conversation() if ai_extractor_session and ai_extractor_session.initialized else ""
286-
flow = None
287-
if args.ai_create_attack_flow:
288-
logging.info("creating attack-flow bundle")
289-
ex: BaseAIExtractor = args.ai_settings_relationships
290-
flow = ex.extract_attack_flow(input_text, all_extracts, extracted_relationships)
291-
bundler.flow_objects = parse_flow(bundler.report, flow)
281+
282+
283+
all_extracts = flow = extracted_relationships = None
284+
if should_extract:
285+
if args.use_extractions.get("ai"):
286+
validate_token_count(int(os.environ["INPUT_TOKEN_LIMIT"]), preprocessed_text, args.ai_settings_extractions)
287+
if args.relationship_mode == "ai":
288+
validate_token_count(int(os.environ["INPUT_TOKEN_LIMIT"]), preprocessed_text, [args.ai_settings_relationships])
289+
all_extracts = extract_all(bundler, args.use_extractions, preprocessed_text, ai_extractors=args.ai_settings_extractions, ignore_extraction_boundary=args.ignore_extraction_boundary)
290+
if args.relationship_mode == "ai" and sum(map(lambda x: len(x), all_extracts.values())):
291+
extracted_relationships = extract_relationships_with_ai(bundler, preprocessed_text, all_extracts, args.ai_settings_relationships)
292+
293+
if args.ai_create_attack_flow:
294+
logging.info("creating attack-flow bundle")
295+
ex: BaseAIExtractor = args.ai_settings_relationships
296+
flow = ex.extract_attack_flow(input_text, all_extracts, extracted_relationships)
297+
bundler.flow_objects = parse_flow(bundler.report, flow)
292298

293299

294300

@@ -298,9 +304,10 @@ def main():
298304
output_path.write_text(out)
299305
logger.info(f"Wrote bundle output to `{output_path}`")
300306
data = {
307+
"content-check": content_check_output and content_check_output.model_dump(),
301308
"extractions": all_extracts,
302309
"relationships": extracted_relationships,
303-
"attack-flow": flow.model_dump(),
310+
"attack-flow": flow and flow.model_dump(),
304311
}
305312
data_path = Path(str(output_path).replace('bundle--', 'data--'))
306313
data_path.write_text(json.dumps(data, indent=4))

0 commit comments

Comments
 (0)