Skip to content

Commit

Permalink
Insert results
Browse files: browse the repository at this point in the history
  • Loading branch information
cloudyyoung committed Apr 7, 2024
1 parent 946fbea commit 7d45222
Showing 1 changed file with 26 additions and 25 deletions.
51 changes: 26 additions & 25 deletions cskg/detectors/data_clumps.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from abc import ABC
from collections import defaultdict
import math
from time import sleep
from typing import Iterable
from neo4j.exceptions import ClientError
@@ -90,6 +91,7 @@ def build_conditional_fp_tree(self):
unit="nodes",
)
for class_qualified_name, param_name in bar:
# Create conditional FP tree
query = f"""
MATCH path = (root:{FpTreeNode.label} {{
node_id: "{self.root.node_id}"
@@ -111,6 +113,7 @@ def build_conditional_fp_tree(self):
logger.debug(query)
results, meta = self.neo_db.cypher_query(query)

# Propagate correct support counts
query = f"""
MATCH (child:{ConditionalFpTreeNode.label})<-[:LINKS*]-(parent:{ConditionalFpTreeNode.label})
WHERE NOT (child)-[]->()
@@ -123,31 +126,29 @@ def build_conditional_fp_tree(self):
if len(results) > 100:
sleep(5)

# # # Query for CFP Tree
# query = f"""
# MATCH path =
# (root:{ConditionalFpTreeNode.label})-[:LINKS*]->(end:{ConditionalFpTreeNode.label})
# RETURN path, end
# """
# paths, meta = self.neo_db.cypher_query(query)
# patterns = []
# for path, end in paths:
# nodes = list(
# filter(lambda node: node["support_count"] >= 3, path.nodes[1:-1])
# )
# if len(nodes) > 0:
# patterns.append(nodes)

# for pattern in patterns:
# logger.debug(pattern)
# self.result_collection.insert_one(
# {
# "pattern": pattern,
# "support_count": min(
# map(lambda node: node["support_count"], pattern)
# ),
# }
# )
# Query for frequent itemsets
query = f"""
MATCH path = (root:{ConditionalFpTreeNode.label})-[:LINKS*]->(end:{ConditionalFpTreeNode.label})
WHERE NOT (end)-[]->()
RETURN path
"""
paths, _ = self.neo_db.cypher_query(query)
for (path,) in paths:
path: Path
itemset = [(class_qualified_name, param_name)]
frequency = math.inf

nodes = list(path.nodes)
interim_nodes = nodes[1:-1]
for node in interim_nodes:
cfp_node = ConditionalFpTreeNode.from_neo_node(node)
itemset.append((cfp_node.class_qualified_name, cfp_node.param_name))
frequency = min(frequency, cfp_node.support_count)

if len(interim_nodes) > 1:
self.result_collection.insert_one(
{"itemset": itemset, "support_count": frequency}
)

# Clear CFP Tree
self.clear_conditional_fp_nodes()
Expand Down

0 comments on commit 7d45222

Please sign in to comment.