From f5768cb42c1f6d3616c2367ccc95b48cfb3bedd4 Mon Sep 17 00:00:00 2001
From: Roberto Cantu <Roberto_Cantu@live.com>
Date: Mon, 24 Jun 2024 18:48:47 +0800
Subject: [PATCH 01/16] Remove contract similarity from main bot logic

---
 scam-detector-py/src/agent.py | 63 +++--------------------------------
 1 file changed, 4 insertions(+), 59 deletions(-)

diff --git a/scam-detector-py/src/agent.py b/scam-detector-py/src/agent.py
index 80a7f3c5..6594d5ac 100644
--- a/scam-detector-py/src/agent.py
+++ b/scam-detector-py/src/agent.py
@@ -22,9 +22,9 @@
 from forta_agent import Finding, FindingType, FindingSeverity, get_alerts, get_labels
 from web3 import Web3
 
-from src.constants import (BASE_BOTS, ALERTED_ENTITIES_ML_KEY, ALERTED_ENTITIES_ML_QUEUE_SIZE, ALERTED_ENTITIES_PASSTHROUGH_KEY, ALERTED_ENTITIES_PASSTHROUGH_QUEUE_SIZE, ALERTED_ENTITIES_SCAMMER_ASSOCIATION_KEY, ALERTED_ENTITIES_SCAMMER_ASSOCIATION_QUEUE_SIZE, ALERTED_ENTITIES_SIMILAR_CONTRACT_KEY, ALERTED_ENTITIES_SIMILAR_CONTRACT_QUEUE_SIZE, ALERTED_ENTITIES_MANUAL_KEY, ALERTED_ENTITIES_MANUAL_QUEUE_SIZE, ALERTED_ENTITIES_MANUAL_METAMASK_KEY, ALERTED_ENTITIES_MANUAL_METAMASK_QUEUE_SIZE, ALERT_LOOKBACK_WINDOW_IN_DAYS, ENTITY_CLUSTER_BOTS,
+from src.constants import (BASE_BOTS, ALERTED_ENTITIES_ML_KEY, ALERTED_ENTITIES_ML_QUEUE_SIZE, ALERTED_ENTITIES_PASSTHROUGH_KEY, ALERTED_ENTITIES_PASSTHROUGH_QUEUE_SIZE, ALERTED_ENTITIES_SCAMMER_ASSOCIATION_KEY, ALERTED_ENTITIES_SCAMMER_ASSOCIATION_QUEUE_SIZE, ALERTED_ENTITIES_MANUAL_KEY, ALERTED_ENTITIES_MANUAL_QUEUE_SIZE, ALERTED_ENTITIES_MANUAL_METAMASK_KEY, ALERTED_ENTITIES_MANUAL_METAMASK_QUEUE_SIZE, ALERT_LOOKBACK_WINDOW_IN_DAYS, ENTITY_CLUSTER_BOTS,
                        FINDINGS_CACHE_ALERT_KEY, FINDINGS_CACHE_BLOCK_KEY, ALERTED_FP_CLUSTERS_KEY, FINDINGS_CACHE_TRANSACTION_KEY,
-                       ALERTED_FP_CLUSTERS_QUEUE_SIZE, SCAM_DETECTOR_BOT_ID, SCAM_DETECTOR_BETA_BOT_ID, SCAM_DETECTOR_BETA_ALT_BOT_ID, CONTRACT_SIMILARITY_BOTS, CONTRACT_SIMILARITY_BOT_THRESHOLDS, EOA_ASSOCIATION_BOTS,
+                       ALERTED_FP_CLUSTERS_QUEUE_SIZE, SCAM_DETECTOR_BOT_ID, SCAM_DETECTOR_BETA_BOT_ID, SCAM_DETECTOR_BETA_ALT_BOT_ID, EOA_ASSOCIATION_BOTS,
                        EOA_ASSOCIATION_BOT_THRESHOLDS, PAIRCREATED_EVENT_ABI, SWAP_FACTORY_ADDRESSES, POOLCREATED_EVENT_ABI, ENCRYPTED_BOTS,
                        MODEL_ALERT_THRESHOLD_LOOSE, MODEL_ALERT_THRESHOLD_STRICT, MODEL_FEATURES, MODEL_NAME, DEBUG_ALERT_ENABLED, ENABLE_METAMASK_CONSUMPTION)
 from src.storage import s3_client, dynamo_table, get_secrets, bucket_name
@@ -50,7 +50,6 @@
 ALERTED_ENTITIES_ML = OrderedDict()  # cluster -> alert_id
 ALERTED_ENTITIES_PASSTHROUGH = OrderedDict()  # cluster -> alert_id
 ALERTED_ENTITIES_SCAMMER_ASSOCIATION = OrderedDict()  # cluster -> alert_id
-ALERTED_ENTITIES_SIMILAR_CONTRACT = OrderedDict()  # cluster -> alert_id
 ALERTED_ENTITIES_MANUAL = OrderedDict()  # cluster -> alert_id
 ALERTED_ENTITIES_MANUAL_METAMASK = OrderedDict()  # cluster -> alert_id
 ALERTED_ENTITIES_MANUAL_METAMASK_LIST = [] # Used to reduce size of persisted item
@@ -112,10 +111,6 @@ def initialize(test = False):
         alerted_entities_scammer_association = load(CHAIN_ID, ALERTED_ENTITIES_SCAMMER_ASSOCIATION_KEY)
         ALERTED_ENTITIES_SCAMMER_ASSOCIATION = OrderedDict() if alerted_entities_scammer_association is None else OrderedDict(alerted_entities_scammer_association)
 
-        global ALERTED_ENTITIES_SIMILAR_CONTRACT
-        alerted_entities_similar_contract = load(CHAIN_ID, ALERTED_ENTITIES_SIMILAR_CONTRACT_KEY)
-        ALERTED_ENTITIES_SIMILAR_CONTRACT = OrderedDict() if alerted_entities_similar_contract is None else OrderedDict(alerted_entities_similar_contract)
-
         global ALERTED_ENTITIES_MANUAL
         alerted_entities_manual = load(CHAIN_ID, ALERTED_ENTITIES_MANUAL_KEY)
         ALERTED_ENTITIES_MANUAL = OrderedDict() if alerted_entities_manual is None else OrderedDict(alerted_entities_manual)
@@ -450,7 +445,7 @@ def get_model_score(df_feature_vector: pd.DataFrame) -> float:
 
 
 def already_alerted(entity: str, alert_id: str, logic = ""):
-    global ALERTED_ENTITIES_ML, ALERTED_ENTITIES_PASSTHROUGH, ALERTED_ENTITIES_SCAMMER_ASSOCIATION, ALERTED_ENTITIES_SIMILAR_CONTRACT, ALERTED_ENTITIES_MANUAL, ALERTED_ENTITIES_MANUAL_METAMASK
+    global ALERTED_ENTITIES_ML, ALERTED_ENTITIES_PASSTHROUGH, ALERTED_ENTITIES_SCAMMER_ASSOCIATION, ALERTED_ENTITIES_MANUAL, ALERTED_ENTITIES_MANUAL_METAMASK
     
     if logic == "ml":
         alerted_entities = ALERTED_ENTITIES_ML
@@ -458,8 +453,6 @@ def already_alerted(entity: str, alert_id: str, logic = ""):
         alerted_entities = ALERTED_ENTITIES_PASSTHROUGH
     elif logic == "scammer_association":
         alerted_entities = ALERTED_ENTITIES_SCAMMER_ASSOCIATION
-    elif logic == "similar_contract":
-        alerted_entities = ALERTED_ENTITIES_SIMILAR_CONTRACT
     elif logic == "manual":
         alerted_entities = ALERTED_ENTITIES_MANUAL
     elif logic == "manual_metamask":
@@ -630,43 +623,6 @@ def emit_passthrough_finding(w3, alert_event: forta_agent.alert_event.AlertEvent
     logging.info(f"{BOT_VERSION}: alert {alert_event.alert_hash} {alert_event.bot_id} {alert_event.alert.alert_id} - return total findings: {len(findings)}")
     return findings
 
-def emit_contract_similarity_finding(w3, alert_event: forta_agent.alert_event.AlertEvent) -> list:
-    global ALERTED_ENTITIES_SIMILAR_CONTRACT
-    global ALERTED_ENTITIES_SIMILAR_CONTRACT_QUEUE_SIZE
-    global CONTRACT_SIMILARITY_BOT_THRESHOLDS
-    global CHAIN_ID
-
-    findings = []
-    scammer_addresses_lower = BaseBotParser.get_scammer_addresses(w3, alert_event)
-    logging.info(f"{BOT_VERSION}: alert {alert_event.alert_hash} {alert_event.bot_id} {alert_event.alert.alert_id} - got contract similarity bot alert; got {len(scammer_addresses_lower)} scammer addresses.")
-    for scammer_address_lower in scammer_addresses_lower:
-        # Check if the address is in the manual FP list
-        if Utils.is_in_fp_mitigation_list(scammer_address_lower):
-            logging.info(f"Skipped alert for {scammer_address_lower} as it is in the manual FP list.")
-            continue
-
-        logging.info(f"{BOT_VERSION}: alert {alert_event.alert_hash} {alert_event.bot_id} {alert_event.alert.alert_id} - processing contract similarity bot address {scammer_address_lower}")
-
-        similarity_score = float(alert_event.alert.metadata['similarity_score']) if 'similarity_score' in alert_event.alert.metadata else float(alert_event.alert.metadata['similarityScore'])
-        logging.info(f"{BOT_VERSION}: alert {alert_event.alert_hash} {alert_event.bot_id} {alert_event.alert.alert_id} - {scammer_address_lower} similarity score {similarity_score}")
-        if similarity_score > CONTRACT_SIMILARITY_BOT_THRESHOLDS[0]:
-            logging.info(f"{BOT_VERSION}: alert {alert_event.alert_hash} {alert_event.bot_id} {alert_event.alert.alert_id} - similarity score {similarity_score} is above threshold {CONTRACT_SIMILARITY_BOT_THRESHOLDS[0]}")
-            if not Utils.is_fp(w3, scammer_address_lower, CHAIN_ID, FINDINGS_CACHE_ALERT):
-                
-                if not already_alerted(scammer_address_lower, "SCAM-DETECTOR-SIMILAR-CONTRACT", "similar_contract"):
-                    logging.info(f"{BOT_VERSION}: alert {alert_event.alert_hash} {alert_event.bot_id} {alert_event.alert.alert_id} - address {scammer_address_lower}; emitting finding")
-                    update_list(ALERTED_ENTITIES_SIMILAR_CONTRACT, ALERTED_ENTITIES_SIMILAR_CONTRACT_QUEUE_SIZE, scammer_address_lower, "SCAM-DETECTOR-SIMILAR-CONTRACT", "ALERTED_ENTITIES_SIMILAR_CONTRACT", "similar_contract")
-                    finding = ScamDetectorFinding.alert_similar_contract(block_chain_indexer, forta_explorer, alert_event.alert.alert_id, alert_event.alert_hash, alert_event.alert.metadata, CHAIN_ID)
-                    if(finding is not None):
-                        findings.append(finding)
-                    else:
-                        logging.info(f"{BOT_VERSION}: alert {alert_event.alert_hash} {alert_event.bot_id} {alert_event.alert.alert_id} - finding is none due to original threat category not being in list flagged for propagation")
-                else:
-                    logging.info(f"{BOT_VERSION}: alert {alert_event.alert_hash} {alert_event.bot_id} {alert_event.alert.alert_id} - address {scammer_address_lower} already alerted")
-            else:
-                logging.info(f"{BOT_VERSION}: alert {alert_event.alert_hash} {alert_event.bot_id} {alert_event.alert.alert_id} - address {scammer_address_lower} in FP.")
-    return findings
-
 
 def emit_eoa_association_finding(w3, alert_event: forta_agent.alert_event.AlertEvent) -> list:
     global ALERTED_ENTITIES_SCAMMER_ASSOCIATION
@@ -898,16 +854,10 @@ def detect_scam(w3, alert_event: forta_agent.alert_event.AlertEvent, clear_state
                     put_entity_cluster(alert_event.alert.created_at, address, cluster)
 
             # for basebots, three paths:
-            # for contract similarity, a bit more work
             # for passthroughs, simply emit an alert (pot with some adjustments on mappings)
             # for combination base bots store in dynamo; then query dynamo for the cluster (this will pull all alerts from multiple shards), build feature vector and then evaluate detection heuristic
             
-            if in_list(alert_event, CONTRACT_SIMILARITY_BOTS):
-                start = time.time()
-                logging.info(f"{BOT_VERSION}: alert {alert_event.alert_hash} is contract similarity alert")
-                findings.extend(emit_contract_similarity_finding(w3, alert_event))
-                logging.info(f"{BOT_VERSION}: alert {alert_event.alert_hash} is contract similarity alert. Processing took {time.time() - start} seconds.")
-            elif in_list(alert_event, EOA_ASSOCIATION_BOTS):
+            if in_list(alert_event, EOA_ASSOCIATION_BOTS):
                 start = time.time()
                 logging.info(f"{BOT_VERSION}: alert {alert_event.alert_hash} is eoa association alert")
                 findings.extend(emit_eoa_association_finding(w3, alert_event))
@@ -1302,7 +1252,6 @@ def clear_state():
     L2Cache.remove(CHAIN_ID, ALERTED_ENTITIES_ML_KEY)
     L2Cache.remove(CHAIN_ID, ALERTED_ENTITIES_PASSTHROUGH_KEY)
     L2Cache.remove(CHAIN_ID, ALERTED_ENTITIES_SCAMMER_ASSOCIATION_KEY)
-    L2Cache.remove(CHAIN_ID, ALERTED_ENTITIES_SIMILAR_CONTRACT_KEY)
     L2Cache.remove(CHAIN_ID, ALERTED_ENTITIES_MANUAL_KEY)
     L2Cache.remove(CHAIN_ID, ALERTED_ENTITIES_MANUAL_METAMASK_KEY)
     L2Cache.remove(CHAIN_ID, ALERTED_FP_CLUSTERS_KEY)
@@ -1325,9 +1274,6 @@ def persist_state():
     global ALERTED_ENTITIES_SCAMMER_ASSOCIATION
     global ALERTED_ENTITIES_SCAMMER_ASSOCIATION_KEY
 
-    global ALERTED_ENTITIES_SIMILAR_CONTRACT
-    global ALERTED_ENTITIES_SIMILAR_CONTRACT_KEY
-
     global ALERTED_ENTITIES_MANUAL
     global ALERTED_ENTITIES_MANUAL_KEY
 
@@ -1353,7 +1299,6 @@ def persist_state():
     persist(ALERTED_ENTITIES_ML, CHAIN_ID, ALERTED_ENTITIES_ML_KEY)
     persist(ALERTED_ENTITIES_PASSTHROUGH, CHAIN_ID, ALERTED_ENTITIES_PASSTHROUGH_KEY)
     persist(ALERTED_ENTITIES_SCAMMER_ASSOCIATION, CHAIN_ID, ALERTED_ENTITIES_SCAMMER_ASSOCIATION_KEY)
-    persist(ALERTED_ENTITIES_SIMILAR_CONTRACT, CHAIN_ID, ALERTED_ENTITIES_SIMILAR_CONTRACT_KEY)
     persist(ALERTED_ENTITIES_MANUAL, CHAIN_ID, ALERTED_ENTITIES_MANUAL_KEY)
     persist(ALERTED_FP_CLUSTERS, CHAIN_ID, ALERTED_FP_CLUSTERS_KEY)
     persist(FINDINGS_CACHE_BLOCK, CHAIN_ID, FINDINGS_CACHE_BLOCK_KEY)

From 2e846da0d804b8a11ebe45268d433a6e587f20ea Mon Sep 17 00:00:00 2001
From: Roberto Cantu <Roberto_Cantu@live.com>
Date: Mon, 24 Jun 2024 18:49:52 +0800
Subject: [PATCH 02/16] Remove contract similarity constants

---
 scam-detector-py/src/constants.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/scam-detector-py/src/constants.py b/scam-detector-py/src/constants.py
index f943864c..3e47ffc9 100644
--- a/scam-detector-py/src/constants.py
+++ b/scam-detector-py/src/constants.py
@@ -5,9 +5,6 @@
 
 ENTITY_CLUSTER_BOTS = [("0xd3061db4662d5b3406b52b20f34234e462d2c275b99414d76dc644e2486be3e9", "ENTITY-CLUSTER")]
 
-CONTRACT_SIMILARITY_BOTS = [("0x3acf759d5e180c05ecabac2dbd11b79a1f07e746121fc3c86910aaace8910560", "NEW-SCAMMER-CONTRACT-CODE-HASH")]
-CONTRACT_SIMILARITY_BOT_THRESHOLDS = [0.97]
-
 EOA_ASSOCIATION_BOTS = [("0xcd9988f3d5c993592b61048628c28a7424235794ada5dc80d55eeb70ec513848", "SCAMMER-LABEL-PROPAGATION-1")]
 EOA_ASSOCIATION_BOT_THRESHOLDS = [0.0]
 
@@ -23,8 +20,6 @@
 ALERTED_ENTITIES_PASSTHROUGH_QUEUE_SIZE = 75000
 ALERTED_ENTITIES_SCAMMER_ASSOCIATION_KEY = "alerted_entities_scammer_association_per_alert_id_key"
 ALERTED_ENTITIES_SCAMMER_ASSOCIATION_QUEUE_SIZE = 100000
-ALERTED_ENTITIES_SIMILAR_CONTRACT_KEY = "alerted_entities_similar_contract_per_alert_id_key"
-ALERTED_ENTITIES_SIMILAR_CONTRACT_QUEUE_SIZE = 100000
 ALERTED_ENTITIES_MANUAL_KEY = "alerted_entities_manual_per_alert_id_key"
 ALERTED_ENTITIES_MANUAL_QUEUE_SIZE = 100000
 ALERTED_ENTITIES_MANUAL_METAMASK_KEY = "alerted_entities_manual_metamask_per_alert_id_key"

From edc355395930a089ba320df810687d14f5cc287f Mon Sep 17 00:00:00 2001
From: Roberto Cantu <Roberto_Cantu@live.com>
Date: Mon, 24 Jun 2024 18:50:29 +0800
Subject: [PATCH 03/16] Remove contract similarity finding generation

---
 scam-detector-py/src/findings.py | 139 -------------------------------
 1 file changed, 139 deletions(-)

diff --git a/scam-detector-py/src/findings.py b/scam-detector-py/src/findings.py
index 47115f78..777676c5 100644
--- a/scam-detector-py/src/findings.py
+++ b/scam-detector-py/src/findings.py
@@ -101,145 +101,6 @@ def get_threat_category(alert_id: str) -> str:
             return "unknown"
         else:
             return ""
-
-    @staticmethod
-    def alert_similar_contract(block_chain_indexer, forta_explorer, base_bot_alert_id: str, base_bot_alert_hash: str, metadata: dict, chain_id:int) -> Optional[Finding]:
-
-        # {"alert_hash":"0x92f0e1c5f9677a3ea2903047641213ba62e5a00d62f363efc1a85cd1e184e016",
-        #  "new_scammer_contract_address":"0x75577bd21803a13d6ec3e0d784f84e0e7e31cbd2",
-        #  "new_scammer_eoa":"0x7e6b6f2be1bb8d2e1d5fcefa2d6df86b6e03b8d0",
-        #  "scammer_contract_address":"0xe22536ac6f6a20dbb283e7f61a880993eab63313",
-        #  "scammer_eoa":"0xc1015eb4d9aa4f77d79cf04825cbfb7fc04e232e",
-        #  "similarity_hash":"68e6432db785f93986a9d49b19077067f8b694612f2bc1e8ef5cd38af2c8727e",
-        #  "similarity_score":"0.9347575306892395"}
-
-        alert_hash = metadata["alertHash"] if "alertHash" in metadata else metadata["alert_hash"]
-        existing_scammer_contract_address = metadata["scammerContractAddress"] if "scammerContractAddress" in metadata else metadata["scammer_contract_address"]
-        existing_scammer_address = metadata["scammerEoa"] if "scammerEoa" in metadata else metadata["scammer_eoa"]
-        scammer_contract_address = metadata["newScammerContractAddress"] if "newScammerContractAddress" in metadata else metadata["new_scammer_contract_address"]
-        scammer_address = metadata["newScammerEoa"] if "newScammerEoa" in metadata else metadata["new_scammer_eoa"]
-        similarity_score = metadata["similarityScore"] if "similarityScore" in metadata else metadata["similarity_score"]
-
-        alert_id = "SCAM-DETECTOR-SIMILAR-CONTRACT"  # only used in context of alerts; in context of labels we talk about threat-categories
-
-        original_threat_categories = set()  # scammer-eoa/* threat categories of the original scammer
-        source_id = SCAM_DETECTOR_BETA_ALT_BOT_ID if Utils.is_beta_alt() else (SCAM_DETECTOR_BETA_BOT_ID if Utils.is_beta() else SCAM_DETECTOR_BOT_ID)
-        df_labels = forta_explorer.get_labels(source_id, datetime(2023,1,1), datetime.now(), entity = existing_scammer_contract_address.lower())
-
-        for index, row in df_labels.iterrows():
-            if row['metadata'] is not None and "address_type" in row['metadata'].keys() and "threat_category" in row['metadata'].keys() and row['metadata']['address_type'] == 'contract':
-                original_threat_category = row['metadata']['threat_category']
-                original_threat_categories.add(original_threat_category)
-                logging.info(f"retrieved original threat category for label {existing_scammer_contract_address.lower()}: {original_threat_category}")
-        
-
-        if len(original_threat_categories.intersection(set(['address-poisoner', 'native-ice-phishing-social-engineering', 'hard-rug-pull', 'soft-rug-pull', 'rake-token', 'impersonating-token'])))>0:
-            labels = []
-            threat_category = ScamDetectorFinding.get_threat_category(alert_id)
-            confidence = Utils.get_confidence_value(threat_category)
-            labels.append(Label({
-                'entityType': EntityType.Address,
-                'label': 'scammer',
-                'entity': scammer_address,
-                'confidence': confidence,
-                'metadata': {
-                    'address_type': 'EOA',
-                    'chain_id': chain_id,
-                    'base_bot_alert_ids': base_bot_alert_id,  # base bot alert id: contract similarity alert id
-                    'base_bot_alert_hashes': base_bot_alert_hash,
-                    'associated_scammer_contract': existing_scammer_contract_address,
-                    'associated_scammer_threat_categories': ','.join(original_threat_categories),
-                    'associated_scammer_alert_hashes': alert_hash,
-                    'deployer_info': f"Deployer {scammer_address} deployed a contract {scammer_contract_address} that is similar to a contract {existing_scammer_contract_address} deployed by a known scammer {existing_scammer_address} involved in {','.join(original_threat_categories)} scam (alert hash: {alert_hash}).",
-                    'threat_category': threat_category,
-                    'threat_description_url': ScamDetectorFinding.get_threat_description_url(alert_id),
-                    'bot_version': Utils.get_bot_version(),
-                    'label_version': ScamDetectorFinding.LABEL_VERSION,
-                    'logic': 'propagation'
-                }
-            }))
-
-            common_scammer_contract_label_properties = {
-                'entityType': EntityType.Address,
-                'entity': scammer_contract_address,
-                'confidence': confidence,
-                'metadata': {
-                    'address_type': 'contract',
-                    'chain_id': chain_id,
-                    'base_bot_alert_ids': base_bot_alert_id,  # base bot alert id: contract similarity alert id
-                    'base_bot_alert_hashes': base_bot_alert_hash,
-                    'associated_scammer_contract': existing_scammer_contract_address,
-                    'associated_scammer_threat_categories': ','.join(original_threat_categories),
-                    'associated_scammer_alert_hashes': alert_hash,
-                    'deployer_info': f"Deployer {scammer_address} deployed a contract {scammer_contract_address} that is similar to a contract {existing_scammer_contract_address} deployed by a known scammer {existing_scammer_address} involved in {','.join(original_threat_categories)} scam (alert hash: {alert_hash}); this contract may or may not be related to this particular scam, but was created by the scammer.",
-                    'threat_category': threat_category,
-                    'threat_description_url': ScamDetectorFinding.get_threat_description_url(alert_id),
-                    'bot_version': Utils.get_bot_version(),
-                    'label_version': ScamDetectorFinding.LABEL_VERSION,
-                    'logic': 'propagation'
-                }
-            }
-
-            labels.append(Label({
-                'label': 'scammer',
-                **common_scammer_contract_label_properties
-            }))
-
-            labels.append(Label({
-                'label': 'similar-contract',
-                **common_scammer_contract_label_properties
-            }))
-
-            # get all deployed contracts by EOA and add label for those using etherscan or allium
-            try:
-                contracts = block_chain_indexer.get_contracts(scammer_address, chain_id)
-                for contract in contracts:
-                    labels.append(Label({
-                        'entityType': EntityType.Address,
-                        'label': 'scammer',
-                        'entity': contract,
-                        'confidence': confidence * 0.8,
-                        'metadata': {
-                            'address_type': 'contract',
-                            'chain_id': chain_id,
-                            'base_bot_alert_ids': base_bot_alert_id,  # base bot alert id: contract similarity alert id
-                            'base_bot_alert_hashes': base_bot_alert_hash,
-                            'associated_scammer_contract': existing_scammer_contract_address,
-                            'associated_scammer_threat_categories': ','.join(original_threat_categories),
-                            'associated_scammer_alert_hashes': alert_hash,
-                            'deployer_info': f"Deployer {scammer_address} involved in {','.join(original_threat_categories)} scam; this contract may or may not be related to this particular scam, but was created by the scammer.",
-                            'threat_category': ScamDetectorFinding.get_threat_category("SCAM-DETECTOR-SCAMMER-DEPLOYED-CONTRACT"),
-                            'threat_description_url': ScamDetectorFinding.get_threat_description_url(alert_id),
-                            'bot_version': Utils.get_bot_version(),
-                            'label_version': ScamDetectorFinding.LABEL_VERSION,
-                            'logic': 'propagation'
-                        }
-                    }))
-            except Exception as e:
-                logging.warning(f"Error getting contracts for scammer address {scammer_address}: {e}")
-                Utils.ERROR_CACHE.add(Utils.alert_error(str(e), "findings.alert_similar_contract", traceback.format_exc()))
-
-            metadata = {}
-            metadata['scammer_address'] = scammer_address
-            metadata['scammer_contract_address'] = scammer_contract_address
-            metadata['existing_scammer_address'] = existing_scammer_address
-            metadata['existing_scammer_contract_address'] = existing_scammer_contract_address
-            metadata['similarity_score'] = similarity_score
-            metadata['involved_threat_categories'] = ','.join(original_threat_categories)
-            metadata['involved_alert_hash_1'] = alert_hash
-
-            return Finding({
-                'name': 'Scam detector identified an EOA with past alerts mapping to scam behavior',
-                'description': f'{scammer_address} likely involved in a scam ({alert_id}, propagation)',
-                'alert_id': alert_id,
-                'type': FindingType.Scam,
-                'severity': FindingSeverity.Critical,
-                'metadata': metadata,
-                'labels': labels
-            })
-        
-        else:
-            return None
         
     @staticmethod
     def get_url(metadata:dict) -> str:

From 21a7cc9e6ce322c461fa6202154e1b510251c993 Mon Sep 17 00:00:00 2001
From: Roberto Cantu <Roberto_Cantu@live.com>
Date: Mon, 24 Jun 2024 19:10:13 +0800
Subject: [PATCH 04/16] Remove references to 'similar-contract' in README

---
 scam-detector-py/README.md | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/scam-detector-py/README.md b/scam-detector-py/README.md
index f42198ad..29600e5c 100644
--- a/scam-detector-py/README.md
+++ b/scam-detector-py/README.md
@@ -51,8 +51,6 @@ The Scam Detector is opinionated, and consumes evidence of and issues judgment a
    - **Scammer Deployed Contracts**: When an EOA is labeled a scammer, the Scam Detector queries for all contracts created by the EOA at the time the EOA was labeled. This includes direct contract creations (e.g. scammer deploys a token) as well as indirect contract creations (e.g. scammer adds liquidity to a pool that may result in the pool creation). To capture future contract creations, the Scam Detector also monitors for new contract creations by known scammers and emits a new ‘scammer’ label.
    
    - **Scammer Association**: When an EOA is labeled a scammer, label propagation will label associated scammers (e.g. addresses that are a destination for stolen assets) as scammers. [A graph-based approach](https://forta.org/blog/discovering-scammer-networks-with-machine-learning/) is utilized. The threat category of these labels is ‘scammer-association’.
-   
-   - **Similar to Scammer**: For all contracts labeled scam by the Scam Detector, a contract similarity bot will identify and emit labels about contracts that resemble known scammer contacts. The threat category of these labels is ‘similar-contract’.
 
 **False Positive Mitigation**
 
@@ -161,11 +159,6 @@ The complete list of scammer label threat-categories, and conditions under which
     <td>Passthrough Label</td>
     <td>At times, the specific threat category can not be identified, but there is confidence in the address being associated with a scam. In those cases, the threat category is set to unknown.</td>
   </tr>
-  <tr>
-    <td>similar-contract</td>
-    <td>Propagation Label</td>
-    <td>Emitted to identify a newly deployed contract that is similar to a known scammer contract</td>
-  </tr>
   <tr>
     <td>scammer-deployed-contract</td>
     <td>Propagation Label</td>

From 6152c7abb272de804853be073965971922a5dcca Mon Sep 17 00:00:00 2001
From: Roberto Cantu <Roberto_Cantu@live.com>
Date: Mon, 24 Jun 2024 23:43:08 +0800
Subject: [PATCH 05/16] Remove 'similar-contract' handling from FP mitigation
 logic

---
 scam-detector-py/src/agent.py | 41 +++--------------------------------
 1 file changed, 3 insertions(+), 38 deletions(-)

diff --git a/scam-detector-py/src/agent.py b/scam-detector-py/src/agent.py
index 6594d5ac..a2bf3430 100644
--- a/scam-detector-py/src/agent.py
+++ b/scam-detector-py/src/agent.py
@@ -59,7 +59,6 @@
 FINDINGS_CACHE_TRANSACTION = []
 REACTIVE_LIKELY_FPS = {}  # address -> list of label metadata (addresses that are yet to be checked)
 SCAMMER_ASSOCIATION_LABELS = None
-SIMILAR_CONTRACT_LABELS = None
 DF_CONTRACT_SIGNATURES = None
 
 MODEL = None
@@ -908,7 +907,6 @@ def emit_new_fp_finding(w3) -> list:
             raise Exception("CHAIN_ID not set")
     findings = []
 
-    similar_contract_labels = None
     scammer_association_labels = None
 
     try:
@@ -924,10 +922,8 @@ def emit_new_fp_finding(w3) -> list:
                     for address in cluster.split(','):
                         if scammer_association_labels is None:
                             scammer_association_labels = get_scammer_association_labels(w3, forta_explorer)
-                        if similar_contract_labels is None:
-                            similar_contract_labels = get_similar_contract_labels(w3, forta_explorer)
                         
-                        for (entity, label, metadata, unique_key) in obtain_all_fp_labels(w3, address, block_chain_indexer, forta_explorer, similar_contract_labels, scammer_association_labels, CHAIN_ID):
+                        for (entity, label, metadata, unique_key) in obtain_all_fp_labels(w3, address, block_chain_indexer, forta_explorer, scammer_association_labels, CHAIN_ID):
                             logging.info(f"{BOT_VERSION}: Emitting FP mitigation finding for {entity} {label}")
                             update_list(ALERTED_FP_CLUSTERS, ALERTED_FP_CLUSTERS_QUEUE_SIZE, entity, "SCAM-DETECTOR-FALSE-POSITIVE", "ALERTED_FP_CLUSTERS")
                             findings.append(ScamDetectorFinding.alert_FP(w3, entity, label, metadata, [unique_key]))
@@ -949,7 +945,6 @@ def update_reactive_likely_fps(w3, current_date) -> list:
     logging.info(f"{BOT_VERSION}: update reactive likely fps called")
     global REACTIVE_LIKELY_FPS
     global ALERTED_FP_CLUSTERS
-    global SIMILAR_CONTRACT_LABELS
     global SCAMMER_ASSOCIATION_LABELS
     global LAST_PROCESSED_TIME
     findings = []
@@ -1006,7 +1001,6 @@ def update_reactive_likely_fps(w3, current_date) -> list:
         if REACTIVE_LIKELY_FPS:
             if current_date.minute == 5:
                 # Refresh the data every hour (at the 05 minute)
-                SIMILAR_CONTRACT_LABELS = None
                 SCAMMER_ASSOCIATION_LABELS = None
 
             address = next(iter(REACTIVE_LIKELY_FPS), None)
@@ -1018,9 +1012,7 @@ def update_reactive_likely_fps(w3, current_date) -> list:
                 findings.append(ScamDetectorFinding.alert_FP(w3, address, "scammer", metadata_array, unique_keys_array))
                 if SCAMMER_ASSOCIATION_LABELS is None:
                         SCAMMER_ASSOCIATION_LABELS = get_scammer_association_labels(w3, forta_explorer)
-                if SIMILAR_CONTRACT_LABELS is None:
-                    SIMILAR_CONTRACT_LABELS = get_similar_contract_labels(w3, forta_explorer)
-                for (entity, label, metadata, unique_key) in obtain_all_fp_labels(w3, address, block_chain_indexer, forta_explorer, SIMILAR_CONTRACT_LABELS, SCAMMER_ASSOCIATION_LABELS, CHAIN_ID):
+                for (entity, label, metadata, unique_key) in obtain_all_fp_labels(w3, address, block_chain_indexer, forta_explorer, SCAMMER_ASSOCIATION_LABELS, CHAIN_ID):
                         logging.info(f"{BOT_VERSION}: Processing entity: {entity} - {label}")
                         if entity != address:
                             logging.info(f"{BOT_VERSION}: Emitting FP mitigation finding for {entity} {label}")
@@ -1041,19 +1033,6 @@ def get_value(items: dict, key: str):
 
     return v
 
-# contains from_entity, from_entity_deployer, to_entity, to_entity_deployer
-def get_similar_contract_labels(w3, forta_explorer) -> pd.DataFrame:
-    source_id = SCAM_DETECTOR_BETA_ALT_BOT_ID if Utils.is_beta_alt() else (SCAM_DETECTOR_BETA_BOT_ID if Utils.is_beta() else SCAM_DETECTOR_BOT_ID)
-    df_labels = forta_explorer.get_labels(source_id, datetime(2023,3,1), datetime.now(), label_query = "similar-contract")
-    df_labels.rename(columns={'entity': 'to_entity'}, inplace=True)
-    df_labels['from_entity'] = df_labels['metadata'].apply(lambda x: get_value(x, "associated_scammer_contract"))
-    df_labels['deployer_info'] = df_labels['metadata'].apply(lambda x: get_value(x, "deployer_info"))
-    df_labels['from_entity_deployer'] = df_labels['deployer_info'].apply(lambda x: x[216:216+42])
-    df_labels['to_entity_deployer'] = df_labels['deployer_info'].apply(lambda x: x[9:9+42])
-    # drop all but from_entity and to_entity
-    df_labels.drop(df_labels.columns.difference(['from_entity', 'from_entity_deployer', 'to_entity', 'to_entity_deployer']), axis=1, inplace=True)                                      
-    return df_labels
-
 
 
 # contains from_entity and to_entity
@@ -1073,7 +1052,7 @@ def get_scammer_association_labels(w3, forta_explorer) -> pd.DataFrame:
 # this function returns a list of all labels that need to be removed with the address as a starting point
 # it contain a queue of addresses to process and a set of addresses that have already been processed
 # returns a tuple of (entity, threat_category, metadata); metadata is a tuple of key=value pairs because its not hashable otherwise
-def obtain_all_fp_labels(w3, starting_address: str, block_chain_indexer, forta_explorer, similar_contract_labels: pd.DataFrame, scammer_association_labels: pd.DataFrame, chain_id: int) -> set:
+def obtain_all_fp_labels(w3, starting_address: str, block_chain_indexer, forta_explorer, scammer_association_labels: pd.DataFrame, chain_id: int) -> set:
     global ALERTED_FP_CLUSTERS
     global ALERTED_FP_CLUSTERS_QUEUE_SIZE
 
@@ -1104,20 +1083,6 @@ def obtain_all_fp_labels(w3, starting_address: str, block_chain_indexer, forta_e
                     logging.info(f"{BOT_VERSION}: {starting_address} adding FP label threat category {threat_category} for contract {address}")
                     fp_labels.add((address,label, tuple([f"{k}={v}" for k, v in row['metadata'].items()]), unique_key))
 
-                    similar_contract_labels_for_address = similar_contract_labels[similar_contract_labels['from_entity'] == address]
-                    for index, row in similar_contract_labels_for_address.iterrows():
-                        logging.info(f"{BOT_VERSION}: {starting_address} adding to process due to contract similarity from_entity {address} -> to_entity {row['to_entity']}, to_entity_deployer {row['to_entity_deployer']}, from_entity_deployer {row['from_entity_deployer']}")
-                        to_process.add(row['to_entity'])
-                        to_process.add(row['to_entity_deployer'])
-                        to_process.add(row['from_entity_deployer'])
-
-                    similar_contract_labels_for_address = similar_contract_labels[similar_contract_labels['to_entity'] == address]
-                    for index, row in similar_contract_labels_for_address.iterrows():
-                        logging.info(f"{BOT_VERSION}: {starting_address} adding to process due to contract similarity to_entity {address} -> from_entity {row['from_entity']}, from_entity_deployer {row['from_entity_deployer']}, to_entity_deployer {row['to_entity_deployer']}")
-                        to_process.add(row['from_entity'])
-                        to_process.add(row['from_entity_deployer'])
-                        to_process.add(row['to_entity_deployer'])
-
 
         else:
             forta_labels = forta_explorer.get_labels(source_id, datetime(2023,1,1), datetime.now(), entity=address)

From 658493a88869ed4dbb93b5f2a656f0c740e77465 Mon Sep 17 00:00:00 2001
From: Roberto Cantu <Roberto_Cantu@live.com>
Date: Tue, 25 Jun 2024 17:28:32 +0800
Subject: [PATCH 06/16] Remove more 'similar-contract' references from README

---
 scam-detector-py/README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scam-detector-py/README.md b/scam-detector-py/README.md
index 29600e5c..9ed5d295 100644
--- a/scam-detector-py/README.md
+++ b/scam-detector-py/README.md
@@ -234,11 +234,11 @@ For reference, each field is described below:
   </tr>
   <tr>
     <td>base_bot_alert_ids</td>
-    <td>When the label is emitted via passthrough, ML, similar contract, or an association alert, this field will contain the alert ids of the base bot alerts utilized to derive the label.</td>
+    <td>When the label is emitted via passthrough, ML, or an association alert, this field will contain the alert ids of the base bot alerts utilized to derive the label.</td>
   </tr>
   <tr>
     <td>base_bot_alert_hashes</td>
-    <td>When the label is emitted via passthrough, ML, similar contract, or an association alert, this field will contain the alert hashes of the base bot alerts utilized to derive the label.</td>
+    <td>When the label is emitted via passthrough, ML, or an association alert, this field will contain the alert hashes of the base bot alerts utilized to derive the label.</td>
   </tr>
   <tr>
     <td>deployer_info</td>
@@ -320,7 +320,7 @@ Address poisoners are the initiator of the address poisoning activity. A simple
 
 Native ice phishing are straight transfers of native assets to the scammer. Matching the to address of the transaction against Forta threat intelligence yield transactions for this type of scam.
 
-### soft-rug-pull, hard-rug-pull, rake-token, impersonating-token, similar-contract, scammer-deployed-contract
+### soft-rug-pull, hard-rug-pull, rake-token, impersonating-token, scammer-deployed-contract
 
 These threat categories all point to contracts that a user should not be interacting with. A check of the to address or the transaction trace data against Forta threat intelligence yields transactions where this may be the case. 
 

From 8e0e28a75f26618a77e5abe0f85ea37aca3bd1de Mon Sep 17 00:00:00 2001
From: Roberto Cantu <Roberto_Cantu@live.com>
Date: Tue, 25 Jun 2024 17:33:57 +0800
Subject: [PATCH 07/16] Remove additional 'similar-contract' constants

---
 scam-detector-py/src/constants.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/scam-detector-py/src/constants.py b/scam-detector-py/src/constants.py
index 3e47ffc9..6c322f0d 100644
--- a/scam-detector-py/src/constants.py
+++ b/scam-detector-py/src/constants.py
@@ -74,7 +74,6 @@
                 ("0x1a69f5ec8ef436e4093f9ec4ce1a55252b7a9a2d2c386e3f950b79d164bc99e0", "NIP-1", "PassThrough", "SCAM-DETECTOR-SOCIAL-ENG-NATIVE-ICE-PHISHING"),  # Native ice phishing with a social eng component (aka a function parameter)
                 ("0x8732dbb3858d65844d940f5de3705b4161c05258bdfedf1ff5afb6683e1274e5", "NFT-WASH-TRADE", "PassThrough", "SCAM-DETECTOR-WASH-TRADE"),  # wash trading bot maintained by nethermind
                 ("0x067e4c4f771f288c686efa574b685b98a92918f038a478b82c9ac5b5b6472732", "NFT-WASH-TRADE", "Combination", ""),  # wash trading bot - for ML bot; need to replace after retraining
-                ("0x3acf759d5e180c05ecabac2dbd11b79a1f07e746121fc3c86910aaace8910560", "NEW-SCAMMER-CONTRACT-CODE-HASH", "PassThrough", "SCAM-DETECTOR-SIMILAR-CONTRACT"),  # contract similarity bot
                 ("0x1a69f5ec8ef436e4093f9ec4ce1a55252b7a9a2d2c386e3f950b79d164bc99e0", "NIP-5", "PassThrough", "SCAM-DETECTOR-SOCIAL-ENG-NATIVE-ICE-PHISHING"),  # Native ice phishing using soc eng contract (static)
                 ("0x1a69f5ec8ef436e4093f9ec4ce1a55252b7a9a2d2c386e3f950b79d164bc99e0", "NIP-6", "PassThrough", "SCAM-DETECTOR-SOCIAL-ENG-NATIVE-ICE-PHISHING"),  # Native ice phishing using soc eng contract (dynamic)
                 ("0x1a69f5ec8ef436e4093f9ec4ce1a55252b7a9a2d2c386e3f950b79d164bc99e0", "NIP-8", "PassThrough", "SCAM-DETECTOR-SOCIAL-ENG-NATIVE-ICE-PHISHING"),  # Native ice phishing using soc eng contract (dynamic)
@@ -291,7 +290,6 @@
         "address-poisoner": 0.85,
         "impersonating-token": 0.99,
         "attack-stages": 0.25,
-        "similar-contract": 0.99,
         "scammer-deployed-contract": 0.99,
         "scammer-association": 0.60,
         "private-key-compromise": 0.4,

From a3f84d653446d22201a024c4de34f599b3c4bad5 Mon Sep 17 00:00:00 2001
From: Roberto Cantu <Roberto_Cantu@live.com>
Date: Tue, 25 Jun 2024 17:49:26 +0800
Subject: [PATCH 08/16] Update 'package-lock.json'

---
 scam-detector-py/package-lock.json | 150 +++++++++++++++++------------
 1 file changed, 89 insertions(+), 61 deletions(-)

diff --git a/scam-detector-py/package-lock.json b/scam-detector-py/package-lock.json
index 6267b3a1..efe6acc8 100644
--- a/scam-detector-py/package-lock.json
+++ b/scam-detector-py/package-lock.json
@@ -687,25 +687,25 @@
       }
     },
     "node_modules/@grpc/grpc-js": {
-      "version": "1.9.12",
-      "resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.9.12.tgz",
-      "integrity": "sha512-Um5MBuge32TS3lAKX02PGCnFM4xPT996yLgZNb5H03pn6NyJ4Iwn5YcPq6Jj9yxGRk7WOgaZFtVRH5iTdYBeUg==",
+      "version": "1.10.9",
+      "resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.10.9.tgz",
+      "integrity": "sha512-5tcgUctCG0qoNyfChZifz2tJqbRbXVO9J7X6duFcOjY3HUNCxg5D0ZCK7EP9vIcZ0zRpLU9bWkyCqVCLZ46IbQ==",
       "dependencies": {
-        "@grpc/proto-loader": "^0.7.8",
-        "@types/node": ">=12.12.47"
+        "@grpc/proto-loader": "^0.7.13",
+        "@js-sdsl/ordered-map": "^4.4.2"
       },
       "engines": {
-        "node": "^8.13.0 || >=10.10.0"
+        "node": ">=12.10.0"
       }
     },
     "node_modules/@grpc/grpc-js/node_modules/@grpc/proto-loader": {
-      "version": "0.7.10",
-      "resolved": "https://registry.npmjs.org/@grpc/proto-loader/-/proto-loader-0.7.10.tgz",
-      "integrity": "sha512-CAqDfoaQ8ykFd9zqBDn4k6iWT9loLAlc2ETmDFS9JCD70gDcnA4L3AFEo2iV7KyAtAAHFW9ftq1Fz+Vsgq80RQ==",
+      "version": "0.7.13",
+      "resolved": "https://registry.npmjs.org/@grpc/proto-loader/-/proto-loader-0.7.13.tgz",
+      "integrity": "sha512-AiXO/bfe9bmxBjxxtYxFAXGZvMaN5s8kO+jBHAJCON8rJoB5YS/D6X7ZNc6XQkuHNmyl4CYaMI1fJ/Gn27RGGw==",
       "dependencies": {
         "lodash.camelcase": "^4.3.0",
         "long": "^5.0.0",
-        "protobufjs": "^7.2.4",
+        "protobufjs": "^7.2.5",
         "yargs": "^17.7.2"
       },
       "bin": {
@@ -721,9 +721,9 @@
       "integrity": "sha512-lcHwpNoggQTObv5apGNCTdJrO69eHOZMi4BNC+rTLER8iHAqGrUVeLh/irVIM7zTw2bOXA8T6uNPeujwOLg/2Q=="
     },
     "node_modules/@grpc/grpc-js/node_modules/protobufjs": {
-      "version": "7.2.5",
-      "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.2.5.tgz",
-      "integrity": "sha512-gGXRSXvxQ7UiPgfw8gevrfRWcTlSbOFg+p/N+JVJEK5VhueL2miT6qTymqAmjr1Q5WbOCyJbyrk6JfWKwlFn6A==",
+      "version": "7.3.2",
+      "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.3.2.tgz",
+      "integrity": "sha512-RXyHaACeqXeqAKGLDl68rQKbmObRsTIn4TYVUUug1KfS47YWCo5MacGITEryugIgZqORCvJWEk4l449POg5Txg==",
       "hasInstallScript": true,
       "dependencies": {
         "@protobufjs/aspromise": "^1.1.2",
@@ -796,6 +796,15 @@
         "node": ">=10"
       }
     },
+    "node_modules/@js-sdsl/ordered-map": {
+      "version": "4.4.2",
+      "resolved": "https://registry.npmjs.org/@js-sdsl/ordered-map/-/ordered-map-4.4.2.tgz",
+      "integrity": "sha512-iUKgm52T8HOE/makSxjqoWhe95ZJA1/G1sYsGev2JDKUSS14KAgg1LHb+Ba+IPow0xflbnSkOsZcO08C7w1gYw==",
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/js-sdsl"
+      }
+    },
     "node_modules/@protobufjs/aspromise": {
       "version": "1.1.2",
       "resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz",
@@ -956,11 +965,13 @@
       }
     },
     "node_modules/axios": {
-      "version": "0.21.4",
-      "resolved": "https://registry.npmjs.org/axios/-/axios-0.21.4.tgz",
-      "integrity": "sha512-ut5vewkiu8jjGBdqpM44XxjuCjq9LAKeHVmoVfHVzy8eHgxxq8SbAVQNovDA8mVi05kP0Ea/n/UzcSHcTJQfNg==",
+      "version": "1.7.2",
+      "resolved": "https://registry.npmjs.org/axios/-/axios-1.7.2.tgz",
+      "integrity": "sha512-2A8QhOMrbomlDuiLeK9XibIBzuHeRcqqNOHp0Cyp5EoJ1IFDh+XZH3A6BkXtv0K4gFGCI0Y4BM7B1wOEi0Rmgw==",
       "dependencies": {
-        "follow-redirects": "^1.14.0"
+        "follow-redirects": "^1.15.6",
+        "form-data": "^4.0.0",
+        "proxy-from-env": "^1.1.0"
       }
     },
     "node_modules/balanced-match": {
@@ -1024,12 +1035,12 @@
       }
     },
     "node_modules/braces": {
-      "version": "3.0.2",
-      "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz",
-      "integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==",
+      "version": "3.0.3",
+      "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz",
+      "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==",
       "dev": true,
       "dependencies": {
-        "fill-range": "^7.0.1"
+        "fill-range": "^7.1.1"
       },
       "engines": {
         "node": ">=8"
@@ -1423,9 +1434,9 @@
       "integrity": "sha512-W+KJc2dmILlPplD/H4K9l9LcAHAfPtP6BY84uVLXQ6Evcz9Lcg33Y2z1IVblT6xdY54PXYVHEv+0Wpq8Io6zkA=="
     },
     "node_modules/fill-range": {
-      "version": "7.0.1",
-      "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz",
-      "integrity": "sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==",
+      "version": "7.1.1",
+      "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz",
+      "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==",
       "dev": true,
       "dependencies": {
         "to-regex-range": "^5.0.1"
@@ -1453,9 +1464,9 @@
       "integrity": "sha512-36yxDn5H7OFZQla0/jFJmbIKTdZAQHngCedGxiMmpNfEZM0sdEeT+WczLQrjK6D7o2aiyLYDnkw0R3JK0Qv1RQ=="
     },
     "node_modules/follow-redirects": {
-      "version": "1.15.3",
-      "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.3.tgz",
-      "integrity": "sha512-1VzOtuEM8pC9SFU1E+8KfTjZyMztRsgEfwQl44z8A25uy13jSzTj6dyK2Df52iV0vgHCfBwLhDWevLn95w5v6Q==",
+      "version": "1.15.6",
+      "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz",
+      "integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==",
       "funding": [
         {
           "type": "individual",
@@ -1485,16 +1496,16 @@
       }
     },
     "node_modules/forta-agent": {
-      "version": "0.1.45",
-      "resolved": "https://registry.npmjs.org/forta-agent/-/forta-agent-0.1.45.tgz",
-      "integrity": "sha512-QP+qsWPmA1kvyHVFSpGSnXxTODOpeoxfGil0HmcpO6dOuH+f6FY54oPyjrJ2Eg+kiB1skO9a1SIdW16N1UmELg==",
+      "version": "0.1.48",
+      "resolved": "https://registry.npmjs.org/forta-agent/-/forta-agent-0.1.48.tgz",
+      "integrity": "sha512-fk3mar7/Avqg/4OHFmgv01ww/azr1XM+g5KcSnwvNxZy3KDMi7aFp1jAjPCsBjs8ZyVcR03ITUlbtFpRVgZB4Q==",
       "dependencies": {
         "@grpc/grpc-js": "^1.3.6",
         "@grpc/proto-loader": "^0.6.4",
         "@types/uuid": "^8.3.4",
         "async-retry": "^1.3.3",
         "awilix": "^4.3.4",
-        "axios": "^0.21.1",
+        "axios": "^1.6.2",
         "base64-arraybuffer": "^1.0.2",
         "ethers": "^5.5.1",
         "flat-cache": "^3.0.4",
@@ -2158,6 +2169,11 @@
         "pbts": "bin/pbts"
       }
     },
+    "node_modules/proxy-from-env": {
+      "version": "1.1.0",
+      "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
+      "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg=="
+    },
     "node_modules/pstree.remy": {
       "version": "1.1.8",
       "resolved": "https://registry.npmjs.org/pstree.remy/-/pstree.remy-1.1.8.tgz",
@@ -2995,22 +3011,22 @@
       }
     },
     "@grpc/grpc-js": {
-      "version": "1.9.12",
-      "resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.9.12.tgz",
-      "integrity": "sha512-Um5MBuge32TS3lAKX02PGCnFM4xPT996yLgZNb5H03pn6NyJ4Iwn5YcPq6Jj9yxGRk7WOgaZFtVRH5iTdYBeUg==",
+      "version": "1.10.9",
+      "resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.10.9.tgz",
+      "integrity": "sha512-5tcgUctCG0qoNyfChZifz2tJqbRbXVO9J7X6duFcOjY3HUNCxg5D0ZCK7EP9vIcZ0zRpLU9bWkyCqVCLZ46IbQ==",
       "requires": {
-        "@grpc/proto-loader": "^0.7.8",
-        "@types/node": ">=12.12.47"
+        "@grpc/proto-loader": "^0.7.13",
+        "@js-sdsl/ordered-map": "^4.4.2"
       },
       "dependencies": {
         "@grpc/proto-loader": {
-          "version": "0.7.10",
-          "resolved": "https://registry.npmjs.org/@grpc/proto-loader/-/proto-loader-0.7.10.tgz",
-          "integrity": "sha512-CAqDfoaQ8ykFd9zqBDn4k6iWT9loLAlc2ETmDFS9JCD70gDcnA4L3AFEo2iV7KyAtAAHFW9ftq1Fz+Vsgq80RQ==",
+          "version": "0.7.13",
+          "resolved": "https://registry.npmjs.org/@grpc/proto-loader/-/proto-loader-0.7.13.tgz",
+          "integrity": "sha512-AiXO/bfe9bmxBjxxtYxFAXGZvMaN5s8kO+jBHAJCON8rJoB5YS/D6X7ZNc6XQkuHNmyl4CYaMI1fJ/Gn27RGGw==",
           "requires": {
             "lodash.camelcase": "^4.3.0",
             "long": "^5.0.0",
-            "protobufjs": "^7.2.4",
+            "protobufjs": "^7.2.5",
             "yargs": "^17.7.2"
           }
         },
@@ -3020,9 +3036,9 @@
           "integrity": "sha512-lcHwpNoggQTObv5apGNCTdJrO69eHOZMi4BNC+rTLER8iHAqGrUVeLh/irVIM7zTw2bOXA8T6uNPeujwOLg/2Q=="
         },
         "protobufjs": {
-          "version": "7.2.5",
-          "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.2.5.tgz",
-          "integrity": "sha512-gGXRSXvxQ7UiPgfw8gevrfRWcTlSbOFg+p/N+JVJEK5VhueL2miT6qTymqAmjr1Q5WbOCyJbyrk6JfWKwlFn6A==",
+          "version": "7.3.2",
+          "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.3.2.tgz",
+          "integrity": "sha512-RXyHaACeqXeqAKGLDl68rQKbmObRsTIn4TYVUUug1KfS47YWCo5MacGITEryugIgZqORCvJWEk4l449POg5Txg==",
           "requires": {
             "@protobufjs/aspromise": "^1.1.2",
             "@protobufjs/base64": "^1.1.2",
@@ -3083,6 +3099,11 @@
         }
       }
     },
+    "@js-sdsl/ordered-map": {
+      "version": "4.4.2",
+      "resolved": "https://registry.npmjs.org/@js-sdsl/ordered-map/-/ordered-map-4.4.2.tgz",
+      "integrity": "sha512-iUKgm52T8HOE/makSxjqoWhe95ZJA1/G1sYsGev2JDKUSS14KAgg1LHb+Ba+IPow0xflbnSkOsZcO08C7w1gYw=="
+    },
     "@protobufjs/aspromise": {
       "version": "1.1.2",
       "resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz",
@@ -3230,11 +3251,13 @@
       }
     },
     "axios": {
-      "version": "0.21.4",
-      "resolved": "https://registry.npmjs.org/axios/-/axios-0.21.4.tgz",
-      "integrity": "sha512-ut5vewkiu8jjGBdqpM44XxjuCjq9LAKeHVmoVfHVzy8eHgxxq8SbAVQNovDA8mVi05kP0Ea/n/UzcSHcTJQfNg==",
+      "version": "1.7.2",
+      "resolved": "https://registry.npmjs.org/axios/-/axios-1.7.2.tgz",
+      "integrity": "sha512-2A8QhOMrbomlDuiLeK9XibIBzuHeRcqqNOHp0Cyp5EoJ1IFDh+XZH3A6BkXtv0K4gFGCI0Y4BM7B1wOEi0Rmgw==",
       "requires": {
-        "follow-redirects": "^1.14.0"
+        "follow-redirects": "^1.15.6",
+        "form-data": "^4.0.0",
+        "proxy-from-env": "^1.1.0"
       }
     },
     "balanced-match": {
@@ -3278,12 +3301,12 @@
       }
     },
     "braces": {
-      "version": "3.0.2",
-      "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz",
-      "integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==",
+      "version": "3.0.3",
+      "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz",
+      "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==",
       "dev": true,
       "requires": {
-        "fill-range": "^7.0.1"
+        "fill-range": "^7.1.1"
       }
     },
     "brorand": {
@@ -3624,9 +3647,9 @@
       "integrity": "sha512-W+KJc2dmILlPplD/H4K9l9LcAHAfPtP6BY84uVLXQ6Evcz9Lcg33Y2z1IVblT6xdY54PXYVHEv+0Wpq8Io6zkA=="
     },
     "fill-range": {
-      "version": "7.0.1",
-      "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz",
-      "integrity": "sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==",
+      "version": "7.1.1",
+      "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz",
+      "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==",
       "dev": true,
       "requires": {
         "to-regex-range": "^5.0.1"
@@ -3648,9 +3671,9 @@
       "integrity": "sha512-36yxDn5H7OFZQla0/jFJmbIKTdZAQHngCedGxiMmpNfEZM0sdEeT+WczLQrjK6D7o2aiyLYDnkw0R3JK0Qv1RQ=="
     },
     "follow-redirects": {
-      "version": "1.15.3",
-      "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.3.tgz",
-      "integrity": "sha512-1VzOtuEM8pC9SFU1E+8KfTjZyMztRsgEfwQl44z8A25uy13jSzTj6dyK2Df52iV0vgHCfBwLhDWevLn95w5v6Q=="
+      "version": "1.15.6",
+      "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz",
+      "integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA=="
     },
     "form-data": {
       "version": "4.0.0",
@@ -3663,16 +3686,16 @@
       }
     },
     "forta-agent": {
-      "version": "0.1.45",
-      "resolved": "https://registry.npmjs.org/forta-agent/-/forta-agent-0.1.45.tgz",
-      "integrity": "sha512-QP+qsWPmA1kvyHVFSpGSnXxTODOpeoxfGil0HmcpO6dOuH+f6FY54oPyjrJ2Eg+kiB1skO9a1SIdW16N1UmELg==",
+      "version": "0.1.48",
+      "resolved": "https://registry.npmjs.org/forta-agent/-/forta-agent-0.1.48.tgz",
+      "integrity": "sha512-fk3mar7/Avqg/4OHFmgv01ww/azr1XM+g5KcSnwvNxZy3KDMi7aFp1jAjPCsBjs8ZyVcR03ITUlbtFpRVgZB4Q==",
       "requires": {
         "@grpc/grpc-js": "^1.3.6",
         "@grpc/proto-loader": "^0.6.4",
         "@types/uuid": "^8.3.4",
         "async-retry": "^1.3.3",
         "awilix": "^4.3.4",
-        "axios": "^0.21.1",
+        "axios": "^1.6.2",
         "base64-arraybuffer": "^1.0.2",
         "ethers": "^5.5.1",
         "flat-cache": "^3.0.4",
@@ -4186,6 +4209,11 @@
         "long": "^4.0.0"
       }
     },
+    "proxy-from-env": {
+      "version": "1.1.0",
+      "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz",
+      "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg=="
+    },
     "pstree.remy": {
       "version": "1.1.8",
       "resolved": "https://registry.npmjs.org/pstree.remy/-/pstree.remy-1.1.8.tgz",

From e4a6797a4bc65e457b94791ade555e7394b2965f Mon Sep 17 00:00:00 2001
From: Roberto Cantu <Roberto_Cantu@live.com>
Date: Tue, 25 Jun 2024 22:24:28 +0800
Subject: [PATCH 09/16] Remove tests related to 'contract-similarity'

---
 scam-detector-py/src/agent_test.py    | 89 ++-------------------------
 scam-detector-py/src/findings_test.py | 70 ---------------------
 2 files changed, 4 insertions(+), 155 deletions(-)

diff --git a/scam-detector-py/src/agent_test.py b/scam-detector-py/src/agent_test.py
index 53fe784a..db1591c7 100644
--- a/scam-detector-py/src/agent_test.py
+++ b/scam-detector-py/src/agent_test.py
@@ -824,56 +824,6 @@ def test_detect_twitter_bot_scammer(self):
         assert finding.metadata is not None, "metadata should not be empty"
         assert len(finding.labels) > 0, "labels should not be empty"
 
-    def test_detect_alert_similar_contract(self):
-        agent.initialize()
-        agent.item_id_prefix = "test_" + str(random.randint(0, 1000000))
-
-        # Read the content of package.json and store the original "name" field
-        original_name = ""
-        with open("package.json", "r") as package_file:
-            package_data = json.load(package_file)
-            original_name = package_data["name"]
-
-        # Modify the "name" field to "beta" (as alt doesn't return labels for the test)
-        package_data["name"] = "beta"
-        with open("package.json", "w") as package_file:
-            json.dump(package_data, package_file, indent=2)
-        
-        bot_id = "0x3acf759d5e180c05ecabac2dbd11b79a1f07e746121fc3c86910aaace8910560"
-        alert_id = "NEW-SCAMMER-CONTRACT-CODE-HASH"
-        description = "0xd359b4058cfbc9a5ef2889bc484cbbffbe3fa254f6f36845be6a4f5618531bd5 (NEW-SCAMMER-CONTRACT-CODE-HASH)"
-
-        metadata = {"alert_hash":"0xcfc5f89ac8c801901724621470fb7e3efec1b0cb5e1af625b82d587b788cdc86","new_scammer_contract_address":"0xfe551e214563283c8ab5df967d7d69f630b64079","new_scammer_eoa":"0xa4f58353711f9f29b483fe41be8f0dcc893d9f8a","scammer_contract_address":"0x200c5fa46720e40c375dd276a816da905b19081e","scammer_eoa":"0x43cf4c4759ebe43aa6e21e13ece8546dcfcb728c","similarity_hash":"20d794469ef5c3f5937d8b2ad1505e57a97b6fa0205b9fba965d71e9a4f66ea6","similarity_score":"0.9768354296684265"}
-        alert_event = TestScamDetector.generate_alert(bot_id, alert_id, description, metadata)
-
-        findings = agent.detect_scam(w3, alert_event, True)
-
-        # Revert the "name" field back to its original value
-        package_data["name"] = original_name
-        with open("package.json", "w") as package_file:
-            json.dump(package_data, package_file, indent=2)
-
-        assert len(findings) == 1, "this should have triggered a finding"
-        assert findings[0].alert_id == "SCAM-DETECTOR-SIMILAR-CONTRACT"
-        assert findings[0].metadata['scammer_address'] == "0xa4f58353711f9f29b483fe41be8f0dcc893d9f8a", "metadata should not be empty"
-        assert findings[0].metadata['scammer_contract_address'] == "0xfe551e214563283c8ab5df967d7d69f630b64079", "metadata should not be empty"
-        assert findings[0].metadata['existing_scammer_address'] == "0x43cf4c4759ebe43aa6e21e13ece8546dcfcb728c", "metadata should not be empty"
-        assert findings[0].metadata['existing_scammer_contract_address'] == "0x200c5fa46720e40c375dd276a816da905b19081e", "metadata should not be empty"
-        assert findings[0].metadata['similarity_score'] == "0.9768354296684265", "metadata should not be empty"
-        assert findings[0].metadata['involved_threat_categories'] == "soft-rug-pull", "metadata should not be empty"
-        assert findings[0].metadata['involved_alert_hash_1'] == "0xcfc5f89ac8c801901724621470fb7e3efec1b0cb5e1af625b82d587b788cdc86", "metadata should not be empty"
-
-        assert findings[0].labels is not None, "labels should not be empty"
-        label = findings[0].labels[0]
-        assert label.entity == "0xa4f58353711f9f29b483fe41be8f0dcc893d9f8a", "entity should be attacker address"
-        assert label.label == "scammer", "entity should labeled as scam"
-        assert label.confidence == Utils.get_confidence_value('similar-contract'), "entity should labeled with 0.7 confidence"
-
-        label = findings[0].labels[1]
-        assert label.entity == "0xfe551e214563283c8ab5df967d7d69f630b64079", "entity should be attacker address"
-        assert label.label == "scammer", "entity should labeled as scam"
-        assert label.confidence == Utils.get_confidence_value('similar-contract'), "entity should labeled with 0.7 confidence"
-
     def test_put_entity_cluster(self):
         agent.initialize()
         agent.item_id_prefix = "test_" + str(random.randint(0, 1000000))
@@ -1051,6 +1001,7 @@ def test_get_score_empty_features(self):
         score = agent.get_model_score(df_expected_feature_vector)
         assert score < MODEL_ALERT_THRESHOLD_LOOSE, "should less than model threshold"
 
+    # TODO: Update test because score is below threshold
     def test_scam_critical(self):
         agent.initialize()
         agent.item_id_prefix = "test_" + str(random.randint(0, 1000000))
@@ -1152,21 +1103,6 @@ def test_fp_mitigation_proper_chain_id(self):
         assert label.entity == "0x8cc6b83d52b67f629fb3c5978cda3a6c2a456edc"
         assert label.metadata['address_type'] == "EOA"
 
-    def test_get_similar_contract_labels(self):
-        agent.clear_state()
-        agent.initialize()
-        similar_contract_labels = agent.get_similar_contract_labels(w3, forta_explorer)
-
-        # from_address was detected first and it propagated its label to the to_address
-        from_address = "0xfa8c1a1dddea2c06364c9e6ab31772f020f5efc6"
-        from_address_deployer = "0x2320a28f52334d62622cc2eafa15de55f9987ecc"
-        to_address = "0xfa8c1a1dddea2c06364c9e6ab31772f020f5efc5"
-        to_address_deployer = "0x2320a28f52334d62622cc2eafa15de55f9987eaa"
-
-        assert similar_contract_labels[similar_contract_labels['from_entity'] == from_address].iloc[0]['to_entity'] == to_address
-        assert similar_contract_labels[similar_contract_labels['from_entity'] == from_address].iloc[0]['from_entity_deployer'] == from_address_deployer
-        assert similar_contract_labels[similar_contract_labels['to_entity'] == to_address].iloc[0]['to_entity_deployer'] == to_address_deployer
-
     def test_get_scammer_association_labels(self):
         agent.clear_state()
         agent.initialize()
@@ -1183,10 +1119,9 @@ def test_obtain_all_fp_labels_deployed_contracts(self):
         agent.clear_state()
         agent.initialize()
 
-        similar_contract_labels = pd.DataFrame(columns=['from_entity', 'to_entity'])
         scammer_association_labels = pd.DataFrame(columns=['from_entity', 'to_entity'])
 
-        fp_labels = agent.obtain_all_fp_labels(w3, EOA_ADDRESS_SMALL_TX, block_chain_indexer, forta_explorer, similar_contract_labels, scammer_association_labels, 1)
+        fp_labels = agent.obtain_all_fp_labels(w3, EOA_ADDRESS_SMALL_TX, block_chain_indexer, forta_explorer, scammer_association_labels, 1)
         sorted_fp_labels = sorted(fp_labels, key=lambda x: x[0])
         sorted_fp_labels = list(sorted_fp_labels)
         assert len(sorted_fp_labels) == 2, "should have two FP label; one for the EOA, one for the contract"
@@ -1208,11 +1143,10 @@ def test_obtain_all_fp_labels_scammer_association(self):
         agent.clear_state()
         agent.initialize()
 
-        similar_contract_labels = pd.DataFrame(columns=['from_entity', 'to_entity'])
         scammer_association_labels = pd.DataFrame(columns=['from_entity', 'to_entity'])
         scammer_association_labels = pd.concat([scammer_association_labels, pd.DataFrame({'from_entity': [EOA_ADDRESS_LARGE_TX.lower()], 'to_entity': [EOA_ADDRESS_SMALL_TX.lower()]})], ignore_index=True)
 
-        fp_labels = agent.obtain_all_fp_labels(w3, EOA_ADDRESS_LARGE_TX, block_chain_indexer, forta_explorer, similar_contract_labels, scammer_association_labels, 1)
+        fp_labels = agent.obtain_all_fp_labels(w3, EOA_ADDRESS_LARGE_TX, block_chain_indexer, forta_explorer, scammer_association_labels, 1)
         sorted_fp_labels = sorted(fp_labels, key=lambda x: x[0])
         sorted_fp_labels = list(sorted_fp_labels)
         assert len(sorted_fp_labels) == 4, "should have four FP labels; one for each EOA and contract"
@@ -1229,22 +1163,7 @@ def test_obtain_all_fp_labels_scammer_association(self):
         assert 'threat_category=address-poisoner' in label_3[2]
         
        
-    def test_obtain_all_fp_labels_similar_contract(self):
-        # got address A that deployed contract B; contract B propagated to contract D
-        agent.clear_state()
-        agent.initialize()
-
-        similar_contract_labels = pd.DataFrame(columns=['from_entity', 'to_entity'])
-        new_labels = pd.DataFrame({'from_entity': [CONTRACT.lower()], 'from_entity_deployer': [EOA_ADDRESS_LARGE_TX.lower()], 'to_entity_deployer': [EOA_ADDRESS_SMALL_TX.lower()], 'to_entity': [CONTRACT2.lower()]})
-        similar_contract_labels = pd.concat([similar_contract_labels, new_labels], ignore_index=True)
-        scammer_association_labels = pd.DataFrame(columns=['from_entity', 'to_entity'])
-        
-        fp_labels = agent.obtain_all_fp_labels(w3, EOA_ADDRESS_LARGE_TX, block_chain_indexer, forta_explorer, similar_contract_labels, scammer_association_labels, 1)
-        sorted_fp_labels = sorted(fp_labels, key=lambda x: x[0])
-        sorted_fp_labels = list(sorted_fp_labels)
-        assert len(sorted_fp_labels) == 4, "should have four FP labels; one for each EOA and contract"
-
-    # 11/22/2023 - removed because we have not been able to ship this for some time now
+    # 11/22/2023 - removed because we have not been able to ship this for some time now
     # def test_detect_ice_phishing_ml(self):
     #     agent.initialize()
     #     agent.item_id_prefix = "test_" + str(random.randint(0, 1000000))
diff --git a/scam-detector-py/src/findings_test.py b/scam-detector-py/src/findings_test.py
index 91f05833..e75e7f42 100644
--- a/scam-detector-py/src/findings_test.py
+++ b/scam-detector-py/src/findings_test.py
@@ -180,76 +180,6 @@ def test_scam_finding_only_url(self):
 
 
         
-    def test_scam_similar_contract(self):
-        chain_id = 1
-        metadata = {"alert_hash":"0x92f0e1c5f9677a3ea2903047641213ba62e5a00d62f363efc1a85cd1e184e016",
-                           "new_scammer_contract_address":"0x75577bd21803a13d6ec3e0d784f84e0e7e31cbd2",
-                           "new_scammer_eoa":"0x7e6b6f2be1bb8d2e1d5fcefa2d6df86b6e03b8d0",
-                           "scammer_contract_address":"0xe22536ac6f6a20dbb283e7f61a880993eab63313",
-                           "scammer_eoa":"0xc1015eb4d9aa4f77d79cf04825cbfb7fc04e232e",
-                           "similarity_hash":"68e6432db785f93986a9d49b19077067f8b694612f2bc1e8ef5cd38af2c8727e",
-                           "similarity_score":"0.9347575306892395"}
-        base_bot_alert_id = ""
-        base_bot_alert_hash = "0x8192"
-        finding = ScamDetectorFinding.alert_similar_contract(block_chain_indexer, forta_explorer, base_bot_alert_id, base_bot_alert_hash, metadata, chain_id)
-
-        assert finding is not None
-        alert_id = "SCAM-DETECTOR-SIMILAR-CONTRACT"
-        assert finding.alert_id == alert_id
-        assert finding.severity == FindingSeverity.Critical
-        assert finding.type == FindingType.Scam
-        assert finding.name == f'Scam detector identified an EOA with past alerts mapping to scam behavior'
-        assert finding.description == f"0x7e6b6f2be1bb8d2e1d5fcefa2d6df86b6e03b8d0 likely involved in a scam ({alert_id}, propagation)"
-        assert finding.metadata is not None
-        assert finding.labels is not None
-
-        assert finding.metadata['scammer_address'] == "0x7e6b6f2be1bb8d2e1d5fcefa2d6df86b6e03b8d0"
-        assert finding.metadata['scammer_contract_address'] == "0x75577bd21803a13d6ec3e0d784f84e0e7e31cbd2"
-        assert finding.metadata['existing_scammer_address'] == "0xc1015eb4d9aa4f77d79cf04825cbfb7fc04e232e"
-        assert finding.metadata['existing_scammer_contract_address'] == "0xe22536ac6f6a20dbb283e7f61a880993eab63313"
-        assert finding.metadata['similarity_score'] == "0.9347575306892395"
-        assert finding.metadata['involved_threat_categories'] == "address-poisoner"
-        assert finding.metadata['involved_alert_hash_1'] == "0x92f0e1c5f9677a3ea2903047641213ba62e5a00d62f363efc1a85cd1e184e016"
-
-        assert len(finding.labels) == 3
-
-        assert finding.labels[0].entity_type == EntityType.Address
-        assert finding.labels[0].entity == "0x7e6b6f2be1bb8d2e1d5fcefa2d6df86b6e03b8d0"
-        assert finding.labels[0].label == "scammer"
-        assert finding.labels[0].confidence == Utils.get_confidence_value('similar-contract')
-        assert finding.labels[0].metadata["address_type"] == "EOA"
-        assert finding.labels[0].metadata["logic"] == "propagation"
-        assert finding.labels[0].metadata["base_bot_alert_ids"] == base_bot_alert_id
-        assert finding.labels[0].metadata["base_bot_alert_hashes"] == base_bot_alert_hash
-        assert finding.labels[0].metadata["deployer_info"] == f'Deployer {metadata["new_scammer_eoa"]} deployed a contract {metadata["new_scammer_contract_address"]} that is similar to a contract {metadata["scammer_contract_address"]} deployed by a known scammer {metadata["scammer_eoa"]} involved in address-poisoner scam (alert hash: {metadata["alert_hash"]}).'
-        assert finding.labels[0].metadata["threat_category"] == "similar-contract"
-        assert finding.labels[0].metadata["threat_description_url"] == ScamDetectorFinding.get_threat_description_url(alert_id)
-
-        assert finding.labels[1].entity_type == EntityType.Address
-        assert finding.labels[1].entity == "0x75577bd21803a13d6ec3e0d784f84e0e7e31cbd2"
-        assert finding.labels[1].label == "scammer"
-        assert finding.labels[1].confidence == Utils.get_confidence_value('similar-contract')
-        assert finding.labels[1].metadata["address_type"] == "contract"
-        assert finding.labels[1].metadata["logic"] == "propagation"
-        assert finding.labels[1].metadata["base_bot_alert_ids"] == base_bot_alert_id
-        assert finding.labels[1].metadata["base_bot_alert_hashes"] == base_bot_alert_hash
-        assert finding.labels[1].metadata["deployer_info"] == "Deployer 0x7e6b6f2be1bb8d2e1d5fcefa2d6df86b6e03b8d0 deployed a contract 0x75577bd21803a13d6ec3e0d784f84e0e7e31cbd2 that is similar to a contract 0xe22536ac6f6a20dbb283e7f61a880993eab63313 deployed by a known scammer 0xc1015eb4d9aa4f77d79cf04825cbfb7fc04e232e involved in address-poisoner scam (alert hash: 0x92f0e1c5f9677a3ea2903047641213ba62e5a00d62f363efc1a85cd1e184e016); this contract may or may not be related to this particular scam, but was created by the scammer."
-        assert finding.labels[1].metadata["threat_category"] == "similar-contract"
-        assert finding.labels[1].metadata["threat_description_url"] == ScamDetectorFinding.get_threat_description_url(alert_id)
-
-        assert finding.labels[2].entity_type == EntityType.Address
-        assert finding.labels[2].entity == "0x75577bd21803a13d6ec3e0d784f84e0e7e31cbd2"
-        assert finding.labels[2].label == "similar-contract"
-        assert finding.labels[2].confidence == Utils.get_confidence_value('similar-contract')
-        assert finding.labels[2].metadata["address_type"] == "contract"
-        assert finding.labels[2].metadata["logic"] == "propagation"
-        assert finding.labels[2].metadata["base_bot_alert_ids"] == base_bot_alert_id
-        assert finding.labels[2].metadata["base_bot_alert_hashes"] == base_bot_alert_hash
-        assert finding.labels[2].metadata["deployer_info"] == "Deployer 0x7e6b6f2be1bb8d2e1d5fcefa2d6df86b6e03b8d0 deployed a contract 0x75577bd21803a13d6ec3e0d784f84e0e7e31cbd2 that is similar to a contract 0xe22536ac6f6a20dbb283e7f61a880993eab63313 deployed by a known scammer 0xc1015eb4d9aa4f77d79cf04825cbfb7fc04e232e involved in address-poisoner scam (alert hash: 0x92f0e1c5f9677a3ea2903047641213ba62e5a00d62f363efc1a85cd1e184e016); this contract may or may not be related to this particular scam, but was created by the scammer."
-        assert finding.labels[2].metadata["threat_category"] == "similar-contract"
-        assert finding.labels[2].metadata["threat_description_url"] == ScamDetectorFinding.get_threat_description_url(alert_id)
-        
-
     def test_alert_FP(self):
         finding = ScamDetectorFinding.alert_FP(w3, EOA_ADDRESS_LARGE_TX, "scammer", ("threat_category=similar-contract", "address_type=EOA", "logic=propagation"), [""])
         assert finding.alert_id == "SCAM-DETECTOR-FALSE-POSITIVE", "should be FP"

From 4e9a26b3357ce62b87e4be2acf72d96a4f5d699e Mon Sep 17 00:00:00 2001
From: Roberto Cantu <Roberto_Cantu@live.com>
Date: Wed, 26 Jun 2024 14:23:44 +0800
Subject: [PATCH 10/16] Remove 'similar-contract' alert_id check

---
 scam-detector-py/src/findings.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/scam-detector-py/src/findings.py b/scam-detector-py/src/findings.py
index 777676c5..336da4a8 100644
--- a/scam-detector-py/src/findings.py
+++ b/scam-detector-py/src/findings.py
@@ -81,8 +81,6 @@ def get_threat_category(alert_id: str) -> str:
             return "rake-token"
         elif alert_id == "SCAM-DETECTOR-IMPERSONATING-TOKEN":
             return "impersonating-token"
-        elif alert_id == "SCAM-DETECTOR-SIMILAR-CONTRACT":
-            return "similar-contract"
         elif alert_id == "SCAM-DETECTOR-SCAMMER-ASSOCIATION":
             return "scammer-association"
         elif alert_id == "SCAM-DETECTOR-SCAMMER-DEPLOYED-CONTRACT":

From b88839ac63deaadcfb0926961254daaf2471ac80 Mon Sep 17 00:00:00 2001
From: Roberto Cantu <Roberto_Cantu@live.com>
Date: Wed, 26 Jun 2024 14:43:42 +0800
Subject: [PATCH 11/16] Update 'test_alert_fp' to not use 'similar-contract'

---
 scam-detector-py/src/findings_test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scam-detector-py/src/findings_test.py b/scam-detector-py/src/findings_test.py
index e75e7f42..14044075 100644
--- a/scam-detector-py/src/findings_test.py
+++ b/scam-detector-py/src/findings_test.py
@@ -181,14 +181,14 @@ def test_scam_finding_only_url(self):
 
         
     def test_alert_FP(self):
-        finding = ScamDetectorFinding.alert_FP(w3, EOA_ADDRESS_LARGE_TX, "scammer", ("threat_category=similar-contract", "address_type=EOA", "logic=propagation"), [""])
+        finding = ScamDetectorFinding.alert_FP(w3, EOA_ADDRESS_LARGE_TX, "scammer", ("threat_category=scammer-association", "address_type=EOA", "logic=propagation"), [""])
         assert finding.alert_id == "SCAM-DETECTOR-FALSE-POSITIVE", "should be FP"
         assert finding.description == f'{EOA_ADDRESS_LARGE_TX} likely not involved in a scam (SCAM-DETECTOR-FALSE-POSITIVE, manual)', "should be FP"
         assert len(finding.labels) == 1, "should be 1"
         assert finding.labels[0].label == "scammer"
         assert finding.labels[0].remove == 'true', "should be remove"
         assert finding.labels[0].metadata["address_type"] == "EOA"
-        assert finding.labels[0].metadata["threat_category"] == "similar-contract"
+        assert finding.labels[0].metadata["threat_category"] == "scammer-association"
         assert finding.labels[0].metadata["logic"] == "propagation"
 
         assert finding.labels[0].entity == EOA_ADDRESS_LARGE_TX, "should be EOA_ADDRESS_LARGE_TX"

From 3e2f2b1c35f77c66171646537aec6d68d0a5b017 Mon Sep 17 00:00:00 2001
From: Roberto Cantu <Roberto_Cantu@live.com>
Date: Wed, 26 Jun 2024 14:52:11 +0800
Subject: [PATCH 12/16] Bump version & update 'release.md'

---
 scam-detector-py/package.json | 2 +-
 scam-detector-py/release.md   | 4 ++++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/scam-detector-py/package.json b/scam-detector-py/package.json
index cfdd035c..c4ff125e 100644
--- a/scam-detector-py/package.json
+++ b/scam-detector-py/package.json
@@ -1,7 +1,7 @@
 {
   "name": "scam-detector-feed",
   "displayName": "Scam Detector Feed",
-  "version": "2.24.5",
+  "version": "2.24.6",
   "repository": "https://github.com/forta-network/starter-kits/tree/main/scam-detector-py",
   "description": "Provides real-time intelligence on scammers engaged in over 10 unique scam types.",
   "longDescription": "The Scam Detector data feed provides real-time intelligence about EOAs, contracts and URLs involved in a variety of Web3 scams. It is jointly maintained by the Forta Foundation, Nethermind, Blocksec, ChainPatrol and members of the Forta developer community. It features market leading scam type coverage on ice phishing, address poisoning, rake tokens, token impersonation, fraudulent NFT orders, pig butchering, gas minting, sleep minting, hard rug pulls, soft rug pulls, and wash trading. Used by Web3 wallets, exchanges, crypto compliance companies and other Web3 security teams and tools. Teams can use Scam Detector labels to warn end-users during the pre-signing transaction approval process, to identify and prevent money laundering through regulated platforms, and to supplement existing blacklists among other use cases. Learn more in the documentation below, and request a free trial today.",
diff --git a/scam-detector-py/release.md b/scam-detector-py/release.md
index f12e6ecf..ff880bc9 100644
--- a/scam-detector-py/release.md
+++ b/scam-detector-py/release.md
@@ -1,5 +1,9 @@
 # Scam Detector Bot Release Notes
 
+## 2.24.6 (beta - 6/26/2024)
+
+- removed Contract Similarity base bot and corresponding logic
+
 ## 2.24.4 (prod - 4/3/2024)
 
 - removed ADDRESS-POISONING-FAKE-TOKEN alert

From 297e9aa5977fac4c1b3b2ad454ace5108d04c8e3 Mon Sep 17 00:00:00 2001
From: Roberto Cantu <Roberto_Cantu@live.com>
Date: Wed, 26 Jun 2024 14:54:11 +0800
Subject: [PATCH 13/16] Update previous 'release.md' entry to '2.24.5'

---
 scam-detector-py/release.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scam-detector-py/release.md b/scam-detector-py/release.md
index ff880bc9..5cb8720a 100644
--- a/scam-detector-py/release.md
+++ b/scam-detector-py/release.md
@@ -4,7 +4,7 @@
 
 - removed Contract Similarity base bot and corresponding logic
 
-## 2.24.4 (prod - 4/3/2024)
+## 2.24.5 (prod - 4/3/2024)
 
 - removed ADDRESS-POISONING-FAKE-TOKEN alert
 

From 640d6a329c693f36ad11909cad3ad2242c42e786 Mon Sep 17 00:00:00 2001
From: Roberto Cantu <Roberto_Cantu@live.com>
Date: Wed, 26 Jun 2024 15:13:25 +0800
Subject: [PATCH 14/16] Remove mock 'similar-contract' label

---
 scam-detector-py/src/forta_explorer_mock.py | 19 -------------------
 1 file changed, 19 deletions(-)

diff --git a/scam-detector-py/src/forta_explorer_mock.py b/scam-detector-py/src/forta_explorer_mock.py
index 9913ae38..84b34f05 100644
--- a/scam-detector-py/src/forta_explorer_mock.py
+++ b/scam-detector-py/src/forta_explorer_mock.py
@@ -88,25 +88,6 @@ def get_labels(source_id: str, start_date: datetime, end_date: datetime, entity:
             labels_df = pd.concat([labels_df, temp])
 
 
-        if entity == '' and label_query == 'similar-contract':
-            temp = pd.DataFrame(columns = ['createdAt', 'id', 'label', 'source', 'alertId', 'alertHash', 'chainId', 'labelstr', 'entity', 'entityType', 'remove', 'confidence', 'metadata', 'botVersion', ], data = [[
-                '2023-03-05 16:01:00',
-                '0x1d646c4045189991fdfd24a66b192a294158b839a6ec121d740474bdacbaaaaa',
-                'label_obj',
-                'source_obj',
-                'SCAM-DETECTOR-SIMILAR-CONTRACT',
-                '0x1d646c4045189991fdfd24a66b192a294158b839a6ec121d740474bdacbbbbbb',
-                1,
-                'scammer',
-                '0xfa8c1a1dddea2c06364c9e6ab31772f020f5efc5',
-                'addresss',
-                False,
-                0.9,
-                {"threat_category":"similar-contract","address_type":"contract","logic":"propagation","base_bot_alert_ids":"ADDRESS-POISONING-FAKE-TOKEN","base_bot_alert_hashes":"0x003e7643042d22f54b817ed14003ad6acbee18f40a818b4e5edadd75d9e9b617","threat_description_url":"https://forta.org/attacks#address-poisoning","bot_version":"0.2.2","associated_scammer_contract":"0xfa8c1a1dddea2c06364c9e6ab31772f020f5efc6","deployer_info":"Deployer 0x2320a28f52334d62622cc2eafa15de55f9987eaa deployed a contract 0xfa8c1a1dddea2c06364c9e6ab31772f020f5efc5 that is similar to a contract 0xfa8c1a1dddea2c06364c9e6ab31772f020f5efc6 deployed by a known scammer 0x2320a28f52334d62622cc2eafa15de55f9987ecc"},
-                '0.2.0'
-            ]])
-            labels_df = pd.concat([labels_df, temp])
-
         if entity == '' and label_query == 'scammer-association':
             temp = pd.DataFrame(columns = ['createdAt', 'id', 'label', 'source', 'alertId', 'alertHash', 'chainId', 'labelstr', 'entity', 'entityType', 'remove', 'confidence', 'metadata', 'botVersion', ], data = [[
                 '2023-03-05 16:01:00',

From 617345179a1fa2a8f4e081a0faa1fa68be1e360f Mon Sep 17 00:00:00 2001
From: Roberto Cantu <Roberto_Cantu@live.com>
Date: Wed, 26 Jun 2024 15:45:42 +0800
Subject: [PATCH 15/16] Update 'Contract Similarity Bot' section in
 'base_bots_additions.md'

---
 scam-detector-py/base_bots_additions.md | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/scam-detector-py/base_bots_additions.md b/scam-detector-py/base_bots_additions.md
index 7bd3a299..2dd51c67 100644
--- a/scam-detector-py/base_bots_additions.md
+++ b/scam-detector-py/base_bots_additions.md
@@ -19,13 +19,22 @@ Once added, the new bot/alert id should be handled by the Scam Detector. It is h
 
 ## Contract Similarity Bot
 
-Currently, we only have one contract similarity bot configured. A new similarity bot could be configured by addition to the `CONTRACT_SIMILARITY_BOTS` with the `CONTRACT_SIMILARITY_BOT_THRESHOLDS` to be utilized. A contract similarity bot would need to emit the following fields in the metadata:
-    - new_scammer_contract_address
-    - new_scammer_eoa
-    - scammer_contract_address
-    - scammer_eoa
-    - similarity_hash
-    - similarity_score
+Currently, we do not have a contract similarity bot configured. A new similarity bot could be configured by the addition of:
+- `CONTRACT_SIMILARITY_BOTS`
+- `CONTRACT_SIMILARITY_BOT_THRESHOLDS` to be utilized
+- new entries to `BASE_BOTS` and `CONFIDENCE_MAPPINGS`
+- `ALERTED_ENTITIES_SIMILAR_CONTRACT_KEY` and `ALERTED_ENTITIES_SIMILAR_CONTRACT_QUEUE_SIZE` for persistence
+
+in `constants.py`. A contract similarity bot would need to emit the following fields in the metadata (and the necessary logic can be added to `findings.py`):
+
+- new_scammer_contract_address
+- new_scammer_eoa
+- scammer_contract_address
+- scammer_eoa
+- similarity_hash
+- similarity_score
+    
+The Scam Detector's logic itself would then need to be updated for the newly added items listed above.
 
 In addition, the deployer of the new contract needs to be extracted. This should be configured in the `basebot_parsing_config.csv`.
 

From 272825cec03a3e4ea39d638df39faa14bfd866e1 Mon Sep 17 00:00:00 2001
From: Roberto Cantu <Roberto_Cantu@live.com>
Date: Wed, 26 Jun 2024 16:35:54 +0800
Subject: [PATCH 16/16] Add prev commit with 'contract-similarity' logic to
 base_bots_addition for reference

---
 scam-detector-py/base_bots_additions.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scam-detector-py/base_bots_additions.md b/scam-detector-py/base_bots_additions.md
index 2dd51c67..78efe258 100644
--- a/scam-detector-py/base_bots_additions.md
+++ b/scam-detector-py/base_bots_additions.md
@@ -34,7 +34,7 @@ in `constants.py`. A contract similarity bot would need to emit the following fi
 - similarity_hash
 - similarity_score
     
-The Scam Detector's logic itself would then need to be updated for the newly added items listed above.
+The Scam Detector's logic itself would then need to be updated for the newly added items listed above. For reference, a previous implementation of this logic can be found at commit [420ce3cced8ee7acb7e1ddb23ccf4e27019da8dc](https://github.com/forta-network/starter-kits/tree/420ce3cced8ee7acb7e1ddb23ccf4e27019da8dc/scam-detector-py), which makes it easier to reintroduce.
 
 In addition, the deployer of the new contract needs to be extracted. This should be configured in the `basebot_parsing_config.csv`.