From d83f62734d82d3456a5e828e3f2438e8510b3266 Mon Sep 17 00:00:00 2001 From: davidchocholaty Date: Thu, 5 Sep 2024 13:20:42 +0200 Subject: [PATCH] Add new p2pkh validation This commit adds a new implementation for the p2pkh verification, which is more readable and should be similar to other scriptpubkey_type types. --- .gitignore | 162 ++++++++++++++++++++++++++++++++ requirements.txt | 2 +- run.sh | 0 src/main.py | 4 +- src/serialize.py | 3 + src/transaction.py | 227 ++++++++++++++++++++++++++++++++++++++++----- src/utils.py | 18 +++- src/verify.py | 111 ++++------------------ 8 files changed, 409 insertions(+), 118 deletions(-) mode change 100644 => 100755 run.sh diff --git a/.gitignore b/.gitignore index e69de29b..82f92755 100644 --- a/.gitignore +++ b/.gitignore @@ -0,0 +1,162 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ diff --git a/requirements.txt b/requirements.txt index 0417a3df..ed2a25ec 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ ecdsa==0.19.0 pycryptodome==3.20.0 -sha3==0.2.1 \ No newline at end of file +#sha3==0.2.1 \ No newline at end of file diff --git a/run.sh b/run.sh old mode 100644 new mode 100755 diff --git a/src/main.py b/src/main.py index f27ea073..060a2102 100644 --- a/src/main.py +++ b/src/main.py @@ -28,6 +28,8 @@ def parse_arguments(): mempool = MemPool(args.mempool) + # TODO pokracovani + block_transactions = [COINBASE_TRANSACTION] + mempool.valid_transactions transaction_hashes = [calculate_txid(COINBASE_TRANSACTION)] + [calculate_txid(json_transaction) for json_transaction in block_transactions[1:]] @@ -45,4 +47,4 @@ def parse_arguments(): print(block_hash) print(coinbase_serialized.hex()) for transaction in transaction_hashes: - print(transaction) \ No newline at end of file + print(transaction) diff --git a/src/serialize.py b/src/serialize.py index 66005071..d025a664 100644 --- a/src/serialize.py +++ b/src/serialize.py @@ -1,3 +1,6 @@ +# The code in this file is inpired from the following source: +# http://karpathy.github.io/2021/06/21/blockchain/ + def serialize_input(tx_input, override=None): serialized_input = [] serialized_input += [bytes.fromhex(tx_input["txid"])[::-1]] # Reversed txid diff --git a/src/transaction.py b/src/transaction.py index 39cf28af..321cfa0d 100644 --- a/src/transaction.py +++ b/src/transaction.py @@ -1,9 +1,11 @@ import hashlib import json +from ecdsa import VerifyingKey, SECP256k1, BadSignatureError + from src.serialize import serialize_transaction -from src.utils import get_filename_without_extension -from src.verify import non_empty_vin_vout, valid_transaction_syntax, verify_p2pkh_transaction +from src.utils import decode_hex, get_filename_without_extension, hash160 +from src.verify import parse_der_signature, valid_transaction_syntax def calculate_txid(transaction_content, coinbase=False): # Serialize the transaction content @@ -35,12 +37,48 @@ def __init__(self, transaction_json_file): self.vout = json_transaction['vout'] self.json_transaction = json_transaction else: + # TODO jestli nejakej error print('Invalid transaction syntax') def is_valid(self): - if not non_empty_vin_vout(self.vin, self.vout): + # At least one input and one output. + if not self.non_empty_vin_vout(): + return False + + # Basic locktime check. + if not self.valid_locktime(): + return False + + if not self.check_input_output_sum(): return False + # Check each input validity. + for vin_idx, vin in enumerate(self.vin): + if not self.valid_input(vin_idx, vin): + return False + + # Check each output validity. + for vout in self.vout: + if not self.valid_output(vout): + return False + + return True + + def non_empty_vin_vout(self): + # Make sure neither in or out lists are empty + if not self.vin: + #print("vin is empty") + return False + if not self.vout: + #print("vout is empty") + return False + + return True + + def valid_locktime(self): + return isinstance(self.locktime, int) and self.locktime >= 0 + + def check_input_output_sum(self): input_sum = 0 for input in self.vin: input_sum = input_sum + input['prevout']['value'] @@ -48,28 +86,173 @@ def is_valid(self): output_sum = 0 for output in self.vout: output_sum = output_sum + output['value'] - + + # Output sum can't be greater than the input sum. if input_sum < output_sum: return False + + return True - input_idx = 0 - for input in self.vin: - if 'scriptsig' in input: - scriptsig = input['scriptsig'] - - scriptpubkey_type = input['prevout']['scriptpubkey_type'] - - if scriptsig == "" or scriptpubkey_type not in ["p2pkh", "p2sh"]: - return False - - if scriptpubkey_type == 'p2pkh': - if not verify_p2pkh_transaction(input_idx, self.json_transaction): - return False - else: - return False - else: + def valid_input(self, vin_idx, vin): + # TODO + if vin.get("is_coinbase", False): + return False + + prevout = vin.get("prevout", {}) + scriptpubkey_type = prevout.get("scriptpubkey_type", "") + + if scriptpubkey_type == "p2pkh": + return self.validate_p2pkh(vin_idx, vin) + elif scriptpubkey_type == "p2sh": + pass + #return self.validate_p2sh(vin) + elif scriptpubkey_type == "v0_p2wsh": + pass + #return self.validate_p2wsh(vin) + elif scriptpubkey_type == "v1_p2tr": + pass + #return self.validate_p2tr(vin) + elif scriptpubkey_type == "v0_p2wpkh": + pass + #return self.validate_p2wpkh(vin) + + # Unknown script type. + return False + + def valid_output(self, vout): + scriptpubkey_type = vout.get("scriptpubkey_type", "") + return scriptpubkey_type in ["v0_p2wpkh", "p2sh", "v0_p2wsh", "v1_p2tr", "p2pkh"] + + def validate_p2pkh(self, vin_idx, vin): + # Checking input signatures. + if "scriptsig" in vin: + ################# + # Pubkey script # + ################# + scriptsig = decode_hex(vin.get("scriptsig", "")) + + if not scriptsig: + return False + + prevout = vin.get("prevout", {}) + + if not prevout: + return False + + scriptpubkey = decode_hex(prevout.get("scriptpubkey", "")) + + ##################################################################### + # Extract signature and public key from scriptSig (Parse scriptSig) # + ##################################################################### + # https://learnmeabitcoin.com/technical/script/p2pkh/ + # Explanation: the scriptSig contains the signature and the public key (including ASM instructions). + + signature_len = scriptsig[0] # The first byte represents the length of the DER signature (including hash type) + signature_w_hash_type = scriptsig[1:1+signature_len] # Extract the signature (includes the hash type at the end) + + public_key_idx = 1 + signature_len + public_key_len = scriptsig[public_key_idx] + public_key = scriptsig[public_key_idx+1:public_key_idx+1+public_key_len] + + # ------------------------------------------------ + # ------------------------------------------------ + + r, s, hash_type = parse_der_signature(signature_w_hash_type) + + r = r[1:] + s = s[1:] + + der_len = len(signature_w_hash_type[:-1]) + signature_len = len(r + s) + 6 # For the leading bytes +6. + + if der_len != signature_len: + return False + + # TODO mel bych pouzivat old_signature, jak jsem to mel driv + # https://bitcoin.stackexchange.com/questions/92680/what-are-the-der-signature-and-sec-format + # https://learnmeabitcoin.com/technical/keys/signature/ + + signature = r + s + + #print("---------------------") + # Debugging: print extracted signature and public key + print("txid: ", vin["txid"]) + #print(f"Extracted signature: {signature.hex()}") + #print(f"Extracted public key: {public_key.hex()}") + print("--------------") + + ###################### + # Parse scriptPubKey # + ###################### + # https://learnmeabitcoin.com/technical/script/p2pkh/ + # Explanation: the scriptPubKey contains: DUP, HASH160, public key hash (including OP_PUSHBYTES_20), EQUALVERIFY and CHECKSIG. + + if scriptpubkey[0:1] != b'\x76' or scriptpubkey[1:2] != b'\xa9' or scriptpubkey[2:3] != b'\x14': + return False # Not a valid P2PKH scriptPubKey (missing OP_DUP, OP_HASH160, or length mismatch) + + if scriptpubkey[23:24] != b'\x88' or scriptpubkey[24:25] != b'\xac': + return False # Not a valid P2PKH scriptPubKey (missing OP_EQUALVERIFY or OP_CHECKSIG) + + pkh = scriptpubkey[3:23] + + # Compute the public key hash (HASH160 of the public key) and compare with scriptPubKey + calc_pkh = hash160(public_key) + if calc_pkh != pkh: + return False # Public key hash does not match + + + ## -------------------------------------- + + """# Extract data from input transaction + script_sig_asm = input_tx["scriptsig_asm"] + + # Parse scriptSig ASM to extract signature and public key + script_parts = script_sig_asm.split(" ") + signature_hex = script_parts[1] + public_key_hex = script_parts[3] + + r, s, hash_type = parse_der_signature(signature_hex) + + r_hex = hex(r)[2:] + s_hex = hex(s)[2:] + + der_len = len(signature_hex[:-2]) + signature_len = len(r_hex + s_hex) + 2 * 6 + + if der_len != signature_len: + return False + + signature = bytes.fromhex(r_hex + s_hex) + + public_key = bytes.fromhex(public_key_hex) + + scriptpubkey = bytes.fromhex(input_tx['prevout']['scriptpubkey']) + pubkey_hash = scriptpubkey[3:23] + + hashed_public_key = hashlib.sha256(public_key).digest() + + ripemd160 = RIPEMD160.new() + ripemd160.update(hashed_public_key) + pubkey_hash_calculated = ripemd160.digest() + + if pubkey_hash != pubkey_hash_calculated: + return False +""" + + ############################################ + # Verify the signature with the public key # + ############################################ + + data_signed = serialize_transaction(self.json_transaction, vin_idx, int(hash_type)) + data_hash = hashlib.sha256(data_signed).digest() + + # Verify the signature + verifying_key = VerifyingKey.from_string(public_key, curve=SECP256k1) + try: + verifying_key.verify(signature, data_hash, hashlib.sha256) + except BadSignatureError: return False - input_idx += 1 + return True - return True \ No newline at end of file + return False diff --git a/src/utils.py b/src/utils.py index 32a854e0..32ad658c 100644 --- a/src/utils.py +++ b/src/utils.py @@ -1,8 +1,24 @@ +import hashlib import os +from Crypto.Hash import RIPEMD160 + def get_filename_without_extension(file_path): # Get the base filename from the path filename = os.path.basename(file_path) # Remove the extension filename_without_extension = os.path.splitext(filename)[0] - return filename_without_extension \ No newline at end of file + return filename_without_extension + +def decode_hex(hex_data): + # Decode a hex-encoded data into its raw bytecode. + return bytes.fromhex(hex_data) + +def hash160(data): + # SHA-256 followed by RIPEMD-160 (Bitcoin's HASH160). + sha256_hash = hashlib.sha256(data).digest() + + ripemd160 = RIPEMD160.new() + ripemd160.update(sha256_hash) + + return ripemd160.digest() diff --git a/src/verify.py b/src/verify.py index c568fbc1..45045da9 100644 --- a/src/verify.py +++ b/src/verify.py @@ -1,137 +1,62 @@ -import ecdsa -import hashlib - -from Crypto.Hash import RIPEMD160 -from src.serialize import serialize_transaction - def valid_transaction_syntax(json_transaction): required = ["version", "locktime", "vin", "vout"] for field in required: if field not in json_transaction: - print('Required field is missing') + #print('Required field is missing') return False if not isinstance(json_transaction["version"], int): - print('Invalid data type') + #print('Invalid data type') return False if not isinstance(json_transaction["locktime"], int): - print('Invalid data type') + #print('Invalid data type') return False if not isinstance(json_transaction["vin"], list): - print('Invalid data type') + #print('Invalid data type') return False if not isinstance(json_transaction["vout"], list): - print('Invalid data type') + #print('Invalid data type') return False # Check inputs for input in json_transaction['vin']: if not isinstance(input, dict): - print('Invalid data type') + #print('Invalid data type') return False if 'txid' not in input or 'vout' not in input: - print('Invalid data type') + #print('Invalid data type') return False # Check outputs - for output in json_transaction['vout']: if not isinstance(output, dict): - print('Invalid data type') + #print('Invalid data type') return False if 'scriptpubkey' not in output or 'value' not in output: - print('Invalid data type') + #print('Invalid data type') return False return True -def non_empty_vin_vout(vin, vout): - # Make sure neither in or out lists are empty - if not vin: - print("vin is empty") - return False - if not vout: - print("vout is empty") - return False - - return True - def parse_der_signature(der_signature_with_hash_type): # Remove the hash_type from the DER signature - der_signature = der_signature_with_hash_type[:-2] + #der_signature = der_signature_with_hash_type[:-2] + der_signature = der_signature_with_hash_type[:-1] # Parse the DER signature - der_bytes = bytes.fromhex(der_signature) - r_length = der_bytes[3] - r = int.from_bytes(der_bytes[4:4 + r_length], 'big') + r_length = der_signature[3] + #r = int.from_bytes(der_signature[4:4 + r_length], 'big') + r = der_signature[4:4 + r_length] s_length_index = 4 + r_length + 1 - s_length = der_bytes[s_length_index] - s = int.from_bytes(der_bytes[s_length_index + 1:s_length_index + 1 + s_length], 'big') - hash_type = der_bytes[-1] + s_length = der_signature[s_length_index] + #s = int.from_bytes(der_signature[s_length_index + 1:s_length_index + 1 + s_length], 'big') + s = der_signature[s_length_index + 1:s_length_index + 1 + s_length] + hash_type = der_signature[-1] return r, s, hash_type - -def verify_p2pkh_transaction(input_idx, json_transaction): - ################# - # Pubkey script # - ################# - - input_tx = json_transaction["vin"][input_idx] - - # Extract data from input transaction - script_sig_asm = input_tx["scriptsig_asm"] - - # Parse scriptSig ASM to extract signature and public key - script_parts = script_sig_asm.split(" ") - signature_hex = script_parts[1] - public_key_hex = script_parts[3] - - r, s, hash_type = parse_der_signature(signature_hex) - - r_hex = hex(r)[2:] - s_hex = hex(s)[2:] - - der_len = len(signature_hex[:-2]) - signature_len = len(r_hex + s_hex) + 2 * 6 - - if der_len != signature_len: - return False - - signature = bytes.fromhex(r_hex + s_hex) - - public_key = bytes.fromhex(public_key_hex) - - scriptpubkey = bytes.fromhex(input_tx['prevout']['scriptpubkey']) - pubkey_hash = scriptpubkey[3:23] - - hashed_public_key = hashlib.sha256(public_key).digest() - - ripemd160 = RIPEMD160.new() - ripemd160.update(hashed_public_key) - pubkey_hash_calculated = ripemd160.digest() - - if pubkey_hash != pubkey_hash_calculated: - return False - - - #################### - # Signature script # - #################### - - data_signed = serialize_transaction(json_transaction, input_idx, int(hash_type)) - data_hash = hashlib.sha256(data_signed).digest() - - # Verify the signature - verifying_key = ecdsa.VerifyingKey.from_string(public_key, curve=ecdsa.SECP256k1) - try: - verifying_key.verify(signature, data_hash, hashlib.sha256) - except ecdsa.BadSignatureError: - return False - - return True