From 86c8f30543269df4a4f6a28c2884026e7f1f6d64 Mon Sep 17 00:00:00 2001 From: Amisha Singla Date: Wed, 31 Jul 2024 14:39:03 -0500 Subject: [PATCH 1/7] Add the document to backfill using tx_meta --- backfill.md | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 backfill.md diff --git a/backfill.md b/backfill.md new file mode 100644 index 00000000..4d0a7f3b --- /dev/null +++ b/backfill.md @@ -0,0 +1,82 @@ +# How to Backfill history_transactions table using tx_meta (xdr) + +When a new column is requested or a bug needs to be fixed, Airflow backfill jobs have to be kicked off. This process is costly in both time and money. + +This document outlines a method to extract the required field from the transaction envelope (tx_meta). It uses the `fee_account_muxed` field as an example; however, the approach can be extended to other fields as well. + +We will use the [js-stellar-base](https://github.com/stellar/js-stellar-base) library to parse the XDRs and then use a JavaScript UDF in BigQuery to apply the transformation to the dataset. + +Referred medium article: [Using NPM Library in Google BigQuery UDF](https://medium.com/analytics-vidhya/using-npm-library-in-google-bigquery-udf-8aef01b868f4) + +# Setting up JS UDF in BigQuery + +``` +git clone https://github.com/stellar/js-stellar-base.git +cd js-stellar-base +yarn +npx webpack --config config/webpack.config.browser.js +``` + +Above will create following files in `js-stellar-base/dist/` directory: +- stellar-base.js +- stellar-base.min.js + +Copy above files in a GCS bucket. 
+ +# Writing JS Function + +Following is example to extract FeeAccountMuxed: + +``` + let tx_meta = "AAAABQAAAQAAABYMYQ4r9W/uB9X6q6VU6feQhS2kQoRy9CjvwtYXdPRSih2hZeSSAAAAAAAAAZAAAAACAAAAAJwLL0Ul/CyRZdXuenmdXrzVyX9X56m4kYPYmgppVIj8AAAAZAAF9PwAAAABAAAAAAAAAAAAAAABAAAAAQAAAQAAAFj7+8N85JwLL0Ul/CyRZdXuenmdXrzVyX9X56m4kYPYmgppVIj8AAAAAAAAAADN5igtu93OKhkj2NrSHuPEJktU+0gJ0LiNavJirLAmRwAAAAAF9eEAAAAAAAAAAAFpVIj8AAAAQElnt70S4sGicHyhsN1S29DEREZ7i2HU96+8DfyshlFLCoQudDIxThnVEg2KQDrW61R19M7Ms9IAsznURc5y3wIAAAAAAAAAAaFl5JIAAABAIf9/ecA3id1mbHzJ2S9W5bRVqrjQr/c2+jHEuDNZevt3LDVSc+DmRMYie0eQ+vE7B3D+fRPb9yFzpfx4meTfBg=="; + + let txe = StellarBase.xdr.TransactionEnvelope.fromXDR(tx_meta, 'base64'); + let tx = txe.feeBump(); + let sourceAccount = StellarBase.encodeMuxedAccountToAddress(tx.tx().feeSource()); + console.log(sourceAccount) +``` + +Above will print `MBX64B6V7KV2KVHJ66IIKLNEIKCHF5BI57BNMF3U6RJIUHNBMXSJEAAACYGGCDRL6UFO2`, which is the `fee_account_muxed` value + +# Wrapping JS function as UDF + +``` +CREATE TEMP FUNCTION getFeeBumpAccountIfExists(tx_meta STRING) +RETURNS STRING +LANGUAGE js +OPTIONS ( + library=["gs://stellar-test-js-udf/stellar-base.min.js"] -- path to js library in GCS +) +AS r""" + let txe = StellarBase.xdr.TransactionEnvelope.fromXDR(tx_meta, 'base64'); + let tx = txe.feeBump(); + let sourceAccount = StellarBase.encodeMuxedAccountToAddress(tx.tx().feeSource()); + return sourceAccount + +"""; + +WITH fee_bump_transactions AS + ( + SELECT batch_run_date, transaction_hash, tx_envelope as tx_meta FROM `test_crypto_stellar.history_transactions` + WHERE + batch_run_date BETWEEN DATETIME("2024-07-01") AND DATETIME_ADD("2024-07-20", INTERVAL 1 MONTH) + and inner_transaction_hash is not null -- filter in fee bump transactions + ), + calculated_fee_account as ( + SELECT batch_run_date, transaction_hash, getFeeBumpAccountIfExists(tx_meta) as fee_account + FROM fee_bump_transactions + ), + calculated_fee_muxed_account as ( + SELECT batch_run_date, transaction_hash, 
fee_account FROM calculated_fee_account + WHERE fee_account LIKE 'M%' -- muxed accounts + ) + SELECT batch_run_date, transaction_hash, fee_account as fee_account_muxed FROM calculated_fee_muxed_account +``` + +Sample output for above JS UDF. +Row | transaction_hash | fee_account +-- | -- | -- +1 | f5f5b0aaf758896ef8c5b4807f41c77d15c11977eecf2b0e4769d777324a2d11 |MCBD54KAHHA4AK4DOZWOSX5O5OZ4OI54N24QITDSFLPD7EG2WY2AMAAACYGGCDRL6UBUA +2 | a9e49dff6202663633b83f3645fbf8c2cfeb915db99b2b884a86791b9f8eae2f | MBX64B6V7KV2KVHJ66IIKLNEIKCHF5BI57BNMF3U6RJIUHNBMXSJEAAACYGGCDRL6UFO2 +3 | 00dba50c8689477e6990103338a0eb326725e07a7b7ff187359abf11c23c582a| MC5BEU3DCIMHOHRQDVDAPEPZGMBBALPJ3IQY23VTXC3454SQMNWVSAAACYGGCDRL6UX42 +4 | 2e1c53a9fe1d48ddc493febe467178994e669e3eebf3a4cca646b3cb666616de|MAMYAUW45TC54C3QORQP7OOFYKOXCJTXOG2WIV5LP2HDMR67MWP6IAAACYGGCDRL6VCZM \ No newline at end of file From ea283052c4e0ee24f1183f8542d9b69f6631af0b Mon Sep 17 00:00:00 2001 From: Amisha Singla Date: Wed, 31 Jul 2024 14:56:09 -0500 Subject: [PATCH 2/7] linting gods lint pass? 
--- backfill.md | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/backfill.md b/backfill.md index 4d0a7f3b..f321144c 100644 --- a/backfill.md +++ b/backfill.md @@ -18,6 +18,7 @@ npx webpack --config config/webpack.config.browser.js ``` Above will create following files in `js-stellar-base/dist/` directory: + - stellar-base.js - stellar-base.min.js @@ -56,10 +57,10 @@ AS r""" """; WITH fee_bump_transactions AS - ( + ( SELECT batch_run_date, transaction_hash, tx_envelope as tx_meta FROM `test_crypto_stellar.history_transactions` WHERE - batch_run_date BETWEEN DATETIME("2024-07-01") AND DATETIME_ADD("2024-07-20", INTERVAL 1 MONTH) + batch_run_date BETWEEN DATETIME("2024-07-01") AND DATETIME_ADD("2024-07-20", INTERVAL 1 MONTH) and inner_transaction_hash is not null -- filter in fee bump transactions ), calculated_fee_account as ( @@ -74,9 +75,9 @@ WITH fee_bump_transactions AS ``` Sample output for above JS UDF. -Row | transaction_hash | fee_account --- | -- | -- -1 | f5f5b0aaf758896ef8c5b4807f41c77d15c11977eecf2b0e4769d777324a2d11 |MCBD54KAHHA4AK4DOZWOSX5O5OZ4OI54N24QITDSFLPD7EG2WY2AMAAACYGGCDRL6UBUA -2 | a9e49dff6202663633b83f3645fbf8c2cfeb915db99b2b884a86791b9f8eae2f | MBX64B6V7KV2KVHJ66IIKLNEIKCHF5BI57BNMF3U6RJIUHNBMXSJEAAACYGGCDRL6UFO2 -3 | 00dba50c8689477e6990103338a0eb326725e07a7b7ff187359abf11c23c582a| MC5BEU3DCIMHOHRQDVDAPEPZGMBBALPJ3IQY23VTXC3454SQMNWVSAAACYGGCDRL6UX42 -4 | 2e1c53a9fe1d48ddc493febe467178994e669e3eebf3a4cca646b3cb666616de|MAMYAUW45TC54C3QORQP7OOFYKOXCJTXOG2WIV5LP2HDMR67MWP6IAAACYGGCDRL6VCZM \ No newline at end of file +Row | transaction_hash | fee_account +-- | -- | -- +1 | f5f5b0aaf758896ef8c5b4807f41c77d15c11977eecf2b0e4769d777324a2d11 |MCBD54KAHHA4AK4DOZWOSX5O5OZ4OI54N24QITDSFLPD7EG2WY2AMAAACYGGCDRL6UBUA +2 | a9e49dff6202663633b83f3645fbf8c2cfeb915db99b2b884a86791b9f8eae2f | MBX64B6V7KV2KVHJ66IIKLNEIKCHF5BI57BNMF3U6RJIUHNBMXSJEAAACYGGCDRL6UFO2 +3 | 
00dba50c8689477e6990103338a0eb326725e07a7b7ff187359abf11c23c582a| MC5BEU3DCIMHOHRQDVDAPEPZGMBBALPJ3IQY23VTXC3454SQMNWVSAAACYGGCDRL6UX42 +4 | 2e1c53a9fe1d48ddc493febe467178994e669e3eebf3a4cca646b3cb666616de|MAMYAUW45TC54C3QORQP7OOFYKOXCJTXOG2WIV5LP2HDMR67MWP6IAAACYGGCDRL6VCZM From f2c74b48c1e1f7518bedc0110abff51fe0b11180 Mon Sep 17 00:00:00 2001 From: Amisha Singla Date: Wed, 31 Jul 2024 15:37:47 -0500 Subject: [PATCH 3/7] Move the file to sub --- backfill.md => docs/backfill.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename backfill.md => docs/backfill.md (100%) diff --git a/backfill.md b/docs/backfill.md similarity index 100% rename from backfill.md rename to docs/backfill.md From d5884a2f89388cb43db21a46ececa753eaee3a9e Mon Sep 17 00:00:00 2001 From: Amisha Singla Date: Wed, 31 Jul 2024 15:42:12 -0500 Subject: [PATCH 4/7] Address feedback --- docs/backfill.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/backfill.md b/docs/backfill.md index f321144c..1e7575a0 100644 --- a/docs/backfill.md +++ b/docs/backfill.md @@ -14,12 +14,11 @@ Referred medium article: [Using NPM Library in Google BigQuery UDF](https://medi git clone https://github.com/stellar/js-stellar-base.git cd js-stellar-base yarn -npx webpack --config config/webpack.config.browser.js +yarn build:prod ``` Above will create following files in `js-stellar-base/dist/` directory: -- stellar-base.js - stellar-base.min.js Copy above files in a GCS bucket. @@ -29,6 +28,7 @@ Copy above files in a GCS bucket. 
Following is example to extract FeeAccountMuxed: ``` +javascript let tx_meta = "AAAABQAAAQAAABYMYQ4r9W/uB9X6q6VU6feQhS2kQoRy9CjvwtYXdPRSih2hZeSSAAAAAAAAAZAAAAACAAAAAJwLL0Ul/CyRZdXuenmdXrzVyX9X56m4kYPYmgppVIj8AAAAZAAF9PwAAAABAAAAAAAAAAAAAAABAAAAAQAAAQAAAFj7+8N85JwLL0Ul/CyRZdXuenmdXrzVyX9X56m4kYPYmgppVIj8AAAAAAAAAADN5igtu93OKhkj2NrSHuPEJktU+0gJ0LiNavJirLAmRwAAAAAF9eEAAAAAAAAAAAFpVIj8AAAAQElnt70S4sGicHyhsN1S29DEREZ7i2HU96+8DfyshlFLCoQudDIxThnVEg2KQDrW61R19M7Ms9IAsznURc5y3wIAAAAAAAAAAaFl5JIAAABAIf9/ecA3id1mbHzJ2S9W5bRVqrjQr/c2+jHEuDNZevt3LDVSc+DmRMYie0eQ+vE7B3D+fRPb9yFzpfx4meTfBg=="; let txe = StellarBase.xdr.TransactionEnvelope.fromXDR(tx_meta, 'base64'); From 70ff35f91c56de0aee8f9395bffebef22d00df22 Mon Sep 17 00:00:00 2001 From: Amisha Singla Date: Wed, 31 Jul 2024 15:45:57 -0500 Subject: [PATCH 5/7] touch up --- docs/backfill.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/backfill.md b/docs/backfill.md index 1e7575a0..b80ab676 100644 --- a/docs/backfill.md +++ b/docs/backfill.md @@ -27,8 +27,7 @@ Copy above files in a GCS bucket. 
Following is example to extract FeeAccountMuxed: -``` -javascript +```javascript let tx_meta = "AAAABQAAAQAAABYMYQ4r9W/uB9X6q6VU6feQhS2kQoRy9CjvwtYXdPRSih2hZeSSAAAAAAAAAZAAAAACAAAAAJwLL0Ul/CyRZdXuenmdXrzVyX9X56m4kYPYmgppVIj8AAAAZAAF9PwAAAABAAAAAAAAAAAAAAABAAAAAQAAAQAAAFj7+8N85JwLL0Ul/CyRZdXuenmdXrzVyX9X56m4kYPYmgppVIj8AAAAAAAAAADN5igtu93OKhkj2NrSHuPEJktU+0gJ0LiNavJirLAmRwAAAAAF9eEAAAAAAAAAAAFpVIj8AAAAQElnt70S4sGicHyhsN1S29DEREZ7i2HU96+8DfyshlFLCoQudDIxThnVEg2KQDrW61R19M7Ms9IAsznURc5y3wIAAAAAAAAAAaFl5JIAAABAIf9/ecA3id1mbHzJ2S9W5bRVqrjQr/c2+jHEuDNZevt3LDVSc+DmRMYie0eQ+vE7B3D+fRPb9yFzpfx4meTfBg=="; let txe = StellarBase.xdr.TransactionEnvelope.fromXDR(tx_meta, 'base64'); @@ -49,11 +48,12 @@ OPTIONS ( library=["gs://stellar-test-js-udf/stellar-base.min.js"] -- path to js library in GCS ) AS r""" - let txe = StellarBase.xdr.TransactionEnvelope.fromXDR(tx_meta, 'base64'); - let tx = txe.feeBump(); - let sourceAccount = StellarBase.encodeMuxedAccountToAddress(tx.tx().feeSource()); - return sourceAccount - + return StellarBase.encodeMuxedAccountToAddress( + StellarBase.xdr.TransactionEnvelope.fromXDR(tx_meta, 'base64') + .feeBump() + .tx() + .feeSource() + ); """; WITH fee_bump_transactions AS From 5d87f6c973a1f4f48fe6fa853bbdd3535ad28b2d Mon Sep 17 00:00:00 2001 From: Amisha Singla Date: Wed, 31 Jul 2024 15:54:21 -0500 Subject: [PATCH 6/7] lints --- docs/backfill.md | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/docs/backfill.md b/docs/backfill.md index b80ab676..3c2af7ba 100644 --- a/docs/backfill.md +++ b/docs/backfill.md @@ -28,12 +28,15 @@ Copy above files in a GCS bucket. 
Following is example to extract FeeAccountMuxed: ```javascript - let tx_meta = "AAAABQAAAQAAABYMYQ4r9W/uB9X6q6VU6feQhS2kQoRy9CjvwtYXdPRSih2hZeSSAAAAAAAAAZAAAAACAAAAAJwLL0Ul/CyRZdXuenmdXrzVyX9X56m4kYPYmgppVIj8AAAAZAAF9PwAAAABAAAAAAAAAAAAAAABAAAAAQAAAQAAAFj7+8N85JwLL0Ul/CyRZdXuenmdXrzVyX9X56m4kYPYmgppVIj8AAAAAAAAAADN5igtu93OKhkj2NrSHuPEJktU+0gJ0LiNavJirLAmRwAAAAAF9eEAAAAAAAAAAAFpVIj8AAAAQElnt70S4sGicHyhsN1S29DEREZ7i2HU96+8DfyshlFLCoQudDIxThnVEg2KQDrW61R19M7Ms9IAsznURc5y3wIAAAAAAAAAAaFl5JIAAABAIf9/ecA3id1mbHzJ2S9W5bRVqrjQr/c2+jHEuDNZevt3LDVSc+DmRMYie0eQ+vE7B3D+fRPb9yFzpfx4meTfBg=="; - - let txe = StellarBase.xdr.TransactionEnvelope.fromXDR(tx_meta, 'base64'); - let tx = txe.feeBump(); - let sourceAccount = StellarBase.encodeMuxedAccountToAddress(tx.tx().feeSource()); - console.log(sourceAccount) +let tx_meta = + "AAAABQAAAQAAABYMYQ4r9W/uB9X6q6VU6feQhS2kQoRy9CjvwtYXdPRSih2hZeSSAAAAAAAAAZAAAAACAAAAAJwLL0Ul/CyRZdXuenmdXrzVyX9X56m4kYPYmgppVIj8AAAAZAAF9PwAAAABAAAAAAAAAAAAAAABAAAAAQAAAQAAAFj7+8N85JwLL0Ul/CyRZdXuenmdXrzVyX9X56m4kYPYmgppVIj8AAAAAAAAAADN5igtu93OKhkj2NrSHuPEJktU+0gJ0LiNavJirLAmRwAAAAAF9eEAAAAAAAAAAAFpVIj8AAAAQElnt70S4sGicHyhsN1S29DEREZ7i2HU96+8DfyshlFLCoQudDIxThnVEg2KQDrW61R19M7Ms9IAsznURc5y3wIAAAAAAAAAAaFl5JIAAABAIf9/ecA3id1mbHzJ2S9W5bRVqrjQr/c2+jHEuDNZevt3LDVSc+DmRMYie0eQ+vE7B3D+fRPb9yFzpfx4meTfBg=="; + +let txe = StellarBase.xdr.TransactionEnvelope.fromXDR(tx_meta, "base64"); +let tx = txe.feeBump(); +let sourceAccount = StellarBase.encodeMuxedAccountToAddress( + tx.tx().feeSource(), +); +console.log(sourceAccount); ``` Above will print `MBX64B6V7KV2KVHJ66IIKLNEIKCHF5BI57BNMF3U6RJIUHNBMXSJEAAACYGGCDRL6UFO2`, which is the `fee_account_muxed` value From 5f4311ccf0e4d09840e059c7a622159a8f46dd47 Mon Sep 17 00:00:00 2001 From: Amisha Singla Date: Wed, 31 Jul 2024 17:09:55 -0500 Subject: [PATCH 7/7] Add the merge statement as well --- docs/backfill.md | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/docs/backfill.md 
b/docs/backfill.md index 3c2af7ba..8dbc2e47 100644 --- a/docs/backfill.md +++ b/docs/backfill.md @@ -84,3 +84,47 @@ Row | transaction_hash | fee_account 2 | a9e49dff6202663633b83f3645fbf8c2cfeb915db99b2b884a86791b9f8eae2f | MBX64B6V7KV2KVHJ66IIKLNEIKCHF5BI57BNMF3U6RJIUHNBMXSJEAAACYGGCDRL6UFO2 3 | 00dba50c8689477e6990103338a0eb326725e07a7b7ff187359abf11c23c582a| MC5BEU3DCIMHOHRQDVDAPEPZGMBBALPJ3IQY23VTXC3454SQMNWVSAAACYGGCDRL6UX42 4 | 2e1c53a9fe1d48ddc493febe467178994e669e3eebf3a4cca646b3cb666616de|MAMYAUW45TC54C3QORQP7OOFYKOXCJTXOG2WIV5LP2HDMR67MWP6IAAACYGGCDRL6VCZM + +# Updating values of columns using UDF + +``` +CREATE TEMP FUNCTION getFeeBumpAccountIfExists(tx_meta STRING) +RETURNS STRING +LANGUAGE js +OPTIONS ( + library=["gs://stellar-test-js-udf/stellar-base.min.js"] +) +AS r""" + let txe = StellarBase.xdr.TransactionEnvelope.fromXDR(tx_meta, 'base64'); + let tx = txe.feeBump(); + let sourceAccount = StellarBase.encodeMuxedAccountToAddress(tx.tx().feeSource()); + return sourceAccount + +"""; + +MERGE `crypto_stellar.history_transactions` AS target +USING ( + WITH fee_bump_transactions AS ( + SELECT batch_run_date, transaction_hash, tx_envelope AS tx_meta + FROM `crypto_stellar.history_transactions` + WHERE batch_run_date > '2020-08-03' + AND batch_run_date < '2020-08-05' + AND inner_transaction_hash IS NOT NULL + ), + calculated_fee_account AS ( + SELECT batch_run_date, transaction_hash, getFeeBumpAccountIfExists(tx_meta) AS fee_account + FROM fee_bump_transactions + ), + calculated_fee_muxed_account AS ( + SELECT batch_run_date, transaction_hash, fee_account + FROM calculated_fee_account + WHERE fee_account LIKE 'M%' -- muxed accounts + ) + SELECT batch_run_date, transaction_hash, fee_account AS fee_account_muxed + FROM calculated_fee_muxed_account +) AS source +ON target.batch_run_date = source.batch_run_date + AND target.transaction_hash = source.transaction_hash +WHEN MATCHED THEN + UPDATE SET target.fee_account_muxed = source.fee_account_muxed; +```