Skip to content

Commit

Permalink
ballet/txn: small changes to improve parse performance, formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
ptaffet-jump committed Nov 21, 2023
1 parent fa4790c commit 7e91d9b
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 56 deletions.
116 changes: 61 additions & 55 deletions src/ballet/txn/fd_txn_parse.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,38 +9,41 @@ fd_txn_parse_core( uchar const * payload,
void * out_buf,
fd_txn_parse_counters_t * counters_opt,
ulong * payload_sz_opt,
int allow_zero_signatures ) {
int allow_zero_signatures ) {
ulong i = 0UL;
/* This code does non-trivial parsing of untrusted user input, which is a potentially dangerous thing.
The main invariants we need to ensure are
/* This code does non-trivial parsing of untrusted user input, which
is a potentially dangerous thing. The main invariants we need to
ensure are
A) i<=payload_sz at all times
B) i< payload_sz prior to reading
As long as these invariants hold, it's safe to read payload[ i ].
To ensure this, we force the following discipline for all parsing steps:
To ensure this, we force the following discipline for all parsing
steps:
Step 1. Assert there are enough bytes to read the field
Step 2. Read the field
Step 3. Advance i
Step 4. Validate the field (if there's anything to do)
This code is structured highly horizontally to make it very clear that it
is correct.
This code is structured highly horizontally to make it very clear
that it is correct.
The first 3 steps are in three columns. The variable `i` only appears in
very specific locations on the line (try searching for \<i\> in VIM to see
this).
The first 3 steps are in three columns. The variable `i` only
appears in very specific locations on the line (try searching for
\<i\> in VIM to see this).
The CHECK_LEFT( x ) call in the first column and the i+=x in the third
column always have the same argument, which ensures invariant A holds.
"Prior to reading" from invariant B corresponds to the middle column,
which is the only place `i` is read. Because x is positive, the
CHECK_LEFT( x ) in the first column ensures invariant B holds.
The CHECK_LEFT( x ) call in the first column and the i+=x in the
third column always have the same argument, which ensures invariant
A holds. "Prior to reading" from invariant B corresponds to the
middle column, which is the only place `i` is read. Because x is
positive, the CHECK_LEFT( x ) in the first column ensures invariant
B holds.
Unfortunately for variable length integers, we have to combine the first
two columns into a call to READ_CHECKED_COMPACT_U16 that also promises not
to use any out-of-bounds data.
Unfortunately for variable length integers, we have to combine the
first two columns into a call to READ_CHECKED_COMPACT_U16 that also
promises not to use any out-of-bounds data.
The assignments are done in chunks in as close to the same order as
possible as the variables are declared in the struct, making it very clear
every variable has been initialized. */
possible as the variables are declared in the struct, making it
very clear every variable has been initialized. */

/* A temporary for storing the return value of fd_cu16_dec_sz */
ulong bytes_consumed = 0UL;
Expand All @@ -54,13 +57,14 @@ fd_txn_parse_core( uchar const * payload,
return 0UL; \
} \
} while( 0 )
/* CHECK that it is safe to read at least n more bytes assuming i is the
current location. n is untrusted and could trigger overflow, so don't do
i+n<=payload_sz */
/* CHECK that it is safe to read at least n more bytes assuming i is
the current location. n is untrusted and could trigger overflow, so
don't do i+n<=payload_sz */
#define CHECK_LEFT( n ) CHECK( (n)<=(payload_sz-i) )
/* READ_CHECKED_COMPACT_U16 safely reads a compact-u16 from the indicated
location in the payload. It stores the resulting value in the ushort
variable called var_name. It stores the size in out_sz. */
/* READ_CHECKED_COMPACT_U16 safely reads a compact-u16 from the
indicated location in the payload. It stores the resulting value
in the ushort variable called var_name. It stores the size in
out_sz. */
#define READ_CHECKED_COMPACT_U16( out_sz, var_name, where ) \
do { \
ulong _where = (where); \
Expand All @@ -70,14 +74,16 @@ fd_txn_parse_core( uchar const * payload,
(out_sz) = _out_sz; \
} while( 0 )

/* Minimal instr has 1B for program id, 1B acct_addr list, 1B for no data */
/* Minimal instr has 1B for program id, 1B for an acct_addr list
containing no accounts, 1B for length-0 instruction data */
#define MIN_INSTR_SZ (3UL)
CHECK( payload_sz<=FD_TXN_MTU );

/* The documentation sometimes calls this field a compact-u16 and sometimes a u8.
Because of transaction size caps, even allowing for a 3k transaction caps the
signatures at 48, so we're comfortably in the range where a compact-u16 and a
u8 are represented the same way. */
/* The documentation sometimes calls signature_cnt a compact-u16 and
sometimes a u8. Because of transaction size limits, even allowing
for a 3k transaction caps the signatures at 48, so we're
comfortably in the range where a compact-u16 and a u8 are
represented the same way. */
CHECK_LEFT( 1UL ); uchar signature_cnt = payload[ i ]; i++;
/* Must have at least one signer for the fee payer */
CHECK( allow_zero_signatures | ((1UL<=signature_cnt) & (signature_cnt<=FD_TXN_SIG_MAX)) );
Expand Down Expand Up @@ -118,11 +124,13 @@ fd_txn_parse_core( uchar const * payload,

CHECK( (ulong)instr_cnt<=FD_TXN_INSTR_MAX );
CHECK_LEFT( MIN_INSTR_SZ*instr_cnt );
CHECK( allow_zero_signatures | ((ulong)acct_addr_cnt>(!!instr_cnt)) ); /* If it has >0 instructions, it must have at least one other account address (the program id) that can't be the fee payer */
/* If it has >0 instructions, it must have at least one other account
address (the program id) that can't be the fee payer */
CHECK( allow_zero_signatures | ((ulong)acct_addr_cnt>(!!instr_cnt)) );

fd_txn_t * parsed = (fd_txn_t *)out_buf;

if (NULL != parsed) {
if( parsed ) {
parsed->transaction_version = transaction_version;
parsed->signature_cnt = signature_cnt;
parsed->signature_off = (ushort)signature_off;
Expand All @@ -132,38 +140,40 @@ fd_txn_parse_core( uchar const * payload,
parsed->acct_addr_cnt = acct_addr_cnt;
parsed->acct_addr_off = (ushort)acct_addr_off;
parsed->recent_blockhash_off = (ushort)recent_blockhash_off;
/* Need to assign addr_table_lookup_cnt, addr_table_adtl_writable_cnt,
addr_table_adtl_cnt, _padding_reserved_1 later */
/* Need to assign addr_table_lookup_cnt,
addr_table_adtl_writable_cnt, addr_table_adtl_cnt,
_padding_reserved_1 later */
parsed->instr_cnt = instr_cnt;
}

uchar max_acct = 0UL;
for( ulong j=0UL; j<instr_cnt; j++ ) {

/* parsing instruction */

/* Parsing instruction */
ushort acct_cnt = (ushort)0;
ushort data_sz = (ushort)0;
CHECK_LEFT( MIN_INSTR_SZ ); uchar program_id = payload[ i ]; i++;
READ_CHECKED_COMPACT_U16( bytes_consumed, acct_cnt, i ); i+=bytes_consumed;
CHECK_LEFT( acct_cnt ); ulong acct_off = i ; i+=acct_cnt;
CHECK_LEFT( acct_cnt ); ulong acct_off = i ;
for( ulong k=0; k<acct_cnt; k++ ) { max_acct=fd_uchar_max( max_acct, payload[ k+i ] ); } i+=acct_cnt;
READ_CHECKED_COMPACT_U16( bytes_consumed, data_sz, i ); i+=bytes_consumed;
CHECK_LEFT( data_sz ); ulong data_off = i ; i+=data_sz;

/* Account 0 is the fee payer and the program can't be the fee payer.
The fee payer account must be owned by the system program, but the
program must be an executable account and the system program is not
permitted to own any executable account.
As of https://github.com/solana-labs/solana/issues/25034, the program ID
can't come from a table. */
/* Account 0 is the fee payer and the program can't be the fee
payer. The fee payer account must be owned by the system
program, but the program must be an executable account and the
system program is not permitted to own any executable account.
As of https://github.com/solana-labs/solana/issues/25034, the
program ID can't come from a table. */
CHECK( allow_zero_signatures | ((0UL < (ulong)program_id) & ((ulong)program_id < (ulong)acct_addr_cnt) ) );

if( NULL != parsed ){
if( parsed ){
parsed->instr[ j ].program_id = program_id;
parsed->instr[ j ]._padding_reserved_1 = (uchar)0;
parsed->instr[ j ].acct_cnt = acct_cnt;
parsed->instr[ j ].data_sz = data_sz;
/* By our invariant, i<size when it was copied into acct_off and data_off,
and size<=USHORT_MAX from above, so this cast is safe */
/* By our invariant, i<size when it was copied into acct_off and
data_off, and size<=USHORT_MAX from above, so this cast is safe */
parsed->instr[ j ].acct_off = (ushort)acct_off;
parsed->instr[ j ].data_off = (ushort)data_off;
}
Expand Down Expand Up @@ -194,7 +204,7 @@ fd_txn_parse_core( uchar const * payload,

CHECK( writable_cnt<=FD_TXN_ACCT_ADDR_MAX-acct_addr_cnt ); /* implies <256 */
CHECK( readonly_cnt<=FD_TXN_ACCT_ADDR_MAX-acct_addr_cnt );
if (NULL != address_tables) {
if( address_tables ) {
address_tables[ j ].addr_off = (ushort)addr_off;
address_tables[ j ].writable_cnt = (uchar )writable_cnt;
address_tables[ j ].readonly_cnt = (uchar )readonly_cnt;
Expand All @@ -208,18 +218,14 @@ fd_txn_parse_core( uchar const * payload,
}
#undef MIN_ADDR_LUT_SIZE
/* Check for leftover bytes if out_sz_opt not specified. */
CHECK( payload_sz_opt!=NULL || i==payload_sz );
CHECK( (payload_sz_opt!=NULL) | (i==payload_sz) );

CHECK( acct_addr_cnt+addr_table_adtl_cnt<=FD_TXN_ACCT_ADDR_MAX ); /* implies addr_table_adtl_cnt<256 */

if (NULL != parsed) {
/* Final validation that all the account address indices are in range */
for( ulong j=0; j<instr_cnt; j++ ) {
for( ulong k=0; k<parsed->instr[ j ].acct_cnt; k++ ) {
CHECK( payload[ parsed->instr[ j ].acct_off + k ] < acct_addr_cnt + addr_table_adtl_cnt );
}
}
/* Final validation that all the account address indices are in range */
CHECK( max_acct < acct_addr_cnt + addr_table_adtl_cnt );

if( parsed ) {
/* Assign final variables */
parsed->addr_table_lookup_cnt = (uchar)addr_table_cnt;
parsed->addr_table_adtl_writable_cnt = (uchar)addr_table_adtl_writable_cnt;
Expand Down
2 changes: 1 addition & 1 deletion src/ballet/txn/test_txn_parse.c
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ void test_mutate( uchar const * payload,

void test_performance( uchar const * payload,
ulong sz ) {
const ulong test_count = 1000000;
const ulong test_count = 10000000UL;
long start = fd_log_wallclock( );
for( ulong i = 0; i < test_count; i++ ) {
FD_TEST( fd_txn_parse( payload, sz, out_buf, NULL ) );
Expand Down

0 comments on commit 7e91d9b

Please sign in to comment.