From 913f5ddd2377fe80ce6062d68457a25c0f789feb Mon Sep 17 00:00:00 2001
From: Nick Downing
Date: Mon, 25 Apr 2022 02:01:36 +1000
Subject: [PATCH] Modify tokenizer to use a 1024-entry symbol table with closed hashing by CRC10

---
Reviewer notes (text between the three-dash line and the diff is not part
of the commit):

* What changes: the FNV-1a hash (fnv1a + fold) indexing a `tokens` array
  of single cells is removed.  It is replaced by `hash_table`, 1024
  records of three cells each: xt, name address, name length.
* Lookup (hash_table_find): the first slot probed is the value returned by
  crc10, called with the name and an initial value of hash_table_mask
  (1023).  Following slots are probed one at a time, wrapping around with
  `1+ hash_table_mask and`.  A slot whose xt cell is 0 is treated as empty
  and ends the search with found = false; a slot whose stored name
  compares equal ends it with found = true.
* After 1024 probes with neither an empty nor a matching slot, the word
  throws except_hash_table_full (100).
* crc10 is not defined in this hunk; it has to be provided elsewhere.
  The masking step after it is commented out, so the result is presumably
  already in 0..1023 -- TODO confirm against the crc10 definition.
* ?token keeps the stack comment ( c-addr u -- x ), but per its own final
  comment it now leaves the address of the entry's xt cell for the caller
  to fill in, not the value stored there.
* ?token stores only the name address and name length of a new entry.
  The xt cell stays 0 until the caller stores into it, and until then
  hash_table_find still sees that slot as empty.
* The old ?token aborted with "collides with another token" when the
  hashed slot was already occupied.  The new ?token returns the existing
  entry's xt cell address when the name is already present.
* New names are copied to `here` and the space is reserved with `allot`.
* NOTE(review): line breaks and indentation in this copy were restored
  from a whitespace-collapsed version of the patch.  Line counts agree
  with the hunk header and diffstat, but the exact leading whitespace of
  each line could not be recovered, so apply with whitespace tolerance.

 preForth/seedForth-tokenizer.fs | 107 +++++++++++++++++++++++---------
 1 file changed, 77 insertions(+), 30 deletions(-)

diff --git a/preForth/seedForth-tokenizer.fs b/preForth/seedForth-tokenizer.fs
index 8a4db0a..6ac5ed6 100644
--- a/preForth/seedForth-tokenizer.fs
+++ b/preForth/seedForth-tokenizer.fs
@@ -2,41 +2,88 @@

 \ seedForth does not support hex so put some useful constants in decimal
 255 Constant xFF
+1023 Constant x3FF
+1024 Constant x400
 65261 Constant xFEED
-4294967295 Constant xFFFFFFFF

-: fnv1a ( c-addr u -- x )
-    2166136261 >r
-    BEGIN dup WHILE over c@ r> xor 16777619 um* drop xFFFFFFFF and >r 1 /string REPEAT 2drop r> ;
-
-15 Constant #hashbits
-1 #hashbits lshift Constant #hashsize
-
-#hashbits 16 < [IF]
-
-  #hashsize 1 - Constant tinymask
-  : fold ( x1 -- x2 ) dup #hashbits rshift xor tinymask and ;
-
-[ELSE] \ #hashbits has 16 bits or more
-
-  #hashsize 1 - Constant mask
-  : fold ( x1 -- x2 ) dup #hashbits rshift swap mask and xor ;
-
-[THEN]
-
-Create tokens #hashsize cells allot tokens #hashsize cells 0 fill
+\ exceptions
+100 Constant except_hash_table_full
+
+\ hash table entry structure
+0 Constant _hash_table_xt
+1 cells Constant _hash_table_name_addr
+2 cells Constant _hash_table_name_len
+3 cells Constant #hash_table
+
+\ the sizing below accommodates up to 1K word definitions
+\ (the same as the number of tokens available to seedForth)
+x3FF Constant hash_table_mask
+x400 Constant hash_table_size
+Create hash_table
+hash_table_size #hash_table * dup allot hash_table swap 0 fill
+
+: hash_table_index ( entry -- addr )
+  #hash_table * hash_table + ;
+
+: hash_table_find ( name_addr name_len -- entry_addr found )
+  \ calculate CRC10 of the symbol name
+  \ initial value is same as hash table mask (all 1s)
+  2dup hash_table_mask crc10
+  \ hash_table_mask and
+
+  \ using the CRC10 as the starting entry, look circularly
+  \ for either a null entry (not found) or a matching entry
+  hash_table_size 0 ?DO ( name_addr name_len entry )
+    dup >r hash_table_index >r ( name_addr name_len R: entry entry_addr )
+
+    \ check for null entry
+    r@ _hash_table_xt + @ 0= IF
+      2drop r> r> drop false UNLOOP exit
+    THEN
+
+    \ check for matching entry
+    2dup
+    r@ _hash_table_name_addr + @
+    r@ _hash_table_name_len + @
+    compare 0= IF
+      2drop r> r> drop true UNLOOP exit
+    THEN
+
+    \ go to next entry, circularly
+    r> drop
+    r> 1+ hash_table_mask and
+  LOOP
+
+  \ not found, and no room for new entry
+  except_hash_table_full throw
+;

-: 'token ( c-addr u -- addr )
-  fnv1a fold cells tokens + ;
+: token@ ( c-addr u -- x )
+  \ get entry address and flag for found/empty
+  hash_table_find

-: token@ ( c-addr u -- x ) 'token @ ;
+  \ if found, return value of _xt, otherwise 0
+  IF _hash_table_xt + @ ELSE drop 0 THEN
+;

-: ?token ( c-addr u -- x )
-  2dup 'token dup @
-  IF
-     >r cr type ." collides with another token "
-     cr source type cr r> @ abort \ ??? name-see abort
-  THEN nip nip ;
+: ?token ( c-addr u -- x )
+  \ get entry address and flag for found/empty
+  2dup hash_table_find
+
+  \ if empty, copy symbol name and fill in entry
+  0= IF
+    >r
+    here r@ _hash_table_name_addr + !
+    dup r@ _hash_table_name_len + !
+    here swap dup allot cmove
+    r>
+  ELSE
+    nip nip
+  THEN
+
+  \ return address of _xt for caller to fill in
+  _hash_table_xt +
+;

 \ VARIABLE OUTFILE
