Skip to content

Commit

Permalink
Add unicode rules for ints
Browse files Browse the repository at this point in the history
  • Loading branch information
Marven11 committed Dec 1, 2023
1 parent 745b13b commit b33fda7
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 9 deletions.
40 changes: 40 additions & 0 deletions fenjing/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,3 +165,43 @@
"}}",
"~",
]

# Code point of the "zero" digit of each alternative digit set. Every set is
# ten consecutive code points (zero through nine), so only the first one is
# listed. Sets that Python 3.2 does not support are left out; they are kept
# below as comments for reference.
_UNICODE_INT_ZERO_CHARCODES = (
    1632, 1776, 1984, 2406, 2534, 2662, 2790, 2918, 3046, 3174, 3302, 3430,
    # 3558,  (excluded)
    3664, 3792, 3872, 4160, 4240, 6112, 6160, 6470, 6608, 6784, 6800, 6992,
    7088, 7232, 7248, 42528, 43216, 43264, 43472,
    # 43504,  (excluded)
    43600, 44016, 65296,
)

# One row per digit set; row[i] is the code point of the digit with value i.
UNICODE_INT_CHARCODES = [
    list(range(zero, zero + 10))
    for zero in _UNICODE_INT_ZERO_CHARCODES
]
33 changes: 24 additions & 9 deletions fenjing/payload_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,19 @@ def str_escape(value: str, quote="'"):
return value.replace("\\", "\\\\").replace(quote, "\\" + quote)


def transform_int_chars_charcodes(int_chars, charcodes, keepfirst=False):
    """Swap the ASCII digits in ``int_chars`` for digits of another digit set.

    Args:
        int_chars: Characters of an integer literal (any iterable of
            single-character strings, normally a ``str``).
        charcodes: Code points of the replacement digits. Each one must be
            a character that ``int()`` parses as a single digit; its value
            decides which ASCII digit it replaces.
        keepfirst: When true, the first character is copied through
            unchanged and only the following characters are substituted.

    Returns:
        A string in which every ASCII digit that has a counterpart in
        ``charcodes`` is replaced. Characters without a counterpart (for
        example hex letters) are left as they are.

    Raises:
        ValueError: If a code point in ``charcodes`` is not a digit.
    """
    # Maps "0".."9" to the matching character of the target digit set.
    digit_map = {str(int(chr(code))): chr(code) for code in charcodes}
    return "".join(
        char if (keepfirst and index == 0) else digit_map.get(char, char)
        for index, char in enumerate(int_chars)
    )

def transform_int_chars_unicode(int_chars):
    """Return ``int_chars`` rewritten once per entry of UNICODE_INT_CHARCODES.

    The result is a list with one string for each digit set, in the same
    order as ``UNICODE_INT_CHARCODES``; each string is produced by
    ``transform_int_chars_charcodes`` for that digit set.
    """
    variants = []
    for digit_codes in UNICODE_INT_CHARCODES:
        variants.append(transform_int_chars_charcodes(int_chars, digit_codes))
    return variants

class CacheByRepr:
def __init__(self):
self.cache = {}
Expand Down Expand Up @@ -1016,23 +1029,25 @@ def gen_positive_integer_underline(context: dict, value: int):
def gen_positive_integer_unicode(context: dict, value: int):
    """Build a decimal integer literal whose trailing digits are non-ASCII.

    Args:
        context: Generation context; not read by this rule.
        value: The integer to express.

    Returns:
        ``[(UNSATISFIED,)]`` when ``value <= 9`` (there is no digit after
        the first one to substitute). Otherwise a one-element target list
        holding an EXPRESSION at ``precedence["literal"]`` made of the
        first digit as a plain LITERAL followed by a ONEOF with one
        LITERAL alternative per string returned by
        ``transform_int_chars_unicode`` for the remaining digits.
    """
    if value <= 9:
        return [(UNSATISFIED,)]
    digits = str(value)
    # The leading digit is emitted unchanged; only the digits after it are
    # swapped. NOTE(review): presumably the template lexer requires an
    # ASCII first digit -- confirm.
    payload_targets = [
        [(LITERAL, payload)]
        for payload in transform_int_chars_unicode(digits[1:])
    ]
    return [
        (
            EXPRESSION,
            precedence["literal"],
            [(LITERAL, digits[0]), (ONEOF, *payload_targets)],
        )
    ]


@expression_gen
def gen_positive_integer_unicodehex(context: dict, value: int):
    """Build a ``0x`` hexadecimal literal whose decimal digits are non-ASCII.

    Args:
        context: Generation context; not read by this rule.
        value: The integer to express.

    Returns:
        ``[(UNSATISFIED,)]`` when ``value <= 0``. Otherwise a one-element
        target list holding an EXPRESSION at ``precedence["literal"]`` made
        of the LITERAL ``"0x"`` followed by a ONEOF with one LITERAL
        alternative per string returned by ``transform_int_chars_unicode``
        for the hex digits of ``value``.
    """
    if value <= 0:
        return [(UNSATISFIED,)]
    # hex() yields "0x..."; drop that prefix because "0x" is emitted as its
    # own ASCII literal below.
    value_hex_literal = hex(value)[2:]
    payload_targets = [
        [(LITERAL, payload)]
        for payload in transform_int_chars_unicode(value_hex_literal)
    ]
    targets_list = [(LITERAL, "0x"), (ONEOF, *payload_targets)]
    return [(EXPRESSION, precedence["literal"], targets_list)]


Expand Down

0 comments on commit b33fda7

Please sign in to comment.