Skip to content

Commit

Permalink
Merge pull request #1223 from andrew-johnson-4/c-ansi-frontend-qpfjp
Browse files Browse the repository at this point in the history
C ansi frontend qpfjp
  • Loading branch information
andrew-johnson-4 authored Feb 7, 2025
2 parents 7f3a422 + 4c8ba48 commit 62dd1e8
Show file tree
Hide file tree
Showing 4 changed files with 249 additions and 14 deletions.
18 changes: 18 additions & 0 deletions PLATFORM/C/LIB/regex.lm
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,24 @@ atom suffix Regex _rgx;
(==( (as status U64) 0_u64 ))
) U64);

# Whole-string regex equality: the result is true only when `rgx` matches
# `text` starting at offset 0 and ending at (.length text).
# regexec is called with a single match slot (nmatch = 1) and eflags = 0; the
# result requires status == 0 AND rm_so == 0 AND rm_eo == (.length text).
# NOTE(review): `matches` starts uninitialized, so rm_so/rm_eo are only
# meaningful when regexec reports a match — confirm that && short-circuits.
== := λ(: text String)(: rgx Regex). (: (
(let matches (: __uninitialized C_regmatch__t_[1]))
(let status (regexec(
(as (& rgx) C_regex__t_*_)
(as text C_char_*)
(as 1_u64 C_size__t_)
(as matches C_regmatch__t_*)
(as 0_u64 C_int)
)))
(&&(
(==( (as status U64) 0_u64 ))
(&&(
(==( (.rm_so([]( matches 0_u64 ))) 0_u64 ))
(==( (.rm_eo([]( matches 0_u64 ))) (.length text) ))
))
))
) U64);

.remove-prefix := λ(: text SmartString)(: rgx Regex). (: (
(let matches (: __uninitialized C_regmatch__t_[1]))
(let status (regexec(
Expand Down
128 changes: 114 additions & 14 deletions PLUGINS/FRONTEND/C/c-parse.lsts
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,84 @@
# However, there are also a large number of compiler-specific extensions that are parsed but mostly ignored
# Example: __extension__ ( f, g )

# A C constant, tagged by the kind of constant it represents.
# Every variant carries exactly one field, `value`, holding a CString.
# NOTE(review): `value` looks like the raw, un-normalized token text
# (std-c-parse-constant stores head(tokens).key) — confirm before relying on it.
type CConstant = CConstantInteger{value:CString}
| CConstantCharacter{value:CString}
| CConstantFloating{value:CString}
| CConstantEnumeration{value:CString};

# Ordering over CConstant values.
# When the two constants differ in what $".0" reads from them (presumably the
# variant tag — confirm), that comparison decides the order. Otherwise both
# constants are the same variant and their `value` strings are compared.
let cmp(l: CConstant, r: CConstant): Ord = (
   let ltag = $".0"(l);
   let rtag = $".0"(r);
   if ltag != rtag then cmp(ltag, rtag)
   else match Tuple{l, r} {
      Tuple{ first:CConstantInteger{a=value}, second:CConstantInteger{b=value} } => cmp(a, b);
      Tuple{ first:CConstantCharacter{a=value}, second:CConstantCharacter{b=value} } => cmp(a, b);
      Tuple{ first:CConstantFloating{a=value}, second:CConstantFloating{b=value} } => cmp(a, b);
      Tuple{ first:CConstantEnumeration{a=value}, second:CConstantEnumeration{b=value} } => cmp(a, b);
   }
);

# Entry point for parsing a C token stream.
# Currently a stub: the body contains only a commented-out loop, so the
# function does nothing with `tokens`.
# The commented loop shows the intended shape: repeatedly hand the remaining
# tokens to std-c-parse-external-declaration until none are left.
let std-c-parse(tokens: List<Token>): Nil = (
# while non-zero(tokens) { tokens = std-c-parse-external-declaration(tokens); }
);

# Reports whether `tk` is spelled exactly like one of the C keywords listed
# below. The comparison is case-sensitive; any other spelling yields false.
let std-c-is-reserved-word(tk: CString): U64 = (
   # keywords present since C89
   tk == c"auto" || tk == c"break" || tk == c"case" || tk == c"char"
   || tk == c"const" || tk == c"continue" || tk == c"default" || tk == c"do"
   || tk == c"double" || tk == c"else" || tk == c"enum" || tk == c"extern"
   || tk == c"float" || tk == c"for" || tk == c"goto" || tk == c"if"
   || tk == c"int" || tk == c"long" || tk == c"register" || tk == c"return"
   || tk == c"short" || tk == c"signed" || tk == c"sizeof" || tk == c"static"
   || tk == c"struct" || tk == c"switch" || tk == c"typedef" || tk == c"union"
   || tk == c"unsigned" || tk == c"void" || tk == c"volatile" || tk == c"while"
   # keywords added by C99
   || tk == c"_Bool" || tk == c"_Complex" || tk == c"_Imaginary"
   || tk == c"inline" || tk == c"restrict"
   # keywords added by C11
   || tk == c"_Alignas" || tk == c"_Alignof" || tk == c"_Atomic"
   || tk == c"_Generic" || tk == c"_Noreturn" || tk == c"_Static_assert"
   || tk == c"_Thread_local"
);

# Reports whether the token text `tk` belongs to the lexical class `cls`.
#
# Recognized class names:
#   "identifier"  a letter or underscore followed by letters, digits or
#                 underscores, and not a reserved word
#   "integer"     a decimal, octal, hexadecimal (0x or 0X) or binary (0b or 0B)
#                 constant, followed by any run of the suffixes u U l L wb WB
#   "character"   an optionally prefixed (u8, u, U, L) single-quoted constant
# Any other `cls` is treated as a literal: the token must equal `cls` itself.
#
# NOTE(review): `tk == r/.../` is presumed to be a whole-token match, not a
# prefix match — confirm against the CString/Regex `==` overload.
let std-c-has-class(tk: CString, cls: String): U64 = (
   match cls {
      "identifier" => tk == r/^[a-zA-Z_][a-zA-Z0-9_]*/ && not(std-c-is-reserved-word(tk));
      "integer" => tk == r/^[0-9]+([uU]|[lL]|wb|WB)*/ # decimal constant
                || tk == r/^[0][0-7]+([uU]|[lL]|wb|WB)*/ # octal constant
                || tk == r/^[0][xX][0-9a-fA-F]+([uU]|[lL]|wb|WB)*/ # hexadecimal constant, either prefix case
                || tk == r/^[0][bB][01]+([uU]|[lL]|wb|WB)*/; # binary constant
      "character" => tk == r/^(u8|u|U|L)?[']([^']|([\\][']))+[']/; # character constant
      _ => tk == cls;
   }
);

let std-c-can-take(tokens: List<Token>, cls: String): U64 = (
Expand Down Expand Up @@ -315,9 +387,6 @@ let std-c-take-maybe(tokens: List<Token>, cls: String): List<Token> = (
#struct-or-union-specifier = struct-or-union, '{', struct-declaration-list, '}'
# | struct-or-union, identifier, ['{', struct-declaration-list, '}'];

#struct-or-union = 'struct'
# | 'union';

#struct-declaration-list = struct-declaration, {struct-declaration};

#struct-declaration = specifier-qualifier-list, ';' (* for anonymous struct/union *)
Expand Down Expand Up @@ -383,6 +452,47 @@ let std-c-parse-assignment-operator(tokens: List<Token>): Tuple<Maybe<CString>,L
else Tuple{ no, tokens }
);

# Tries to consume one C unary operator from the front of `tokens`.
# On success returns Tuple{ Some{operator}, tail(tokens) }, where operator is
# one of & * + - ~ !. On an empty list, or when the first token is anything
# else, returns Tuple{ None, tokens } with the input left untouched.
let std-c-parse-unary-operator(tokens: List<Token>): Tuple<Maybe<CString>,List<Token>> = (
   let nothing = None :: Maybe<CString>;
   let unmatched = Tuple{ nothing, tokens };
   if non-zero(tokens) then (
      if head(tokens).key == c"&" then Tuple{ Some{c"&"}, tail(tokens) }
      else if head(tokens).key == c"*" then Tuple{ Some{c"*"}, tail(tokens) }
      else if head(tokens).key == c"+" then Tuple{ Some{c"+"}, tail(tokens) }
      else if head(tokens).key == c"-" then Tuple{ Some{c"-"}, tail(tokens) }
      else if head(tokens).key == c"~" then Tuple{ Some{c"~"}, tail(tokens) }
      else if head(tokens).key == c"!" then Tuple{ Some{c"!"}, tail(tokens) }
      else unmatched
   ) else unmatched
);

# Tries to consume the keyword `struct` or `union` from the front of `tokens`.
# On success returns Tuple{ Some{keyword}, tail(tokens) }. On an empty list, or
# when the first token is neither keyword, returns Tuple{ None, tokens } with
# the input left untouched.
let std-c-parse-struct-or-union(tokens: List<Token>): Tuple<Maybe<CString>,List<Token>> = (
   let nothing = None :: Maybe<CString>;
   let unmatched = Tuple{ nothing, tokens };
   if non-zero(tokens) then (
      if head(tokens).key == c"struct" then Tuple{ Some{c"struct"}, tail(tokens) }
      else if head(tokens).key == c"union" then Tuple{ Some{c"union"}, tail(tokens) }
      else unmatched
   ) else unmatched
);

# Tries to consume one identifier from the front of `tokens`.
# When std-c-can-take accepts the "identifier" class, returns
# Tuple{ Some{first token's key}, tail(tokens) }; otherwise returns
# Tuple{ None, tokens } with the input left untouched.
let std-c-parse-identifier(tokens: List<Token>): Tuple<Maybe<CString>,List<Token>> = (
   let nothing = None :: Maybe<CString>;
   if not(std-c-can-take(tokens, "identifier")) then Tuple{ nothing, tokens }
   else Tuple{ Some{head(tokens).key}, tail(tokens) }
);

# Tries to consume one constant from the front of `tokens`.
# The "integer" class is tried first, then "character"; the matching token's
# key is wrapped in CConstantInteger or CConstantCharacter respectively and
# returned together with tail(tokens). When neither class matches, returns
# Tuple{ None, tokens } with the input left untouched.
#
# TODO: floating and enumeration constants are not recognized yet. When they
# are added, a "floating" token should produce CConstantFloating and an
# "enumeration" token should produce CConstantEnumeration.
let std-c-parse-constant(tokens: List<Token>): Tuple<Maybe<CConstant>,List<Token>> = (
   let nothing = None :: Maybe<CConstant>;
   let unmatched = Tuple{ nothing, tokens };
   if std-c-can-take(tokens, "integer")
   then Tuple{ Some{CConstantInteger{head(tokens).key}}, tail(tokens) }
   else if std-c-can-take(tokens, "character")
   then Tuple{ Some{CConstantCharacter{head(tokens).key}}, tail(tokens) }
   else unmatched
);

#constant = integer-constant
# | character-constant
# | floating-constant
# | enumeration-constant;


#parameter-list = parameter-declaration, {',', parameter-declaration};

#parameter-declaration = declaration-specifiers, [declarator | abstract-declarator];
Expand Down Expand Up @@ -431,12 +541,6 @@ let std-c-parse-assignment-operator(tokens: List<Token>): Tuple<Maybe<CString>,L
# | postfix-expression, ('++' | '--')
# | '(', type-name, ')', '{', initializer-list, [','], '}';

#unary-operator = '&'
# | '*'
# | '+'
# | '-'
# | '~'
# | '!';

#primary-expression = identifier
# | constant
Expand All @@ -446,10 +550,6 @@ let std-c-parse-assignment-operator(tokens: List<Token>): Tuple<Maybe<CString>,L

#argument-expression-list = assignment-expression, {',', assignment-expression};

#constant = integer-constant
# | character-constant
# | floating-constant
# | enumeration-constant;

#string = string-literal
# | '__func__';
Expand Down
3 changes: 3 additions & 0 deletions PLUGINS/FRONTEND/C/c-smart-tokenize.lsts
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ let std-c-tokenize-string(file-path: String, text: String): List<Token> = (
(lit=r/^["]([^"\\]|([\\].))*["]/).. rest => (
tokens = cons(text[:lit.length], tokens); text = rest;
);
(cl=r/^(u8|u|U|L)?[']([^']|([\\][']))+[']/).. rest => (
tokens = cons(text[:cl.length], tokens); text = rest;
);

(id=r/^[a-zA-Z0-9_]+/).. rest => (
tokens = cons(text[:id.length], tokens); text = rest;
Expand Down
114 changes: 114 additions & 0 deletions tests/c/c-parse.lsts
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,117 @@ if true then {
assert( std-c-parse-assignment-operator(tokens).first == Some{c"|="} );
};
};

# std-c-parse-unary-operator: a non-operator token yields None, and each of the
# six unary operators is returned as Some{operator}.
if true then {
   assert( std-c-parse-unary-operator(std-c-tokenize-string("abc", "abc")).first == None :: Maybe<CString> );
   assert( std-c-parse-unary-operator(std-c-tokenize-string("[&]", "&")).first == Some{c"&"} );
   assert( std-c-parse-unary-operator(std-c-tokenize-string("[*]", "*")).first == Some{c"*"} );
   assert( std-c-parse-unary-operator(std-c-tokenize-string("[+]", "+")).first == Some{c"+"} );
   assert( std-c-parse-unary-operator(std-c-tokenize-string("[-]", "-")).first == Some{c"-"} );
   assert( std-c-parse-unary-operator(std-c-tokenize-string("[~]", "~")).first == Some{c"~"} );
   assert( std-c-parse-unary-operator(std-c-tokenize-string("[!]", "!")).first == Some{c"!"} );
};

# std-c-parse-struct-or-union: an ordinary identifier yields None, while the
# keywords `struct` and `union` are returned as Some{keyword}.
if true then {
   assert( std-c-parse-struct-or-union(std-c-tokenize-string("abc", "abc")).first == None :: Maybe<CString> );
   assert( std-c-parse-struct-or-union(std-c-tokenize-string("[struct]", "struct")).first == Some{c"struct"} );
   assert( std-c-parse-struct-or-union(std-c-tokenize-string("[union]", "union")).first == Some{c"union"} );
};

# std-c-parse-identifier: lower-case, upper-case and underscore-led names are
# accepted; the reserved word `int` is rejected.
if true then {
   assert( std-c-parse-identifier(std-c-tokenize-string("[abc]", "abc")).first == Some{c"abc"} );
   assert( std-c-parse-identifier(std-c-tokenize-string("[ABC]", "ABC")).first == Some{c"ABC"} );
   assert( std-c-parse-identifier(std-c-tokenize-string("_0", "_0")).first == Some{c"_0"} );
   assert( std-c-parse-identifier(std-c-tokenize-string("[int]", "int")).first == None :: Maybe<CString> );
};

# std-c-parse-constant: an identifier yields None; integer constants (decimal,
# hexadecimal, binary, with suffix combinations) yield CConstantInteger; and
# character constants (plain, prefixed, escaped) yield CConstantCharacter.
if true then {
   let abc = std-c-tokenize-string("abc", "abc");
   assert( std-c-parse-constant(abc).first == None :: Maybe<CConstant> );

   # integer constants
   if true then {
      let tokens = std-c-tokenize-string("[0123456789]", "0123456789");
      assert( std-c-parse-constant(tokens).first == Some{CConstantInteger{c"0123456789"}} );
   };
   if true then {
      let tokens = std-c-tokenize-string("[0x0123456789aAbBcCdDeEfF]", "0x0123456789aAbBcCdDeEfF");
      assert( std-c-parse-constant(tokens).first == Some{CConstantInteger{c"0x0123456789aAbBcCdDeEfF"}} );
   };
   if true then {
      let tokens = std-c-tokenize-string("[0b01]", "0b01");
      assert( std-c-parse-constant(tokens).first == Some{CConstantInteger{c"0b01"}} );
   };
   if true then {
      let tokens = std-c-tokenize-string("[0ulwb]", "0ulwb");
      assert( std-c-parse-constant(tokens).first == Some{CConstantInteger{c"0ulwb"}} );
   };
   if true then {
      let tokens = std-c-tokenize-string("[0UllWB]", "0UllWB");
      assert( std-c-parse-constant(tokens).first == Some{CConstantInteger{c"0UllWB"}} );
   };
   if true then {
      let tokens = std-c-tokenize-string("[0ULLWB]", "0ULLWB");
      assert( std-c-parse-constant(tokens).first == Some{CConstantInteger{c"0ULLWB"}} );
   };
   if true then {
      let tokens = std-c-tokenize-string("[0ULWB]", "0ULWB");
      assert( std-c-parse-constant(tokens).first == Some{CConstantInteger{c"0ULWB"}} );
   };

   # character constants
   # (the debug print that used to precede the first assertion was removed: it
   # only handled CConstantCharacter and None, and the assertion already pins
   # the expected value)
   if true then {
      let tokens = std-c-tokenize-string("'a'", "'a'");
      assert( std-c-parse-constant(tokens).first == Some{CConstantCharacter{c"'a'"}} );
   };
   if true then {
      let tokens = std-c-tokenize-string("u8'\\''", "u8'\\''");
      assert( std-c-parse-constant(tokens).first == Some{CConstantCharacter{c"u8'\\''"}} );
   };
   if true then {
      let tokens = std-c-tokenize-string("L'\\0'", "L'\\0'");
      assert( std-c-parse-constant(tokens).first == Some{CConstantCharacter{c"L'\\0'"}} );
   };
   if true then {
      let tokens = std-c-tokenize-string("u'\\u123'", "u'\\u123'");
      assert( std-c-parse-constant(tokens).first == Some{CConstantCharacter{c"u'\\u123'"}} );
   };
};

0 comments on commit 62dd1e8

Please sign in to comment.