// tests.zig — test suite for ctregex (repository forked from alexnask/ctregex.zig).
const ctregex = @import("ctregex.zig");
const std = @import("std");
const expect = std.testing.expect;
/// Re-encodes the comptime UTF-8 string `str` as a slice of `encoding.CharT()` elements.
///
/// `.ascii` and `.utf8` hand `str` back unchanged, `.utf16le` converts it with
/// `std.unicode.utf8ToUtf16Le`, and `.codepoint` produces one element per code point
/// yielded by a `std.unicode.Utf8View` iterator.
///
/// The scratch buffers are sized by `str.len` and copied into a `const` before being
/// sliced, so the returned slice never refers to a mutable variable.
fn encodeStr(comptime encoding: ctregex.Encoding, comptime str: []const u8) []const encoding.CharT() {
    return switch (encoding) {
        // Nothing to convert: the input is used as-is.
        .ascii, .utf8 => str,
        .utf16le => blk: {
            var units: [str.len]u16 = undefined;
            // A conversion failure is treated as impossible for the inputs used here.
            const unit_count = std.unicode.utf8ToUtf16Le(&units, str) catch unreachable;
            const frozen = units;
            break :blk frozen[0..unit_count];
        },
        .codepoint => blk: {
            var codepoints: [str.len]u21 = undefined;
            // Deliberately left as a `comptime_int` counter, exactly like the buffer fill
            // it drives: this keeps the function usable only in comptime evaluation.
            var count = 0;
            var iter = std.unicode.Utf8View.initComptime(str).iterator();
            while (iter.nextCodepoint()) |codepoint| : (count += 1) {
                codepoints[count] = codepoint;
            }
            const frozen = codepoints;
            break :blk frozen[0..count];
        },
    };
}
/// Asserts that `ctregex.match` yields a non-null result for `regex` applied to `str`
/// after `str` has been converted with `encodeStr(encoding, str)`.
///
/// The check is performed twice: once through an ordinary call and once with the whole
/// expression forced to comptime.
fn testMatch(comptime regex: []const u8, comptime encoding: ctregex.Encoding, comptime str: []const u8) !void {
    const subject = comptime encodeStr(encoding, str);

    // Ordinary call.
    const runtime_result = try ctregex.match(regex, .{ .encoding = encoding }, subject);
    try expect(runtime_result != null);

    // Same check, evaluated by the compiler.
    comptime try expect(
        (try ctregex.match(regex, .{ .encoding = encoding }, subject)) != null,
    );
}
/// Runs `ctregex.search` for `regex` over the already-encoded `str` and asserts that a
/// result exists and that its `slice` is element-wise equal to `found`.
fn testSearchInner(comptime regex: []const u8, comptime encoding: ctregex.Encoding, comptime str: []const encoding.CharT(), comptime found: []const encoding.CharT()) !void {
    const maybe_hit = try ctregex.search(regex, .{ .encoding = encoding }, str);
    try expect(maybe_hit != null);

    // Safe to unwrap: the expectation above has already returned an error on null.
    const hit = maybe_hit.?;
    const same_text = std.mem.eql(encoding.CharT(), hit.slice, found);
    try expect(same_text);
}
/// Encodes both `str` and the expected hit `found` with `encodeStr`, then delegates to
/// `testSearchInner` twice: once as an ordinary call and once forced to comptime.
fn testSearch(comptime regex: []const u8, comptime encoding: ctregex.Encoding, comptime str: []const u8, comptime found: []const u8) !void {
    const haystack = comptime encodeStr(encoding, str);
    const expected_hit = comptime encodeStr(encoding, found);

    // Ordinary call first, then the same check evaluated by the compiler.
    try testSearchInner(regex, encoding, haystack, expected_hit);
    comptime try testSearchInner(regex, encoding, haystack, expected_hit);
}
/// Runs `ctregex.match` for `regex` over the already-encoded `str` and compares the
/// result's `captures` against the expected `captures`, entry by entry.
///
/// The number of entries must be equal. A `null` entry must be `null` on both sides;
/// a non-null entry must be element-wise equal to its counterpart.
fn testCapturesInner(comptime regex: []const u8, comptime encoding: ctregex.Encoding, comptime str: []const encoding.CharT(), comptime captures: []const ?[]const encoding.CharT()) !void {
    const match_result = try ctregex.match(regex, .{ .encoding = encoding }, str);
    try expect(match_result != null);

    const actual_captures = &match_result.?.captures;
    try expect(actual_captures.len == captures.len);

    for (captures, 0..) |expected, i| {
        if (actual_captures[i]) |actual| {
            // The group captured something, so an expected value must exist and agree.
            try expect(expected != null);
            try expect(std.mem.eql(encoding.CharT(), actual, expected.?));
        } else {
            try expect(expected == null);
        }
    }
}
/// Encodes `str` and every non-null entry of `captures` with `encodeStr`, then delegates
/// to `testCapturesInner` twice: once as an ordinary call and once forced to comptime.
/// `null` entries in `captures` are forwarded as `null`.
fn testCaptures(comptime regex: []const u8, comptime encoding: ctregex.Encoding, comptime str: []const u8, comptime captures: []const ?[]const u8) !void {
    const subject = comptime encodeStr(encoding, str);

    comptime var scratch: [captures.len]?[]const encoding.CharT() = undefined;
    inline for (captures, 0..) |maybe_text, i| {
        scratch[i] = if (maybe_text) |text| comptime encodeStr(encoding, text) else null;
    }
    // Copy into a constant so the pointer passed below does not refer to the comptime var.
    const expected = scratch;

    try testCapturesInner(regex, encoding, subject, &expected);
    comptime try testCapturesInner(regex, encoding, subject, &expected);
}
// Exercises `ctregex.match` through the `testMatch` and `testCaptures` helpers.
// `testMatch` is run with the ascii, utf8, utf16le and codepoint encodings;
// `testCaptures` is run with utf8 only.
test "regex matching" {
// The helpers repeat every check at comptime, so the comptime branch quota is raised
// explicitly. It had to be raised a lot for Zig 0.14.0; the reason was not identified.
@setEvalBranchQuota(5000);
// Alternation: either branch must match.
try testMatch("abc|def", .ascii, "abc");
try testMatch("abc|def", .ascii, "def");
// The same non-ASCII character ranges under each Unicode-capable encoding.
try testMatch("[Α-Ω][α-ω]+", .utf8, "Αλεξανδρος");
try testMatch("[Α-Ω][α-ω]+", .utf16le, "Αλεξανδρος");
try testMatch("[Α-Ω][α-ω]+", .codepoint, "Αλεξανδρος");
// Negated character class with `{1,}` and `{1,3}` repetition.
try testMatch("[^a-z]{1,}", .ascii, "ABCDEF");
try testMatch("[^a-z]{1,3}", .ascii, "ABC");
// NOTE(review): the pattern contains unescaped spaces while the input has none;
// presumably ctregex ignores unescaped whitespace in patterns — confirm.
try testMatch("Smile|(😀 | 😊){2}", .utf8, "😊😀");
// Capture expectations, one entry per capturing group; `null` means "did not capture".
// Input matched by the first alternative with zero repetitions: both entries are null.
try testCaptures("(?:no\\ capture)([😀-🙏])*|(.*)", .utf8, "no capture", &[_]?[]const u8{ null, null });
// Two repetitions of the first group: the expected capture is the last emoji.
try testCaptures("(?:no\\ capture)([😀-🙏])*|(.*)", .utf8, "no capture😿😻", &[_]?[]const u8{ "😻", null });
// Input matched by the second alternative: the second entry holds the whole input.
try testCaptures("(?:no\\ capture)([😀-🙏])*|(.*)", .utf8, "π = 3.14159...", &[_]?[]const u8{ null, "π = 3.14159..." });
}
// Exercises `ctregex.search` through the `testSearch` helper, which checks that the
// slice reported by the search equals the expected text.
test "regex searching" {
// `testSearch` repeats every check at comptime, so the comptime branch quota is raised
// explicitly. It had to be raised a lot for Zig 0.14.0; the reason was not identified.
@setEvalBranchQuota(30000);
// The expected hit is "foo", the first part of "foobar" in the input.
try testSearch("foo|bar", .ascii, "some very interesting test string including foobar.", "foo");
// Same pattern and input under each Unicode-capable encoding; the expected hit is the
// run of "αβγ" repetitions, which appears before "abcabc" in the input.
try testSearch("(abc|αβγ)+", .utf8, "a lorem ipsum αβγαβγαβγ abcabc", "αβγαβγαβγ");
try testSearch("(abc|αβγ)+", .utf16le, "a lorem ipsum αβγαβγαβγ abcabc", "αβγαβγαβγ");
try testSearch("(abc|αβγ)+", .codepoint, "a lorem ipsum αβγαβγαβγ abcabc", "αβγαβγαβγ");
}