-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy patht0.cpp
More file actions
96 lines (84 loc) · 3.63 KB
/
t0.cpp
File metadata and controls
96 lines (84 loc) · 3.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#include <iostream>
#include "rglob.h"
using namespace std;
using namespace rglob;
// Forward declaration of the test helper defined after main: p is the pattern,
// t the target text, x the expected match result (defaults to true), and pp
// requests a pretty-print of the compiled pattern (defaults to false).
static void validate(string_view p, string_view t, bool x = true, bool pp = false);
/*
Both the main and validate functions below illustrate some sample patterns
and targets, as well as providing a useful framework for simple testing.
N.B. - it should be emphasized that "globs" are NOT "regexps", and in
particular, a single character class, no matter how complex, will match AT
MOST a SINGLE [UTF-8] character / "code point" from the target string - no
number of '+' or '*' chars after the closing ']' will change this, because
well, "glob" patterns really AREN'T regular expressions (like we said).
*/
int main(int argc, char* argv[])
{
    // One row per check; rows are run strictly top to bottom. The last two
    // fields mirror validate's defaulted arguments (expect a MATCH, no
    // pretty-print), so a row only spells them out when it needs to differ.
    struct sample {
        const char* pattern;
        const char* target;
        bool expect = true;
        bool pretty = false;
    };

    const sample samples[] = {
        // --- exercising the UTF-8 validation itself ---
        {"BAD UTF-8 \x80", "a", false},     // (illegal 1st char of UTF-8 seq)
        {"b", "BAD UTF-8 \xc0 ", false},    // (illegal 2nd char of UTF-8 seq)
        {"BAD UTF-8 \xf0", "c", false},     // ("\xf0" needs 4-char UTF-8 seq)

        // --- the most basic patterns ---
        {"abc", "abc"},
        {"abc", "abC", false},
        {"ab?", "abC"},
        {"*bar", "foobar"},
        {"*ba?", "foobaR", true, true},

        // --- character classes ---
        {"[A-Z][0-9][^0-9]", "B2B", true, true},
        {"[A-Z][0-9][^0-9ф]", "B2Bx", false, true},
        {"[A-Z][0-9][^0-9]*", "B2Bx-ray"},
        {"[A-Z][0-9][^0-9]", "B23", false},

        // puzzle row: this one is expected to raise an exception - look
        // closely at how the last character class ends
        {"[A-Z][0-9][^0-9*", "B2Bx-ray"},

        // --- a few playful ones ---
        {"a?c*def*[^]ABx-z]*", "abcYdefABBA Van Halen"},
        {"a?c*def[^]ABx-z]*", "abcYdefABBA Van Halen", false},
        {"a?c*def[]ABx-z]*", "abcYdefABBA Van Halen"},

        // The final pair demonstrates that escaped (\uXXXX) and literally
        // typed Unicode characters yield the very same pattern - compare
        // the two pretty_print dumps to see for yourself.
        {"*[\u0410-\u042F \u0430-\u044F][\u0410-\u042F \u0430-\u044F][\u0410-\u042F \u0430-\u044F]bar\u03B5", "fu\u041f \u0444bar\u03B5", true, true},
        {"*[А-Я а-я][А-Я а-я][А-Я а-я]barε", "fuП фbarε", true, true},
    };

    for (const sample& s : samples)
        validate(s.pattern, s.target, s.expect, s.pretty);

    return 0;
}
/*
validate "wraps" pattern compiling, optional pretty-printing, and matching,
displaying a [generally] single-line/test formatted report, while catching
and reporting any of the exceptions thrown by rglob.
Usage
=====
p pattern to compile and match against
t target text for matching
x expected result of match (true -> MATCH, false -> FAIL!)
pp pretty_print the compiled version of this pattern
N.B. - whether or not the handy "u8" form of string literals is used, both
the pattern and target will be interpreted as containing Unicode in UTF-8!
*/
static void validate(string_view p, string_view t, bool x, bool pp)
{
    // Turns a truthy/falsy outcome into the label shown in the report line.
    const auto label = [](auto outcome) { return outcome ? "MATCH" : "FAIL!"; };

    // Every error line has the same shape: lead-in, the offending text, the
    // exception kind, then the exception's own message.
    const auto complain = [](const char* lead, string_view text,
                             const char* kind, const char* why) {
        cerr << lead << text << kind << why << endl;
    };

    glob g;

    // Phase 1: compile. Any failure here ends the test, since matching
    // against a pattern that failed to compile is undefined.
    try {
        g.compile(p);
    } catch (const invalid_argument& e) {
        complain("*** Compiling ", p, " => std::invalid_argument: ", e.what());
        return;
    } catch (const length_error& e) {
        complain("*** Compiling ", p, " => std::length_error: ", e.what());
        return;
    }

    // Phase 2 (optional): dump the compiled form of the pattern.
    if (pp) {
        cout << "Pretty_print of " << p << ':' << endl;
        g.pretty_print(cout, " ");
    }

    // Phase 3: match, then report expected vs. actual on a single line.
    try {
        const auto got = g.match(t);
        const bool mismatch = (got != x);
        cout << "Want " << label(x)
             << ", got " << label(got)
             << " (" << (mismatch ? "BZZZT!" : "OK") << ") with "
             << t << " -> " << p << endl;
    } catch (const invalid_argument& e) {
        complain("*** Matching ", t, " => std::invalid_argument: ", e.what());
    }
}