-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathstringManipulationHelperFunctions.cpp
More file actions
207 lines (187 loc) · 6.96 KB
/
stringManipulationHelperFunctions.cpp
File metadata and controls
207 lines (187 loc) · 6.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
#include <cctype>
#include <string>
#include <vector>
// Tells whether `token` contains nothing but whitespace. An empty string
// counts as all-whitespace on both code paths.
//
// If `the_cpp_runtime_library_supports_regexes` is true, the answer is given
// by the regular expression `^\s*$`; otherwise every character is checked
// with std::isspace.
bool isAllWhitespace(const std::string token) {
  if (the_cpp_runtime_library_supports_regexes)
    return std::regex_search(token, std::regex(R"(^\s*$)"));
  for (const char c : token)
    // The <cctype> functions require an argument representable as
    // unsigned char (or EOF); a negative plain char would be undefined
    // behaviour, so convert first.
    if (not(std::isspace(static_cast<unsigned char>(c))))
      return false;
  return true;
}
// Tells whether `token` is a non-empty sequence of decimal digits.
//
// If `the_cpp_runtime_library_supports_regexes` is true, the answer is given
// by the regular expression `^\d+$`; otherwise every character is checked
// with std::isdigit. The empty string is rejected on both paths.
bool isAllDigits(const std::string token) {
  if (the_cpp_runtime_library_supports_regexes)
    return std::regex_search(token, std::regex(R"(^\d+$)"));
  if (token.empty())
    return false;
  for (const char c : token)
    // Convert to unsigned char first: passing a negative plain char to a
    // <cctype> function is undefined behaviour.
    if (not(std::isdigit(static_cast<unsigned char>(c))))
      return false;
  return true;
}
// Returns true when `c` is an alphanumeric character or an underscore, but
// not a decimal digit.
bool isWordCharacterButNotDigit(const char c) {
  // <cctype> functions need a value representable as unsigned char; a
  // negative plain char would be undefined behaviour.
  const unsigned char asUnsigned = static_cast<unsigned char>(c);
  return (std::isalnum(asUnsigned) or c == '_') and
         not(std::isdigit(asUnsigned));
}
// Tells whether `str` is an unsigned integer literal: either a non-empty run
// of decimal digits, or the lowercase prefix "0x" followed by at least one
// hexadecimal digit (0-9, a-f, A-F). No sign is accepted.
//
// If `the_cpp_runtime_library_supports_regexes` is true, the answer is given
// by a regular expression; otherwise the same rules are checked by hand.
bool isInteger(std::string str) {
  if (the_cpp_runtime_library_supports_regexes)
    return std::regex_search(str,
                             std::regex(R"((^\d+$)|(^0x(\d|[a-f]|[A-F])+$))"));
  if (str.empty())
    return false;
  if (str.compare(0, 2, "0x") == 0) { // Hexadecimal numbers...
    if (str.size() == 2)
      return false; // A bare "0x" has no digits.
    for (size_t i = 2; i < str.size(); i++)
      // The cast avoids undefined behaviour for negative plain chars.
      if (not(std::isxdigit(static_cast<unsigned char>(str[i]))))
        return false;
    return true;
  }
  // Integer decimal numbers...
  for (const char c : str)
    if (not(std::isdigit(static_cast<unsigned char>(c))))
      return false;
  return true;
}
// Tells whether `str` is a decimal (fractional) literal: an optional '+' or
// '-' sign, at least one digit, a mandatory decimal point, and then zero or
// more digits (so "1.", "1.5" and "-0.25" qualify, while "123" and ".5" do
// not).
//
// If `the_cpp_runtime_library_supports_regexes` is true, the answer is given
// by the regular expression `^([+-])?\d+\.\d*$`; otherwise the string is
// scanned by hand following exactly the same grammar, so that both paths
// agree.
bool isDecimalNumber(std::string str) {
  if (the_cpp_runtime_library_supports_regexes)
    return std::regex_search(str, std::regex(R"(^([+-])?\d+\.\d*$)"));
  size_t position = 0;
  // Optional sign.
  if (position < str.size() and
      (str[position] == '+' or str[position] == '-'))
    position++;
  // Integer part: at least one digit is required.
  const size_t startOfIntegerPart = position;
  while (position < str.size() and
         std::isdigit(static_cast<unsigned char>(str[position])))
    position++;
  if (position == startOfIntegerPart)
    return false;
  // The decimal point is mandatory.
  if (position == str.size() or str[position] != '.')
    return false;
  position++;
  // Fractional part: any number of digits, possibly none.
  while (position < str.size() and
         std::isdigit(static_cast<unsigned char>(str[position])))
    position++;
  // Anything left over means there was an invalid character.
  return position == str.size();
}
// Tells whether `str` looks like a variable name: a letter or underscore,
// followed by any number of letters, digits or underscores, optionally
// terminated by a single '[' as the very last character.
//
// If `the_cpp_runtime_library_supports_regexes` is true, the answer is given
// by the regular expression `^(_|[a-z]|[A-Z])\w*\[?$`; otherwise the same
// rules are checked by hand.
bool isValidVariableName(std::string str) {
  if (the_cpp_runtime_library_supports_regexes)
    return std::regex_search(str, std::regex(R"(^(_|[a-z]|[A-Z])\w*\[?$)"));
  if (str.empty())
    return false;
  // The first character must be a letter or an underscore. Checking this
  // explicitly (rather than just "not a digit") keeps a lone "[" from being
  // accepted, which the regular expression above rejects as well.
  const unsigned char first = static_cast<unsigned char>(str[0]);
  if (not(std::isalpha(first)) and first != '_')
    return false;
  for (size_t i = 1; i < str.size(); i++) {
    // Cast before calling <cctype> functions: negative plain chars would be
    // undefined behaviour.
    const unsigned char current = static_cast<unsigned char>(str[i]);
    const bool isTrailingBracket = i == str.size() - 1 and current == '[';
    if (not(std::isalnum(current)) and current != '_' and
        not(isTrailingBracket))
      return false;
  }
  return true;
}
// Tells whether the type name `str` ends with the word "Pointer".
// Uses the regular expression `Pointer$` when
// `the_cpp_runtime_library_supports_regexes` is true, and a plain suffix
// comparison otherwise.
bool isPointerType(std::string str) {
  if (the_cpp_runtime_library_supports_regexes)
    return std::regex_search(str, std::regex("Pointer$"));
  const std::string suffix("Pointer");
  if (suffix.size() > str.size())
    return false; // Too short to possibly end with the suffix.
  const size_t startOfSuffix = str.size() - suffix.size();
  return str.compare(startOfSuffix, suffix.size(), suffix) == 0;
}
// Tells whether `str` ends with an opening square bracket '['.
// Uses the regular expression `\[$` when
// `the_cpp_runtime_library_supports_regexes` is true, and a direct check of
// the last character otherwise. An empty string yields false.
bool isArray(std::string str) {
  if (the_cpp_runtime_library_supports_regexes)
    return std::regex_search(str, std::regex(R"(\[$)"));
  return not(str.empty()) and str[str.size() - 1] == '[';
}
// Tells whether `str` ends with an opening parenthesis '('.
// Uses the regular expression `\($` when
// `the_cpp_runtime_library_supports_regexes` is true, and a direct check of
// the last character otherwise. An empty string yields false.
bool isFunction(std::string str) {
  if (the_cpp_runtime_library_supports_regexes)
    return std::regex_search(str, std::regex(R"(\($)"));
  return not(str.empty()) and str[str.size() - 1] == '(';
}
// Tells whether `str` is exactly one of the two decimal type names,
// "Decimal32" or "Decimal64" (the comparison is case-sensitive).
bool isDecimalType(std::string str) {
  if (str == "Decimal32")
    return true;
  return str == "Decimal64";
}
// Tells whether `token` consists only of alphanumeric characters and
// underscores, with at most one dot that is not the first character.
// Note that a token without any dot (including the empty string) is
// accepted as well.
//
// Done often in the tokenizer, so it's probably better (and more portable)
// to do it this way than in REGEX.
bool isComposedOfAlnumsAndOneDot(std::string token) {
  bool passedOverADot = false;
  for (size_t i = 0; i < token.size(); i++) {
    if (token[i] == '.') {
      // A dot is only acceptable once, and never in the leading position.
      if (passedOverADot or i == 0)
        return false;
      passedOverADot = true;
    } else if (not(std::isalnum(static_cast<unsigned char>(token[i]))) and
               token[i] != '_') {
      // The cast matters: handing a negative plain char to std::isalnum is
      // undefined behaviour.
      return false;
    }
  }
  return true;
}
// Returns the length of the longest common subsequence of `first` and
// `second` (0 if either string is empty).
//
// Adapted from Domagoj Kusalić's book "Napredno programiranje i algoritmi u
// C-u i C++-u", chapter 8.4.3, page 331 (in the 2014 edition).
//
// The table has one extra row and column of zeros in front, so that cell
// [i][j] holds the answer for the first `i` characters of `first` and the
// first `j` characters of `second`. That explicit border replaces indexing
// with "-1", which depended on converting an out-of-range size_t to int.
int longest_common_subsequence_length(std::string first, std::string second) {
  const size_t rows = first.size();
  const size_t columns = second.size();
  std::vector<std::vector<int>> table(rows + 1,
                                      std::vector<int>(columns + 1, 0));
  for (size_t i = 1; i <= rows; i++)
    for (size_t j = 1; j <= columns; j++)
      if (first[i - 1] == second[j - 1])
        // Matching characters extend the best subsequence of both prefixes.
        table[i][j] = table[i - 1][j - 1] + 1;
      else
        // Otherwise drop the last character of one string or the other.
        table[i][j] = std::max(table[i - 1][j], table[i][j - 1]);
  return table[rows][columns];
}
// Computes the Levenshtein (edit) distance between `A` and `B`: the minimal
// number of single-character insertions, deletions and substitutions needed
// to turn one string into the other.
//
// Origin of the algorithm as used here:
// https://discord.com/channels/530598289813536771/847014270922391563/867319320485167115
// https://github.com/royalpranjal/Interview-Bit/blob/master/DynamicProgramming/EditDistance.cpp
int Levenstein_distance(std::string A, std::string B) {
  const size_t rowCount = A.size() + 1;
  const size_t columnCount = B.size() + 1;
  // distance[r][c] is the edit distance between the first `r` characters of
  // `A` and the first `c` characters of `B`.
  std::vector<std::vector<int>> distance(rowCount,
                                         std::vector<int>(columnCount));
  // Turning a prefix into the empty string (or vice versa) costs its length.
  for (size_t r = 0; r < rowCount; ++r)
    distance[r][0] = int(r);
  for (size_t c = 1; c < columnCount; ++c)
    distance[0][c] = int(c);
  for (size_t r = 1; r < rowCount; ++r) {
    for (size_t c = 1; c < columnCount; ++c) {
      if (A[r - 1] == B[c - 1]) {
        // Same character: nothing to pay on top of the shorter prefixes.
        distance[r][c] = distance[r - 1][c - 1];
        continue;
      }
      // Cheapest of substitution, deletion and insertion, plus one edit.
      const int cheapestNeighbour =
          std::min(distance[r - 1][c - 1],
                   std::min(distance[r - 1][c], distance[r][c - 1]));
      distance[r][c] = cheapestNeighbour + 1;
    }
  }
  return distance[rowCount - 1][columnCount - 1];
}
#define USING_LEVENSTEIN_DISTANCE
// Tells whether the string `first` ends with the string `second`. Every
// string ends with the empty string.
// https://stackoverflow.com/questions/67451951/how-to-use-pointers-to-figure-out-if-a-c-string-ends-with-another-c-string/67452057?noredirect=1#comment119223072_67452057
bool ends_with(const std::string &first, const std::string &second) {
  const size_t suffixLength = second.size();
  if (first.size() < suffixLength)
    return false; // `second` cannot fit at the end of a shorter string.
  const size_t startOfSuffix = first.size() - suffixLength;
  return first.compare(startOfSuffix, suffixLength, second) == 0;
}
// Rewrites a type name written with "PointerTo" prefixes into one written
// with "Pointer" suffixes: each leading "PointerTo" is removed and a
// "Pointer" is appended at the end, repeatedly, until the name no longer
// starts with "PointerTo". For example, "PointerToInteger32" becomes
// "Integer32Pointer". Names without the prefix are returned unchanged.
std::string demanglePointerType(std::string pointerType) {
  const std::string prefix("PointerTo");
  // Every round makes the name shorter, so the loop always terminates.
  while (pointerType.compare(0, prefix.size(), prefix) == 0)
    pointerType = pointerType.substr(prefix.size()) + "Pointer";
  return pointerType;
}