-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathStringExtensions.cs
More file actions
1194 lines (1119 loc) · 45.6 KB
/
StringExtensions.cs
File metadata and controls
1194 lines (1119 loc) · 45.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
using System;
using System.Linq;
using System.Text.RegularExpressions;
namespace RegexStringLibrary
{
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Regular expression extensions for strings. </summary>
///
/// <remarks>
/// This set of extensions only deals with the string versions for regular expressions. That's
/// where the complexity lies. It also allows for cascading and combining of the results since
/// they are all strings. The only exception to this is the date info since this is a really
/// complex string with various options available so we tailor the result for the situation and do
/// the match in our code and interpret the results into a DateInfo object. See the tests for
/// examples of usage. Darrellp, 10/1/2012.
/// </remarks>
////////////////////////////////////////////////////////////////////////////////////////////////////
public static class Stex
{
#region Public strings
// ---- Single-character escapes and shorthand character classes ----
// Each property returns the regex source text for the named construct.
public static string Bell { get { return @"\a"; } }
public static string CR { get { return @"\r"; } }
public static string LF { get { return @"\n"; } }
public static string FormFeed { get { return @"\f"; } }
public static string Digit { get { return @"\d"; } }
public static string NonDigit { get { return @"\D"; } }
// NOTE: Word and WordChar (below) return the same pattern, @"\w".
public static string Word { get { return @"\w"; } }
public static string Tab { get { return @"\t"; } }
public static string White { get { return @"\s"; } }
public static string NonWhite { get { return @"\S"; } }
public static string VerticalTab { get { return @"\v"; } }
public static string WordChar { get { return @"\w"; } }
public static string NonWordChar { get { return @"\W"; } }
public static string WordBoundary { get { return @"\b"; } }
public static string NonWordBoundary { get { return @"\B"; } }
// White repeated zero or more times (Rep's upper bound defaults to "no limit").
public static string WhitePadding { get { return White.Rep(0); } }
// ---- Ranges: bare "a-z" style fragments meant to be placed inside AnyCharFrom/NotCharIn ----
public static string CapLetterRange { get { return Range("A", "Z"); } }
public static string LowerLetterRange { get { return Range("a", "z"); } }
public static string LetterRange { get { return CapLetterRange + LowerLetterRange; } }
public static string AlphanumRange { get { return LetterRange + Digit; } }
// ---- Complete bracketed character classes built from the ranges above ----
public static string Letter { get { return AnyCharFrom(LetterRange); } }
public static string CapLetter { get { return AnyCharFrom(CapLetterRange); } }
public static string LowerLetter { get { return AnyCharFrom(LowerLetterRange); } }
public static string Alphanum { get { return AnyCharFrom(AlphanumRange); } }
// ---- Anchors and miscellaneous constructs ----
public static string StringStart { get { return "\\A"; } }
public static string StringEnd { get { return "\\Z"; } }
public static string Any { get { return "."; } }
public static string Begin { get { return "^"; } }
public static string End { get { return "$"; } }
// An empty negative lookahead.
public static string Failure { get { return "(?!)"; } }
public static string StartAt { get { return @"\G"; } }
// One or more digits.
public static string Unsigned { get { return Digit.RepAtLeast(1); } }
// ---- Date patterns: assigned once in the static constructor from Date(fAmerican, fAllowBetween) ----
// The "Bet" variants are the ones built with fAllowBetween == true.
public static string DateAmerican { get; private set; }
public static string DateEuropean { get; private set; }
public static string DateAmericanBet { get; private set; }
public static string DateEuropeanBet { get; private set; }
// These are public mainly for testing purposes
// Compiled Regex objects for the four date patterns above; GetDateInfo selects one of them.
public static Regex AmericanDateRegExp { get; set; }
public static Regex EuropeanDateRegExp { get; set; }
public static Regex AmericanDateBetRegExp { get; set; }
public static Regex EuropeanDateBetRegExp { get; set; }
#endregion
#region Private variables
// The code inside this #if is only compiled when the INFINITE_RECURSION symbol is defined.
// It is kept as a readable, Stex-style derivation of the raw pattern used for
// RgxDontParenthesize further down.
#if INFINITE_RECURSION
string strEscape = Esc('\\') + Any;
string strEscapedParen = Esc('\\') + Esc(')');
string strEscapedBracket = Esc('\\') + Esc(']');
string strInBrackets = Cat(
Esc('['),
strEscapedBracket.OrAnyOf(
NotCharIn(Esc(']'))).RepAtLeast(0),
NotCharIn(Esc('\\')),
Esc(']'));
string strInParens = Cat(
Esc('('),
strEscapedParen.OrAnyOf(
Not(Esc(')'))).RepAtLeast(0),
Not(Esc('\\')),
Esc(')'));
string strIgnore = Begin + Any.OrAnyOf(strEscape, strInBrackets, strInParens) + End;
_rgxDontParenthesize = new Regex(strIgnore);
#endif
// The pattern string below was originally generated by the conditionally-compiled code
// above, but using that code at runtime causes an infinite recursion, so the raw string
// is used here instead. Hopefully this is a pretty good demonstration of why it's nicer
// to use code like the above rather than the raw string below.
// NOTE: the raw string has one alternative the code above does not build:
// \\[Pp]\{[^}]+\} (a \p{...} or \P{...} escape).
// Applied to rgx strings to determine whether they require parenthesization or not.
// Not strictly necessary - it just keeps us from parenthesizing "(...)" and getting
// "((...))" or parenthesizing "a" and getting "(a)", etc.. Recursively using search
// patterns on search patterns - gotta love it.
private static readonly Regex RgxDontParenthesize = new Regex(
@"^(?:.|\\.|\\[Pp]\{[^}]+\}|\[(?:\\\]|[^\]])*[^\\]\]|\((?:\\\)|[^\)])*[^\\]\))$",
RegexOptions.Compiled);
// Valid Hex string
// NOTE(review): no Begin/End anchors are added here, so IsMatch presumably succeeds for
// any string that merely contains two consecutive hex digits - confirm against Rep's output.
private static readonly Regex RgxHex = new Regex(AnyCharFrom("A-Fa-f0-9").Rep(2, 2), RegexOptions.Compiled);
// Valid Octal string
// NOTE(review): likewise unanchored - presumably matches any string containing three
// consecutive octal digits.
private static readonly Regex RgxOctal = new Regex(AnyCharFrom("0-7").Rep(3, 3), RegexOptions.Compiled);
#endregion
#region Static constructor
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Static constructor. </summary>
///
/// <remarks> Initializes strings and values. Darrellp, 10/1/2012. </remarks>
////////////////////////////////////////////////////////////////////////////////////////////////////
static Stex()
{
    // Build each date pattern string and immediately compile its Regex so the
    // string/Regex pairs sit next to each other.

    // American ordering, single date only.
    DateAmerican = Date(fAmerican: true, fAllowBetween: false);
    AmericanDateRegExp = new Regex(DateAmerican, RegexOptions.Compiled);

    // European ordering, single date only.
    DateEuropean = Date(fAmerican: false, fAllowBetween: false);
    EuropeanDateRegExp = new Regex(DateEuropean, RegexOptions.Compiled);

    // American ordering, "between X and Y" allowed.
    DateAmericanBet = Date(fAmerican: true, fAllowBetween: true);
    AmericanDateBetRegExp = new Regex(DateAmericanBet, RegexOptions.Compiled);

    // European ordering, "between X and Y" allowed.
    DateEuropeanBet = Date(fAmerican: false, fAllowBetween: true);
    EuropeanDateBetRegExp = new Regex(DateEuropeanBet, RegexOptions.Compiled);
}
#endregion
#region Date matching
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Date information. </summary>
///
/// <remarks>
/// The dates are either an individual date or a period between two dates. Either date can be
/// prefixed with "about", "circa", "before", "after" or "calculated". These can be abbreviated
/// as follows:
///
/// About - "Abt", "A"
/// After - "Aft"
/// Before - "Bef", "B"
/// Calculated - "Cal"
/// Circa - "Cir", "Ca", "C"
///
/// Additionally, dates can be suffixed with "BC" or "B.C.".
///
/// Darrellp, 10/1/2012.
/// </remarks>
////////////////////////////////////////////////////////////////////////////////////////////////////
public class DateInfo
{
    /// <summary> Whether the regex match that produced this object succeeded. </summary>
    public bool Success;
    /// <summary> Whether the text described a span between two dates. </summary>
    public bool Between;
    /// <summary> Prefix attached to the first date. </summary>
    public string Prefix1;
    /// <summary> Prefix attached to the second date. </summary>
    public string Prefix2;
    /// <summary> Suffix attached to the first date. </summary>
    public string Suffix1;
    /// <summary> Suffix attached to the second date. </summary>
    public string Suffix2;
    /// <summary> The first (or only) date. </summary>
    public DateTime Date1;
    /// <summary> The second date; only meaningful when Between is true. </summary>
    public DateTime Date2;

    // Sentinel stored in Date1 by the parameterless constructor: a DateTime of exactly one tick.
    static readonly DateTime DateUninitialized = new DateTime(1, DateTimeKind.Utc);

    /// <summary> Creates a fully populated instance; every argument is copied to the matching field. </summary>
    internal DateInfo(bool successParm, bool betweenParm, string prefix1Parm, string prefix2Parm, DateTime date1Parm, DateTime date2Parm, string suffix1Parm, string suffix2Parm)
    {
        this.Success = successParm;
        this.Between = betweenParm;

        this.Prefix1 = prefix1Parm;
        this.Date1 = date1Parm;
        this.Suffix1 = suffix1Parm;

        this.Prefix2 = prefix2Parm;
        this.Date2 = date2Parm;
        this.Suffix2 = suffix2Parm;
    }

    /// <summary> Creates an empty instance whose Date1 holds the "uninitialized" sentinel. </summary>
    public DateInfo()
    {
        this.Date1 = DateUninitialized;
    }

    /// <summary> Reports whether Date1 differs from the "uninitialized" sentinel. </summary>
    /// <returns> false only while Date1 still equals the one-tick sentinel. </returns>
    public bool Initialized()
    {
        bool stillSentinel = this.Date1.Equals(DateUninitialized);
        return !stillSentinel;
    }
}
/// <summary> Converts an English month name or three-letter abbreviation to its month number. </summary>
///
/// <param name="mnthName"> Month name such as "Jan" or "January"; case is ignored. </param>
///
/// <returns> 1 through 12 for a recognized name, 0 for null, empty or unrecognized input. </returns>
static int MonthNameToIndex(string mnthName)
{
    if (string.IsNullOrEmpty(mnthName))
    {
        return 0;
    }

    // Upper-case with the invariant culture. Culture-sensitive ToUpper() maps 'i' to a
    // dotted capital I under Turkish cultures, so "april" would not equal "APRIL".
    switch (mnthName.ToUpperInvariant())
    {
        case "JAN":
        case "JANUARY":
            return 1;
        case "FEB":
        case "FEBRUARY":
            return 2;
        case "MAR":
        case "MARCH":
            return 3;
        case "APR":
        case "APRIL":
            return 4;
        case "MAY":
            return 5;
        case "JUN":
        case "JUNE":
            return 6;
        case "JUL":
        case "JULY":
            return 7;
        case "AUG":
        case "AUGUST":
            return 8;
        case "SEP":
        case "SEPTEMBER":
            return 9;
        case "OCT":
        case "OCTOBER":
            return 10;
        case "NOV":
        case "NOVEMBER":
            return 11;
        case "DEC":
        case "DECEMBER":
            return 12;
        default:
            return 0;
    }
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Gets date information from a string. </summary>
///
/// <remarks>
/// Dates can be in American or European ordering. They are either an individual date or a
/// period between two dates. Either date can be prefixed with "about", "circa", "before",
/// "after" or "calculated". These can be abbreviated as follows:
///
/// About - "Abt", "A"
/// After - "Aft"
/// Before - "Bef", "B"
/// Calculated - "Cal"
/// Circa - "Cir", "Ca", "C"
///
/// Additionally, dates can be suffixed with "BC" or "B.C.".
///
/// Some sample dates would include:
///
/// 10/12/2012
/// February 10, 1912
/// NOV 4, 1956
/// Nov 4, 1956
/// 1940
/// between 1948 and 1950
/// 11-4-1956
/// 11-04-195
/// 4 November 1956
/// ca 1932
/// after 2000
/// before 800 BC
/// After Jan. 1, 1932
/// between nov 4, 1956 and ca sep 11, 1980
///
/// Darrellp, 10/1/2012.
/// </remarks>
///
/// <param name="strDate"> String representing the date. </param>
/// <param name="fAmerican"> True for American date ordering. </param>
/// <param name="fAllowBetween"> True to allow "between" in the date. </param>
///
/// <returns> The date information. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
public static DateInfo GetDateInfo(string strDate, bool fAmerican, bool fAllowBetween)
{
    if (strDate == null)
    {
        // ArgumentNullException derives from ArgumentException, so callers that
        // catch ArgumentException keep working.
        throw new ArgumentNullException("strDate", "Null date in GetDateInfo");
    }

    // The four date regexes are compiled once by the static constructor;
    // pick the one matching the requested ordering and "between" support.
    Regex rgx;
    if (fAmerican)
    {
        rgx = fAllowBetween ? AmericanDateBetRegExp : AmericanDateRegExp;
    }
    else
    {
        rgx = fAllowBetween ? EuropeanDateBetRegExp : EuropeanDateRegExp;
    }

    Match mtch = rgx.Match(strDate);
    if (!mtch.Success)
    {
        return new DateInfo(false, false, string.Empty, string.Empty, new DateTime(), new DateTime(), string.Empty, string.Empty);
    }

    // A group that did not participate in the match has an empty Value.
    bool between = mtch.Groups["betweenPrefix"].Value != "";

    // Lower-case with the invariant culture so the reported prefix/suffix text does not
    // depend on the current culture (e.g. "CIRCA" must become "circa" everywhere).
    string prefix1 = mtch.Groups["prefix"].Value.ToLowerInvariant();
    string suffix1 = mtch.Groups["suffix"].Value.ToLowerInvariant();
    DateTime dt1 = DateFromGroups(mtch, "");

    string prefix2 = string.Empty;
    string suffix2 = string.Empty;
    DateTime dt2 = new DateTime();
    if (between)
    {
        // The second date uses the same group names with a "2" appended.
        prefix2 = mtch.Groups["prefix2"].Value.ToLowerInvariant();
        suffix2 = mtch.Groups["suffix2"].Value.ToLowerInvariant();
        dt2 = DateFromGroups(mtch, "2");
    }

    return new DateInfo(true, between, prefix1, prefix2, dt1, dt2, suffix1, suffix2);
}

// Builds a DateTime from the mnthName/month/day/year groups carrying the given name suffix
// ("" for the first date, "2" for the second). A missing month or day defaults to 1 so that
// year-only input such as "1940" no longer fails in int.Parse. An impossible calendar date
// still raises ArgumentOutOfRangeException from the DateTime constructor.
private static DateTime DateFromGroups(Match mtch, string tagSuffix)
{
    string monthName = mtch.Groups["mnthName" + tagSuffix].Value;
    string monthDigits = mtch.Groups["month" + tagSuffix].Value;
    string dayDigits = mtch.Groups["day" + tagSuffix].Value;

    int iMonth;
    if (monthName != "")
    {
        iMonth = MonthNameToIndex(monthName);
    }
    else
    {
        iMonth = monthDigits != "" ? int.Parse(monthDigits) : 1;
    }

    int iDay = dayDigits != "" ? int.Parse(dayDigits) : 1;
    int iYear = int.Parse(mtch.Groups["year" + tagSuffix].Value);
    return new DateTime(iYear, iMonth, iDay);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> This is the regex string for the date. </summary>
///
/// <remarks> Darrellp, 10/1/2012. </remarks>
///
/// <param name="fAmerican"> True for American date ordering. </param>
/// <param name="fAllowBetween"> True to allow "between" in the date. </param>
///
/// <returns> String for regex which parses the date. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
static string Date(bool fAmerican, bool fAllowBetween)
{
    // A literal period. A bare "." is the regex wildcard, which would let any character
    // stand in for the abbreviation dot, the separator, or the dots in "B.C.".
    string strDot = '.'.Esc();

    // Month names. "MAY" appears only in the spelled-out list because its abbreviation
    // is the full name.
    string strMonthAbbr = AnyOf("JAN", "FEB", "MAR", "APR", "JUN", "JUL",
        "AUG", "SEP", "OCT", "NOV", "DEC").Named("mnthName") + strDot.Optional();
    string strMonthSpelled = AnyOf("JANUARY", "FEBRUARY", "MARCH", "APRIL", "MAY", "JUNE",
        "JULY", "AUGUST", "SEPTEMBER", "OCTOBER", "NOVEMBER", "DECEMBER").Named("mnthName");
    string strMonthName = strMonthAbbr.OrAnyOf(strMonthSpelled);

    string strSeparator = AnyOf("-", "/", strDot, " ");
    string strBetween = AnyOf("Between", "Bet").Named("betweenPrefix");
    string strPrefix = AnyOf("About", "Abt", "A",
        "After", "Aft",
        "Before", "Bef", "B",
        "Calculated", "Cal",
        "Circa", "Cir", "Ca", "C").Named("prefix");
    string strSuffix = AnyOf("BC", "B" + strDot + "C" + strDot).Named("suffix");

    // Numeric pieces: one or two digits for day and month, three or four for the year.
    string strTwoDigits = Cat(Digit, Digit.Optional());
    string strDay = strTwoDigits.Named("day");
    string strMonth = strTwoDigits.Named("month");
    string strYear = Cat(Digit, Digit, Digit, Digit.Optional()).Named("year");

    // Supported layouts.
    string strDate1 = strMonthName + " " + strDay + ", " + strYear;   // February 10, 1912
    string strDate2 = strDay + " " + strMonthName + " " + strYear;    // 4 November 1956
    string strDateAmerican = strMonth + strSeparator + strDay + strSeparator + strYear;
    string strDateEuropean = strDay + strSeparator + strMonth + strSeparator + strYear;
    string strDate3 = strYear + " " + strMonthName + " " + strDay;
    string strDate4 = strYear + strSeparator + strMonth + strSeparator + strDay;
    string strDate5 = strYear;                                         // year only

    string strSingleDate = (strPrefix + " ").Optional() + AnyOf(
        strDate1,
        strDate2,
        fAmerican ? strDateAmerican : strDateEuropean,
        strDate3,
        strDate4,
        strDate5) + (" " + strSuffix).Optional();

    // The second date of a "between" range is the same pattern with every group name
    // suffixed by "2", produced by a regex replacement over the pattern text itself.
    string strTags = AnyOf("mnthName", "prefix", "suffix", "day", "month", "year");
    string strSingleDate2 = Regex.Replace(strSingleDate, strTags, "$&2");

    return Cat(
        Begin,
        fAllowBetween ? (strBetween + " ").Optional() : string.Empty,
        strSingleDate,
        // The " AND <second date>" part is required only when the "between" prefix matched.
        fAllowBetween ? "betweenPrefix".If(" AND " + strSingleDate2) : string.Empty,
        End)
        .CaseSensitive(false);
}
#endregion
#region Character Escapes
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Returns hex character. </summary>
///
/// <remarks> Darrellp, 10/1/2012. </remarks>
///
/// <param name="strHex"> Hex value. </param>
///
/// <returns> The hex character string. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
public static string Hex(this string strHex)
{
    // RgxHex carries no anchors, so on its own it would accept any string that merely
    // contains two consecutive hex digits. The length test enforces the "exactly two"
    // contract. IsMatch runs first so a null argument still raises ArgumentNullException.
    if (!RgxHex.IsMatch(strHex) || strHex.Length != 2)
    {
        throw new ArgumentException("parameter in Hex must be exactly two hex characters long");
    }
    return @"\x" + strHex;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Returns octal character. </summary>
///
/// <remarks> Darrellp, 10/1/2012. </remarks>
///
/// <param name="strOctal"> Octal value. </param>
///
/// <returns> The octal character string. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
public static string Octal(this string strOctal)
{
    // RgxOctal carries no anchors, so the length test is needed to enforce "exactly three".
    // IsMatch runs first so a null argument still raises ArgumentNullException.
    if (!RgxOctal.IsMatch(strOctal) || strOctal.Length != 3)
    {
        throw new ArgumentException("parameter in Octal must be exactly three octal characters long");
    }
    // .NET regular expressions write an octal character code as a backslash followed by
    // the digits (\nnn); "\o" is not a recognized escape and is rejected by the Regex parser.
    // Caveat: a code not starting with 0 is read as a backreference if the surrounding
    // pattern happens to contain a capture group with that number.
    return @"\" + strOctal;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Escape a character. </summary>
///
/// <remarks> Darrellp, 10/1/2012. </remarks>
///
/// <param name="ch"> Character to escape. </param>
///
/// <returns> Escaped character. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
public static string Esc(this char ch)
{
    // Prefix the character with a single backslash.
    string escaped = "\\" + ch.ToString();
    return escaped;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Escape a character. </summary>
///
/// <remarks> Darrellp, 10/1/2012. </remarks>
///
/// <param name="strch"> String with the char to escape. </param>
///
/// <returns> Escaped character. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
public static string Esc(this string strch)
{
    // Reject null, empty and multi-character input alike. The previous message only
    // mentioned "longer than" and null surfaced as a NullReferenceException.
    if (strch == null || strch.Length != 1)
    {
        throw new ArgumentException("Esc requires a string containing exactly one character");
    }
    return @"\" + strch;
}
#endregion
#region Numeric searches
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Pattern for integers. </summary>
///
/// <remarks> Darrellp, 8/29/2011. </remarks>
///
/// <param name="strName"> Name for the match. </param>
///
/// <returns> Pattern to recognize integers. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
public static string Integer(string strName = "")
{
    // Optional leading minus sign followed by one or more digits.
    string signedDigits = "-".Optional() + UnsignedInteger();

    // Only wrap in a named group when a name was actually supplied.
    return strName == String.Empty
        ? signedDigits
        : signedDigits.Named(strName);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Pattern for unsigned integers. </summary>
///
/// <remarks> Darrellp, 8/29/2011. </remarks>
///
/// <param name="strName"> Name for the match. </param>
///
/// <returns> Pattern to recognize unsigned integers. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
public static string UnsignedInteger(string strName = "")
{
    // One or more digits.
    string digitRun = Digit.RepAtLeast(1);
    if (strName == String.Empty)
    {
        return digitRun;
    }
    // A name was supplied: capture the digits under that name.
    return digitRun.Named(strName);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Pattern for floats. </summary>
///
/// <remarks> Darrellp, 8/29/2011. </remarks>
///
/// <param name="strName"> Name for the match. </param>
///
/// <returns> Pattern to recognize floats. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
public static string Float(string strName = "")
{
    string optionalPoint = '.'.Esc().Optional();
    string anyDigits = Digit.Rep(0);

    // Two layouts: digits required before the point, or digits required after it.
    string leadingDigitsForm = UnsignedInteger() + optionalPoint + anyDigits;
    string trailingDigitsForm = anyDigits + optionalPoint + UnsignedInteger();
    string floatPattern = "-".Optional() + AnyOf(leadingDigitsForm, trailingDigitsForm);

    // Only wrap in a named group when a name was actually supplied.
    return strName == String.Empty
        ? floatPattern
        : floatPattern.Named(strName);
}
#endregion
#region Search options
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Determine whether the search is case sensitive or not. </summary>
///
/// <remarks> Darrellp, 10/1/2012. </remarks>
///
/// <param name="str"> pattern to be affected. </param>
/// <param name="fCaseSensitive"> true to be case sensitive, false for case insensitive. </param>
///
/// <returns> . </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
public static string CaseSensitive(this string str, bool fCaseSensitive)
{
    // The "i" option means IGNORE case, so the case-sensitive request is the one
    // that needs the "-" (switch off) marker.
    string toggle = fCaseSensitive ? "-" : "";
    return string.Concat("(?", toggle, "i:", str) + ")";
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Use Multiline mode. </summary>
///
/// <remarks> Darrellp, 10/1/2012. </remarks>
///
/// <param name="str"> pattern to be affected. </param>
/// <param name="fMultiline"> true for multiline mode</param>
///
/// <returns> . </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
public static string Multiline(this string str, bool fMultiline)
{
    // "(?m:...)" switches multiline mode ON and "(?-m:...)" switches it OFF, so the "-"
    // belongs to the false case. The previous code had the two cases swapped.
    return "(?" + (fMultiline ? "" : "-") + "m:" + str + ")";
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Don't capture unnamed groups. </summary>
///
/// <remarks> Not all that useful in Stex since it uses non-capturing
/// groups unless the user explicitly asks for a capturing group.
/// Darrellp, 10/1/2012. </remarks>
///
/// <param name="str"> pattern to be affected. </param>
/// <param name="fNoUnnamedCaptures"> true to turn off unnamed captures, false to turn on</param>
///
/// <returns> . </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
public static string NoUnnamedCaptures(this string str, bool fNoUnnamedCaptures)
{
    // "(?n:...)" switches explicit-capture mode ON (unnamed groups stop capturing) and
    // "(?-n:...)" switches it OFF, so the "-" belongs to the false case. The previous
    // code had the two cases swapped.
    return "(?" + (fNoUnnamedCaptures ? "" : "-") + "n:" + str + ")";
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Use single-line mode. </summary>
///
/// <remarks> Darrellp, 10/1/2012. </remarks>
///
/// <param name="str"> pattern to be affected. </param>
/// <param name="fSingleLine"> true for single line mode</param>
///
/// <returns> . </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
public static string SingleLine(this string str, bool fSingleLine)
{
    // "(?s:...)" switches single-line mode ON and "(?-s:...)" switches it OFF, so the "-"
    // belongs to the false case. The previous code had the two cases swapped.
    return "(?" + (fSingleLine ? "" : "-") + "s:" + str + ")";
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Ignore rgx white space. </summary>
///
/// <remarks> I really don't know why anyone would use this in Stex but
/// I include it for completeness sake.
/// Darrellp, 10/1/2012. </remarks>
///
/// <param name="str"> pattern to be affected. </param>
/// <param name="fIgnoreWS"> true to ignore unescaped WS in RGX string</param>
///
/// <returns> . </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
public static string IgnoreRGXWhiteSpace(this string str, bool fIgnoreWS)
{
    // "(?x:...)" switches ignore-pattern-whitespace mode ON and "(?-x:...)" switches it
    // OFF, so the "-" belongs to the false case. The previous code had the two cases swapped.
    return "(?" + (fIgnoreWS ? "" : "-") + "x:" + str + ")";
}
#endregion
#region Character Classes
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Returns a range of chars for use in AnyChar. </summary>
///
/// <remarks> Darrellp, 10/1/2012. </remarks>
///
/// <param name="strLow"> Starting char. </param>
/// <param name="strHigh"> Ending char. </param>
///
/// <returns> The Range. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
public static string Range(string strLow, string strHigh)
{
    // Produces the bare "low-high" fragment; the caller supplies the surrounding brackets.
    return string.Concat(strLow, "-", strHigh);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Accept any characters from any of the arguments. </summary>
///
/// <remarks> Darrellp, 10/1/2012. </remarks>
///
/// <param name="s"> Characters or ranges. </param>
///
/// <returns> Pattern. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
public static string AnyCharFrom(params string[] s)
{
    // Join all the pieces and wrap them in a positive character class.
    string members = Cat(s);
    return string.Concat("[", members, "]");
}
/// <summary> Obsolete alias for AnyCharFrom; forwards its arguments unchanged. </summary>
/// <param name="s"> Characters or ranges. </param>
/// <returns> Whatever AnyCharFrom returns for the same arguments. </returns>
[Obsolete("Use AnyCharFrom()")]
public static string AnyChar(params string[] s)
{
return AnyCharFrom(s);
}
/// <summary>
/// Accept any characters not from any of the arguments
/// </summary>
/// <param name="s">Characters or ranges</param>
/// <returns>Pattern</returns>
public static string NotCharIn(params string[] s)
{
    // Join all the pieces and wrap them in a negated character class.
    string excluded = Cat(s);
    return string.Concat("[^", excluded, "]");
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Cosmetic version of NotCharIn which works better for single characters. </summary>
///
/// <remarks> Darrellp, 10/1/2012. </remarks>
///
/// <param name="s"> Characters or ranges. </param>
///
/// <returns> Pattern. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
public static string Not(params string[] s)
{
// Pure alias: forwards the arguments unchanged to NotCharIn.
return NotCharIn(s);
}
/// <summary> Obsolete alias for NotCharIn; forwards its arguments unchanged. </summary>
/// <param name="s"> Characters or ranges. </param>
/// <returns> Whatever NotCharIn returns for the same arguments. </returns>
[Obsolete("Use Not() or NotCharIn()")]
public static string NoChar(params string[] s)
{
return NotCharIn(s);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Accept any characters from Unicode general category or named block. </summary>
///
/// <remarks> Darrellp, 8/17/2016. </remarks>
///
/// <param name="s"> Unicode general category or named block name. </param>
///
/// <returns> Pattern. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
public static string UnicodeCategory(params string[] s)
{
    // Join the pieces into the category/block name and wrap it as \p{name}.
    string categoryName = Cat(s);
    return string.Concat(@"\p{", categoryName, "}");
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Accept any characters NOT from a Unicode general category or named block. </summary>
///
/// <remarks> Darrellp, 8/17/2016. </remarks>
///
/// <param name="s"> Unicode general category or named block name. </param>
///
/// <returns> Pattern. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
public static string NotInUnicodeCategory(params string[] s)
{
    // Join the pieces into the category/block name and wrap it as \P{name}.
    string categoryName = Cat(s);
    return string.Concat(@"\P{", categoryName, "}");
}
#endregion
#region Concatenation
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Concatenates strings. </summary>
///
/// <remarks> Darrellp, 10/1/2012. </remarks>
///
/// <param name="s"> strings to be concatenated. </param>
///
/// <returns> concatenation of all the strings in s. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
public static string Cat(params string[] s)
{
    // string.Concat joins in a single pass and returns "" for an empty array. The
    // previous seedless Aggregate threw InvalidOperationException when no strings
    // were supplied and built a new intermediate string at every step.
    return string.Concat(s);
}
#endregion
#region Alternation
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Creates a pattern which matches either this or any of the parameters. </summary>
///
/// <remarks> Darrellp, 10/1/2012. </remarks>
///
/// <param name="str"> this. </param>
/// <param name="s"> the other strings. </param>
///
/// <returns> pattern. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
public static string OrAnyOf(this string str, params string[] s)
{
    // Put the receiver in front of the other alternatives, separate them all with "|",
    // and wrap the result in a non-capturing group.
    string[] alternatives = new[] { str }.Concat(s).ToArray();
    string joined = string.Join("|", alternatives);
    return "(?:" + joined + ")";
}
/// <summary> Obsolete alias for OrAnyOf; forwards the receiver and arguments unchanged. </summary>
/// <param name="str"> this. </param>
/// <param name="s"> the other strings. </param>
/// <returns> Whatever OrAnyOf returns for the same arguments. </returns>
[Obsolete("Use OrAnyOf()")]
public static string Or(this string str, params string[] s)
{
return str.OrAnyOf(s);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Creates a pattern which matches any of the parameters. </summary>
///
/// <remarks> Darrellp, 10/1/2012. </remarks>
///
/// <param name="s"> the other strings. </param>
///
/// <returns> pattern. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
public static string AnyOf(params string[] s)
{
    // Split into "first" and "everything after it", then let OrAnyOf build the alternation.
    string first = s[0];
    string[] remaining = s.Skip(1).ToArray();
    return first.OrAnyOf(remaining);
}
/// <summary> Obsolete alias for AnyOf; forwards its arguments unchanged. </summary>
/// <param name="s"> the alternatives. </param>
/// <returns> Whatever AnyOf returns for the same arguments. </returns>
[Obsolete("Use AnyOf")]
public static string Or(params string[] s)
{
return AnyOf(s);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Pattern which depends on whether a group has been matched. </summary>
///
/// <remarks> Darrellp, 10/1/2012. </remarks>
///
/// <param name="strLabel"> Group to check whether it's matched. </param>
/// <param name="strHasMatched"> Pattern to use if there was a match. </param>
/// <param name="strDidntMatch"> Pattern to use if there wasn't a match. </param>
///
/// <returns> Conditional pattern. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
public static string If(this string strLabel, string strHasMatched, string strDidntMatch = "")
{
    // Layout: (?(label)yes|no)
    string condition = "(?(" + strLabel + ")";
    string branches = strHasMatched + "|" + strDidntMatch;
    return condition + branches + ")";
}
#endregion
#region Quantifiers
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Returns a pattern in which str may appear zero times or once. </summary>
///
/// <remarks> Shorthand for Rep(0, 1, isLazy).  Darrellp, 10/1/2012. </remarks>
///
/// <param name="str"> Pattern to be made optional. </param>
/// <param name="isLazy"> Passed through to Rep as its isLazy argument. </param>
///
/// <returns> Pattern which optionally matches str. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
public static string Optional(this string str, bool isLazy = false)
{
    const int minCount = 0;
    const int maxCount = 1;
    return Rep(str, minCount, maxCount, isLazy);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Builds a pattern that repeats str at least "least" and at most "most" times.  A negative
/// "most" (the default) means there is no upper limit, so Rep(3) means "three or more times".
/// </summary>
///
/// <remarks>
/// With an upper limit the quantifier is "{n}" when least equals most, "?" for zero-or-one, and
/// "{n,m}" otherwise.  Without an upper limit the work is delegated to RepAtLeast.
/// Darrellp, 10/1/2012.
/// </remarks>
///
/// <exception cref="ArgumentException"> Thrown when least is negative, or when most is not
/// negative but is smaller than least. </exception>
///
/// <param name="str"> Pattern to be repeated. </param>
/// <param name="least"> Minimum number of repetitions; must not be negative. </param>
/// <param name="most"> Maximum number of repetitions; negative means unlimited. </param>
/// <param name="isLazy"> When true, "?" is appended after the quantifier. </param>
///
/// <returns> Pattern which matches str repeated the requested number of times. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
public static string Rep(this string str, int least, int most = -1, bool isLazy = false)
{
    if (least < 0)
    {
        throw new ArgumentException("least must be >= 0 in Rep");
    }

    bool isUnbounded = most < 0;
    if (!isUnbounded && most < least)
    {
        throw new ArgumentException("Invalid most value in Rep");
    }

    string quantified;
    if (isUnbounded)
    {
        quantified = str.RepAtLeast(least);
    }
    else
    {
        // The exact-count test comes first so that (0, 0) produces "{0}".
        string quantifier = least == most
            ? string.Format("{{{0}}}", least)
            : (least == 0 && most == 1)
                ? "?"
                : string.Format("{{{0},{1}}}", least, most);
        quantified = str.AsGroup() + quantifier;
    }

    return isLazy ? quantified + "?" : quantified + "";
}
/// <summary>
/// Builds a pattern that repeats str "count" or more times: "*" for zero, "+" for one, and
/// "{count,}" for anything larger, applied to str.AsGroup().
/// </summary>
///
/// <exception cref="ArgumentException"> Thrown when count is negative. </exception>
private static string RepAtLeast(this string str, int count)
{
    if (count < 0)
    {
        throw new ArgumentException("count is negative in RepAtLeast");
    }

    string quantifier;
    switch (count)
    {
        case 0:
            quantifier = "*";
            break;
        case 1:
            quantifier = "+";
            break;
        default:
            quantifier = string.Format("{{{0},}}", count);
            break;
    }

    return str.AsGroup() + quantifier;
}
#endregion
#region Grouping constructs
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Makes a pattern safe to treat as a single unit.  If RgxDontParenthesize matches the pattern
/// it is returned unchanged; otherwise it is wrapped in (?: ... ).
/// </summary>
///
/// <remarks>
/// RgxDontParenthesize is defined elsewhere in this file; the earlier documentation describes
/// the unchanged cases as patterns that are already parenthesized or one character long.
/// The wrapper is a non-capturing group - use Capture() for a capturing group.
/// Darrellp, 10/1/2012.
/// </remarks>
///
/// <param name="str"> Pattern to be grouped. </param>
///
/// <returns> The original pattern, or the pattern inside a non-capturing group. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
public static string AsGroup(this string str)
{
    bool leaveAsIs = RgxDontParenthesize.IsMatch(str);
    return leaveAsIs ? str : "(?:" + str + ")";
}
/// <summary> Obsolete alias which forwards its argument unchanged to AsGroup. </summary>
[Obsolete("Use AsGroup")]
public static string AddParens(this string str)
{
    return str.AsGroup();
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Wraps a pattern in a positive lookahead, "(?=str)". </summary>
///
/// <remarks> Darrellp, 8/29/2011. </remarks>
///
/// <param name="str"> Pattern to look ahead for. </param>
///
/// <returns> Positive lookahead pattern. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
public static string PosLookAhead(this string str)
{
    const string lookaheadTemplate = "(?={0})";
    return string.Format(lookaheadTemplate, str);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Wraps a pattern in a negative lookahead, "(?!str)". </summary>
///
/// <remarks> Darrellp, 8/29/2011. </remarks>
///
/// <param name="str"> Pattern which must not appear ahead. </param>
///
/// <returns> Negative lookahead pattern. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
public static string NegLookAhead(this string str)
{
    const string lookaheadTemplate = "(?!{0})";
    return string.Format(lookaheadTemplate, str);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Wraps a pattern in a positive lookbehind, "(?&lt;=str)". </summary>
///
/// <remarks> Darrellp, 8/29/2011. </remarks>
///
/// <param name="str"> Pattern to look behind for. </param>
///
/// <returns> Positive lookbehind pattern. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
public static string PosLookBehind(this string str)
{
    const string lookbehindTemplate = "(?<={0})";
    return string.Format(lookbehindTemplate, str);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Wraps a pattern in a negative lookbehind, "(?&lt;!str)". </summary>
///
/// <remarks> Darrellp, 8/29/2011. </remarks>
///
/// <param name="str"> Pattern which must not appear behind. </param>
///
/// <returns> Negative lookbehind pattern. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
public static string NegLookBehind(this string str)
{
    const string lookbehindTemplate = "(?<!{0})";
    return string.Format(lookbehindTemplate, str);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary> Wraps a pattern in plain parentheses, "(str)", i.e. an unnamed group. </summary>
///
/// <remarks> Darrellp, 8/29/2011. </remarks>
///
/// <param name="str"> Pattern to be captured. </param>
///
/// <returns> Pattern which places the match in an unnamed group. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
public static string Capture(this string str)
{
    const string groupTemplate = "({0})";
    return string.Format(groupTemplate, str);
}
[Obsolete("Use Capture")]
public static string Group(this string str)