From 8d35248bb3c9c0c15319e70eeff5223bc5e1f61c Mon Sep 17 00:00:00 2001 From: Jeffrey Faer Date: Mon, 9 Mar 2026 10:41:21 -0600 Subject: [PATCH 01/10] fix: Apply regexes to the actual line content (separated by newlines) instead of the joinedLines. Now users can craft regexes that would match just the first line of the group or block (or any arbitrary line they desire) which is pretty hard to do right now. --- keepsorted/line_group.go | 48 +++++++++++++++++++++++++++++++++++----- keepsorted/options.go | 3 ++- 2 files changed, 44 insertions(+), 7 deletions(-) diff --git a/keepsorted/line_group.go b/keepsorted/line_group.go index a637de8..25f6fb9 100644 --- a/keepsorted/line_group.go +++ b/keepsorted/line_group.go @@ -119,7 +119,7 @@ func groupLines(lines []string, metadata blockMetadata) []*lineGroup { // Returns another boolean indicating whether the group should be ending // after that line if so. shouldAddToRegexDelimitedGroup := func(l string) (addToGroup bool, finishGroupAfter bool) { - if metadata.opts.GroupStartRegex != nil { + if metadata.opts.GroupStartRegex != nil { // For GroupStartRegex, all non-regex-matching lines should be // part of the group including prior lines. return !matchesAnyRegex(l, metadata.opts.GroupStartRegex), false @@ -349,7 +349,7 @@ func (cb *codeBlock) append(s string, opts blockOptions) { cb.braceCounts = make(map[string]int) } - // TODO(jfalgout): Does this need to handle runes more correctly? + // TODO: jfaer - Does this need to handle runes more correctly? for i := 0; i < len(s); { if cb.expectedQuote == "" { // We do not appear to be inside a string literal. @@ -419,8 +419,18 @@ func (lg *lineGroup) commentOnly() bool { } func (lg *lineGroup) regexTokens() []regexToken { - // TODO: jfaer - Should we match regexes on the original content? - regexMatches := lg.opts.matchRegexes(lg.internalJoinedLines()) + var regexMatches []regexMatch + if len(lg.opts.ByRegex) == 0 { + // We apply other options on top of what the regex extracts, but if the user + // didn't set by_regex, we should fall back to the behavior they would've + // expected before we started supporting by_regex. + // Namely: We would apply the other options on top of the + // internalJoinedLines() instead of the raw content. + regexMatches = []regexMatch{{lg.internalJoinedLines()}} + } else { + regexMatches = lg.opts.matchRegexes(lg.regexJoinedLines()) + } + ret := make([]regexToken, len(regexMatches)) if lg.access.regexTokens == nil { lg.access.regexTokens = make([]regexTokenAccessRecorder, len(regexMatches)) @@ -453,8 +463,15 @@ func (lg *lineGroup) regexTokens() []regexToken { return ret } -// internalJoinedLines calculates the same thing as joinedLines, except it -// doesn't record that it was used in the accessRecorder. +// internalJoinedLines attempts to concatenate all of this lineGroup's content +// the same way a reasonable human might. +// +// If the previous line ends with a "word" character and the current line starts +// with a "word" character, the two lines will be separated by a space. +// Otherwise, the lines are concatenated without any separation. +// +// Unlike joinedLines, this method does not record that it was used in the +// accessRecorder. func (lg *lineGroup) internalJoinedLines() string { if len(lg.lines) == 0 { return "" @@ -475,6 +492,25 @@ func (lg *lineGroup) internalJoinedLines() string { return s.String() } +// regexJoinedLines concatenates all of this lineGroup's content in a way that +// friendlier to regexes than internalJoinedLines. +// +// Primarily, this method still strips leading whitespace but it uses a real +// newline character to separate lines instead of the "word" character logic of +// internalJoinedLines. +func (lg *lineGroup) regexJoinedLines() string { + if len(lg.lines) == 0 { + return "" + } + lines := make([]string, len(lg.lines)) + for i, l := range lg.lines { + lines[i] = strings.TrimLeftFunc(l, unicode.IsSpace) + } + return strings.Join(lines, "\n") +} + +// joinedLines returns internalJoinedLines and records that it was called in the +// accessRecorder. func (lg *lineGroup) joinedLines() string { lg.access.joinedLines = true return lg.internalJoinedLines() diff --git a/keepsorted/options.go b/keepsorted/options.go index e36d20d..ea4e867 100644 --- a/keepsorted/options.go +++ b/keepsorted/options.go @@ -437,13 +437,14 @@ func (opts blockOptions) trimIgnorePrefix(s string) string { // the resulting slice. func (opts blockOptions) matchRegexes(s string) []regexMatch { if len(opts.ByRegex) == 0 { - return []regexMatch{{s}} + return nil } var ret []regexMatch for _, p := range opts.ByRegex { regex := p.Pattern + // TODO: jfaer - It'd be nice if these two branches were unified. if p.Template != nil { var result []byte m := regex.FindAllStringSubmatchIndex(s, -1) From d7a4f840d06ee37c57e0c39f03fee3272f8ca08c Mon Sep 17 00:00:00 2001 From: Jeffrey Faer Date: Mon, 9 Mar 2026 13:27:31 -0600 Subject: [PATCH 02/10] set (?s) on all by_regexes to minimize disruption. --- keepsorted/options.go | 23 ++++++++++++++++------- keepsorted/options_parser.go | 17 ++++++++++++++--- keepsorted/options_parser_test.go | 16 ++++++++-------- keepsorted/options_test.go | 17 +++++++++-------- 4 files changed, 47 insertions(+), 26 deletions(-) diff --git a/keepsorted/options.go b/keepsorted/options.go index ea4e867..c7b4b47 100644 --- a/keepsorted/options.go +++ b/keepsorted/options.go @@ -238,21 +238,26 @@ func formatValue(val reflect.Value) (string, error) { return strconv.Itoa(int(val.Int())), nil case reflect.TypeFor[[]ByRegexOption](): opts := val.Interface().([]ByRegexOption) - vals := make([]string, 0, len(opts)) + vals := make([]any, len(opts)) seenTemplate := false - for _, opt := range opts { + for i, opt := range opts { if opt.Template != nil { seenTemplate = true - vals = append(vals, fmt.Sprintf(`%q: %q`, opt.Pattern.String(), *opt.Template)) + vals[i] = map[string]string{opt.Pattern.String(): *opt.Template} continue } - vals = append(vals, opt.Pattern.String()) + vals[i] = opt.Pattern.String() } + if seenTemplate { - // always presented as a yaml sequence to preserve any `k:v` items - return fmt.Sprintf("[%s]", strings.Join(vals, ", ")), nil + return formatYAMLList(vals) } - return formatList(vals) + + s := make([]string, len(vals)) + for i, val := range vals { + s[i] = val.(string) + } + return formatList(s) case reflect.TypeFor[[]*regexp.Regexp](): regexps := val.Interface().([]*regexp.Regexp) vals := make([]string, len(regexps)) @@ -282,6 +287,10 @@ func formatList(vals []string) (string, error) { return strings.Join(vals, ","), nil } + return formatYAMLList(vals) +} + +func formatYAMLList[T any](vals []T) (string, error) { node := new(yaml.Node) if err := node.Encode(vals); err != nil { return "", fmt.Errorf("while converting list to YAML: %w", err) diff --git a/keepsorted/options_parser.go b/keepsorted/options_parser.go index 4943842..b87c7ca 100644 --- a/keepsorted/options_parser.go +++ b/keepsorted/options_parser.go @@ -125,7 +125,7 @@ func (p *parser) popIntOrBool() (IntOrBool, error) { func (ar *ByRegexOption) UnmarshalYAML(node *yaml.Node) error { switch node.Tag { case "!!str": - pat, err := regexp.Compile(node.Value) + pat, err := compileByRegex(node.Value) if err != nil { return err } @@ -141,7 +141,7 @@ func (ar *ByRegexOption) UnmarshalYAML(node *yaml.Node) error { return fmt.Errorf("by_regex map item must have exactly one key-value pair, but got %d", len(m)) } for pattern, template := range m { - pat, err := regexp.Compile(pattern) + pat, err := compileByRegex(pattern) if err != nil { return fmt.Errorf("invalid regex pattern %q: %w", pattern, err) } @@ -191,7 +191,7 @@ func (p *parser) popList() ([]string, error) { func (p *parser) popByRegexOption() ([]ByRegexOption, error) { return popListValue(p, func(s string) (ByRegexOption, error) { - pat, err := regexp.Compile(s) + pat, err := compileByRegex(s) return ByRegexOption{Pattern: pat}, err }) } @@ -321,3 +321,14 @@ func (iter *runeIter) pop() (rune, bool) { } return ch, ok } + +func compileByRegex(re string) (*regexp.Regexp, error) { + if !strings.HasPrefix(re, "(?s)") { + // The initial version of by_regex ran on top of lineGroup.joinedLines. This + // meant that users wrote regexes that didn't need to handle newlines. + // To minimize disruption, we automatically set dotall flag so that their + // regexes might still work after we start using real newlines. + re = "(?s)" + re + } + return regexp.Compile(re) +} diff --git a/keepsorted/options_parser_test.go b/keepsorted/options_parser_test.go index 284cab5..051a9a1 100644 --- a/keepsorted/options_parser_test.go +++ b/keepsorted/options_parser_test.go @@ -218,7 +218,7 @@ func TestPopValue(t *testing.T) { name: "Regex", input: ".*", - want: []ByRegexOption{{regexp.MustCompile(".*"), nil}}, + want: []ByRegexOption{{regexp.MustCompile("(?s).*"), nil}}, }, { name: "MultipleRegex", @@ -226,9 +226,9 @@ func TestPopValue(t *testing.T) { input: `[.*, abcd, '(?:efgh)ijkl']`, allowYAMLList: true, want: []ByRegexOption{ - {regexp.MustCompile(".*"), nil}, - {regexp.MustCompile("abcd"), nil}, - {regexp.MustCompile("(?:efgh)ijkl"), nil}, + {regexp.MustCompile("(?s).*"), nil}, + {regexp.MustCompile("(?s)abcd"), nil}, + {regexp.MustCompile("(?s)(?:efgh)ijkl"), nil}, }, }, { @@ -237,10 +237,10 @@ func TestPopValue(t *testing.T) { input: `[.*, Mon: 0, '\b(\d{2})/(\d{2})/(\d{4})\b': '${3}-${1}-${2}', "0: 1": 2]`, allowYAMLList: true, want: []ByRegexOption{ - {regexp.MustCompile(".*"), nil}, - {regexp.MustCompile("Mon"), &([]string{"0"})[0]}, - {regexp.MustCompile(`\b(\d{2})/(\d{2})/(\d{4})\b`), &([]string{"${3}-${1}-${2}"})[0]}, - {regexp.MustCompile(`0: 1`), &([]string{"2"})[0]}, + {regexp.MustCompile("(?s).*"), nil}, + {regexp.MustCompile("(?s)Mon"), &([]string{"0"})[0]}, + {regexp.MustCompile(`(?s)\b(\d{2})/(\d{2})/(\d{4})\b`), &([]string{"${3}-${1}-${2}"})[0]}, + {regexp.MustCompile(`(?s)0: 1`), &([]string{"2"})[0]}, }, }, { diff --git a/keepsorted/options_test.go b/keepsorted/options_test.go index dccfd75..20acd7c 100644 --- a/keepsorted/options_test.go +++ b/keepsorted/options_test.go @@ -194,7 +194,8 @@ func TestBlockOptions(t *testing.T) { want: blockOptions{ AllowYAMLLists: true, ByRegex: []ByRegexOption{ - {regexp.MustCompile("(?:abcd)"), nil}, {regexp.MustCompile("efg.*"), nil}, + {regexp.MustCompile("(?s)(?:abcd)"), nil}, + {regexp.MustCompile("(?s)efg.*"), nil}, }, }, }, @@ -206,8 +207,8 @@ func TestBlockOptions(t *testing.T) { want: blockOptions{ AllowYAMLLists: true, ByRegex: []ByRegexOption{ - {Pattern: regexp.MustCompile(`.*`)}, - {Pattern: regexp.MustCompile(`\b(\d{2})/(\d{2})/(\d{4})\b`), + {Pattern: regexp.MustCompile(`(?s).*`)}, + {Pattern: regexp.MustCompile(`(?s)\b(\d{2})/(\d{2})/(\d{4})\b`), Template: &[]string{"${3}-${1}-${2}"}[0]}, }, }, @@ -253,13 +254,13 @@ func TestBlockOptions(t *testing.T) { got, warns := parseBlockOptions(tc.commentMarker, tc.in, tc.defaultOptions) if err := errors.Join(warns...); err != nil { if tc.wantErr == "" { - t.Errorf("parseBlockOptions(%q, %q) = %v", tc.commentMarker, tc.in, err) + t.Errorf("parseBlockOptions(%#v, %#v) = %v", tc.commentMarker, tc.in, err) } else if !strings.Contains(err.Error(), tc.wantErr) { - t.Errorf("parseBlockOptions(%q, %q) = %v, expected to contain %q", tc.commentMarker, tc.in, err, tc.wantErr) + t.Errorf("parseBlockOptions(%#v, %#v) = %v, expected to contain %q", tc.commentMarker, tc.in, err, tc.wantErr) } } if diff := cmp.Diff(tc.want, got, cmp.AllowUnexported(blockOptions{}), cmpRegexp); diff != "" { - t.Errorf("parseBlockOptions(%q, %q) mismatch (-want +got):\n%s", tc.commentMarker, tc.in, diff) + t.Errorf("parseBlockOptions(%#v, %#v) mismatch (-want +got):\n%s", tc.commentMarker, tc.in, diff) } if tc.wantErr == "" { @@ -267,10 +268,10 @@ func TestBlockOptions(t *testing.T) { s := got.String() got2, warns := parseBlockOptions(tc.commentMarker, s, tc.defaultOptions) if err := errors.Join(warns...); err != nil { - t.Errorf("parseBlockOptions(%q, %q) = %v", tc.commentMarker, s, err) + t.Errorf("parseBlockOptions(%#v, %#v) = %v", tc.commentMarker, s, err) } if diff := cmp.Diff(got, got2, cmp.AllowUnexported(blockOptions{}), cmpRegexp); diff != "" { - t.Errorf("parseBlockOptions(%q, %q) mismatch (-want +got):\n%s", tc.commentMarker, s, diff) + t.Errorf("parseBlockOptions(%#v, %#v) mismatch (-want +got):\n%s", tc.commentMarker, s, diff) } }) } From b257ef998545a310b42b73a58a1930c4ad7f655b Mon Sep 17 00:00:00 2001 From: Jeffrey Faer Date: Mon, 9 Mar 2026 15:15:53 -0600 Subject: [PATCH 03/10] fix: Format []ByRegexOption correctly. If there's a mix of mappings and singular elements, the mappings will (maybe) be quoted correctly but the singular elements won't be, which can result in a YAML flow sequence that's not valid YAML. --- keepsorted/options.go | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/keepsorted/options.go b/keepsorted/options.go index e36d20d..65ef013 100644 --- a/keepsorted/options.go +++ b/keepsorted/options.go @@ -238,21 +238,25 @@ func formatValue(val reflect.Value) (string, error) { return strconv.Itoa(int(val.Int())), nil case reflect.TypeFor[[]ByRegexOption](): opts := val.Interface().([]ByRegexOption) - vals := make([]string, 0, len(opts)) + vals := make([]any, len(opts)) seenTemplate := false - for _, opt := range opts { + for i, opt := range opts { if opt.Template != nil { seenTemplate = true - vals = append(vals, fmt.Sprintf(`%q: %q`, opt.Pattern.String(), *opt.Template)) + vals[i] = map[string]string{opt.Pattern.String(): *opt.Template} continue } - vals = append(vals, opt.Pattern.String()) + vals[i] = opt.Pattern.String() } if seenTemplate { // always presented as a yaml sequence to preserve any `k:v` items - return fmt.Sprintf("[%s]", strings.Join(vals, ", ")), nil + return formatYAMLList(vals) } - return formatList(vals) + s := make([]string, len(vals)) + for i, val := range vals { + s[i] = val.(string) + } + return formatList(s) case reflect.TypeFor[[]*regexp.Regexp](): regexps := val.Interface().([]*regexp.Regexp) vals := make([]string, len(regexps)) @@ -282,6 +286,10 @@ func formatList(vals []string) (string, error) { return strings.Join(vals, ","), nil } + return formatYAMLList(vals) +} + +func formatYAMLList[T any](vals []T) (string, error) { node := new(yaml.Node) if err := node.Encode(vals); err != nil { return "", fmt.Errorf("while converting list to YAML: %w", err) From a45c7d39afa9cb2174314f330b5b75510b11b13f Mon Sep 17 00:00:00 2001 From: Jeffrey Faer Date: Mon, 9 Mar 2026 15:19:55 -0600 Subject: [PATCH 04/10] add a test --- keepsorted/options_test.go | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/keepsorted/options_test.go b/keepsorted/options_test.go index dccfd75..bdcfa56 100644 --- a/keepsorted/options_test.go +++ b/keepsorted/options_test.go @@ -212,6 +212,20 @@ func TestBlockOptions(t *testing.T) { }, }, }, + { + name: "RegexWithTemplateAndSingletonWithSpecialChars", + in: `by_regex=['foo, bar', '\b(\d{2})/(\d{2})/(\d{4})\b': '${3}-${1}-${2}']`, + defaultOptions: blockOptions{AllowYAMLLists: true}, + + want: blockOptions{ + AllowYAMLLists: true, + ByRegex: []ByRegexOption{ + {Pattern: regexp.MustCompile(`foo, bar`)}, + {Pattern: regexp.MustCompile(`\b(\d{2})/(\d{2})/(\d{4})\b`), + Template: &[]string{"${3}-${1}-${2}"}[0]}, + }, + }, + }, { name: "OrderAsc", in: "order=asc", @@ -253,13 +267,13 @@ func TestBlockOptions(t *testing.T) { got, warns := parseBlockOptions(tc.commentMarker, tc.in, tc.defaultOptions) if err := errors.Join(warns...); err != nil { if tc.wantErr == "" { - t.Errorf("parseBlockOptions(%q, %q) = %v", tc.commentMarker, tc.in, err) + t.Errorf("parseBlockOptions(%#v, %#v) = %v", tc.commentMarker, tc.in, err) } else if !strings.Contains(err.Error(), tc.wantErr) { - t.Errorf("parseBlockOptions(%q, %q) = %v, expected to contain %q", tc.commentMarker, tc.in, err, tc.wantErr) + t.Errorf("parseBlockOptions(%#v, %#v) = %v, expected to contain %q", tc.commentMarker, tc.in, err, tc.wantErr) } } if diff := cmp.Diff(tc.want, got, cmp.AllowUnexported(blockOptions{}), cmpRegexp); diff != "" { - t.Errorf("parseBlockOptions(%q, %q) mismatch (-want +got):\n%s", tc.commentMarker, tc.in, diff) + t.Errorf("parseBlockOptions(%#v, %#v) mismatch (-want +got):\n%s", tc.commentMarker, tc.in, diff) } if tc.wantErr == "" { @@ -267,10 +281,10 @@ func TestBlockOptions(t *testing.T) { s := got.String() got2, warns := parseBlockOptions(tc.commentMarker, s, tc.defaultOptions) if err := errors.Join(warns...); err != nil { - t.Errorf("parseBlockOptions(%q, %q) = %v", tc.commentMarker, s, err) + t.Errorf("parseBlockOptions(%#v, %#v) = %v", tc.commentMarker, s, err) } if diff := cmp.Diff(got, got2, cmp.AllowUnexported(blockOptions{}), cmpRegexp); diff != "" { - t.Errorf("parseBlockOptions(%q, %q) mismatch (-want +got):\n%s", tc.commentMarker, s, diff) + t.Errorf("parseBlockOptions(%#v, %#v) mismatch (-want +got):\n%s", tc.commentMarker, s, diff) } }) } From 5a2e20df1664c600862ee6e7f1ef0740d9ea4d5c Mon Sep 17 00:00:00 2001 From: Jeffrey Faer Date: Mon, 9 Mar 2026 15:53:22 -0600 Subject: [PATCH 05/10] one last tweak --- keepsorted/options.go | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/keepsorted/options.go b/keepsorted/options.go index 65ef013..dc0567a 100644 --- a/keepsorted/options.go +++ b/keepsorted/options.go @@ -40,6 +40,14 @@ type ByRegexOption struct { Template *string } +func (opt ByRegexOption) MarshalYAML() (any, error) { + if opt.Template == nil { + return opt.Pattern.String(), nil + } + + return map[string]string{opt.Pattern.String(): *opt.Template}, nil +} + // SortOrder defines whether we sort in ascending or descending order. type SortOrder string @@ -238,25 +246,20 @@ func formatValue(val reflect.Value) (string, error) { return strconv.Itoa(int(val.Int())), nil case reflect.TypeFor[[]ByRegexOption](): opts := val.Interface().([]ByRegexOption) - vals := make([]any, len(opts)) + vals := make([]string, len(opts)) seenTemplate := false for i, opt := range opts { if opt.Template != nil { seenTemplate = true - vals[i] = map[string]string{opt.Pattern.String(): *opt.Template} - continue + break } vals[i] = opt.Pattern.String() } if seenTemplate { // always presented as a yaml sequence to preserve any `k:v` items - return formatYAMLList(vals) + return formatYAMLList(opts) } - s := make([]string, len(vals)) - for i, val := range vals { - s[i] = val.(string) - } - return formatList(s) + return formatList(vals) case reflect.TypeFor[[]*regexp.Regexp](): regexps := val.Interface().([]*regexp.Regexp) vals := make([]string, len(regexps)) From 849e102e03eaab6b66f6dc506b750fa27a6bc9b9 Mon Sep 17 00:00:00 2001 From: Jeffrey Faer Date: Mon, 9 Mar 2026 16:01:37 -0600 Subject: [PATCH 06/10] fix the new test --- keepsorted/options_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/keepsorted/options_test.go b/keepsorted/options_test.go index abc7153..8ca18d7 100644 --- a/keepsorted/options_test.go +++ b/keepsorted/options_test.go @@ -221,8 +221,8 @@ func TestBlockOptions(t *testing.T) { want: blockOptions{ AllowYAMLLists: true, ByRegex: []ByRegexOption{ - {Pattern: regexp.MustCompile(`foo, bar`)}, - {Pattern: regexp.MustCompile(`\b(\d{2})/(\d{2})/(\d{4})\b`), + {Pattern: regexp.MustCompile(`(?s)foo, bar`)}, + {Pattern: regexp.MustCompile(`(?s)\b(\d{2})/(\d{2})/(\d{4})\b`), Template: &[]string{"${3}-${1}-${2}"}[0]}, }, }, From 90a0537177859076c9f7fb3445dc14c01c3854d7 Mon Sep 17 00:00:00 2001 From: Jeffrey Faer Date: Mon, 9 Mar 2026 16:07:58 -0600 Subject: [PATCH 07/10] also exempt regexes that start with (?-s). Strictly this isn't necessary since (?s)(?-s) would simplify to (?-s), but it feels nice to skip adding (?s) if it's obvious that's not wanted. --- keepsorted/options_parser.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keepsorted/options_parser.go b/keepsorted/options_parser.go index b87c7ca..2273631 100644 --- a/keepsorted/options_parser.go +++ b/keepsorted/options_parser.go @@ -323,7 +323,7 @@ func (iter *runeIter) pop() (rune, bool) { } func compileByRegex(re string) (*regexp.Regexp, error) { - if !strings.HasPrefix(re, "(?s)") { + if !strings.HasPrefix(re, "(?s)") && !strings.HasPrefix(re, "(?-s)") { // The initial version of by_regex ran on top of lineGroup.joinedLines. This // meant that users wrote regexes that didn't need to handle newlines. // To minimize disruption, we automatically set dotall flag so that their From 2075edf501d6ac590dbed5cbca4fb921cd97bca7 Mon Sep 17 00:00:00 2001 From: Jeffrey Faer Date: Mon, 9 Mar 2026 16:21:12 -0600 Subject: [PATCH 08/10] fix comment typo --- keepsorted/line_group.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keepsorted/line_group.go b/keepsorted/line_group.go index 25f6fb9..d45a740 100644 --- a/keepsorted/line_group.go +++ b/keepsorted/line_group.go @@ -492,7 +492,7 @@ func (lg *lineGroup) internalJoinedLines() string { return s.String() } -// regexJoinedLines concatenates all of this lineGroup's content in a way that +// regexJoinedLines concatenates all of this lineGroup's content in a way that's // friendlier to regexes than internalJoinedLines. // // Primarily, this method still strips leading whitespace but it uses a real From 285548eed4aad557910fe0939aff478103343b0e Mon Sep 17 00:00:00 2001 From: Jeffrey Faer Date: Wed, 11 Mar 2026 15:55:39 -0600 Subject: [PATCH 09/10] Apply the joinLines transform to the _result_ of the regex match. That way, we get even more consistent behavior. --- keepsorted/line_group.go | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/keepsorted/line_group.go b/keepsorted/line_group.go index d45a740..6b0c7b3 100644 --- a/keepsorted/line_group.go +++ b/keepsorted/line_group.go @@ -429,6 +429,14 @@ func (lg *lineGroup) regexTokens() []regexToken { regexMatches = []regexMatch{{lg.internalJoinedLines()}} } else { regexMatches = lg.opts.matchRegexes(lg.regexJoinedLines()) + // We still want to apply the joinLines transform so that we get + // "reasonable human" comparisons if the regex intentionally + // (or accidentally!) matches more than one line. + for _, match := range regexMatches { + for i, s := range match { + match[i] = joinLines(strings.Split(s, "\n")) + } + } } ret := make([]regexToken, len(regexMatches)) @@ -473,7 +481,11 @@ func (lg *lineGroup) regexTokens() []regexToken { // Unlike joinedLines, this method does not record that it was used in the // accessRecorder. func (lg *lineGroup) internalJoinedLines() string { - if len(lg.lines) == 0 { + return joinLines(lg.lines) +} + +func joinLines(lines []string) string { + if len(lines) == 0 { return "" } @@ -481,7 +493,7 @@ func (lg *lineGroup) internalJoinedLines() string { startsWithWordChar := regexp.MustCompile(`^\w`) var s strings.Builder var last string - for _, l := range lg.lines { + for _, l := range lines { l := strings.TrimLeftFunc(l, unicode.IsSpace) if len(last) > 0 && len(l) > 0 && endsWithWordChar.MatchString(last) && startsWithWordChar.MatchString(l) { s.WriteString(" ") From 1aa77c821c672df2043dc53093bcde5cd2b6c8a9 Mon Sep 17 00:00:00 2001 From: Jeffrey Faer Date: Wed, 11 Mar 2026 16:11:21 -0600 Subject: [PATCH 10/10] make a comment more succinct --- keepsorted/line_group.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/keepsorted/line_group.go b/keepsorted/line_group.go index 6b0c7b3..3afb005 100644 --- a/keepsorted/line_group.go +++ b/keepsorted/line_group.go @@ -430,8 +430,7 @@ func (lg *lineGroup) regexTokens() []regexToken { } else { regexMatches = lg.opts.matchRegexes(lg.regexJoinedLines()) // We still want to apply the joinLines transform so that we get - // "reasonable human" comparisons if the regex intentionally - // (or accidentally!) matches more than one line. + // "reasonable human" comparisons if the regex matches more than one line. for _, match := range regexMatches { for i, s := range match { match[i] = joinLines(strings.Split(s, "\n"))