diff --git a/file/file.go b/file/file.go
index 1160e9f..1e1282a 100644
--- a/file/file.go
+++ b/file/file.go
@@ -200,37 +200,52 @@ func HasStdin() bool {
 	return isPipedFromChrDev || isPipedFromFIFO
 }
 
-// ReadFileWithReader and stream on a channel
+// ReadFileWithReader streams r line by line on a channel.
+//
+// Deprecated: use LinesReader, which returns an iter.Seq2[string, error] and
+// surfaces scanner errors. Equivalent invocation:
+//
+//	for line, err := range fileutil.LinesReader(r) { ... }
 func ReadFileWithReader(r io.Reader) (chan string, error) {
 	out := make(chan string)
 	go func() {
 		defer close(out)
-		scanner := bufio.NewScanner(r)
-		for scanner.Scan() {
-			out <- scanner.Text()
+		for line, err := range LinesReader(r) {
+			if err != nil {
+				return
+			}
+			out <- line
 		}
 	}()
-
 	return out, nil
 }
 
-// ReadFileWithReader with specific buffer size and stream on a channel
+// ReadFileWithReaderAndBufferSize streams r line by line on a channel using
+// the given scanner buffer size.
+//
+// Deprecated: use LinesReader with WithBufferSize. Equivalent invocation:
+//
+//	for line, err := range fileutil.LinesReader(r, fileutil.WithBufferSize(n)) { ... }
 func ReadFileWithReaderAndBufferSize(r io.Reader, maxCapacity int) (chan string, error) {
 	out := make(chan string)
 	go func() {
 		defer close(out)
-		scanner := bufio.NewScanner(r)
-		buf := make([]byte, maxCapacity)
-		scanner.Buffer(buf, maxCapacity)
-		for scanner.Scan() {
-			out <- scanner.Text()
+		for line, err := range LinesReader(r, WithBufferSize(maxCapacity)) {
+			if err != nil {
+				return
+			}
+			out <- line
 		}
 	}()
-
 	return out, nil
 }
 
-// ReadFile with filename
+// ReadFile streams the file at filename line by line on a channel.
+//
+// Deprecated: use Lines, which returns an iter.Seq2[string, error] and
+// surfaces open / scanner errors. Equivalent invocation:
+//
+//	for line, err := range fileutil.Lines(filename) { ... }
 func ReadFile(filename string) (chan string, error) {
 	if !FileExists(filename) {
 		return nil, errors.New("file doesn't exist")
@@ -238,23 +253,22 @@ func ReadFile(filename string) (chan string, error) {
 	out := make(chan string)
 	go func() {
 		defer close(out)
-		f, err := os.Open(filename)
-		if err != nil {
-			return
-		}
-		defer func() {
-			_ = f.Close()
-		}()
-		scanner := bufio.NewScanner(f)
-		for scanner.Scan() {
-			out <- scanner.Text()
+		for line, err := range Lines(filename) {
+			if err != nil {
+				return
+			}
+			out <- line
 		}
 	}()
-
 	return out, nil
 }
 
-// ReadFile with filename and specific buffer size
+// ReadFileWithBufferSize streams the file at filename line by line on a
+// channel using the given scanner buffer size.
+//
+// Deprecated: use Lines with WithBufferSize. Equivalent invocation:
+//
+//	for line, err := range fileutil.Lines(filename, fileutil.WithBufferSize(n)) { ... }
 func ReadFileWithBufferSize(filename string, maxCapacity int) (chan string, error) {
 	if !FileExists(filename) {
 		return nil, errors.New("file doesn't exist")
@@ -262,21 +276,13 @@ func ReadFileWithBufferSize(filename string, maxCapacity int) (chan string, erro
 	out := make(chan string)
 	go func() {
 		defer close(out)
-		f, err := os.Open(filename)
-		if err != nil {
-			return
-		}
-		defer func() {
-			_ = f.Close()
-		}()
-		scanner := bufio.NewScanner(f)
-		buf := make([]byte, maxCapacity)
-		scanner.Buffer(buf, maxCapacity)
-		for scanner.Scan() {
-			out <- scanner.Text()
+		for line, err := range Lines(filename, WithBufferSize(maxCapacity)) {
+			if err != nil {
+				return
+			}
+			out <- line
 		}
 	}()
-
 	return out, nil
 }
 
diff --git a/file/lines.go b/file/lines.go
new file mode 100644
index 0000000..e8f4d3a
--- /dev/null
+++ b/file/lines.go
@@ -0,0 +1,147 @@
+package fileutil
+
+import (
+	"bufio"
+	"io"
+	"iter"
+	"os"
+	"strings"
+)
+
+// LineOption configures the line iterator returned by Lines / LinesReader.
+type LineOption func(*lineConfig)
+
+type lineConfig struct {
+	bufferSize int
+	splitSet   string
+	hasSplit   bool
+	trimSpace  bool
+	skipEmpty  bool
+	filter     func(string) bool
+}
+
+// WithBufferSize sets both the initial buffer and the maximum token size of
+// the underlying bufio.Scanner to n bytes. A non-positive value leaves the
+// scanner defaults (64 KiB maximum token size) in place.
+func WithBufferSize(n int) LineOption {
+	return func(c *lineConfig) { c.bufferSize = n }
+}
+
+// WithSplit splits each scanned line on any of the given runes
+// (strings.FieldsFunc semantics: runs of separator runes are collapsed and
+// empty pieces are not produced). Each piece becomes its own emitted value.
+// Calling WithSplit with no separators disables splitting, so lines
+// (including blank ones) are emitted unsplit.
+func WithSplit(separators ...rune) LineOption {
+	return func(c *lineConfig) {
+		c.hasSplit = len(separators) > 0
+		c.splitSet = string(separators)
+	}
+}
+
+// WithTrimSpace trims leading/trailing whitespace from each emitted value.
+func WithTrimSpace() LineOption {
+	return func(c *lineConfig) { c.trimSpace = true }
+}
+
+// WithSkipEmpty drops empty values, evaluated after WithTrimSpace.
+func WithSkipEmpty() LineOption {
+	return func(c *lineConfig) { c.skipEmpty = true }
+}
+
+// WithFilter keeps only values for which keep returns true. The filter runs
+// after split / trim / skip-empty so it sees the final value that would be
+// yielded.
+func WithFilter(keep func(string) bool) LineOption {
+	return func(c *lineConfig) { c.filter = keep }
+}
+
+// Lines streams lines from the file at filename, applying any configured
+// transforms. With no options it emits raw scanner lines.
+//
+// The file is opened lazily on first iteration and closed when iteration
+// ends (including via break). Open and scanner errors are surfaced as a
+// final ("", err) pair, after which iteration stops.
+//
+// Typical use:
+//
+//	for v, err := range fileutil.Lines(path,
+//		fileutil.WithSplit(','),
+//		fileutil.WithTrimSpace(),
+//		fileutil.WithSkipEmpty(),
+//	) {
+//		if err != nil { return err }
+//		// use v
+//	}
+func Lines(filename string, opts ...LineOption) iter.Seq2[string, error] {
+	return func(yield func(string, error) bool) {
+		f, err := os.Open(filename)
+		if err != nil {
+			yield("", err)
+			return
+		}
+		defer func() { _ = f.Close() }()
+		scanLines(f, opts, yield)
+	}
+}
+
+// LinesReader is the io.Reader variant of Lines. The reader is consumed but
+// not closed; the caller owns its lifecycle.
+func LinesReader(r io.Reader, opts ...LineOption) iter.Seq2[string, error] {
+	return func(yield func(string, error) bool) {
+		scanLines(r, opts, yield)
+	}
+}
+
+// scanLines drives a bufio.Scanner over r and feeds every line (or, when
+// splitting is enabled, every piece of every line) through emitLine. It
+// returns as soon as yield reports false; a scanner error is delivered as a
+// final ("", err) pair.
+func scanLines(r io.Reader, opts []LineOption, yield func(string, error) bool) {
+	var cfg lineConfig
+	for _, o := range opts {
+		o(&cfg)
+	}
+	scanner := bufio.NewScanner(r)
+	if cfg.bufferSize > 0 {
+		scanner.Buffer(make([]byte, cfg.bufferSize), cfg.bufferSize)
+	}
+	// Build the separator predicate once rather than once per line; its
+	// parameter is named sep so it does not shadow the reader r.
+	isSeparator := func(sep rune) bool {
+		return strings.ContainsRune(cfg.splitSet, sep)
+	}
+	for scanner.Scan() {
+		line := scanner.Text()
+		if !cfg.hasSplit {
+			if !emitLine(line, &cfg, yield) {
+				return
+			}
+			continue
+		}
+		for _, piece := range strings.FieldsFunc(line, isSeparator) {
+			if !emitLine(piece, &cfg, yield) {
+				return
+			}
+		}
+	}
+	if err := scanner.Err(); err != nil {
+		yield("", err)
+	}
+}
+
+// emitLine applies the trim / skip-empty / filter options to v, in that
+// order, and yields the result. It reports false only when the consumer
+// asked to stop; dropped values report true so scanning continues.
+func emitLine(v string, cfg *lineConfig, yield func(string, error) bool) bool {
+	if cfg.trimSpace {
+		v = strings.TrimSpace(v)
+	}
+	if cfg.skipEmpty && v == "" {
+		return true
+	}
+	if cfg.filter != nil && !cfg.filter(v) {
+		return true
+	}
+	return yield(v, nil)
+}
diff --git a/file/lines_test.go b/file/lines_test.go
new file mode 100644
index 0000000..f8ca008
--- /dev/null
+++ b/file/lines_test.go
@@ -0,0 +1,166 @@
+package fileutil
+
+import (
+	"errors"
+	"io"
+	"io/fs"
+	"iter"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func collectLines(t *testing.T, seq iter.Seq2[string, error]) []string {
+	t.Helper()
+	var out []string
+	for v, err := range seq {
+		require.NoError(t, err)
+		out = append(out, v)
+	}
+	return out
+}
+
+func writeTempFile(t *testing.T, body string) string {
+	t.Helper()
+	path := filepath.Join(t.TempDir(), "lines.txt")
+	require.NoError(t, os.WriteFile(path, []byte(body), 0o600))
+	return path
+}
+
+func TestLines_Default_EmitsRawLines(t *testing.T) {
+	path := writeTempFile(t, "alpha\nbeta\n gamma \n\nepsilon\n")
+	got := collectLines(t, Lines(path))
+	require.Equal(t, []string{"alpha", "beta", " gamma ", "", "epsilon"}, got)
+}
+
+func TestLines_WithTrimSpace(t *testing.T) {
+	path := writeTempFile(t, " alpha \n\tbeta\t\n")
+	got := collectLines(t, Lines(path, WithTrimSpace()))
+	require.Equal(t, []string{"alpha", "beta"}, got)
+}
+
+func TestLines_WithSkipEmpty(t *testing.T) {
+	path := writeTempFile(t, "alpha\n\nbeta\n\n\n")
+	got := collectLines(t, Lines(path, WithSkipEmpty()))
+	require.Equal(t, []string{"alpha", "beta"}, got)
+}
+
+func TestLines_WithTrimSpace_SkipEmpty_DropsBlankLines(t *testing.T) {
+	path := writeTempFile(t, "alpha\n \nbeta\n")
+	got := collectLines(t, Lines(path, WithTrimSpace(), WithSkipEmpty()))
+	require.Equal(t, []string{"alpha", "beta"}, got)
+}
+
+func TestLines_WithSplit_Comma(t *testing.T) {
+	path := writeTempFile(t, "1.1.1.1,8.8.8.8\n9.9.9.9\n")
+	got := collectLines(t, Lines(path, WithSplit(',')))
+	require.Equal(t, []string{"1.1.1.1", "8.8.8.8", "9.9.9.9"}, got)
+}
+
+func TestLines_WithSplit_MultipleSeparators(t *testing.T) {
+	path := writeTempFile(t, "a,b;c\td\n")
+	got := collectLines(t, Lines(path, WithSplit(',', ';', '\t')))
+	require.Equal(t, []string{"a", "b", "c", "d"}, got)
+}
+
+func TestLines_WithSplit_NoSeparators_IsNoOp(t *testing.T) {
+	path := writeTempFile(t, "alpha\n\nbeta\n")
+	got := collectLines(t, Lines(path, WithSplit()))
+	require.Equal(t, []string{"alpha", "", "beta"}, got)
+}
+
+func TestLines_ResolverFileScenario(t *testing.T) {
+	// resolver-file scenario: comma-separated entries with whitespace and
+	// blanks; this is what the original PR was trying to add a one-shot helper for.
+	path := writeTempFile(t, "1.1.1.1, 8.8.8.8\n9.9.9.9\n , , \n10.10.10.10 ,11.11.11.11\n")
+	got := collectLines(t, Lines(path,
+		WithSplit(','),
+		WithTrimSpace(),
+		WithSkipEmpty(),
+	))
+	require.Equal(t, []string{"1.1.1.1", "8.8.8.8", "9.9.9.9", "10.10.10.10", "11.11.11.11"}, got)
+}
+
+func TestLines_WithFilter_DropsComments(t *testing.T) {
+	path := writeTempFile(t, "alpha\n# comment\nbeta\n# another\n")
+	got := collectLines(t, Lines(path,
+		WithFilter(func(s string) bool { return !strings.HasPrefix(s, "#") }),
+	))
+	require.Equal(t, []string{"alpha", "beta"}, got)
+}
+
+func TestLines_WithBufferSize(t *testing.T) {
+	path := writeTempFile(t, "short\n"+strings.Repeat("x", 1024)+"\n")
+	got := collectLines(t, Lines(path, WithBufferSize(2048)))
+	require.Len(t, got, 2)
+	require.Equal(t, "short", got[0])
+	require.Len(t, got[1], 1024)
+}
+
+func TestLines_MissingFile_YieldsErrorPair(t *testing.T) {
+	var values []string
+	var gotErr error
+	for v, err := range Lines("/no/such/file.txt") {
+		if err != nil {
+			gotErr = err
+			continue
+		}
+		values = append(values, v)
+	}
+	require.Empty(t, values)
+	require.Error(t, gotErr)
+	require.True(t, errors.Is(gotErr, fs.ErrNotExist), "expected fs.ErrNotExist, got %v", gotErr)
+}
+
+func TestLines_BreakStopsIterationEarly(t *testing.T) {
+	path := writeTempFile(t, "a\nb\nc\nd\n")
+	var seen []string
+	for v, err := range Lines(path) {
+		require.NoError(t, err)
+		seen = append(seen, v)
+		if len(seen) == 2 {
+			break
+		}
+	}
+	require.Equal(t, []string{"a", "b"}, seen)
+}
+
+func TestLinesReader_Default(t *testing.T) {
+	r := strings.NewReader("alpha\nbeta\n gamma \n\n")
+	got := collectLines(t, LinesReader(r))
+	require.Equal(t, []string{"alpha", "beta", " gamma ", ""}, got)
+}
+
+func TestLinesReader_AllOptionsCombined(t *testing.T) {
+	r := strings.NewReader("# header\n1.1.1.1, 8.8.8.8\n\n# tail\n")
+	got := collectLines(t, LinesReader(r,
+		WithSplit(','),
+		WithTrimSpace(),
+		WithSkipEmpty(),
+		WithFilter(func(s string) bool { return !strings.HasPrefix(s, "#") }),
+	))
+	require.Equal(t, []string{"1.1.1.1", "8.8.8.8"}, got)
+}
+
+func TestLinesReader_PropagatesScannerError(t *testing.T) {
+	// A reader that always errors should surface the error as a final pair.
+	r := errReader{err: io.ErrUnexpectedEOF}
+	var seen []string
+	var gotErr error
+	for v, err := range LinesReader(r) {
+		if err != nil {
+			gotErr = err
+			continue
+		}
+		seen = append(seen, v)
+	}
+	require.Empty(t, seen)
+	require.ErrorIs(t, gotErr, io.ErrUnexpectedEOF)
+}
+
+type errReader struct{ err error }
+
+func (e errReader) Read(p []byte) (int, error) { return 0, e.err }