Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 44 additions & 38 deletions file/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -200,83 +200,89 @@ func HasStdin() bool {
return isPipedFromChrDev || isPipedFromFIFO
}

// ReadFileWithReader streams r line by line on an unbuffered channel. The
// channel is closed by the producing goroutine when reading stops, and the
// returned error is always nil.
//
// The channel carries no error value: when the underlying reader or scanner
// fails, the goroutine simply returns and the channel is closed, so a
// consumer cannot tell a failure from a normal end of input. Each send blocks
// until the consumer receives, so the goroutine stays parked on the send if
// the consumer stops reading before the channel is closed.
//
// Deprecated: use LinesReader, which returns an iter.Seq2[string, error] and
// surfaces scanner errors. Equivalent invocation:
//
//	for line, err := range fileutil.LinesReader(r) { ... }
func ReadFileWithReader(r io.Reader) (chan string, error) {
// Unbuffered: every send below waits for a receiver.
out := make(chan string)
go func() {
// Closing on exit is the only end-of-stream signal the consumer gets.
defer close(out)
scanner := bufio.NewScanner(r)
for scanner.Scan() {
out <- scanner.Text()
// NOTE(review): the scanner loop above and the LinesReader loop below look
// like the removed and added sides of a diff rendered together — confirm
// which one is live before editing this body.
for line, err := range LinesReader(r) {
if err != nil {
// No error path on a chan string; stop and let the deferred close run.
return
}
out <- line
}
}()

return out, nil
}

// ReadFileWithReaderAndBufferSize streams r line by line on an unbuffered
// channel, using maxCapacity as the scanner buffer size. The channel is
// closed by the producing goroutine when reading stops, and the returned
// error is always nil.
//
// The channel carries no error value: a read or scanner failure ends the
// stream by closing the channel, indistinguishable from a normal end of
// input. Each send blocks until the consumer receives.
//
// Deprecated: use LinesReader with WithBufferSize. Equivalent invocation:
//
//	for line, err := range fileutil.LinesReader(r, fileutil.WithBufferSize(n)) { ... }
func ReadFileWithReaderAndBufferSize(r io.Reader, maxCapacity int) (chan string, error) {
// Unbuffered: every send below waits for a receiver.
out := make(chan string)
go func() {
// Closing on exit is the only end-of-stream signal the consumer gets.
defer close(out)
scanner := bufio.NewScanner(r)
buf := make([]byte, maxCapacity)
scanner.Buffer(buf, maxCapacity)
for scanner.Scan() {
out <- scanner.Text()
// NOTE(review): the scanner loop above and the LinesReader loop below look
// like the removed and added sides of a diff rendered together — confirm
// which one is live before editing this body.
for line, err := range LinesReader(r, WithBufferSize(maxCapacity)) {
if err != nil {
// No error path on a chan string; stop and let the deferred close run.
return
}
out <- line
}
}()

return out, nil
}

// ReadFile streams the file at filename line by line on an unbuffered
// channel. It returns a nil channel and an error when FileExists reports
// false for filename; otherwise it returns the channel and a nil error.
//
// The existence check runs before the producing goroutine opens the file, so
// an open or scanner failure that happens afterwards is not reported: the
// goroutine returns and the channel is closed, which looks the same as a
// normal end of file. Each send blocks until the consumer receives.
//
// Deprecated: use Lines, which returns an iter.Seq2[string, error] and
// surfaces open / scanner errors. Equivalent invocation:
//
//	for line, err := range fileutil.Lines(filename) { ... }
func ReadFile(filename string) (chan string, error) {
// Up-front check: the only failure this function reports to the caller.
if !FileExists(filename) {
return nil, errors.New("file doesn't exist")
}
// Unbuffered: every send below waits for a receiver.
out := make(chan string)
go func() {
// Closing on exit is the only end-of-stream signal the consumer gets.
defer close(out)
f, err := os.Open(filename)
if err != nil {
return
}
defer func() {
_ = f.Close()
}()
scanner := bufio.NewScanner(f)
for scanner.Scan() {
out <- scanner.Text()
// NOTE(review): the open/scan code above and the Lines loop below look like
// the removed and added sides of a diff rendered together — confirm which
// one is live before editing this body.
for line, err := range Lines(filename) {
if err != nil {
// No error path on a chan string; stop and let the deferred close run.
return
}
out <- line
}
}()

return out, nil
}

// ReadFileWithBufferSize streams the file at filename line by line on an
// unbuffered channel, using maxCapacity as the scanner buffer size. It
// returns a nil channel and an error when FileExists reports false for
// filename; otherwise it returns the channel and a nil error.
//
// Open or scanner failures that occur inside the producing goroutine are not
// reported: the goroutine returns and the channel is closed, which looks the
// same as a normal end of file. Each send blocks until the consumer receives.
//
// Deprecated: use Lines with WithBufferSize. Equivalent invocation:
//
//	for line, err := range fileutil.Lines(filename, fileutil.WithBufferSize(n)) { ... }
func ReadFileWithBufferSize(filename string, maxCapacity int) (chan string, error) {
// Up-front check: the only failure this function reports to the caller.
if !FileExists(filename) {
return nil, errors.New("file doesn't exist")
}
// Unbuffered: every send below waits for a receiver.
out := make(chan string)
go func() {
// Closing on exit is the only end-of-stream signal the consumer gets.
defer close(out)
f, err := os.Open(filename)
if err != nil {
return
}
defer func() {
_ = f.Close()
}()
scanner := bufio.NewScanner(f)
buf := make([]byte, maxCapacity)
scanner.Buffer(buf, maxCapacity)
for scanner.Scan() {
out <- scanner.Text()
// NOTE(review): the open/scan code above and the Lines loop below look like
// the removed and added sides of a diff rendered together — confirm which
// one is live before editing this body.
for line, err := range Lines(filename, WithBufferSize(maxCapacity)) {
if err != nil {
// No error path on a chan string; stop and let the deferred close run.
return
}
out <- line
}
}()

return out, nil
}

Expand Down
134 changes: 134 additions & 0 deletions file/lines.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
package fileutil

import (
"bufio"
"io"
"iter"
"os"
"strings"
)

// LineOption is a functional option for the line iterators returned by Lines
// and LinesReader. Each option mutates the iterator's lineConfig; options are
// applied in the order they are passed, so a later option overrides an
// earlier one that sets the same field.
type LineOption func(*lineConfig)

// lineConfig accumulates the settings chosen through LineOption values. Its
// zero value describes the default behavior: default scanner buffer, no
// splitting, no trimming, no filtering.
type lineConfig struct {
// bufferSize is the bufio.Scanner buffer size; it is applied only when > 0.
bufferSize int
// splitSet holds the separator runes; it is consulted only when hasSplit is true.
splitSet string
// hasSplit records that WithSplit was applied, enabling per-line splitting.
hasSplit bool
// trimSpace enables strings.TrimSpace on every value before it is emitted.
trimSpace bool
// skipEmpty drops values that are empty (checked after trimming).
skipEmpty bool
// filter, when non-nil, must return true for a value to be emitted.
filter func(string) bool
}

// WithBufferSize configures the size, in bytes, of the bufio.Scanner buffer
// used while reading. Passing zero or a negative n keeps the scanner's
// built-in default (64 KiB).
func WithBufferSize(n int) LineOption {
	return func(cfg *lineConfig) {
		cfg.bufferSize = n
	}
}

// WithSplit breaks every scanned line into pieces at any of the given
// separator runes and emits each piece as its own value. Splitting follows
// strings.FieldsFunc semantics: consecutive separators count as one and no
// empty pieces are produced.
func WithSplit(separators ...rune) LineOption {
	return func(cfg *lineConfig) {
		cfg.splitSet = string(separators)
		cfg.hasSplit = true
	}
}

// WithTrimSpace strips leading and trailing whitespace from every value
// before it is emitted.
func WithTrimSpace() LineOption {
	return func(cfg *lineConfig) {
		cfg.trimSpace = true
	}
}

// WithSkipEmpty suppresses empty values. The emptiness check happens after
// WithTrimSpace has been applied, so whitespace-only values are dropped too
// when both options are used.
func WithSkipEmpty() LineOption {
	return func(cfg *lineConfig) {
		cfg.skipEmpty = true
	}
}

// WithFilter emits only the values for which keep reports true. The predicate
// is evaluated last — after splitting, trimming and empty-skipping — so it is
// handed exactly the value that would otherwise be yielded.
func WithFilter(keep func(string) bool) LineOption {
	return func(cfg *lineConfig) {
		cfg.filter = keep
	}
}

// Lines returns an iterator over the lines of the file at filename, with the
// transforms selected by opts applied to each line. Without options the raw
// scanner lines are emitted.
//
// Nothing is opened until the iterator is actually run. The file is closed
// when iteration finishes, whether it ran to the end or the consumer stopped
// early (for example with break). A failure to open the file, or a scanner
// error, is delivered as one last ("", err) pair and iteration then ends.
//
// Typical use:
//
//	for v, err := range fileutil.Lines(path,
//		fileutil.WithSplit(','),
//		fileutil.WithTrimSpace(),
//		fileutil.WithSkipEmpty(),
//	) {
//		if err != nil {
//			return err
//		}
//		// use v
//	}
func Lines(filename string, opts ...LineOption) iter.Seq2[string, error] {
	seq := func(yield func(string, error) bool) {
		file, openErr := os.Open(filename)
		if openErr != nil {
			// Report the failure as the only pair; there is nothing else to
			// yield regardless of what the consumer answers.
			yield("", openErr)
			return
		}
		// The handle is read-only, so a Close error cannot lose data and is
		// deliberately ignored.
		defer func() { _ = file.Close() }()

		scanLines(file, opts, yield)
	}
	return seq
}

// LinesReader behaves like Lines but reads from an arbitrary io.Reader. It
// consumes r without closing it: the caller remains responsible for the
// reader's lifecycle.
func LinesReader(r io.Reader, opts ...LineOption) iter.Seq2[string, error] {
	var seq iter.Seq2[string, error] = func(yield func(string, error) bool) {
		scanLines(r, opts, yield)
	}
	return seq
}

// scanLines drives a bufio.Scanner over r and forwards every resulting value
// to yield, applying the transforms selected by opts.
//
// opts are applied in order to a fresh lineConfig; nil entries are ignored so
// a conditionally built option list cannot cause a nil-function panic. When
// splitting is enabled each scanned line is cut with strings.FieldsFunc and
// every piece is emitted separately; otherwise the whole line is emitted.
//
// Iteration stops as soon as yield returns false, and yield is never called
// again after that. If the scanner stops because of an error (including
// bufio.ErrTooLong for a line that exceeds the buffer limit), that error is
// delivered as a final ("", err) pair.
func scanLines(r io.Reader, opts []LineOption, yield func(string, error) bool) {
	var cfg lineConfig
	for _, opt := range opts {
		if opt != nil {
			opt(&cfg)
		}
	}

	scanner := bufio.NewScanner(r)
	if cfg.bufferSize > 0 {
		// bufferSize is the upper bound on a single line, not a required
		// allocation: start no larger than the scanner's usual maximum and
		// let the scanner grow the buffer toward bufferSize on demand. The
		// token-size limit stays exactly bufferSize.
		initial := cfg.bufferSize
		if initial > bufio.MaxScanTokenSize {
			initial = bufio.MaxScanTokenSize
		}
		scanner.Buffer(make([]byte, 0, initial), cfg.bufferSize)
	}

	// Built once rather than per line; cfg no longer changes at this point.
	isSeparator := func(c rune) bool {
		return strings.ContainsRune(cfg.splitSet, c)
	}

	for scanner.Scan() {
		line := scanner.Text()
		if !cfg.hasSplit {
			if !emitLine(line, &cfg, yield) {
				return
			}
			continue
		}
		for _, piece := range strings.FieldsFunc(line, isSeparator) {
			if !emitLine(piece, &cfg, yield) {
				return
			}
		}
	}

	// Only reached when the consumer has not asked to stop, so one more
	// yield is still permitted.
	if err := scanner.Err(); err != nil {
		yield("", err)
	}
}

// emitLine applies the per-value transforms from cfg to v and, unless the
// value is dropped, passes it to yield with a nil error.
//
// The order is fixed: trim first, then the empty check, then the filter, so
// the filter is never invoked for a value that skip-empty already dropped.
// The result is false only when yield asked to stop; a dropped value reports
// true so the caller keeps scanning.
func emitLine(v string, cfg *lineConfig, yield func(string, error) bool) bool {
	value := v
	if cfg.trimSpace {
		value = strings.TrimSpace(value)
	}

	switch {
	case cfg.skipEmpty && len(value) == 0:
		return true
	case cfg.filter != nil && !cfg.filter(value):
		return true
	default:
		return yield(value, nil)
	}
}
Loading
Loading