// Reconstructed from a patch that creates two files in package datagen:
//
//   - internal/datagen/datagen.go      (imports "math/rand")
//   - internal/datagen/datagen_test.go (imports "math/rand" and "testing")
//
// Package datagen provides reusable data generation pools and identity types
// for synthetic telemetry generation. It offers deterministic, seed-controlled
// generation of hostnames, users, systems, networks, and other identity data.

// ---- internal/datagen/datagen.go ----

// Pool is a reusable collection of values for random selection.
//
// A Pool is read-only after construction: no method mutates the stored
// items, so a single Pool may be shared between goroutines. The *rand.Rand
// handed to Random and RandomN is NOT guarded by Pool; a generator created
// with rand.New(rand.NewSource(...)) must not be shared across goroutines
// without external synchronization.
type Pool[T any] struct {
	items []T
}

// NewPool creates a new Pool from the given items.
//
// The items are copied, so later changes to the caller's slice do not affect
// the pool.
func NewPool[T any](items ...T) *Pool[T] {
	cp := make([]T, len(items))
	copy(cp, items)
	return &Pool[T]{items: cp}
}

// Random returns a random item from the pool using the provided rand source.
// The same seed yields the same sequence of picks.
//
// Panics if the pool is empty (rand.Intn panics for a non-positive bound).
func (p *Pool[T]) Random(r *rand.Rand) T {
	return p.items[r.Intn(len(p.items))] // #nosec G404
}

// RandomN returns n unique random items from the pool (unique by position:
// every pool slot is used at most once).
//
// If n >= pool size, all items are returned in shuffled order.
// If n <= 0, nil is returned and r is not consulted.
// An empty pool yields an empty result for any n.
func (p *Pool[T]) RandomN(r *rand.Rand, n int) []T {
	if n <= 0 {
		return nil
	}
	if n > len(p.items) {
		n = len(p.items)
	}
	// Shuffle a private copy and keep the first n entries. The whole copy is
	// shuffled (rather than only a prefix) so that the mapping from seed to
	// output stays stable for existing callers.
	cp := make([]T, len(p.items))
	copy(cp, p.items)
	r.Shuffle(len(cp), func(i, j int) { cp[i], cp[j] = cp[j], cp[i] })
	return cp[:n]
}

// All returns a copy of all items in the pool, in insertion order.
// Modifying the returned slice does not affect the pool.
func (p *Pool[T]) All() []T {
	cp := make([]T, len(p.items))
	copy(cp, p.items)
	return cp
}

// Len returns the number of items in the pool.
func (p *Pool[T]) Len() int {
	return len(p.items)
}

// Merge combines multiple pools into a single pool.
//
// Items keep their relative order: all items of the first pool, then all
// items of the second, and so on. Duplicates are not removed. Nil pools are
// skipped, and calling Merge with no (or only nil) pools returns an empty
// pool. The inputs are not modified and the result does not alias them.
func Merge[T any](pools ...*Pool[T]) *Pool[T] {
	total := 0
	for _, p := range pools {
		if p != nil {
			total += len(p.items)
		}
	}
	items := make([]T, 0, total)
	for _, p := range pools {
		if p != nil {
			items = append(items, p.items...)
		}
	}
	return &Pool[T]{items: items}
}

// ---- internal/datagen/datagen_test.go ----

// TestNewPool checks that NewPool reports the right size for populated and
// empty pools.
func TestNewPool(t *testing.T) {
	t.Run("creates pool with items", func(t *testing.T) {
		p := NewPool("a", "b", "c")
		if p.Len() != 3 {
			t.Errorf("expected Len() = 3, got %d", p.Len())
		}
	})

	t.Run("empty pool", func(t *testing.T) {
		p := NewPool[string]()
		if p.Len() != 0 {
			t.Errorf("expected Len() = 0, got %d", p.Len())
		}
	})
}

// TestPoolAll checks that All returns the items in order and hands out a
// copy rather than the internal slice.
func TestPoolAll(t *testing.T) {
	items := []string{"x", "y", "z"}
	p := NewPool(items...)
	all := p.All()
	if len(all) != 3 {
		t.Fatalf("expected 3 items, got %d", len(all))
	}
	for i, v := range items {
		if all[i] != v {
			t.Errorf("All()[%d] = %q, want %q", i, all[i], v)
		}
	}

	// Ensure returned slice is a copy
	all[0] = "modified"
	if p.All()[0] == "modified" {
		t.Error("All() should return a copy, not the internal slice")
	}
}

// TestPoolRandom checks that repeated draws eventually cover every item.
func TestPoolRandom(t *testing.T) {
	p := NewPool("a", "b", "c")
	r := rand.New(rand.NewSource(42))

	seen := make(map[string]bool)
	for i := 0; i < 100; i++ {
		v := p.Random(r)
		seen[v] = true
	}
	// With 100 draws from 3 items, we should see all of them
	if len(seen) != 3 {
		t.Errorf("expected to see all 3 items, saw %d: %v", len(seen), seen)
	}
}

// TestPoolRandomDeterministic checks that two generators with the same seed
// produce the same sequence of picks.
func TestPoolRandomDeterministic(t *testing.T) {
	p := NewPool(1, 2, 3, 4, 5)

	r1 := rand.New(rand.NewSource(99))
	r2 := rand.New(rand.NewSource(99))

	for i := 0; i < 20; i++ {
		v1 := p.Random(r1)
		v2 := p.Random(r2)
		if v1 != v2 {
			t.Fatalf("draw %d: same seed produced different results: %d vs %d", i, v1, v2)
		}
	}
}

// TestPoolRandomN covers the size handling of RandomN: fewer than, equal to,
// and more than the pool size, plus non-positive n.
func TestPoolRandomN(t *testing.T) {
	p := NewPool("a", "b", "c", "d", "e")
	r := rand.New(rand.NewSource(42))

	t.Run("n less than pool size", func(t *testing.T) {
		result := p.RandomN(r, 3)
		if len(result) != 3 {
			t.Errorf("expected 3 items, got %d", len(result))
		}
		// Check uniqueness
		seen := make(map[string]bool)
		for _, v := range result {
			if seen[v] {
				t.Errorf("duplicate item %q in RandomN result", v)
			}
			seen[v] = true
		}
	})

	t.Run("n equals pool size", func(t *testing.T) {
		result := p.RandomN(r, 5)
		if len(result) != 5 {
			t.Errorf("expected 5 items, got %d", len(result))
		}
	})

	t.Run("n exceeds pool size returns all", func(t *testing.T) {
		result := p.RandomN(r, 10)
		if len(result) != 5 {
			t.Errorf("expected 5 items (pool size), got %d", len(result))
		}
	})

	t.Run("n zero returns empty", func(t *testing.T) {
		result := p.RandomN(r, 0)
		if len(result) != 0 {
			t.Errorf("expected 0 items, got %d", len(result))
		}
	})

	t.Run("n negative returns empty", func(t *testing.T) {
		result := p.RandomN(r, -1)
		if len(result) != 0 {
			t.Errorf("expected 0 items, got %d", len(result))
		}
	})
}

// TestMerge checks that Merge concatenates pools in argument order.
func TestMerge(t *testing.T) {
	p1 := NewPool("a", "b")
	p2 := NewPool("c", "d")
	p3 := NewPool("e")

	merged := Merge(p1, p2, p3)
	expected := []string{"a", "b", "c", "d", "e"}
	// Fatal, not Error: the loop below indexes by position and would panic
	// with an index-out-of-range on a short result.
	if merged.Len() != len(expected) {
		t.Fatalf("expected merged Len() = 5, got %d", merged.Len())
	}

	all := merged.All()
	for i, v := range expected {
		if all[i] != v {
			t.Errorf("merged.All()[%d] = %q, want %q", i, all[i], v)
		}
	}
}

// TestMergeEmpty checks that merging no pools yields an empty pool.
func TestMergeEmpty(t *testing.T) {
	merged := Merge[string]()
	if merged.Len() != 0 {
		t.Errorf("expected merged Len() = 0, got %d", merged.Len())
	}
}

// TestMergeSkipsNilPools checks that nil pools are ignored instead of
// causing a nil-pointer panic.
func TestMergeSkipsNilPools(t *testing.T) {
	var none *Pool[string]

	merged := Merge(NewPool("a"), none, NewPool("b"))
	all := merged.All()
	if len(all) != 2 {
		t.Fatalf("expected 2 items, got %d", len(all))
	}
	if all[0] != "a" || all[1] != "b" {
		t.Errorf("merged.All() = %q, want [\"a\" \"b\"]", all)
	}
}

// TestPoolRandomPanicsOnEmpty pins the documented panic of Random on an
// empty pool.
func TestPoolRandomPanicsOnEmpty(t *testing.T) {
	p := NewPool[string]()
	r := rand.New(rand.NewSource(42))

	defer func() {
		if recover() == nil {
			t.Error("expected panic on Random() with empty pool")
		}
	}()
	p.Random(r)
}

// TestPoolWithInts checks that Random on a non-string pool returns a value
// that is actually a member of the pool.
func TestPoolWithInts(t *testing.T) {
	p := NewPool(200, 201, 204, 301, 404, 500)
	r := rand.New(rand.NewSource(42))

	v := p.Random(r)
	found := false
	for _, item := range p.All() {
		if item == v {
			found = true
			break
		}
	}
	if !found {
		t.Errorf("Random() returned %d which is not in the pool", v)
	}
}