Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 67 additions & 0 deletions internal/datagen/datagen.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Package datagen provides reusable data generation pools and identity types
// for synthetic telemetry generation. It offers deterministic, seed-controlled
// generation of hostnames, users, systems, networks, and other identity data.
package datagen

import "math/rand"

// Pool is a reusable collection of values for random selection.
//
// A Pool is meant to be treated as read-only after construction: the
// constructors in this file (NewPool, Merge) store a freshly allocated slice,
// and the methods in this file only read it. That is what makes sharing a
// single Pool between goroutines safe.
//
// NOTE: Pool holds no lock, so the *rand.Rand passed to Random and RandomN is
// not protected by it; callers that share a generator across goroutines must
// synchronize access themselves.
type Pool[T any] struct {
// items holds the pool's values in insertion order.
items []T
}

// NewPool builds a Pool holding the given items in the order supplied.
//
// The arguments are copied into a slice owned by the pool, so later changes
// to a slice passed via items... are not visible through the returned Pool.
// Calling NewPool with no arguments yields an empty pool.
func NewPool[T any](items ...T) *Pool[T] {
	pool := &Pool[T]{items: make([]T, len(items))}
	copy(pool.items, items)
	return pool
}

// Random picks one item from the pool, using r to choose the index.
//
// Exactly one r.Intn call is made per invocation, so a generator seeded with
// the same value yields the same sequence of picks.
//
// Random panics if the pool is empty.
func (p *Pool[T]) Random(r *rand.Rand) T {
	idx := r.Intn(len(p.items)) // #nosec G404
	return p.items[idx]
}

// RandomN returns up to n distinct positions of the pool in random order.
//
// If n is zero or negative the result is nil. If n is at least the pool size,
// every item is returned in shuffled order. Items are "unique" by position:
// a value stored twice in the pool can appear twice in the result.
//
// The pool itself is never reordered; the shuffle runs on a private copy.
func (p *Pool[T]) RandomN(r *rand.Rand, n int) []T {
	if n <= 0 {
		return nil
	}

	size := len(p.items)
	shuffled := append(make([]T, 0, size), p.items...)

	// Shuffle the whole copy and keep a prefix; the prefix is then a uniform
	// sample without replacement.
	r.Shuffle(size, func(a, b int) {
		shuffled[a], shuffled[b] = shuffled[b], shuffled[a]
	})

	if n > size {
		n = size
	}
	return shuffled[:n]
}

// All returns every item in the pool, in stored order.
//
// The result is a newly allocated slice, so callers may modify it freely
// without affecting the pool.
func (p *Pool[T]) All() []T {
	out := make([]T, 0, len(p.items))
	return append(out, p.items...)
}

// Len reports how many items the pool holds; an empty pool reports 0.
func (p *Pool[T]) Len() int {
	count := len(p.items)
	return count
}

// Merge combines multiple pools into a single new pool.
//
// Items appear in argument order: all items of the first pool, then all items
// of the second, and so on. Duplicates are kept. The result owns its own
// backing slice, so it shares no storage with the input pools.
//
// Nil pools are skipped rather than dereferenced, and calling Merge with no
// arguments returns an empty pool.
func Merge[T any](pools ...*Pool[T]) *Pool[T] {
	// First pass: size the destination once so the appends never reallocate.
	total := 0
	for _, p := range pools {
		if p == nil {
			continue
		}
		total += len(p.items)
	}

	items := make([]T, 0, total)
	for _, p := range pools {
		if p == nil {
			continue
		}
		items = append(items, p.items...)
	}
	return &Pool[T]{items: items}
}
168 changes: 168 additions & 0 deletions internal/datagen/datagen_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
package datagen

import (
"math/rand"
"testing"
)

// TestNewPool verifies that NewPool reports the right length for both a
// populated and an empty argument list.
func TestNewPool(t *testing.T) {
	t.Run("creates pool with items", func(t *testing.T) {
		pool := NewPool("a", "b", "c")
		if got := pool.Len(); got != 3 {
			t.Errorf("expected Len() = 3, got %d", got)
		}
	})

	t.Run("empty pool", func(t *testing.T) {
		pool := NewPool[string]()
		if got := pool.Len(); got != 0 {
			t.Errorf("expected Len() = 0, got %d", got)
		}
	})
}

// TestPoolAll verifies that All returns the items in insertion order and
// that the returned slice is detached from the pool's internal storage.
func TestPoolAll(t *testing.T) {
	want := []string{"x", "y", "z"}
	pool := NewPool(want...)

	got := pool.All()
	if len(got) != 3 {
		t.Fatalf("expected 3 items, got %d", len(got))
	}
	for idx := range want {
		if got[idx] != want[idx] {
			t.Errorf("All()[%d] = %q, want %q", idx, got[idx], want[idx])
		}
	}

	// Writing to the returned slice must not leak back into the pool.
	got[0] = "modified"
	if fresh := pool.All(); fresh[0] == "modified" {
		t.Error("All() should return a copy, not the internal slice")
	}
}

// TestPoolRandom draws repeatedly from a three-item pool and checks that
// every item shows up at least once.
func TestPoolRandom(t *testing.T) {
	pool := NewPool("a", "b", "c")
	rng := rand.New(rand.NewSource(42))

	seen := map[string]bool{}
	for remaining := 100; remaining > 0; remaining-- {
		seen[pool.Random(rng)] = true
	}

	// 100 draws over 3 items should cover the whole pool.
	if len(seen) != 3 {
		t.Errorf("expected to see all 3 items, saw %d: %v", len(seen), seen)
	}
}

// TestPoolRandomDeterministic checks that two generators built from the same
// seed make Random return the same sequence of items.
func TestPoolRandomDeterministic(t *testing.T) {
	pool := NewPool(1, 2, 3, 4, 5)

	first := rand.New(rand.NewSource(99))
	second := rand.New(rand.NewSource(99))

	for draw := 0; draw < 20; draw++ {
		a := pool.Random(first)
		b := pool.Random(second)
		if a == b {
			continue
		}
		t.Fatalf("draw %d: same seed produced different results: %d vs %d", draw, a, b)
	}
}

// TestPoolRandomN covers the documented contract of RandomN: n below, equal
// to and above the pool size, plus the n == 0 and n < 0 cases that must
// yield an empty result.
func TestPoolRandomN(t *testing.T) {
	p := NewPool("a", "b", "c", "d", "e")
	// One generator is shared by all subtests. The assertions only look at
	// lengths and uniqueness, so they do not depend on the exact draw order.
	r := rand.New(rand.NewSource(42))

	t.Run("n less than pool size", func(t *testing.T) {
		result := p.RandomN(r, 3)
		if len(result) != 3 {
			t.Errorf("expected 3 items, got %d", len(result))
		}
		// Check uniqueness
		seen := make(map[string]bool)
		for _, v := range result {
			if seen[v] {
				t.Errorf("duplicate item %q in RandomN result", v)
			}
			seen[v] = true
		}
	})

	t.Run("n equals pool size", func(t *testing.T) {
		result := p.RandomN(r, 5)
		if len(result) != 5 {
			t.Errorf("expected 5 items, got %d", len(result))
		}
	})

	t.Run("n exceeds pool size returns all", func(t *testing.T) {
		result := p.RandomN(r, 10)
		if len(result) != 5 {
			t.Errorf("expected 5 items (pool size), got %d", len(result))
		}
	})

	t.Run("n zero returns empty", func(t *testing.T) {
		result := p.RandomN(r, 0)
		if len(result) != 0 {
			t.Errorf("expected 0 items, got %d", len(result))
		}
	})

	// RandomN documents n <= 0, so the negative side needs covering too.
	t.Run("n negative returns empty", func(t *testing.T) {
		result := p.RandomN(r, -1)
		if len(result) != 0 {
			t.Errorf("expected 0 items, got %d", len(result))
		}
	})
}

// TestMerge verifies that Merge concatenates its input pools in argument
// order and reports the combined length.
func TestMerge(t *testing.T) {
	p1 := NewPool("a", "b")
	p2 := NewPool("c", "d")
	p3 := NewPool("e")

	merged := Merge(p1, p2, p3)
	// Fatalf, not Errorf: the loop below indexes all[i] for every expected
	// item, so continuing with a short result would panic with an
	// index-out-of-range error instead of reporting the length mismatch.
	if merged.Len() != 5 {
		t.Fatalf("expected merged Len() = 5, got %d", merged.Len())
	}

	all := merged.All()
	expected := []string{"a", "b", "c", "d", "e"}
	for i, v := range expected {
		if all[i] != v {
			t.Errorf("merged.All()[%d] = %q, want %q", i, all[i], v)
		}
	}
}

// TestMergeEmpty checks that Merge with no input pools yields an empty pool.
func TestMergeEmpty(t *testing.T) {
	if got := Merge[string]().Len(); got != 0 {
		t.Errorf("expected merged Len() = 0, got %d", got)
	}
}

// TestPoolRandomPanicsOnEmpty confirms the documented behavior that Random
// panics when called on a pool with no items.
func TestPoolRandomPanicsOnEmpty(t *testing.T) {
	empty := NewPool[string]()
	rng := rand.New(rand.NewSource(42))

	// The deferred check runs after Random unwinds; a nil recover value
	// means no panic happened.
	defer func() {
		if rec := recover(); rec == nil {
			t.Error("expected panic on Random() with empty pool")
		}
	}()

	empty.Random(rng)
}

// TestPoolWithInts exercises Pool with a non-string element type and checks
// that the value returned by Random is one of the pool's items.
func TestPoolWithInts(t *testing.T) {
	pool := NewPool(200, 201, 204, 301, 404, 500)
	rng := rand.New(rand.NewSource(42))

	got := pool.Random(rng)
	for _, candidate := range pool.All() {
		if candidate == got {
			return // found: nothing to report
		}
	}
	t.Errorf("Random() returned %d which is not in the pool", got)
}
Loading