From e969cc49633e9ecbdf9860a24a337ffb19aa036e Mon Sep 17 00:00:00 2001
From: Dylan Myers
Date: Mon, 6 Apr 2026 12:44:38 -0400
Subject: [PATCH] feat(datagen): add name pools and UserIdentity generation

---
Review notes (below the "---" marker, so not part of the commit message):
the payload differs from commit e969cc4 — GenerateUsers now guarantees unique
SIDs and tolerates count <= 0, with tests for both. The blob ids on the
"index" lines still name the original blobs.

 internal/datagen/usernames.go      | 196 ++++++++++++++++++++++++
 internal/datagen/usernames_test.go | 233 +++++++++++++++++++++++++++++
 2 files changed, 429 insertions(+)
 create mode 100644 internal/datagen/usernames.go
 create mode 100644 internal/datagen/usernames_test.go

diff --git a/internal/datagen/usernames.go b/internal/datagen/usernames.go
new file mode 100644
index 0000000..ad0f058
--- /dev/null
+++ b/internal/datagen/usernames.go
@@ -0,0 +1,196 @@
+package datagen
+
+import (
+	"fmt"
+	"math/rand"
+	"strings"
+)
+
+// Bounds of the RID range that GenerateUserIdentity samples from:
+// [userRIDMin, userRIDMin+userRIDSpan). GenerateUsers hands out RIDs at or
+// above the upper bound when it has to replace a duplicate SID.
+const (
+	userRIDMin  = 1000
+	userRIDSpan = 50000
+)
+
+// Name pools — 50 first names and 50 surnames — plus the department and
+// title pools that GenerateUserIdentity samples from.
+var (
+	// FirstNames holds lowercase given names.
+	FirstNames = NewPool(
+		"james", "mary", "john", "patricia", "robert",
+		"jennifer", "michael", "linda", "david", "elizabeth",
+		"william", "barbara", "richard", "susan", "joseph",
+		"jessica", "thomas", "sarah", "christopher", "karen",
+		"charles", "lisa", "daniel", "nancy", "matthew",
+		"betty", "anthony", "margaret", "mark", "sandra",
+		"steven", "ashley", "paul", "emily", "andrew",
+		"donna", "joshua", "michelle", "kenneth", "carol",
+		"kevin", "amanda", "brian", "melissa", "george",
+		"deborah", "timothy", "stephanie", "ronald", "rebecca",
+	)
+
+	// Surnames holds lowercase family names.
+	Surnames = NewPool(
+		"smith", "johnson", "williams", "brown", "jones",
+		"garcia", "miller", "davis", "rodriguez", "martinez",
+		"hernandez", "lopez", "gonzalez", "wilson", "anderson",
+		"thomas", "taylor", "moore", "jackson", "martin",
+		"lee", "perez", "thompson", "white", "harris",
+		"sanchez", "clark", "ramirez", "lewis", "robinson",
+		"walker", "young", "allen", "king", "wright",
+		"scott", "torres", "nguyen", "hill", "flores",
+		"green", "adams", "nelson", "baker", "hall",
+		"rivera", "campbell", "mitchell", "carter", "roberts",
+	)
+
+	// Departments holds department names; the sampled value doubles as the
+	// OU component of UserIdentity.DN.
+	Departments = NewPool(
+		"Engineering", "Sales", "Marketing", "Finance", "HR",
+		"IT", "Operations", "Legal", "Security", "Executive",
+	)
+
+	// Titles holds job titles.
+	Titles = NewPool(
+		"Junior Developer", "Senior Developer", "Staff Engineer",
+		"Team Lead", "Manager", "Director", "VP",
+		"Analyst", "Consultant", "Administrator",
+		"Specialist", "Coordinator", "Architect",
+	)
+)
+
+// UserIdentity represents a domain user.
+type UserIdentity struct {
+	FirstName   string   // "james" (lowercase, as stored in FirstNames)
+	LastName    string   // "smith" (lowercase, as stored in Surnames)
+	Username    string   // sAMAccountName: "jsmith"
+	UPN         string   // "james.smith@contoso.com"
+	DisplayName string   // "James Smith"
+	Email       string   // "james.smith@contoso.com"
+	SID         string   // "S-1-5-21-..."
+	Department  string   // "Engineering"
+	Title       string   // "Senior Developer"
+	DN          string   // "CN=James Smith,OU=Engineering,DC=contoso,DC=com"
+	GroupSIDs   []string // back-references to GroupIdentity.SID
+}
+
+// GenerateUserIdentity creates a random user identity within the given domain.
+//
+// Every random choice is drawn from r, so the result is reproducible for a
+// given generator state. The user's RID is sampled independently on every
+// call, so two calls can return the same SID; GenerateUsers corrects that for
+// the batch it returns. GroupSIDs is left nil.
+func GenerateUserIdentity(r *rand.Rand, domain *DomainIdentity) *UserIdentity {
+	first := FirstNames.Random(r)
+	last := Surnames.Random(r)
+
+	username := string(first[0]) + last // "jsmith"
+	displayName := titleCase(first) + " " + titleCase(last)
+	upn := first + "." + last + "@" + domain.Name
+	email := upn
+
+	// Build DN: one DC= component per dot-separated label of the domain name.
+	dept := Departments.Random(r)
+	parts := strings.Split(domain.Name, ".")
+	dcParts := make([]string, len(parts))
+	for i, p := range parts {
+		dcParts[i] = "DC=" + p
+	}
+	dn := fmt.Sprintf("CN=%s,OU=%s,%s", displayName, dept, strings.Join(dcParts, ","))
+
+	// Generate user RID in [userRIDMin, userRIDMin+userRIDSpan).
+	rid := r.Intn(userRIDSpan) + userRIDMin // #nosec G404
+	sid := fmt.Sprintf("%s-%d", domain.DomainSID, rid)
+
+	title := Titles.Random(r)
+
+	return &UserIdentity{
+		FirstName:   first,
+		LastName:    last,
+		Username:    username,
+		UPN:         upn,
+		DisplayName: displayName,
+		Email:       email,
+		SID:         sid,
+		Department:  dept,
+		Title:       title,
+		DN:          dn,
+	}
+}
+
+// GenerateUsers produces a deterministic set of users from a seed.
+//
+// When two generated users would share a sAMAccountName (Username), the
+// second and subsequent collisions get a numeric suffix that propagates
+// through every dependent field — Username, UPN, Email, DisplayName, and
+// the CN component of DN — so the returned UserIdentity stays internally
+// consistent. In real AD, sAMAccountName, UPN, and mail must all be unique;
+// the suffix scheme mirrors that.
+//
+// SIDs are unique within the returned slice as well: a user whose randomly
+// drawn SID is already taken is re-issued a SID whose RID is
+// userRIDMin+userRIDSpan+i (i being the user's index), which lies above the
+// random range and is therefore free. The replacement consumes no random
+// numbers, so every other field of every user is unaffected by it.
+//
+// A count of zero or less yields an empty, non-nil slice.
+func GenerateUsers(seed int64, count int, domain *DomainIdentity) []*UserIdentity {
+	if count < 0 {
+		count = 0 // make panics on a negative length
+	}
+	r := rand.New(rand.NewSource(seed)) // #nosec G404
+	users := make([]*UserIdentity, count)
+	seenNames := make(map[string]int, count)
+	seenSIDs := make(map[string]struct{}, count)
+	for i := range users {
+		u := GenerateUserIdentity(r, domain)
+
+		// Key on the base Username, before any suffix is applied.
+		seenNames[u.Username]++
+		if n := seenNames[u.Username]; n > 1 {
+			disambiguateUser(u, n)
+		}
+
+		if _, taken := seenSIDs[u.SID]; taken {
+			u.SID = fmt.Sprintf("%s-%d", domain.DomainSID, userRIDMin+userRIDSpan+i)
+		}
+		seenSIDs[u.SID] = struct{}{}
+
+		users[i] = u
+	}
+	return users
+}
+
+// disambiguateUser appends a numeric suffix to all identifier-bearing fields
+// of u so the user remains internally consistent after a Username collision.
+// suffix is the duplicate index (>= 2 by construction in GenerateUsers).
+func disambiguateUser(u *UserIdentity, suffix int) {
+	u.Username = fmt.Sprintf("%s%d", u.Username, suffix)
+	// UPN and Email share the local@domain shape; rebuild the local part.
+	if at := strings.IndexByte(u.UPN, '@'); at != -1 {
+		u.UPN = fmt.Sprintf("%s%d%s", u.UPN[:at], suffix, u.UPN[at:])
+	}
+	if at := strings.IndexByte(u.Email, '@'); at != -1 {
+		u.Email = fmt.Sprintf("%s%d%s", u.Email[:at], suffix, u.Email[at:])
+	}
+	// DisplayName + DN's CN component get a "(N)" qualifier — keeps the DN
+	// human-readable while ensuring the CN is distinct.
+	u.DisplayName = fmt.Sprintf("%s (%d)", u.DisplayName, suffix)
+	if strings.HasPrefix(u.DN, "CN=") {
+		if comma := strings.IndexByte(u.DN, ','); comma != -1 {
+			u.DN = fmt.Sprintf("CN=%s,%s", u.DisplayName, u.DN[comma+1:])
+		}
+	}
+}
+
+// titleCase capitalizes the first letter of a string.
+// It upper-cases the first byte only, which is sufficient for the ASCII
+// names in the pools above.
+func titleCase(s string) string {
+	if s == "" {
+		return s
+	}
+	return strings.ToUpper(s[:1]) + s[1:]
+}
diff --git a/internal/datagen/usernames_test.go b/internal/datagen/usernames_test.go
new file mode 100644
index 0000000..5d9a39b
--- /dev/null
+++ b/internal/datagen/usernames_test.go
@@ -0,0 +1,233 @@
+package datagen
+
+import (
+	"math/rand"
+	"strings"
+	"testing"
+	"time"
+)
+
+func TestNamePools(t *testing.T) {
+	if FirstNames.Len() < 50 {
+		t.Errorf("FirstNames has %d items, want at least 50", FirstNames.Len())
+	}
+	if Surnames.Len() < 50 {
+		t.Errorf("Surnames has %d items, want at least 50", Surnames.Len())
+	}
+}
+
+func TestDepartmentPool(t *testing.T) {
+	if Departments.Len() < 8 {
+		t.Errorf("Departments has %d items, want at least 8", Departments.Len())
+	}
+}
+
+func TestTitlePool(t *testing.T) {
+	if Titles.Len() < 5 {
+		t.Errorf("Titles has %d items, want at least 5", Titles.Len())
+	}
+}
+
+func TestGenerateUserIdentity(t *testing.T) {
+	r := rand.New(rand.NewSource(42))
+	domain := GenerateDomainIdentity(42, "contoso.com", time.Now())
+
+	t.Run("basic fields populated", func(t *testing.T) {
+		user := GenerateUserIdentity(r, domain)
+		if user.FirstName == "" {
+			t.Error("FirstName should not be empty")
+		}
+		if user.LastName == "" {
+			t.Error("LastName should not be empty")
+		}
+		if user.Username == "" {
+			t.Error("Username should not be empty")
+		}
+		if !strings.HasSuffix(user.UPN, "@contoso.com") {
+			t.Errorf("UPN %q should end with '@contoso.com'", user.UPN)
+		}
+		if !strings.HasSuffix(user.Email, "@contoso.com") {
+			t.Errorf("Email %q should end with '@contoso.com'", user.Email)
+		}
+	})
+
+	t.Run("display name is title case", func(t *testing.T) {
+		user := GenerateUserIdentity(r, domain)
+		parts := strings.Split(user.DisplayName, " ")
+		if len(parts) != 2 {
+			t.Errorf("DisplayName %q should be 'First Last'", user.DisplayName)
+		}
+		for _, p := range parts {
+			if p == "" || p[0] < 'A' || p[0] > 'Z' {
+				t.Errorf("DisplayName part %q should start with an upper-case letter", p)
+			}
+		}
+	})
+
+	t.Run("SID format", func(t *testing.T) {
+		user := GenerateUserIdentity(r, domain)
+		if !strings.HasPrefix(user.SID, domain.DomainSID+"-") {
+			t.Errorf("user SID %q should start with domain SID %q", user.SID, domain.DomainSID)
+		}
+	})
+
+	t.Run("has department and title", func(t *testing.T) {
+		user := GenerateUserIdentity(r, domain)
+		if user.Department == "" {
+			t.Error("Department should not be empty")
+		}
+		if user.Title == "" {
+			t.Error("Title should not be empty")
+		}
+	})
+
+	t.Run("DN format", func(t *testing.T) {
+		user := GenerateUserIdentity(r, domain)
+		if !strings.HasPrefix(user.DN, "CN=") {
+			t.Errorf("DN %q should start with 'CN='", user.DN)
+		}
+		if !strings.Contains(user.DN, "DC=contoso") {
+			t.Errorf("DN %q should contain 'DC=contoso'", user.DN)
+		}
+	})
+
+	t.Run("deterministic", func(t *testing.T) {
+		r1 := rand.New(rand.NewSource(99))
+		r2 := rand.New(rand.NewSource(99))
+		u1 := GenerateUserIdentity(r1, domain)
+		u2 := GenerateUserIdentity(r2, domain)
+		if u1.Username != u2.Username {
+			t.Errorf("same seed should produce same username: %q vs %q", u1.Username, u2.Username)
+		}
+	})
+}
+
+func TestGenerateUsers(t *testing.T) {
+	domain := GenerateDomainIdentity(42, "contoso.com", time.Now())
+	users := GenerateUsers(42, 20, domain)
+	if len(users) != 20 {
+		t.Errorf("expected 20 users, got %d", len(users))
+	}
+
+	// Check uniqueness of usernames
+	seen := make(map[string]bool)
+	for _, u := range users {
+		if seen[u.Username] {
+			t.Errorf("duplicate username %q", u.Username)
+		}
+		seen[u.Username] = true
+	}
+}
+
+func TestGenerateUsersNonPositiveCount(t *testing.T) {
+	domain := GenerateDomainIdentity(42, "contoso.com", time.Now())
+	for _, count := range []int{0, -1} {
+		if users := GenerateUsers(42, count, domain); len(users) != 0 {
+			t.Errorf("GenerateUsers(count=%d) returned %d users, want 0", count, len(users))
+		}
+	}
+}
+
+func TestGenerateUsersUniqueSIDs(t *testing.T) {
+	// 2000 draws from a 50000-wide RID range make at least one random SID
+	// collision all but certain, so this exercises the replacement path.
+	domain := GenerateDomainIdentity(42, "contoso.com", time.Now())
+	users := GenerateUsers(42, 2000, domain)
+	again := GenerateUsers(42, 2000, domain)
+
+	sids := make(map[string]bool, len(users))
+	for i, u := range users {
+		if !strings.HasPrefix(u.SID, domain.DomainSID+"-") {
+			t.Errorf("user SID %q should start with domain SID %q", u.SID, domain.DomainSID)
+		}
+		if sids[u.SID] {
+			t.Errorf("duplicate SID %q", u.SID)
+		}
+		sids[u.SID] = true
+		if again[i].SID != u.SID {
+			t.Errorf("user %d: same seed should produce same SID: %q vs %q", i, u.SID, again[i].SID)
+		}
+	}
+}
+
+func TestGenerateUsersInternalConsistency(t *testing.T) {
+	// With a small name pool relative to count, duplicate usernames are
+	// highly likely. After disambiguation, every user's Username, UPN,
+	// Email, DisplayName, and DN must be self-consistent: the numeric
+	// suffix on Username must show up in UPN/Email local-parts, and
+	// DisplayName must match the CN portion of DN.
+	domain := GenerateDomainIdentity(42, "contoso.com", time.Now())
+	users := GenerateUsers(42, 200, domain)
+
+	upns := make(map[string]bool)
+	emails := make(map[string]bool)
+	dns := make(map[string]bool)
+	sids := make(map[string]bool)
+	for _, u := range users {
+		// UPN and Email must mirror Username's local-part transformation.
+		// Username is first[0]+last; UPN/Email local-part is first.last —
+		// but both share the suffix when one is added.
+		hasSuffix := false
+		for _, ch := range u.Username {
+			if ch >= '0' && ch <= '9' {
+				hasSuffix = true
+				break
+			}
+		}
+		if hasSuffix {
+			// UPN/Email local part must contain a digit too.
+			at := strings.IndexByte(u.UPN, '@')
+			if at == -1 {
+				t.Errorf("user %s: UPN %q has no @", u.Username, u.UPN)
+				continue
+			}
+			localPart := u.UPN[:at]
+			localHasDigit := false
+			for _, ch := range localPart {
+				if ch >= '0' && ch <= '9' {
+					localHasDigit = true
+					break
+				}
+			}
+			if !localHasDigit {
+				t.Errorf("user %s has suffixed Username but UPN %q does not have a numeric suffix in local-part", u.Username, u.UPN)
+			}
+			// DisplayName must contain the suffix qualifier in parentheses.
+			if !strings.Contains(u.DisplayName, "(") {
+				t.Errorf("user %s has suffixed Username but DisplayName %q has no qualifier", u.Username, u.DisplayName)
+			}
+		}
+
+		// Uniqueness across all identifying fields.
+		if upns[u.UPN] {
+			t.Errorf("duplicate UPN %q", u.UPN)
+		}
+		upns[u.UPN] = true
+		if emails[u.Email] {
+			t.Errorf("duplicate Email %q", u.Email)
+		}
+		emails[u.Email] = true
+		if dns[u.DN] {
+			t.Errorf("duplicate DN %q", u.DN)
+		}
+		dns[u.DN] = true
+		if sids[u.SID] {
+			t.Errorf("duplicate SID %q", u.SID)
+		}
+		sids[u.SID] = true
+
+		// DN's CN must reflect DisplayName.
+		if strings.HasPrefix(u.DN, "CN=") {
+			rest := u.DN[3:]
+			comma := strings.IndexByte(rest, ',')
+			if comma == -1 {
+				t.Errorf("user %s: DN %q has no comma after CN=", u.Username, u.DN)
+				continue
+			}
+			cn := rest[:comma]
+			if cn != u.DisplayName {
+				t.Errorf("user %s: DN CN=%q does not match DisplayName %q", u.Username, cn, u.DisplayName)
+			}
+		}
+	}
+}