Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
155 changes: 155 additions & 0 deletions internal/datagen/usernames.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
package datagen

import (
"fmt"
"math/rand"
"strings"
)

// Value pools used when generating identities. FirstNames and Surnames hold
// 50 lower-case entries each; Departments holds 10 entries and Titles 13.
// NewPool is defined elsewhere in the package.
// NOTE(review): entry order presumably decides which value a seeded draw
// returns, so reordering or inserting entries may change generated data —
// confirm against the pool's Random method before editing.
var (
// FirstNames is the pool of given names. GenerateUserIdentity uses an entry
// verbatim in the UPN local part and takes its first byte for the Username.
FirstNames = NewPool(
"james", "mary", "john", "patricia", "robert",
"jennifer", "michael", "linda", "david", "elizabeth",
"william", "barbara", "richard", "susan", "joseph",
"jessica", "thomas", "sarah", "christopher", "karen",
"charles", "lisa", "daniel", "nancy", "matthew",
"betty", "anthony", "margaret", "mark", "sandra",
"steven", "ashley", "paul", "emily", "andrew",
"donna", "joshua", "michelle", "kenneth", "carol",
"kevin", "amanda", "brian", "melissa", "george",
"deborah", "timothy", "stephanie", "ronald", "rebecca",
)

// Surnames is the pool of family names. GenerateUserIdentity appends an
// entry to the first initial to form the Username.
Surnames = NewPool(
"smith", "johnson", "williams", "brown", "jones",
"garcia", "miller", "davis", "rodriguez", "martinez",
"hernandez", "lopez", "gonzalez", "wilson", "anderson",
"thomas", "taylor", "moore", "jackson", "martin",
"lee", "perez", "thompson", "white", "harris",
"sanchez", "clark", "ramirez", "lewis", "robinson",
"walker", "young", "allen", "king", "wright",
"scott", "torres", "nguyen", "hill", "flores",
"green", "adams", "nelson", "baker", "hall",
"rivera", "campbell", "mitchell", "carter", "roberts",
)

// Departments is the pool of department names. GenerateUserIdentity stores
// the drawn value in UserIdentity.Department and uses it as the OU of the DN.
Departments = NewPool(
"Engineering", "Sales", "Marketing", "Finance", "HR",
"IT", "Operations", "Legal", "Security", "Executive",
)

// Titles is the pool of job titles stored in UserIdentity.Title.
Titles = NewPool(
"Junior Developer", "Senior Developer", "Staff Engineer",
"Team Lead", "Manager", "Director", "VP",
"Analyst", "Consultant", "Administrator",
"Specialist", "Coordinator", "Architect",
)
)

// UserIdentity represents a generated domain user.
//
// GenerateUserIdentity fills every field except GroupSIDs. When GenerateUsers
// resolves a Username collision, disambiguateUser rewrites Username, UPN,
// Email, DisplayName and the CN component of DN with a numeric suffix.
type UserIdentity struct {
FirstName string // given name exactly as drawn from FirstNames, e.g. "james"
LastName string // family name exactly as drawn from Surnames, e.g. "smith"
Username string // sAMAccountName: first initial + surname, e.g. "jsmith" ("jsmith2" after a collision)
UPN string // first.last@<domain name>, e.g. "james.smith@contoso.com"
DisplayName string // capitalized "First Last", e.g. "James Smith" ("James Smith (2)" after a collision)
Email string // set to the same value as UPN, e.g. "james.smith@contoso.com"
SID string // domain SID followed by "-<RID>", e.g. "S-1-5-21-..."
Department string // value drawn from Departments, e.g. "Engineering"; also the OU in DN
Title string // value drawn from Titles, e.g. "Senior Developer"
DN string // "CN=James Smith,OU=Engineering,DC=contoso,DC=com"
GroupSIDs []string // back-references to GroupIdentity.SID; left nil by GenerateUserIdentity
}

// GenerateUserIdentity builds one random user that belongs to domain.
//
// Values are drawn from r in a fixed order — first name, surname, department,
// RID, title — so a given generator state always yields the same user. The
// Username is the first initial plus the surname, UPN and Email are both
// "first.last@<domain.Name>", and the DN places the user in an OU named after
// the department beneath one DC component per label of domain.Name.
// GroupSIDs is left nil.
func GenerateUserIdentity(r *rand.Rand, domain *DomainIdentity) *UserIdentity {
	u := &UserIdentity{
		FirstName: FirstNames.Random(r),
		LastName:  Surnames.Random(r),
	}

	u.Username = string(u.FirstName[0]) + u.LastName // "jsmith"
	u.DisplayName = titleCase(u.FirstName) + " " + titleCase(u.LastName)
	u.UPN = u.FirstName + "." + u.LastName + "@" + domain.Name
	u.Email = u.UPN

	// Distinguished name: "contoso.com" contributes "DC=contoso,DC=com".
	u.Department = Departments.Random(r)
	labels := strings.Split(domain.Name, ".")
	for i := range labels {
		labels[i] = "DC=" + labels[i]
	}
	u.DN = fmt.Sprintf("CN=%s,OU=%s,%s", u.DisplayName, u.Department, strings.Join(labels, ","))

	// User RIDs start at 1000.
	rid := r.Intn(50000) + 1000 // #nosec G404
	u.SID = fmt.Sprintf("%s-%d", domain.DomainSID, rid)

	u.Title = Titles.Random(r)
	return u
}

// GenerateUsers produces a deterministic set of users from a seed.
//
// When two generated users would share a sAMAccountName (Username), the
// second and subsequent collisions get a numeric suffix that propagates
// through every dependent field — Username, UPN, Email, DisplayName, and
// the CN component of DN — so the returned UserIdentity stays internally
// consistent. In real AD, sAMAccountName, UPN, and mail must all be unique;
// the suffix scheme mirrors that.
func GenerateUsers(seed int64, count int, domain *DomainIdentity) []*UserIdentity {
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When GenerateUsers detects a duplicate username, it only mutates u.Username. It does not update UPN, Email, or DN, so the returned UserIdentity becomes internally inconsistent

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

disambiguateUser now propagates the suffix through every identifier-bearing field. Username gets N appended, UPN/Email get the digit injected before @, DisplayName gets (N) appended, and the CN component of DN is rebuilt from the new DisplayName. New test exercises this with 200 users from a small name pool (forcing collisions) and asserts internal consistency plus uniqueness across Username, UPN, Email, and DN.

r := rand.New(rand.NewSource(seed)) // #nosec G404
users := make([]*UserIdentity, count)
seen := make(map[string]int)
for i := range users {
u := GenerateUserIdentity(r, domain)
seen[u.Username]++
if seen[u.Username] > 1 {
disambiguateUser(u, seen[u.Username])
}
users[i] = u
}
return users
}

// disambiguateUser rewrites every identifier-bearing field of u so the user
// stays internally consistent after a Username collision. suffix is the
// duplicate index (>= 2 by construction in GenerateUsers).
//
// Username gets the number appended; UPN and Email get it inserted just
// before the first '@' (values without an '@' are left alone); DisplayName
// gets a " (N)" qualifier; and when DN starts with "CN=" and contains a
// comma, its first component is rebuilt from the new DisplayName.
func disambiguateUser(u *UserIdentity, suffix int) {
	tag := fmt.Sprint(suffix)

	// tagLocalPart inserts tag at the end of the local part of local@domain.
	tagLocalPart := func(addr string) string {
		at := strings.IndexByte(addr, '@')
		if at == -1 {
			return addr
		}
		return addr[:at] + tag + addr[at:]
	}

	u.Username += tag
	u.UPN = tagLocalPart(u.UPN)
	u.Email = tagLocalPart(u.Email)

	// The "(N)" qualifier keeps the CN human-readable while making it distinct.
	u.DisplayName = fmt.Sprintf("%s (%d)", u.DisplayName, suffix)

	if !strings.HasPrefix(u.DN, "CN=") {
		return
	}
	comma := strings.IndexByte(u.DN, ',')
	if comma == -1 {
		return
	}
	// Keep everything from the first comma onward and swap in the new CN.
	u.DN = "CN=" + u.DisplayName + u.DN[comma:]
}

// titleCase returns s with its first character upper-cased and the rest left
// untouched. An empty string is returned as-is.
//
// The first character is taken as a whole UTF-8 rune rather than a single
// byte, so names that start with a multi-byte letter such as "é" are
// capitalized correctly instead of being split mid-character.
func titleCase(s string) string {
	// Ranging over a string steps rune by rune, so the second index visited
	// is where the first rune ends. A single-rune (or empty) string keeps
	// end == len(s).
	end := len(s)
	for i := range s {
		if i > 0 {
			end = i
			break
		}
	}
	return strings.ToUpper(s[:end]) + s[end:]
}
192 changes: 192 additions & 0 deletions internal/datagen/usernames_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
package datagen

import (
"math/rand"
"strings"
"testing"
"time"
)

// TestNamePools guards against the name pools shrinking below 50 entries each.
func TestNamePools(t *testing.T) {
	const minEntries = 50

	if n := FirstNames.Len(); n < minEntries {
		t.Errorf("FirstNames has %d items, want at least 50", n)
	}
	if n := Surnames.Len(); n < minEntries {
		t.Errorf("Surnames has %d items, want at least 50", n)
	}
}

// TestDepartmentPool guards against the department pool shrinking below 8 entries.
func TestDepartmentPool(t *testing.T) {
	const minEntries = 8

	if n := Departments.Len(); n < minEntries {
		t.Errorf("Departments has %d items, want at least 8", n)
	}
}

// TestTitlePool guards against the title pool shrinking below 5 entries.
func TestTitlePool(t *testing.T) {
	const minEntries = 5

	if n := Titles.Len(); n < minEntries {
		t.Errorf("Titles has %d items, want at least 5", n)
	}
}

// TestGenerateUserIdentity checks the shape of a single generated user:
// required fields are populated, the display name is a capitalized
// "First Last", the SID and DN carry the domain's components, and the same
// generator seed reproduces the same identity.
//
// All subtests except "deterministic" share one generator, so each of them
// sees a different user.
func TestGenerateUserIdentity(t *testing.T) {
	r := rand.New(rand.NewSource(42))
	domain := GenerateDomainIdentity(42, "contoso.com", time.Now())

	t.Run("basic fields populated", func(t *testing.T) {
		user := GenerateUserIdentity(r, domain)
		if user.FirstName == "" {
			t.Error("FirstName should not be empty")
		}
		if user.LastName == "" {
			t.Error("LastName should not be empty")
		}
		if user.Username == "" {
			t.Error("Username should not be empty")
		}
		if !strings.HasSuffix(user.UPN, "@contoso.com") {
			t.Errorf("UPN %q should end with '@contoso.com'", user.UPN)
		}
		if !strings.HasSuffix(user.Email, "@contoso.com") {
			t.Errorf("Email %q should end with '@contoso.com'", user.Email)
		}
	})

	t.Run("display name is title case", func(t *testing.T) {
		user := GenerateUserIdentity(r, domain)
		parts := strings.Split(user.DisplayName, " ")
		if len(parts) != 2 {
			t.Fatalf("DisplayName %q should be 'First Last'", user.DisplayName)
		}
		// Each part must actually start with an upper-case letter; checking
		// the word count alone would also accept "james smith".
		for _, p := range parts {
			if p == "" || p[:1] != strings.ToUpper(p[:1]) {
				t.Errorf("DisplayName %q: part %q should start with an upper-case letter", user.DisplayName, p)
			}
		}
		if !strings.EqualFold(parts[0], user.FirstName) {
			t.Errorf("DisplayName %q should start with first name %q", user.DisplayName, user.FirstName)
		}
		if !strings.EqualFold(parts[1], user.LastName) {
			t.Errorf("DisplayName %q should end with last name %q", user.DisplayName, user.LastName)
		}
	})

	t.Run("SID format", func(t *testing.T) {
		user := GenerateUserIdentity(r, domain)
		if !strings.HasPrefix(user.SID, domain.DomainSID+"-") {
			t.Errorf("user SID %q should start with domain SID %q", user.SID, domain.DomainSID)
		}
	})

	t.Run("has department and title", func(t *testing.T) {
		user := GenerateUserIdentity(r, domain)
		if user.Department == "" {
			t.Error("Department should not be empty")
		}
		if user.Title == "" {
			t.Error("Title should not be empty")
		}
	})

	t.Run("DN format", func(t *testing.T) {
		user := GenerateUserIdentity(r, domain)
		if !strings.HasPrefix(user.DN, "CN=") {
			t.Errorf("DN %q should start with 'CN='", user.DN)
		}
		if !strings.Contains(user.DN, "DC=contoso") {
			t.Errorf("DN %q should contain 'DC=contoso'", user.DN)
		}
	})

	t.Run("deterministic", func(t *testing.T) {
		r1 := rand.New(rand.NewSource(99))
		r2 := rand.New(rand.NewSource(99))
		u1 := GenerateUserIdentity(r1, domain)
		u2 := GenerateUserIdentity(r2, domain)

		// Compare every generated string field, not just Username: the
		// department, RID and title come from later draws and could diverge
		// even when the names agree.
		fields := []struct{ name, a, b string }{
			{"FirstName", u1.FirstName, u2.FirstName},
			{"LastName", u1.LastName, u2.LastName},
			{"Username", u1.Username, u2.Username},
			{"UPN", u1.UPN, u2.UPN},
			{"DisplayName", u1.DisplayName, u2.DisplayName},
			{"Email", u1.Email, u2.Email},
			{"SID", u1.SID, u2.SID},
			{"Department", u1.Department, u2.Department},
			{"Title", u1.Title, u2.Title},
			{"DN", u1.DN, u2.DN},
		}
		for _, f := range fields {
			if f.a != f.b {
				t.Errorf("same seed should produce same %s: %q vs %q", f.name, f.a, f.b)
			}
		}
	})
}

// TestGenerateUsers checks that GenerateUsers returns the requested number of
// users and that no two of them share a Username.
func TestGenerateUsers(t *testing.T) {
	const want = 20

	domain := GenerateDomainIdentity(42, "contoso.com", time.Now())
	users := GenerateUsers(42, want, domain)
	if got := len(users); got != want {
		t.Errorf("expected 20 users, got %d", got)
	}

	// Every Username may appear only once.
	taken := make(map[string]struct{}, len(users))
	for _, u := range users {
		if _, dup := taken[u.Username]; dup {
			t.Errorf("duplicate username %q", u.Username)
			continue
		}
		taken[u.Username] = struct{}{}
	}
}

// TestGenerateUsersInternalConsistency generates far more users than the name
// pools can supply distinct usernames for, so collisions are effectively
// guaranteed, and then checks two things for every user:
//
//   - if the Username carries a numeric suffix, the very same number ends the
//     local part of both UPN and Email and appears as " (N)" at the end of
//     DisplayName;
//   - Username, UPN, Email and DN are unique across the set, and the CN
//     component of DN equals DisplayName.
func TestGenerateUsersInternalConsistency(t *testing.T) {
	domain := GenerateDomainIdentity(42, "contoso.com", time.Now())
	users := GenerateUsers(42, 200, domain)

	// The pool names contain no digits, so any trailing digits on an
	// identifier are the disambiguation suffix.
	const digits = "0123456789"
	trailingDigits := func(s string) string {
		return s[len(strings.TrimRight(s, digits)):]
	}

	usernames := make(map[string]bool, len(users))
	upns := make(map[string]bool, len(users))
	emails := make(map[string]bool, len(users))
	dns := make(map[string]bool, len(users))
	suffixed := 0

	for _, u := range users {
		if suffix := trailingDigits(u.Username); suffix != "" {
			suffixed++

			// UPN and Email must carry exactly the same suffix at the end of
			// their local part.
			addrs := []struct{ field, value string }{
				{"UPN", u.UPN},
				{"Email", u.Email},
			}
			for _, a := range addrs {
				at := strings.IndexByte(a.value, '@')
				if at == -1 {
					t.Errorf("user %s: %s %q has no @", u.Username, a.field, a.value)
					continue
				}
				if got := trailingDigits(a.value[:at]); got != suffix {
					t.Errorf("user %s: %s %q has local-part suffix %q, want %q", u.Username, a.field, a.value, got, suffix)
				}
			}

			// DisplayName must end with the same number in parentheses.
			if want := " (" + suffix + ")"; !strings.HasSuffix(u.DisplayName, want) {
				t.Errorf("user %s: DisplayName %q should end with %q", u.Username, u.DisplayName, want)
			}
		}

		// Uniqueness across all identifying fields.
		if usernames[u.Username] {
			t.Errorf("duplicate Username %q", u.Username)
		}
		usernames[u.Username] = true
		if upns[u.UPN] {
			t.Errorf("duplicate UPN %q", u.UPN)
		}
		upns[u.UPN] = true
		if emails[u.Email] {
			t.Errorf("duplicate Email %q", u.Email)
		}
		emails[u.Email] = true
		if dns[u.DN] {
			t.Errorf("duplicate DN %q", u.DN)
		}
		dns[u.DN] = true

		// DN's CN must reflect DisplayName.
		if strings.HasPrefix(u.DN, "CN=") {
			rest := u.DN[3:]
			comma := strings.IndexByte(rest, ',')
			if comma == -1 {
				t.Errorf("user %s: DN %q has no comma after CN=", u.Username, u.DN)
				continue
			}
			if cn := rest[:comma]; cn != u.DisplayName {
				t.Errorf("user %s: DN CN=%q does not match DisplayName %q", u.Username, cn, u.DisplayName)
			}
		}
	}

	// Without at least one collision none of the suffix checks above ran.
	if suffixed == 0 {
		t.Error("no Username collision occurred among 200 users; the suffix checks were not exercised")
	}
}
Loading