-
Notifications
You must be signed in to change notification settings - Fork 1
feat(datagen): add name pools and UserIdentity generation #148
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
Dylan-M
wants to merge
1
commit into
04-06-feat_datagen_add_systemidentity_with_os_arch_role_types_and_version_pools
Choose a base branch
from
04-06-feat_datagen_add_name_pools_and_useridentity_generation
base: 04-06-feat_datagen_add_systemidentity_with_os_arch_role_types_and_version_pools
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+347
−0
Open
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,155 @@ | ||
| package datagen | ||
|
|
||
import (
	"fmt"
	"math/rand"
	"strings"
	"unicode"
	"unicode/utf8"
)
|
|
||
| // Name pools — 50 first names and 50 surnames. | ||
| var ( | ||
| FirstNames = NewPool( | ||
| "james", "mary", "john", "patricia", "robert", | ||
| "jennifer", "michael", "linda", "david", "elizabeth", | ||
| "william", "barbara", "richard", "susan", "joseph", | ||
| "jessica", "thomas", "sarah", "christopher", "karen", | ||
| "charles", "lisa", "daniel", "nancy", "matthew", | ||
| "betty", "anthony", "margaret", "mark", "sandra", | ||
| "steven", "ashley", "paul", "emily", "andrew", | ||
| "donna", "joshua", "michelle", "kenneth", "carol", | ||
| "kevin", "amanda", "brian", "melissa", "george", | ||
| "deborah", "timothy", "stephanie", "ronald", "rebecca", | ||
| ) | ||
|
|
||
| Surnames = NewPool( | ||
| "smith", "johnson", "williams", "brown", "jones", | ||
| "garcia", "miller", "davis", "rodriguez", "martinez", | ||
| "hernandez", "lopez", "gonzalez", "wilson", "anderson", | ||
| "thomas", "taylor", "moore", "jackson", "martin", | ||
| "lee", "perez", "thompson", "white", "harris", | ||
| "sanchez", "clark", "ramirez", "lewis", "robinson", | ||
| "walker", "young", "allen", "king", "wright", | ||
| "scott", "torres", "nguyen", "hill", "flores", | ||
| "green", "adams", "nelson", "baker", "hall", | ||
| "rivera", "campbell", "mitchell", "carter", "roberts", | ||
| ) | ||
|
|
||
| Departments = NewPool( | ||
| "Engineering", "Sales", "Marketing", "Finance", "HR", | ||
| "IT", "Operations", "Legal", "Security", "Executive", | ||
| ) | ||
|
|
||
| Titles = NewPool( | ||
| "Junior Developer", "Senior Developer", "Staff Engineer", | ||
| "Team Lead", "Manager", "Director", "VP", | ||
| "Analyst", "Consultant", "Administrator", | ||
| "Specialist", "Coordinator", "Architect", | ||
| ) | ||
| ) | ||
|
|
||
| // UserIdentity represents a domain user. | ||
| type UserIdentity struct { | ||
| FirstName string | ||
| LastName string | ||
| Username string // sAMAccountName: "jsmith" | ||
| UPN string // "james.smith@contoso.com" | ||
| DisplayName string // "James Smith" | ||
| Email string // "james.smith@contoso.com" | ||
| SID string // "S-1-5-21-..." | ||
| Department string // "Engineering" | ||
| Title string // "Senior Developer" | ||
| DN string // "CN=James Smith,OU=Engineering,DC=contoso,DC=com" | ||
| GroupSIDs []string // back-references to GroupIdentity.SID | ||
| } | ||
|
|
||
| // GenerateUserIdentity creates a random user identity within the given domain. | ||
| func GenerateUserIdentity(r *rand.Rand, domain *DomainIdentity) *UserIdentity { | ||
| first := FirstNames.Random(r) | ||
| last := Surnames.Random(r) | ||
|
|
||
| username := string(first[0]) + last // "jsmith" | ||
| displayName := titleCase(first) + " " + titleCase(last) | ||
| upn := first + "." + last + "@" + domain.Name | ||
| email := upn | ||
|
|
||
| // Build DN | ||
| dept := Departments.Random(r) | ||
| parts := strings.Split(domain.Name, ".") | ||
| dcParts := make([]string, len(parts)) | ||
| for i, p := range parts { | ||
| dcParts[i] = "DC=" + p | ||
| } | ||
| dn := fmt.Sprintf("CN=%s,OU=%s,%s", displayName, dept, strings.Join(dcParts, ",")) | ||
|
|
||
| // Generate user RID (1000+) | ||
| rid := r.Intn(50000) + 1000 // #nosec G404 | ||
| sid := fmt.Sprintf("%s-%d", domain.DomainSID, rid) | ||
|
|
||
| title := Titles.Random(r) | ||
|
|
||
| return &UserIdentity{ | ||
| FirstName: first, | ||
| LastName: last, | ||
| Username: username, | ||
| UPN: upn, | ||
| DisplayName: displayName, | ||
| Email: email, | ||
| SID: sid, | ||
| Department: dept, | ||
| Title: title, | ||
| DN: dn, | ||
| } | ||
| } | ||
|
|
||
| // GenerateUsers produces a deterministic set of users from a seed. | ||
| // | ||
| // When two generated users would share a sAMAccountName (Username), the | ||
| // second and subsequent collisions get a numeric suffix that propagates | ||
| // through every dependent field — Username, UPN, Email, DisplayName, and | ||
| // the CN component of DN — so the returned UserIdentity stays internally | ||
| // consistent. In real AD, sAMAccountName, UPN, and mail must all be unique; | ||
| // the suffix scheme mirrors that. | ||
| func GenerateUsers(seed int64, count int, domain *DomainIdentity) []*UserIdentity { | ||
| r := rand.New(rand.NewSource(seed)) // #nosec G404 | ||
| users := make([]*UserIdentity, count) | ||
| seen := make(map[string]int) | ||
| for i := range users { | ||
| u := GenerateUserIdentity(r, domain) | ||
| seen[u.Username]++ | ||
| if seen[u.Username] > 1 { | ||
| disambiguateUser(u, seen[u.Username]) | ||
| } | ||
| users[i] = u | ||
| } | ||
| return users | ||
| } | ||
|
|
||
| // disambiguateUser appends a numeric suffix to all identifier-bearing fields | ||
| // of u so the user remains internally consistent after a Username collision. | ||
| // suffix is the duplicate index (>= 2 by construction in GenerateUsers). | ||
| func disambiguateUser(u *UserIdentity, suffix int) { | ||
| u.Username = fmt.Sprintf("%s%d", u.Username, suffix) | ||
| // UPN and Email share the local@domain shape; rebuild the local part. | ||
| if at := strings.IndexByte(u.UPN, '@'); at != -1 { | ||
| u.UPN = fmt.Sprintf("%s%d%s", u.UPN[:at], suffix, u.UPN[at:]) | ||
| } | ||
| if at := strings.IndexByte(u.Email, '@'); at != -1 { | ||
| u.Email = fmt.Sprintf("%s%d%s", u.Email[:at], suffix, u.Email[at:]) | ||
| } | ||
| // DisplayName + DN's CN component get a "(N)" qualifier — keeps the DN | ||
| // human-readable while ensuring the CN is distinct. | ||
| u.DisplayName = fmt.Sprintf("%s (%d)", u.DisplayName, suffix) | ||
| if strings.HasPrefix(u.DN, "CN=") { | ||
| if comma := strings.IndexByte(u.DN, ','); comma != -1 { | ||
| u.DN = fmt.Sprintf("CN=%s,%s", u.DisplayName, u.DN[comma+1:]) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // titleCase capitalizes the first letter of a string. | ||
| func titleCase(s string) string { | ||
| if s == "" { | ||
| return s | ||
| } | ||
| return strings.ToUpper(s[:1]) + s[1:] | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,192 @@ | ||
| package datagen | ||
|
|
||
| import ( | ||
| "math/rand" | ||
| "strings" | ||
| "testing" | ||
| "time" | ||
| ) | ||
|
|
||
| func TestNamePools(t *testing.T) { | ||
| if FirstNames.Len() < 50 { | ||
| t.Errorf("FirstNames has %d items, want at least 50", FirstNames.Len()) | ||
| } | ||
| if Surnames.Len() < 50 { | ||
| t.Errorf("Surnames has %d items, want at least 50", Surnames.Len()) | ||
| } | ||
| } | ||
|
|
||
| func TestDepartmentPool(t *testing.T) { | ||
| if Departments.Len() < 8 { | ||
| t.Errorf("Departments has %d items, want at least 8", Departments.Len()) | ||
| } | ||
| } | ||
|
|
||
| func TestTitlePool(t *testing.T) { | ||
| if Titles.Len() < 5 { | ||
| t.Errorf("Titles has %d items, want at least 5", Titles.Len()) | ||
| } | ||
| } | ||
|
|
||
| func TestGenerateUserIdentity(t *testing.T) { | ||
| r := rand.New(rand.NewSource(42)) | ||
| domain := GenerateDomainIdentity(42, "contoso.com", time.Now()) | ||
|
|
||
| t.Run("basic fields populated", func(t *testing.T) { | ||
| user := GenerateUserIdentity(r, domain) | ||
| if user.FirstName == "" { | ||
| t.Error("FirstName should not be empty") | ||
| } | ||
| if user.LastName == "" { | ||
| t.Error("LastName should not be empty") | ||
| } | ||
| if user.Username == "" { | ||
| t.Error("Username should not be empty") | ||
| } | ||
| if !strings.HasSuffix(user.UPN, "@contoso.com") { | ||
| t.Errorf("UPN %q should end with '@contoso.com'", user.UPN) | ||
| } | ||
| if !strings.HasSuffix(user.Email, "@contoso.com") { | ||
| t.Errorf("Email %q should end with '@contoso.com'", user.Email) | ||
| } | ||
| }) | ||
|
|
||
| t.Run("display name is title case", func(t *testing.T) { | ||
| user := GenerateUserIdentity(r, domain) | ||
| parts := strings.Split(user.DisplayName, " ") | ||
| if len(parts) != 2 { | ||
| t.Errorf("DisplayName %q should be 'First Last'", user.DisplayName) | ||
| } | ||
| }) | ||
|
|
||
| t.Run("SID format", func(t *testing.T) { | ||
| user := GenerateUserIdentity(r, domain) | ||
| if !strings.HasPrefix(user.SID, domain.DomainSID+"-") { | ||
| t.Errorf("user SID %q should start with domain SID %q", user.SID, domain.DomainSID) | ||
| } | ||
| }) | ||
|
|
||
| t.Run("has department and title", func(t *testing.T) { | ||
| user := GenerateUserIdentity(r, domain) | ||
| if user.Department == "" { | ||
| t.Error("Department should not be empty") | ||
| } | ||
| if user.Title == "" { | ||
| t.Error("Title should not be empty") | ||
| } | ||
| }) | ||
|
|
||
| t.Run("DN format", func(t *testing.T) { | ||
| user := GenerateUserIdentity(r, domain) | ||
| if !strings.HasPrefix(user.DN, "CN=") { | ||
| t.Errorf("DN %q should start with 'CN='", user.DN) | ||
| } | ||
| if !strings.Contains(user.DN, "DC=contoso") { | ||
| t.Errorf("DN %q should contain 'DC=contoso'", user.DN) | ||
| } | ||
| }) | ||
|
|
||
| t.Run("deterministic", func(t *testing.T) { | ||
| r1 := rand.New(rand.NewSource(99)) | ||
| r2 := rand.New(rand.NewSource(99)) | ||
| u1 := GenerateUserIdentity(r1, domain) | ||
| u2 := GenerateUserIdentity(r2, domain) | ||
| if u1.Username != u2.Username { | ||
| t.Errorf("same seed should produce same username: %q vs %q", u1.Username, u2.Username) | ||
| } | ||
| }) | ||
| } | ||
|
|
||
| func TestGenerateUsers(t *testing.T) { | ||
| domain := GenerateDomainIdentity(42, "contoso.com", time.Now()) | ||
| users := GenerateUsers(42, 20, domain) | ||
| if len(users) != 20 { | ||
| t.Errorf("expected 20 users, got %d", len(users)) | ||
| } | ||
|
|
||
| // Check uniqueness of usernames | ||
| seen := make(map[string]bool) | ||
| for _, u := range users { | ||
| if seen[u.Username] { | ||
| t.Errorf("duplicate username %q", u.Username) | ||
| } | ||
| seen[u.Username] = true | ||
| } | ||
| } | ||
|
|
||
| func TestGenerateUsersInternalConsistency(t *testing.T) { | ||
| // With a small name pool relative to count, duplicate usernames are | ||
| // highly likely. After disambiguation, every user's Username, UPN, | ||
| // Email, DisplayName, and DN must be self-consistent: the numeric | ||
| // suffix on Username must show up in UPN/Email local-parts, and | ||
| // DisplayName must match the CN portion of DN. | ||
| domain := GenerateDomainIdentity(42, "contoso.com", time.Now()) | ||
| users := GenerateUsers(42, 200, domain) | ||
|
|
||
| upns := make(map[string]bool) | ||
| emails := make(map[string]bool) | ||
| dns := make(map[string]bool) | ||
| for _, u := range users { | ||
| // UPN and Email must mirror Username's local-part transformation. | ||
| // Username is first[0]+last; UPN/Email local-part is first.last — | ||
| // but both share the suffix when one is added. | ||
| hasSuffix := false | ||
| for _, ch := range u.Username { | ||
| if ch >= '0' && ch <= '9' { | ||
| hasSuffix = true | ||
| break | ||
| } | ||
| } | ||
| if hasSuffix { | ||
| // UPN/Email local part must contain a digit too. | ||
| at := strings.IndexByte(u.UPN, '@') | ||
| if at == -1 { | ||
| t.Errorf("user %s: UPN %q has no @", u.Username, u.UPN) | ||
| continue | ||
| } | ||
| localPart := u.UPN[:at] | ||
| localHasDigit := false | ||
| for _, ch := range localPart { | ||
| if ch >= '0' && ch <= '9' { | ||
| localHasDigit = true | ||
| break | ||
| } | ||
| } | ||
| if !localHasDigit { | ||
| t.Errorf("user %s has suffixed Username but UPN %q does not have a numeric suffix in local-part", u.Username, u.UPN) | ||
| } | ||
| // DisplayName must contain the suffix qualifier in parentheses. | ||
| if !strings.Contains(u.DisplayName, "(") { | ||
| t.Errorf("user %s has suffixed Username but DisplayName %q has no qualifier", u.Username, u.DisplayName) | ||
| } | ||
| } | ||
|
|
||
| // Uniqueness across all identifying fields. | ||
| if upns[u.UPN] { | ||
| t.Errorf("duplicate UPN %q", u.UPN) | ||
| } | ||
| upns[u.UPN] = true | ||
| if emails[u.Email] { | ||
| t.Errorf("duplicate Email %q", u.Email) | ||
| } | ||
| emails[u.Email] = true | ||
| if dns[u.DN] { | ||
| t.Errorf("duplicate DN %q", u.DN) | ||
| } | ||
| dns[u.DN] = true | ||
|
|
||
| // DN's CN must reflect DisplayName. | ||
| if strings.HasPrefix(u.DN, "CN=") { | ||
| rest := u.DN[3:] | ||
| comma := strings.IndexByte(rest, ',') | ||
| if comma == -1 { | ||
| t.Errorf("user %s: DN %q has no comma after CN=", u.Username, u.DN) | ||
| continue | ||
| } | ||
| cn := rest[:comma] | ||
| if cn != u.DisplayName { | ||
| t.Errorf("user %s: DN CN=%q does not match DisplayName %q", u.Username, cn, u.DisplayName) | ||
| } | ||
| } | ||
| } | ||
| } |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When GenerateUsers detects a duplicate username, it only mutates u.Username. It does not update UPN, Email, or DN, so the returned UserIdentity becomes internally inconsistent.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
disambiguateUser now propagates the suffix through every identifier-bearing field. Username gets N appended, UPN/Email get the digit injected before @, DisplayName gets (N) appended, and the CN component of DN is rebuilt from the new DisplayName. New test exercises this with 200 users from a small name pool (forcing collisions) and asserts internal consistency plus uniqueness across Username, UPN, Email, and DN.