From 6eb03630f7778acd36cc76ccfc0dc97230dc1488 Mon Sep 17 00:00:00 2001
From: alex prozorov
Date: Mon, 22 Jun 2026 15:44:34 +0300
Subject: [PATCH 1/2] emit administers and supervises relationship source fields

populate the raw pre-pipeline fields the Entity Store v2 relationship maintainers consume
---
 .../active_directory_integration.ts           | 169 +++++++++++++++++-
 .../integrations/entra_id_integration.ts      |  14 ++
 .../org_data/integrations/okta_integration.ts |  16 +-
 3 files changed, 195 insertions(+), 4 deletions(-)

diff --git a/src/commands/org_data/integrations/active_directory_integration.ts b/src/commands/org_data/integrations/active_directory_integration.ts
index f11b4ea..b370f03 100644
--- a/src/commands/org_data/integrations/active_directory_integration.ts
+++ b/src/commands/org_data/integrations/active_directory_integration.ts
@@ -70,11 +70,30 @@ export class ActiveDirectoryIntegration extends BaseIntegration {
 
     const baseDn = `DC=${org.domain.replace('.com', '')},DC=com`;
 
+    // Pre-build a map of employee id → computer DNs for their Windows devices,
+    // so managers can reference their direct reports' machines as managedObjects.
+    const employeeComputerDns = new Map<string, string[]>();
+    for (const employee of org.employees) {
+      const dns = employee.devices
+        .filter((d) => d.type === 'laptop' && d.platform === 'windows')
+        .map(() => `CN=${this.buildComputerName(employee)},OU=Computers,${baseDn}`);
+      employeeComputerDns.set(employee.id, dns);
+    }
+
     // Generate user documents for all employees
     for (const employee of org.employees) {
       const userDn = this.buildUserDn(employee, baseDn);
       correlationMap.adDnToEmployee.set(userDn, employee);
 
+      // Direct reports of this employee (managerId references the manager's oktaUserId).
+      const directReports = org.employees.filter((e) => e.managerId === employee.oktaUserId);
+      // Managers administer the computers of their direct reports (user -> host).
+      const directReportComputerDns = directReports.flatMap(
+        (e) => employeeComputerDns.get(e.id) ?? [],
+      );
+      // Managers supervise the direct reports themselves (user -> user).
+      const directReportUserDns = directReports.map((e) => this.buildUserDn(e, baseDn));
+
       const userDoc = this.createUserDocument(
         employee,
         org,
@@ -82,16 +101,32 @@ export class ActiveDirectoryIntegration extends BaseIntegration {
         baseDn,
         userDn,
         centralAgent,
+        {
+          managedObjects: directReportComputerDns,
+          directReports: directReportUserDns,
+        },
       );
       documents.push(userDoc);
     }
 
+    // Collect all computer DNs so device managedObjects can reference real entities.
+    const allComputerDns = [...employeeComputerDns.values()].flat();
+
     // Generate computer documents for Windows devices
     for (const employee of org.employees) {
       const windowsDevices = employee.devices.filter(
         (d) => d.type === 'laptop' && d.platform === 'windows',
       );
       for (const device of windowsDevices) {
+        // ~15% of devices administer 1–2 other computers (e.g. shared lab machines).
+        // Reference actual generated computer DNs so the maintainer can resolve them.
+        const ownDn = `CN=${this.buildComputerName(employee)},OU=Computers,${baseDn}`;
+        const otherDns = allComputerDns.filter((dn) => dn !== ownDn);
+        const managedObjects =
+          faker.datatype.boolean({ probability: 0.15 }) && otherDns.length > 0
+            ? faker.helpers.arrayElements(otherDns, { min: 1, max: Math.min(2, otherDns.length) })
+            : [];
+
         const computerDoc = this.createDeviceDocument(
           device,
           employee,
@@ -99,6 +134,7 @@ export class ActiveDirectoryIntegration extends BaseIntegration {
           timestamp,
           baseDn,
           centralAgent,
+          managedObjects,
         );
         documents.push(computerDoc);
       }
@@ -108,6 +144,99 @@ export class ActiveDirectoryIntegration extends BaseIntegration {
     return documentsMap;
   }
 
+  /**
+   * Mirrors the ingest pipeline's buildHostRel() Painless function, which is now
+   * identical in both pipelines:
+   *
+   *   - device.yml (host actor): host.name = FQDN (cn.toLowerCase() + "." + domain)
+   *   - user.yml   (user actor): host.name = FQDN (cn.toLowerCase() + "." + domain)
+   *
+   * Both compose the FQDN when a domain is present, falling back to the bare CN
+   * otherwise. The administers maintainer resolves target host EUIDs as
+   * CONCAT("host:", raw_identifiers.host.name); the FQDN matches the device
+   * entity's own EUID (host:<fqdn>), since AD device docs set root host.name to
+   * the FQDN and never set root host.id.
+   *
+   * Example input:  ["CN=Workstation01,OU=Computers,DC=testserver,DC=local"]
+   *   -> host.name = ["workstation01.testserver.local"]
+   *
+   * host.id always carries the full DN, and user.domain the DC parts joined,
+   * exactly as parseDn()/buildHostRel() produce them.
+   */
+  private buildAdministersFromDns(dns: string[]): Record<string, unknown> {
+    const hostIds: string[] = [];
+    const hostNames: string[] = [];
+    const userDomains: string[] = [];
+
+    for (const dn of dns) {
+      if (!dn) continue;
+      hostIds.push(dn);
+
+      const cnMatch = dn.match(/^CN=([^,]+)/i);
+      const dcParts = [...dn.matchAll(/DC=([^,]+)/gi)].map((m) => m[1]);
+      const domain = dcParts.length > 0 ? dcParts.join('.') : undefined;
+
+      if (cnMatch) {
+        const cn = cnMatch[1];
+        // buildHostRel() composes the FQDN only when a domain is present;
+        // otherwise it falls back to the bare CN.
+        hostNames.push(domain ? `${cn.toLowerCase()}.${domain}` : cn);
+      }
+
+      if (domain) userDomains.push(domain);
+    }
+
+    return {
+      host: { id: hostIds, name: hostNames },
+      ...(userDomains.length > 0 && { user: { domain: userDomains } }),
+    };
+  }
+
+  /**
+   * Mirrors the ingest pipeline's buildUserRel() Painless function, used for the
+   * supervises relationship (user -> user). Each DN yields user.id = the full DN,
+   * user.name = the bare CN, and user.domain = the DC components joined.
+   *
+   * Unlike buildHostRel()/buildAdministersFromDns(), the name is the bare CN with no
+   * FQDN composition, exactly as parseDn()/buildUserRel() produce it.
+   *
+   * Example input:  ["CN=Jane Smith,OU=Staff,DC=testserver,DC=local"]
+   *   -> user.id     = ["CN=Jane Smith,OU=Staff,DC=testserver,DC=local"]
+   *      user.name   = ["Jane Smith"]
+   *      user.domain = ["testserver.local"]
+   */
+  private buildSupervisesFromDns(dns: string[]): Record<string, unknown> {
+    const userIds: string[] = [];
+    const userNames: string[] = [];
+    const userDomains: string[] = [];
+
+    for (const dn of dns) {
+      if (!dn) continue;
+      userIds.push(dn);
+
+      const cnMatch = dn.match(/^CN=([^,]+)/i);
+      const dcParts = [...dn.matchAll(/DC=([^,]+)/gi)].map((m) => m[1]);
+
+      if (cnMatch) userNames.push(cnMatch[1]);
+      if (dcParts.length > 0) userDomains.push(dcParts.join('.'));
+    }
+
+    return { user: { id: userIds, name: userNames, domain: userDomains } };
+  }
+
+  /**
+   * Build a deterministic computer name for an employee's Windows device.
+   * Must be called consistently — both the device document and the managedObjects
+   * reference in the manager's document use this same name so the maintainer can
+   * resolve raw_identifiers.host.name to an actual entity.
+   */
+  private buildComputerName(employee: Employee): string {
+    // Derive a stable 3-digit suffix from the employee id so the name is
+    // deterministic across both the device doc and the manager's managedObjects list.
+    const suffix = employee.id.replace(/\D/g, '').slice(-3).padStart(3, '0');
+    return `${employee.userName.replaceAll('.', '').substring(0, 8).toUpperCase()}-PC${suffix}`;
+  }
+
   /**
    * Build Distinguished Name for a user
    */
@@ -127,7 +256,9 @@ export class ActiveDirectoryIntegration extends BaseIntegration {
     baseDn: string,
     userDn: string,
     centralAgent: AgentData,
+    relationshipDns: { managedObjects: string[]; directReports: string[] },
   ): ActiveDirectoryDocument {
+    const { managedObjects, directReports } = relationshipDns;
     const whenCreated = faker.date.past({ years: 2 }).toISOString();
     const whenChanged = faker.date.recent({ days: 30 }).toISOString();
     // Windows NT time: 100ns intervals since 1601-01-01
@@ -190,6 +321,16 @@ export class ActiveDirectoryIntegration extends BaseIntegration {
       isCriticalSystemObject: false,
       showInAdvancedViewOnly: false,
       dSCorePropagationData: whenCreated,
+      // Both camelCase (raw event shape) and snake_case (post-pipeline shape) are written
+      // because the generator bypasses the ingest pipeline that would do the rename.
+      ...(managedObjects.length > 0 && {
+        managedObjects,
+        managed_objects: managedObjects,
+      }),
+      ...(directReports.length > 0 && {
+        directReports,
+        direct_reports: directReports,
+      }),
     };
 
     // Add groups as structured array (pipeline expects name and objectSid for privileged detection)
@@ -206,6 +347,18 @@ export class ActiveDirectoryIntegration extends BaseIntegration {
       };
     });
 
+    // Build the post-pipeline relationship shapes the entity store extraction reads from.
+    // administers (user -> host): user.yml's buildHostRel() composes host.name as the FQDN.
+    // supervises (user -> user): user.yml's buildUserRel() uses the bare CN for user.name.
+    const administers =
+      managedObjects.length > 0 ? this.buildAdministersFromDns(managedObjects) : undefined;
+    const supervises =
+      directReports.length > 0 ? this.buildSupervisesFromDns(directReports) : undefined;
+
+    const relationships: Record<string, unknown> = {};
+    if (administers) relationships.administers = administers;
+    if (supervises) relationships.supervises = supervises;
+
     return {
       '@timestamp': timestamp,
       agent: centralAgent,
@@ -219,6 +372,7 @@ export class ActiveDirectoryIntegration extends BaseIntegration {
       },
       user: {
         id: employee.windowsSid,
+        ...(Object.keys(relationships).length > 0 && { entity: { relationships } }),
       },
       labels: {
         identity_source: IDENTITY_SOURCE,
@@ -236,12 +390,13 @@ export class ActiveDirectoryIntegration extends BaseIntegration {
    * Create an Active Directory computer document
    */
   private createDeviceDocument(
-    device: Device,
+    _device: Device,
     employee: Employee,
     org: Organization,
     timestamp: string,
     baseDn: string,
     centralAgent: AgentData,
+    managedObjects: string[],
   ): ActiveDirectoryDocument {
     const whenCreated = faker.date.past({ years: 1 }).toISOString();
     const whenChanged = faker.date.recent({ days: 14 }).toISOString();
@@ -249,7 +404,7 @@ export class ActiveDirectoryIntegration extends BaseIntegration {
     const lastLogonTimestamp = this.generateWindowsNtTime(faker.date.recent({ days: 7 }));
 
     const osInfo = faker.helpers.arrayElement(WINDOWS_OS_VERSIONS);
-    const computerName = `${employee.userName.replace(/\./g, '').substring(0, 8).toUpperCase()}-PC${faker.string.numeric(3)}`;
+    const computerName = this.buildComputerName(employee);
     const computerDn = `CN=${computerName},OU=Computers,${baseDn}`;
     const objectGuid = this.generateObjectGuidBase64();
 
@@ -280,8 +435,17 @@ export class ActiveDirectoryIntegration extends BaseIntegration {
       isCriticalSystemObject: false,
       memberOf: [`CN=Domain Computers,CN=Users,${baseDn}`],
       managedBy: `CN=${employee.firstName} ${employee.lastName},${DEPARTMENT_OUS[employee.department] || 'OU=Users'},${baseDn}`,
+      ...(managedObjects.length > 0 && {
+        managedObjects,
+        managed_objects: managedObjects,
+      }),
     };
 
+    // device.yml's buildHostRel() composes host.name as the FQDN
+    // (cn.toLowerCase() + "." + domain), so this device document mirrors that.
+    const administers =
+      managedObjects.length > 0 ? this.buildAdministersFromDns(managedObjects) : undefined;
+
     return {
       '@timestamp': timestamp,
       agent: centralAgent,
@@ -295,6 +459,7 @@ export class ActiveDirectoryIntegration extends BaseIntegration {
       device: {
         id: computerDn,
       },
+      ...(administers && { host: { entity: { relationships: { administers } } } }),
       labels: {
         identity_source: IDENTITY_SOURCE,
       },
diff --git a/src/commands/org_data/integrations/entra_id_integration.ts b/src/commands/org_data/integrations/entra_id_integration.ts
index a91e10e..ee69585 100644
--- a/src/commands/org_data/integrations/entra_id_integration.ts
+++ b/src/commands/org_data/integrations/entra_id_integration.ts
@@ -105,6 +105,19 @@ export class EntraIdIntegration extends BaseIntegration {
     const mobilePhone = faker.phone.number({ style: 'international' });
     const businessPhone = faker.phone.number({ style: 'international' });
 
+    // Direct reports (managerId references the manager's shared person id / oktaUserId)
+    // become the supervises relationship. The raw azure_ad.directReports shape is the
+    // expanded Graph object; the entity pipeline renames it and builds
+    // user.entity.relationships.supervises.user.{id,name,email}.
+    const directReports = org.employees
+      .filter((report) => report.managerId === employee.oktaUserId)
+      .map((report) => ({
+        id: report.entraIdUserId,
+        displayName: `${report.firstName} ${report.lastName}`,
+        userPrincipalName: report.email,
+        mail: report.email,
+      }));
+
     return {
       '@timestamp': timestamp,
       agent: this.buildCentralAgent(org),
@@ -120,6 +133,7 @@ export class EntraIdIntegration extends BaseIntegration {
         mobilePhone: mobilePhone,
         businessPhones: [businessPhone],
         accountEnabled: true,
+        ...(directReports.length > 0 && { directReports }),
       },
       event: {
         action: 'user-discovered',
diff --git a/src/commands/org_data/integrations/okta_integration.ts b/src/commands/org_data/integrations/okta_integration.ts
index 624ae1b..f42f66f 100644
--- a/src/commands/org_data/integrations/okta_integration.ts
+++ b/src/commands/org_data/integrations/okta_integration.ts
@@ -101,10 +101,21 @@ export class OktaIntegration extends BaseIntegration {
     const state = faker.location.state();
     const zipCode = faker.location.zipCode();
 
+    // Direct reports (managerId references the manager's oktaUserId) become the
+    // supervises relationship. The entity pipeline reads the raw top-level `supervises`
+    // array of { user_id, email, username } and builds user.entity.relationships.supervises.
+    const supervises = org.employees
+      .filter((report) => report.managerId === employee.oktaUserId)
+      .map((report) => ({
+        user_id: report.oktaUserId,
+        email: report.email,
+        username: report.email,
+      }));
+
     return {
       '@timestamp': timestamp,
       agent: this.buildCentralAgent(org),
-      event: { action: 'user-discovered' },
+      event: { action: 'user-discovered', id: faker.string.uuid() },
       okta: {
         id: employee.oktaUserId,
         status: 'ACTIVE',
@@ -167,6 +178,7 @@ export class OktaIntegration extends BaseIntegration {
         lastUpdated: r.last_updated ?? r.created ?? lastUpdated,
       })),
       user: { id: employee.oktaUserId },
+      ...(supervises.length > 0 && { supervises }),
       labels: { identity_source: IDENTITY_SOURCE },
       data_stream: {
         namespace: 'default',
@@ -207,7 +219,7 @@ export class OktaIntegration extends BaseIntegration {
     return {
       '@timestamp': timestamp,
       agent: this.buildCentralAgent(org),
-      event: { action: 'device-discovered' },
+      event: { action: 'device-discovered', id: faker.string.uuid() },
       okta: {
         id: device.id,
         status: 'ACTIVE',

From 84fc89fcaa5ad68daeb3ede67633d3868e3c9de2 Mon Sep 17 00:00:00 2001
From: alex prozorov
Date: Mon, 22 Jun 2026 17:01:41 +0300
Subject: [PATCH 2/2] fix types

---
 src/commands/org_data/types.ts | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/commands/org_data/types.ts b/src/commands/org_data/types.ts
index 74cd715..1df1460 100644
--- a/src/commands/org_data/types.ts
+++ b/src/commands/org_data/types.ts
@@ -627,6 +627,12 @@ export interface EntraIdUserDocument {
     mobilePhone?: string;
     businessPhones?: string[];
     accountEnabled: boolean;
+    directReports?: Array<{
+      id: string;
+      displayName: string;
+      userPrincipalName: string;
+      mail: string;
+    }>;
   };
   event: {
     action: string;