From ca54d7af38875e8b2627e88a3b63f3579aa8e980 Mon Sep 17 00:00:00 2001 From: Devin AI Date: Mon, 27 Apr 2026 21:25:28 +0000 Subject: [PATCH 1/2] feat(telemetry): attribute machine-identity CLI events to identity-<identityId> persons MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the CLI runs with a machine-identity access token (INFISICAL_TOKEN or INFISICAL_UNIVERSAL_AUTH_ACCESS_TOKEN env var), every cli-command:* event today is captured under `anonymous_cli_<machineId>`. For CI runners and ephemeral containers — where the machineId is fresh per container — this means each container shows up as a unique anonymous person in PostHog, which is the dominant source of `anonymous_cli_*` person inflation. The backend already tracks machine identities under the distinctId `identity-<identityId>` and enriches the person record with a `[Machine Identity] <name>` display name and `actorType: identity` on every MachineIdentityLogin and identity-scoped event (see backend/src/services/telemetry/telemetry-service.ts:identifyIdentity and the 11 identity auth routers). The CLI just needs to use the matching distinctId so its events land on the same person record; no Identify call from the CLI is needed because the backend has already created the person. Add getMachineIdentityIdFromEnv() to inspect the same env-var precedence as util.GetInfisicalToken (--token flag is per-command and not accessible from the telemetry layer) and decode the unverified JWT payload to extract the `identityId` claim. Use "identity-" + identityId as the distinctId in GetDistinctId(), ahead of the user-email branch — when both are present, the CLI authenticates as the machine identity, so telemetry should follow the same attribution. Service tokens (`st.` prefix) and malformed JWTs fall through to the existing email/anonymous resolution. 
JWT signatures are not verified — the value is only used to derive a distinctId, never for authorization, and the same token is signature-verified on the backend when the API call is made. This change is upgrade-gated: existing `anonymous_cli_*` person records are not retroactively reassigned, but new events from upgraded CLIs will route to `identity-<identityId>` and stop generating new anonymous persons. --- packages/telemetry/telemetry.go | 87 ++++++++++++++++++++++++++++++++- 1 file changed, 86 insertions(+), 1 deletion(-) diff --git a/packages/telemetry/telemetry.go b/packages/telemetry/telemetry.go index d40f5437..4ad2e269 100644 --- a/packages/telemetry/telemetry.go +++ b/packages/telemetry/telemetry.go @@ -1,7 +1,11 @@ package telemetry import ( + "encoding/base64" + "encoding/json" "errors" + "os" + "strings" "github.com/Infisical/infisical-merge/packages/util" "github.com/denisbrodbeck/machineid" @@ -89,6 +93,76 @@ func (t *Telemetry) IdentifyUser(email string) { // all enqueued events (Identify, Alias, and Capture). } +// getMachineIdentityIdFromEnv inspects the environment variables that the +// CLI uses to receive machine-identity access tokens (the same set checked +// by util.GetInfisicalToken, minus the `--token` flag which is per-command +// and not visible to the telemetry layer) and, if a machine-identity JWT +// is present, returns the `identityId` claim from its payload. +// +// The function is intentionally best-effort and silent on failure: +// - returns "" if no token is set +// - returns "" for service tokens (`st.` prefix), which carry no JWT +// payload and represent the deprecated service-token auth mode +// - returns "" if the JWT is malformed or missing the `identityId` claim +// +// The token's signature is not verified — the value is only used to derive +// a PostHog distinctId, never for authorization. The same token has already +// been (or is about to be) sent to the Infisical API where its signature is +// verified server-side. 
+func getMachineIdentityIdFromEnv() string { + // Mirror the env-var precedence in util.GetInfisicalToken so that the + // telemetry distinctId aligns with the credential the API call will + // actually use: + // 1. INFISICAL_UNIVERSAL_AUTH_ACCESS_TOKEN + // 2. INFISICAL_TOKEN + // 3. TOKEN (legacy gateway env var) + envVars := []string{ + util.INFISICAL_UNIVERSAL_AUTH_ACCESS_TOKEN_NAME, + util.INFISICAL_TOKEN_NAME, + util.INFISICAL_GATEWAY_TOKEN_NAME_LEGACY, + } + + var token string + for _, name := range envVars { + if v := os.Getenv(name); v != "" { + token = v + break + } + } + + if token == "" { + return "" + } + + // Service tokens are deprecated and not JWTs — no identityId to extract. + if strings.HasPrefix(token, "st.") { + return "" + } + + parts := strings.Split(token, ".") + if len(parts) != 3 { + return "" + } + + payloadBytes, err := base64.RawURLEncoding.DecodeString(parts[1]) + if err != nil { + // Some JWT issuers pad the payload with `=`; tolerate that variant. + payloadBytes, err = base64.URLEncoding.DecodeString(parts[1]) + if err != nil { + return "" + } + } + + var claims struct { + IdentityID string `json:"identityId"` + } + if err := json.Unmarshal(payloadBytes, &claims); err != nil { + return "" + } + + return claims.IdentityID +} + func (t *Telemetry) GetDistinctId() (string, error) { var distinctId string @@ -102,7 +176,18 @@ func (t *Telemetry) GetDistinctId() (string, error) { log.Debug().Err(err).Msg("failed to get config file for telemetry") } - if infisicalConfig.LoggedInUserEmail != "" { + // Resolution priority: + // 1. Machine-identity access token from env (matches the credential the + // API call will use, and aligns with the `identity-` distinctId + // the backend already uses for MachineIdentityLogin and other + // identity-scoped events). This deliberately beats LoggedInUserEmail + // because when both are present (e.g. a developer testing CI locally), + // the CLI authenticates as the machine identity, not the user. + // 2. 
Logged-in user email from the persisted config. + // 3. Anonymous fallback keyed by the local machine ID. + if identityId := getMachineIdentityIdFromEnv(); identityId != "" { + distinctId = "identity-" + identityId + } else if infisicalConfig.LoggedInUserEmail != "" { distinctId = infisicalConfig.LoggedInUserEmail } else if machineId != "" { distinctId = "anonymous_cli_" + machineId From 18f2006193b899876eb5b91ff8e56581ffc4410a Mon Sep 17 00:00:00 2001 From: Devin AI Date: Mon, 27 Apr 2026 21:45:06 +0000 Subject: [PATCH 2/2] fix(telemetry): prefer logged-in email over env-token identity in distinctId resolution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses Codex P2 and Claude review nit on #197. The original precedence (env-token identity > logged-in email) misattributes telemetry for two real flows: 1. `infisical user switch` — pure local-config command that never authenticates against the backend. With INFISICAL_TOKEN exported in the shell, every cli-command:* event from a logged-in user would land on identity-<identityId> from a token the command never used. 2. `infisical login` (interactive user flow) — the CLI just persisted the user's email, called IdentifyUser/Alias on the email person, and then captures cli-command:login. With INFISICAL_TOKEN exported, the capture event would land on identity-<identityId> while the Identify+Alias enriched the email person, splitting the login-flow signal across two person records. Flip the precedence: logged-in email wins when present, env-token identity is consulted only when no user is logged in. This preserves the PR's primary goal — CI / containers / K8s pods (no logged-in user, INFISICAL_TOKEN set) still attribute to identity-<identityId> instead of anonymous_cli_<machineId> — while keeping all logged-in flows (switch, login, day-to-day commands) attributing to the user. 
Resolution table after this change:

  LoggedInUserEmail   INFISICAL_TOKEN (UA JWT)   Resolved distinctId
  --------------------------------------------------------------
  set (a@x.com)       set                        a@x.com
  set (a@x.com)       unset                      a@x.com
  unset               set                        identity-<identityId> (the goal)
  unset               unset                      anonymous_cli_<machineId>

--- packages/telemetry/telemetry.go | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/packages/telemetry/telemetry.go b/packages/telemetry/telemetry.go index 4ad2e269..1bb1e979 100644 --- a/packages/telemetry/telemetry.go +++ b/packages/telemetry/telemetry.go @@ -177,18 +177,26 @@ func (t *Telemetry) GetDistinctId() (string, error) { } // Resolution priority: - // 1. Machine-identity access token from env (matches the credential the - // API call will use, and aligns with the `identity-` distinctId - // the backend already uses for MachineIdentityLogin and other - // identity-scoped events). This deliberately beats LoggedInUserEmail - // because when both are present (e.g. a developer testing CI locally), - // the CLI authenticates as the machine identity, not the user. - // 2. Logged-in user email from the persisted config. + // 1. Logged-in user email from the persisted config. A logged-in user + // takes precedence over any machine-identity token that happens to + // be exported in the shell, because some commands never authenticate + // against the backend at all (e.g. `infisical user switch`, the + // local-config branch of `infisical login`) and others authenticate + // with the user's session JWT rather than the env-token. Attributing + // those events to a stale `identity-` would corrupt person-level + // analytics, while attributing them to the logged-in email is always + // correct. + // 2. Machine-identity access token from env. This is the dominant case + // in CI / containers / Kubernetes pods, where there is no logged-in + // user and the only credential is `INFISICAL_TOKEN` (or the UA-scoped + // env var). 
Aligns with the `identity-` distinctId the backend + // uses for MachineIdentityLogin and other identity-scoped events, + // so CLI events flow into the same person record. // 3. Anonymous fallback keyed by the local machine ID. - if identityId := getMachineIdentityIdFromEnv(); identityId != "" { - distinctId = "identity-" + identityId - } else if infisicalConfig.LoggedInUserEmail != "" { + if infisicalConfig.LoggedInUserEmail != "" { distinctId = infisicalConfig.LoggedInUserEmail + } else if identityId := getMachineIdentityIdFromEnv(); identityId != "" { + distinctId = "identity-" + identityId } else if machineId != "" { distinctId = "anonymous_cli_" + machineId }