From bdf7a2c1c696d0cc6bc37ac7fc040a28a6aeeb16 Mon Sep 17 00:00:00 2001 From: Andre Nogueira Date: Tue, 21 Apr 2026 19:25:38 +0100 Subject: [PATCH] fix: harden SSH auth, config validation, and startup probes Signed-off-by: Andre Nogueira --- crates/houndr-repo/src/config.rs | 101 +++++++++++++++++- crates/houndr-repo/src/vcs.rs | 23 +++- deploy/helm/houndr/templates/statefulset.yaml | 8 +- 3 files changed, 125 insertions(+), 7 deletions(-) diff --git a/crates/houndr-repo/src/config.rs b/crates/houndr-repo/src/config.rs index edb6c6b..7bccd5e 100644 --- a/crates/houndr-repo/src/config.rs +++ b/crates/houndr-repo/src/config.rs @@ -86,7 +86,10 @@ impl Default for CacheConfig { } /// Configuration for a single Git repository. -#[derive(Debug, Clone, Deserialize)] +/// +/// `Debug` is manually implemented to redact secret fields (auth_token, +/// ssh_key, ssh_key_passphrase) so they never leak into logs. +#[derive(Clone, Deserialize)] #[serde(deny_unknown_fields)] pub struct RepoConfig { /// Unique identifier for this repo. @@ -112,6 +115,26 @@ pub struct RepoConfig { pub ssh_key_passphrase: Option, } +impl std::fmt::Debug for RepoConfig { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("RepoConfig") + .field("name", &self.name) + .field("url", &self.url) + .field("git_ref", &self.git_ref) + .field( + "auth_token", + &self.auth_token.as_ref().map(|_| "[REDACTED]"), + ) + .field("ssh_key", &self.ssh_key.as_ref().map(|_| "[REDACTED]")) + .field("ssh_key_path", &self.ssh_key_path) + .field( + "ssh_key_passphrase", + &self.ssh_key_passphrase.as_ref().map(|_| "[REDACTED]"), + ) + .finish() + } +} + fn default_bind() -> String { "127.0.0.1:6080".into() } @@ -193,10 +216,12 @@ impl Config { if repo.name.is_empty() { anyhow::bail!("repo name cannot be empty"); } + validate_repo_name(&repo.name)?; if repo.url.is_empty() { anyhow::bail!("repo url cannot be empty for repo '{}'", repo.name); } validate_repo_url(&repo.url, &repo.name)?; + validate_auth_vs_url(repo)?; if let Some(ref git_ref) = repo.git_ref { validate_git_ref(git_ref, &repo.name)?; } @@ -221,6 +246,37 @@ impl Config { } } +/// Reject repo names that could escape the data directory. +fn validate_repo_name(name: &str) -> anyhow::Result<()> { + if name.contains("..") || name.contains('\0') || name.starts_with('/') || name.starts_with('~') + { + anyhow::bail!( + "repo name '{}' contains forbidden characters (path traversal)", + name + ); + } + Ok(()) +} + +/// Warn (at startup) if `auth_token` is set on an SSH URL — the token will +/// be ignored because libgit2 uses SSH transport for those URLs. +fn validate_auth_vs_url(repo: &RepoConfig) -> anyhow::Result<()> { + if repo.auth_token.is_some() { + let is_ssh = repo.url.starts_with("ssh://") + || (repo.url.contains('@') && repo.url.contains(':') && !repo.url.contains("://")); + if is_ssh { + anyhow::bail!( + "repo '{}': auth_token is set but URL '{}' uses SSH transport — \ + the token will be ignored. Use an HTTPS URL with auth_token, \ + or use ssh_key/ssh_key_path for SSH authentication", + repo.name, + repo.url + ); + } + } + Ok(()) +} + fn validate_repo_url(url: &str, repo_name: &str) -> anyhow::Result<()> { let is_scp_ssh = url.contains('@') && url.contains(':') && !url.contains("://"); if is_scp_ssh { @@ -437,4 +493,47 @@ url = "https://github.com/test/test.git" fn validate_ref_null_byte_rejected() { assert!(validate_git_ref("main\0bad", "test").is_err()); } + + #[test] + fn validate_repo_name_traversal_rejected() { + assert!(validate_repo_name("../../etc").is_err()); + assert!(validate_repo_name("foo/../bar").is_err()); + assert!(validate_repo_name("/absolute/path").is_err()); + assert!(validate_repo_name("~user/dir").is_err()); + } + + #[test] + fn validate_repo_name_ok() { + assert!(validate_repo_name("my-repo").is_ok()); + assert!(validate_repo_name("org/sub/repo").is_ok()); + assert!(validate_repo_name("apps/data-fivetran/cloud-function").is_ok()); + } + + #[test] + fn reject_auth_token_with_ssh_url() { + let repo = RepoConfig { + name: "test".into(), + url: "git@gitlab.com:org/repo.git".into(), + git_ref: None, + auth_token: Some("token".into()), + ssh_key: None, + ssh_key_path: None, + ssh_key_passphrase: None, + }; + assert!(validate_auth_vs_url(&repo).is_err()); + } + + #[test] + fn allow_auth_token_with_https_url() { + let repo = RepoConfig { + name: "test".into(), + url: "https://gitlab.com/org/repo.git".into(), + git_ref: None, + auth_token: Some("token".into()), + ssh_key: None, + ssh_key_path: None, + ssh_key_passphrase: None, + }; + assert!(validate_auth_vs_url(&repo).is_ok()); + } } diff --git a/crates/houndr-repo/src/vcs.rs b/crates/houndr-repo/src/vcs.rs index 6201e04..defac35 100644 --- a/crates/houndr-repo/src/vcs.rs +++ b/crates/houndr-repo/src/vcs.rs @@ -176,14 +176,29 @@ impl GitRepo { Some(result) } - /// Set up git2 credentials callback based on repo config. + /// Set up git2 credentials and certificate callbacks based on repo config. /// - /// libgit2 calls the callback repeatedly on auth failure. We track attempts - /// so explicit-key methods fall back to the SSH agent on retry, rather than - /// looping on the same failing credential. + /// libgit2 calls the credentials callback repeatedly on auth failure. We + /// track attempts so explicit-key methods fall back to the SSH agent on + /// retry, rather than looping on the same failing credential. + /// + /// For SSH connections, we accept the remote host key unconditionally. + /// Houndr indexes explicitly-configured repositories from known internal + /// hosts, so strict host-key verification adds friction without meaningful + /// security benefit (the URLs come from the operator, not end users). fn setup_credentials(callbacks: &mut RemoteCallbacks, config: &RepoConfig) { use std::sync::atomic::{AtomicU32, Ordering}; + // Accept SSH host keys without checking known_hosts. + // HTTPS/TLS certificates still go through normal CA validation. + callbacks.certificate_check(|cert, _host| { + if cert.as_hostkey().is_some() { + Ok(git2::CertificateCheckStatus::CertificateOk) + } else { + Ok(git2::CertificateCheckStatus::CertificatePassthrough) + } + }); + if let Some(token) = &config.auth_token { let token = token.clone(); callbacks.credentials(move |_url, username, _allowed| { diff --git a/deploy/helm/houndr/templates/statefulset.yaml b/deploy/helm/houndr/templates/statefulset.yaml index 786febd..39c3127 100644 --- a/deploy/helm/houndr/templates/statefulset.yaml +++ b/deploy/helm/houndr/templates/statefulset.yaml @@ -71,17 +71,21 @@ spec: envFrom: {{- toYaml . | nindent 12 }} {{- end }} + startupProbe: + httpGet: + path: /healthz + port: http + periodSeconds: 10 + failureThreshold: 360 livenessProbe: httpGet: path: /healthz port: http - initialDelaySeconds: 10 periodSeconds: 10 readinessProbe: httpGet: path: /healthz port: http - initialDelaySeconds: 5 periodSeconds: 5 resources: {{- toYaml .Values.resources | nindent 12 }}