From 981aaeac3e6d4c5dbda89b277c50ddcc3bc2e6c1 Mon Sep 17 00:00:00 2001 From: anilb Date: Tue, 31 Mar 2026 23:24:57 +0200 Subject: [PATCH 1/3] feat(vulnerabilities): add reference_link field with priority-based URL selection Signed-off-by: anilb --- .../services/vulnerability_scanner/db.go | 5 +++- .../services/vulnerability_scanner/types.go | 1 + .../vulnerability_scanner.go | 26 +++++++++++++++++++ .../datasources/vulnerabilities.datasource | 2 ++ .../tinybird/pipes/vulnerabilities_list.pipe | 2 +- 5 files changed, 34 insertions(+), 2 deletions(-) diff --git a/services/apps/git_integration/src/crowdgit/services/vulnerability_scanner/db.go b/services/apps/git_integration/src/crowdgit/services/vulnerability_scanner/db.go index 343c9e5a17..eb6af0f0b4 100644 --- a/services/apps/git_integration/src/crowdgit/services/vulnerability_scanner/db.go +++ b/services/apps/git_integration/src/crowdgit/services/vulnerability_scanner/db.go @@ -107,8 +107,9 @@ func (db *InsightsDB) saveVulnerabilities(ctx context.Context, repoURL string, v source_path, source_type, status, fixed_version, published_at, modified_at, + reference_link, scanned_at - ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20) + ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21) ON CONFLICT (repo_url, vulnerability_id, package_name, source_path) DO UPDATE SET scan_id = EXCLUDED.scan_id, @@ -126,6 +127,7 @@ func (db *InsightsDB) saveVulnerabilities(ctx context.Context, repoURL string, v source_type = EXCLUDED.source_type, published_at = EXCLUDED.published_at, modified_at = EXCLUDED.modified_at, + reference_link = EXCLUDED.reference_link, scanned_at = EXCLUDED.scanned_at, resolved_at = NULL RETURNING (xmax = 0) AS is_new` @@ -139,6 +141,7 @@ func (db *InsightsDB) saveVulnerabilities(ctx context.Context, repoURL string, v v.SourcePath, v.SourceType, v.Status, v.FixedVersion, v.PublishedAt, v.ModifiedAt, + v.ReferenceLink, now, ) } diff --git a/services/apps/git_integration/src/crowdgit/services/vulnerability_scanner/types.go b/services/apps/git_integration/src/crowdgit/services/vulnerability_scanner/types.go index 20cf0c7a22..3c82b97595 100644 --- a/services/apps/git_integration/src/crowdgit/services/vulnerability_scanner/types.go +++ b/services/apps/git_integration/src/crowdgit/services/vulnerability_scanner/types.go @@ -68,4 +68,5 @@ type Vulnerability struct { FixedVersion string `json:"fixed_version"` PublishedAt *time.Time `json:"published_at"` ModifiedAt *time.Time `json:"modified_at"` + ReferenceLink string `json:"reference_link"` } diff --git a/services/apps/git_integration/src/crowdgit/services/vulnerability_scanner/vulnerability_scanner.go b/services/apps/git_integration/src/crowdgit/services/vulnerability_scanner/vulnerability_scanner.go index fbe78f0e41..29517941be 100644 --- a/services/apps/git_integration/src/crowdgit/services/vulnerability_scanner/vulnerability_scanner.go +++ b/services/apps/git_integration/src/crowdgit/services/vulnerability_scanner/vulnerability_scanner.go @@ -139,6 +139,30 @@ func (s *VulnerabilityScanner) scan(transitiveScanning bool) (models.Vulnerabili } } +func (s *VulnerabilityScanner) selectBestReference(v *osvschema.Vulnerability) string { + if v == nil || v.References == nil || len(v.References) == 0 { + return "" + } + + // First pass: Check for NVD URL + for _, ref := range v.References { + if ref.Url != "" && strings.Contains(ref.Url, "nvd.nist.gov") { + return ref.Url + } + } + + // Second pass: Select by type priority + for _, priorityType := range []string{"ADVISORY", "ARTICLE", "WEB", "INTRODUCED"} { + for _, ref := range v.References { + if ref.Type.String() == priorityType && ref.Url != "" { + return ref.Url + } + } + } + + return "" +} + func (s *VulnerabilityScanner) processResults(scanID string, results models.VulnerabilityResults) []Vulnerability { flattened := results.Flatten() @@ -167,6 +191,7 @@ func (s *VulnerabilityScanner) processResults(scanID string, results models.Vuln status, fixedVersion := s.getFixInfo(v) publishedAt := protoTimestampToTime(v.Vulnerability.Published) modifiedAt := protoTimestampToTime(v.Vulnerability.Modified) + referenceLink := s.selectBestReference(v.Vulnerability) seen[key] = Vulnerability{ RepoURL: s.gitURL, @@ -188,6 +213,7 @@ func (s *VulnerabilityScanner) processResults(scanID string, results models.Vuln FixedVersion: fixedVersion, PublishedAt: publishedAt, ModifiedAt: modifiedAt, + ReferenceLink: referenceLink, } } diff --git a/services/libs/tinybird/datasources/vulnerabilities.datasource b/services/libs/tinybird/datasources/vulnerabilities.datasource index dc700e6694..684d5c69be 100644 --- a/services/libs/tinybird/datasources/vulnerabilities.datasource +++ b/services/libs/tinybird/datasources/vulnerabilities.datasource @@ -20,6 +20,7 @@ DESCRIPTION > - `fixedVersion` is the version that fixes the vulnerability. - `publishedAt` is when the vulnerability was publicly disclosed. - `modifiedAt` is when the vulnerability record was last modified. + - `referenceLink` is the reference URL for this vulnerability. - `firstDetectedAt` is when the vulnerability was first detected in this repository. - `scannedAt` is the timestamp of the scan that last detected this vulnerability. - `resolvedAt` is when the vulnerability was resolved within this repository. @@ -45,6 +46,7 @@ SCHEMA > `fixedVersion` String `json:$.record.fixed_version` DEFAULT '', `publishedAt` Nullable(DateTime64(3)) `json:$.record.published_at`, `modifiedAt` Nullable(DateTime64(3)) `json:$.record.modified_at`, + `referenceLink` String `json:$.record.reference_link` DEFAULT '', `firstDetectedAt` DateTime64(3) `json:$.record.first_detected_at`, `scannedAt` DateTime64(3) `json:$.record.scanned_at`, `resolvedAt` Nullable(DateTime64(3)) `json:$.record.resolved_at` diff --git a/services/libs/tinybird/pipes/vulnerabilities_list.pipe b/services/libs/tinybird/pipes/vulnerabilities_list.pipe index 606fc0962d..1fbb493959 100644 --- a/services/libs/tinybird/pipes/vulnerabilities_list.pipe +++ b/services/libs/tinybird/pipes/vulnerabilities_list.pipe @@ -23,7 +23,7 @@ SQL > any (v.publishedAt) as publishedAt, any (v.status) as status, groupArray(concat(v.repoUrl, '/blob/HEAD', v.sourcePath)) as paths, - max(v.fixedVersion) as fixedVersion + max(v.fixedVersion) as fixedVersion, any (v.referenceLink) as referenceLink FROM vulnerabilities as v FINAL WHERE repoUrl in (select arrayJoin(repositories) from segments_filtered) From b3526a57d9c95f019b84e5c2edb365753462d830 Mon Sep 17 00:00:00 2001 From: anilb Date: Wed, 1 Apr 2026 00:03:05 +0200 Subject: [PATCH 2/3] feat(vulnerabilities): prioritize CVE ID for NVD URL construction Signed-off-by: anilb --- .../vulnerability_scanner.go | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/services/apps/git_integration/src/crowdgit/services/vulnerability_scanner/vulnerability_scanner.go b/services/apps/git_integration/src/crowdgit/services/vulnerability_scanner/vulnerability_scanner.go index 29517941be..e624c8c97e 100644 --- a/services/apps/git_integration/src/crowdgit/services/vulnerability_scanner/vulnerability_scanner.go +++ b/services/apps/git_integration/src/crowdgit/services/vulnerability_scanner/vulnerability_scanner.go @@ -139,19 +139,17 @@ func (s *VulnerabilityScanner) scan(transitiveScanning bool) (models.Vulnerabili } } -func (s *VulnerabilityScanner) selectBestReference(v *osvschema.Vulnerability) string { - if v == nil || v.References == nil || len(v.References) == 0 { - return "" +func (s *VulnerabilityScanner) selectBestReference(v *osvschema.Vulnerability, cveIDs []string) string { + // First priority: Construct NVD URL from CVE ID if available + if len(cveIDs) > 0 && cveIDs[0] != "" { + return "https://nvd.nist.gov/vuln/detail/" + cveIDs[0] } - // First pass: Check for NVD URL - for _, ref := range v.References { - if ref.Url != "" && strings.Contains(ref.Url, "nvd.nist.gov") { - return ref.Url - } + if v == nil || v.References == nil || len(v.References) == 0 { + return "" } - // Second pass: Select by type priority + // Second priority: Select by reference type priority for _, priorityType := range []string{"ADVISORY", "ARTICLE", "WEB", "INTRODUCED"} { for _, ref := range v.References { if ref.Type.String() == priorityType && ref.Url != "" { @@ -191,7 +189,7 @@ func (s *VulnerabilityScanner) processResults(scanID string, results models.Vuln status, fixedVersion := s.getFixInfo(v) publishedAt := protoTimestampToTime(v.Vulnerability.Published) modifiedAt := protoTimestampToTime(v.Vulnerability.Modified) - referenceLink := s.selectBestReference(v.Vulnerability) + referenceLink := s.selectBestReference(v.Vulnerability, cveIDs) seen[key] = Vulnerability{ RepoURL: s.gitURL, From c37a007298e3bc56b6d9d838462d2bb30c53b418 Mon Sep 17 00:00:00 2001 From: anilb Date: Wed, 1 Apr 2026 10:39:26 +0200 Subject: [PATCH 3/3] fix(vulnerabilities): use anyIf for referenceLink aggregation in Tinybird Signed-off-by: anilb --- services/libs/tinybird/pipes/vulnerabilities_list.pipe | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/services/libs/tinybird/pipes/vulnerabilities_list.pipe b/services/libs/tinybird/pipes/vulnerabilities_list.pipe index 1fbb493959..a60e60473f 100644 --- a/services/libs/tinybird/pipes/vulnerabilities_list.pipe +++ b/services/libs/tinybird/pipes/vulnerabilities_list.pipe @@ -23,7 +23,8 @@ SQL > any (v.publishedAt) as publishedAt, any (v.status) as status, groupArray(concat(v.repoUrl, '/blob/HEAD', v.sourcePath)) as paths, - max(v.fixedVersion) as fixedVersion, any (v.referenceLink) as referenceLink + max(v.fixedVersion) as fixedVersion, + anyIf(v.referenceLink, v.referenceLink != '') as referenceLink FROM vulnerabilities as v FINAL WHERE repoUrl in (select arrayJoin(repositories) from segments_filtered)