From 18fc79fa0a79119ece2d5791e2a3e1bcf718c6e1 Mon Sep 17 00:00:00 2001 From: Jonathan Miller Date: Sun, 7 Jun 2026 10:32:04 -0500 Subject: [PATCH] feat(security): add dependency vulnerability scanner (#551) Add dependency scanner module that parses manifest files (go.mod, package.json, composer.json, requirements.txt) and checks dependencies against the OSV.dev API for known CVEs. Implements the existing Scanner interface and wires into the orchestrator for push-time scanning. --- services/security/dependency_scanner.go | 541 ++++++++++++++++++++++++ services/security/orchestrator.go | 4 +- 2 files changed, 544 insertions(+), 1 deletion(-) create mode 100644 services/security/dependency_scanner.go diff --git a/services/security/dependency_scanner.go b/services/security/dependency_scanner.go new file mode 100644 index 0000000000..b5773abbfd --- /dev/null +++ b/services/security/dependency_scanner.go @@ -0,0 +1,541 @@ +// Copyright 2026 Moko Consulting +// SPDX-License-Identifier: GPL-3.0-or-later + +package security + +import ( + "bytes" + "crypto/sha256" + "encoding/json" + "fmt" + "io" + "net/http" + "strings" + "time" + + security_model "code.mokoconsulting.tech/MokoConsulting/MokoGitea/models/security" + "code.mokoconsulting.tech/MokoConsulting/MokoGitea/modules/git" + "code.mokoconsulting.tech/MokoConsulting/MokoGitea/modules/log" +) + +// ────────────────────────────────────────────────────────────────────── +// Dependency manifest parsers +// ────────────────────────────────────────────────────────────────────── + +// dependency represents a single package with version. +type dependency struct { + Name string + Version string + Ecosystem string // "Go", "npm", "PyPI", "Packagist" + FilePath string // which manifest file it came from +} + +// manifestParser extracts dependencies from a file's contents. +type manifestParser struct { + FileName string + Ecosystem string + Parse func(content string, filePath string) []dependency +} + +var manifestParsers = []manifestParser{ + {"go.mod", "Go", parseGoMod}, + {"package.json", "npm", parsePackageJSON}, + {"composer.json", "Packagist", parseComposerJSON}, + {"requirements.txt", "PyPI", parseRequirementsTxt}, +} + +// parseGoMod extracts dependencies from go.mod. +func parseGoMod(content, filePath string) []dependency { + var deps []dependency + inRequire := false + + for _, line := range strings.Split(content, "\n") { + line = strings.TrimSpace(line) + + if line == ")" { + inRequire = false + continue + } + if strings.HasPrefix(line, "require (") || strings.HasPrefix(line, "require(") { + inRequire = true + continue + } + + if inRequire { + // Lines like: github.com/foo/bar v1.2.3 + parts := strings.Fields(line) + if len(parts) >= 2 && !strings.HasPrefix(parts[0], "//") { + deps = append(deps, dependency{ + Name: parts[0], + Version: parts[1], + Ecosystem: "Go", + FilePath: filePath, + }) + } + continue + } + + // Single-line require: require github.com/foo/bar v1.2.3 + if strings.HasPrefix(line, "require ") && !strings.Contains(line, "(") { + parts := strings.Fields(line) + if len(parts) >= 3 { + deps = append(deps, dependency{ + Name: parts[1], + Version: parts[2], + Ecosystem: "Go", + FilePath: filePath, + }) + } + } + } + return deps +} + +// parsePackageJSON extracts dependencies from package.json. +func parsePackageJSON(content, filePath string) []dependency { + var pkg struct { + Dependencies map[string]string `json:"dependencies"` + DevDependencies map[string]string `json:"devDependencies"` + } + if err := json.Unmarshal([]byte(content), &pkg); err != nil { + return nil + } + + var deps []dependency + for name, version := range pkg.Dependencies { + deps = append(deps, dependency{ + Name: name, + Version: cleanSemver(version), + Ecosystem: "npm", + FilePath: filePath, + }) + } + for name, version := range pkg.DevDependencies { + deps = append(deps, dependency{ + Name: name, + Version: cleanSemver(version), + Ecosystem: "npm", + FilePath: filePath, + }) + } + return deps +} + +// parseComposerJSON extracts dependencies from composer.json. +func parseComposerJSON(content, filePath string) []dependency { + var pkg struct { + Require map[string]string `json:"require"` + RequireDev map[string]string `json:"require-dev"` + } + if err := json.Unmarshal([]byte(content), &pkg); err != nil { + return nil + } + + var deps []dependency + for name, version := range pkg.Require { + if name == "php" || strings.HasPrefix(name, "ext-") { + continue // skip platform requirements + } + deps = append(deps, dependency{ + Name: name, + Version: cleanSemver(version), + Ecosystem: "Packagist", + FilePath: filePath, + }) + } + for name, version := range pkg.RequireDev { + if name == "php" || strings.HasPrefix(name, "ext-") { + continue + } + deps = append(deps, dependency{ + Name: name, + Version: cleanSemver(version), + Ecosystem: "Packagist", + FilePath: filePath, + }) + } + return deps +} + +// parseRequirementsTxt extracts dependencies from requirements.txt. +func parseRequirementsTxt(content, filePath string) []dependency { + var deps []dependency + for _, line := range strings.Split(content, "\n") { + line = strings.TrimSpace(line) + if line == "" || strings.HasPrefix(line, "#") || strings.HasPrefix(line, "-") { + continue + } + + // Handle: package==1.0.0, package>=1.0.0, package~=1.0.0 + for _, sep := range []string{"==", ">=", "~=", "<=", "!="} { + if idx := strings.Index(line, sep); idx > 0 { + name := strings.TrimSpace(line[:idx]) + version := strings.TrimSpace(line[idx+len(sep):]) + // Strip any trailing constraints like ",<2.0" + if ci := strings.Index(version, ","); ci > 0 { + version = version[:ci] + } + deps = append(deps, dependency{ + Name: name, + Version: version, + Ecosystem: "PyPI", + FilePath: filePath, + }) + break + } + } + } + return deps +} + +// cleanSemver strips npm/composer range prefixes (^, ~, >=) to get a plain version. +func cleanSemver(v string) string { + v = strings.TrimSpace(v) + v = strings.TrimLeft(v, "^~>= 0 { + v = v[:idx] + } + return v +} + +// ────────────────────────────────────────────────────────────────────── +// OSV.dev API client +// ────────────────────────────────────────────────────────────────────── + +const osvBatchURL = "https://api.osv.dev/v1/querybatch" +const osvMaxBatch = 1000 // OSV batch limit + +var osvClient = &http.Client{Timeout: 30 * time.Second} + +// osvQuery is a single query in a batch request. +type osvQuery struct { + Package *osvPackage `json:"package"` + Version string `json:"version"` +} + +type osvPackage struct { + Name string `json:"name"` + Ecosystem string `json:"ecosystem"` +} + +// osvBatchRequest is the batch query body. +type osvBatchRequest struct { + Queries []osvQuery `json:"queries"` +} + +// osvBatchResponse is the batch response. +type osvBatchResponse struct { + Results []osvResult `json:"results"` +} + +type osvResult struct { + Vulns []osvVuln `json:"vulns"` +} + +type osvVuln struct { + ID string `json:"id"` + Summary string `json:"summary"` + Details string `json:"details"` + Severity []osvSeverity `json:"severity"` + Aliases []string `json:"aliases"` +} + +type osvSeverity struct { + Type string `json:"type"` // "CVSS_V3", "CVSS_V2" + Score string `json:"score"` // CVSS vector string +} + +// queryOSV sends a batch of dependencies to OSV.dev and returns vulnerabilities. +func queryOSV(deps []dependency) (*osvBatchResponse, error) { + queries := make([]osvQuery, 0, len(deps)) + for _, d := range deps { + if d.Version == "" || d.Version == "*" || d.Version == "latest" { + continue // can't query without a concrete version + } + queries = append(queries, osvQuery{ + Package: &osvPackage{Name: d.Name, Ecosystem: d.Ecosystem}, + Version: d.Version, + }) + } + + if len(queries) == 0 { + return &osvBatchResponse{}, nil + } + + body, err := json.Marshal(osvBatchRequest{Queries: queries}) + if err != nil { + return nil, fmt.Errorf("marshal OSV request: %w", err) + } + + resp, err := osvClient.Post(osvBatchURL, "application/json", bytes.NewReader(body)) + if err != nil { + return nil, fmt.Errorf("OSV API request: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 1024)) + return nil, fmt.Errorf("OSV API returned %d: %s", resp.StatusCode, string(respBody)) + } + + var result osvBatchResponse + if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { + return nil, fmt.Errorf("decode OSV response: %w", err) + } + return &result, nil +} + +// ────────────────────────────────────────────────────────────────────── +// Severity mapping +// ────────────────────────────────────────────────────────────────────── + +// mapCVSSSeverity converts a CVSS v3 base score to an AlertSeverity. +func mapCVSSSeverity(vulnSeverities []osvSeverity) security_model.AlertSeverity { + for _, s := range vulnSeverities { + if s.Type == "CVSS_V3" { + score := extractCVSSBaseScore(s.Score) + switch { + case score >= 9.0: + return security_model.SeverityCritical + case score >= 7.0: + return security_model.SeverityHigh + case score >= 4.0: + return security_model.SeverityMedium + case score > 0: + return security_model.SeverityLow + } + } + } + + // No CVSS score available - default to medium + return security_model.SeverityMedium +} + +// extractCVSSBaseScore parses the base score from a CVSS v3 vector string. +// Vector format: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H +// We compute a simplified score from the vector metrics. +func extractCVSSBaseScore(vector string) float64 { + if vector == "" { + return 0 + } + + // CVSS v3 vectors encode severity in metrics. Use a simplified + // lookup based on the most impactful metrics. + parts := make(map[string]string) + for _, segment := range strings.Split(vector, "/") { + kv := strings.SplitN(segment, ":", 2) + if len(kv) == 2 { + parts[kv[0]] = kv[1] + } + } + + // Simplified scoring based on key CVSS v3 metrics + var score float64 + + // Attack Vector (AV) + switch parts["AV"] { + case "N": // Network + score += 3.0 + case "A": // Adjacent + score += 2.0 + case "L": // Local + score += 1.0 + case "P": // Physical + score += 0.5 + } + + // Attack Complexity (AC) + switch parts["AC"] { + case "L": // Low + score += 1.5 + case "H": // High + score += 0.5 + } + + // Privileges Required (PR) + switch parts["PR"] { + case "N": // None + score += 1.5 + case "L": // Low + score += 1.0 + case "H": // High + score += 0.5 + } + + // Impact metrics (C/I/A) + for _, metric := range []string{"C", "I", "A"} { + switch parts[metric] { + case "H": + score += 1.2 + case "L": + score += 0.5 + } + } + + // Cap at 10.0 + if score > 10.0 { + score = 10.0 + } + return score +} + +// ────────────────────────────────────────────────────────────────────── +// DependencyScanner +// ────────────────────────────────────────────────────────────────────── + +// DependencyScanner checks project dependencies against known vulnerabilities. +type DependencyScanner struct{} + +// NewDependencyScanner creates a new dependency vulnerability scanner. +func NewDependencyScanner() *DependencyScanner { + return &DependencyScanner{} +} + +func (s *DependencyScanner) Type() security_model.ScannerType { + return security_model.ScannerDependency +} + +func (s *DependencyScanner) ScanCommit(commit *git.Commit) ([]Finding, error) { + return s.ScanTree(commit) +} + +func (s *DependencyScanner) ScanTree(commit *git.Commit) ([]Finding, error) { + if commit == nil { + return nil, nil + } + + // Step 1: Find and parse manifest files + entries, err := commit.ListEntriesRecursiveFast() + if err != nil { + return nil, fmt.Errorf("ListEntriesRecursiveFast: %w", err) + } + + var allDeps []dependency + for _, entry := range entries { + if !entry.IsRegular() { + continue + } + + path := entry.Name() + baseName := path + if idx := strings.LastIndex(path, "/"); idx >= 0 { + baseName = path[idx+1:] + } + + // Skip vendored/nested files + lower := strings.ToLower(path) + if strings.Contains(lower, "vendor/") || strings.Contains(lower, "node_modules/") || + strings.Contains(lower, "testdata/") { + continue + } + + for _, parser := range manifestParsers { + if baseName == parser.FileName { + reader, err := entry.Blob().DataAsync() + if err != nil { + log.Trace("DependencyScanner: skip %s: %v", path, err) + continue + } + content, err := io.ReadAll(io.LimitReader(reader, 5*1024*1024)) // 5MB limit + reader.Close() + if err != nil { + continue + } + + deps := parser.Parse(string(content), path) + allDeps = append(allDeps, deps...) + break + } + } + } + + if len(allDeps) == 0 { + return nil, nil + } + + log.Info("DependencyScanner: found %d dependencies across manifest files", len(allDeps)) + + // Step 2: Query OSV in batches + var findings []Finding + for i := 0; i < len(allDeps); i += osvMaxBatch { + end := i + osvMaxBatch + if end > len(allDeps) { + end = len(allDeps) + } + batch := allDeps[i:end] + + resp, err := queryOSV(batch) + if err != nil { + log.Error("DependencyScanner: OSV query failed: %v", err) + continue + } + + // Step 3: Map results to findings + // OSV batch response indices correspond 1:1 with the query indices. + // But we may have skipped deps with empty versions, so build the + // queryable subset to align indices. + queryable := make([]dependency, 0, len(batch)) + for _, d := range batch { + if d.Version != "" && d.Version != "*" && d.Version != "latest" { + queryable = append(queryable, d) + } + } + + for j, result := range resp.Results { + if j >= len(queryable) { + break + } + dep := queryable[j] + + for _, vuln := range result.Vulns { + severity := mapCVSSSeverity(vuln.Severity) + + // Build CVE alias for rule ID (prefer CVE over GHSA) + ruleID := vuln.ID + for _, alias := range vuln.Aliases { + if strings.HasPrefix(alias, "CVE-") { + ruleID = alias + break + } + } + + title := fmt.Sprintf("%s in %s@%s", ruleID, dep.Name, dep.Version) + + description := vuln.Summary + if description == "" { + description = vuln.Details + } + // Truncate long descriptions + if len(description) > 500 { + description = description[:497] + "..." + } + + // Metadata JSON + meta, _ := json.Marshal(map[string]string{ + "vuln_id": vuln.ID, + "ecosystem": dep.Ecosystem, + "package": dep.Name, + "version": dep.Version, + }) + + fingerprint := fmt.Sprintf("%x", sha256.Sum256([]byte(vuln.ID+":"+dep.Name+":"+dep.Version))) + + findings = append(findings, Finding{ + Scanner: security_model.ScannerDependency, + Severity: severity, + RuleID: ruleID, + Title: title, + Description: description, + FilePath: dep.FilePath, + CommitSHA: commit.ID.String(), + Fingerprint: fingerprint[:32], + Metadata: string(meta), + }) + } + } + } + + return findings, nil +} diff --git a/services/security/orchestrator.go b/services/security/orchestrator.go index a44c9c1b73..f4108d9a89 100644 --- a/services/security/orchestrator.go +++ b/services/security/orchestrator.go @@ -32,8 +32,10 @@ func ScanOnPush(ctx context.Context, repo *repo_model.Repository, commit *git.Co if cfg.SecretScanner { scanners = append(scanners, NewSecretScanner()) } + if cfg.DependScanner { + scanners = append(scanners, NewDependencyScanner()) + } // Future scanners added here: - // if cfg.DependScanner { scanners = append(scanners, NewDependencyScanner()) } // if cfg.CodeScanner { scanners = append(scanners, NewCodeScanner()) } if len(scanners) == 0 { -- 2.52.0