diff --git a/services/security/dependency_scanner.go b/services/security/dependency_scanner.go new file mode 100644 index 0000000000..b5773abbfd --- /dev/null +++ b/services/security/dependency_scanner.go @@ -0,0 +1,541 @@ +// Copyright 2026 Moko Consulting +// SPDX-License-Identifier: GPL-3.0-or-later + +package security + +import ( + "bytes" + "crypto/sha256" + "encoding/json" + "fmt" + "io" + "net/http" + "strings" + "time" + + security_model "code.mokoconsulting.tech/MokoConsulting/MokoGitea/models/security" + "code.mokoconsulting.tech/MokoConsulting/MokoGitea/modules/git" + "code.mokoconsulting.tech/MokoConsulting/MokoGitea/modules/log" +) + +// ────────────────────────────────────────────────────────────────────── +// Dependency manifest parsers +// ────────────────────────────────────────────────────────────────────── + +// dependency represents a single package with version. +type dependency struct { + Name string + Version string + Ecosystem string // "Go", "npm", "PyPI", "Packagist" + FilePath string // which manifest file it came from +} + +// manifestParser extracts dependencies from a file's contents. +type manifestParser struct { + FileName string + Ecosystem string + Parse func(content string, filePath string) []dependency +} + +var manifestParsers = []manifestParser{ + {"go.mod", "Go", parseGoMod}, + {"package.json", "npm", parsePackageJSON}, + {"composer.json", "Packagist", parseComposerJSON}, + {"requirements.txt", "PyPI", parseRequirementsTxt}, +} + +// parseGoMod extracts dependencies from go.mod. +func parseGoMod(content, filePath string) []dependency { + var deps []dependency + inRequire := false + + for _, line := range strings.Split(content, "\n") { + line = strings.TrimSpace(line) + + if line == ")" { + inRequire = false + continue + } + if strings.HasPrefix(line, "require (") || strings.HasPrefix(line, "require(") { + inRequire = true + continue + } + + if inRequire { + // Lines like: github.com/foo/bar v1.2.3 + parts := strings.Fields(line) + if len(parts) >= 2 && !strings.HasPrefix(parts[0], "//") { + deps = append(deps, dependency{ + Name: parts[0], + Version: parts[1], + Ecosystem: "Go", + FilePath: filePath, + }) + } + continue + } + + // Single-line require: require github.com/foo/bar v1.2.3 + if strings.HasPrefix(line, "require ") && !strings.Contains(line, "(") { + parts := strings.Fields(line) + if len(parts) >= 3 { + deps = append(deps, dependency{ + Name: parts[1], + Version: parts[2], + Ecosystem: "Go", + FilePath: filePath, + }) + } + } + } + return deps +} + +// parsePackageJSON extracts dependencies from package.json. +func parsePackageJSON(content, filePath string) []dependency { + var pkg struct { + Dependencies map[string]string `json:"dependencies"` + DevDependencies map[string]string `json:"devDependencies"` + } + if err := json.Unmarshal([]byte(content), &pkg); err != nil { + return nil + } + + var deps []dependency + for name, version := range pkg.Dependencies { + deps = append(deps, dependency{ + Name: name, + Version: cleanSemver(version), + Ecosystem: "npm", + FilePath: filePath, + }) + } + for name, version := range pkg.DevDependencies { + deps = append(deps, dependency{ + Name: name, + Version: cleanSemver(version), + Ecosystem: "npm", + FilePath: filePath, + }) + } + return deps +} + +// parseComposerJSON extracts dependencies from composer.json. +func parseComposerJSON(content, filePath string) []dependency { + var pkg struct { + Require map[string]string `json:"require"` + RequireDev map[string]string `json:"require-dev"` + } + if err := json.Unmarshal([]byte(content), &pkg); err != nil { + return nil + } + + var deps []dependency + for name, version := range pkg.Require { + if name == "php" || strings.HasPrefix(name, "ext-") { + continue // skip platform requirements + } + deps = append(deps, dependency{ + Name: name, + Version: cleanSemver(version), + Ecosystem: "Packagist", + FilePath: filePath, + }) + } + for name, version := range pkg.RequireDev { + if name == "php" || strings.HasPrefix(name, "ext-") { + continue + } + deps = append(deps, dependency{ + Name: name, + Version: cleanSemver(version), + Ecosystem: "Packagist", + FilePath: filePath, + }) + } + return deps +} + +// parseRequirementsTxt extracts dependencies from requirements.txt. +func parseRequirementsTxt(content, filePath string) []dependency { + var deps []dependency + for _, line := range strings.Split(content, "\n") { + line = strings.TrimSpace(line) + if line == "" || strings.HasPrefix(line, "#") || strings.HasPrefix(line, "-") { + continue + } + + // Handle: package==1.0.0, package>=1.0.0, package~=1.0.0 + for _, sep := range []string{"==", ">=", "~=", "<=", "!="} { + if idx := strings.Index(line, sep); idx > 0 { + name := strings.TrimSpace(line[:idx]) + version := strings.TrimSpace(line[idx+len(sep):]) + // Strip any trailing constraints like ",<2.0" + if ci := strings.Index(version, ","); ci > 0 { + version = version[:ci] + } + deps = append(deps, dependency{ + Name: name, + Version: version, + Ecosystem: "PyPI", + FilePath: filePath, + }) + break + } + } + } + return deps +} + +// cleanSemver strips npm/composer range prefixes (^, ~, >=) to get a plain version. +func cleanSemver(v string) string { + v = strings.TrimSpace(v) + v = strings.TrimLeft(v, "^~>= 0 { + v = v[:idx] + } + return v +} + +// ────────────────────────────────────────────────────────────────────── +// OSV.dev API client +// ────────────────────────────────────────────────────────────────────── + +const osvBatchURL = "https://api.osv.dev/v1/querybatch" +const osvMaxBatch = 1000 // OSV batch limit + +var osvClient = &http.Client{Timeout: 30 * time.Second} + +// osvQuery is a single query in a batch request. +type osvQuery struct { + Package *osvPackage `json:"package"` + Version string `json:"version"` +} + +type osvPackage struct { + Name string `json:"name"` + Ecosystem string `json:"ecosystem"` +} + +// osvBatchRequest is the batch query body. +type osvBatchRequest struct { + Queries []osvQuery `json:"queries"` +} + +// osvBatchResponse is the batch response. +type osvBatchResponse struct { + Results []osvResult `json:"results"` +} + +type osvResult struct { + Vulns []osvVuln `json:"vulns"` +} + +type osvVuln struct { + ID string `json:"id"` + Summary string `json:"summary"` + Details string `json:"details"` + Severity []osvSeverity `json:"severity"` + Aliases []string `json:"aliases"` +} + +type osvSeverity struct { + Type string `json:"type"` // "CVSS_V3", "CVSS_V2" + Score string `json:"score"` // CVSS vector string +} + +// queryOSV sends a batch of dependencies to OSV.dev and returns vulnerabilities. +func queryOSV(deps []dependency) (*osvBatchResponse, error) { + queries := make([]osvQuery, 0, len(deps)) + for _, d := range deps { + if d.Version == "" || d.Version == "*" || d.Version == "latest" { + continue // can't query without a concrete version + } + queries = append(queries, osvQuery{ + Package: &osvPackage{Name: d.Name, Ecosystem: d.Ecosystem}, + Version: d.Version, + }) + } + + if len(queries) == 0 { + return &osvBatchResponse{}, nil + } + + body, err := json.Marshal(osvBatchRequest{Queries: queries}) + if err != nil { + return nil, fmt.Errorf("marshal OSV request: %w", err) + } + + resp, err := osvClient.Post(osvBatchURL, "application/json", bytes.NewReader(body)) + if err != nil { + return nil, fmt.Errorf("OSV API request: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 1024)) + return nil, fmt.Errorf("OSV API returned %d: %s", resp.StatusCode, string(respBody)) + } + + var result osvBatchResponse + if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { + return nil, fmt.Errorf("decode OSV response: %w", err) + } + return &result, nil +} + +// ────────────────────────────────────────────────────────────────────── +// Severity mapping +// ────────────────────────────────────────────────────────────────────── + +// mapCVSSSeverity converts a CVSS v3 base score to an AlertSeverity. +func mapCVSSSeverity(vulnSeverities []osvSeverity) security_model.AlertSeverity { + for _, s := range vulnSeverities { + if s.Type == "CVSS_V3" { + score := extractCVSSBaseScore(s.Score) + switch { + case score >= 9.0: + return security_model.SeverityCritical + case score >= 7.0: + return security_model.SeverityHigh + case score >= 4.0: + return security_model.SeverityMedium + case score > 0: + return security_model.SeverityLow + } + } + } + + // No CVSS score available - default to medium + return security_model.SeverityMedium +} + +// extractCVSSBaseScore parses the base score from a CVSS v3 vector string. +// Vector format: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H +// We compute a simplified score from the vector metrics. +func extractCVSSBaseScore(vector string) float64 { + if vector == "" { + return 0 + } + + // CVSS v3 vectors encode severity in metrics. Use a simplified + // lookup based on the most impactful metrics. + parts := make(map[string]string) + for _, segment := range strings.Split(vector, "/") { + kv := strings.SplitN(segment, ":", 2) + if len(kv) == 2 { + parts[kv[0]] = kv[1] + } + } + + // Simplified scoring based on key CVSS v3 metrics + var score float64 + + // Attack Vector (AV) + switch parts["AV"] { + case "N": // Network + score += 3.0 + case "A": // Adjacent + score += 2.0 + case "L": // Local + score += 1.0 + case "P": // Physical + score += 0.5 + } + + // Attack Complexity (AC) + switch parts["AC"] { + case "L": // Low + score += 1.5 + case "H": // High + score += 0.5 + } + + // Privileges Required (PR) + switch parts["PR"] { + case "N": // None + score += 1.5 + case "L": // Low + score += 1.0 + case "H": // High + score += 0.5 + } + + // Impact metrics (C/I/A) + for _, metric := range []string{"C", "I", "A"} { + switch parts[metric] { + case "H": + score += 1.2 + case "L": + score += 0.5 + } + } + + // Cap at 10.0 + if score > 10.0 { + score = 10.0 + } + return score +} + +// ────────────────────────────────────────────────────────────────────── +// DependencyScanner +// ────────────────────────────────────────────────────────────────────── + +// DependencyScanner checks project dependencies against known vulnerabilities. +type DependencyScanner struct{} + +// NewDependencyScanner creates a new dependency vulnerability scanner. +func NewDependencyScanner() *DependencyScanner { + return &DependencyScanner{} +} + +func (s *DependencyScanner) Type() security_model.ScannerType { + return security_model.ScannerDependency +} + +func (s *DependencyScanner) ScanCommit(commit *git.Commit) ([]Finding, error) { + return s.ScanTree(commit) +} + +func (s *DependencyScanner) ScanTree(commit *git.Commit) ([]Finding, error) { + if commit == nil { + return nil, nil + } + + // Step 1: Find and parse manifest files + entries, err := commit.ListEntriesRecursiveFast() + if err != nil { + return nil, fmt.Errorf("ListEntriesRecursiveFast: %w", err) + } + + var allDeps []dependency + for _, entry := range entries { + if !entry.IsRegular() { + continue + } + + path := entry.Name() + baseName := path + if idx := strings.LastIndex(path, "/"); idx >= 0 { + baseName = path[idx+1:] + } + + // Skip vendored/nested files + lower := strings.ToLower(path) + if strings.Contains(lower, "vendor/") || strings.Contains(lower, "node_modules/") || + strings.Contains(lower, "testdata/") { + continue + } + + for _, parser := range manifestParsers { + if baseName == parser.FileName { + reader, err := entry.Blob().DataAsync() + if err != nil { + log.Trace("DependencyScanner: skip %s: %v", path, err) + continue + } + content, err := io.ReadAll(io.LimitReader(reader, 5*1024*1024)) // 5MB limit + reader.Close() + if err != nil { + continue + } + + deps := parser.Parse(string(content), path) + allDeps = append(allDeps, deps...) + break + } + } + } + + if len(allDeps) == 0 { + return nil, nil + } + + log.Info("DependencyScanner: found %d dependencies across manifest files", len(allDeps)) + + // Step 2: Query OSV in batches + var findings []Finding + for i := 0; i < len(allDeps); i += osvMaxBatch { + end := i + osvMaxBatch + if end > len(allDeps) { + end = len(allDeps) + } + batch := allDeps[i:end] + + resp, err := queryOSV(batch) + if err != nil { + log.Error("DependencyScanner: OSV query failed: %v", err) + continue + } + + // Step 3: Map results to findings + // OSV batch response indices correspond 1:1 with the query indices. + // But we may have skipped deps with empty versions, so build the + // queryable subset to align indices. + queryable := make([]dependency, 0, len(batch)) + for _, d := range batch { + if d.Version != "" && d.Version != "*" && d.Version != "latest" { + queryable = append(queryable, d) + } + } + + for j, result := range resp.Results { + if j >= len(queryable) { + break + } + dep := queryable[j] + + for _, vuln := range result.Vulns { + severity := mapCVSSSeverity(vuln.Severity) + + // Build CVE alias for rule ID (prefer CVE over GHSA) + ruleID := vuln.ID + for _, alias := range vuln.Aliases { + if strings.HasPrefix(alias, "CVE-") { + ruleID = alias + break + } + } + + title := fmt.Sprintf("%s in %s@%s", ruleID, dep.Name, dep.Version) + + description := vuln.Summary + if description == "" { + description = vuln.Details + } + // Truncate long descriptions + if len(description) > 500 { + description = description[:497] + "..." + } + + // Metadata JSON + meta, _ := json.Marshal(map[string]string{ + "vuln_id": vuln.ID, + "ecosystem": dep.Ecosystem, + "package": dep.Name, + "version": dep.Version, + }) + + fingerprint := fmt.Sprintf("%x", sha256.Sum256([]byte(vuln.ID+":"+dep.Name+":"+dep.Version))) + + findings = append(findings, Finding{ + Scanner: security_model.ScannerDependency, + Severity: severity, + RuleID: ruleID, + Title: title, + Description: description, + FilePath: dep.FilePath, + CommitSHA: commit.ID.String(), + Fingerprint: fingerprint[:32], + Metadata: string(meta), + }) + } + } + } + + return findings, nil +} diff --git a/services/security/orchestrator.go b/services/security/orchestrator.go index a44c9c1b73..f4108d9a89 100644 --- a/services/security/orchestrator.go +++ b/services/security/orchestrator.go @@ -32,8 +32,10 @@ func ScanOnPush(ctx context.Context, repo *repo_model.Repository, commit *git.Co if cfg.SecretScanner { scanners = append(scanners, NewSecretScanner()) } + if cfg.DependScanner { + scanners = append(scanners, NewDependencyScanner()) + } // Future scanners added here: - // if cfg.DependScanner { scanners = append(scanners, NewDependencyScanner()) } // if cfg.CodeScanner { scanners = append(scanners, NewCodeScanner()) } if len(scanners) == 0 {