feat(security): dependency vulnerability scanner #562

Merged
jmiller merged 1 commit from feat/dependency-scanner into dev 2026-06-07 16:12:32 +00:00
2 changed files with 544 additions and 1 deletion
+541
View File
@@ -0,0 +1,541 @@
// Copyright 2026 Moko Consulting <hello@mokoconsulting.tech>
// SPDX-License-Identifier: GPL-3.0-or-later
package security
import (
"bytes"
"crypto/sha256"
"encoding/json"
"fmt"
"io"
"net/http"
"strings"
"time"
security_model "code.mokoconsulting.tech/MokoConsulting/MokoGitea/models/security"
"code.mokoconsulting.tech/MokoConsulting/MokoGitea/modules/git"
"code.mokoconsulting.tech/MokoConsulting/MokoGitea/modules/log"
)
// ──────────────────────────────────────────────────────────────────────
// Dependency manifest parsers
// ──────────────────────────────────────────────────────────────────────
// Types shared by the manifest parsers.
type (
	// dependency is one package reference read out of a manifest file.
	dependency struct {
		Name      string // package name as written in the manifest
		Version   string // version string as produced by the parser
		Ecosystem string // "Go", "npm", "PyPI", "Packagist"
		FilePath  string // which manifest file it came from
	}

	// manifestParser pairs a manifest file name with the function that
	// extracts dependencies from that file's contents.
	manifestParser struct {
		FileName  string
		Ecosystem string
		Parse     func(content string, filePath string) []dependency
	}
)
// manifestParsers is the table of supported manifest files: the file name to
// look for, the ecosystem label attached to its packages, and the parser used
// for its contents.
var manifestParsers = []manifestParser{
	{FileName: "go.mod", Ecosystem: "Go", Parse: parseGoMod},
	{FileName: "package.json", Ecosystem: "npm", Parse: parsePackageJSON},
	{FileName: "composer.json", Ecosystem: "Packagist", Parse: parseComposerJSON},
	{FileName: "requirements.txt", Ecosystem: "PyPI", Parse: parseRequirementsTxt},
}
// parseGoMod reads the require directives of a go.mod file and returns one
// dependency per required module. Both the block form ("require ( ... )")
// and the single-line form ("require module version") are recognised.
// filePath is recorded on every returned dependency.
func parseGoMod(content, filePath string) []dependency {
	var found []dependency
	record := func(module, version string) {
		found = append(found, dependency{
			Name:      module,
			Version:   version,
			Ecosystem: "Go",
			FilePath:  filePath,
		})
	}

	insideBlock := false
	for _, raw := range strings.Split(content, "\n") {
		text := strings.TrimSpace(raw)
		switch {
		case text == ")":
			// A lone closing parenthesis ends whatever block was open.
			insideBlock = false
		case strings.HasPrefix(text, "require (") || strings.HasPrefix(text, "require("):
			insideBlock = true
		case insideBlock:
			// Entries look like: github.com/foo/bar v1.2.3
			// Comment-only lines start with "//" and are ignored.
			if fields := strings.Fields(text); len(fields) >= 2 && !strings.HasPrefix(fields[0], "//") {
				record(fields[0], fields[1])
			}
		case strings.HasPrefix(text, "require ") && !strings.Contains(text, "("):
			// Single-line form: require github.com/foo/bar v1.2.3
			if fields := strings.Fields(text); len(fields) >= 3 {
				record(fields[1], fields[2])
			}
		}
	}
	return found
}
// parsePackageJSON returns the packages listed under "dependencies" and
// "devDependencies" of a package.json document, in that section order.
// Each version range is reduced with cleanSemver. Content that is not valid
// JSON yields nil.
func parsePackageJSON(content, filePath string) []dependency {
	manifest := struct {
		Dependencies    map[string]string `json:"dependencies"`
		DevDependencies map[string]string `json:"devDependencies"`
	}{}
	if json.Unmarshal([]byte(content), &manifest) != nil {
		return nil
	}

	var collected []dependency
	sections := []map[string]string{manifest.Dependencies, manifest.DevDependencies}
	for _, section := range sections {
		for pkgName, constraint := range section {
			collected = append(collected, dependency{
				Name:      pkgName,
				Version:   cleanSemver(constraint),
				Ecosystem: "npm",
				FilePath:  filePath,
			})
		}
	}
	return collected
}
// parseComposerJSON returns the packages listed under "require" and
// "require-dev" of a composer.json document, in that section order.
// Platform requirements ("php" and "ext-*") are not packages and are left
// out. Each constraint is reduced with cleanSemver. Content that is not
// valid JSON yields nil.
func parseComposerJSON(content, filePath string) []dependency {
	manifest := struct {
		Require    map[string]string `json:"require"`
		RequireDev map[string]string `json:"require-dev"`
	}{}
	if json.Unmarshal([]byte(content), &manifest) != nil {
		return nil
	}

	var collected []dependency
	sections := []map[string]string{manifest.Require, manifest.RequireDev}
	for _, section := range sections {
		for pkgName, constraint := range section {
			isPlatform := pkgName == "php" || strings.HasPrefix(pkgName, "ext-")
			if isPlatform {
				continue // skip platform requirements
			}
			collected = append(collected, dependency{
				Name:      pkgName,
				Version:   cleanSemver(constraint),
				Ecosystem: "Packagist",
				FilePath:  filePath,
			})
		}
	}
	return collected
}
// parseRequirementsTxt extracts dependencies from a pip requirements.txt.
//
// For every requirement line it returns the package name and the version of
// the first usable version clause. Handled forms:
//
//	package==1.0.0, package>=1.0.0, package~=1.0.0, package<=1.0.0
//	package[extra]==1.0.0            (extras are dropped from the name)
//	package>=1.0,<2.0                (first usable clause wins)
//	package!=1.5,>=1.10              (exclusions are skipped, giving 1.10)
//	package==1.0 ; python_version<"3" (environment markers are dropped)
//	package==1.0 --hash=sha256:...   (per-requirement options are dropped)
//	package==1.0  # note             (inline comments are dropped)
//
// Blank lines, comment lines, option lines (starting with "-"), lines
// without a version specifier, and direct URL references are skipped.
// filePath is recorded on every returned dependency.
func parseRequirementsTxt(content, filePath string) []dependency {
	// Operators whose operand is treated as the version to look up. "!=" is
	// deliberately absent: an excluded version is never the installed one.
	versionOps := []string{"===", "==", "~=", ">=", "<="}

	var deps []dependency
	for _, line := range strings.Split(content, "\n") {
		// A '#' preceded by whitespace starts an inline comment.
		for _, marker := range []string{" #", "\t#"} {
			if idx := strings.Index(line, marker); idx >= 0 {
				line = line[:idx]
			}
		}
		// Everything after ';' is an environment marker, not a version.
		if idx := strings.Index(line, ";"); idx >= 0 {
			line = line[:idx]
		}
		line = strings.TrimSpace(line)
		if line == "" || strings.HasPrefix(line, "#") || strings.HasPrefix(line, "-") {
			continue
		}

		// The name ends where the first comparison operator begins.
		specStart := strings.IndexAny(line, "=<>!~")
		if specStart <= 0 {
			continue
		}
		name := strings.TrimSpace(line[:specStart])
		if idx := strings.Index(name, "["); idx >= 0 {
			name = strings.TrimSpace(name[:idx]) // drop extras such as [security]
		}
		if name == "" || strings.ContainsAny(name, " \t@/:") {
			continue // direct references / URLs are not name+version pairs
		}

		version := ""
		for _, clause := range strings.Split(line[specStart:], ",") {
			clause = strings.TrimSpace(clause)
			for _, op := range versionOps {
				if !strings.HasPrefix(clause, op) {
					continue
				}
				// Keep only the first token so trailing options
				// ("--hash=...") or a line continuation ("\") are dropped.
				if fields := strings.Fields(clause[len(op):]); len(fields) > 0 {
					version = fields[0]
				}
				break
			}
			if version != "" {
				break
			}
		}
		if version == "" {
			continue
		}

		deps = append(deps, dependency{
			Name:      name,
			Version:   version,
			Ecosystem: "PyPI",
			FilePath:  filePath,
		})
	}
	return deps
}
// cleanSemver reduces an npm/Composer version constraint to a single plain
// version string.
//
// Leading range operators (^, ~, >, =, <, !) are stripped, and for compound
// constraints only the first version is kept. A constraint is considered
// compound when versions are separated by whitespace ("1.0 - 2.0",
// ">=1.0 <2.0", "1.0 || 2.0"), by a comma (">=1.0,<2.0"), or by a pipe
// written without spaces ("^1.0|^2.0", "^1.0||^2.0").
//
// Values that carry no operator or separator (for example "*" or "latest")
// are returned unchanged apart from surrounding whitespace.
func cleanSemver(v string) string {
	v = strings.TrimSpace(v)
	v = strings.TrimLeft(v, "^~>=<!")
	v = strings.TrimSpace(v)
	// Cut at the first separator between alternative or combined constraints.
	if idx := strings.IndexAny(v, " \t,|"); idx >= 0 {
		v = v[:idx]
	}
	return v
}
// ──────────────────────────────────────────────────────────────────────
// OSV.dev API client
// ──────────────────────────────────────────────────────────────────────
// Settings for the OSV.dev batch query endpoint.
const (
	osvBatchURL = "https://api.osv.dev/v1/querybatch"
	osvMaxBatch = 1000 // OSV batch limit
)

// osvClient is the HTTP client used for every OSV request; each request is
// abandoned after 30 seconds.
var osvClient = &http.Client{Timeout: 30 * time.Second}
// Wire types for the OSV.dev batch query API.
type (
	// osvQuery is a single query in a batch request: one package at one
	// version.
	osvQuery struct {
		Package *osvPackage `json:"package"`
		Version string      `json:"version"`
	}

	// osvPackage identifies a package by name within an ecosystem.
	osvPackage struct {
		Name      string `json:"name"`
		Ecosystem string `json:"ecosystem"`
	}

	// osvBatchRequest is the batch query body.
	osvBatchRequest struct {
		Queries []osvQuery `json:"queries"`
	}

	// osvBatchResponse is the batch response.
	osvBatchResponse struct {
		Results []osvResult `json:"results"`
	}

	// osvResult holds the vulnerabilities reported for one query.
	osvResult struct {
		Vulns []osvVuln `json:"vulns"`
	}

	// osvVuln is one vulnerability record as decoded from the response.
	osvVuln struct {
		ID       string        `json:"id"`
		Summary  string        `json:"summary"`
		Details  string        `json:"details"`
		Severity []osvSeverity `json:"severity"`
		Aliases  []string      `json:"aliases"`
	}

	// osvSeverity is one severity rating attached to a vulnerability.
	osvSeverity struct {
		Type  string `json:"type"`  // "CVSS_V3", "CVSS_V2"
		Score string `json:"score"` // CVSS vector string
	}
)
// queryOSV posts one batch of dependencies to OSV.dev and returns the decoded
// response.
//
// Dependencies whose version is empty, "*" or "latest" are left out of the
// request, so the response results line up with the remaining dependencies
// only. If nothing remains, an empty response is returned without any network
// traffic. A transport failure, a non-200 status, or an undecodable body is
// returned as an error.
//
// NOTE(review): the OSV querybatch endpoint is documented as returning a
// reduced record per vulnerability (ID and modification time) — confirm
// whether Summary, Details, Severity and Aliases are ever populated here or
// need a follow-up lookup per ID.
func queryOSV(deps []dependency) (*osvBatchResponse, error) {
	payload := osvBatchRequest{Queries: make([]osvQuery, 0, len(deps))}
	for _, dep := range deps {
		switch dep.Version {
		case "", "*", "latest":
			continue // can't query without a concrete version
		}
		payload.Queries = append(payload.Queries, osvQuery{
			Package: &osvPackage{Name: dep.Name, Ecosystem: dep.Ecosystem},
			Version: dep.Version,
		})
	}
	if len(payload.Queries) == 0 {
		return &osvBatchResponse{}, nil
	}

	encoded, err := json.Marshal(payload)
	if err != nil {
		return nil, fmt.Errorf("marshal OSV request: %w", err)
	}

	httpResp, err := osvClient.Post(osvBatchURL, "application/json", bytes.NewReader(encoded))
	if err != nil {
		return nil, fmt.Errorf("OSV API request: %w", err)
	}
	defer httpResp.Body.Close()

	if httpResp.StatusCode != http.StatusOK {
		// Include at most 1 KiB of the error body in the message.
		snippet, _ := io.ReadAll(io.LimitReader(httpResp.Body, 1024))
		return nil, fmt.Errorf("OSV API returned %d: %s", httpResp.StatusCode, string(snippet))
	}

	parsed := new(osvBatchResponse)
	if err := json.NewDecoder(httpResp.Body).Decode(parsed); err != nil {
		return nil, fmt.Errorf("decode OSV response: %w", err)
	}
	return parsed, nil
}
// ──────────────────────────────────────────────────────────────────────
// Severity mapping
// ──────────────────────────────────────────────────────────────────────
// mapCVSSSeverity picks an AlertSeverity from a vulnerability's severity
// entries. The first "CVSS_V3" entry whose computed score is above zero
// decides the result: >= 9.0 critical, >= 7.0 high, >= 4.0 medium, otherwise
// low. When no entry yields a positive score the result is medium.
func mapCVSSSeverity(vulnSeverities []osvSeverity) security_model.AlertSeverity {
	for i := range vulnSeverities {
		entry := vulnSeverities[i]
		if entry.Type != "CVSS_V3" {
			continue
		}
		base := extractCVSSBaseScore(entry.Score)
		if base >= 9.0 {
			return security_model.SeverityCritical
		}
		if base >= 7.0 {
			return security_model.SeverityHigh
		}
		if base >= 4.0 {
			return security_model.SeverityMedium
		}
		if base > 0 {
			return security_model.SeverityLow
		}
		// A score of zero decides nothing; keep looking at later entries.
	}
	// No CVSS score available - default to medium
	return security_model.SeverityMedium
}
// extractCVSSBaseScore computes the CVSS v3.x base score (0.0 to 10.0) from a
// vector string.
//
// Vector format: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H
//
// The score follows the CVSS v3.1 base equations: an impact sub-score built
// from C/I/A, an exploitability sub-score built from AV/AC/PR/UI, the Scope
// (S) adjustment, and the specification's round-up to one decimal. Metrics
// outside the base group (temporal, environmental) are ignored.
//
// It returns 0 when the vector is empty, when any base metric is missing or
// has an unrecognised value, or when the vector describes no impact at all
// (C, I and A all "N").
func extractCVSSBaseScore(vector string) float64 {
	if vector == "" {
		return 0
	}

	metrics := make(map[string]string, 9)
	for _, segment := range strings.Split(vector, "/") {
		kv := strings.SplitN(segment, ":", 2)
		if len(kv) == 2 {
			metrics[kv[0]] = kv[1]
		}
	}

	// Scope changes both the Privileges Required weights and the formulas.
	var scopeChanged bool
	switch metrics["S"] {
	case "U":
		scopeChanged = false
	case "C":
		scopeChanged = true
	default:
		return 0
	}

	// weight looks up the numeric weight of one metric and records whether
	// every lookup so far found a known value.
	valid := true
	weight := func(metric string, table map[string]float64) float64 {
		w, found := table[metrics[metric]]
		if !found {
			valid = false
		}
		return w
	}

	privileges := map[string]float64{"N": 0.85, "L": 0.62, "H": 0.27}
	if scopeChanged {
		privileges = map[string]float64{"N": 0.85, "L": 0.68, "H": 0.5}
	}
	impactWeights := map[string]float64{"H": 0.56, "L": 0.22, "N": 0}

	attackVector := weight("AV", map[string]float64{"N": 0.85, "A": 0.62, "L": 0.55, "P": 0.2})
	attackComplexity := weight("AC", map[string]float64{"L": 0.77, "H": 0.44})
	privilegesRequired := weight("PR", privileges)
	userInteraction := weight("UI", map[string]float64{"N": 0.85, "R": 0.62})
	confidentiality := weight("C", impactWeights)
	integrity := weight("I", impactWeights)
	availability := weight("A", impactWeights)
	if !valid {
		return 0
	}

	// Impact sub-score.
	iss := 1 - (1-confidentiality)*(1-integrity)*(1-availability)
	var impact float64
	if scopeChanged {
		// (iss - 0.02) raised to the 15th power, without pulling in math.Pow.
		base := iss - 0.02
		pow15 := 1.0
		for k := 0; k < 15; k++ {
			pow15 *= base
		}
		impact = 7.52*(iss-0.029) - 3.25*pow15
	} else {
		impact = 6.42 * iss
	}
	if impact <= 0 {
		return 0
	}

	exploitability := 8.22 * attackVector * attackComplexity * privilegesRequired * userInteraction

	total := impact + exploitability
	if scopeChanged {
		total *= 1.08
	}
	if total > 10.0 {
		total = 10.0
	}

	// Round up to one decimal place using the integer method from the v3.1
	// specification, which avoids floating-point artefacts such as 4.0
	// being stored as 4.000000000000001 and rounding up to 4.1.
	scaled := int64(total*100000 + 0.5)
	if scaled%10000 == 0 {
		return float64(scaled) / 100000.0
	}
	return float64(scaled/10000+1) / 10.0
}
// ──────────────────────────────────────────────────────────────────────
// DependencyScanner
// ──────────────────────────────────────────────────────────────────────
// DependencyScanner checks project dependencies against known vulnerabilities.
// The type holds no fields; the scanning itself is implemented by ScanTree.
type DependencyScanner struct{}
// NewDependencyScanner creates a new dependency vulnerability scanner.
func NewDependencyScanner() *DependencyScanner {
return &DependencyScanner{}
}
// Type reports which kind of scanner this is: security_model.ScannerDependency.
func (s *DependencyScanner) Type() security_model.ScannerType {
return security_model.ScannerDependency
}
// ScanCommit scans the given commit by delegating to ScanTree, so the result
// covers every manifest in the commit's tree rather than only the files the
// commit changed.
func (s *DependencyScanner) ScanCommit(commit *git.Commit) ([]Finding, error) {
return s.ScanTree(commit)
}
// ScanTree scans every supported manifest file in the tree of commit and
// returns one Finding per (vulnerability, dependency) pair reported by
// OSV.dev.
//
// The scan runs in three steps:
//  1. list all regular files of the tree, skip anything below a vendor/,
//     node_modules/ or testdata/ directory, and parse each file whose base
//     name matches an entry in manifestParsers (at most 5 MB is read per file);
//  2. send the collected dependencies to OSV in batches of osvMaxBatch;
//  3. turn each reported vulnerability into a Finding.
//
// A nil commit, or a tree without any dependencies, yields (nil, nil). An
// error is returned only when the tree cannot be listed. Unreadable manifests
// are skipped, and a failed OSV batch is logged and skipped, so the returned
// findings may be incomplete when OSV is unreachable.
func (s *DependencyScanner) ScanTree(commit *git.Commit) ([]Finding, error) {
	if commit == nil {
		return nil, nil
	}

	// Step 1: Find and parse manifest files
	entries, err := commit.ListEntriesRecursiveFast()
	if err != nil {
		return nil, fmt.Errorf("ListEntriesRecursiveFast: %w", err)
	}

	var allDeps []dependency
	for _, entry := range entries {
		if !entry.IsRegular() {
			continue
		}

		path := entry.Name()
		baseName := path
		if idx := strings.LastIndex(path, "/"); idx >= 0 {
			baseName = path[idx+1:]
		}

		// Skip vendored/nested files. The path is anchored with a leading
		// slash so only whole directory names match: "vendor/x" and
		// "a/vendor/x" are skipped, "myvendor/x" is not.
		anchored := "/" + strings.ToLower(path)
		if strings.Contains(anchored, "/vendor/") || strings.Contains(anchored, "/node_modules/") ||
			strings.Contains(anchored, "/testdata/") {
			continue
		}

		for _, parser := range manifestParsers {
			if baseName == parser.FileName {
				reader, err := entry.Blob().DataAsync()
				if err != nil {
					log.Trace("DependencyScanner: skip %s: %v", path, err)
					continue
				}
				content, err := io.ReadAll(io.LimitReader(reader, 5*1024*1024)) // 5MB limit
				reader.Close()
				if err != nil {
					continue
				}
				deps := parser.Parse(string(content), path)
				allDeps = append(allDeps, deps...)
				break
			}
		}
	}

	if len(allDeps) == 0 {
		return nil, nil
	}
	log.Info("DependencyScanner: found %d dependencies across manifest files", len(allDeps))

	// Step 2: Query OSV in batches
	var findings []Finding
	for i := 0; i < len(allDeps); i += osvMaxBatch {
		end := i + osvMaxBatch
		if end > len(allDeps) {
			end = len(allDeps)
		}
		batch := allDeps[i:end]

		resp, err := queryOSV(batch)
		if err != nil {
			// Best effort: a failed batch is logged and the scan carries on
			// with the remaining batches.
			log.Error("DependencyScanner: OSV query failed: %v", err)
			continue
		}

		// Step 3: Map results to findings
		// OSV batch response indices correspond 1:1 with the query indices.
		// But we may have skipped deps with empty versions, so build the
		// queryable subset to align indices. This filter must stay identical
		// to the one applied inside queryOSV.
		queryable := make([]dependency, 0, len(batch))
		for _, d := range batch {
			if d.Version != "" && d.Version != "*" && d.Version != "latest" {
				queryable = append(queryable, d)
			}
		}

		for j, result := range resp.Results {
			if j >= len(queryable) {
				break
			}
			dep := queryable[j]

			for _, vuln := range result.Vulns {
				severity := mapCVSSSeverity(vuln.Severity)

				// Build CVE alias for rule ID (prefer CVE over GHSA)
				ruleID := vuln.ID
				for _, alias := range vuln.Aliases {
					if strings.HasPrefix(alias, "CVE-") {
						ruleID = alias
						break
					}
				}

				title := fmt.Sprintf("%s in %s@%s", ruleID, dep.Name, dep.Version)

				description := vuln.Summary
				if description == "" {
					description = vuln.Details
				}
				// Truncate long descriptions to at most 500 bytes. The cut
				// point is moved back off any UTF-8 continuation byte
				// (10xxxxxx) so a multi-byte character is never split.
				if len(description) > 500 {
					cut := 497
					for cut > 0 && description[cut]&0xC0 == 0x80 {
						cut--
					}
					description = description[:cut] + "..."
				}

				// Metadata JSON. Marshalling a map[string]string cannot
				// fail, so the error is intentionally ignored.
				meta, _ := json.Marshal(map[string]string{
					"vuln_id":   vuln.ID,
					"ecosystem": dep.Ecosystem,
					"package":   dep.Name,
					"version":   dep.Version,
				})

				// The fingerprint identifies a vulnerability in a package
				// version; it does not depend on the manifest path.
				fingerprint := fmt.Sprintf("%x", sha256.Sum256([]byte(vuln.ID+":"+dep.Name+":"+dep.Version)))

				findings = append(findings, Finding{
					Scanner:     security_model.ScannerDependency,
					Severity:    severity,
					RuleID:      ruleID,
					Title:       title,
					Description: description,
					FilePath:    dep.FilePath,
					CommitSHA:   commit.ID.String(),
					Fingerprint: fingerprint[:32],
					Metadata:    string(meta),
				})
			}
		}
	}

	return findings, nil
}
+3 -1
View File
@@ -32,8 +32,10 @@ func ScanOnPush(ctx context.Context, repo *repo_model.Repository, commit *git.Co
if cfg.SecretScanner {
scanners = append(scanners, NewSecretScanner())
}
if cfg.DependScanner {
scanners = append(scanners, NewDependencyScanner())
}
// Future scanners added here:
// if cfg.DependScanner { scanners = append(scanners, NewDependencyScanner()) }
// if cfg.CodeScanner { scanners = append(scanners, NewCodeScanner()) }
if len(scanners) == 0 {