Files
MokoGitea-Fork/services/security/code_scanner.go
T
jmiller 7c75133ef1
Universal: Auto Version Bump / Version Bump (push) Successful in 13s
Universal: PR Check / Branch Policy (pull_request) Successful in 1s
PR RC Release / Build RC Release (pull_request) Successful in 2s
Universal: PR Check / Validate PR (pull_request) Successful in 12s
Generic: Project CI / Lint & Validate (pull_request) Successful in 37s
Universal: PR Check / Secret Scan (pull_request) Successful in 1m20s
Generic: Project CI / Tests (pull_request) Has been cancelled
Universal: PR Check / Build RC Package (pull_request) Has been cancelled
Universal: PR Check / Report Issues (pull_request) Has been cancelled
feat: code security scanner with OWASP pattern detection (#552)
Implements the code analysis scanner module that detects insecure
patterns across Go, PHP, Python, JavaScript, and TypeScript:

- SQL injection (CWE-89): string concat in queries across 4 languages
- XSS (CWE-79): innerHTML, document.write, unescaped output, dangerouslySetInnerHTML
- Command injection (CWE-78): exec with variables, shell=True, os.system
- Path traversal (CWE-22): unsanitized path joins, file open with user input
- Insecure deserialization (CWE-502): unserialize(), yaml.load()
- Hardcoded credentials (CWE-798): password assignments in source
- Weak cryptography (CWE-327): MD5/SHA-1 usage

21 rules total, language-filtered by file extension. Wired into the
existing scanner orchestrator via the CodeScanner config toggle.
API updated to expose code_scanner in GET/PATCH security config.

Claude-Session: https://claude.ai/code/session_011AAFzotGMf3ayvXhEmStCd
2026-06-28 08:15:34 -05:00

336 lines
13 KiB
Go

// Copyright 2026 Moko Consulting <hello@mokoconsulting.tech>
// SPDX-License-Identifier: GPL-3.0-or-later
package security
import (
"bufio"
"crypto/sha256"
"fmt"
"io"
"regexp"
"strings"
security_model "code.mokoconsulting.tech/MokoConsulting/MokoGitea/models/security"
"code.mokoconsulting.tech/MokoConsulting/MokoGitea/modules/git"
"code.mokoconsulting.tech/MokoConsulting/MokoGitea/modules/log"
)
// CodeRule defines a pattern to detect insecure code patterns.
// Each rule is a single regular expression that CodeScanner tests against
// one source line at a time.
type CodeRule struct {
// ID is the rule identifier; it is copied into Finding.RuleID and is one of
// the inputs to the finding fingerprint.
ID string
// Title is the short human-readable name reported on each finding.
Title string
// Pattern is the compiled expression matched against each scanned line.
Pattern *regexp.Regexp
// Severity is the alert severity assigned to findings produced by this rule.
Severity security_model.AlertSeverity
// Description explains the risk and the suggested remediation; the CWE is
// appended to it when a finding is emitted.
Description string
// CWE is the weakness identifier (for example "CWE-89"); it is also written
// into the finding metadata.
CWE string
// Languages is compared for exact equality against the lower-cased file
// extension including the leading dot (for example ".go").
Languages []string // file extensions this rule applies to (empty = all)
}
// DefaultCodeRules contains the built-in code security patterns.
//
// Every Pattern is compiled with regexp.MustCompile while the package is being
// initialised, so each expression must be valid RE2 syntax: look-ahead,
// look-behind and back-references are not supported by Go's regexp package and
// would make MustCompile panic at start-up.
//
// Patterns are applied to one line at a time and only to files whose
// extension is listed in the rule's Languages.
var DefaultCodeRules = []CodeRule{
	// ── SQL Injection (CWE-89) ──────────────────────────────────────────
	{
		ID:          "sqli-string-concat-go",
		Title:       "SQL Injection: String Concatenation (Go)",
		Severity:    security_model.SeverityHigh,
		CWE:         "CWE-89",
		Pattern:     regexp.MustCompile(`(?i)(?:db|tx|sess|engine|orm)\.\s*(?:Exec|Query|QueryRow|Raw|Where|And|Or|Select|SQL)\s*\(\s*(?:"[^"]*\s*\+|fmt\.Sprintf\s*\(\s*"[^"]*(?:%s|%v|%d))`),
		Description: "SQL query built with string concatenation or fmt.Sprintf — use parameterized queries instead",
		Languages:   []string{".go"},
	},
	{
		ID:          "sqli-string-concat-php",
		Title:       "SQL Injection: String Concatenation (PHP)",
		Severity:    security_model.SeverityHigh,
		CWE:         "CWE-89",
		Pattern:     regexp.MustCompile(`(?i)(?:mysql_query|mysqli_query|pg_query|\$(?:db|pdo|conn|dbo|mysqli)->(?:query|exec|prepare|execute))\s*\(\s*(?:["'][^"']*\.\s*\$|["'][^"']*\{\$)`),
		Description: "SQL query built with variable interpolation — use prepared statements instead",
		Languages:   []string{".php"},
	},
	{
		ID:          "sqli-string-concat-py",
		Title:       "SQL Injection: String Concatenation (Python)",
		Severity:    security_model.SeverityHigh,
		CWE:         "CWE-89",
		Pattern:     regexp.MustCompile(`(?i)(?:cursor|conn|db|engine)\.(?:execute|executemany|raw)\s*\(\s*(?:f["']|["'][^"']*%s|["'][^"']*\s*\+|["'][^"']*\.format\()`),
		Description: "SQL query built with f-strings, %-formatting, or concatenation — use parameterized queries",
		Languages:   []string{".py"},
	},
	{
		ID:          "sqli-string-concat-js",
		Title:       "SQL Injection: String Concatenation (JS)",
		Severity:    security_model.SeverityHigh,
		CWE:         "CWE-89",
		Pattern:     regexp.MustCompile("(?i)(?:query|execute|raw|knex\\.raw|sequelize\\.query)\\s*\\(\\s*(?:`[^`]*\\$\\{|[\"'][^\"']*\\s*\\+)"),
		Description: "SQL query built with template literals or concatenation — use parameterized queries",
		Languages:   []string{".js", ".ts", ".mjs", ".cjs"},
	},

	// ── Cross-Site Scripting / XSS (CWE-79) ─────────────────────────────
	{
		ID:          "xss-innerhtml",
		Title:       "XSS: innerHTML Assignment",
		Severity:    security_model.SeverityMedium,
		CWE:         "CWE-79",
		Pattern:     regexp.MustCompile(`(?i)\.innerHTML\s*[+]?=\s*(?:[^"'` + "`" + `]|$)`),
		Description: "Direct innerHTML assignment with non-literal value — use textContent or sanitize first",
		Languages:   []string{".js", ".ts", ".jsx", ".tsx", ".vue", ".svelte"},
	},
	{
		ID:          "xss-document-write",
		Title:       "XSS: document.write()",
		Severity:    security_model.SeverityMedium,
		CWE:         "CWE-79",
		Pattern:     regexp.MustCompile(`(?i)document\.write(?:ln)?\s*\(`),
		Description: "document.write() can execute injected scripts — use DOM APIs instead",
		Languages:   []string{".js", ".ts", ".jsx", ".tsx"},
	},
	{
		ID:          "xss-echo-unescaped-php",
		Title:       "XSS: Unescaped Output (PHP)",
		Severity:    security_model.SeverityMedium,
		CWE:         "CWE-79",
		Pattern:     regexp.MustCompile(`(?:echo|print)\s+\$_(?:GET|POST|REQUEST|COOKIE|SERVER)\s*\[`),
		Description: "Direct output of superglobal without escaping — use htmlspecialchars()",
		Languages:   []string{".php"},
	},
	{
		ID:          "xss-dangerously-set-html",
		Title:       "XSS: dangerouslySetInnerHTML (React)",
		Severity:    security_model.SeverityMedium,
		CWE:         "CWE-79",
		Pattern:     regexp.MustCompile(`dangerouslySetInnerHTML\s*=\s*\{\s*\{\s*__html\s*:`),
		Description: "dangerouslySetInnerHTML bypasses React's XSS protection — ensure input is sanitized",
		Languages:   []string{".jsx", ".tsx", ".js", ".ts"},
	},

	// ── Command Injection (CWE-78) ──────────────────────────────────────
	{
		ID:          "cmdi-exec-go",
		Title:       "Command Injection: exec.Command with Variable (Go)",
		Severity:    security_model.SeverityHigh,
		CWE:         "CWE-78",
		Pattern:     regexp.MustCompile(`exec\.Command\s*\(\s*(?:fmt\.Sprintf|[a-zA-Z_]+\s*\+|[a-zA-Z_]+\s*,)`),
		Description: "exec.Command with dynamic arguments — validate and sanitize inputs",
		Languages:   []string{".go"},
	},
	{
		ID:          "cmdi-shell-exec-php",
		Title:       "Command Injection: Shell Execution (PHP)",
		Severity:    security_model.SeverityCritical,
		CWE:         "CWE-78",
		Pattern:     regexp.MustCompile(`(?i)(?:system|exec|passthru|shell_exec|popen|proc_open)\s*\(\s*\$`),
		Description: "Shell command with variable input — use escapeshellarg() or avoid shell execution",
		Languages:   []string{".php"},
	},
	{
		ID:          "cmdi-child-process-js",
		Title:       "Command Injection: child_process (Node.js)",
		Severity:    security_model.SeverityHigh,
		CWE:         "CWE-78",
		Pattern:     regexp.MustCompile("(?i)(?:child_process|exec|execSync|spawn|spawnSync)\\s*\\(\\s*(?:`[^`]*\\$\\{|[\"'][^\"']*\\s*\\+)"),
		Description: "Shell command built with dynamic input — use array arguments or sanitize",
		Languages:   []string{".js", ".ts", ".mjs", ".cjs"},
	},
	{
		ID:          "cmdi-subprocess-py",
		Title:       "Command Injection: subprocess with shell=True (Python)",
		Severity:    security_model.SeverityHigh,
		CWE:         "CWE-78",
		Pattern:     regexp.MustCompile(`(?i)subprocess\.(?:call|run|Popen|check_output|check_call)\s*\([^)]*shell\s*=\s*True`),
		Description: "subprocess with shell=True is vulnerable to injection — use shell=False with list args",
		Languages:   []string{".py"},
	},
	{
		ID:          "cmdi-os-system-py",
		Title:       "Command Injection: os.system() (Python)",
		Severity:    security_model.SeverityHigh,
		CWE:         "CWE-78",
		Pattern:     regexp.MustCompile(`(?i)os\.(?:system|popen)\s*\(\s*(?:f["']|[a-zA-Z_])`),
		Description: "os.system/popen with dynamic input — use subprocess with shell=False instead",
		Languages:   []string{".py"},
	},

	// ── Path Traversal (CWE-22) ─────────────────────────────────────────
	{
		ID:          "path-traversal-join",
		Title:       "Path Traversal: Unsanitized Path Join",
		Severity:    security_model.SeverityMedium,
		CWE:         "CWE-22",
		Pattern:     regexp.MustCompile(`(?i)(?:filepath\.Join|path\.join|os\.path\.join)\s*\([^)]*(?:ctx\.|req\.|r\.|request\.|params\.|query\.)`),
		Description: "User input in file path join without sanitization — validate against directory traversal",
		Languages:   []string{".go", ".js", ".ts", ".py"},
	},
	{
		ID:          "path-traversal-open",
		Title:       "Path Traversal: File Open with User Input",
		Severity:    security_model.SeverityMedium,
		CWE:         "CWE-22",
		Pattern:     regexp.MustCompile(`(?i)(?:os\.Open|ioutil\.ReadFile|os\.ReadFile)\s*\(\s*(?:fmt\.Sprintf|[a-zA-Z_]+\s*\+)`),
		Description: "File operation with dynamic path — sanitize and restrict to safe directories",
		Languages:   []string{".go"},
	},
	{
		ID:          "path-traversal-php",
		Title:       "Path Traversal: File Include/Open (PHP)",
		Severity:    security_model.SeverityHigh,
		CWE:         "CWE-22",
		Pattern:     regexp.MustCompile(`(?i)(?:include|require|include_once|require_once|file_get_contents|fopen|readfile)\s*\(\s*\$`),
		Description: "File operation with variable path — validate path against traversal",
		Languages:   []string{".php"},
	},

	// ── Insecure Deserialization (CWE-502) ──────────────────────────────
	{
		ID:          "deserialize-php",
		Title:       "Insecure Deserialization: unserialize() (PHP)",
		Severity:    security_model.SeverityHigh,
		CWE:         "CWE-502",
		Pattern:     regexp.MustCompile(`(?i)unserialize\s*\(\s*\$`),
		Description: "unserialize() with untrusted data can lead to remote code execution",
		Languages:   []string{".php"},
	},
	{
		ID:       "deserialize-yaml-py",
		Title:    "Insecure Deserialization: yaml.load() (Python)",
		Severity: security_model.SeverityHigh,
		CWE:      "CWE-502",
		// RE2 has no negative look-ahead, so "not a safe loader" is expressed
		// positively. The pattern flags either
		//   - an explicit Loader=yaml.UnsafeLoader / yaml.FullLoader, or
		//   - a call that closes without any comma, i.e. a single argument
		//     and therefore no loader at all (one level of nested
		//     parentheses inside that argument is tolerated).
		// Calls that pass a second argument such as yaml.SafeLoader are not
		// reported.
		Pattern:     regexp.MustCompile(`yaml\.load\s*\((?:[^)]*Loader\s*=\s*yaml\.(?:Unsafe|Full)Loader|(?:[^,()]|\([^()]*\))*\))`),
		Description: "yaml.load() without SafeLoader allows arbitrary code execution — use yaml.safe_load()",
		Languages:   []string{".py"},
	},

	// ── Hardcoded Credentials in Code (CWE-798) ─────────────────────────
	{
		ID:          "hardcoded-password-assignment",
		Title:       "Hardcoded Password in Source",
		Severity:    security_model.SeverityHigh,
		CWE:         "CWE-798",
		Pattern:     regexp.MustCompile(`(?i)(?:password|passwd|pwd)\s*(?::=|=)\s*["'][^"']{8,}["']`),
		Description: "Hardcoded password in source code — use environment variables or config",
		Languages:   []string{".go", ".py", ".js", ".ts", ".php", ".rb", ".java"},
	},

	// ── Weak Cryptography (CWE-327) ─────────────────────────────────────
	{
		ID:          "weak-crypto-md5",
		Title:       "Weak Cryptography: MD5",
		Severity:    security_model.SeverityLow,
		CWE:         "CWE-327",
		Pattern:     regexp.MustCompile(`(?i)(?:md5\.New|hashlib\.md5|MD5\.Create|MessageDigest\.getInstance\s*\(\s*["']MD5)`),
		Description: "MD5 is cryptographically broken — use SHA-256 or stronger for security purposes",
		Languages:   []string{".go", ".py", ".cs", ".java"},
	},
	{
		ID:          "weak-crypto-sha1",
		Title:       "Weak Cryptography: SHA-1 for Security",
		Severity:    security_model.SeverityLow,
		CWE:         "CWE-327",
		Pattern:     regexp.MustCompile(`(?i)(?:sha1\.New|hashlib\.sha1|SHA1\.Create|MessageDigest\.getInstance\s*\(\s*["']SHA-?1)`),
		Description: "SHA-1 is deprecated for security — use SHA-256 or stronger",
		Languages:   []string{".go", ".py", ".cs", ".java"},
	},
}
// codeFileExtensions is the allow-list of lower-cased file extensions
// (leading dot included) that the code scanner will open; files with any
// other extension are skipped before rule selection.
var codeFileExtensions = map[string]bool{
	".go":     true,
	".py":     true,
	".js":     true,
	".mjs":    true,
	".cjs":    true,
	".jsx":    true,
	".ts":     true,
	".tsx":    true,
	".vue":    true,
	".svelte": true,
	".php":    true,
	".rb":     true,
	".java":   true,
	".cs":     true,
	".rs":     true,
}
// CodeScanner implements the Scanner interface for code pattern analysis.
// It applies its Rules, one source line at a time, to the files of a commit
// tree.
type CodeScanner struct {
// Rules is the set of patterns to apply; NewCodeScanner initialises it to
// DefaultCodeRules.
Rules []CodeRule
}
// NewCodeScanner returns a CodeScanner preloaded with the built-in rules.
// The scanner's Rules field refers to DefaultCodeRules itself, not a copy.
func NewCodeScanner() *CodeScanner {
	scanner := new(CodeScanner)
	scanner.Rules = DefaultCodeRules
	return scanner
}
// Type reports the scanner kind, which is always security_model.ScannerCode.
func (s *CodeScanner) Type() security_model.ScannerType {
return security_model.ScannerCode
}
// ScanCommit scans the complete tree at commit by delegating to ScanTree;
// it does not limit the scan to the files changed by the commit.
func (s *CodeScanner) ScanCommit(commit *git.Commit) ([]Finding, error) {
return s.ScanTree(commit)
}
// ScanTree runs the scanner's rules over every eligible file in the tree of
// commit and returns all findings.
//
// A nil commit yields (nil, nil). An entry is skipped when it is not a regular
// file, when shouldSkipFile rejects its path, when its extension is not in
// codeFileExtensions, when its blob is larger than 1 MiB, or when no rule
// applies to its extension. A blob that cannot be opened is logged at trace
// level and skipped. The only error returned is a failure to list the tree.
func (s *CodeScanner) ScanTree(commit *git.Commit) ([]Finding, error) {
	if commit == nil {
		return nil, nil
	}

	treeEntries, listErr := commit.ListEntriesRecursiveFast()
	if listErr != nil {
		return nil, fmt.Errorf("ListEntriesRecursiveFast: %w", listErr)
	}

	// Blobs above this size are not scanned.
	const maxBlobSize = 1024 * 1024

	var all []Finding
	for _, item := range treeEntries {
		if !item.IsRegular() {
			continue
		}

		name := item.Name()
		if shouldSkipFile(name) {
			continue
		}

		suffix := fileExtension(name)
		if known := codeFileExtensions[suffix]; !known {
			continue
		}

		if item.Blob().Size() > maxBlobSize {
			continue
		}

		selected := s.rulesForExtension(suffix)
		if len(selected) == 0 {
			continue
		}

		content, openErr := item.Blob().DataAsync()
		if openErr != nil {
			log.Trace("CodeScanner: skip %s: %v", name, openErr)
			continue
		}

		hits := s.scanReader(content, name, commit.ID.String(), selected)
		content.Close()
		all = append(all, hits...)
	}

	return all, nil
}
// rulesForExtension returns, in their original order, the rules that apply to
// files with the given extension. A rule applies when its Languages list is
// empty or contains ext exactly. The result is nil when nothing applies.
func (s *CodeScanner) rulesForExtension(ext string) []CodeRule {
	var selected []CodeRule
	for _, candidate := range s.Rules {
		// A rule without a language list is language-agnostic.
		applies := len(candidate.Languages) == 0
		for i := 0; !applies && i < len(candidate.Languages); i++ {
			applies = candidate.Languages[i] == ext
		}
		if applies {
			selected = append(selected, candidate)
		}
	}
	return selected
}
// scanReader reads r line by line and returns one Finding for every line that
// matches a rule.
//
// filePath and commitSHA are copied into each finding. Lines that, after
// trimming surrounding whitespace, start with a comment marker (see
// isCommentLine) are ignored. At most one finding is produced per line: the
// first rule in rules that matches wins. The fingerprint is the first 32 hex
// characters of SHA-256 over "ruleID:filePath:line", where line is the raw,
// untrimmed line text.
//
// Read errors cannot be returned through this signature, so they are logged
// and the findings collected up to that point are returned.
func (s *CodeScanner) scanReader(r io.Reader, filePath, commitSHA string, rules []CodeRule) []Finding {
	// bufio.Scanner gives up with ErrTooLong on any line longer than its
	// default 64 KiB token limit, which would silently leave the rest of the
	// file unscanned (bundled or minified sources routinely have such lines).
	// ScanTree only passes blobs of at most 1 MiB, so a limit just above
	// that lets any single line of an accepted file through.
	const maxLineBytes = 1024*1024 + 1

	var findings []Finding
	scanner := bufio.NewScanner(r)
	scanner.Buffer(make([]byte, 0, 64*1024), maxLineBytes)

	lineNum := 0
	for scanner.Scan() {
		lineNum++
		line := scanner.Text()
		if isCommentLine(strings.TrimSpace(line)) {
			continue
		}
		for _, rule := range rules {
			if !rule.Pattern.MatchString(line) {
				continue
			}
			fingerprint := fmt.Sprintf("%x", sha256.Sum256([]byte(rule.ID+":"+filePath+":"+line)))
			findings = append(findings, Finding{
				Scanner:     security_model.ScannerCode,
				Severity:    rule.Severity,
				RuleID:      rule.ID,
				Title:       rule.Title,
				Description: rule.Description + " [" + rule.CWE + "]",
				FilePath:    filePath,
				LineNumber:  lineNum,
				CommitSHA:   commitSHA,
				Fingerprint: fingerprint[:32],
				Metadata:    fmt.Sprintf(`{"cwe":"%s"}`, rule.CWE),
			})
			// Report only the first matching rule for this line.
			break
		}
	}

	// Scan returns false on both EOF and failure; Err distinguishes the two.
	if err := scanner.Err(); err != nil {
		log.Warn("CodeScanner: %s: scan stopped after line %d: %v", filePath, lineNum, err)
	}
	return findings
}
// fileExtension returns the lower-cased extension of the final element of a
// slash-separated path, including the leading dot, or "" when that element
// contains no dot.
//
// Only the base name is inspected, so a dot in a directory name
// ("dir.v2/Makefile") is not mistaken for an extension. A dotfile such as
// ".gitignore" is returned whole, since its only dot is the leading one.
func fileExtension(path string) string {
	base := path
	if slash := strings.LastIndexByte(base, '/'); slash >= 0 {
		base = base[slash+1:]
	}
	dot := strings.LastIndexByte(base, '.')
	if dot < 0 {
		return ""
	}
	return strings.ToLower(base[dot:])
}
// isCommentLine reports whether line begins with one of the comment markers
// "//", "#", "*", "/*" or "<!--". The check is a plain prefix test, so the
// caller must strip leading whitespace first.
func isCommentLine(line string) bool {
	for _, marker := range [...]string{"//", "#", "*", "/*", "<!--"} {
		if strings.HasPrefix(line, marker) {
			return true
		}
	}
	return false
}