Release: monitoring, wiki, and infrastructure consolidation #9
@@ -62,7 +62,7 @@ jobs:
|
||||
API="${GITEA_URL}/api/v1"
|
||||
|
||||
# Platform/standards/infra repos to exclude
|
||||
EXCLUDE="gitea-org-config org-profile gitea-private gitea-server-setup MokoStandards MokoStandards-API MokoTesting"
|
||||
EXCLUDE="gitea-org-config org-profile gitea-private .mokogitea-private MokoStandards MokoStandards-API MokoTesting"
|
||||
EXCLUDE="$EXCLUDE MokoStandards-Template-Client MokoStandards-Template-Dolibarr MokoStandards-Template-Generic MokoStandards-Template-Joomla MokoDoliProjTemplate"
|
||||
|
||||
if [ -n "${{ inputs.repos }}" ]; then
|
||||
|
||||
@@ -61,7 +61,7 @@ jobs:
|
||||
run: |
|
||||
API="${GITEA_URL}/api/v1"
|
||||
|
||||
EXCLUDE="gitea-org-config org-profile gitea-private gitea-server-setup MokoStandards MokoStandards-API MokoTesting"
|
||||
EXCLUDE="gitea-org-config org-profile gitea-private .mokogitea-private MokoStandards MokoStandards-API MokoTesting"
|
||||
EXCLUDE="$EXCLUDE MokoStandards-Template-Client MokoStandards-Template-Dolibarr MokoStandards-Template-Generic MokoStandards-Template-Joomla MokoDoliProjTemplate"
|
||||
|
||||
if [ -n "${{ inputs.repos }}" ]; then
|
||||
|
||||
@@ -0,0 +1,678 @@
|
||||
#!/usr/bin/env bash
|
||||
# server-autoheal.sh - Auto-heal on restart + split backup management
|
||||
#
|
||||
# Copyright (C) 2026 Moko Consulting <hello@mokoconsulting.tech>
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# DEFGROUP: MokoStandards.Automation.ServerAutoheal
|
||||
# INGROUP: MokoStandards.Automation
|
||||
# REPO: https://git.mokoconsulting.tech/MokoConsulting/moko-platform
|
||||
# PATH: /automation/server-autoheal.sh
|
||||
# BRIEF: Server auto-heal on unclean restart + split system/content backups
|
||||
#
|
||||
# Usage:
|
||||
# server-autoheal.sh <command> [options]
|
||||
#
|
||||
# Commands:
|
||||
# boot-check Run at boot — auto-heals if no safe point exists
|
||||
# set-safepoint Mark current state as safe (call before planned shutdown)
|
||||
# backup-system Run a system backup (configs, packages, services)
|
||||
# backup-content Run a content backup (site files, databases, uploads)
|
||||
# cleanup Prune expired backups per retention policy
|
||||
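# status Show safe point and backup status
# clear-safepoint Remove the safe point (the next boot-check will auto-heal)
# install Install example config, cron jobs, and the systemd shutdown hook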
|
||||
#
|
||||
# Scheduling (cron):
|
||||
# @reboot server-autoheal.sh boot-check
|
||||
# 0 3 * * * server-autoheal.sh backup-system (daily at 3am)
|
||||
# 0 */2 * * * server-autoheal.sh backup-content (every 2 hours)
|
||||
# 30 */2 * * * server-autoheal.sh cleanup (30 min after content backup)
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# ──────────────────────────────────────────────
|
||||
# Configuration — override via /etc/moko/autoheal.conf
|
||||
# ──────────────────────────────────────────────
|
||||
CONF_FILE="/etc/moko/autoheal.conf"
# Optional site overrides: anything assigned in the config file wins over the
# defaults below because every default uses the ${VAR:-default} form.
# shellcheck disable=SC1090
[[ -f "$CONF_FILE" ]] && source "$CONF_FILE"

BACKUP_ROOT="${BACKUP_ROOT:-/var/backups/moko}"
# The safe point is written at shutdown and read on the NEXT boot, so it must
# live on persistent storage. /var/run (i.e. /run) is cleared at boot, which
# would make every restart look unclean; hence /var/lib.
SAFEPOINT_FILE="${SAFEPOINT_FILE:-/var/lib/moko/safepoint}"
LOG_FILE="${LOG_FILE:-/var/log/moko/autoheal.log}"
# Lock files only need to live for the current boot, so /var/run is fine here.
LOCK_DIR="${LOCK_DIR:-/var/run/moko}"

# System backup: configs, package lists, service state, cron
SYSTEM_BACKUP_DIR="${BACKUP_ROOT}/system"
SYSTEM_BACKUP_RETAIN="${SYSTEM_BACKUP_RETAIN:-7}" # keep 7 daily system backups

# Content backup: web roots, databases, uploads
CONTENT_BACKUP_DIR="${BACKUP_ROOT}/content"
CONTENT_BACKUP_RETAIN_HOURS="${CONTENT_BACKUP_RETAIN_HOURS:-24}" # 1 day of content backups

# Paths to back up — override these in /etc/moko/autoheal.conf
SYSTEM_PATHS="${SYSTEM_PATHS:-/etc/nginx /etc/php /etc/mysql /etc/cron.d /etc/systemd/system}"
CONTENT_PATHS="${CONTENT_PATHS:-/var/www}"
DB_NAMES="${DB_NAMES:-}" # space-separated list, empty = auto-detect all

# ──────────────────────────────────────────────
# Helpers
# ──────────────────────────────────────────────

# Write one timestamped log line to stderr and append it to $LOG_FILE.
# Arguments: $1 - level (INFO/WARN/ERROR), remaining args - message text
# The file append is best-effort: an unwritable or missing log directory must
# not abort the caller (the script runs under `set -euo pipefail`).
log() {
  local level="$1"; shift
  local ts
  ts=$(date -u '+%Y-%m-%dT%H:%M:%SZ')
  local msg="[$ts] [$level] $*"
  printf '%s\n' "$msg" >&2
  { printf '%s\n' "$msg" >> "$LOG_FILE"; } 2>/dev/null || true
}

# Create every directory the script writes into: both backup directories, the
# lock directory, and the parent directories of the log file and safe point.
ensure_dirs() {
  mkdir -p -- "$SYSTEM_BACKUP_DIR" "$CONTENT_BACKUP_DIR" \
    "$LOCK_DIR" "$(dirname -- "$LOG_FILE")" "$(dirname -- "$SAFEPOINT_FILE")"
}
|
||||
|
||||
# Take an exclusive per-operation lock, or exit quietly if another run holds it.
# Arguments: $1 - operation name; the lock file is ${LOCK_DIR}/autoheal-<name>.lock
# Exits 0 (after a WARN) when the PID recorded in the lock file is still alive.
# Returns 1 if the lock file cannot be created at all.
# On success an EXIT trap removes the lock file when the script ends.
acquire_lock() {
  local lockfile="${LOCK_DIR}/autoheal-${1}.lock"
  local pid=""

  # With noclobber set, `>` fails if the file already exists, so creating the
  # lock and detecting an existing one happen in a single atomic step.
  if ! ( set -o noclobber; echo "$$" > "$lockfile" ) 2>/dev/null; then
    pid=$(cat -- "$lockfile" 2>/dev/null) || pid=""
    if [[ "$pid" =~ ^[0-9]+$ ]] && kill -0 "$pid" 2>/dev/null; then
      log WARN "Another $1 operation is running (PID $pid), skipping"
      exit 0
    fi

    # Holder is gone (or the file is unreadable/garbage): treat as stale.
    rm -f -- "$lockfile"
    if ! ( set -o noclobber; echo "$$" > "$lockfile" ) 2>/dev/null; then
      log ERROR "Could not acquire lock: $lockfile"
      return 1
    fi
  fi

  # %q shell-quotes the path so the trap stays valid for any LOCK_DIR value.
  # shellcheck disable=SC2064 — expand the path now, not at exit time.
  trap "rm -f -- $(printf '%q' "$lockfile")" EXIT
}
|
||||
|
||||
# Print the current UTC time as YYYYmmdd_HHMMSS (used in backup file names).
timestamp() {
  TZ=UTC0 date '+%Y%m%d_%H%M%S'
}
|
||||
|
||||
# ──────────────────────────────────────────────
|
||||
# Safe-point management
|
||||
# ──────────────────────────────────────────────
|
||||
# Record a safe point: write a small key=value file to $SAFEPOINT_FILE with the
# current timestamp, hostname, kernel release, boot time and invoking user,
# then log the event.
cmd_set_safepoint() {
  ensure_dirs

  local ts
  ts=$(timestamp)

  # Prefer the user who invoked sudo; fall back to the effective user.
  local actor="${SUDO_USER:-$(whoami)}"

  # These lookups are informational only, so a failing tool yields an empty
  # (or "unknown") value instead of stopping the command.
  local host kernel booted
  host=$(hostname) || true
  kernel=$(uname -r) || true
  booted=$(uptime -s 2>/dev/null || echo "unknown")

  printf '%s\n' \
    "timestamp=$ts" \
    "hostname=$host" \
    "kernel=$kernel" \
    "uptime=$booted" \
    "set_by=$actor" \
    > "$SAFEPOINT_FILE"

  log INFO "Safe point set at $ts by $actor"
}
|
||||
|
||||
# Delete the safe point file (a missing file is not an error) and log it.
cmd_clear_safepoint() {
  local marker="$SAFEPOINT_FILE"
  rm -f "$marker"
  log INFO "Safe point cleared"
}
|
||||
|
||||
# Succeed (status 0) when $SAFEPOINT_FILE exists as a regular file.
has_safepoint() {
  test -f "$SAFEPOINT_FILE"
}
|
||||
|
||||
# ──────────────────────────────────────────────
|
||||
# System backup (daily)
|
||||
# ──────────────────────────────────────────────
|
||||
# Create a system backup: a tar.gz of every existing path in $SYSTEM_PATHS plus
# a text manifest (package list, enabled services, crontabs), then prune so
# that at most $SYSTEM_BACKUP_RETAIN archives remain.
# Returns: 0 on success; 1 if no configured path exists or tar fails fatally.
cmd_backup_system() {
  ensure_dirs
  acquire_lock "system-backup"

  # Archives and manifests can contain credentials (e.g. files under
  # /etc/mysql, crontabs): create them readable by the owner only.
  umask 077

  local ts
  ts=$(timestamp)
  local archive="${SYSTEM_BACKUP_DIR}/system_${ts}.tar.gz"
  local manifest="${SYSTEM_BACKUP_DIR}/system_${ts}.manifest"
  # Written under a temporary name first so a half-written archive never
  # matches the system_*.tar.gz pattern the restore code searches for.
  local partial="${archive}.partial"

  log INFO "Starting system backup → $archive"

  # Collect existing paths only. SYSTEM_PATHS is a space-separated list, so
  # the unquoted expansion (word splitting) is intentional.
  local existing_paths=()
  local p
  for p in $SYSTEM_PATHS; do
    if [[ -e "$p" ]]; then
      existing_paths+=("$p")
    fi
  done

  if [[ ${#existing_paths[@]} -eq 0 ]]; then
    log WARN "No system paths found to back up"
    return 1
  fi

  # Archive configs and system files. GNU tar exits 1 when files changed while
  # being read (archive still usable) and 2 on fatal errors.
  local tar_rc=0
  tar -czf "$partial" "${existing_paths[@]}" 2>/dev/null || tar_rc=$?
  if (( tar_rc > 1 )); then
    rm -f -- "$partial"
    log ERROR "System backup failed (tar exit code $tar_rc): $archive"
    return 1
  fi
  mv -f -- "$partial" "$archive"

  # Capture package list and service state as manifest
  {
    echo "=== PACKAGES ==="
    if command -v dpkg &>/dev/null; then
      dpkg --get-selections
    elif command -v rpm &>/dev/null; then
      rpm -qa --qf '%{NAME}\t%{VERSION}\n'
    fi
    echo ""
    echo "=== ENABLED SERVICES ==="
    if command -v systemctl &>/dev/null; then
      systemctl list-unit-files --state=enabled --no-pager 2>/dev/null || true
    fi
    echo ""
    echo "=== CRONTABS ==="
    local tab
    for tab in /var/spool/cron/crontabs/*; do
      if [[ -f "$tab" ]]; then
        echo "--- $(basename "$tab") ---"
        cat "$tab" 2>/dev/null || true
      fi
    done
  } > "$manifest"

  local size
  size=$(du -sh "$archive" 2>/dev/null | cut -f1) || size="unknown"
  log INFO "System backup complete: $archive ($size)"

  # Prune old system backups (keep $SYSTEM_BACKUP_RETAIN), oldest mtime first.
  # Entries are NUL-delimited "<mtime> <path>" records; the path is everything
  # after the first space, so directories containing spaces are handled.
  local -a archives=()
  local entry
  while IFS= read -r -d '' entry; do
    archives+=("${entry#* }")
  done < <(find "$SYSTEM_BACKUP_DIR" -name 'system_*.tar.gz' -type f \
    -printf '%T+ %p\0' 2>/dev/null | sort -z)

  local excess=$(( ${#archives[@]} - SYSTEM_BACKUP_RETAIN ))
  local i f
  for (( i = 0; i < excess; i++ )); do
    f=${archives[i]}
    rm -f -- "$f" "${f%.tar.gz}.manifest"
    log INFO "Pruned old system backup: $f"
  done

  return 0
}
|
||||
|
||||
# ──────────────────────────────────────────────
|
||||
# Content backup (every 2 hours)
|
||||
# ──────────────────────────────────────────────
|
||||
# Create a content backup: a tar.gz of every existing path in $CONTENT_PATHS
# and, when a MySQL/MariaDB dump tool is installed, a gzip-compressed SQL dump
# of $DB_NAMES (or of all non-system schemas when $DB_NAMES is empty).
# Returns: 0 on success; 1 if the file archive or the database dump failed.
cmd_backup_content() {
  ensure_dirs
  acquire_lock "content-backup"

  # Site files and SQL dumps are sensitive: owner-only permissions.
  umask 077

  local ts
  ts=$(timestamp)
  local archive="${CONTENT_BACKUP_DIR}/content_${ts}.tar.gz"
  local db_dump="${CONTENT_BACKUP_DIR}/content_${ts}.sql.gz"
  local status=0

  log INFO "Starting content backup → $archive"

  # Back up web content / uploads. CONTENT_PATHS is a space-separated list,
  # so the unquoted expansion is intentional.
  local existing_paths=()
  local p
  for p in $CONTENT_PATHS; do
    if [[ -e "$p" ]]; then
      existing_paths+=("$p")
    fi
  done

  if [[ ${#existing_paths[@]} -gt 0 ]]; then
    # Write to *.partial and rename, so an interrupted run never leaves a
    # truncated file matching content_*.tar.gz. GNU tar exit 1 means files
    # changed while being read (normal for live sites); 2 is fatal.
    local tar_rc=0
    tar -czf "${archive}.partial" "${existing_paths[@]}" 2>/dev/null || tar_rc=$?
    if (( tar_rc > 1 )); then
      rm -f -- "${archive}.partial"
      log ERROR "Content archive failed (tar exit code $tar_rc): $archive"
      status=1
    else
      mv -f -- "${archive}.partial" "$archive"
      local size
      size=$(du -sh "$archive" 2>/dev/null | cut -f1) || size="unknown"
      log INFO "Content files archived: $archive ($size)"
    fi
  else
    log WARN "No content paths found to back up"
  fi

  # Database dump — prefer the MariaDB tool name when it is installed.
  local dump_cmd=""
  if command -v mariadb-dump &>/dev/null; then
    dump_cmd="mariadb-dump"
  elif command -v mysqldump &>/dev/null; then
    dump_cmd="mysqldump"
  fi

  if [[ -n "$dump_cmd" ]]; then
    local databases=()
    if [[ -n "$DB_NAMES" ]]; then
      read -ra databases <<< "$DB_NAMES"
    else
      # Auto-detect: dump all databases except system ones. The client that
      # pairs with mariadb-dump is "mariadb" (stripping "dump" from the tool
      # name would yield the non-existent "mariadb-").
      local client="mysql"
      if [[ "$dump_cmd" == "mariadb-dump" ]] && command -v mariadb &>/dev/null; then
        client="mariadb"
      fi
      local name
      while IFS= read -r name; do
        if [[ -n "$name" ]]; then
          databases+=("$name")
        fi
      done < <("$client" -N -e \
        "SELECT schema_name FROM information_schema.schemata
         WHERE schema_name NOT IN ('information_schema','performance_schema','mysql','sys')" \
        2>/dev/null || true)
    fi

    if [[ ${#databases[@]} -gt 0 ]]; then
      # pipefail is set inside the subshell so a failing dump is detected
      # even though gzip itself succeeds.
      if ( set -o pipefail
           "$dump_cmd" --single-transaction --routines --triggers \
             --databases "${databases[@]}" 2>/dev/null \
             | gzip > "${db_dump}.partial" ); then
        mv -f -- "${db_dump}.partial" "$db_dump"
        local db_size
        db_size=$(du -sh "$db_dump" 2>/dev/null | cut -f1) || db_size="unknown"
        log INFO "Database dump complete: $db_dump ($db_size)"
      else
        rm -f -- "${db_dump}.partial"
        log ERROR "Database dump failed: $db_dump"
        status=1
      fi
    else
      log WARN "No databases found to dump"
    fi
  fi

  return "$status"
}
|
||||
|
||||
# ──────────────────────────────────────────────
|
||||
# Cleanup — prune content backups older than retention
|
||||
# ──────────────────────────────────────────────
|
||||
# Prune expired backups:
#   - content_* files older than $CONTENT_BACKUP_RETAIN_HOURS hours (by mtime)
#   - system archives beyond the $SYSTEM_BACKUP_RETAIN most recent (by mtime),
#     always together with their .manifest, plus manifests left without an
#     archive.
cmd_cleanup() {
  ensure_dirs
  local before_count after_count

  # Content: keep only the configured number of hours
  before_count=$(find "$CONTENT_BACKUP_DIR" -name 'content_*' -type f | wc -l)
  find "$CONTENT_BACKUP_DIR" -name 'content_*' -type f \
    -mmin +$((CONTENT_BACKUP_RETAIN_HOURS * 60)) -delete 2>/dev/null || true
  after_count=$(find "$CONTENT_BACKUP_DIR" -name 'content_*' -type f | wc -l)
  local removed=$((before_count - after_count))
  if [[ "$removed" -gt 0 ]]; then
    log INFO "Pruned $removed content backup(s) older than ${CONTENT_BACKUP_RETAIN_HOURS}h"
  fi

  # System: keep N most recent (handled in backup-system, but double-check
  # here). Retention is counted in archives, and each archive is removed
  # together with its manifest, so a pair is never split. Records are
  # NUL-delimited "<mtime> <path>"; the path is everything after the first
  # space, which keeps directories containing spaces intact.
  local -a archives=()
  local entry
  while IFS= read -r -d '' entry; do
    archives+=("${entry#* }")
  done < <(find "$SYSTEM_BACKUP_DIR" -name 'system_*.tar.gz' -type f \
    -printf '%T+ %p\0' 2>/dev/null | sort -z)

  local excess=$(( ${#archives[@]} - SYSTEM_BACKUP_RETAIN ))
  if (( excess > 0 )); then
    local i
    for (( i = 0; i < excess; i++ )); do
      rm -f -- "${archives[i]}" "${archives[i]%.tar.gz}.manifest"
    done
    log INFO "Pruned excess system backups"
  fi

  # A manifest without its archive describes nothing restorable: drop it.
  local manifest
  while IFS= read -r -d '' manifest; do
    if [[ ! -e "${manifest%.manifest}.tar.gz" ]]; then
      rm -f -- "$manifest"
    fi
  done < <(find "$SYSTEM_BACKUP_DIR" -name 'system_*.manifest' -type f -print0 2>/dev/null)

  log INFO "Cleanup complete"
}
|
||||
|
||||
# ──────────────────────────────────────────────
|
||||
# Boot check — the auto-heal entry point
|
||||
# ──────────────────────────────────────────────
|
||||
# Boot-time entry point. If a safe point exists the previous shutdown was
# clean: clear it and return 0. Otherwise run auto_heal and return its status.
cmd_boot_check() {
  ensure_dirs
  acquire_lock "boot-check"

  log INFO "=== Boot check started ==="
  log INFO "Hostname: $(hostname), Kernel: $(uname -r)"

  if has_safepoint; then
    log INFO "Safe point found — server was shut down cleanly"
    log INFO "Clearing safe point for next cycle"
    cmd_clear_safepoint
    log INFO "=== Boot check passed (clean restart) ==="
    return 0
  fi

  log WARN "NO safe point found — server restarted without clean shutdown"
  log WARN "Initiating auto-heal sequence..."

  # Capture the status with `||`: under `set -e` a bare failing call would
  # terminate the script before the failure could be reported below.
  local rc=0
  auto_heal || rc=$?

  # Deliberately no safe point is written after a heal. The safe point means
  # "the last shutdown was clean"; creating it on a running system would make
  # a later crash look like a clean shutdown on the following boot.
  if [[ $rc -eq 0 ]]; then
    log INFO "=== Boot check complete (healed successfully) ==="
  else
    log ERROR "=== Boot check FAILED — manual intervention required ==="
  fi

  return "$rc"
}
|
||||
|
||||
# ──────────────────────────────────────────────
|
||||
# Auto-heal strategy
|
||||
#
|
||||
# TODO: This is the core decision point. Implement the recovery
|
||||
# steps that match your server's architecture. See guidance below.
|
||||
#
|
||||
# Trade-offs to consider:
|
||||
# - Restore-from-backup: safest, but content may be up to 2h stale
|
||||
# - Service-restart-only: faster, keeps current data, but won't fix
|
||||
# corrupted configs or broken filesystem state
|
||||
# - Hybrid: restart services first, verify health, only restore if
|
||||
# health checks fail — best of both worlds but more complex
|
||||
#
|
||||
# The function receives no arguments. Use the latest system + content
|
||||
# backups to restore if needed. Return 0 on success, 1 on failure.
|
||||
# ──────────────────────────────────────────────
|
||||
# Run the recovery sequence: repair filesystem leftovers, restore system
# configuration if it looks corrupted, restart services, verify health, and
# fall back to a full restore from backup if the health check fails.
# Every step's outcome is handled explicitly so the behaviour does not depend
# on whether the caller has errexit active.
# Returns: 0 when the final health check passes, 1 otherwise.
auto_heal() {
  log INFO "Phase 1: Verify and repair filesystem"
  # Check for common post-crash issues (best-effort)
  if ! repair_filesystem; then
    log WARN "Filesystem repair reported errors — continuing"
  fi

  log INFO "Phase 2: Restore system configuration if corrupted"
  if ! restore_system_if_needed; then
    log ERROR "System configuration restore failed — aborting auto-heal"
    return 1
  fi

  log INFO "Phase 3: Restart core services"
  if ! restart_services; then
    log WARN "Service restart reported errors — continuing to health check"
  fi

  log INFO "Phase 4: Verify health"
  if ! verify_health; then
    log WARN "Health check failed after service restart — restoring from backup"
    if ! restore_from_backup; then
      log WARN "Restore from backup reported errors — re-checking health anyway"
    fi
    if ! restart_services; then
      log WARN "Service restart reported errors — continuing to health check"
    fi

    if ! verify_health; then
      log ERROR "Health check still failing after restore — giving up"
      return 1
    fi
  fi

  log INFO "Auto-heal completed successfully"
  return 0
}
|
||||
|
||||
# ──────────────────────────────────────────────
|
||||
# Heal sub-steps
|
||||
# ──────────────────────────────────────────────
|
||||
# Best-effort cleanup of leftovers that can stop services from starting after
# a crash: stale PID files, and ownership/permissions on a few runtime
# directories. Failures of individual chown/chmod calls are ignored.
repair_filesystem() {
  # PID files to inspect. The last entry is a glob; it is left unquoted on
  # purpose so it can expand to the matching files.
  local pid_patterns=(
    /var/run/nginx.pid
    /var/run/mysqld/mysqld.pid
    /var/run/php-fpm.pid
    /var/lib/mysql/*.pid
  )

  local pattern candidate owner
  for pattern in "${pid_patterns[@]}"; do
    for candidate in $pattern; do
      [[ -f "$candidate" ]] || continue

      owner=$(<"$candidate") 2>/dev/null || true

      # An empty PID file is left alone; a PID that still answers signal 0
      # belongs to a live process, so its file is not stale.
      if [[ -z "$owner" ]]; then
        continue
      fi
      if kill -0 "$owner" 2>/dev/null; then
        continue
      fi

      rm -f "$candidate"
      log INFO "Removed stale PID file: $candidate"
    done
  done

  # Fix permissions on critical dirs that may get mangled
  if [[ -d /var/run/mysqld ]]; then
    chown mysql:mysql /var/run/mysqld 2>/dev/null || true
  fi
  if [[ -d /var/lib/php/sessions ]]; then
    chmod 1733 /var/lib/php/sessions 2>/dev/null || true
  fi

  # Repair tmp/cache dirs (sticky, world-writable)
  local scratch
  for scratch in /tmp /var/tmp; do
    if [[ -d "$scratch" ]]; then
      chmod 1777 "$scratch" 2>/dev/null || true
    fi
  done
}
|
||||
|
||||
# Restore the newest system backup over / when a critical config FILE exists
# but is zero-length (a typical symptom of an unclean shutdown). Missing paths
# and directories are not treated as corruption.
# Returns: 0 when nothing needed restoring, no backup exists, or the restore
#          succeeded; 1 when extraction failed.
restore_system_if_needed() {
  # Find latest system backup (newest mtime). `cut -f2-` keeps the whole path
  # after the timestamp, so directories containing spaces survive; `|| true`
  # keeps a missing backup directory from aborting the script under pipefail.
  local latest_system
  latest_system=$(find "$SYSTEM_BACKUP_DIR" -name 'system_*.tar.gz' -printf '%T+ %p\n' \
    2>/dev/null | sort -r | head -1 | cut -d' ' -f2-) || true

  if [[ -z "$latest_system" ]]; then
    log WARN "No system backup available to verify against"
    return 0
  fi

  # Flag a restore as soon as one critical config is an empty regular file
  local needs_restore=false
  local critical_configs=("/etc/nginx/nginx.conf" "/etc/php" "/etc/mysql")
  local cfg

  for cfg in "${critical_configs[@]}"; do
    if [[ -f "$cfg" && ! -s "$cfg" ]]; then
      log WARN "Critical config is empty: $cfg"
      needs_restore=true
      break
    fi
  done

  if [[ "$needs_restore" == true ]]; then
    log WARN "Restoring system config from $latest_system"
    if ! tar -xzf "$latest_system" -C / 2>/dev/null; then
      log ERROR "System restore failed from $latest_system"
      return 1
    fi
    log INFO "System config restored"
  else
    log INFO "System configs look intact — skipping restore"
  fi

  return 0
}
|
||||
|
||||
# Restart every service from a fixed candidate list that systemd reports as
# enabled. A failed restart is logged as a warning and does not stop the loop.
# Returns 0 (also when systemctl is not installed).
restart_services() {
  if ! command -v systemctl &>/dev/null; then
    log WARN "systemctl not available — skipping service restart"
    return 0
  fi

  local services=("mysql" "mariadb" "nginx" "apache2" "php-fpm" "php8.1-fpm" "php8.2-fpm" "php8.3-fpm")
  local svc

  for svc in "${services[@]}"; do
    # Units that are not enabled (or do not exist) are skipped silently.
    if ! systemctl is-enabled "$svc" &>/dev/null; then
      continue
    fi

    log INFO "Restarting $svc..."
    # Explicit if/else: with `a && b || c` the failure branch would also run
    # whenever the success-branch command itself returned non-zero.
    if systemctl restart "$svc" 2>/dev/null; then
      log INFO "$svc restarted OK"
    else
      log WARN "$svc restart failed"
    fi
  done

  return 0
}
|
||||
|
||||
# Health check used after service restarts. Counts a failure for:
#   - each enabled database/web service that is not active,
#   - no successful HTTP response from http://localhost/ (when curl exists),
#   - no answer to `mysqladmin ping` (when mysqladmin exists).
# Returns: 0 when nothing failed, non-zero otherwise.
verify_health() {
  local failures=0
  local svc

  # Check critical services are running
  local services=("mysql" "mariadb" "nginx" "apache2")
  for svc in "${services[@]}"; do
    if systemctl is-enabled "$svc" &>/dev/null \
      && ! systemctl is-active "$svc" &>/dev/null; then
      log WARN "Service not running: $svc"
      # Not ((failures++)): that form returns status 1 while the counter is
      # still 0, which terminates the script when errexit is active.
      failures=$((failures + 1))
    fi
  done

  # Check if web server responds
  if command -v curl &>/dev/null; then
    if ! curl -sf -o /dev/null --max-time 10 "http://localhost/" 2>/dev/null; then
      log WARN "Local web server not responding"
      failures=$((failures + 1))
    fi
  fi

  # Check if database accepts connections
  if command -v mysqladmin &>/dev/null; then
    if ! mysqladmin ping --silent 2>/dev/null; then
      log WARN "Database not responding to ping"
      failures=$((failures + 1))
    fi
  fi

  [[ $failures -eq 0 ]]
}
|
||||
|
||||
# Full restore: extract the newest system archive and the newest content
# archive over /, then load the newest SQL dump into the database server.
# Each step is independent; a failing step is logged and the next one still
# runs. Always returns 0 — the caller decides success via its health check.
# NOTE(review): the content archive and the SQL dump are each chosen by newest
# mtime independently, so they may come from different backup runs.
restore_from_backup() {
  log WARN "=== Full restore from backup ==="

  # In the lookups below, `cut -f2-` keeps the whole path after the timestamp
  # (paths may contain spaces) and `|| true` keeps a missing backup directory
  # from aborting the script under pipefail.

  # Restore system config
  local latest_system
  latest_system=$(find "$SYSTEM_BACKUP_DIR" -name 'system_*.tar.gz' -printf '%T+ %p\n' \
    2>/dev/null | sort -r | head -1 | cut -d' ' -f2-) || true

  if [[ -n "$latest_system" ]]; then
    log INFO "Restoring system from $latest_system"
    if ! tar -xzf "$latest_system" -C / 2>/dev/null; then
      log ERROR "System restore failed"
    fi
  fi

  # Restore content
  local latest_content
  latest_content=$(find "$CONTENT_BACKUP_DIR" -name 'content_*.tar.gz' -printf '%T+ %p\n' \
    2>/dev/null | sort -r | head -1 | cut -d' ' -f2-) || true

  if [[ -n "$latest_content" ]]; then
    log INFO "Restoring content from $latest_content"
    if ! tar -xzf "$latest_content" -C / 2>/dev/null; then
      log ERROR "Content restore failed"
    fi
  fi

  # Restore database
  local latest_db
  latest_db=$(find "$CONTENT_BACKUP_DIR" -name 'content_*.sql.gz' -printf '%T+ %p\n' \
    2>/dev/null | sort -r | head -1 | cut -d' ' -f2-) || true

  if [[ -n "$latest_db" ]]; then
    log INFO "Restoring database from $latest_db"
    local mysql_cmd="mysql"
    if command -v mariadb &>/dev/null; then
      mysql_cmd="mariadb"
    fi
    # pipefail inside the subshell: a corrupt dump (zcat failure) must count
    # as a failed restore even if the client exits 0.
    if ! ( set -o pipefail; zcat "$latest_db" | "$mysql_cmd" 2>/dev/null ); then
      log ERROR "Database restore failed"
    fi
  fi

  return 0
}
|
||||
|
||||
# ──────────────────────────────────────────────
|
||||
# Status
|
||||
# ──────────────────────────────────────────────
|
||||
# Print a human-readable status report to stdout: whether a safe point is set
# (with its contents), count and newest entry of system and content archives,
# and the disk usage of both backup directories.
# Works before the first backup has run: missing backup directories are
# reported as "0 backups / (none)" instead of aborting under pipefail.
cmd_status() {
  echo "=== Moko Server Auto-Heal Status ==="
  echo ""

  # Safe point
  if has_safepoint; then
    echo "Safe point: SET"
    sed 's/^/ /' "$SAFEPOINT_FILE"
  else
    echo "Safe point: NOT SET (will auto-heal on next boot)"
  fi
  echo ""

  # System backups
  echo "System backups (${SYSTEM_BACKUP_DIR}):"
  local sys_count=0 latest_sys=""
  if [[ -d "$SYSTEM_BACKUP_DIR" ]]; then
    sys_count=$(find "$SYSTEM_BACKUP_DIR" -name 'system_*.tar.gz' 2>/dev/null | wc -l) || true
    latest_sys=$(find "$SYSTEM_BACKUP_DIR" -name 'system_*.tar.gz' -printf '%T+ %p\n' \
      2>/dev/null | sort -r | head -1) || true
  fi
  echo " Count: $sys_count (retain $SYSTEM_BACKUP_RETAIN)"
  if [[ -n "$latest_sys" ]]; then
    # Record format is "<mtime> <path>": split on the first space only so
    # paths containing spaces are printed in full.
    echo " Latest: ${latest_sys#* }"
    echo " Timestamp: ${latest_sys%% *}"
  else
    echo " Latest: (none)"
  fi
  echo ""

  # Content backups
  echo "Content backups (${CONTENT_BACKUP_DIR}):"
  local cnt_count=0 latest_cnt=""
  if [[ -d "$CONTENT_BACKUP_DIR" ]]; then
    cnt_count=$(find "$CONTENT_BACKUP_DIR" -name 'content_*.tar.gz' 2>/dev/null | wc -l) || true
    latest_cnt=$(find "$CONTENT_BACKUP_DIR" -name 'content_*.tar.gz' -printf '%T+ %p\n' \
      2>/dev/null | sort -r | head -1) || true
  fi
  echo " Count: $cnt_count (retain ${CONTENT_BACKUP_RETAIN_HOURS}h)"
  if [[ -n "$latest_cnt" ]]; then
    echo " Latest: ${latest_cnt#* }"
    echo " Timestamp: ${latest_cnt%% *}"
  else
    echo " Latest: (none)"
  fi
  echo ""

  # Disk usage (du fails for directories that do not exist yet — tolerated)
  echo "Backup disk usage:"
  du -sh "$SYSTEM_BACKUP_DIR" "$CONTENT_BACKUP_DIR" 2>/dev/null | sed 's/^/ /' || true
}
|
||||
|
||||
# ──────────────────────────────────────────────
|
||||
# Install helper — sets up cron + systemd
|
||||
# ──────────────────────────────────────────────
|
||||
# Install helper: creates the working directories, writes an example config
# (only if none exists), installs /etc/cron.d/moko-autoheal, and installs,
# enables and starts the systemd unit whose ExecStop records a safe point.
# Must be run as root. Returns 1 without changing anything otherwise.
cmd_install() {
  if [[ "${EUID:-$(id -u)}" -ne 0 ]]; then
    echo "install must be run as root" >&2
    return 1
  fi

  local script_path
  script_path=$(readlink -f "$0")

  echo "Installing Moko Auto-Heal..."

  # Create config directory plus every directory the other commands use, so
  # that `status` and `set-safepoint` work immediately after installation.
  mkdir -p /etc/moko "$(dirname "$LOG_FILE")" "$LOCK_DIR" \
    "$SYSTEM_BACKUP_DIR" "$CONTENT_BACKUP_DIR" "$(dirname "$SAFEPOINT_FILE")"

  # Write example config if none exists
  if [[ ! -f "$CONF_FILE" ]]; then
    cat > "$CONF_FILE" <<'CONF'
# /etc/moko/autoheal.conf — Server auto-heal configuration
# Uncomment and modify as needed

# BACKUP_ROOT="/var/backups/moko"
# SAFEPOINT_FILE="/var/run/moko/safepoint"
# LOG_FILE="/var/log/moko/autoheal.log"

# System backup paths (space-separated)
# SYSTEM_PATHS="/etc/nginx /etc/php /etc/mysql /etc/cron.d /etc/systemd/system"

# Content backup paths (space-separated)
# CONTENT_PATHS="/var/www"

# Database names (space-separated, empty = auto-detect all)
# DB_NAMES=""

# Retention
# SYSTEM_BACKUP_RETAIN=7 # daily backups to keep
# CONTENT_BACKUP_RETAIN_HOURS=24 # hours of content backups to keep
CONF
    echo " Created config: $CONF_FILE"
  fi

  # Install cron jobs (cron.d format: the sixth field is the user)
  local cron_file="/etc/cron.d/moko-autoheal"
  cat > "$cron_file" <<CRON
# Moko Server Auto-Heal — managed by server-autoheal.sh install
SHELL=/bin/bash
PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin

# Boot check — auto-heal if no safe point
@reboot root ${script_path} boot-check

# System backup — daily at 3:00 AM
0 3 * * * root ${script_path} backup-system

# Content backup — every 2 hours
0 */2 * * * root ${script_path} backup-content

# Cleanup expired backups — 30 min after each content backup
30 */2 * * * root ${script_path} cleanup
CRON
  echo " Installed cron: $cron_file"

  # Install shutdown hook to set safe point on clean shutdown.
  # The unit does nothing at start and stays "active"; the safe point is
  # written by ExecStop when systemd stops the unit. Because default
  # dependencies are disabled, Conflicts=shutdown.target is declared
  # explicitly so the unit is actually stopped during shutdown, and
  # After=local-fs.target orders that stop before local filesystems go away.
  local shutdown_hook="/etc/systemd/system/moko-safepoint.service"
  cat > "$shutdown_hook" <<UNIT
[Unit]
Description=Moko Safe Point — mark clean shutdown
DefaultDependencies=no
After=local-fs.target
Conflicts=shutdown.target
Before=shutdown.target reboot.target halt.target

[Service]
Type=oneshot
RemainAfterExit=yes
ExecStart=/bin/true
ExecStop=${script_path} set-safepoint

[Install]
WantedBy=multi-user.target
UNIT
  systemctl daemon-reload
  # --now also starts the unit: ExecStop only runs for an active unit, so
  # without it the first shutdown after installation would leave no safe point.
  systemctl enable --now moko-safepoint.service
  echo " Installed systemd hook: $shutdown_hook"

  echo ""
  echo "Done! Edit $CONF_FILE to configure paths for your server."
  echo "Run '${script_path} status' to verify."
}
|
||||
|
||||
# ──────────────────────────────────────────────
|
||||
# Main dispatcher
|
||||
# ──────────────────────────────────────────────
|
||||
# Command dispatcher.
# Arguments: $1 - command name (defaults to "help")
# Known commands are routed to the matching cmd_* function; help prints the
# header comment of this script; anything else prints an error and exits 1.
main() {
  local cmd="${1:-help}"

  case "$cmd" in
    boot-check | set-safepoint | clear-safepoint | backup-system | backup-content | cleanup | status | install)
      # Handler name is the command with dashes turned into underscores,
      # e.g. backup-system → cmd_backup_system.
      "cmd_${cmd//-/_}"
      ;;
    help | --help | -h)
      # Print the leading comment block (from line 2 up to the first blank
      # line), with the "# " prefix removed.
      sed -n '2,/^$/s/^# //p' "$0"
      echo ""
      echo "Commands: boot-check, set-safepoint, clear-safepoint,"
      echo " backup-system, backup-content, cleanup, status, install"
      ;;
    *)
      {
        echo "Unknown command: $cmd"
        echo "Run '$0 help' for usage"
      } >&2
      exit 1
      ;;
  esac
}
|
||||
|
||||
main "$@"
|
||||
@@ -0,0 +1,186 @@
|
||||
# Monitoring stack: Prometheus, node-exporter, cAdvisor, nginx-exporter,
# Grafana and mysqld-exporter.
#
# SECURITY: credentials must never be committed to this file. The three
# secrets below are read from the environment (for example an untracked .env
# file next to this compose file); `${VAR:?message}` makes `docker compose`
# refuse to start when one is missing:
#   GF_SECURITY_ADMIN_PASSWORD, GF_AUTH_GOOGLE_CLIENT_SECRET,
#   MYSQLD_EXPORTER_PASSWORD
# Any value that was previously committed here must be treated as leaked and
# rotated.
#
# NOTE(review): every image uses the `latest` tag; consider pinning versions
# for reproducible deployments.
networks:
  monitoring:
    driver: bridge
volumes:
  prometheus_data: null
  grafana_data: null
services:
  prometheus:
    image: prom/prometheus:latest
    container_name: prometheus
    restart: unless-stopped
    ports:
      # Published on loopback only
      - 127.0.0.1:9091:9090
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - ./targets:/etc/prometheus/targets:ro
      - prometheus_data:/prometheus
    command:
      - --config.file=/etc/prometheus/prometheus.yml
      - --storage.tsdb.path=/prometheus
      - --storage.tsdb.retention.time=90d
      - --web.enable-lifecycle
    extra_hosts:
      - host.docker.internal:host-gateway
    networks:
      - monitoring
    healthcheck:
      test:
        - CMD
        - wget
        - -qO-
        - http://localhost:9090/-/healthy
      interval: 30s
      timeout: 5s
      retries: 3
  node-exporter:
    image: prom/node-exporter:latest
    container_name: node-exporter
    restart: unless-stopped
    ports:
      - 127.0.0.1:9100:9100
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
      - /var/run/dbus/system_bus_socket:/var/run/dbus/system_bus_socket:ro
      - /var/lib/prometheus/node-exporter:/textfile:ro
    command:
      # `$$` is Compose's escape for a literal `$` in the regexes below
      - --path.procfs=/host/proc
      - --path.sysfs=/host/sys
      - --path.rootfs=/rootfs
      - --collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)
      - --collector.netclass.ignored-devices=^(veth.*|br-.*|docker.*)$$
      - --collector.diskstats.device-exclude=^(ram|loop|fd|dm-)\d+$$
      - --collector.systemd
      - --collector.systemd.unit-include=.+
      - --collector.textfile.directory=/textfile
    pid: host
    security_opt:
      - apparmor:unconfined
    networks:
      - monitoring
    healthcheck:
      test:
        - CMD
        - wget
        - --spider
        - -q
        - http://localhost:9100/metrics
      interval: 30s
      timeout: 5s
      retries: 3
  cadvisor:
    image: gcr.io/cadvisor/cadvisor:latest
    container_name: cadvisor
    restart: unless-stopped
    ports:
      - 127.0.0.1:8082:8080
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
      - /dev/disk/:/dev/disk:ro
    privileged: true
    devices:
      - /dev/kmsg
    networks:
      - monitoring
    healthcheck:
      test:
        - CMD
        - wget
        - --spider
        - -q
        - http://localhost:8080/healthz
      interval: 30s
      timeout: 5s
      retries: 3
  nginx-exporter:
    image: nginx/nginx-prometheus-exporter:latest
    container_name: nginx-exporter
    restart: unless-stopped
    network_mode: host
    command:
      # NOTE(review): with host networking this listens on every host
      # interface (0.0.0.0:9113), unlike the loopback-only ports above —
      # confirm a firewall restricts access.
      - --nginx.scrape-uri=http://127.0.0.1:8888/nginx_status
      - --web.listen-address=0.0.0.0:9113
  grafana:
    image: grafana/grafana:latest
    container_name: grafana
    restart: unless-stopped
    ports:
      - 127.0.0.1:3001:3000
    environment:
      - GF_SECURITY_ADMIN_USER=jmiller
      # Secret: supplied via environment, never committed
      - "GF_SECURITY_ADMIN_PASSWORD=${GF_SECURITY_ADMIN_PASSWORD:?GF_SECURITY_ADMIN_PASSWORD must be set}"
      - GF_SERVER_ROOT_URL=https://bench.mokoconsulting.tech/
      - GF_SERVER_DOMAIN=bench.mokoconsulting.tech
      - GF_AUTH_ANONYMOUS_ENABLED=true
      - GF_AUTH_ANONYMOUS_ORG_ROLE=Viewer
      - GF_AUTH_ANONYMOUS_ORG_ID=1
      - GF_USERS_ALLOW_SIGN_UP=false
      - GF_USERS_ALLOW_ORG_CREATE=false
      - GF_SECURITY_COOKIE_SECURE=true
      - GF_SECURITY_STRICT_TRANSPORT_SECURITY=true
      - GF_SECURITY_X_CONTENT_TYPE_OPTIONS=true
      - GF_SECURITY_X_XSS_PROTECTION=true
      - GF_LOG_MODE=console
      # NOTE(review): debug logging is verbose; confirm this is intended
      - GF_LOG_LEVEL=debug
      - GF_USERS_DEFAULT_THEME=dark
      - GF_BRANDING_APP_TITLE=Moko Bench
      - GF_BRANDING_LOGIN_TITLE=Moko Consulting
      - GF_BRANDING_LOGIN_SUBTITLE=Server Performance Dashboard
      - GF_DATE_FORMATS_FULL_DATE=YYYY-MM-DD HH:mm:ss
      - GF_INSTALL_PLUGINS=grafana-clock-panel,grafana-polystat-panel,yesoreyeram-infinity-datasource,natel-discrete-panel
      - GF_AUTH_GOOGLE_ENABLED=true
      - GF_AUTH_GOOGLE_CLIENT_ID=349391103517-oiq974b2gq4r3t9f9cf43im31gtruhml.apps.googleusercontent.com
      # Secret: supplied via environment, never committed
      - "GF_AUTH_GOOGLE_CLIENT_SECRET=${GF_AUTH_GOOGLE_CLIENT_SECRET:?GF_AUTH_GOOGLE_CLIENT_SECRET must be set}"
      - GF_AUTH_GOOGLE_SCOPES=openid email profile
      - GF_AUTH_GOOGLE_AUTH_URL=https://accounts.google.com/o/oauth2/v2/auth
      - GF_AUTH_GOOGLE_TOKEN_URL=https://oauth2.googleapis.com/token
      - GF_AUTH_GOOGLE_ALLOWED_DOMAINS=mokoconsulting.tech
      - GF_AUTH_GOOGLE_ALLOW_SIGN_UP=true
      - GF_AUTH_GOOGLE_AUTO_LOGIN=false
      - GF_AUTH_GOOGLE_SKIP_ORG_ROLE_SYNC=true
      # NOTE(review): combined with Google sign-up above, every new account
      # from the allowed domain is created as Admin — confirm this is wanted.
      - GF_USERS_AUTO_ASSIGN_ORG_ROLE=Admin
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/provisioning:/etc/grafana/provisioning:ro
      - ./grafana/custom.ini:/etc/grafana/grafana.ini:ro
      - ./grafana/dashboards:/var/lib/grafana/dashboards:ro
    networks:
      - monitoring
    depends_on:
      prometheus:
        condition: service_healthy
    healthcheck:
      test:
        - CMD
        - wget
        - --spider
        - -q
        - http://localhost:3000/api/health
      interval: 30s
      timeout: 5s
      retries: 3
  mysqld-exporter:
    image: prom/mysqld-exporter:latest
    container_name: mysqld-exporter
    restart: unless-stopped
    network_mode: host
    volumes:
      - /opt/gitea-server-setup/docker/monitoring/.mysqld-exporter.cnf:/cfg/.my.cnf:ro
    environment:
      # Secret: supplied via environment, never committed
      MYSQLD_EXPORTER_PASSWORD: "${MYSQLD_EXPORTER_PASSWORD:?MYSQLD_EXPORTER_PASSWORD must be set}"
    command:
      - --config.my-cnf=/cfg/.my.cnf
      - --web.listen-address=127.0.0.1:9104
    healthcheck:
      test:
        - CMD-SHELL
        - wget -q --spider http://localhost:9104/metrics || exit 1
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 15s
|
||||
@@ -79,6 +79,10 @@
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"options": {
|
||||
"legend": { "displayMode": "list", "placement": "right", "calcs": [] },
|
||||
"tooltip": { "mode": "multi" }
|
||||
},
|
||||
"title": "CPU Usage %",
|
||||
"type": "timeseries"
|
||||
}
|
||||
|
||||
@@ -22,6 +22,10 @@
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"options": {
|
||||
"legend": { "displayMode": "list", "placement": "right", "calcs": [] },
|
||||
"tooltip": { "mode": "multi" }
|
||||
},
|
||||
"title": "CPU Usage by Container",
|
||||
"type": "timeseries"
|
||||
}
|
||||
|
||||
@@ -22,6 +22,10 @@
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"options": {
|
||||
"legend": { "displayMode": "list", "placement": "right", "calcs": [] },
|
||||
"tooltip": { "mode": "multi" }
|
||||
},
|
||||
"title": "Memory Usage by Container",
|
||||
"type": "timeseries"
|
||||
}
|
||||
|
||||
@@ -38,6 +38,10 @@
|
||||
"refId": "D"
|
||||
}
|
||||
],
|
||||
"options": {
|
||||
"legend": { "displayMode": "list", "placement": "right", "calcs": [] },
|
||||
"tooltip": { "mode": "multi" }
|
||||
},
|
||||
"title": "Memory Usage",
|
||||
"type": "timeseries"
|
||||
}
|
||||
|
||||
@@ -28,6 +28,10 @@
|
||||
"refId": "C"
|
||||
}
|
||||
],
|
||||
"options": {
|
||||
"legend": { "displayMode": "list", "placement": "right", "calcs": [] },
|
||||
"tooltip": { "mode": "multi" }
|
||||
},
|
||||
"title": "Connections",
|
||||
"type": "timeseries"
|
||||
}
|
||||
|
||||
@@ -59,6 +59,10 @@
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"options": {
|
||||
"legend": { "displayMode": "list", "placement": "right", "calcs": [] },
|
||||
"tooltip": { "mode": "multi" }
|
||||
},
|
||||
"title": "Queries per Second",
|
||||
"type": "timeseries"
|
||||
}
|
||||
|
||||
@@ -69,6 +69,10 @@
|
||||
"refId": "D"
|
||||
}
|
||||
],
|
||||
"options": {
|
||||
"legend": { "displayMode": "list", "placement": "right", "calcs": [] },
|
||||
"tooltip": { "mode": "multi" }
|
||||
},
|
||||
"title": "Network Traffic",
|
||||
"type": "timeseries"
|
||||
}
|
||||
|
||||
@@ -33,6 +33,10 @@
|
||||
"refId": "D"
|
||||
}
|
||||
],
|
||||
"options": {
|
||||
"legend": { "displayMode": "list", "placement": "right", "calcs": [] },
|
||||
"tooltip": { "mode": "multi" }
|
||||
},
|
||||
"title": "Connections Over Time",
|
||||
"type": "timeseries"
|
||||
}
|
||||
|
||||
@@ -36,6 +36,10 @@
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"options": {
|
||||
"legend": { "displayMode": "list", "placement": "right", "calcs": [] },
|
||||
"tooltip": { "mode": "multi" }
|
||||
},
|
||||
"title": "Request Rate",
|
||||
"type": "timeseries"
|
||||
}
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
"placement": "right"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
|
||||
@@ -43,7 +43,7 @@
|
||||
"min"
|
||||
],
|
||||
"displayMode": "table",
|
||||
"placement": "bottom"
|
||||
"placement": "right"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
|
||||
@@ -37,7 +37,8 @@
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
"placement": "right",
|
||||
"calcs": []
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
|
||||
@@ -85,7 +85,7 @@
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"expr": "probe_success{site_name=~\"$site\", job=\"blackbox-http\"} and on(site_name) label_replace(joomla_site_online{site=~\"$site\"} == 1, \"site_name\", \"$1\", \"site\", \"(.+)\")",
|
||||
"expr": "probe_success{site_name=~\"$site\", job=\"blackbox-http\"}",
|
||||
"instant": true, "format": "table", "refId": "STATUS"
|
||||
},
|
||||
{
|
||||
@@ -100,7 +100,7 @@
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"expr": "label_replace(joomla_site_api_reachable{site=~\"$site\"}, \"site_name\", \"$1\", \"site\", \"(.+)\") and on(site_name) label_replace(joomla_site_online{site=~\"$site\"} == 1, \"site_name\", \"$1\", \"site\", \"(.+)\")",
|
||||
"expr": "label_replace(joomla_site_api_reachable{site=~\"$site\"}, \"site_name\", \"$1\", \"site\", \"(.+)\")",
|
||||
"instant": true, "format": "table", "refId": "API"
|
||||
},
|
||||
{
|
||||
@@ -255,22 +255,18 @@
|
||||
],
|
||||
"transformations": [
|
||||
{ "id": "joinByField", "options": { "byField": "site", "mode": "outer" } },
|
||||
{ "id": "filterFieldsByName", "options": { "include": { "pattern": "^(site_url|version|Value #).*" } } },
|
||||
{ "id": "filterFieldsByName", "options": { "include": { "pattern": "^(site_url|version|Value #(SYSTEM|EXTUPDATES|TOTAL|ENABLED|DISABLED))$" } } },
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"renameByName": {
|
||||
"site_url": "Site",
|
||||
"version": "Version",
|
||||
"Value #VERSION": "v_hidden",
|
||||
"Value #SYSTEM": "System",
|
||||
"Value #EXTUPDATES": "Ext Updates",
|
||||
"Value #TOTAL": "Total",
|
||||
"Value #ENABLED": "Enabled",
|
||||
"Value #DISABLED": "Disabled"
|
||||
},
|
||||
"excludeByName": {
|
||||
"v_hidden": true
|
||||
}
|
||||
}
|
||||
},
|
||||
@@ -294,108 +290,32 @@
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "s"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 16,
|
||||
"x": 0,
|
||||
"y": 47
|
||||
},
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"fieldConfig": { "defaults": { "unit": "s" } },
|
||||
"gridPos": { "h": 8, "w": 16, "x": 0, "y": 47 },
|
||||
"id": 20,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
}
|
||||
},
|
||||
"options": { "legend": { "displayMode": "list", "placement": "bottom" }, "tooltip": { "mode": "multi" } },
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"expr": "probe_http_duration_seconds{site_name=~\"$site\", job=\"blackbox-http\", phase=\"transfer\"}",
|
||||
"legendFormat": "{{site_name}} transfer"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"expr": "probe_http_duration_seconds{site_name=~\"$site\", job=\"blackbox-http\", phase=\"processing\"}",
|
||||
"legendFormat": "{{site_name}} processing"
|
||||
}
|
||||
{ "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, "expr": "probe_http_duration_seconds{site_name=~\"$site\", job=\"blackbox-http\", phase=\"transfer\"}", "legendFormat": "{{site_name}} transfer" },
|
||||
{ "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, "expr": "probe_http_duration_seconds{site_name=~\"$site\", job=\"blackbox-http\", phase=\"processing\"}", "legendFormat": "{{site_name}} processing" }
|
||||
],
|
||||
"title": "Response Time",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"max": 10,
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 2
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 5
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "s"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 8,
|
||||
"x": 16,
|
||||
"y": 47
|
||||
},
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"fieldConfig": { "defaults": { "max": 10, "min": 0, "thresholds": { "steps": [{ "color": "green", "value": 0 }, { "color": "yellow", "value": 2 }, { "color": "red", "value": 5 }] }, "unit": "s" } },
|
||||
"gridPos": { "h": 8, "w": 8, "x": 16, "y": 47 },
|
||||
"id": 22,
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"expr": "probe_duration_seconds{site_name=~\"$site\", job=\"blackbox-http\"}",
|
||||
"legendFormat": "{{site_name}}"
|
||||
}
|
||||
{ "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, "expr": "probe_duration_seconds{site_name=~\"$site\", job=\"blackbox-http\"}", "legendFormat": "{{site_name}}" }
|
||||
],
|
||||
"title": "Total Duration",
|
||||
"type": "gauge"
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 55
|
||||
},
|
||||
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 55 },
|
||||
"id": 104,
|
||||
"title": "Backup Status",
|
||||
"type": "row"
|
||||
@@ -403,78 +323,26 @@
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"noValue": "—",
|
||||
"custom": { "align": "center", "cellOptions": { "type": "auto" } }
|
||||
},
|
||||
"defaults": { "noValue": "—", "custom": { "align": "center", "cellOptions": { "type": "auto" } } },
|
||||
"overrides": [
|
||||
{
|
||||
"matcher": { "id": "byName", "options": "Site" },
|
||||
"properties": [
|
||||
{ "id": "custom.width", "value": 300 },
|
||||
{ "id": "custom.align", "value": "left" },
|
||||
{ "id": "links", "value": [{ "title": "Manage Backups", "url": "${__value.text}/administrator/index.php?option=com_akeebabackup&view=Manage", "targetBlank": true }] }
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": { "id": "byName", "options": "Status" },
|
||||
"properties": [
|
||||
{ "id": "mappings", "value": [{ "options": { "0": { "color": "red", "text": "FAILED" }, "1": { "color": "green", "text": "OK" } }, "type": "value" }] },
|
||||
{ "id": "thresholds", "value": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] } },
|
||||
{ "id": "custom.cellOptions", "value": { "type": "color-background", "mode": "basic" } }
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": { "id": "byName", "options": "Age" },
|
||||
"properties": [
|
||||
{ "id": "unit", "value": "s" },
|
||||
{ "id": "thresholds", "value": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 172800 }, { "color": "red", "value": 604800 }] } },
|
||||
{ "id": "custom.cellOptions", "value": { "type": "color-text" } },
|
||||
{ "id": "mappings", "value": [{ "options": { "-1": { "text": "—" } }, "type": "value" }] }
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": { "id": "byName", "options": "Records" },
|
||||
"properties": [
|
||||
{ "id": "mappings", "value": [{ "options": { "0": { "text": "—" } }, "type": "value" }] }
|
||||
]
|
||||
}
|
||||
{ "matcher": { "id": "byName", "options": "Site" }, "properties": [{ "id": "custom.width", "value": 300 }, { "id": "custom.align", "value": "left" }, { "id": "links", "value": [{ "title": "Manage Backups", "url": "${__value.text}/administrator/index.php?option=com_akeebabackup&view=Manage", "targetBlank": true }] }] },
|
||||
{ "matcher": { "id": "byName", "options": "Status" }, "properties": [{ "id": "mappings", "value": [{ "options": { "0": { "color": "red", "text": "FAILED" }, "1": { "color": "green", "text": "OK" } }, "type": "value" }] }, { "id": "thresholds", "value": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] } }, { "id": "custom.cellOptions", "value": { "type": "color-background", "mode": "basic" } }] },
|
||||
{ "matcher": { "id": "byName", "options": "Age" }, "properties": [{ "id": "unit", "value": "s" }, { "id": "thresholds", "value": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 172800 }, { "color": "red", "value": 604800 }] } }, { "id": "custom.cellOptions", "value": { "type": "color-text" } }, { "id": "mappings", "value": [{ "options": { "-1": { "text": "—" } }, "type": "value" }] }] },
|
||||
{ "matcher": { "id": "byName", "options": "Records" }, "properties": [{ "id": "mappings", "value": [{ "options": { "0": { "text": "—" } }, "type": "value" }] }] }
|
||||
]
|
||||
},
|
||||
"gridPos": { "x": 0, "y": 56, "w": 24, "h": 8 },
|
||||
"id": 40,
|
||||
"options": { "showHeader": true, "cellHeight": "sm", "footer": { "show": false } },
|
||||
"targets": [
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"expr": "label_replace(max by (site, exported_instance) (joomla_backup_status{site=~\"$site\"}), \"site_url\", \"$1\", \"exported_instance\", \"(.+)\")",
|
||||
"instant": true, "format": "table", "refId": "STATUS"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"expr": "joomla_backup_age_seconds{site=~\"$site\"}",
|
||||
"instant": true, "format": "table", "refId": "AGE"
|
||||
},
|
||||
{
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"expr": "joomla_backup_records_total{site=~\"$site\"}",
|
||||
"instant": true, "format": "table", "refId": "RECORDS"
|
||||
}
|
||||
{ "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, "expr": "label_replace(max by (site, exported_instance) (joomla_backup_status{site=~\"$site\"}), \"site_url\", \"$1\", \"exported_instance\", \"(.+)\")", "instant": true, "format": "table", "refId": "STATUS" },
|
||||
{ "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, "expr": "joomla_backup_age_seconds{site=~\"$site\"}", "instant": true, "format": "table", "refId": "AGE" },
|
||||
{ "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, "expr": "joomla_backup_records_total{site=~\"$site\"}", "instant": true, "format": "table", "refId": "RECORDS" }
|
||||
],
|
||||
"transformations": [
|
||||
{ "id": "joinByField", "options": { "byField": "site", "mode": "outer" } },
|
||||
{ "id": "filterFieldsByName", "options": { "include": { "pattern": "^(site_url|Value #).*" } } },
|
||||
{
|
||||
"id": "organize",
|
||||
"options": {
|
||||
"renameByName": {
|
||||
"site_url": "Site",
|
||||
"Value #STATUS": "Status",
|
||||
"Value #AGE": "Age",
|
||||
"Value #RECORDS": "Records"
|
||||
}
|
||||
}
|
||||
},
|
||||
{ "id": "organize", "options": { "renameByName": { "site_url": "Site", "Value #STATUS": "Status", "Value #AGE": "Age", "Value #RECORDS": "Records" } } },
|
||||
{ "id": "sortBy", "options": { "sort": [{ "field": "Site", "desc": false }] } }
|
||||
],
|
||||
"title": "Backup Status",
|
||||
@@ -482,96 +350,31 @@
|
||||
},
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 68
|
||||
},
|
||||
"gridPos": { "h": 1, "w": 24, "x": 0, "y": 68 },
|
||||
"id": 103,
|
||||
"title": "Uptime History",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"fillOpacity": 10,
|
||||
"lineWidth": 2
|
||||
},
|
||||
"max": 1,
|
||||
"min": 0,
|
||||
"thresholds": {
|
||||
"steps": [
|
||||
{
|
||||
"color": "red",
|
||||
"value": 0
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 0.95
|
||||
},
|
||||
{
|
||||
"color": "green",
|
||||
"value": 0.99
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "percentunit"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 69
|
||||
},
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"fieldConfig": { "defaults": { "custom": { "fillOpacity": 10, "lineWidth": 2 }, "max": 1, "min": 0, "thresholds": { "steps": [{ "color": "red", "value": 0 }, { "color": "yellow", "value": 0.95 }, { "color": "green", "value": 0.99 }] }, "unit": "percentunit" } },
|
||||
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 69 },
|
||||
"id": 30,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single"
|
||||
}
|
||||
},
|
||||
"options": { "legend": { "displayMode": "list", "placement": "bottom" }, "tooltip": { "mode": "single" } },
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"expr": "avg_over_time(probe_success{site_name=~\"$site\", job=\"blackbox-http\"}[1h])",
|
||||
"legendFormat": "{{site_name}}"
|
||||
}
|
||||
{ "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, "expr": "avg_over_time(probe_success{site_name=~\"$site\", job=\"blackbox-http\"}[1h])", "legendFormat": "{{site_name}}" }
|
||||
],
|
||||
"title": "Availability (30d)",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "5m",
|
||||
"tags": [
|
||||
"mokowaas",
|
||||
"joomla",
|
||||
"endpoints",
|
||||
"monitoring"
|
||||
],
|
||||
"tags": ["mokowaas", "joomla", "endpoints", "monitoring"],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"current": { "text": "All", "value": "$__all" },
|
||||
"datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" },
|
||||
"includeAll": true,
|
||||
"label": "Site",
|
||||
"multi": true,
|
||||
@@ -583,10 +386,7 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-24h",
|
||||
"to": "now"
|
||||
},
|
||||
"time": { "from": "now-24h", "to": "now" },
|
||||
"timezone": "browser",
|
||||
"title": "MokoWaaS",
|
||||
"uid": "mokowaas",
|
||||
|
||||
Reference in New Issue
Block a user