Files
MokoSuiteBackup/source/packages/com_mokosuitebackup/src/Engine/DatabaseDumper.php
T
Jonathan Miller 5698c074da
Universal: PR Check / Branch Policy (pull_request) Failing after 1s
Joomla: Extension CI / Release Readiness Check (pull_request) Failing after 5s
Universal: PR Check / Validate PR (pull_request) Failing after 7s
Generic: Repo Health / Site Health (pull_request) Has been skipped
Universal: PR Check / Secret Scan (pull_request) Successful in 7s
Generic: Repo Health / Access control (pull_request) Successful in 2s
Joomla: Extension CI / Lint & Validate (pull_request) Failing after 9s
Branch Cleanup / Delete merged branch (pull_request) Successful in 3s
RC Revert / Rename rc/ back to dev/ (pull_request) Has been skipped
Universal: Workflow Sync Trigger / Sync workflows to live repos (pull_request) Failing after 6s
Joomla: Metadata Validation / Validate Joomla Metadata (pull_request) Successful in 54s
Universal: Build & Release / Promote to RC (pull_request) Has been skipped
Universal: Build & Release / Build & Release Pipeline (pull_request) Successful in 33s
Joomla: Extension CI / Tests (PHP 8.2) (pull_request) Has been cancelled
Joomla: Extension CI / Tests (PHP 8.3) (pull_request) Has been cancelled
Joomla: Extension CI / PHPStan Analysis (pull_request) Has been cancelled
Joomla: Extension CI / Build RC Pre-Release (pull_request) Has been cancelled
Universal: PR Check / Build RC Package (pull_request) Has been cancelled
Universal: PR Check / Report Issues (pull_request) Has been cancelled
Generic: Repo Health / Scripts governance (pull_request) Has been cancelled
Generic: Repo Health / Repository health (pull_request) Has been cancelled
Generic: Repo Health / Report Issues (pull_request) Has been cancelled
feat: data sanitization — passwords, emails, sessions (#129)
New "Data Sanitization" fieldset on profile form with four options:
- Sanitize User Passwords: replaces all bcrypt hashes with invalid sentinel
- Preserve Super Admin: keeps Super Users group passwords intact
- Sanitize User Emails: replaces with user123@sanitized.example.com
- Clear Session Data: excludes #__session table data (default: on)

DatabaseDumper sanitizes rows inline during dump — both in-memory
and file-streaming paths. Super admin detection uses group_id=8
from #__user_usergroup_map with static caching.

Use cases: sharing backups, creating demo/staging sites, GDPR compliance.

Partial #129 (Part 2 — restore script password reset — tracked separately)
2026-06-23 12:06:19 -05:00

474 lines
12 KiB
PHP

<?php
/**
* @package MokoSuiteBackup
* @subpackage com_mokosuitebackup
* @author Moko Consulting <hello@mokoconsulting.tech>
* @copyright Copyright (C) 2026 Moko Consulting. All rights reserved.
* @license GNU General Public License version 3 or later; see LICENSE
*/
namespace Joomla\Component\MokoSuiteBackup\Administrator\Engine;
defined('_JEXEC') or die;
use Joomla\CMS\Factory;
/**
 * Dumps every table carrying the site prefix to SQL, either into a string
 * ({@see dump()}) or streamed to a file ({@see dumpToFile()}).
 *
 * Table names are written with the abstract `#__` prefix. Individual tables can
 * be excluded entirely, data-only or structure-only, and rows of `#__users` can
 * be sanitized (passwords / emails) while they are being written.
 *
 * Both public dump methods consume the same private line generator, so the file
 * written by dumpToFile() is always exactly `dump() . "\n"`.
 */
class DatabaseDumper
{
    /** Known invalid bcrypt hash used for sanitized passwords */
    private const SANITIZED_HASH = '$2y$10$SANITIZED.MOKOSUITEBACKUP.INVALID.HASH.DO.NOT.USE.000000';

    /** Suffix on an exclude entry meaning "keep structure, skip data" */
    private const DATA_ONLY_SUFFIX = ':data-only';

    /** Suffix on an exclude entry meaning "keep data, skip structure" */
    private const STRUCTURE_ONLY_SUFFIX = ':structure-only';

    /** Number of rows fetched per SELECT while dumping table data */
    private const CHUNK_SIZE = 500;

    /** Group id treated as "Super Users" when preserving super admin accounts */
    private const SUPER_USERS_GROUP_ID = 8;

    /** Comment line framing each table section of the dump */
    private const SEPARATOR = '-- --------------------------------------------------------';

    /** @var array Tables to exclude entirely (both structure and data) */
    private array $excludeBoth = [];

    /** @var array Tables to exclude data only (structure is kept) */
    private array $excludeDataOnly = [];

    /** @var array Tables to exclude structure only (data is kept — unusual) */
    private array $excludeStructureOnly = [];

    /** @var int Number of tables written by the most recent dump */
    private int $tablesCount = 0;

    /** @var bool Whether to sanitize user passwords */
    private bool $sanitizePasswords = false;

    /** @var bool Whether to preserve super admin password when sanitizing */
    private bool $preserveSuperAdmin = false;

    /** @var bool Whether to sanitize user emails */
    private bool $sanitizeEmails = false;

    /** @var bool Whether to clear session data */
    private bool $sanitizeSessions = false;

    /** @var array<int, true>|null Super admin user ids as keys; null until first looked up */
    private ?array $superAdminIds = null;

    /**
     * @param array $excludeTables      Table names to exclude (with #__ prefix). An entry may end in
     *                                  ":data-only" or ":structure-only" to exclude just that part.
     * @param bool  $sanitizePasswords  Replace user password hashes with invalid value
     * @param bool  $preserveSuperAdmin Keep super admin password when sanitizing
     * @param bool  $sanitizeEmails     Replace user emails with sanitized placeholders
     * @param bool  $sanitizeSessions   Skip session table data entirely
     */
    public function __construct(
        array $excludeTables = [],
        bool $sanitizePasswords = false,
        bool $preserveSuperAdmin = false,
        bool $sanitizeEmails = false,
        bool $sanitizeSessions = false
    ) {
        foreach ($excludeTables as $entry) {
            if (str_ends_with($entry, self::DATA_ONLY_SUFFIX)) {
                $this->excludeDataOnly[] = substr($entry, 0, -strlen(self::DATA_ONLY_SUFFIX));
            } elseif (str_ends_with($entry, self::STRUCTURE_ONLY_SUFFIX)) {
                $this->excludeStructureOnly[] = substr($entry, 0, -strlen(self::STRUCTURE_ONLY_SUFFIX));
            } else {
                $this->excludeBoth[] = $entry;
            }
        }

        $this->sanitizePasswords  = $sanitizePasswords;
        $this->preserveSuperAdmin = $preserveSuperAdmin;
        $this->sanitizeEmails     = $sanitizeEmails;
        $this->sanitizeSessions   = $sanitizeSessions;

        /* If session sanitization is on, auto-exclude session table data */
        if ($sanitizeSessions) {
            $this->excludeDataOnly[] = '#__session';
        }
    }

    /**
     * Dump all database tables to SQL.
     *
     * @return string The SQL dump
     */
    public function dump(): string
    {
        $lines = iterator_to_array($this->generateLines(Factory::getDbo()), false);

        return implode("\n", $lines);
    }

    /**
     * Dump all database tables directly to a file, streaming line by line.
     * Avoids building the entire dump in RAM.
     *
     * @param string $filePath Absolute path to write the SQL file
     *
     * @return int Size of the dump file in bytes
     *
     * @throws \RuntimeException If the file cannot be opened or a write fails
     */
    public function dumpToFile(string $filePath): int
    {
        $db = Factory::getDbo();
        $fp = fopen($filePath, 'w');

        if ($fp === false) {
            throw new \RuntimeException('Cannot open dump file for writing: ' . $filePath);
        }

        try {
            foreach ($this->generateLines($db) as $line) {
                $this->writeOrFail($fp, $line . "\n", $filePath);
            }
        } finally {
            // Always release the handle, including when a query or a write throws.
            fclose($fp);
        }

        // filesize() results are cached per path; drop any stale entry first.
        clearstatcache(true, $filePath);

        return filesize($filePath) ?: 0;
    }

    /**
     * Check if passwords were sanitized (for use by callers to log the action).
     */
    public function isPasswordSanitizationEnabled(): bool
    {
        return $this->sanitizePasswords;
    }

    /**
     * Get the sentinel hash used for sanitized passwords.
     */
    public static function getSanitizedHash(): string
    {
        return self::SANITIZED_HASH;
    }

    /**
     * Number of tables written by the most recent dump()/dumpToFile() call.
     */
    public function getTablesCount(): int
    {
        return $this->tablesCount;
    }

    /**
     * Produce the dump one line at a time: file header, then one section per table.
     *
     * A "line" is one logical entry; a CREATE TABLE statement is yielded as a
     * single entry even though it contains embedded newlines.
     *
     * @param object $db Database driver returned by Factory::getDbo()
     *
     * @return \Generator<int, string>
     */
    private function generateLines(object $db): \Generator
    {
        // Count only the tables of this run, not of earlier dumps on the same instance.
        $this->tablesCount = 0;
        $prefix = $db->getPrefix();

        yield '-- MokoSuiteBackup Database Dump';
        yield '-- Generated: ' . date('Y-m-d H:i:s');
        yield '-- Server: ' . $db->getServerType();
        yield '-- Database: ' . $db->getName();
        yield '-- Original Prefix: ' . $prefix;
        yield '-- Abstract Prefix: #__';
        yield '-- Note: Table names use #__ placeholder. Replace with your prefix on restore.';
        yield '';
        yield 'SET SQL_MODE = "NO_AUTO_VALUE_ON_ZERO";';
        yield 'SET time_zone = "+00:00";';
        yield '';

        foreach ($db->getTableList() as $table) {
            // Only tables carrying the site prefix belong to this site.
            if (!str_starts_with($table, $prefix)) {
                continue;
            }

            $abstractName = '#__' . substr($table, strlen($prefix));

            if ($this->isExcludedBoth($abstractName, $table)) {
                continue;
            }

            $this->tablesCount++;

            yield from $this->generateTableLines($db, $table, $abstractName, $prefix);
        }
    }

    /**
     * Produce the dump lines for a single table: banner, structure, data.
     *
     * @param object $db           Database driver
     * @param string $table        Real table name (with the live prefix)
     * @param string $abstractName Table name with the #__ placeholder prefix
     * @param string $prefix       Live table prefix
     *
     * @return \Generator<int, string>
     */
    private function generateTableLines(object $db, string $table, string $abstractName, string $prefix): \Generator
    {
        $skipData      = $this->isExcludedDataOnly($abstractName, $table);
        $skipStructure = $this->isExcludedStructureOnly($abstractName, $table);

        yield self::SEPARATOR;
        yield '-- Table: ' . $abstractName;

        if ($skipData) {
            yield '-- (data excluded)';
        }

        if ($skipStructure) {
            yield '-- (structure excluded)';
        }

        yield self::SEPARATOR;
        yield '';

        // Get CREATE TABLE statement (unless structure is excluded)
        if (!$skipStructure) {
            $db->setQuery('SHOW CREATE TABLE ' . $db->quoteName($table));
            $createRow = $db->loadRow();

            // Without a CREATE statement the table is skipped entirely (banner only).
            if (!$createRow || empty($createRow[1])) {
                return;
            }

            // Replace all occurrences of the live prefix with #__ in CREATE TABLE
            // output — covers the table itself and FK REFERENCES to other tables
            $createSql = str_replace('`' . $prefix, '`#__', $createRow[1]);

            yield 'DROP TABLE IF EXISTS `' . $abstractName . '`;';
            yield $createSql . ';';
            yield '';
        }

        // Dump data (unless data is excluded)
        if ($skipData) {
            yield '';

            return;
        }

        $db->setQuery('SELECT COUNT(*) FROM ' . $db->quoteName($table));
        $rowCount = (int) $db->loadResult();

        if ($rowCount === 0) {
            yield '-- (empty table)';
            yield '';

            return;
        }

        // NOTE(review): the paged SELECT has no ORDER BY, so chunk boundaries rely on the
        // server returning rows in a stable order between queries — confirm for target engines.
        for ($offset = 0; $offset < $rowCount; $offset += self::CHUNK_SIZE) {
            $db->setQuery(
                $db->getQuery(true)
                    ->select('*')
                    ->from($db->quoteName($table)),
                $offset,
                self::CHUNK_SIZE
            );

            $rows = $db->loadAssocList();

            if (empty($rows)) {
                break;
            }

            foreach ($rows as $row) {
                $this->sanitizeRow($row, $abstractName, $db);

                yield $this->buildInsertStatement($row, $abstractName, $db);
            }
        }

        yield '';
    }

    /**
     * Build one INSERT statement for a row; null values become SQL NULL,
     * everything else is passed through the driver's quote().
     *
     * @param array  $row          Column name => value
     * @param string $abstractName Table name with the #__ placeholder prefix
     * @param object $db           Database driver
     */
    private function buildInsertStatement(array $row, string $abstractName, object $db): string
    {
        $values = [];

        foreach ($row as $value) {
            $values[] = $value === null ? 'NULL' : $db->quote($value);
        }

        $columns = array_map([$db, 'quoteName'], array_keys($row));

        return 'INSERT INTO `' . $abstractName . '`'
            . ' (' . implode(', ', $columns) . ')'
            . ' VALUES (' . implode(', ', $values) . ');';
    }

    /**
     * Write data to the dump file, failing loudly on an error or short write
     * so a truncated backup is never reported as a success.
     *
     * @param resource $fp       Open file handle
     * @param string   $data     Bytes to write
     * @param string   $filePath Path used in the error message
     *
     * @throws \RuntimeException If not all bytes could be written
     */
    private function writeOrFail($fp, string $data, string $filePath): void
    {
        $written = fwrite($fp, $data);

        if ($written === false || $written < strlen($data)) {
            throw new \RuntimeException('Failed writing to dump file: ' . $filePath);
        }
    }

    /**
     * Check if a table is fully excluded (both data and structure).
     */
    private function isExcludedBoth(string $abstractName, string $realName): bool
    {
        return in_array($abstractName, $this->excludeBoth, true)
            || in_array($realName, $this->excludeBoth, true);
    }

    /**
     * Check if a table's data is excluded (structure only).
     */
    private function isExcludedDataOnly(string $abstractName, string $realName): bool
    {
        return in_array($abstractName, $this->excludeDataOnly, true)
            || in_array($realName, $this->excludeDataOnly, true);
    }

    /**
     * Check if a table's structure is excluded (data only).
     */
    private function isExcludedStructureOnly(string $abstractName, string $realName): bool
    {
        return in_array($abstractName, $this->excludeStructureOnly, true)
            || in_array($realName, $this->excludeStructureOnly, true);
    }

    /**
     * Sanitize a row if it belongs to the users table and sanitization is enabled.
     *
     * Emails are replaced with "user<id>@sanitized.example.com" (only when the row
     * has both an email and an id); passwords are replaced with the sentinel hash.
     * When super admins are preserved, their rows are left untouched.
     *
     * @param array  $row           Row to sanitize in place
     * @param string $abstractTable Table name with the #__ placeholder prefix
     * @param object $db            Database driver, used to look up super admins
     */
    private function sanitizeRow(array &$row, string $abstractTable, object $db): void
    {
        if ($abstractTable !== '#__users') {
            return;
        }

        if (!$this->sanitizePasswords && !$this->sanitizeEmails) {
            return;
        }

        // A row is protected only when it can be identified as a super admin.
        $isProtected = $this->preserveSuperAdmin
            && isset($row['id'])
            && $this->isSuperAdmin((int) $row['id'], $db);

        if ($isProtected) {
            return;
        }

        if ($this->sanitizeEmails && isset($row['email'], $row['id'])) {
            $row['email'] = 'user' . (int) $row['id'] . '@sanitized.example.com';
        }

        if ($this->sanitizePasswords && isset($row['password'])) {
            $row['password'] = self::SANITIZED_HASH;
        }
    }

    /**
     * Check if a user ID belongs to the Super Users group (group_id = 8).
     *
     * The id list is loaded once per instance. If the lookup query fails, the
     * list stays empty, so every user is treated as a regular user and sanitized.
     */
    private function isSuperAdmin(int $userId, object $db): bool
    {
        if ($this->superAdminIds === null) {
            $this->superAdminIds = [];

            try {
                $db->setQuery(
                    $db->getQuery(true)
                        ->select('DISTINCT ' . $db->quoteName('user_id'))
                        ->from($db->quoteName($db->getPrefix() . 'user_usergroup_map'))
                        ->where($db->quoteName('group_id') . ' = ' . self::SUPER_USERS_GROUP_ID)
                );

                $ids = array_map('intval', $db->loadColumn() ?: []);
                $this->superAdminIds = array_fill_keys($ids, true);
            } catch (\Throwable $e) {
                // Best effort: keep the empty list so nobody is preserved by mistake.
            }
        }

        return isset($this->superAdminIds[$userId]);
    }
}