diff --git a/source/packages/com_mokoog/src/Controller/BatchController.php b/source/packages/com_mokoog/src/Controller/BatchController.php
index ee2aff8..2031b4e 100644
--- a/source/packages/com_mokoog/src/Controller/BatchController.php
+++ b/source/packages/com_mokoog/src/Controller/BatchController.php
@@ -73,7 +73,10 @@ class BatchController extends BaseController
         }
 
         $app   = Factory::getApplication();
-        $limit = min($app->getInput()->getInt('limit', 50), 200);
+        $input  = $app->getInput();
+        // Clamp to 1..200: a limit of 0 or less would make setQuery() apply no LIMIT at all.
+        $limit  = max(1, min($input->getInt('limit', 50), 200));
+        $lastId = max(0, $input->getInt('lastid', 0));
 
         $db    = Factory::getContainer()->get(\Joomla\Database\DatabaseInterface::class);
         $query = $db->getQuery(true)
@@ -88,18 +91,25 @@ class BatchController extends BaseController
             )
             ->where($db->quoteName('c.state') . ' = 1')
             ->where($db->quoteName('t.id') . ' IS NULL')
+            ->where($db->quoteName('c.id') . ' > ' . $lastId)
             ->order($db->quoteName('c.id') . ' ASC');
 
-        // Always offset=0: processed articles now have #__mokoog_tags rows
-        // and are excluded by the LEFT JOIN ... IS NULL filter automatically.
+        // Cursor-based pagination by id: each chunk fetches the next articles whose
+        // id is greater than the previous chunk's highest id. A row that fails to
+        // insert is passed over on the next chunk (its id is already behind the
+        // cursor) instead of being re-fetched forever, so the batch always reaches
+        // the end. The client stops when a chunk examines 0 rows.
         $db->setQuery($query, 0, $limit);
         $articles = $db->loadObjectList();
 
-        $created = 0;
-        $skipped = 0;
-        $now     = Factory::getDate()->toSql();
+        $created         = 0;
+        $skipped         = 0;
+        $lastProcessedId = $lastId;
+        $now             = Factory::getDate()->toSql();
 
         foreach ($articles as $article) {
+            $lastProcessedId = (int) $article->id;
+
             $ogTitle       = $article->title;
             $ogDescription = $this->extractDescription($article);
             $ogImage       = $this->extractImage($article);
@@ -131,7 +141,10 @@ class BatchController extends BaseController
         }
 
         echo new JsonResponse([
-            'created' => $created,
+            'created'  => $created,
+            'skipped'  => $skipped,
+            'examined' => \count($articles),
+            'last_id'  => $lastProcessedId,
         ]);
 
         $app->close();
diff --git a/source/packages/com_mokoog/tmpl/tags/default.php b/source/packages/com_mokoog/tmpl/tags/default.php
index 77e8c56..82143a4 100644
--- a/source/packages/com_mokoog/tmpl/tags/default.php
+++ b/source/packages/com_mokoog/tmpl/tags/default.php
@@ -234,27 +234,31 @@ document.addEventListener('DOMContentLoaded', function() {
                 return;
             }
             status.textContent = total + ' ';
-            processChunk(0, total, chunkSize, token, bar, status);
+            processChunk(0, 0, total, chunkSize, token, bar, status);
         })
         .catch(function(err) {
             status.textContent = ' ' + err.message;
         });
     }
 
-    function processChunk(processed, total, chunkSize, token, bar, status) {
-        // Always offset=0: processed items are excluded by the IS NULL filter
-        fetch('index.php?option=com_mokoog&task=batch.process&format=json&limit=' + chunkSize + '&' + token + '=1')
+    function processChunk(lastId, processed, total, chunkSize, token, bar, status) {
+        // Cursor-based: pass the highest id seen so far. Failed rows fall behind
+        // the cursor and are not re-fetched, so the loop always terminates.
+        fetch('index.php?option=com_mokoog&task=batch.process&format=json&limit=' + chunkSize + '&lastid=' + lastId + '&' + token + '=1')
             .then(function(r) { return r.json(); })
             .then(function(resp) {
-                processed += resp.data.created;
-                var pct = Math.min(100, Math.round((processed / total) * 100));
+                var examined = resp.data.examined || 0;
+                processed += examined;
+                var pct = total > 0 ? Math.min(100, Math.round((processed / total) * 100)) : 100;
                 bar.style.width = pct + '%';
                 bar.textContent = pct + '%';
                 status.textContent = processed + ' / ' + total + ' ';
 
-                if (resp.data.created > 0 && processed < total) {
-                    processChunk(processed, total, chunkSize, token, bar, status);
+                if (examined > 0) {
+                    processChunk(resp.data.last_id, processed, total, chunkSize, token, bar, status);
                 } else {
+                    bar.style.width = '100%';
+                    bar.textContent = '100%';
                     bar.classList.remove('progress-bar-animated');
                     bar.classList.add('bg-success');
                     status.textContent = ' ' + processed + ' articles.';
diff --git a/source/packages/plg_system_mokoog/src/Extension/MokoOG.php b/source/packages/plg_system_mokoog/src/Extension/MokoOG.php
index 38d661c..c625448 100644
--- a/source/packages/plg_system_mokoog/src/Extension/MokoOG.php
+++ b/source/packages/plg_system_mokoog/src/Extension/MokoOG.php
@@ -28,6 +28,11 @@ final class MokoOG extends CMSPlugin implements SubscriberInterface
      */
     protected $autoloadLanguage = true;
 
+    /**
+     * Minimum seconds between full sitemap regenerations (save-time throttle).
+     */
+    private const SITEMAP_MIN_INTERVAL = 60;
+
     /**
      * Returns the events this plugin subscribes to.
      *
@@ -845,6 +850,21 @@ final class MokoOG extends CMSPlugin implements SubscriberInterface
             return;
         }
 
+        // Throttle: rebuilding the whole sitemap on every save does not scale
+        // (bulk edits/imports), so regenerate at most once per interval. A save
+        // that lands inside the window is skipped here and only reaches the
+        // sitemap when a later save triggers a rebuild after the window.
+        $path = JPATH_ROOT . '/sitemap.xml';
+
+        // is_file()/filemtime() results are cached by PHP within a request; drop
+        // the cached entry so repeated saves in one request see the current mtime.
+        clearstatcache(true, $path);
+        $lastBuilt = is_file($path) ? filemtime($path) : false;
+
+        if ($lastBuilt !== false && (time() - $lastBuilt) < self::SITEMAP_MIN_INTERVAL) {
+            return;
+        }
+
         $changefreq = $this->params->get('sitemap_changefreq', 'weekly');
         $xml        = SitemapBuilder::generate($changefreq);
 
diff --git a/source/packages/plg_system_mokoog/src/Helper/SitemapBuilder.php b/source/packages/plg_system_mokoog/src/Helper/SitemapBuilder.php
index 45b4040..6b7df61 100644
--- a/source/packages/plg_system_mokoog/src/Helper/SitemapBuilder.php
+++ b/source/packages/plg_system_mokoog/src/Helper/SitemapBuilder.php
@@ -81,7 +81,7 @@ class SitemapBuilder
                 continue;
             }
 
-            $url     = $root . '/index.php?option=com_content&view=article&id=' . $article->id;
+            $url     = self::articleUrl($article, $root);
             $lastmod = $article->modified && $article->modified !== '0000-00-00 00:00:00'
                 ? date('Y-m-d', strtotime($article->modified))
                 : '';
@@ -102,6 +102,57 @@ class SitemapBuilder
         return $xml;
     }
 
+    /**
+     * Build the SEF/canonical site URL for an article, with a safe fallback.
+     *
+     * Routes through the site router so the sitemap matches the canonical URLs
+     * the plugin emits. The article's language is added to the route when the
+     * row carries a specific language and the site is multilingual. If routing
+     * throws or returns an empty result, falls back to the non-SEF index.php
+     * URL — never an empty or broken URL.
+     *
+     * @param object $article Row with id, alias, catid, language
+     * @param string $root    Site root without trailing slash
+     *
+     * @return string Absolute URL
+     */
+    private static function articleUrl(object $article, string $root): string
+    {
+        $fallback = $root . '/index.php?option=com_content&view=article&id=' . (int) $article->id;
+
+        $internal = 'index.php?option=com_content&view=article&id=' . (int) $article->id
+            . (!empty($article->alias) ? ':' . $article->alias : '')
+            . (!empty($article->catid) ? '&catid=' . (int) $article->catid : '');
+
+        try {
+            // Same rule as core's article route helper: only language-specific
+            // articles on a multilingual site carry an explicit lang parameter.
+            if (
+                !empty($article->language)
+                && $article->language !== '*'
+                && \Joomla\CMS\Language\Multilanguage::isEnabled()
+            ) {
+                $internal .= '&lang=' . $article->language;
+            }
+
+            $routed = \Joomla\CMS\Router\Route::link(
+                'site',
+                $internal,
+                false,
+                \Joomla\CMS\Router\Route::TLS_IGNORE,
+                true
+            );
+
+            if (\is_string($routed) && $routed !== '') {
+                return $routed;
+            }
+        } catch (\Throwable $e) {
+            // Fall back to the non-SEF URL below.
+        }
+
+        return $fallback;
+    }
+
     /**
      * Write sitemap XML to the site root.
      *