fix(actions): retry workflow insertion on database deadlock
Branch Policy Check / Verify merge target (pull_request) Successful in 1s
PR RC Release / Build RC Release (pull_request) Successful in 2s
Universal: Pre-Release / Build Pre-Release (${{ inputs.stability || 'development' }}) (pull_request) Failing after 21s

When multiple workflows are triggered by a single event (e.g. a
pull_request with several matching workflow files), each InsertRun
transaction acquires an X-lock on the repository row via
UpdateRepoRunsNumbers and an index lock on action_run. Two concurrent
transactions can deadlock when each holds one lock and waits for the
other. InnoDB kills the lighter transaction, but handleWorkflows only
logged the error and silently dropped the workflow run — making it
appear as though pull_request events were never fired.

This was the root cause of API-created PRs appearing to not trigger
Actions workflows: the notification pipeline was correct, but the DB
insert was lost to an unretried deadlock.

The fix wraps PrepareRunAndInsert in a retry loop (up to 3 attempts
with exponential backoff) that detects deadlock errors across MySQL,
PostgreSQL, and SQLite. On deadlock, the rolled-back run fields are
reset before the next attempt.

Also adds db.IsErrDeadlock() for cross-engine deadlock detection and
unit tests for the same.

Closes #220

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Jonathan Miller
2026-05-26 15:16:30 -05:00
parent c633024a9c
commit dd6fc4b69c
3 changed files with 106 additions and 1 deletions
+25
View File
@@ -5,6 +5,7 @@ package db
import (
"fmt"
"strings"
"git.mokoconsulting.tech/MokoConsulting/MokoGitea/modules/util"
)
@@ -72,3 +73,27 @@ func (err ErrNotExist) Error() string {
// Unwrap returns util.ErrNotExist as the wrapped error, so that
// errors.Is(err, util.ErrNotExist) matches any ErrNotExist value.
func (err ErrNotExist) Unwrap() error {
return util.ErrNotExist
}
// IsErrDeadlock reports whether err looks like a database deadlock, i.e. a
// failure after which the whole transaction can simply be retried.
//
// Detection is text based, because each driver surfaces the condition with
// its own error type:
//   - MySQL / MariaDB: "Error 1213" (ER_LOCK_DEADLOCK), SQLSTATE 40001.
//   - PostgreSQL: "deadlock detected", SQLSTATE 40P01.
//   - SQLite: "database is locked" (SQLITE_BUSY).
//
// Numeric codes are only accepted when they appear as standalone tokens, so
// that a longer number which merely contains the digits (for example a key
// value such as 140001, or "Error 12130") is not mistaken for a deadlock.
//
// A nil err is never a deadlock.
func IsErrDeadlock(err error) bool {
	if err == nil {
		return false
	}
	msg := err.Error()

	isAlnum := func(c byte) bool {
		return '0' <= c && c <= '9' || 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z'
	}

	// hasToken reports whether token occurs in msg with no letter or digit
	// directly before or after it.
	hasToken := func(token string) bool {
		for from := 0; from < len(msg); {
			i := strings.Index(msg[from:], token)
			if i < 0 {
				return false
			}
			start := from + i
			end := start + len(token)
			leftOK := start == 0 || !isAlnum(msg[start-1])
			rightOK := end == len(msg) || !isAlnum(msg[end])
			if leftOK && rightOK {
				return true
			}
			// Embedded in a longer word or number; keep looking further right.
			from = start + 1
		}
		return false
	}

	// Driver error numbers and SQLSTATE codes.
	for _, code := range []string{"Error 1213", "40001", "40P01"} {
		if hasToken(code) {
			return true
		}
	}

	// Human-readable messages (PostgreSQL and SQLite respectively).
	for _, phrase := range []string{"deadlock detected", "database is locked"} {
		if strings.Contains(msg, phrase) {
			return true
		}
	}
	return false
}
+31
View File
@@ -0,0 +1,31 @@
// Copyright 2026 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package db
import (
"errors"
"testing"
"github.com/stretchr/testify/assert"
)
func TestIsErrDeadlock(t *testing.T) {
tests := []struct {
name string
err error
want bool
}{
{name: "nil", err: nil, want: false},
{name: "unrelated", err: errors.New("connection refused"), want: false},
{name: "mysql 1213", err: errors.New("Error 1213 (40001): Deadlock found when trying to get lock; try restarting transaction"), want: true},
{name: "mysql sqlstate", err: errors.New("SQLSTATE 40001: serialization failure"), want: true},
{name: "postgres", err: errors.New("pq: deadlock detected"), want: true},
{name: "sqlite", err: errors.New("database is locked"), want: true},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
assert.Equal(t, tt.want, IsErrDeadlock(tt.err))
})
}
}
+50 -1
View File
@@ -9,6 +9,7 @@ import (
"fmt"
"slices"
"strings"
"time"
actions_model "git.mokoconsulting.tech/MokoConsulting/MokoGitea/models/actions"
"git.mokoconsulting.tech/MokoConsulting/MokoGitea/models/db"
@@ -344,7 +345,7 @@ func handleWorkflows(
run.NeedApproval = need
if err := PrepareRunAndInsert(ctx, dwf.Content, run, nil); err != nil {
if err := prepareRunAndInsertWithRetry(ctx, dwf.Content, run); err != nil {
log.Error("PrepareRunAndInsert: %v", err)
continue
}
@@ -352,6 +353,54 @@ func handleWorkflows(
return nil
}
// prepareRunAndInsertWithRetry calls PrepareRunAndInsert and retries it when
// the failure is a database deadlock, as reported by db.IsErrDeadlock.
//
// Several workflow runs inserted for the same event can deadlock against each
// other inside the database; the losing transaction is rolled back, so the
// insert can be repeated without leaving partial state behind.
//
// At most maxAttempts attempts are made, waiting 50ms after the first failure
// and doubling the wait after each further one. Before every retry, *run is
// restored to the value it had on entry so that each attempt sees the same
// input as the first.
//
// It returns nil on success, the original error unchanged when it is not a
// deadlock, ctx.Err() when ctx is done while waiting to retry, and an error
// wrapping the last deadlock when every attempt failed.
func prepareRunAndInsertWithRetry(ctx context.Context, content []byte, run *actions_model.ActionRun) error {
	const maxAttempts = 3
	backoff := 50 * time.Millisecond

	// Snapshot the run before the first attempt. PrepareRunAndInsert mutates
	// it inside the transaction; after a rollback those values are stale.
	// NOTE(review): this is a shallow copy and assumes ActionRun holds no
	// locks or other no-copy fields — confirm against the model.
	pristine := *run

	var err error
	for attempt := 1; attempt <= maxAttempts; attempt++ {
		if err = PrepareRunAndInsert(ctx, content, run, nil); err == nil {
			return nil
		}
		if !db.IsErrDeadlock(err) {
			return err
		}

		// Discard whatever the rolled-back transaction wrote into run.
		*run = pristine

		if attempt == maxAttempts {
			// Out of attempts: do not announce a retry or sleep for nothing.
			break
		}

		log.Warn("PrepareRunAndInsert deadlock (attempt %d/%d) for workflow %s in repo %d, retrying: %v",
			attempt, maxAttempts, run.WorkflowID, run.RepoID, err)

		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-time.After(backoff):
		}
		backoff *= 2
	}
	return fmt.Errorf("deadlock persisted after %d attempts: %w", maxAttempts, err)
}
// newNotifyInputFromIssue builds a notifyInput for the given event by passing
// the issue's repository (issue.Repo) and poster (issue.Poster) to
// newNotifyInput.
func newNotifyInputFromIssue(issue *issues_model.Issue, event webhook_module.HookEventType) *notifyInput {
return newNotifyInput(issue.Repo, issue.Poster, event)
}