dev-pod-api-build/internal/store/runners.go
2026-04-16 04:16:36 +00:00

227 lines
7 KiB
Go

package store
import (
"context"
"crypto/rand"
"encoding/hex"
"errors"
"fmt"
"time"
"github.com/jackc/pgx/v5"
"github.com/iliaivanov/spec-kit-remote/cmd/dev-pod-api/internal/model"
)
// GenerateRunnerID returns a new runner identifier of the form
// "runner-" followed by the lowercase hex encoding of 8 bytes read from
// crypto/rand (16 hex characters).
//
// An error is returned only if the random source cannot be read.
func GenerateRunnerID() (string, error) {
	const prefix = "runner-"

	var raw [8]byte
	_, err := rand.Read(raw[:])
	if err != nil {
		return "", fmt.Errorf("generate random bytes: %w", err)
	}

	id := prefix + hex.EncodeToString(raw[:])
	return id, nil
}
// CreateRunner persists r as a new row in the runners table.
//
// When the insert fails with an error that isDuplicateError recognizes, the
// returned error wraps ErrDuplicate and names the runner ID. Any other
// database error is wrapped with "insert runner" context.
func (s *Store) CreateRunner(ctx context.Context, r *model.Runner) error {
	const stmt = `INSERT INTO runners (id, user_id, repo_url, branch, tools, task, status, webhook_delivery_id, pod_name, cpu_req, mem_req, created_at)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)`

	_, err := s.db.Exec(ctx, stmt,
		r.ID, r.User, r.RepoURL, r.Branch, r.Tools, r.Task, string(r.Status),
		r.WebhookDeliveryID, r.PodName, r.CPUReq, r.MemReq, r.CreatedAt,
	)
	switch {
	case err == nil:
		return nil
	case isDuplicateError(err):
		return fmt.Errorf("runner %q: %w", r.ID, ErrDuplicate)
	default:
		return fmt.Errorf("insert runner: %w", err)
	}
}
// GetRunner looks up a single runner by its ID.
//
// If no row matches, the returned error wraps ErrNotFound and names the ID.
// Any other scan/query failure is wrapped with "query runner" context.
func (s *Store) GetRunner(ctx context.Context, id string) (*model.Runner, error) {
	const stmt = `SELECT id, user_id, repo_url, branch, tools, task, status,
forgejo_runner_id, webhook_delivery_id, pod_name, cpu_req, mem_req,
created_at, claimed_at, completed_at
FROM runners WHERE id = $1`

	found, err := scanRunner(s.db.QueryRow(ctx, stmt, id))
	switch {
	case err == nil:
		return found, nil
	case errors.Is(err, pgx.ErrNoRows):
		return nil, fmt.Errorf("runner %q: %w", id, ErrNotFound)
	default:
		return nil, fmt.Errorf("query runner: %w", err)
	}
}
// ListRunners returns runners ordered by creation time, newest first.
//
// userFilter and statusFilter are each optional: an empty string disables
// that filter, a non-empty string restricts the result to rows whose user_id
// or status column equals it. Both may be combined.
func (s *Store) ListRunners(ctx context.Context, userFilter string, statusFilter string) ([]model.Runner, error) {
	stmt := `SELECT id, user_id, repo_url, branch, tools, task, status,
forgejo_runner_id, webhook_delivery_id, pod_name, cpu_req, mem_req,
created_at, claimed_at, completed_at
FROM runners WHERE 1=1`

	// Each active filter appends its value first, so the placeholder number
	// is simply the current parameter count.
	var params []any
	if userFilter != "" {
		params = append(params, userFilter)
		stmt += fmt.Sprintf(" AND user_id = $%d", len(params))
	}
	if statusFilter != "" {
		params = append(params, statusFilter)
		stmt += fmt.Sprintf(" AND status = $%d", len(params))
	}
	stmt += " ORDER BY created_at DESC"

	rows, err := s.db.Query(ctx, stmt, params...)
	if err != nil {
		return nil, fmt.Errorf("query runners: %w", err)
	}
	defer rows.Close()

	var result []model.Runner
	for rows.Next() {
		var item model.Runner
		var claimed, completed *time.Time
		scanErr := rows.Scan(
			&item.ID, &item.User, &item.RepoURL, &item.Branch, &item.Tools, &item.Task,
			&item.Status, &item.ForgejoRunnerID, &item.WebhookDeliveryID, &item.PodName, &item.CPUReq, &item.MemReq,
			&item.CreatedAt, &claimed, &completed,
		)
		if scanErr != nil {
			return nil, fmt.Errorf("scan runner: %w", scanErr)
		}
		item.ClaimedAt, item.CompletedAt = claimed, completed
		result = append(result, item)
	}

	// Surface any iteration error alongside whatever was collected so far.
	err = rows.Err()
	return result, err
}
// UpdateRunnerStatus moves the runner identified by id to newStatus.
//
// The current row is loaded with GetRunner and the move is rejected unless
// the current status reports CanTransitionTo(newStatus). On success the
// status column is updated and, additionally:
//   - forgejo_runner_id is overwritten only when forgejoRunnerID is non-empty;
//   - claimed_at is set to the current UTC time when newStatus is
//     model.RunnerStatusJobClaimed;
//   - completed_at is set to the current UTC time when newStatus.IsTerminal()
//     is true.
//
// Errors from GetRunner are returned as-is. If the UPDATE affects no rows the
// returned error wraps ErrNotFound.
//
// NOTE(review): the status check and the UPDATE are separate statements, so
// the transition is not validated atomically here — confirm whether callers
// serialize updates.
func (s *Store) UpdateRunnerStatus(ctx context.Context, id string, newStatus model.RunnerStatus, forgejoRunnerID string) error {
	existing, err := s.GetRunner(ctx, id)
	if err != nil {
		return err
	}
	if allowed := existing.Status.CanTransitionTo(newStatus); !allowed {
		return fmt.Errorf("invalid transition from %s to %s", existing.Status, newStatus)
	}

	// One timestamp is shared by whichever of claimed_at/completed_at applies.
	stamp := time.Now().UTC()

	stmt := `UPDATE runners SET status = $1, forgejo_runner_id = CASE WHEN $2 = '' THEN forgejo_runner_id ELSE $2 END`
	params := []any{string(newStatus), forgejoRunnerID}

	// Optional columns: append the value first so len(params) is its placeholder.
	if newStatus == model.RunnerStatusJobClaimed {
		params = append(params, stamp)
		stmt += fmt.Sprintf(", claimed_at = $%d", len(params))
	}
	if newStatus.IsTerminal() {
		params = append(params, stamp)
		stmt += fmt.Sprintf(", completed_at = $%d", len(params))
	}
	params = append(params, id)
	stmt += fmt.Sprintf(" WHERE id = $%d", len(params))

	res, err := s.db.Exec(ctx, stmt, params...)
	switch {
	case err != nil:
		return fmt.Errorf("update runner status: %w", err)
	case res.RowsAffected() == 0:
		return fmt.Errorf("runner %q: %w", id, ErrNotFound)
	default:
		return nil
	}
}
// DeleteRunner deletes the runner row with the given ID.
//
// If the DELETE affects no rows the returned error wraps ErrNotFound; a
// database failure is wrapped with "delete runner" context.
func (s *Store) DeleteRunner(ctx context.Context, id string) error {
	const stmt = `DELETE FROM runners WHERE id = $1`

	res, err := s.db.Exec(ctx, stmt, id)
	switch {
	case err != nil:
		return fmt.Errorf("delete runner: %w", err)
	case res.RowsAffected() == 0:
		return fmt.Errorf("runner %q: %w", id, ErrNotFound)
	default:
		return nil
	}
}
// IsDeliveryProcessed reports whether any runner row already carries the
// given webhook delivery ID.
//
// An empty deliveryID yields (false, nil) without querying the database.
func (s *Store) IsDeliveryProcessed(ctx context.Context, deliveryID string) (bool, error) {
	if deliveryID == "" {
		return false, nil
	}

	const stmt = `SELECT EXISTS(SELECT 1 FROM runners WHERE webhook_delivery_id = $1)`

	var seen bool
	if err := s.db.QueryRow(ctx, stmt, deliveryID).Scan(&seen); err != nil {
		return false, fmt.Errorf("check delivery: %w", err)
	}
	return seen, nil
}
// GetRunnersForCleanup returns runners in terminal states (completed/failed),
// newest first.
//
// Only rows whose status column is 'completed' or 'failed' are selected; the
// status literals are the same ones GetStaleRunners uses to exclude finished
// runners. Previously this delegated to ListRunners with no filters, which
// returned every runner regardless of status.
//
// A query failure is wrapped with "query runners for cleanup" context and a
// row decoding failure with "scan runner for cleanup" context.
func (s *Store) GetRunnersForCleanup(ctx context.Context) ([]model.Runner, error) {
	rows, err := s.db.Query(ctx,
		`SELECT id, user_id, repo_url, branch, tools, task, status,
forgejo_runner_id, webhook_delivery_id, pod_name, cpu_req, mem_req,
created_at, claimed_at, completed_at
FROM runners
WHERE status IN ('completed', 'failed')
ORDER BY created_at DESC`)
	if err != nil {
		return nil, fmt.Errorf("query runners for cleanup: %w", err)
	}
	defer rows.Close()

	var runners []model.Runner
	for rows.Next() {
		var r model.Runner
		// claimed_at / completed_at are scanned through nullable pointers.
		var claimedAt, completedAt *time.Time
		if err := rows.Scan(&r.ID, &r.User, &r.RepoURL, &r.Branch, &r.Tools, &r.Task,
			&r.Status, &r.ForgejoRunnerID, &r.WebhookDeliveryID, &r.PodName, &r.CPUReq, &r.MemReq,
			&r.CreatedAt, &claimedAt, &completedAt); err != nil {
			return nil, fmt.Errorf("scan runner for cleanup: %w", err)
		}
		r.ClaimedAt = claimedAt
		r.CompletedAt = completedAt
		runners = append(runners, r)
	}
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("iterate runners for cleanup: %w", err)
	}
	return runners, nil
}
// GetStaleRunners returns runners whose created_at is earlier than the
// current UTC time minus ttl and whose status is none of 'completed',
// 'failed' or 'cleanup_pending'. Results are ordered by created_at.
func (s *Store) GetStaleRunners(ctx context.Context, ttl time.Duration) ([]model.Runner, error) {
	const stmt = `SELECT id, user_id, repo_url, branch, tools, task, status,
forgejo_runner_id, webhook_delivery_id, pod_name, cpu_req, mem_req,
created_at, claimed_at, completed_at
FROM runners
WHERE created_at < $1
AND status NOT IN ('completed', 'failed', 'cleanup_pending')
ORDER BY created_at`

	// Anything created before this instant has outlived the TTL.
	threshold := time.Now().UTC().Add(-ttl)

	rows, err := s.db.Query(ctx, stmt, threshold)
	if err != nil {
		return nil, fmt.Errorf("query stale runners: %w", err)
	}
	defer rows.Close()

	var stale []model.Runner
	for rows.Next() {
		var item model.Runner
		var claimed, completed *time.Time
		scanErr := rows.Scan(
			&item.ID, &item.User, &item.RepoURL, &item.Branch, &item.Tools, &item.Task,
			&item.Status, &item.ForgejoRunnerID, &item.WebhookDeliveryID, &item.PodName, &item.CPUReq, &item.MemReq,
			&item.CreatedAt, &claimed, &completed,
		)
		if scanErr != nil {
			return nil, fmt.Errorf("scan stale runner: %w", scanErr)
		}
		item.ClaimedAt, item.CompletedAt = claimed, completed
		stale = append(stale, item)
	}

	// Surface any iteration error alongside whatever was collected so far.
	err = rows.Err()
	return stale, err
}
// scanRunner decodes one row into a model.Runner.
//
// The row must supply, in order: id, user, repo URL, branch, tools, task,
// status, forgejo runner ID, webhook delivery ID, pod name, CPU request,
// memory request, created-at, claimed-at and completed-at. The last two are
// read through pointers and copied onto the runner as-is.
//
// The Scan error is returned unwrapped so callers can match on it.
func scanRunner(row pgx.Row) (*model.Runner, error) {
	out := new(model.Runner)
	var claimed, completed *time.Time

	if err := row.Scan(
		&out.ID, &out.User, &out.RepoURL, &out.Branch, &out.Tools, &out.Task,
		&out.Status, &out.ForgejoRunnerID, &out.WebhookDeliveryID, &out.PodName, &out.CPUReq, &out.MemReq,
		&out.CreatedAt, &claimed, &completed,
	); err != nil {
		return nil, err
	}

	out.ClaimedAt, out.CompletedAt = claimed, completed
	return out, nil
}