227 lines
7 KiB
Go
227 lines
7 KiB
Go
package store
|
|
|
|
import (
|
|
"context"
|
|
"crypto/rand"
|
|
"encoding/hex"
|
|
"errors"
|
|
"fmt"
|
|
"time"
|
|
|
|
"github.com/jackc/pgx/v5"
|
|
|
|
"github.com/iliaivanov/spec-kit-remote/cmd/dev-pod-api/internal/model"
|
|
)
|
|
|
|
// GenerateRunnerID returns a new random runner identifier of the form
// "runner-" followed by 16 lowercase hex characters (8 bytes read from
// crypto/rand). It returns an error if the random source cannot be read.
func GenerateRunnerID() (string, error) {
	var raw [8]byte
	_, err := rand.Read(raw[:])
	if err != nil {
		return "", fmt.Errorf("generate random bytes: %w", err)
	}
	id := "runner-" + hex.EncodeToString(raw[:])
	return id, nil
}
|
|
|
|
// CreateRunner inserts a new runner record.
|
|
func (s *Store) CreateRunner(ctx context.Context, r *model.Runner) error {
|
|
_, err := s.db.Exec(ctx,
|
|
`INSERT INTO runners (id, user_id, repo_url, branch, tools, task, status, webhook_delivery_id, pod_name, cpu_req, mem_req, created_at)
|
|
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)`,
|
|
r.ID, r.User, r.RepoURL, r.Branch, r.Tools, r.Task, string(r.Status),
|
|
r.WebhookDeliveryID, r.PodName, r.CPUReq, r.MemReq, r.CreatedAt,
|
|
)
|
|
if err != nil {
|
|
if isDuplicateError(err) {
|
|
return fmt.Errorf("runner %q: %w", r.ID, ErrDuplicate)
|
|
}
|
|
return fmt.Errorf("insert runner: %w", err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// GetRunner retrieves a runner by ID.
|
|
func (s *Store) GetRunner(ctx context.Context, id string) (*model.Runner, error) {
|
|
row := s.db.QueryRow(ctx,
|
|
`SELECT id, user_id, repo_url, branch, tools, task, status,
|
|
forgejo_runner_id, webhook_delivery_id, pod_name, cpu_req, mem_req,
|
|
created_at, claimed_at, completed_at
|
|
FROM runners WHERE id = $1`, id)
|
|
|
|
r, err := scanRunner(row)
|
|
if err != nil {
|
|
if errors.Is(err, pgx.ErrNoRows) {
|
|
return nil, fmt.Errorf("runner %q: %w", id, ErrNotFound)
|
|
}
|
|
return nil, fmt.Errorf("query runner: %w", err)
|
|
}
|
|
return r, nil
|
|
}
|
|
|
|
// ListRunners returns runners, optionally filtered by user and/or status.
|
|
func (s *Store) ListRunners(ctx context.Context, userFilter string, statusFilter string) ([]model.Runner, error) {
|
|
query := `SELECT id, user_id, repo_url, branch, tools, task, status,
|
|
forgejo_runner_id, webhook_delivery_id, pod_name, cpu_req, mem_req,
|
|
created_at, claimed_at, completed_at
|
|
FROM runners WHERE 1=1`
|
|
var args []any
|
|
argIdx := 1
|
|
|
|
if userFilter != "" {
|
|
query += fmt.Sprintf(" AND user_id = $%d", argIdx)
|
|
args = append(args, userFilter)
|
|
argIdx++
|
|
}
|
|
if statusFilter != "" {
|
|
query += fmt.Sprintf(" AND status = $%d", argIdx)
|
|
args = append(args, statusFilter)
|
|
argIdx++
|
|
}
|
|
query += " ORDER BY created_at DESC"
|
|
|
|
rows, err := s.db.Query(ctx, query, args...)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("query runners: %w", err)
|
|
}
|
|
defer rows.Close()
|
|
|
|
var runners []model.Runner
|
|
for rows.Next() {
|
|
var r model.Runner
|
|
var claimedAt, completedAt *time.Time
|
|
if err := rows.Scan(&r.ID, &r.User, &r.RepoURL, &r.Branch, &r.Tools, &r.Task,
|
|
&r.Status, &r.ForgejoRunnerID, &r.WebhookDeliveryID, &r.PodName, &r.CPUReq, &r.MemReq,
|
|
&r.CreatedAt, &claimedAt, &completedAt); err != nil {
|
|
return nil, fmt.Errorf("scan runner: %w", err)
|
|
}
|
|
r.ClaimedAt = claimedAt
|
|
r.CompletedAt = completedAt
|
|
runners = append(runners, r)
|
|
}
|
|
return runners, rows.Err()
|
|
}
|
|
|
|
// UpdateRunnerStatus transitions a runner to a new status with state machine validation.
|
|
func (s *Store) UpdateRunnerStatus(ctx context.Context, id string, newStatus model.RunnerStatus, forgejoRunnerID string) error {
|
|
current, err := s.GetRunner(ctx, id)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if !current.Status.CanTransitionTo(newStatus) {
|
|
return fmt.Errorf("invalid transition from %s to %s", current.Status, newStatus)
|
|
}
|
|
|
|
now := time.Now().UTC()
|
|
var claimedAt, completedAt *time.Time
|
|
if newStatus == model.RunnerStatusJobClaimed {
|
|
claimedAt = &now
|
|
}
|
|
if newStatus.IsTerminal() {
|
|
completedAt = &now
|
|
}
|
|
|
|
query := `UPDATE runners SET status = $1, forgejo_runner_id = CASE WHEN $2 = '' THEN forgejo_runner_id ELSE $2 END`
|
|
args := []any{string(newStatus), forgejoRunnerID}
|
|
argIdx := 3
|
|
|
|
if claimedAt != nil {
|
|
query += fmt.Sprintf(", claimed_at = $%d", argIdx)
|
|
args = append(args, *claimedAt)
|
|
argIdx++
|
|
}
|
|
if completedAt != nil {
|
|
query += fmt.Sprintf(", completed_at = $%d", argIdx)
|
|
args = append(args, *completedAt)
|
|
argIdx++
|
|
}
|
|
|
|
query += fmt.Sprintf(" WHERE id = $%d", argIdx)
|
|
args = append(args, id)
|
|
|
|
tag, err := s.db.Exec(ctx, query, args...)
|
|
if err != nil {
|
|
return fmt.Errorf("update runner status: %w", err)
|
|
}
|
|
if tag.RowsAffected() == 0 {
|
|
return fmt.Errorf("runner %q: %w", id, ErrNotFound)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// DeleteRunner removes a runner record by ID.
|
|
func (s *Store) DeleteRunner(ctx context.Context, id string) error {
|
|
tag, err := s.db.Exec(ctx, `DELETE FROM runners WHERE id = $1`, id)
|
|
if err != nil {
|
|
return fmt.Errorf("delete runner: %w", err)
|
|
}
|
|
if tag.RowsAffected() == 0 {
|
|
return fmt.Errorf("runner %q: %w", id, ErrNotFound)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// IsDeliveryProcessed checks if a webhook delivery ID has already been processed.
|
|
func (s *Store) IsDeliveryProcessed(ctx context.Context, deliveryID string) (bool, error) {
|
|
if deliveryID == "" {
|
|
return false, nil
|
|
}
|
|
var exists bool
|
|
err := s.db.QueryRow(ctx,
|
|
`SELECT EXISTS(SELECT 1 FROM runners WHERE webhook_delivery_id = $1)`,
|
|
deliveryID).Scan(&exists)
|
|
if err != nil {
|
|
return false, fmt.Errorf("check delivery: %w", err)
|
|
}
|
|
return exists, nil
|
|
}
|
|
|
|
// GetRunnersForCleanup returns runners in terminal states (completed/failed).
|
|
func (s *Store) GetRunnersForCleanup(ctx context.Context) ([]model.Runner, error) {
|
|
return s.ListRunners(ctx, "", "")
|
|
}
|
|
|
|
// GetStaleRunners returns runners older than the given TTL that aren't in terminal/cleanup states.
|
|
func (s *Store) GetStaleRunners(ctx context.Context, ttl time.Duration) ([]model.Runner, error) {
|
|
cutoff := time.Now().UTC().Add(-ttl)
|
|
rows, err := s.db.Query(ctx,
|
|
`SELECT id, user_id, repo_url, branch, tools, task, status,
|
|
forgejo_runner_id, webhook_delivery_id, pod_name, cpu_req, mem_req,
|
|
created_at, claimed_at, completed_at
|
|
FROM runners
|
|
WHERE created_at < $1
|
|
AND status NOT IN ('completed', 'failed', 'cleanup_pending')
|
|
ORDER BY created_at`, cutoff)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("query stale runners: %w", err)
|
|
}
|
|
defer rows.Close()
|
|
|
|
var runners []model.Runner
|
|
for rows.Next() {
|
|
var r model.Runner
|
|
var claimedAt, completedAt *time.Time
|
|
if err := rows.Scan(&r.ID, &r.User, &r.RepoURL, &r.Branch, &r.Tools, &r.Task,
|
|
&r.Status, &r.ForgejoRunnerID, &r.WebhookDeliveryID, &r.PodName, &r.CPUReq, &r.MemReq,
|
|
&r.CreatedAt, &claimedAt, &completedAt); err != nil {
|
|
return nil, fmt.Errorf("scan stale runner: %w", err)
|
|
}
|
|
r.ClaimedAt = claimedAt
|
|
r.CompletedAt = completedAt
|
|
runners = append(runners, r)
|
|
}
|
|
return runners, rows.Err()
|
|
}
|
|
|
|
func scanRunner(row pgx.Row) (*model.Runner, error) {
|
|
var r model.Runner
|
|
var claimedAt, completedAt *time.Time
|
|
err := row.Scan(&r.ID, &r.User, &r.RepoURL, &r.Branch, &r.Tools, &r.Task,
|
|
&r.Status, &r.ForgejoRunnerID, &r.WebhookDeliveryID, &r.PodName, &r.CPUReq, &r.MemReq,
|
|
&r.CreatedAt, &claimedAt, &completedAt)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
r.ClaimedAt = claimedAt
|
|
r.CompletedAt = completedAt
|
|
return &r, nil
|
|
}
|