Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 99 additions & 0 deletions src/kernelbot/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -756,6 +756,105 @@ async def admin_update_problems(
}


@app.post("/admin/backfill")
async def admin_backfill(
    payload: dict,
    _: Annotated[None, Depends(require_admin)],
    db_context=Depends(get_db),
) -> dict:
    """Queue a backfill: re-run top submissions against the current task version.

    After an update-problems changes the eval for a leaderboard, old run scores
    become stale. This endpoint fetches the top N submissions (best per user)
    from any previous task_version and re-submits each one so that new runs are
    recorded with the current task_version.

    Payload:
        leaderboard (str): Leaderboard name (required).
        gpu (str): GPU type to backfill (required).
        top_n (int): How many top submissions to re-run (default 100).

    Returns:
        A summary dict with the queued submission ids and any per-submission
        errors. Raises HTTP 400 on a malformed payload and HTTP 500 when the
        backend or the background submission manager is unavailable.
    """
    leaderboard_name = payload.get("leaderboard")
    gpu = payload.get("gpu")

    if not leaderboard_name or not gpu:
        raise HTTPException(status_code=400, detail="leaderboard and gpu are required")

    # Validate top_n up front so a bad payload yields a 400 instead of an
    # unhandled error deep inside the DB query.
    try:
        top_n = int(payload.get("top_n", 100))
    except (TypeError, ValueError):
        raise HTTPException(status_code=400, detail="top_n must be an integer") from None
    if top_n <= 0:
        raise HTTPException(status_code=400, detail="top_n must be a positive integer")

    if not backend_instance:
        raise HTTPException(status_code=500, detail="Backend not initialized")

    with db_context as db:
        task_version = db.get_leaderboard_task_version(leaderboard_name)
        if task_version <= 1:
            # Nothing was ever updated, so no stale runs can exist.
            return {
                "status": "ok",
                "message": "Leaderboard is still on task_version 1, nothing to backfill",
                "queued": 0,
            }

        submissions = db.get_top_submissions_for_backfill(leaderboard_name, gpu, top_n)
        lb = db.get_leaderboard(leaderboard_name)

    if not submissions:
        return {
            "status": "ok",
            "message": "No eligible submissions found from previous versions",
            "queued": 0,
        }

    if not background_submission_manager:
        raise HTTPException(
            status_code=500,
            detail="Background submission manager not available",
        )

    queued_ids = []
    errors = []
    for sub in submissions:
        try:
            req = ProcessedSubmissionRequest(
                code=sub["code"],
                file_name=sub["file_name"],
                user_id=sub["user_id"],
                user_name=sub["user_name"],
                leaderboard=leaderboard_name,
                gpus=[gpu],
                task=lb["task"],
                secret_seed=lb.get("secret_seed", 0),
                task_gpus=[gpu],
            )
            with db_context as db:
                # Record the re-run as a brand-new submission. Use a
                # timezone-aware UTC timestamp, consistent with the Discord
                # backfill path (previously this stored a naive local time).
                new_sub_id = db.create_submission(
                    leaderboard=leaderboard_name,
                    file_name=sub["file_name"],
                    code=sub["code"],
                    user_id=sub["user_id"],
                    time=datetime.datetime.now(datetime.timezone.utc),
                    user_name=sub["user_name"],
                )
            await background_submission_manager.enqueue(
                req, SubmissionMode.LEADERBOARD, new_sub_id
            )
            queued_ids.append(new_sub_id)
        except Exception as e:
            # Best-effort: one bad submission must not abort the whole backfill;
            # report it back to the admin instead.
            errors.append({
                "submission_id": sub["submission_id"],
                "user_id": sub["user_id"],
                "error": str(e),
            })

    return {
        "status": "ok",
        "leaderboard": leaderboard_name,
        "gpu": gpu,
        "task_version": task_version,
        "queued": len(queued_ids),
        "queued_submission_ids": queued_ids,
        "errors": errors,
    }


@app.post("/admin/export-hf")
async def admin_export_hf(
payload: dict,
Expand Down
112 changes: 112 additions & 0 deletions src/kernelbot/cogs/admin_cog.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,10 @@ def __init__(self, bot: "ClusterBot"):
name="export-hf", description="Export competition data to Hugging Face dataset"
)(self.export_to_hf)

self.backfill = bot.admin_group.command(
name="backfill", description="Re-run top submissions after eval change"
)(self.backfill)

self._scheduled_cleanup_temp_users.start()
if env.HF_TOKEN:
self._scheduled_hf_export.start()
Expand Down Expand Up @@ -200,6 +204,114 @@ async def unban_user(self, interaction: discord.Interaction, user_id: str):
interaction, f"User `{user_id}` not found.", ephemeral=True
)

@discord.app_commands.describe(
leaderboard_name="Name of the leaderboard to backfill",
gpu="GPU type to backfill",
top_n="Number of top submissions to re-run (default 100)",
)
@app_commands.autocomplete(leaderboard_name=leaderboard_name_autocomplete)
@app_commands.choices(
gpu=[app_commands.Choice(name=gpu.name, value=gpu.value) for gpu in GitHubGPU]
+ [app_commands.Choice(name=gpu.name, value=gpu.value) for gpu in ModalGPU]
)
@with_error_handling
async def backfill(
self,
interaction: discord.Interaction,
leaderboard_name: str,
gpu: str,
top_n: int = 100,
):
if not await self.admin_check(interaction):
await send_discord_message(
interaction, "You need to have Admin permissions to run this command", ephemeral=True
)
return

await interaction.response.defer(ephemeral=True)

with self.bot.leaderboard_db as db:
task_version = db.get_leaderboard_task_version(leaderboard_name)
if task_version <= 1:
await interaction.edit_original_response(
content=f"Leaderboard `{leaderboard_name}` is on task_version 1 — nothing to backfill."
)
return

submissions = db.get_top_submissions_for_backfill(leaderboard_name, gpu, top_n)
lb = db.get_leaderboard(leaderboard_name)

if not submissions:
await interaction.edit_original_response(
content=f"No eligible submissions found for `{leaderboard_name}` ({gpu}) from previous versions."
)
return

await interaction.edit_original_response(
content=(
f"**Backfill: {leaderboard_name} ({gpu}) v{task_version - 1} → v{task_version}**\n"
f"Found {len(submissions)} submissions to re-run\nQueued: 0/{len(submissions)}"
)
)

queued = 0
errors = 0
for sub in submissions:
try:
from libkernelbot.submission import ProcessedSubmissionRequest

req = ProcessedSubmissionRequest(
code=sub["code"],
file_name=sub["file_name"],
user_id=sub["user_id"],
user_name=sub["user_name"],
leaderboard=leaderboard_name,
gpus=[gpu],
task=lb["task"],
secret_seed=lb.get("secret_seed", 0),
task_gpus=[gpu],
)
from libkernelbot.background_submission_manager import BackgroundSubmissionManagerReporter
from libkernelbot.consts import SubmissionMode

with self.bot.leaderboard_db as db:
new_sub_id = db.create_submission(
leaderboard=leaderboard_name,
file_name=sub["file_name"],
code=sub["code"],
user_id=sub["user_id"],
time=datetime.now(tz=timezone.utc),
user_name=sub["user_name"],
)

reporter = BackgroundSubmissionManagerReporter(new_sub_id, self.bot.backend)
# Fire and forget — don't block on each submission
self.bot.loop.create_task(
self.bot.backend.submit_full(req, SubmissionMode.LEADERBOARD, reporter, new_sub_id)
)
queued += 1
except Exception as e:
logger.error("Backfill error for submission %s: %s", sub["submission_id"], e)
errors += 1

if queued % 5 == 0 or queued + errors == len(submissions):
await interaction.edit_original_response(
content=(
f"**Backfill: {leaderboard_name} ({gpu}) v{task_version - 1} → v{task_version}**\n"
f"Found {len(submissions)} submissions to re-run\n"
f"Queued: {queued}/{len(submissions)}\n"
f"Errors: {errors}"
)
)

await interaction.edit_original_response(
content=(
f"**Backfill complete: {leaderboard_name} ({gpu}) v{task_version - 1} → v{task_version}**\n"
f"Queued: {queued}/{len(submissions)}\n"
f"Errors: {errors}"
)
)

@discord.app_commands.describe(
directory="Directory of the kernel definition. Also used as the leaderboard's name",
gpu="The GPU to submit to. Leave empty for interactive selection/multiple GPUs",
Expand Down
Loading
Loading