{
  "benchmark": "HumanEval-LokiMode",
  "mode": "multi-agent",
  "version": "1.0",
  "timestamp": "2026-01-05T08:46:10.291133",
  "model": "opus",
  "max_retries": 3,
  "total_problems": 164,
  "problems": [
    {
      "task_id": "HumanEval/0",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/1",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/2",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/3",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/4",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/5",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/6",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/7",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/8",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/9",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/10",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/11",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/12",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/13",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/14",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/15",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/16",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/17",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/18",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/19",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/20",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/21",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/22",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/23",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/24",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/25",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/26",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/27",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/28",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/29",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/30",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/31",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/32",
      "passed": false,
      "attempts": 3,
      "error": "Failed after 3 RARV attempts"
    },
    {
      "task_id": "HumanEval/33",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/34",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/35",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/36",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/37",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/38",
      "passed": true,
      "attempts": 2,
      "error": null
    },
    {
      "task_id": "HumanEval/39",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/40",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/41",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/42",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/43",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/44",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/45",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/46",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/47",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/48",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/49",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/50",
      "passed": false,
      "attempts": 3,
      "error": "Failed after 3 RARV attempts"
    },
    {
      "task_id": "HumanEval/51",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/52",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/53",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/54",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/55",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/56",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/57",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/58",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/59",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/60",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/61",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/62",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/63",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/64",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/65",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/66",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/67",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/68",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/69",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/70",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/71",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/72",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/73",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/74",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/75",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/76",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/77",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/78",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/79",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/80",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/81",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/82",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/83",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/84",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/85",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/86",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/87",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/88",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/89",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/90",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/91",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/92",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/93",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/94",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/95",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/96",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/97",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/98",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/99",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/100",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/101",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/102",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/103",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/104",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/105",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/106",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/107",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/108",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/109",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/110",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/111",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/112",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/113",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/114",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/115",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/116",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/117",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/118",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/119",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/120",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/121",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/122",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/123",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/124",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/125",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/126",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/127",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/128",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/129",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/130",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/131",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/132",
      "passed": true,
      "attempts": 2,
      "error": null
    },
    {
      "task_id": "HumanEval/133",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/134",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/135",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/136",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/137",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/138",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/139",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/140",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/141",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/142",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/143",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/144",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/145",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/146",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/147",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/148",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/149",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/150",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/151",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/152",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/153",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/154",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/155",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/156",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/157",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/158",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/159",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/160",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/161",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/162",
      "passed": true,
      "attempts": 1,
      "error": null
    },
    {
      "task_id": "HumanEval/163",
      "passed": true,
      "attempts": 1,
      "error": null
    }
  ],
  "passed": 162,
  "failed": 0,
  "errors": 2,
  "pass_rate": 98.78048780487805,
  "avg_attempts": 1.0365853658536586,
  "elapsed_time": 2704.4724848270416
}