{
  "version": 1,
  "generated_at": "2026-05-01T20:40:59.233646Z",
  "run_count": 120,
  "repeat_groups": [
    {
      "group_id": "c231858746135469",
      "cluster_scope": "repeat",
      "task_fingerprint": "7326c5be7cb61f6e9585d417047cb29a4d0b0ca63102404c9692e99eeae036b2",
      "normalized_tag_key": "*like*;discourse;english;pragmatics;v1",
      "task_name_display": "*like* discourse/pragm",
      "task_names_seen": [
        "*like* discourse/pragm"
      ],
      "tags_display": "*like*; discourse; pragmatics; English; v1",
      "provider": "requesty",
      "model": "claude-sonnet-4-6",
      "run_count": 2,
      "run_stems": [
        "like__requesty__anthropicclaudesonnet46__2026-03-12-15-43",
        "like__requesty__anthropicclaudesonnet46__2026-03-12-14-40"
      ],
      "distance_metric": "nominal_disagreement_rate",
      "linkage_method": "average",
      "comparable_pair_count": 1,
      "representatives": [
        {
          "provider": "requesty",
          "model": "claude-sonnet-4-6",
          "run_stem": "like__requesty__anthropicclaudesonnet46__2026-03-12-15-43",
          "metrics_file": "like__requesty__anthropicclaudesonnet46__2026-03-12-15-43__metrics.json",
          "timestamp": "2026-03-12T14:43:10.303101Z",
          "accuracy": 0.8228228228228228,
          "cohen_kappa": 0.7478161408052575
        },
        {
          "provider": "requesty",
          "model": "claude-sonnet-4-6",
          "run_stem": "like__requesty__anthropicclaudesonnet46__2026-03-12-14-40",
          "metrics_file": "like__requesty__anthropicclaudesonnet46__2026-03-12-14-40__metrics.json",
          "timestamp": "2026-03-12T13:40:58.748047Z",
          "accuracy": 0.8298298298298298,
          "cohen_kappa": 0.7574231404297621
        }
      ],
      "pairwise": [
        {
          "a": 0,
          "b": 1,
          "distance": 0.04944500504540868,
          "overlap_count": 991,
          "agreement_count": 942,
          "disagreement_count": 49
        }
      ],
      "linkage": [
        [
          0,
          1,
          0.04944500504540868,
          2
        ]
      ],
      "linkage_complete": true
    },
    {
      "group_id": "7f82d6ed0511901d",
      "cluster_scope": "repeat",
      "task_fingerprint": "7326c5be7cb61f6e9585d417047cb29a4d0b0ca63102404c9692e99eeae036b2",
      "normalized_tag_key": "*like*;discourse;english;pragmatics;v1",
      "task_name_display": "*like* discourse/pragm",
      "task_names_seen": [
        "*like* discourse/pragm"
      ],
      "tags_display": "*like*; discourse; pragmatics; English; v1",
      "provider": "requesty",
      "model": "deepseek-v3.2",
      "run_count": 2,
      "run_stems": [
        "like__requesty__novitadeepseekdeepseekv32__2026-03-12-17-27",
        "like__requesty__novitadeepseekdeepseekv32__2026-03-12-16-52"
      ],
      "distance_metric": "nominal_disagreement_rate",
      "linkage_method": "average",
      "comparable_pair_count": 1,
      "representatives": [
        {
          "provider": "requesty",
          "model": "deepseek-v3.2",
          "run_stem": "like__requesty__novitadeepseekdeepseekv32__2026-03-12-17-27",
          "metrics_file": "like__requesty__novitadeepseekdeepseekv32__2026-03-12-17-27__metrics.json",
          "timestamp": "2026-03-12T16:27:57.675261Z",
          "accuracy": 0.6296296296296297,
          "cohen_kappa": 0.46022553031165947
        },
        {
          "provider": "requesty",
          "model": "deepseek-v3.2",
          "run_stem": "like__requesty__novitadeepseekdeepseekv32__2026-03-12-16-52",
          "metrics_file": "like__requesty__novitadeepseekdeepseekv32__2026-03-12-16-52__metrics.json",
          "timestamp": "2026-03-12T15:52:04.876466Z",
          "accuracy": 0.6446446446446447,
          "cohen_kappa": 0.47974932556430355
        }
      ],
      "pairwise": [
        {
          "a": 0,
          "b": 1,
          "distance": 0.33969849246231154,
          "overlap_count": 995,
          "agreement_count": 657,
          "disagreement_count": 338
        }
      ],
      "linkage": [
        [
          0,
          1,
          0.33969849246231154,
          2
        ]
      ],
      "linkage_complete": true
    },
    {
      "group_id": "9d01f5b7f24ac4bc",
      "cluster_scope": "repeat",
      "task_fingerprint": "cd60855ae86dc7a8391248627a68bbc5a77fabf2d75d0b967118c09aec3d8faa",
      "normalized_tag_key": "*like*;discourse;english;pragmatics;v3",
      "task_name_display": "*like* discourse/pragm",
      "task_names_seen": [
        "*like* discourse/pragm"
      ],
      "tags_display": "*like*; discourse; pragmatics; English; v3",
      "provider": "vertex",
      "model": "gemini-3-flash-preview",
      "run_count": 2,
      "run_stems": [
        "like_interrater__vertex__gemini3flashpreview__2026-03-16-23-22",
        "like_interrater__vertex__gemini3flashpreview__2026-03-16-20-22"
      ],
      "distance_metric": "nominal_disagreement_rate",
      "linkage_method": "average",
      "comparable_pair_count": 1,
      "representatives": [
        {
          "provider": "vertex",
          "model": "gemini-3-flash-preview",
          "run_stem": "like_interrater__vertex__gemini3flashpreview__2026-03-16-23-22",
          "metrics_file": "like_interrater__vertex__gemini3flashpreview__2026-03-16-23-22__metrics.json",
          "timestamp": "2026-03-16T22:22:38.058348Z",
          "accuracy": 0.9130434782608695,
          "cohen_kappa": 0.8854581673306772
        },
        {
          "provider": "vertex",
          "model": "gemini-3-flash-preview",
          "run_stem": "like_interrater__vertex__gemini3flashpreview__2026-03-16-20-22",
          "metrics_file": "like_interrater__vertex__gemini3flashpreview__2026-03-16-20-22__metrics.json",
          "timestamp": "2026-03-16T19:22:29.614294Z",
          "accuracy": 0.9217391304347826,
          "cohen_kappa": 0.8961885656970913
        }
      ],
      "pairwise": [
        {
          "a": 0,
          "b": 1,
          "distance": 0.02702702702702703,
          "overlap_count": 111,
          "agreement_count": 108,
          "disagreement_count": 3
        }
      ],
      "linkage": [
        [
          0,
          1,
          0.02702702702702703,
          2
        ]
      ],
      "linkage_complete": true
    },
    {
      "group_id": "d2ad6ced0669cd8e",
      "cluster_scope": "repeat",
      "task_fingerprint": "0eb7fe892594e382024c3813de8ec46058d12d33d835017ca9df11d4d238d61e",
      "normalized_tag_key": "*-ing*;adverbial;clause;english;syntax",
      "task_name_display": "ADV *-ing* clause",
      "task_names_seen": [
        "ADV *-ing* clause"
      ],
      "tags_display": "*-ing*; syntax; adverbial; clause; English",
      "provider": "e-infra",
      "model": "qwen3.5",
      "run_count": 2,
      "run_stems": [
        "adv-ing__einfra__qwen35__2026-03-20-15-45",
        "adv-ing__einfra__qwen35__2026-03-12-14-37"
      ],
      "distance_metric": "nominal_disagreement_rate",
      "linkage_method": "average",
      "comparable_pair_count": 1,
      "representatives": [
        {
          "provider": "e-infra",
          "model": "qwen3.5",
          "run_stem": "adv-ing__einfra__qwen35__2026-03-20-15-45",
          "metrics_file": "adv-ing__einfra__qwen35__2026-03-20-15-45__metrics.json",
          "timestamp": "2026-03-20T14:45:23.934528Z",
          "accuracy": 0.8113207547169812,
          "cohen_kappa": 0.6568283602223062
        },
        {
          "provider": "e-infra",
          "model": "qwen3.5",
          "run_stem": "adv-ing__einfra__qwen35__2026-03-12-14-37",
          "metrics_file": "adv-ing__einfra__qwen35__2026-03-12-14-37__metrics.json",
          "timestamp": "2026-03-12T13:37:29.843039Z",
          "accuracy": 0.8238993710691824,
          "cohen_kappa": 0.670716148000222
        }
      ],
      "pairwise": [
        {
          "a": 0,
          "b": 1,
          "distance": 0.015873015873015872,
          "overlap_count": 252,
          "agreement_count": 248,
          "disagreement_count": 4
        }
      ],
      "linkage": [
        [
          0,
          1,
          0.015873015873015872,
          2
        ]
      ],
      "linkage_complete": true
    },
    {
      "group_id": "dbae833088ea91ef",
      "cluster_scope": "repeat",
      "task_fingerprint": "0eb7fe892594e382024c3813de8ec46058d12d33d835017ca9df11d4d238d61e",
      "normalized_tag_key": "*-ing*;adverbial;clause;english;syntax",
      "task_name_display": "ADV *-ing* clause",
      "task_names_seen": [
        "ADV *-ing* clause"
      ],
      "tags_display": "*-ing*; syntax; adverbial; clause; English",
      "provider": "google",
      "model": "gemini-3-flash-preview",
      "run_count": 2,
      "run_stems": [
        "adv-ing__google__modelsgemini3flashpreview__2026-03-11-15-40",
        "adv-ing__vertex__gemini3flashpreview__2026-03-11-01-07"
      ],
      "distance_metric": "nominal_disagreement_rate",
      "linkage_method": "average",
      "comparable_pair_count": 1,
      "representatives": [
        {
          "provider": "google",
          "model": "gemini-3-flash-preview",
          "run_stem": "adv-ing__google__modelsgemini3flashpreview__2026-03-11-15-40",
          "metrics_file": "adv-ing__google__modelsgemini3flashpreview__2026-03-11-15-40__metrics.json",
          "timestamp": "2026-03-11T14:40:24.117684Z",
          "accuracy": 0.9811320754716981,
          "cohen_kappa": 0.9596241747079736
        },
        {
          "provider": "google",
          "model": "gemini-3-flash-preview",
          "run_stem": "adv-ing__vertex__gemini3flashpreview__2026-03-11-01-07",
          "metrics_file": "adv-ing__vertex__gemini3flashpreview__2026-03-11-01-07__metrics.json",
          "timestamp": "2026-03-11T10:11:38.314790Z",
          "accuracy": 0.9748427672955975,
          "cohen_kappa": 0.9465388979952087
        }
      ],
      "pairwise": [
        {
          "a": 0,
          "b": 1,
          "distance": 0.012578616352201259,
          "overlap_count": 318,
          "agreement_count": 314,
          "disagreement_count": 4
        }
      ],
      "linkage": [
        [
          0,
          1,
          0.012578616352201259,
          2
        ]
      ],
      "linkage_complete": true
    },
    {
      "group_id": "0816f7ef27ef6768",
      "cluster_scope": "repeat",
      "task_fingerprint": "0eb7fe892594e382024c3813de8ec46058d12d33d835017ca9df11d4d238d61e",
      "normalized_tag_key": "*-ing*;adverbial;clause;english;syntax",
      "task_name_display": "ADV *-ing* clause",
      "task_names_seen": [
        "ADV *-ing* clause"
      ],
      "tags_display": "*-ing*; syntax; adverbial; clause; English",
      "provider": "openai",
      "model": "gpt-oss-120b",
      "run_count": 2,
      "run_stems": [
        "adv-ing____gptoss120b__2025-11-08-23-28",
        "adv-ing____gptoss120b__2025-11-08-17-25"
      ],
      "distance_metric": "nominal_disagreement_rate",
      "linkage_method": "average",
      "comparable_pair_count": 1,
      "representatives": [
        {
          "provider": "openai",
          "model": "gpt-oss-120b",
          "run_stem": "adv-ing____gptoss120b__2025-11-08-23-28",
          "metrics_file": "adv-ing____gptoss120b__2025-11-08-23-28__metrics.json",
          "timestamp": "2025-11-08T22:28:31.410308Z",
          "accuracy": 0.9308176100628931,
          "cohen_kappa": 0.857107843137255
        },
        {
          "provider": "openai",
          "model": "gpt-oss-120b",
          "run_stem": "adv-ing____gptoss120b__2025-11-08-17-25",
          "metrics_file": "adv-ing____gptoss120b__2025-11-08-17-25__metrics.json",
          "timestamp": "2025-11-08T16:25:41.584007Z",
          "accuracy": 0.7924528301886793,
          "cohen_kappa": 0.6249396879858469
        }
      ],
      "pairwise": [
        {
          "a": 0,
          "b": 1,
          "distance": 0.027450980392156862,
          "overlap_count": 255,
          "agreement_count": 248,
          "disagreement_count": 7
        }
      ],
      "linkage": [
        [
          0,
          1,
          0.027450980392156862,
          2
        ]
      ],
      "linkage_complete": true
    },
    {
      "group_id": "8f6fb5570231a90f",
      "cluster_scope": "repeat",
      "task_fingerprint": "0eb7fe892594e382024c3813de8ec46058d12d33d835017ca9df11d4d238d61e",
      "normalized_tag_key": "*-ing*;adverbial;clause;english;syntax",
      "task_name_display": "ADV *-ing* clause",
      "task_names_seen": [
        "ADV *-ing* clause"
      ],
      "tags_display": "*-ing*; syntax; adverbial; clause; English",
      "provider": "openai",
      "model": "gpt5",
      "run_count": 2,
      "run_stems": [
        "adv-ing____gpt5__2025-11-09-00-09",
        "adv-ing____gpt5__2025-11-08-17-24"
      ],
      "distance_metric": "nominal_disagreement_rate",
      "linkage_method": "average",
      "comparable_pair_count": 1,
      "representatives": [
        {
          "provider": "openai",
          "model": "gpt5",
          "run_stem": "adv-ing____gpt5__2025-11-09-00-09",
          "metrics_file": "adv-ing____gpt5__2025-11-09-00-09__metrics.json",
          "timestamp": "2025-11-08T23:09:42.353960Z",
          "accuracy": 0.9025157232704403,
          "cohen_kappa": 0.8061013748746091
        },
        {
          "provider": "openai",
          "model": "gpt5",
          "run_stem": "adv-ing____gpt5__2025-11-08-17-24",
          "metrics_file": "adv-ing____gpt5__2025-11-08-17-24__metrics.json",
          "timestamp": "2025-11-08T16:24:26.999467Z",
          "accuracy": 0.9245283018867925,
          "cohen_kappa": 0.8476190476190476
        }
      ],
      "pairwise": [
        {
          "a": 0,
          "b": 1,
          "distance": 0.010169491525423728,
          "overlap_count": 295,
          "agreement_count": 292,
          "disagreement_count": 3
        }
      ],
      "linkage": [
        [
          0,
          1,
          0.010169491525423728,
          2
        ]
      ],
      "linkage_complete": true
    },
    {
      "group_id": "e771e84742134b17",
      "cluster_scope": "repeat",
      "task_fingerprint": "0eb7fe892594e382024c3813de8ec46058d12d33d835017ca9df11d4d238d61e",
      "normalized_tag_key": "*-ing*;adverbial;clause;english;syntax",
      "task_name_display": "ADV *-ing* clause",
      "task_names_seen": [
        "ADV *-ing* clause"
      ],
      "tags_display": "*-ing*; syntax; adverbial; clause; English",
      "provider": "requesty",
      "model": "claude-sonnet-4-6",
      "run_count": 2,
      "run_stems": [
        "adv-ing__requesty__anthropicclaudesonnet46__2026-03-11-16-11",
        "adv-ing__requesty__anthropicclaudesonnet46__2026-03-11-16-06"
      ],
      "distance_metric": "nominal_disagreement_rate",
      "linkage_method": "average",
      "comparable_pair_count": 1,
      "representatives": [
        {
          "provider": "requesty",
          "model": "claude-sonnet-4-6",
          "run_stem": "adv-ing__requesty__anthropicclaudesonnet46__2026-03-11-16-11",
          "metrics_file": "adv-ing__requesty__anthropicclaudesonnet46__2026-03-11-16-11__metrics.json",
          "timestamp": "2026-03-11T15:11:34.763226Z",
          "accuracy": 0.9308176100628931,
          "cohen_kappa": 0.8585638039786512
        },
        {
          "provider": "requesty",
          "model": "claude-sonnet-4-6",
          "run_stem": "adv-ing__requesty__anthropicclaudesonnet46__2026-03-11-16-06",
          "metrics_file": "adv-ing__requesty__anthropicclaudesonnet46__2026-03-11-16-06__metrics.json",
          "timestamp": "2026-03-11T15:06:20.109974Z",
          "accuracy": 0.9056603773584906,
          "cohen_kappa": 0.8112535612535613
        }
      ],
      "pairwise": [
        {
          "a": 0,
          "b": 1,
          "distance": 0.01639344262295082,
          "overlap_count": 305,
          "agreement_count": 300,
          "disagreement_count": 5
        }
      ],
      "linkage": [
        [
          0,
          1,
          0.01639344262295082,
          2
        ]
      ],
      "linkage_complete": true
    },
    {
      "group_id": "7117b4cf3248e0be",
      "cluster_scope": "repeat",
      "task_fingerprint": "5d2775386b7429a15d795e64be76f5d0829aff4b3b67fc8acae71e4adebccffd",
      "normalized_tag_key": "morphology;number;old english",
      "task_name_display": "OE number",
      "task_names_seen": [
        "OE number"
      ],
      "tags_display": "Old English; morphology; number",
      "provider": "vertex",
      "model": "gemini-3-flash-preview",
      "run_count": 6,
      "run_stems": [
        "OE_number__vertex__gemini3flashpreview__2026-03-11-00-23",
        "OE_number__vertex__gemini3flashpreview__2026-03-10-22-35",
        "OE_number__vertex__gemini3flashpreview__2026-03-10-22-02",
        "OE_number__vertex__gemini3flashpreview__2026-03-10-21-17",
        "OE_number__vertex__gemini3flashpreview__2026-02-27-02-12",
        "OE_number__vertex__gemini3flashpreview__2026-02-23-16-58"
      ],
      "distance_metric": "nominal_disagreement_rate",
      "linkage_method": "average",
      "comparable_pair_count": 15,
      "representatives": [
        {
          "provider": "vertex",
          "model": "gemini-3-flash-preview",
          "run_stem": "OE_number__vertex__gemini3flashpreview__2026-03-11-00-23",
          "metrics_file": "OE_number__vertex__gemini3flashpreview__2026-03-11-00-23__metrics.json",
          "timestamp": "2026-03-10T23:24:01.576132Z",
          "accuracy": 0.9783333333333334,
          "cohen_kappa": 0.937877315662881
        },
        {
          "provider": "vertex",
          "model": "gemini-3-flash-preview",
          "run_stem": "OE_number__vertex__gemini3flashpreview__2026-03-10-22-35",
          "metrics_file": "OE_number__vertex__gemini3flashpreview__2026-03-10-22-35__metrics.json",
          "timestamp": "2026-03-10T21:35:33.887375Z",
          "accuracy": 0.9783333333333334,
          "cohen_kappa": 0.937877315662881
        },
        {
          "provider": "vertex",
          "model": "gemini-3-flash-preview",
          "run_stem": "OE_number__vertex__gemini3flashpreview__2026-03-10-22-02",
          "metrics_file": "OE_number__vertex__gemini3flashpreview__2026-03-10-22-02__metrics.json",
          "timestamp": "2026-03-10T21:02:30.244324Z",
          "accuracy": 0.9775,
          "cohen_kappa": 0.9360689508204486
        },
        {
          "provider": "vertex",
          "model": "gemini-3-flash-preview",
          "run_stem": "OE_number__vertex__gemini3flashpreview__2026-03-10-21-17",
          "metrics_file": "OE_number__vertex__gemini3flashpreview__2026-03-10-21-17__metrics.json",
          "timestamp": "2026-03-10T20:18:02.213187Z",
          "accuracy": 0.955,
          "cohen_kappa": 0.8780322536929122
        },
        {
          "provider": "vertex",
          "model": "gemini-3-flash-preview",
          "run_stem": "OE_number__vertex__gemini3flashpreview__2026-02-27-02-12",
          "metrics_file": "OE_number__vertex__gemini3flashpreview__2026-02-27-02-12__metrics.json",
          "timestamp": "2026-02-27T01:12:08.626686Z",
          "accuracy": 0.9791666666666666,
          "cohen_kappa": 0.9403440952585486
        },
        {
          "provider": "vertex",
          "model": "gemini-3-flash-preview",
          "run_stem": "OE_number__vertex__gemini3flashpreview__2026-02-23-16-58",
          "metrics_file": "OE_number__vertex__gemini3flashpreview__2026-02-23-16-58__metrics.json",
          "timestamp": "2026-02-23T15:58:09.118614Z",
          "accuracy": 0.9783333333333334,
          "cohen_kappa": 0.937877315662881
        }
      ],
      "pairwise": [
        {
          "a": 0,
          "b": 1,
          "distance": 0.0016666666666666668,
          "overlap_count": 1200,
          "agreement_count": 1198,
          "disagreement_count": 2
        },
        {
          "a": 0,
          "b": 2,
          "distance": 0.010833333333333334,
          "overlap_count": 1200,
          "agreement_count": 1187,
          "disagreement_count": 13
        },
        {
          "a": 0,
          "b": 3,
          "distance": 0.021996615905245348,
          "overlap_count": 1182,
          "agreement_count": 1156,
          "disagreement_count": 26
        },
        {
          "a": 0,
          "b": 4,
          "distance": 0.004166666666666667,
          "overlap_count": 1200,
          "agreement_count": 1195,
          "disagreement_count": 5
        },
        {
          "a": 0,
          "b": 5,
          "distance": 0.005,
          "overlap_count": 1200,
          "agreement_count": 1194,
          "disagreement_count": 6
        },
        {
          "a": 1,
          "b": 2,
          "distance": 0.010833333333333334,
          "overlap_count": 1200,
          "agreement_count": 1187,
          "disagreement_count": 13
        },
        {
          "a": 1,
          "b": 3,
          "distance": 0.021996615905245348,
          "overlap_count": 1182,
          "agreement_count": 1156,
          "disagreement_count": 26
        },
        {
          "a": 1,
          "b": 4,
          "distance": 0.004166666666666667,
          "overlap_count": 1200,
          "agreement_count": 1195,
          "disagreement_count": 5
        },
        {
          "a": 1,
          "b": 5,
          "distance": 0.0033333333333333335,
          "overlap_count": 1200,
          "agreement_count": 1196,
          "disagreement_count": 4
        },
        {
          "a": 2,
          "b": 3,
          "distance": 0.016074450084602367,
          "overlap_count": 1182,
          "agreement_count": 1163,
          "disagreement_count": 19
        },
        {
          "a": 2,
          "b": 4,
          "distance": 0.01,
          "overlap_count": 1200,
          "agreement_count": 1188,
          "disagreement_count": 12
        },
        {
          "a": 2,
          "b": 5,
          "distance": 0.0125,
          "overlap_count": 1200,
          "agreement_count": 1185,
          "disagreement_count": 15
        },
        {
          "a": 3,
          "b": 4,
          "distance": 0.02284263959390863,
          "overlap_count": 1182,
          "agreement_count": 1155,
          "disagreement_count": 27
        },
        {
          "a": 3,
          "b": 5,
          "distance": 0.021996615905245348,
          "overlap_count": 1182,
          "agreement_count": 1156,
          "disagreement_count": 26
        },
        {
          "a": 4,
          "b": 5,
          "distance": 0.004166666666666667,
          "overlap_count": 1200,
          "agreement_count": 1195,
          "disagreement_count": 5
        }
      ],
      "linkage": [
        [
          0,
          1,
          0.0016666666666666668,
          2
        ],
        [
          4,
          5,
          0.004166666666666667,
          2
        ],
        [
          6,
          7,
          0.004166666666666667,
          4
        ],
        [
          2,
          8,
          0.011041666666666667,
          5
        ],
        [
          3,
          9,
          0.02098138747884941,
          6
        ]
      ],
      "linkage_complete": true
    }
  ],
  "cross_model": {
    "latest": [
      {
        "group_id": "4c9b09f197f45ed4",
        "cluster_scope": "cross_model",
        "representative_policy": "latest",
        "task_fingerprint": "7326c5be7cb61f6e9585d417047cb29a4d0b0ca63102404c9692e99eeae036b2",
        "normalized_tag_key": "*like*;discourse;english;pragmatics;v1",
        "task_name_display": "*like* discourse/pragm",
        "task_names_seen": [
          "*like* discourse/pragm"
        ],
        "tags_display": "*like*; discourse; pragmatics; English; v1",
        "model_count": 6,
        "distance_metric": "nominal_disagreement_rate",
        "linkage_method": "average",
        "representative_run_stems": [
          "like____gptoss120b__2025-11-09-17-09",
          "like____gpt5__2025-11-09-02-03",
          "like__requesty__anthropicclaudesonnet46__2026-03-12-15-43",
          "like__requesty__novitadeepseekdeepseekv32__2026-03-12-17-27",
          "like__vertex__gemini3flashpreview__2026-03-12-01-31",
          "like__vertex__gemini31propreview__2026-03-12-18-10"
        ],
        "comparable_pair_count": 15,
        "representatives": [
          {
            "provider": "openai",
            "model": "gpt-oss-120b",
            "run_stem": "like____gptoss120b__2025-11-09-17-09",
            "metrics_file": "like____gptoss120b__2025-11-09-17-09__metrics.json",
            "timestamp": "2025-11-09T16:09:03.246413Z",
            "accuracy": 0.6416416416416416,
            "cohen_kappa": 0.5149610698897534
          },
          {
            "provider": "openai",
            "model": "gpt5",
            "run_stem": "like____gpt5__2025-11-09-02-03",
            "metrics_file": "like____gpt5__2025-11-09-02-03__metrics.json",
            "timestamp": "2025-11-09T01:03:38.395018Z",
            "accuracy": 0.8398398398398398,
            "cohen_kappa": 0.7709513457898124
          },
          {
            "provider": "requesty",
            "model": "claude-sonnet-4-6",
            "run_stem": "like__requesty__anthropicclaudesonnet46__2026-03-12-15-43",
            "metrics_file": "like__requesty__anthropicclaudesonnet46__2026-03-12-15-43__metrics.json",
            "timestamp": "2026-03-12T14:43:10.303101Z",
            "accuracy": 0.8228228228228228,
            "cohen_kappa": 0.7478161408052575
          },
          {
            "provider": "requesty",
            "model": "deepseek-v3.2",
            "run_stem": "like__requesty__novitadeepseekdeepseekv32__2026-03-12-17-27",
            "metrics_file": "like__requesty__novitadeepseekdeepseekv32__2026-03-12-17-27__metrics.json",
            "timestamp": "2026-03-12T16:27:57.675261Z",
            "accuracy": 0.6296296296296297,
            "cohen_kappa": 0.46022553031165947
          },
          {
            "provider": "vertex",
            "model": "gemini-3-flash-preview",
            "run_stem": "like__vertex__gemini3flashpreview__2026-03-12-01-31",
            "metrics_file": "like__vertex__gemini3flashpreview__2026-03-12-01-31__metrics.json",
            "timestamp": "2026-03-12T00:31:13.838640Z",
            "accuracy": 0.8838838838838838,
            "cohen_kappa": 0.832653167320839
          },
          {
            "provider": "vertex",
            "model": "gemini-3.1-pro-preview",
            "run_stem": "like__vertex__gemini31propreview__2026-03-12-18-10",
            "metrics_file": "like__vertex__gemini31propreview__2026-03-12-18-10__metrics.json",
            "timestamp": "2026-03-12T17:10:58.353313Z",
            "accuracy": 0.8948948948948949,
            "cohen_kappa": 0.8484215728492612
          }
        ],
        "pairwise": [
          {
            "a": 0,
            "b": 1,
            "distance": 0.22894424673784106,
            "overlap_count": 843,
            "agreement_count": 650,
            "disagreement_count": 193
          },
          {
            "a": 0,
            "b": 2,
            "distance": 0.2529411764705882,
            "overlap_count": 850,
            "agreement_count": 635,
            "disagreement_count": 215
          },
          {
            "a": 0,
            "b": 3,
            "distance": 0.39176470588235296,
            "overlap_count": 850,
            "agreement_count": 517,
            "disagreement_count": 333
          },
          {
            "a": 0,
            "b": 4,
            "distance": 0.21672555948174324,
            "overlap_count": 849,
            "agreement_count": 665,
            "disagreement_count": 184
          },
          {
            "a": 0,
            "b": 5,
            "distance": 0.2429245283018868,
            "overlap_count": 848,
            "agreement_count": 642,
            "disagreement_count": 206
          },
          {
            "a": 1,
            "b": 2,
            "distance": 0.18944099378881987,
            "overlap_count": 966,
            "agreement_count": 783,
            "disagreement_count": 183
          },
          {
            "a": 1,
            "b": 3,
            "distance": 0.3581780538302277,
            "overlap_count": 966,
            "agreement_count": 620,
            "disagreement_count": 346
          },
          {
            "a": 1,
            "b": 4,
            "distance": 0.08799171842650104,
            "overlap_count": 966,
            "agreement_count": 881,
            "disagreement_count": 85
          },
          {
            "a": 1,
            "b": 5,
            "distance": 0.09326424870466321,
            "overlap_count": 965,
            "agreement_count": 875,
            "disagreement_count": 90
          },
          {
            "a": 2,
            "b": 3,
            "distance": 0.3784056508577195,
            "overlap_count": 991,
            "agreement_count": 616,
            "disagreement_count": 375
          },
          {
            "a": 2,
            "b": 4,
            "distance": 0.15656565656565657,
            "overlap_count": 990,
            "agreement_count": 835,
            "disagreement_count": 155
          },
          {
            "a": 2,
            "b": 5,
            "distance": 0.1720647773279352,
            "overlap_count": 988,
            "agreement_count": 818,
            "disagreement_count": 170
          },
          {
            "a": 3,
            "b": 4,
            "distance": 0.3474747474747475,
            "overlap_count": 990,
            "agreement_count": 646,
            "disagreement_count": 344
          },
          {
            "a": 3,
            "b": 5,
            "distance": 0.3481781376518219,
            "overlap_count": 988,
            "agreement_count": 644,
            "disagreement_count": 344
          },
          {
            "a": 4,
            "b": 5,
            "distance": 0.053643724696356275,
            "overlap_count": 988,
            "agreement_count": 935,
            "disagreement_count": 53
          }
        ],
        "linkage": [
          [
            4,
            5,
            0.053643724696356275,
            2
          ],
          [
            1,
            6,
            0.09062798356558213,
            3
          ],
          [
            2,
            7,
            0.17269047589413722,
            4
          ],
          [
            0,
            8,
            0.23538387774801484,
            5
          ],
          [
            3,
            9,
            0.3648002591393739,
            6
          ]
        ],
        "linkage_complete": true
      },
      {
        "group_id": "55424975a19f10e2",
        "cluster_scope": "cross_model",
        "representative_policy": "latest",
        "task_fingerprint": "7326c5be7cb61f6e9585d417047cb29a4d0b0ca63102404c9692e99eeae036b2",
        "normalized_tag_key": "*like*;discourse;english;pragmatics;v2",
        "task_name_display": "*like* discourse/pragm",
        "task_names_seen": [
          "*like* discourse/pragm"
        ],
        "tags_display": "*like*; discourse; pragmatics; English; v2",
        "model_count": 4,
        "distance_metric": "nominal_disagreement_rate",
        "linkage_method": "average",
        "representative_run_stems": [
          "like__vertex__gemini3flashpreview__2026-03-13-02-32",
          "like__vertex__gemini3propreview__2026-03-13-03-28",
          "like__vertex__gemini31flashlitepreview__2026-03-13-03-29",
          "like__vertex__gemini31propreview__2026-03-13-12-02"
        ],
        "comparable_pair_count": 6,
        "representatives": [
          {
            "provider": "vertex",
            "model": "gemini-3-flash-preview",
            "run_stem": "like__vertex__gemini3flashpreview__2026-03-13-02-32",
            "metrics_file": "like__vertex__gemini3flashpreview__2026-03-13-02-32__metrics.json",
            "timestamp": "2026-03-13T01:32:26.042657Z",
            "accuracy": 0.908908908908909,
            "cohen_kappa": 0.8684070726005488
          },
          {
            "provider": "vertex",
            "model": "gemini-3-pro-preview",
            "run_stem": "like__vertex__gemini3propreview__2026-03-13-03-28",
            "metrics_file": "like__vertex__gemini3propreview__2026-03-13-03-28__metrics.json",
            "timestamp": "2026-03-13T02:29:02.578184Z",
            "accuracy": 0.9019019019019019,
            "cohen_kappa": 0.8586077115714009
          },
          {
            "provider": "vertex",
            "model": "gemini-3.1-flash-lite-preview",
            "run_stem": "like__vertex__gemini31flashlitepreview__2026-03-13-03-29",
            "metrics_file": "like__vertex__gemini31flashlitepreview__2026-03-13-03-29__metrics.json",
            "timestamp": "2026-03-13T02:29:25.545069Z",
            "accuracy": 0.7877877877877878,
            "cohen_kappa": 0.6900339840354066
          },
          {
            "provider": "vertex",
            "model": "gemini-3.1-pro-preview",
            "run_stem": "like__vertex__gemini31propreview__2026-03-13-12-02",
            "metrics_file": "like__vertex__gemini31propreview__2026-03-13-12-02__metrics.json",
            "timestamp": "2026-03-13T11:02:34.724184Z",
            "accuracy": 0.914914914914915,
            "cohen_kappa": 0.876989156966848
          }
        ],
        "pairwise": [
          {
            "a": 0,
            "b": 1,
            "distance": 0.06666666666666667,
            "overlap_count": 990,
            "agreement_count": 924,
            "disagreement_count": 66
          },
          {
            "a": 0,
            "b": 2,
            "distance": 0.1717479674796748,
            "overlap_count": 984,
            "agreement_count": 815,
            "disagreement_count": 169
          },
          {
            "a": 0,
            "b": 3,
            "distance": 0.06060606060606061,
            "overlap_count": 990,
            "agreement_count": 930,
            "disagreement_count": 60
          },
          {
            "a": 1,
            "b": 2,
            "distance": 0.17682926829268292,
            "overlap_count": 984,
            "agreement_count": 810,
            "disagreement_count": 174
          },
          {
            "a": 1,
            "b": 3,
            "distance": 0.046464646464646465,
            "overlap_count": 990,
            "agreement_count": 944,
            "disagreement_count": 46
          },
          {
            "a": 2,
            "b": 3,
            "distance": 0.17073170731707318,
            "overlap_count": 984,
            "agreement_count": 816,
            "disagreement_count": 168
          }
        ],
        "linkage": [
          [
            1,
            3,
            0.046464646464646465,
            2
          ],
          [
            0,
            4,
            0.06363636363636363,
            3
          ],
          [
            2,
            5,
            0.17310298102981028,
            4
          ]
        ],
        "linkage_complete": true
      },
      {
        "group_id": "47ee6582d2d2f0c4",
        "cluster_scope": "cross_model",
        "representative_policy": "latest",
        "task_fingerprint": "cd60855ae86dc7a8391248627a68bbc5a77fabf2d75d0b967118c09aec3d8faa",
        "normalized_tag_key": "*like*;discourse;english;pragmatics;v3",
        "task_name_display": "*like* discourse/pragm",
        "task_names_seen": [
          "*like* discourse/pragm"
        ],
        "tags_display": "*like*; discourse; pragmatics; English; v3",
        "model_count": 17,
        "distance_metric": "nominal_disagreement_rate",
        "linkage_method": "average",
        "representative_run_stems": [
          "like_interrater__einfra__deepseekv32thinking__2026-03-16-23-13",
          "like_interrater__einfra__glm47__2026-03-17-00-08",
          "like_interrater__einfra__glm5__2026-03-31-17-53",
          "like_interrater__einfra__glm51__2026-04-24-16-04",
          "like_interrater__einfra__kimik25__2026-03-19-14-20",
          "like_interrater__einfra__kimik26__2026-04-21-18-06",
          "like_interrater__einfra__qwen35__2026-03-16-23-35",
          "like_interrater__google__modelsgemma426ba4bit__2026-04-04-01-49",
          "like_interrater__openai__gpt54__2026-03-16-23-16",
          "like_interrater__openai__gpt54mini__2026-03-20-18-10",
          "like_interrater__openrouter__qwenqwen36plusfree__2026-04-03-16-35",
          "like_interrater__requesty__anthropicclaudehaiku45__2026-03-23-00-11",
          "like_interrater__requesty__anthropicclaudeopus46__2026-03-21-02-24",
          "like_interrater__requesty__nebiuszaiorgglm47__2026-03-17-15-44",
          "like_interrater__requesty__moonshotkimik25__2026-03-17-15-43",
          "like_interrater__vertex__gemini3flashpreview__2026-03-16-23-22",
          "like_interrater__vertex__gemini31propreview__2026-03-16-23-17"
        ],
        "comparable_pair_count": 136,
        "representatives": [
          {
            "provider": "e-infra",
            "model": "deepseek-v3.2-thinking",
            "run_stem": "like_interrater__einfra__deepseekv32thinking__2026-03-16-23-13",
            "metrics_file": "like_interrater__einfra__deepseekv32thinking__2026-03-16-23-13__metrics.json",
            "timestamp": "2026-03-16T22:13:59.292606Z",
            "accuracy": 0.7565217391304347,
            "cohen_kappa": 0.6858536585365853
          },
          {
            "provider": "e-infra",
            "model": "glm-4.7",
            "run_stem": "like_interrater__einfra__glm47__2026-03-17-00-08",
            "metrics_file": "like_interrater__einfra__glm47__2026-03-17-00-08__metrics.json",
            "timestamp": "2026-03-16T23:08:13.298000Z",
            "accuracy": 0.8260869565217391,
            "cohen_kappa": 0.7717121588089332
          },
          {
            "provider": "e-infra",
            "model": "glm-5",
            "run_stem": "like_interrater__einfra__glm5__2026-03-31-17-53",
            "metrics_file": "like_interrater__einfra__glm5__2026-03-31-17-53__metrics.json",
            "timestamp": "2026-03-31T15:53:31.126778Z",
            "accuracy": 0.8869565217391304,
            "cohen_kappa": 0.8496732026143791
          },
          {
            "provider": "e-infra",
            "model": "glm-5.1",
            "run_stem": "like_interrater__einfra__glm51__2026-04-24-16-04",
            "metrics_file": "like_interrater__einfra__glm51__2026-04-24-16-04__metrics.json",
            "timestamp": "2026-04-24T14:04:41.439134Z",
            "accuracy": 0.8782608695652174,
            "cohen_kappa": 0.8386773547094188
          },
          {
            "provider": "e-infra",
            "model": "kimi-k2.5",
            "run_stem": "like_interrater__einfra__kimik25__2026-03-19-14-20",
            "metrics_file": "like_interrater__einfra__kimik25__2026-03-19-14-20__metrics.json",
            "timestamp": "2026-03-19T13:20:12.743909Z",
            "accuracy": 0.8434782608695652,
            "cohen_kappa": 0.7933100349475787
          },
          {
            "provider": "e-infra",
            "model": "kimi-k2.6",
            "run_stem": "like_interrater__einfra__kimik26__2026-04-21-18-06",
            "metrics_file": "like_interrater__einfra__kimik26__2026-04-21-18-06__metrics.json",
            "timestamp": "2026-04-25T10:26:44.480602Z",
            "accuracy": 0.808695652173913,
            "cohen_kappa": 0.7486338797814208
          },
          {
            "provider": "e-infra",
            "model": "qwen3.5",
            "run_stem": "like_interrater__einfra__qwen35__2026-03-16-23-35",
            "metrics_file": "like_interrater__einfra__qwen35__2026-03-16-23-35__metrics.json",
            "timestamp": "2026-03-16T22:35:09.822527Z",
            "accuracy": 0.8956521739130435,
            "cohen_kappa": 0.8637037037037038
          },
          {
            "provider": "google",
            "model": "gemma-4-26b-a4b-it",
            "run_stem": "like_interrater__google__modelsgemma426ba4bit__2026-04-04-01-49",
            "metrics_file": "like_interrater__google__modelsgemma426ba4bit__2026-04-04-01-49__metrics.json",
            "timestamp": "2026-04-03T23:49:56.734262Z",
            "accuracy": 0.8173913043478261,
            "cohen_kappa": 0.7577733199598797
          },
          {
            "provider": "openai",
            "model": "gpt-5.4",
            "run_stem": "like_interrater__openai__gpt54__2026-03-16-23-16",
            "metrics_file": "like_interrater__openai__gpt54__2026-03-16-23-16__metrics.json",
            "timestamp": "2026-03-16T22:16:19.942954Z",
            "accuracy": 0.6869565217391305,
            "cohen_kappa": 0.6090651558073654
          },
          {
            "provider": "openai",
            "model": "gpt-5.4-mini",
            "run_stem": "like_interrater__openai__gpt54mini__2026-03-20-18-10",
            "metrics_file": "like_interrater__openai__gpt54mini__2026-03-20-18-10__metrics.json",
            "timestamp": "2026-03-20T17:11:01.726403Z",
            "accuracy": 0.6086956521739131,
            "cohen_kappa": 0.48964497041420124
          },
          {
            "provider": "openrouter",
            "model": "qwen3.6",
            "run_stem": "like_interrater__openrouter__qwenqwen36plusfree__2026-04-03-16-35",
            "metrics_file": "like_interrater__openrouter__qwenqwen36plusfree__2026-04-03-16-35__metrics.json",
            "timestamp": "2026-04-03T14:35:02.774817Z",
            "accuracy": 0.8869565217391304,
            "cohen_kappa": 0.8521266073194856
          },
          {
            "provider": "requesty",
            "model": "claude-haiku-4-5",
            "run_stem": "like_interrater__requesty__anthropicclaudehaiku45__2026-03-23-00-11",
            "metrics_file": "like_interrater__requesty__anthropicclaudehaiku45__2026-03-23-00-11__metrics.json",
            "timestamp": "2026-03-22T23:11:59.214488Z",
            "accuracy": 0.5478260869565217,
            "cohen_kappa": 0.40909090909090906
          },
          {
            "provider": "requesty",
            "model": "claude-opus-4-6",
            "run_stem": "like_interrater__requesty__anthropicclaudeopus46__2026-03-21-02-24",
            "metrics_file": "like_interrater__requesty__anthropicclaudeopus46__2026-03-21-02-24__metrics.json",
            "timestamp": "2026-03-21T01:24:28.930846Z",
            "accuracy": 0.8173913043478261,
            "cohen_kappa": 0.7521806054386866
          },
          {
            "provider": "requesty",
            "model": "glm-4.7",
            "run_stem": "like_interrater__requesty__nebiuszaiorgglm47__2026-03-17-15-44",
            "metrics_file": "like_interrater__requesty__nebiuszaiorgglm47__2026-03-17-15-44__metrics.json",
            "timestamp": "2026-03-17T14:44:34.806095Z",
            "accuracy": 0.8,
            "cohen_kappa": 0.7374689826302729
          },
          {
            "provider": "requesty",
            "model": "kimi-k2.5",
            "run_stem": "like_interrater__requesty__moonshotkimik25__2026-03-17-15-43",
            "metrics_file": "like_interrater__requesty__moonshotkimik25__2026-03-17-15-43__metrics.json",
            "timestamp": "2026-03-17T14:43:35.190186Z",
            "accuracy": 0.8608695652173913,
            "cohen_kappa": 0.817279046673287
          },
          {
            "provider": "vertex",
            "model": "gemini-3-flash-preview",
            "run_stem": "like_interrater__vertex__gemini3flashpreview__2026-03-16-23-22",
            "metrics_file": "like_interrater__vertex__gemini3flashpreview__2026-03-16-23-22__metrics.json",
            "timestamp": "2026-03-16T22:22:38.058348Z",
            "accuracy": 0.9130434782608695,
            "cohen_kappa": 0.8854581673306772
          },
          {
            "provider": "vertex",
            "model": "gemini-3.1-pro-preview",
            "run_stem": "like_interrater__vertex__gemini31propreview__2026-03-16-23-17",
            "metrics_file": "like_interrater__vertex__gemini31propreview__2026-03-16-23-17__metrics.json",
            "timestamp": "2026-03-16T22:17:19.752371Z",
            "accuracy": 0.9130434782608695,
            "cohen_kappa": 0.8857426726279185
          }
        ],
        "pairwise": [
          {
            "a": 0,
            "b": 1,
            "distance": 0.20388349514563106,
            "overlap_count": 103,
            "agreement_count": 82,
            "disagreement_count": 21
          },
          {
            "a": 0,
            "b": 2,
            "distance": 0.20952380952380953,
            "overlap_count": 105,
            "agreement_count": 83,
            "disagreement_count": 22
          },
          {
            "a": 0,
            "b": 3,
            "distance": 0.20952380952380953,
            "overlap_count": 105,
            "agreement_count": 83,
            "disagreement_count": 22
          },
          {
            "a": 0,
            "b": 4,
            "distance": 0.16346153846153846,
            "overlap_count": 104,
            "agreement_count": 87,
            "disagreement_count": 17
          },
          {
            "a": 0,
            "b": 5,
            "distance": 0.13333333333333333,
            "overlap_count": 105,
            "agreement_count": 91,
            "disagreement_count": 14
          },
          {
            "a": 0,
            "b": 6,
            "distance": 0.17142857142857143,
            "overlap_count": 105,
            "agreement_count": 87,
            "disagreement_count": 18
          },
          {
            "a": 0,
            "b": 7,
            "distance": 0.2403846153846154,
            "overlap_count": 104,
            "agreement_count": 79,
            "disagreement_count": 25
          },
          {
            "a": 0,
            "b": 8,
            "distance": 0.38095238095238093,
            "overlap_count": 105,
            "agreement_count": 65,
            "disagreement_count": 40
          },
          {
            "a": 0,
            "b": 9,
            "distance": 0.30097087378640774,
            "overlap_count": 103,
            "agreement_count": 72,
            "disagreement_count": 31
          },
          {
            "a": 0,
            "b": 10,
            "distance": 0.19047619047619047,
            "overlap_count": 105,
            "agreement_count": 85,
            "disagreement_count": 20
          },
          {
            "a": 0,
            "b": 11,
            "distance": 0.33980582524271846,
            "overlap_count": 103,
            "agreement_count": 68,
            "disagreement_count": 35
          },
          {
            "a": 0,
            "b": 12,
            "distance": 0.2857142857142857,
            "overlap_count": 105,
            "agreement_count": 75,
            "disagreement_count": 30
          },
          {
            "a": 0,
            "b": 13,
            "distance": 0.1619047619047619,
            "overlap_count": 105,
            "agreement_count": 88,
            "disagreement_count": 17
          },
          {
            "a": 0,
            "b": 14,
            "distance": 0.14705882352941177,
            "overlap_count": 102,
            "agreement_count": 87,
            "disagreement_count": 15
          },
          {
            "a": 0,
            "b": 15,
            "distance": 0.17142857142857143,
            "overlap_count": 105,
            "agreement_count": 87,
            "disagreement_count": 18
          },
          {
            "a": 0,
            "b": 16,
            "distance": 0.2,
            "overlap_count": 105,
            "agreement_count": 84,
            "disagreement_count": 21
          },
          {
            "a": 1,
            "b": 2,
            "distance": 0.0660377358490566,
            "overlap_count": 106,
            "agreement_count": 99,
            "disagreement_count": 7
          },
          {
            "a": 1,
            "b": 3,
            "distance": 0.09433962264150944,
            "overlap_count": 106,
            "agreement_count": 96,
            "disagreement_count": 10
          },
          {
            "a": 1,
            "b": 4,
            "distance": 0.10476190476190476,
            "overlap_count": 105,
            "agreement_count": 94,
            "disagreement_count": 11
          },
          {
            "a": 1,
            "b": 5,
            "distance": 0.09345794392523364,
            "overlap_count": 107,
            "agreement_count": 97,
            "disagreement_count": 10
          },
          {
            "a": 1,
            "b": 6,
            "distance": 0.08571428571428572,
            "overlap_count": 105,
            "agreement_count": 96,
            "disagreement_count": 9
          },
          {
            "a": 1,
            "b": 7,
            "distance": 0.13333333333333333,
            "overlap_count": 105,
            "agreement_count": 91,
            "disagreement_count": 14
          },
          {
            "a": 1,
            "b": 8,
            "distance": 0.29906542056074764,
            "overlap_count": 107,
            "agreement_count": 75,
            "disagreement_count": 32
          },
          {
            "a": 1,
            "b": 9,
            "distance": 0.2857142857142857,
            "overlap_count": 105,
            "agreement_count": 75,
            "disagreement_count": 30
          },
          {
            "a": 1,
            "b": 10,
            "distance": 0.14018691588785046,
            "overlap_count": 107,
            "agreement_count": 92,
            "disagreement_count": 15
          },
          {
            "a": 1,
            "b": 11,
            "distance": 0.3333333333333333,
            "overlap_count": 105,
            "agreement_count": 70,
            "disagreement_count": 35
          },
          {
            "a": 1,
            "b": 12,
            "distance": 0.20754716981132076,
            "overlap_count": 106,
            "agreement_count": 84,
            "disagreement_count": 22
          },
          {
            "a": 1,
            "b": 13,
            "distance": 0.1320754716981132,
            "overlap_count": 106,
            "agreement_count": 92,
            "disagreement_count": 14
          },
          {
            "a": 1,
            "b": 14,
            "distance": 0.11650485436893204,
            "overlap_count": 103,
            "agreement_count": 91,
            "disagreement_count": 12
          },
          {
            "a": 1,
            "b": 15,
            "distance": 0.11320754716981132,
            "overlap_count": 106,
            "agreement_count": 94,
            "disagreement_count": 12
          },
          {
            "a": 1,
            "b": 16,
            "distance": 0.12380952380952381,
            "overlap_count": 105,
            "agreement_count": 92,
            "disagreement_count": 13
          },
          {
            "a": 2,
            "b": 3,
            "distance": 0.05504587155963303,
            "overlap_count": 109,
            "agreement_count": 103,
            "disagreement_count": 6
          },
          {
            "a": 2,
            "b": 4,
            "distance": 0.09259259259259259,
            "overlap_count": 108,
            "agreement_count": 98,
            "disagreement_count": 10
          },
          {
            "a": 2,
            "b": 5,
            "distance": 0.09090909090909091,
            "overlap_count": 110,
            "agreement_count": 100,
            "disagreement_count": 10
          },
          {
            "a": 2,
            "b": 6,
            "distance": 0.056074766355140186,
            "overlap_count": 107,
            "agreement_count": 101,
            "disagreement_count": 6
          },
          {
            "a": 2,
            "b": 7,
            "distance": 0.11926605504587157,
            "overlap_count": 109,
            "agreement_count": 96,
            "disagreement_count": 13
          },
          {
            "a": 2,
            "b": 8,
            "distance": 0.29357798165137616,
            "overlap_count": 109,
            "agreement_count": 77,
            "disagreement_count": 32
          },
          {
            "a": 2,
            "b": 9,
            "distance": 0.2962962962962963,
            "overlap_count": 108,
            "agreement_count": 76,
            "disagreement_count": 32
          },
          {
            "a": 2,
            "b": 10,
            "distance": 0.10185185185185185,
            "overlap_count": 108,
            "agreement_count": 97,
            "disagreement_count": 11
          },
          {
            "a": 2,
            "b": 11,
            "distance": 0.3425925925925926,
            "overlap_count": 108,
            "agreement_count": 71,
            "disagreement_count": 37
          },
          {
            "a": 2,
            "b": 12,
            "distance": 0.1559633027522936,
            "overlap_count": 109,
            "agreement_count": 92,
            "disagreement_count": 17
          },
          {
            "a": 2,
            "b": 13,
            "distance": 0.10091743119266056,
            "overlap_count": 109,
            "agreement_count": 98,
            "disagreement_count": 11
          },
          {
            "a": 2,
            "b": 14,
            "distance": 0.09523809523809523,
            "overlap_count": 105,
            "agreement_count": 95,
            "disagreement_count": 10
          },
          {
            "a": 2,
            "b": 15,
            "distance": 0.08181818181818182,
            "overlap_count": 110,
            "agreement_count": 101,
            "disagreement_count": 9
          },
          {
            "a": 2,
            "b": 16,
            "distance": 0.06481481481481481,
            "overlap_count": 108,
            "agreement_count": 101,
            "disagreement_count": 7
          },
          {
            "a": 3,
            "b": 4,
            "distance": 0.11214953271028037,
            "overlap_count": 107,
            "agreement_count": 95,
            "disagreement_count": 12
          },
          {
            "a": 3,
            "b": 5,
            "distance": 0.11009174311926606,
            "overlap_count": 109,
            "agreement_count": 97,
            "disagreement_count": 12
          },
          {
            "a": 3,
            "b": 6,
            "distance": 0.06542056074766354,
            "overlap_count": 107,
            "agreement_count": 100,
            "disagreement_count": 7
          },
          {
            "a": 3,
            "b": 7,
            "distance": 0.10185185185185185,
            "overlap_count": 108,
            "agreement_count": 97,
            "disagreement_count": 11
          },
          {
            "a": 3,
            "b": 8,
            "distance": 0.28440366972477066,
            "overlap_count": 109,
            "agreement_count": 78,
            "disagreement_count": 31
          },
          {
            "a": 3,
            "b": 9,
            "distance": 0.308411214953271,
            "overlap_count": 107,
            "agreement_count": 74,
            "disagreement_count": 33
          },
          {
            "a": 3,
            "b": 10,
            "distance": 0.08333333333333333,
            "overlap_count": 108,
            "agreement_count": 99,
            "disagreement_count": 9
          },
          {
            "a": 3,
            "b": 11,
            "distance": 0.34579439252336447,
            "overlap_count": 107,
            "agreement_count": 70,
            "disagreement_count": 37
          },
          {
            "a": 3,
            "b": 12,
            "distance": 0.1574074074074074,
            "overlap_count": 108,
            "agreement_count": 91,
            "disagreement_count": 17
          },
          {
            "a": 3,
            "b": 13,
            "distance": 0.14678899082568808,
            "overlap_count": 109,
            "agreement_count": 93,
            "disagreement_count": 16
          },
          {
            "a": 3,
            "b": 14,
            "distance": 0.11428571428571428,
            "overlap_count": 105,
            "agreement_count": 93,
            "disagreement_count": 12
          },
          {
            "a": 3,
            "b": 15,
            "distance": 0.06422018348623854,
            "overlap_count": 109,
            "agreement_count": 102,
            "disagreement_count": 7
          },
          {
            "a": 3,
            "b": 16,
            "distance": 0.102803738317757,
            "overlap_count": 107,
            "agreement_count": 96,
            "disagreement_count": 11
          },
          {
            "a": 4,
            "b": 5,
            "distance": 0.05555555555555555,
            "overlap_count": 108,
            "agreement_count": 102,
            "disagreement_count": 6
          },
          {
            "a": 4,
            "b": 6,
            "distance": 0.08490566037735849,
            "overlap_count": 106,
            "agreement_count": 97,
            "disagreement_count": 9
          },
          {
            "a": 4,
            "b": 7,
            "distance": 0.19626168224299065,
            "overlap_count": 107,
            "agreement_count": 86,
            "disagreement_count": 21
          },
          {
            "a": 4,
            "b": 8,
            "distance": 0.3364485981308411,
            "overlap_count": 107,
            "agreement_count": 71,
            "disagreement_count": 36
          },
          {
            "a": 4,
            "b": 9,
            "distance": 0.3584905660377358,
            "overlap_count": 106,
            "agreement_count": 68,
            "disagreement_count": 38
          },
          {
            "a": 4,
            "b": 10,
            "distance": 0.1308411214953271,
            "overlap_count": 107,
            "agreement_count": 93,
            "disagreement_count": 14
          },
          {
            "a": 4,
            "b": 11,
            "distance": 0.39622641509433965,
            "overlap_count": 106,
            "agreement_count": 64,
            "disagreement_count": 42
          },
          {
            "a": 4,
            "b": 12,
            "distance": 0.2037037037037037,
            "overlap_count": 108,
            "agreement_count": 86,
            "disagreement_count": 22
          },
          {
            "a": 4,
            "b": 13,
            "distance": 0.16822429906542055,
            "overlap_count": 107,
            "agreement_count": 89,
            "disagreement_count": 18
          },
          {
            "a": 4,
            "b": 14,
            "distance": 0.08571428571428572,
            "overlap_count": 105,
            "agreement_count": 96,
            "disagreement_count": 9
          },
          {
            "a": 4,
            "b": 15,
            "distance": 0.1111111111111111,
            "overlap_count": 108,
            "agreement_count": 96,
            "disagreement_count": 12
          },
          {
            "a": 4,
            "b": 16,
            "distance": 0.11214953271028037,
            "overlap_count": 107,
            "agreement_count": 95,
            "disagreement_count": 12
          },
          {
            "a": 5,
            "b": 6,
            "distance": 0.07476635514018691,
            "overlap_count": 107,
            "agreement_count": 99,
            "disagreement_count": 8
          },
          {
            "a": 5,
            "b": 7,
            "distance": 0.1834862385321101,
            "overlap_count": 109,
            "agreement_count": 89,
            "disagreement_count": 20
          },
          {
            "a": 5,
            "b": 8,
            "distance": 0.3333333333333333,
            "overlap_count": 111,
            "agreement_count": 74,
            "disagreement_count": 37
          },
          {
            "a": 5,
            "b": 9,
            "distance": 0.3392857142857143,
            "overlap_count": 112,
            "agreement_count": 74,
            "disagreement_count": 38
          },
          {
            "a": 5,
            "b": 10,
            "distance": 0.12844036697247707,
            "overlap_count": 109,
            "agreement_count": 95,
            "disagreement_count": 14
          },
          {
            "a": 5,
            "b": 11,
            "distance": 0.4017857142857143,
            "overlap_count": 112,
            "agreement_count": 67,
            "disagreement_count": 45
          },
          {
            "a": 5,
            "b": 12,
            "distance": 0.2,
            "overlap_count": 110,
            "agreement_count": 88,
            "disagreement_count": 22
          },
          {
            "a": 5,
            "b": 13,
            "distance": 0.14678899082568808,
            "overlap_count": 109,
            "agreement_count": 93,
            "disagreement_count": 16
          },
          {
            "a": 5,
            "b": 14,
            "distance": 0.05714285714285714,
            "overlap_count": 105,
            "agreement_count": 99,
            "disagreement_count": 6
          },
          {
            "a": 5,
            "b": 15,
            "distance": 0.10810810810810811,
            "overlap_count": 111,
            "agreement_count": 99,
            "disagreement_count": 12
          },
          {
            "a": 5,
            "b": 16,
            "distance": 0.1111111111111111,
            "overlap_count": 108,
            "agreement_count": 96,
            "disagreement_count": 12
          },
          {
            "a": 6,
            "b": 7,
            "distance": 0.1509433962264151,
            "overlap_count": 106,
            "agreement_count": 90,
            "disagreement_count": 16
          },
          {
            "a": 6,
            "b": 8,
            "distance": 0.2897196261682243,
            "overlap_count": 107,
            "agreement_count": 76,
            "disagreement_count": 31
          },
          {
            "a": 6,
            "b": 9,
            "distance": 0.3142857142857143,
            "overlap_count": 105,
            "agreement_count": 72,
            "disagreement_count": 33
          },
          {
            "a": 6,
            "b": 10,
            "distance": 0.056074766355140186,
            "overlap_count": 107,
            "agreement_count": 101,
            "disagreement_count": 6
          },
          {
            "a": 6,
            "b": 11,
            "distance": 0.3523809523809524,
            "overlap_count": 105,
            "agreement_count": 68,
            "disagreement_count": 37
          },
          {
            "a": 6,
            "b": 12,
            "distance": 0.17757009345794392,
            "overlap_count": 107,
            "agreement_count": 88,
            "disagreement_count": 19
          },
          {
            "a": 6,
            "b": 13,
            "distance": 0.1308411214953271,
            "overlap_count": 107,
            "agreement_count": 93,
            "disagreement_count": 14
          },
          {
            "a": 6,
            "b": 14,
            "distance": 0.09615384615384616,
            "overlap_count": 104,
            "agreement_count": 94,
            "disagreement_count": 10
          },
          {
            "a": 6,
            "b": 15,
            "distance": 0.037383177570093455,
            "overlap_count": 107,
            "agreement_count": 103,
            "disagreement_count": 4
          },
          {
            "a": 6,
            "b": 16,
            "distance": 0.04716981132075472,
            "overlap_count": 106,
            "agreement_count": 101,
            "disagreement_count": 5
          },
          {
            "a": 7,
            "b": 8,
            "distance": 0.3055555555555556,
            "overlap_count": 108,
            "agreement_count": 75,
            "disagreement_count": 33
          },
          {
            "a": 7,
            "b": 9,
            "distance": 0.29906542056074764,
            "overlap_count": 107,
            "agreement_count": 75,
            "disagreement_count": 32
          },
          {
            "a": 7,
            "b": 10,
            "distance": 0.14018691588785046,
            "overlap_count": 107,
            "agreement_count": 92,
            "disagreement_count": 15
          },
          {
            "a": 7,
            "b": 11,
            "distance": 0.308411214953271,
            "overlap_count": 107,
            "agreement_count": 74,
            "disagreement_count": 33
          },
          {
            "a": 7,
            "b": 12,
            "distance": 0.2037037037037037,
            "overlap_count": 108,
            "agreement_count": 86,
            "disagreement_count": 22
          },
          {
            "a": 7,
            "b": 13,
            "distance": 0.1574074074074074,
            "overlap_count": 108,
            "agreement_count": 91,
            "disagreement_count": 17
          },
          {
            "a": 7,
            "b": 14,
            "distance": 0.18269230769230768,
            "overlap_count": 104,
            "agreement_count": 85,
            "disagreement_count": 19
          },
          {
            "a": 7,
            "b": 15,
            "distance": 0.14678899082568808,
            "overlap_count": 109,
            "agreement_count": 93,
            "disagreement_count": 16
          },
          {
            "a": 7,
            "b": 16,
            "distance": 0.16822429906542055,
            "overlap_count": 107,
            "agreement_count": 89,
            "disagreement_count": 18
          },
          {
            "a": 8,
            "b": 9,
            "distance": 0.44036697247706424,
            "overlap_count": 109,
            "agreement_count": 61,
            "disagreement_count": 48
          },
          {
            "a": 8,
            "b": 10,
            "distance": 0.28440366972477066,
            "overlap_count": 109,
            "agreement_count": 78,
            "disagreement_count": 31
          },
          {
            "a": 8,
            "b": 11,
            "distance": 0.46788990825688076,
            "overlap_count": 109,
            "agreement_count": 58,
            "disagreement_count": 51
          },
          {
            "a": 8,
            "b": 12,
            "distance": 0.27522935779816515,
            "overlap_count": 109,
            "agreement_count": 79,
            "disagreement_count": 30
          },
          {
            "a": 8,
            "b": 13,
            "distance": 0.3302752293577982,
            "overlap_count": 109,
            "agreement_count": 73,
            "disagreement_count": 36
          },
          {
            "a": 8,
            "b": 14,
            "distance": 0.3142857142857143,
            "overlap_count": 105,
            "agreement_count": 72,
            "disagreement_count": 33
          },
          {
            "a": 8,
            "b": 15,
            "distance": 0.2727272727272727,
            "overlap_count": 110,
            "agreement_count": 80,
            "disagreement_count": 30
          },
          {
            "a": 8,
            "b": 16,
            "distance": 0.2897196261682243,
            "overlap_count": 107,
            "agreement_count": 76,
            "disagreement_count": 31
          },
          {
            "a": 9,
            "b": 10,
            "distance": 0.308411214953271,
            "overlap_count": 107,
            "agreement_count": 74,
            "disagreement_count": 33
          },
          {
            "a": 9,
            "b": 11,
            "distance": 0.32727272727272727,
            "overlap_count": 110,
            "agreement_count": 74,
            "disagreement_count": 36
          },
          {
            "a": 9,
            "b": 12,
            "distance": 0.37037037037037035,
            "overlap_count": 108,
            "agreement_count": 68,
            "disagreement_count": 40
          },
          {
            "a": 9,
            "b": 13,
            "distance": 0.29906542056074764,
            "overlap_count": 107,
            "agreement_count": 75,
            "disagreement_count": 32
          },
          {
            "a": 9,
            "b": 14,
            "distance": 0.30097087378640774,
            "overlap_count": 103,
            "agreement_count": 72,
            "disagreement_count": 31
          },
          {
            "a": 9,
            "b": 15,
            "distance": 0.3119266055045872,
            "overlap_count": 109,
            "agreement_count": 75,
            "disagreement_count": 34
          },
          {
            "a": 9,
            "b": 16,
            "distance": 0.330188679245283,
            "overlap_count": 106,
            "agreement_count": 71,
            "disagreement_count": 35
          },
          {
            "a": 10,
            "b": 11,
            "distance": 0.34579439252336447,
            "overlap_count": 107,
            "agreement_count": 70,
            "disagreement_count": 37
          },
          {
            "a": 10,
            "b": 12,
            "distance": 0.2037037037037037,
            "overlap_count": 108,
            "agreement_count": 86,
            "disagreement_count": 22
          },
          {
            "a": 10,
            "b": 13,
            "distance": 0.16666666666666666,
            "overlap_count": 108,
            "agreement_count": 90,
            "disagreement_count": 18
          },
          {
            "a": 10,
            "b": 14,
            "distance": 0.13333333333333333,
            "overlap_count": 105,
            "agreement_count": 91,
            "disagreement_count": 14
          },
          {
            "a": 10,
            "b": 15,
            "distance": 0.027777777777777776,
            "overlap_count": 108,
            "agreement_count": 105,
            "disagreement_count": 3
          },
          {
            "a": 10,
            "b": 16,
            "distance": 0.07476635514018691,
            "overlap_count": 107,
            "agreement_count": 99,
            "disagreement_count": 8
          },
          {
            "a": 11,
            "b": 12,
            "distance": 0.4351851851851852,
            "overlap_count": 108,
            "agreement_count": 61,
            "disagreement_count": 47
          },
          {
            "a": 11,
            "b": 13,
            "distance": 0.2897196261682243,
            "overlap_count": 107,
            "agreement_count": 76,
            "disagreement_count": 31
          },
          {
            "a": 11,
            "b": 14,
            "distance": 0.3592233009708738,
            "overlap_count": 103,
            "agreement_count": 66,
            "disagreement_count": 37
          },
          {
            "a": 11,
            "b": 15,
            "distance": 0.3669724770642202,
            "overlap_count": 109,
            "agreement_count": 69,
            "disagreement_count": 40
          },
          {
            "a": 11,
            "b": 16,
            "distance": 0.37735849056603776,
            "overlap_count": 106,
            "agreement_count": 66,
            "disagreement_count": 40
          },
          {
            "a": 12,
            "b": 13,
            "distance": 0.24074074074074073,
            "overlap_count": 108,
            "agreement_count": 82,
            "disagreement_count": 26
          },
          {
            "a": 12,
            "b": 14,
            "distance": 0.20952380952380953,
            "overlap_count": 105,
            "agreement_count": 83,
            "disagreement_count": 22
          },
          {
            "a": 12,
            "b": 15,
            "distance": 0.17272727272727273,
            "overlap_count": 110,
            "agreement_count": 91,
            "disagreement_count": 19
          },
          {
            "a": 12,
            "b": 16,
            "distance": 0.16666666666666666,
            "overlap_count": 108,
            "agreement_count": 90,
            "disagreement_count": 18
          },
          {
            "a": 13,
            "b": 14,
            "distance": 0.17142857142857143,
            "overlap_count": 105,
            "agreement_count": 87,
            "disagreement_count": 18
          },
          {
            "a": 13,
            "b": 15,
            "distance": 0.1651376146788991,
            "overlap_count": 109,
            "agreement_count": 91,
            "disagreement_count": 18
          },
          {
            "a": 13,
            "b": 16,
            "distance": 0.1588785046728972,
            "overlap_count": 107,
            "agreement_count": 90,
            "disagreement_count": 17
          },
          {
            "a": 14,
            "b": 15,
            "distance": 0.10476190476190476,
            "overlap_count": 105,
            "agreement_count": 94,
            "disagreement_count": 11
          },
          {
            "a": 14,
            "b": 16,
            "distance": 0.11538461538461539,
            "overlap_count": 104,
            "agreement_count": 92,
            "disagreement_count": 12
          },
          {
            "a": 15,
            "b": 16,
            "distance": 0.046296296296296294,
            "overlap_count": 108,
            "agreement_count": 103,
            "disagreement_count": 5
          }
        ],
        "linkage": [
          [
            10,
            15,
            0.027777777777777776,
            2
          ],
          [
            6,
            17,
            0.04672897196261682,
            3
          ],
          [
            2,
            3,
            0.05504587155963303,
            2
          ],
          [
            4,
            5,
            0.05555555555555555,
            2
          ],
          [
            16,
            18,
            0.05607748758574597,
            4
          ],
          [
            14,
            20,
            0.07142857142857142,
            3
          ],
          [
            19,
            21,
            0.07754217884062263,
            6
          ],
          [
            1,
            23,
            0.10388260517867288,
            7
          ],
          [
            22,
            24,
            0.10671707326527476,
            10
          ],
          [
            13,
            25,
            0.1487747662549932,
            11
          ],
          [
            7,
            26,
            0.15276749801011336,
            12
          ],
          [
            0,
            27,
            0.183533960011687,
            13
          ],
          [
            12,
            28,
            0.19884398922406554,
            14
          ],
          [
            8,
            29,
            0.30640695972422677,
            15
          ],
          [
            9,
            30,
            0.32425468152119363,
            16
          ],
          [
            11,
            31,
            0.3618591592758625,
            17
          ]
        ],
        "linkage_complete": true
      },
      {
        "group_id": "bcd86cbea72bcdfe",
        "cluster_scope": "cross_model",
        "representative_policy": "latest",
        "task_fingerprint": "9d6ea446c173cf044401c04094f095a25fb7ceec9caef163c35be7264a0bb349",
        "normalized_tag_key": "*like*;discourse;english;pragmatics;v3",
        "task_name_display": "*like* discourse/pragm",
        "task_names_seen": [
          "*like* discourse/pragm"
        ],
        "tags_display": "*like*; discourse; pragmatics; English; v3",
        "model_count": 3,
        "distance_metric": "nominal_disagreement_rate",
        "linkage_method": "average",
        "representative_run_stems": [
          "like_interrater__openai__gpt54__2026-03-13-23-51",
          "like_interrater__requesty__anthropicclaudesonnet46__2026-03-13-22-58",
          "like_interrater__vertex__gemini3flashpreview__2026-03-13-15-32"
        ],
        "comparable_pair_count": 3,
        "representatives": [
          {
            "provider": "openai",
            "model": "gpt-5.4",
            "run_stem": "like_interrater__openai__gpt54__2026-03-13-23-51",
            "metrics_file": "like_interrater__openai__gpt54__2026-03-13-23-51__metrics.json",
            "timestamp": "2026-03-13T22:51:21.706190Z",
            "accuracy": 0.5217391304347826,
            "cohen_kappa": 0.4387755102040816
          },
          {
            "provider": "requesty",
            "model": "claude-sonnet-4-6",
            "run_stem": "like_interrater__requesty__anthropicclaudesonnet46__2026-03-13-22-58",
            "metrics_file": "like_interrater__requesty__anthropicclaudesonnet46__2026-03-13-22-58__metrics.json",
            "timestamp": "2026-03-13T21:58:23.211617Z",
            "accuracy": 0.7478260869565218,
            "cohen_kappa": 0.6706172839506173
          },
          {
            "provider": "vertex",
            "model": "gemini-3-flash-preview",
            "run_stem": "like_interrater__vertex__gemini3flashpreview__2026-03-13-15-32",
            "metrics_file": "like_interrater__vertex__gemini3flashpreview__2026-03-13-15-32__metrics.json",
            "timestamp": "2026-03-13T14:32:09.952389Z",
            "accuracy": 0.808695652173913,
            "cohen_kappa": 0.7482587064676617
          }
        ],
        "pairwise": [
          {
            "a": 0,
            "b": 1,
            "distance": 0.336734693877551,
            "overlap_count": 98,
            "agreement_count": 65,
            "disagreement_count": 33
          },
          {
            "a": 0,
            "b": 2,
            "distance": 0.3333333333333333,
            "overlap_count": 99,
            "agreement_count": 66,
            "disagreement_count": 33
          },
          {
            "a": 1,
            "b": 2,
            "distance": 0.13513513513513514,
            "overlap_count": 111,
            "agreement_count": 96,
            "disagreement_count": 15
          }
        ],
        "linkage": [
          [
            1,
            2,
            0.13513513513513514,
            2
          ],
          [
            0,
            3,
            0.33503401360544216,
            3
          ]
        ],
        "linkage_complete": true
      },
      {
        "group_id": "cc0c9725606971a1",
        "cluster_scope": "cross_model",
        "representative_policy": "latest",
        "task_fingerprint": "9911e304116a3387591af8bb222b9d5c5983c625188825c9007ceff21f5910bc",
        "normalized_tag_key": "*like*;discourse;english;pragmatics;v3",
        "task_name_display": "*like* discourse/pragm",
        "task_names_seen": [
          "*like* discourse/pragm"
        ],
        "tags_display": "*like*; discourse; pragmatics; English; v3",
        "model_count": 2,
        "distance_metric": "nominal_disagreement_rate",
        "linkage_method": "average",
        "representative_run_stems": [
          "like_interrater__einfra__deepseekv32thinking__2026-03-13-20-58",
          "like_interrater__vertex__gemini31propreview"
        ],
        "comparable_pair_count": 1,
        "representatives": [
          {
            "provider": "e-infra",
            "model": "deepseek-v3.2-thinking",
            "run_stem": "like_interrater__einfra__deepseekv32thinking__2026-03-13-20-58",
            "metrics_file": "like_interrater__einfra__deepseekv32thinking__2026-03-13-20-58__metrics.json",
            "timestamp": "2026-03-13T19:58:49.743838Z",
            "accuracy": 0.7130434782608696,
            "cohen_kappa": 0.6246290801186943
          },
          {
            "provider": "vertex",
            "model": "gemini-3.1-pro-preview",
            "run_stem": "like_interrater__vertex__gemini31propreview",
            "metrics_file": "like_interrater__vertex__gemini31propreview__metrics.json",
            "timestamp": "2026-03-13T17:50:24.766352Z",
            "accuracy": 0.8260869565217391,
            "cohen_kappa": 0.7721644378405151
          }
        ],
        "pairwise": [
          {
            "a": 0,
            "b": 1,
            "distance": 0.1792452830188679,
            "overlap_count": 106,
            "agreement_count": 87,
            "disagreement_count": 19
          }
        ],
        "linkage": [
          [
            0,
            1,
            0.1792452830188679,
            2
          ]
        ],
        "linkage_complete": true
      },
      {
        "group_id": "bbb4c11dc6f1f20a",
        "cluster_scope": "cross_model",
        "representative_policy": "latest",
        "task_fingerprint": "0eb7fe892594e382024c3813de8ec46058d12d33d835017ca9df11d4d238d61e",
        "normalized_tag_key": "*-ing*;adverbial;clause;english;syntax",
        "task_name_display": "ADV *-ing* clause",
        "task_names_seen": [
          "ADV *-ing* clause"
        ],
        "tags_display": "*-ing*; syntax; adverbial; clause; English",
        "model_count": 16,
        "distance_metric": "nominal_disagreement_rate",
        "linkage_method": "average",
        "representative_run_stems": [
          "adv-ing__einfra__deepseekv32thinking__2026-03-12-01-08",
          "adv-ing__einfra__glm47__2026-03-17-13-00",
          "adv-ing__einfra__glm5__2026-03-30-21-43",
          "adv-ing__einfra__kimik25__2026-03-19-15-11",
          "adv-ing__einfra__qwen35__2026-03-20-15-45",
          "adv-ing__google__modelsgemini3flashpreview__2026-03-11-15-40",
          "adv-ing__vertex__gemini31propreview__2026-03-11-10-43",
          "adv-ing__google__modelsgemma426ba4bit__2026-04-05-01-59",
          "adv-ing__openai__gpt54__2026-03-11-16-26",
          "adv-ing__openai__gpt54mini__2026-03-18-16-49",
          "adv-ing__openai__gpt54pro__2026-03-12-00-22",
          "adv-ing____gptoss120b__2025-11-08-23-28",
          "adv-ing____gpt5__2025-11-09-00-09",
          "adv-ing__openrouter__qwenqwen36plusfree__2026-04-03-14-47",
          "adv-ing__requesty__anthropicclaudehaiku45__2026-03-23-00-13",
          "adv-ing__requesty__anthropicclaudesonnet46__2026-03-11-16-11"
        ],
        "comparable_pair_count": 120,
        "representatives": [
          {
            "provider": "e-infra",
            "model": "deepseek-v3.2-thinking",
            "run_stem": "adv-ing__einfra__deepseekv32thinking__2026-03-12-01-08",
            "metrics_file": "adv-ing__einfra__deepseekv32thinking__2026-03-12-01-08__metrics.json",
            "timestamp": "2026-03-12T10:15:45.563016Z",
            "accuracy": 0.9559748427672956,
            "cohen_kappa": 0.9071687726760916
          },
          {
            "provider": "e-infra",
            "model": "glm-4.7",
            "run_stem": "adv-ing__einfra__glm47__2026-03-17-13-00",
            "metrics_file": "adv-ing__einfra__glm47__2026-03-17-13-00__metrics.json",
            "timestamp": "2026-03-17T12:00:44.101281Z",
            "accuracy": 0.940251572327044,
            "cohen_kappa": 0.8767894286063869
          },
          {
            "provider": "e-infra",
            "model": "glm-5",
            "run_stem": "adv-ing__einfra__glm5__2026-03-30-21-43",
            "metrics_file": "adv-ing__einfra__glm5__2026-03-30-21-43__metrics.json",
            "timestamp": "2026-03-30T19:44:02.491601Z",
            "accuracy": 0.949685534591195,
            "cohen_kappa": 0.895925380461463
          },
          {
            "provider": "e-infra",
            "model": "kimi-k2.5",
            "run_stem": "adv-ing__einfra__kimik25__2026-03-19-15-11",
            "metrics_file": "adv-ing__einfra__kimik25__2026-03-19-15-11__metrics.json",
            "timestamp": "2026-03-19T14:11:22.566473Z",
            "accuracy": 0.9716981132075472,
            "cohen_kappa": 0.9404382843229069
          },
          {
            "provider": "e-infra",
            "model": "qwen3.5",
            "run_stem": "adv-ing__einfra__qwen35__2026-03-20-15-45",
            "metrics_file": "adv-ing__einfra__qwen35__2026-03-20-15-45__metrics.json",
            "timestamp": "2026-03-20T14:45:23.934528Z",
            "accuracy": 0.8113207547169812,
            "cohen_kappa": 0.6568283602223062
          },
          {
            "provider": "google",
            "model": "gemini-3-flash-preview",
            "run_stem": "adv-ing__google__modelsgemini3flashpreview__2026-03-11-15-40",
            "metrics_file": "adv-ing__google__modelsgemini3flashpreview__2026-03-11-15-40__metrics.json",
            "timestamp": "2026-03-11T14:40:24.117684Z",
            "accuracy": 0.9811320754716981,
            "cohen_kappa": 0.9596241747079736
          },
          {
            "provider": "google",
            "model": "gemini-3.1-pro-preview",
            "run_stem": "adv-ing__vertex__gemini31propreview__2026-03-11-10-43",
            "metrics_file": "adv-ing__vertex__gemini31propreview__2026-03-11-10-43__metrics.json",
            "timestamp": "2026-03-11T09:43:27.624333Z",
            "accuracy": 0.9748427672955975,
            "cohen_kappa": 0.9461655662772981
          },
          {
            "provider": "google",
            "model": "gemma-4-26b-a4b-it",
            "run_stem": "adv-ing__google__modelsgemma426ba4bit__2026-04-05-01-59",
            "metrics_file": "adv-ing__google__modelsgemma426ba4bit__2026-04-05-01-59__metrics.json",
            "timestamp": "2026-04-04T23:59:13.642882Z",
            "accuracy": 0.9559748427672956,
            "cohen_kappa": 0.9069825749028457
          },
          {
            "provider": "openai",
            "model": "gpt-5.4",
            "run_stem": "adv-ing__openai__gpt54__2026-03-11-16-26",
            "metrics_file": "adv-ing__openai__gpt54__2026-03-11-16-26__metrics.json",
            "timestamp": "2026-03-11T15:26:11.802357Z",
            "accuracy": 0.9182389937106918,
            "cohen_kappa": 0.8348513902205178
          },
          {
            "provider": "openai",
            "model": "gpt-5.4-mini",
            "run_stem": "adv-ing__openai__gpt54mini__2026-03-18-16-49",
            "metrics_file": "adv-ing__openai__gpt54mini__2026-03-18-16-49__metrics.json",
            "timestamp": "2026-03-18T15:49:58.142001Z",
            "accuracy": 0.7704402515723271,
            "cohen_kappa": 0.6185170577795307
          },
          {
            "provider": "openai",
            "model": "gpt-5.4-pro",
            "run_stem": "adv-ing__openai__gpt54pro__2026-03-12-00-22",
            "metrics_file": "adv-ing__openai__gpt54pro__2026-03-12-00-22__metrics.json",
            "timestamp": "2026-03-11T23:22:39.487962Z",
            "accuracy": 0.8459119496855346,
            "cohen_kappa": 0.7156776877600176
          },
          {
            "provider": "openai",
            "model": "gpt-oss-120b",
            "run_stem": "adv-ing____gptoss120b__2025-11-08-23-28",
            "metrics_file": "adv-ing____gptoss120b__2025-11-08-23-28__metrics.json",
            "timestamp": "2025-11-08T22:28:31.410308Z",
            "accuracy": 0.9308176100628931,
            "cohen_kappa": 0.857107843137255
          },
          {
            "provider": "openai",
            "model": "gpt5",
            "run_stem": "adv-ing____gpt5__2025-11-09-00-09",
            "metrics_file": "adv-ing____gpt5__2025-11-09-00-09__metrics.json",
            "timestamp": "2025-11-08T23:09:42.353960Z",
            "accuracy": 0.9025157232704403,
            "cohen_kappa": 0.8061013748746091
          },
          {
            "provider": "openrouter",
            "model": "qwen3.6",
            "run_stem": "adv-ing__openrouter__qwenqwen36plusfree__2026-04-03-14-47",
            "metrics_file": "adv-ing__openrouter__qwenqwen36plusfree__2026-04-03-14-47__metrics.json",
            "timestamp": "2026-04-03T12:47:18.635821Z",
            "accuracy": 0.9654088050314465,
            "cohen_kappa": 0.9257655822244859
          },
          {
            "provider": "requesty",
            "model": "claude-haiku-4-5",
            "run_stem": "adv-ing__requesty__anthropicclaudehaiku45__2026-03-23-00-13",
            "metrics_file": "adv-ing__requesty__anthropicclaudehaiku45__2026-03-23-00-13__metrics.json",
            "timestamp": "2026-03-22T23:13:14.236246Z",
            "accuracy": 0.4748427672955975,
            "cohen_kappa": 0.28195351478521885
          },
          {
            "provider": "requesty",
            "model": "claude-sonnet-4-6",
            "run_stem": "adv-ing__requesty__anthropicclaudesonnet46__2026-03-11-16-11",
            "metrics_file": "adv-ing__requesty__anthropicclaudesonnet46__2026-03-11-16-11__metrics.json",
            "timestamp": "2026-03-11T15:11:34.763226Z",
            "accuracy": 0.9308176100628931,
            "cohen_kappa": 0.8585638039786512
          }
        ],
        "pairwise": [
          {
            "a": 0,
            "b": 1,
            "distance": 0.02572347266881029,
            "overlap_count": 311,
            "agreement_count": 303,
            "disagreement_count": 8
          },
          {
            "a": 0,
            "b": 2,
            "distance": 0.01929260450160772,
            "overlap_count": 311,
            "agreement_count": 305,
            "disagreement_count": 6
          },
          {
            "a": 0,
            "b": 3,
            "distance": 0.028481012658227847,
            "overlap_count": 316,
            "agreement_count": 307,
            "disagreement_count": 9
          },
          {
            "a": 0,
            "b": 4,
            "distance": 0.018867924528301886,
            "overlap_count": 265,
            "agreement_count": 260,
            "disagreement_count": 5
          },
          {
            "a": 0,
            "b": 5,
            "distance": 0.03164556962025317,
            "overlap_count": 316,
            "agreement_count": 306,
            "disagreement_count": 10
          },
          {
            "a": 0,
            "b": 6,
            "distance": 0.0189873417721519,
            "overlap_count": 316,
            "agreement_count": 310,
            "disagreement_count": 6
          },
          {
            "a": 0,
            "b": 7,
            "distance": 0.01904761904761905,
            "overlap_count": 315,
            "agreement_count": 309,
            "disagreement_count": 6
          },
          {
            "a": 0,
            "b": 8,
            "distance": 0.032362459546925564,
            "overlap_count": 309,
            "agreement_count": 299,
            "disagreement_count": 10
          },
          {
            "a": 0,
            "b": 9,
            "distance": 0.2253968253968254,
            "overlap_count": 315,
            "agreement_count": 244,
            "disagreement_count": 71
          },
          {
            "a": 0,
            "b": 10,
            "distance": 0.007272727272727273,
            "overlap_count": 275,
            "agreement_count": 273,
            "disagreement_count": 2
          },
          {
            "a": 0,
            "b": 11,
            "distance": 0.036303630363036306,
            "overlap_count": 303,
            "agreement_count": 292,
            "disagreement_count": 11
          },
          {
            "a": 0,
            "b": 12,
            "distance": 0.013422818791946308,
            "overlap_count": 298,
            "agreement_count": 294,
            "disagreement_count": 4
          },
          {
            "a": 0,
            "b": 13,
            "distance": 0.022151898734177215,
            "overlap_count": 316,
            "agreement_count": 309,
            "disagreement_count": 7
          },
          {
            "a": 0,
            "b": 14,
            "distance": 0.10714285714285714,
            "overlap_count": 168,
            "agreement_count": 150,
            "disagreement_count": 18
          },
          {
            "a": 0,
            "b": 15,
            "distance": 0.042071197411003236,
            "overlap_count": 309,
            "agreement_count": 296,
            "disagreement_count": 13
          },
          {
            "a": 1,
            "b": 2,
            "distance": 0.032679738562091505,
            "overlap_count": 306,
            "agreement_count": 296,
            "disagreement_count": 10
          },
          {
            "a": 1,
            "b": 3,
            "distance": 0.028938906752411574,
            "overlap_count": 311,
            "agreement_count": 302,
            "disagreement_count": 9
          },
          {
            "a": 1,
            "b": 4,
            "distance": 0.022900763358778626,
            "overlap_count": 262,
            "agreement_count": 256,
            "disagreement_count": 6
          },
          {
            "a": 1,
            "b": 5,
            "distance": 0.03858520900321544,
            "overlap_count": 311,
            "agreement_count": 299,
            "disagreement_count": 12
          },
          {
            "a": 1,
            "b": 6,
            "distance": 0.02572347266881029,
            "overlap_count": 311,
            "agreement_count": 303,
            "disagreement_count": 8
          },
          {
            "a": 1,
            "b": 7,
            "distance": 0.03225806451612903,
            "overlap_count": 310,
            "agreement_count": 300,
            "disagreement_count": 10
          },
          {
            "a": 1,
            "b": 8,
            "distance": 0.05263157894736842,
            "overlap_count": 304,
            "agreement_count": 288,
            "disagreement_count": 16
          },
          {
            "a": 1,
            "b": 9,
            "distance": 0.23225806451612904,
            "overlap_count": 310,
            "agreement_count": 238,
            "disagreement_count": 72
          },
          {
            "a": 1,
            "b": 10,
            "distance": 0.022058823529411766,
            "overlap_count": 272,
            "agreement_count": 266,
            "disagreement_count": 6
          },
          {
            "a": 1,
            "b": 11,
            "distance": 0.030100334448160536,
            "overlap_count": 299,
            "agreement_count": 290,
            "disagreement_count": 9
          },
          {
            "a": 1,
            "b": 12,
            "distance": 0.023809523809523808,
            "overlap_count": 294,
            "agreement_count": 287,
            "disagreement_count": 7
          },
          {
            "a": 1,
            "b": 13,
            "distance": 0.028938906752411574,
            "overlap_count": 311,
            "agreement_count": 302,
            "disagreement_count": 9
          },
          {
            "a": 1,
            "b": 14,
            "distance": 0.10714285714285714,
            "overlap_count": 168,
            "agreement_count": 150,
            "disagreement_count": 18
          },
          {
            "a": 1,
            "b": 15,
            "distance": 0.04918032786885246,
            "overlap_count": 305,
            "agreement_count": 290,
            "disagreement_count": 15
          },
          {
            "a": 2,
            "b": 3,
            "distance": 0.012861736334405145,
            "overlap_count": 311,
            "agreement_count": 307,
            "disagreement_count": 4
          },
          {
            "a": 2,
            "b": 4,
            "distance": 0.015209125475285171,
            "overlap_count": 263,
            "agreement_count": 259,
            "disagreement_count": 4
          },
          {
            "a": 2,
            "b": 5,
            "distance": 0.022508038585209004,
            "overlap_count": 311,
            "agreement_count": 304,
            "disagreement_count": 7
          },
          {
            "a": 2,
            "b": 6,
            "distance": 0.01607717041800643,
            "overlap_count": 311,
            "agreement_count": 306,
            "disagreement_count": 5
          },
          {
            "a": 2,
            "b": 7,
            "distance": 0.01935483870967742,
            "overlap_count": 310,
            "agreement_count": 304,
            "disagreement_count": 6
          },
          {
            "a": 2,
            "b": 8,
            "distance": 0.04276315789473684,
            "overlap_count": 304,
            "agreement_count": 291,
            "disagreement_count": 13
          },
          {
            "a": 2,
            "b": 9,
            "distance": 0.2129032258064516,
            "overlap_count": 310,
            "agreement_count": 244,
            "disagreement_count": 66
          },
          {
            "a": 2,
            "b": 10,
            "distance": 0.007326007326007326,
            "overlap_count": 273,
            "agreement_count": 271,
            "disagreement_count": 2
          },
          {
            "a": 2,
            "b": 11,
            "distance": 0.030100334448160536,
            "overlap_count": 299,
            "agreement_count": 290,
            "disagreement_count": 9
          },
          {
            "a": 2,
            "b": 12,
            "distance": 0.02027027027027027,
            "overlap_count": 296,
            "agreement_count": 290,
            "disagreement_count": 6
          },
          {
            "a": 2,
            "b": 13,
            "distance": 0.022508038585209004,
            "overlap_count": 311,
            "agreement_count": 304,
            "disagreement_count": 7
          },
          {
            "a": 2,
            "b": 14,
            "distance": 0.10179640718562874,
            "overlap_count": 167,
            "agreement_count": 150,
            "disagreement_count": 17
          },
          {
            "a": 2,
            "b": 15,
            "distance": 0.04590163934426229,
            "overlap_count": 305,
            "agreement_count": 291,
            "disagreement_count": 14
          },
          {
            "a": 3,
            "b": 4,
            "distance": 0.022641509433962263,
            "overlap_count": 265,
            "agreement_count": 259,
            "disagreement_count": 6
          },
          {
            "a": 3,
            "b": 5,
            "distance": 0.02830188679245283,
            "overlap_count": 318,
            "agreement_count": 309,
            "disagreement_count": 9
          },
          {
            "a": 3,
            "b": 6,
            "distance": 0.0220125786163522,
            "overlap_count": 318,
            "agreement_count": 311,
            "disagreement_count": 7
          },
          {
            "a": 3,
            "b": 7,
            "distance": 0.022082018927444796,
            "overlap_count": 317,
            "agreement_count": 310,
            "disagreement_count": 7
          },
          {
            "a": 3,
            "b": 8,
            "distance": 0.04516129032258064,
            "overlap_count": 310,
            "agreement_count": 296,
            "disagreement_count": 14
          },
          {
            "a": 3,
            "b": 9,
            "distance": 0.21766561514195584,
            "overlap_count": 317,
            "agreement_count": 248,
            "disagreement_count": 69
          },
          {
            "a": 3,
            "b": 10,
            "distance": 0.007272727272727273,
            "overlap_count": 275,
            "agreement_count": 273,
            "disagreement_count": 2
          },
          {
            "a": 3,
            "b": 11,
            "distance": 0.02631578947368421,
            "overlap_count": 304,
            "agreement_count": 296,
            "disagreement_count": 8
          },
          {
            "a": 3,
            "b": 12,
            "distance": 0.016778523489932886,
            "overlap_count": 298,
            "agreement_count": 293,
            "disagreement_count": 5
          },
          {
            "a": 3,
            "b": 13,
            "distance": 0.0220125786163522,
            "overlap_count": 318,
            "agreement_count": 311,
            "disagreement_count": 7
          },
          {
            "a": 3,
            "b": 14,
            "distance": 0.10119047619047619,
            "overlap_count": 168,
            "agreement_count": 151,
            "disagreement_count": 17
          },
          {
            "a": 3,
            "b": 15,
            "distance": 0.04516129032258064,
            "overlap_count": 310,
            "agreement_count": 296,
            "disagreement_count": 14
          },
          {
            "a": 4,
            "b": 5,
            "distance": 0.022641509433962263,
            "overlap_count": 265,
            "agreement_count": 259,
            "disagreement_count": 6
          },
          {
            "a": 4,
            "b": 6,
            "distance": 0.022641509433962263,
            "overlap_count": 265,
            "agreement_count": 259,
            "disagreement_count": 6
          },
          {
            "a": 4,
            "b": 7,
            "distance": 0.007547169811320755,
            "overlap_count": 265,
            "agreement_count": 263,
            "disagreement_count": 2
          },
          {
            "a": 4,
            "b": 8,
            "distance": 0.05019305019305019,
            "overlap_count": 259,
            "agreement_count": 246,
            "disagreement_count": 13
          },
          {
            "a": 4,
            "b": 9,
            "distance": 0.23773584905660378,
            "overlap_count": 265,
            "agreement_count": 202,
            "disagreement_count": 63
          },
          {
            "a": 4,
            "b": 10,
            "distance": 0.016260162601626018,
            "overlap_count": 246,
            "agreement_count": 242,
            "disagreement_count": 4
          },
          {
            "a": 4,
            "b": 11,
            "distance": 0.0234375,
            "overlap_count": 256,
            "agreement_count": 250,
            "disagreement_count": 6
          },
          {
            "a": 4,
            "b": 12,
            "distance": 0.01937984496124031,
            "overlap_count": 258,
            "agreement_count": 253,
            "disagreement_count": 5
          },
          {
            "a": 4,
            "b": 13,
            "distance": 0.01509433962264151,
            "overlap_count": 265,
            "agreement_count": 261,
            "disagreement_count": 4
          },
          {
            "a": 4,
            "b": 14,
            "distance": 0.1125,
            "overlap_count": 160,
            "agreement_count": 142,
            "disagreement_count": 18
          },
          {
            "a": 4,
            "b": 15,
            "distance": 0.038314176245210725,
            "overlap_count": 261,
            "agreement_count": 251,
            "disagreement_count": 10
          },
          {
            "a": 5,
            "b": 6,
            "distance": 0.012578616352201259,
            "overlap_count": 318,
            "agreement_count": 314,
            "disagreement_count": 4
          },
          {
            "a": 5,
            "b": 7,
            "distance": 0.022082018927444796,
            "overlap_count": 317,
            "agreement_count": 310,
            "disagreement_count": 7
          },
          {
            "a": 5,
            "b": 8,
            "distance": 0.04516129032258064,
            "overlap_count": 310,
            "agreement_count": 296,
            "disagreement_count": 14
          },
          {
            "a": 5,
            "b": 9,
            "distance": 0.22712933753943218,
            "overlap_count": 317,
            "agreement_count": 245,
            "disagreement_count": 72
          },
          {
            "a": 5,
            "b": 10,
            "distance": 0.014545454545454545,
            "overlap_count": 275,
            "agreement_count": 271,
            "disagreement_count": 4
          },
          {
            "a": 5,
            "b": 11,
            "distance": 0.02631578947368421,
            "overlap_count": 304,
            "agreement_count": 296,
            "disagreement_count": 8
          },
          {
            "a": 5,
            "b": 12,
            "distance": 0.026845637583892617,
            "overlap_count": 298,
            "agreement_count": 290,
            "disagreement_count": 8
          },
          {
            "a": 5,
            "b": 13,
            "distance": 0.015723270440251572,
            "overlap_count": 318,
            "agreement_count": 313,
            "disagreement_count": 5
          },
          {
            "a": 5,
            "b": 14,
            "distance": 0.08928571428571429,
            "overlap_count": 168,
            "agreement_count": 153,
            "disagreement_count": 15
          },
          {
            "a": 5,
            "b": 15,
            "distance": 0.035483870967741936,
            "overlap_count": 310,
            "agreement_count": 299,
            "disagreement_count": 11
          },
          {
            "a": 6,
            "b": 7,
            "distance": 0.022082018927444796,
            "overlap_count": 317,
            "agreement_count": 310,
            "disagreement_count": 7
          },
          {
            "a": 6,
            "b": 8,
            "distance": 0.03225806451612903,
            "overlap_count": 310,
            "agreement_count": 300,
            "disagreement_count": 10
          },
          {
            "a": 6,
            "b": 9,
            "distance": 0.22712933753943218,
            "overlap_count": 317,
            "agreement_count": 245,
            "disagreement_count": 72
          },
          {
            "a": 6,
            "b": 10,
            "distance": 0.0036363636363636364,
            "overlap_count": 275,
            "agreement_count": 274,
            "disagreement_count": 1
          },
          {
            "a": 6,
            "b": 11,
            "distance": 0.023026315789473683,
            "overlap_count": 304,
            "agreement_count": 297,
            "disagreement_count": 7
          },
          {
            "a": 6,
            "b": 12,
            "distance": 0.020134228187919462,
            "overlap_count": 298,
            "agreement_count": 292,
            "disagreement_count": 6
          },
          {
            "a": 6,
            "b": 13,
            "distance": 0.015723270440251572,
            "overlap_count": 318,
            "agreement_count": 313,
            "disagreement_count": 5
          },
          {
            "a": 6,
            "b": 14,
            "distance": 0.09523809523809523,
            "overlap_count": 168,
            "agreement_count": 152,
            "disagreement_count": 16
          },
          {
            "a": 6,
            "b": 15,
            "distance": 0.02903225806451613,
            "overlap_count": 310,
            "agreement_count": 301,
            "disagreement_count": 9
          },
          {
            "a": 7,
            "b": 8,
            "distance": 0.045307443365695796,
            "overlap_count": 309,
            "agreement_count": 295,
            "disagreement_count": 14
          },
          {
            "a": 7,
            "b": 9,
            "distance": 0.22784810126582278,
            "overlap_count": 316,
            "agreement_count": 244,
            "disagreement_count": 72
          },
          {
            "a": 7,
            "b": 10,
            "distance": 0.01090909090909091,
            "overlap_count": 275,
            "agreement_count": 272,
            "disagreement_count": 3
          },
          {
            "a": 7,
            "b": 11,
            "distance": 0.0297029702970297,
            "overlap_count": 303,
            "agreement_count": 294,
            "disagreement_count": 9
          },
          {
            "a": 7,
            "b": 12,
            "distance": 0.013468013468013467,
            "overlap_count": 297,
            "agreement_count": 293,
            "disagreement_count": 4
          },
          {
            "a": 7,
            "b": 13,
            "distance": 0.012618296529968454,
            "overlap_count": 317,
            "agreement_count": 313,
            "disagreement_count": 4
          },
          {
            "a": 7,
            "b": 14,
            "distance": 0.09523809523809523,
            "overlap_count": 168,
            "agreement_count": 152,
            "disagreement_count": 16
          },
          {
            "a": 7,
            "b": 15,
            "distance": 0.038834951456310676,
            "overlap_count": 309,
            "agreement_count": 297,
            "disagreement_count": 12
          },
          {
            "a": 8,
            "b": 9,
            "distance": 0.22006472491909385,
            "overlap_count": 309,
            "agreement_count": 241,
            "disagreement_count": 68
          },
          {
            "a": 8,
            "b": 10,
            "distance": 0.022222222222222223,
            "overlap_count": 270,
            "agreement_count": 264,
            "disagreement_count": 6
          },
          {
            "a": 8,
            "b": 11,
            "distance": 0.05723905723905724,
            "overlap_count": 297,
            "agreement_count": 280,
            "disagreement_count": 17
          },
          {
            "a": 8,
            "b": 12,
            "distance": 0.03424657534246575,
            "overlap_count": 292,
            "agreement_count": 282,
            "disagreement_count": 10
          },
          {
            "a": 8,
            "b": 13,
            "distance": 0.041935483870967745,
            "overlap_count": 310,
            "agreement_count": 297,
            "disagreement_count": 13
          },
          {
            "a": 8,
            "b": 14,
            "distance": 0.11377245508982035,
            "overlap_count": 167,
            "agreement_count": 148,
            "disagreement_count": 19
          },
          {
            "a": 8,
            "b": 15,
            "distance": 0.03642384105960265,
            "overlap_count": 302,
            "agreement_count": 291,
            "disagreement_count": 11
          },
          {
            "a": 9,
            "b": 10,
            "distance": 0.21454545454545454,
            "overlap_count": 275,
            "agreement_count": 216,
            "disagreement_count": 59
          },
          {
            "a": 9,
            "b": 11,
            "distance": 0.23432343234323433,
            "overlap_count": 303,
            "agreement_count": 232,
            "disagreement_count": 71
          },
          {
            "a": 9,
            "b": 12,
            "distance": 0.2255892255892256,
            "overlap_count": 297,
            "agreement_count": 230,
            "disagreement_count": 67
          },
          {
            "a": 9,
            "b": 13,
            "distance": 0.23974763406940064,
            "overlap_count": 317,
            "agreement_count": 241,
            "disagreement_count": 76
          },
          {
            "a": 9,
            "b": 14,
            "distance": 0.25,
            "overlap_count": 168,
            "agreement_count": 126,
            "disagreement_count": 42
          },
          {
            "a": 9,
            "b": 15,
            "distance": 0.22653721682847897,
            "overlap_count": 309,
            "agreement_count": 239,
            "disagreement_count": 70
          },
          {
            "a": 10,
            "b": 11,
            "distance": 0.014925373134328358,
            "overlap_count": 268,
            "agreement_count": 264,
            "disagreement_count": 4
          },
          {
            "a": 10,
            "b": 12,
            "distance": 0.007380073800738007,
            "overlap_count": 271,
            "agreement_count": 269,
            "disagreement_count": 2
          },
          {
            "a": 10,
            "b": 13,
            "distance": 0.01090909090909091,
            "overlap_count": 275,
            "agreement_count": 272,
            "disagreement_count": 3
          },
          {
            "a": 10,
            "b": 14,
            "distance": 0.10062893081761007,
            "overlap_count": 159,
            "agreement_count": 143,
            "disagreement_count": 16
          },
          {
            "a": 10,
            "b": 15,
            "distance": 0.01838235294117647,
            "overlap_count": 272,
            "agreement_count": 267,
            "disagreement_count": 5
          },
          {
            "a": 11,
            "b": 12,
            "distance": 0.02422145328719723,
            "overlap_count": 289,
            "agreement_count": 282,
            "disagreement_count": 7
          },
          {
            "a": 11,
            "b": 13,
            "distance": 0.023026315789473683,
            "overlap_count": 304,
            "agreement_count": 297,
            "disagreement_count": 7
          },
          {
            "a": 11,
            "b": 14,
            "distance": 0.08695652173913043,
            "overlap_count": 161,
            "agreement_count": 147,
            "disagreement_count": 14
          },
          {
            "a": 11,
            "b": 15,
            "distance": 0.03355704697986577,
            "overlap_count": 298,
            "agreement_count": 288,
            "disagreement_count": 10
          },
          {
            "a": 12,
            "b": 13,
            "distance": 0.013422818791946308,
            "overlap_count": 298,
            "agreement_count": 294,
            "disagreement_count": 4
          },
          {
            "a": 12,
            "b": 14,
            "distance": 0.10843373493975904,
            "overlap_count": 166,
            "agreement_count": 148,
            "disagreement_count": 18
          },
          {
            "a": 12,
            "b": 15,
            "distance": 0.03741496598639456,
            "overlap_count": 294,
            "agreement_count": 283,
            "disagreement_count": 11
          },
          {
            "a": 13,
            "b": 14,
            "distance": 0.10119047619047619,
            "overlap_count": 168,
            "agreement_count": 151,
            "disagreement_count": 17
          },
          {
            "a": 13,
            "b": 15,
            "distance": 0.03870967741935484,
            "overlap_count": 310,
            "agreement_count": 298,
            "disagreement_count": 12
          },
          {
            "a": 14,
            "b": 15,
            "distance": 0.10843373493975904,
            "overlap_count": 166,
            "agreement_count": 148,
            "disagreement_count": 18
          }
        ],
        "linkage": [
          [
            6,
            10,
            0.0036363636363636364,
            2
          ],
          [
            4,
            7,
            0.007547169811320755,
            2
          ],
          [
            2,
            16,
            0.011701588872006879,
            3
          ],
          [
            0,
            12,
            0.013422818791946308,
            2
          ],
          [
            13,
            17,
            0.013856318076304983,
            3
          ],
          [
            3,
            18,
            0.014049014074494872,
            4
          ],
          [
            19,
            21,
            0.01732459774419692,
            6
          ],
          [
            20,
            22,
            0.018259520694483087,
            9
          ],
          [
            5,
            23,
            0.021874666920124672,
            10
          ],
          [
            11,
            24,
            0.02573754720560679,
            11
          ],
          [
            1,
            25,
            0.02833792873361404,
            12
          ],
          [
            8,
            15,
            0.03642384105960265,
            2
          ],
          [
            26,
            27,
            0.03973022619962708,
            14
          ],
          [
            14,
            28,
            0.1020678825814485,
            15
          ],
          [
            9,
            29,
            0.22792493630383603,
            16
          ]
        ],
        "linkage_complete": true
      },
      {
        "group_id": "d8684efc77ade9e8",
        "cluster_scope": "cross_model",
        "representative_policy": "latest",
        "task_fingerprint": "3d5a97ff902050847f35f1deb9e78aa16664eb5a88253b16a5d12e0d0981ef3d",
        "normalized_tag_key": "correction;english;error;preposition",
        "task_name_display": "Err. correct. prepositions",
        "task_names_seen": [
          "Err. correct. prepositions"
        ],
        "tags_display": "error; correction; preposition; English",
        "model_count": 5,
        "distance_metric": "nominal_disagreement_rate",
        "linkage_method": "average",
        "representative_run_stems": [
          "prepositions__einfra__deepseekv32thinking__2026-04-02-13-14",
          "prepositions__einfra__kimik25__2026-04-01-13-10",
          "prepositions__einfra__qwen35__2026-04-04-12-31",
          "prepositions__openrouter__qwenqwen36plusfree__2026-04-03-02-19",
          "prepositions__vertex__gemini3flashpreview__2026-04-01-12-27"
        ],
        "comparable_pair_count": 10,
        "representatives": [
          {
            "provider": "e-infra",
            "model": "deepseek-v3.2-thinking",
            "run_stem": "prepositions__einfra__deepseekv32thinking__2026-04-02-13-14",
            "metrics_file": "prepositions__einfra__deepseekv32thinking__2026-04-02-13-14__metrics.json",
            "timestamp": "2026-04-02T11:14:24.260967Z",
            "accuracy": 0.8924395946999221,
            "cohen_kappa": 0.7595338824791694
          },
          {
            "provider": "e-infra",
            "model": "kimi-k2.5",
            "run_stem": "prepositions__einfra__kimik25__2026-04-01-13-10",
            "metrics_file": "prepositions__einfra__kimik25__2026-04-01-13-10__metrics.json",
            "timestamp": "2026-04-01T11:10:43.580812Z",
            "accuracy": 0.9041309431021044,
            "cohen_kappa": 0.7827007734474937
          },
          {
            "provider": "e-infra",
            "model": "qwen3.5",
            "run_stem": "prepositions__einfra__qwen35__2026-04-04-12-31",
            "metrics_file": "prepositions__einfra__qwen35__2026-04-04-12-31__metrics.json",
            "timestamp": "2026-04-04T10:31:49.198333Z",
            "accuracy": 0.9033515198752923,
            "cohen_kappa": 0.7803725157791852
          },
          {
            "provider": "openrouter",
            "model": "qwen3.6",
            "run_stem": "prepositions__openrouter__qwenqwen36plusfree__2026-04-03-02-19",
            "metrics_file": "prepositions__openrouter__qwenqwen36plusfree__2026-04-03-02-19__metrics.json",
            "timestamp": "2026-04-03T00:19:27.374136Z",
            "accuracy": 0.9033515198752923,
            "cohen_kappa": 0.7755667190041489
          },
          {
            "provider": "vertex",
            "model": "gemini-3-flash-preview",
            "run_stem": "prepositions__vertex__gemini3flashpreview__2026-04-01-12-27",
            "metrics_file": "prepositions__vertex__gemini3flashpreview__2026-04-01-12-27__metrics.json",
            "timestamp": "2026-04-01T10:27:52.038487Z",
            "accuracy": 0.9025720966484801,
            "cohen_kappa": 0.7752016001861465
          }
        ],
        "pairwise": [
          {
            "a": 0,
            "b": 1,
            "distance": 0.05845674201091192,
            "overlap_count": 1283,
            "agreement_count": 1208,
            "disagreement_count": 75
          },
          {
            "a": 0,
            "b": 2,
            "distance": 0.05455962587685113,
            "overlap_count": 1283,
            "agreement_count": 1213,
            "disagreement_count": 70
          },
          {
            "a": 0,
            "b": 3,
            "distance": 0.04988308651597818,
            "overlap_count": 1283,
            "agreement_count": 1219,
            "disagreement_count": 64
          },
          {
            "a": 0,
            "b": 4,
            "distance": 0.05066250974279034,
            "overlap_count": 1283,
            "agreement_count": 1218,
            "disagreement_count": 65
          },
          {
            "a": 1,
            "b": 2,
            "distance": 0.05144193296960249,
            "overlap_count": 1283,
            "agreement_count": 1217,
            "disagreement_count": 66
          },
          {
            "a": 1,
            "b": 3,
            "distance": 0.04598597038191738,
            "overlap_count": 1283,
            "agreement_count": 1224,
            "disagreement_count": 59
          },
          {
            "a": 1,
            "b": 4,
            "distance": 0.049103663289166016,
            "overlap_count": 1283,
            "agreement_count": 1220,
            "disagreement_count": 63
          },
          {
            "a": 2,
            "b": 3,
            "distance": 0.03351519875292284,
            "overlap_count": 1283,
            "agreement_count": 1240,
            "disagreement_count": 43
          },
          {
            "a": 2,
            "b": 4,
            "distance": 0.034294621979735,
            "overlap_count": 1283,
            "agreement_count": 1239,
            "disagreement_count": 44
          },
          {
            "a": 3,
            "b": 4,
            "distance": 0.028838659392049885,
            "overlap_count": 1283,
            "agreement_count": 1246,
            "disagreement_count": 37
          }
        ],
        "linkage": [
          [
            3,
            4,
            0.028838659392049885,
            2
          ],
          [
            2,
            5,
            0.03390491036632892,
            3
          ],
          [
            1,
            6,
            0.0488438555468953,
            4
          ],
          [
            0,
            7,
            0.05339049103663289,
            5
          ]
        ],
        "linkage_complete": true
      },
      {
        "group_id": "c0a90aa5acad9608",
        "cluster_scope": "cross_model",
        "representative_policy": "latest",
        "task_fingerprint": "167cd427ec20c9daf6197b22d9658a19f92d953c7e59d7b9d1c98a7c6a3fb3f6",
        "normalized_tag_key": "disambiguation;homonymy;middle english;semantics",
        "task_name_display": "ME disambiguation",
        "task_names_seen": [
          "ME disambiguation"
        ],
        "tags_display": "Middle English; semantics; disambiguation; homonymy",
        "model_count": 11,
        "distance_metric": "nominal_disagreement_rate",
        "linkage_method": "average",
        "representative_run_stems": [
          "ME_disambiguation__einfra__deepseekv32thinking__2026-03-29-18-13",
          "ME_disambiguation__einfra__glm5__2026-03-29-21-59",
          "ME_disambiguation__einfra__gptoss120b__2026-03-29-18-48",
          "ME_disambiguation__einfra__kimik25__2026-03-29-19-32",
          "ME_disambiguation__einfra__qwen35__2026-03-29-20-02",
          "ME_disambiguation__openai__gpt54__2026-03-29-18-03",
          "ME_disambiguation__openai__gpt54mini__2026-03-29-18-07",
          "ME_disambiguation__openrouter__qwenqwen36plusfree__2026-04-03-19-16",
          "ME_disambiguation__requesty__anthropicclaudehaiku45__2026-03-29-18-08",
          "ME_disambiguation__vertex__gemini31flashlitepreview__2026-03-29-18-01",
          "ME_disambiguation__vertex__gemini31propreview__2026-03-29-17-50"
        ],
        "comparable_pair_count": 55,
        "representatives": [
          {
            "provider": "e-infra",
            "model": "deepseek-v3.2-thinking",
            "run_stem": "ME_disambiguation__einfra__deepseekv32thinking__2026-03-29-18-13",
            "metrics_file": "ME_disambiguation__einfra__deepseekv32thinking__2026-03-29-18-13__metrics.json",
            "timestamp": "2026-03-29T16:13:50.205096Z",
            "accuracy": 0.99,
            "cohen_kappa": 0.9864222674813306
          },
          {
            "provider": "e-infra",
            "model": "glm-5",
            "run_stem": "ME_disambiguation__einfra__glm5__2026-03-29-21-59",
            "metrics_file": "ME_disambiguation__einfra__glm5__2026-03-29-21-59__metrics.json",
            "timestamp": "2026-03-29T19:59:20.519323Z",
            "accuracy": 0.98,
            "cohen_kappa": 0.9728539798610463
          },
          {
            "provider": "e-infra",
            "model": "gpt-oss-120b",
            "run_stem": "ME_disambiguation__einfra__gptoss120b__2026-03-29-18-48",
            "metrics_file": "ME_disambiguation__einfra__gptoss120b__2026-03-29-18-48__metrics.json",
            "timestamp": "2026-03-29T16:48:51.685631Z",
            "accuracy": 0.945,
            "cohen_kappa": 0.9254767792418956
          },
          {
            "provider": "e-infra",
            "model": "kimi-k2.5",
            "run_stem": "ME_disambiguation__einfra__kimik25__2026-03-29-19-32",
            "metrics_file": "ME_disambiguation__einfra__kimik25__2026-03-29-19-32__metrics.json",
            "timestamp": "2026-03-29T17:32:26.587695Z",
            "accuracy": 0.99,
            "cohen_kappa": 0.9863975651641643
          },
          {
            "provider": "e-infra",
            "model": "qwen3.5",
            "run_stem": "ME_disambiguation__einfra__qwen35__2026-03-29-20-02",
            "metrics_file": "ME_disambiguation__einfra__qwen35__2026-03-29-20-02__metrics.json",
            "timestamp": "2026-03-29T18:02:10.650671Z",
            "accuracy": 0.9925,
            "cohen_kappa": 0.9897961786689115
          },
          {
            "provider": "openai",
            "model": "gpt-5.4",
            "run_stem": "ME_disambiguation__openai__gpt54__2026-03-29-18-03",
            "metrics_file": "ME_disambiguation__openai__gpt54__2026-03-29-18-03__metrics.json",
            "timestamp": "2026-03-29T16:03:41.707544Z",
            "accuracy": 0.9825,
            "cohen_kappa": 0.9761712267563084
          },
          {
            "provider": "openai",
            "model": "gpt-5.4-mini",
            "run_stem": "ME_disambiguation__openai__gpt54mini__2026-03-29-18-07",
            "metrics_file": "ME_disambiguation__openai__gpt54mini__2026-03-29-18-07__metrics.json",
            "timestamp": "2026-03-29T16:07:03.773591Z",
            "accuracy": 0.9525,
            "cohen_kappa": 0.9356319502672121
          },
          {
            "provider": "openrouter",
            "model": "qwen3.6",
            "run_stem": "ME_disambiguation__openrouter__qwenqwen36plusfree__2026-04-03-19-16",
            "metrics_file": "ME_disambiguation__openrouter__qwenqwen36plusfree__2026-04-03-19-16__metrics.json",
            "timestamp": "2026-04-03T17:16:36.765533Z",
            "accuracy": 0.99,
            "cohen_kappa": 0.986392706490679
          },
          {
            "provider": "requesty",
            "model": "claude-haiku-4-5",
            "run_stem": "ME_disambiguation__requesty__anthropicclaudehaiku45__2026-03-29-18-08",
            "metrics_file": "ME_disambiguation__requesty__anthropicclaudehaiku45__2026-03-29-18-08__metrics.json",
            "timestamp": "2026-03-29T16:09:02.369257Z",
            "accuracy": 0.975,
            "cohen_kappa": 0.9660804233163169
          },
          {
            "provider": "vertex",
            "model": "gemini-3.1-flash-lite-preview",
            "run_stem": "ME_disambiguation__vertex__gemini31flashlitepreview__2026-03-29-18-01",
            "metrics_file": "ME_disambiguation__vertex__gemini31flashlitepreview__2026-03-29-18-01__metrics.json",
            "timestamp": "2026-03-29T16:01:28.629386Z",
            "accuracy": 0.9925,
            "cohen_kappa": 0.9897796666467941
          },
          {
            "provider": "vertex",
            "model": "gemini-3.1-pro-preview",
            "run_stem": "ME_disambiguation__vertex__gemini31propreview__2026-03-29-17-50",
            "metrics_file": "ME_disambiguation__vertex__gemini31propreview__2026-03-29-17-50__metrics.json",
            "timestamp": "2026-03-29T15:50:03.914973Z",
            "accuracy": 0.9925,
            "cohen_kappa": 0.989791317515547
          }
        ],
        "pairwise": [
          {
            "a": 0,
            "b": 1,
            "distance": 0.017543859649122806,
            "overlap_count": 399,
            "agreement_count": 392,
            "disagreement_count": 7
          },
          {
            "a": 0,
            "b": 2,
            "distance": 0.05,
            "overlap_count": 400,
            "agreement_count": 380,
            "disagreement_count": 20
          },
          {
            "a": 0,
            "b": 3,
            "distance": 0.015,
            "overlap_count": 400,
            "agreement_count": 394,
            "disagreement_count": 6
          },
          {
            "a": 0,
            "b": 4,
            "distance": 0.0125,
            "overlap_count": 400,
            "agreement_count": 395,
            "disagreement_count": 5
          },
          {
            "a": 0,
            "b": 5,
            "distance": 0.0175,
            "overlap_count": 400,
            "agreement_count": 393,
            "disagreement_count": 7
          },
          {
            "a": 0,
            "b": 6,
            "distance": 0.0475,
            "overlap_count": 400,
            "agreement_count": 381,
            "disagreement_count": 19
          },
          {
            "a": 0,
            "b": 7,
            "distance": 0.015,
            "overlap_count": 400,
            "agreement_count": 394,
            "disagreement_count": 6
          },
          {
            "a": 0,
            "b": 8,
            "distance": 0.035,
            "overlap_count": 400,
            "agreement_count": 386,
            "disagreement_count": 14
          },
          {
            "a": 0,
            "b": 9,
            "distance": 0.0125,
            "overlap_count": 400,
            "agreement_count": 395,
            "disagreement_count": 5
          },
          {
            "a": 0,
            "b": 10,
            "distance": 0.0125,
            "overlap_count": 400,
            "agreement_count": 395,
            "disagreement_count": 5
          },
          {
            "a": 1,
            "b": 2,
            "distance": 0.05263157894736842,
            "overlap_count": 399,
            "agreement_count": 378,
            "disagreement_count": 21
          },
          {
            "a": 1,
            "b": 3,
            "distance": 0.012531328320802004,
            "overlap_count": 399,
            "agreement_count": 394,
            "disagreement_count": 5
          },
          {
            "a": 1,
            "b": 4,
            "distance": 0.020050125313283207,
            "overlap_count": 399,
            "agreement_count": 391,
            "disagreement_count": 8
          },
          {
            "a": 1,
            "b": 5,
            "distance": 0.015037593984962405,
            "overlap_count": 399,
            "agreement_count": 393,
            "disagreement_count": 6
          },
          {
            "a": 1,
            "b": 6,
            "distance": 0.05012531328320802,
            "overlap_count": 399,
            "agreement_count": 379,
            "disagreement_count": 20
          },
          {
            "a": 1,
            "b": 7,
            "distance": 0.017543859649122806,
            "overlap_count": 399,
            "agreement_count": 392,
            "disagreement_count": 7
          },
          {
            "a": 1,
            "b": 8,
            "distance": 0.03258145363408521,
            "overlap_count": 399,
            "agreement_count": 386,
            "disagreement_count": 13
          },
          {
            "a": 1,
            "b": 9,
            "distance": 0.015037593984962405,
            "overlap_count": 399,
            "agreement_count": 393,
            "disagreement_count": 6
          },
          {
            "a": 1,
            "b": 10,
            "distance": 0.015037593984962405,
            "overlap_count": 399,
            "agreement_count": 393,
            "disagreement_count": 6
          },
          {
            "a": 2,
            "b": 3,
            "distance": 0.06,
            "overlap_count": 400,
            "agreement_count": 376,
            "disagreement_count": 24
          },
          {
            "a": 2,
            "b": 4,
            "distance": 0.0575,
            "overlap_count": 400,
            "agreement_count": 377,
            "disagreement_count": 23
          },
          {
            "a": 2,
            "b": 5,
            "distance": 0.0575,
            "overlap_count": 400,
            "agreement_count": 377,
            "disagreement_count": 23
          },
          {
            "a": 2,
            "b": 6,
            "distance": 0.0725,
            "overlap_count": 400,
            "agreement_count": 371,
            "disagreement_count": 29
          },
          {
            "a": 2,
            "b": 7,
            "distance": 0.065,
            "overlap_count": 400,
            "agreement_count": 374,
            "disagreement_count": 26
          },
          {
            "a": 2,
            "b": 8,
            "distance": 0.045,
            "overlap_count": 400,
            "agreement_count": 382,
            "disagreement_count": 18
          },
          {
            "a": 2,
            "b": 9,
            "distance": 0.0475,
            "overlap_count": 400,
            "agreement_count": 381,
            "disagreement_count": 19
          },
          {
            "a": 2,
            "b": 10,
            "distance": 0.0575,
            "overlap_count": 400,
            "agreement_count": 377,
            "disagreement_count": 23
          },
          {
            "a": 3,
            "b": 4,
            "distance": 0.0175,
            "overlap_count": 400,
            "agreement_count": 393,
            "disagreement_count": 7
          },
          {
            "a": 3,
            "b": 5,
            "distance": 0.0175,
            "overlap_count": 400,
            "agreement_count": 393,
            "disagreement_count": 7
          },
          {
            "a": 3,
            "b": 6,
            "distance": 0.0525,
            "overlap_count": 400,
            "agreement_count": 379,
            "disagreement_count": 21
          },
          {
            "a": 3,
            "b": 7,
            "distance": 0.01,
            "overlap_count": 400,
            "agreement_count": 396,
            "disagreement_count": 4
          },
          {
            "a": 3,
            "b": 8,
            "distance": 0.03,
            "overlap_count": 400,
            "agreement_count": 388,
            "disagreement_count": 12
          },
          {
            "a": 3,
            "b": 9,
            "distance": 0.0175,
            "overlap_count": 400,
            "agreement_count": 393,
            "disagreement_count": 7
          },
          {
            "a": 3,
            "b": 10,
            "distance": 0.0075,
            "overlap_count": 400,
            "agreement_count": 397,
            "disagreement_count": 3
          },
          {
            "a": 4,
            "b": 5,
            "distance": 0.02,
            "overlap_count": 400,
            "agreement_count": 392,
            "disagreement_count": 8
          },
          {
            "a": 4,
            "b": 6,
            "distance": 0.05,
            "overlap_count": 400,
            "agreement_count": 380,
            "disagreement_count": 20
          },
          {
            "a": 4,
            "b": 7,
            "distance": 0.0125,
            "overlap_count": 400,
            "agreement_count": 395,
            "disagreement_count": 5
          },
          {
            "a": 4,
            "b": 8,
            "distance": 0.0325,
            "overlap_count": 400,
            "agreement_count": 387,
            "disagreement_count": 13
          },
          {
            "a": 4,
            "b": 9,
            "distance": 0.01,
            "overlap_count": 400,
            "agreement_count": 396,
            "disagreement_count": 4
          },
          {
            "a": 4,
            "b": 10,
            "distance": 0.01,
            "overlap_count": 400,
            "agreement_count": 396,
            "disagreement_count": 4
          },
          {
            "a": 5,
            "b": 6,
            "distance": 0.055,
            "overlap_count": 400,
            "agreement_count": 378,
            "disagreement_count": 22
          },
          {
            "a": 5,
            "b": 7,
            "distance": 0.0225,
            "overlap_count": 400,
            "agreement_count": 391,
            "disagreement_count": 9
          },
          {
            "a": 5,
            "b": 8,
            "distance": 0.0375,
            "overlap_count": 400,
            "agreement_count": 385,
            "disagreement_count": 15
          },
          {
            "a": 5,
            "b": 9,
            "distance": 0.015,
            "overlap_count": 400,
            "agreement_count": 394,
            "disagreement_count": 6
          },
          {
            "a": 5,
            "b": 10,
            "distance": 0.02,
            "overlap_count": 400,
            "agreement_count": 392,
            "disagreement_count": 8
          },
          {
            "a": 6,
            "b": 7,
            "distance": 0.0475,
            "overlap_count": 400,
            "agreement_count": 381,
            "disagreement_count": 19
          },
          {
            "a": 6,
            "b": 8,
            "distance": 0.0475,
            "overlap_count": 400,
            "agreement_count": 381,
            "disagreement_count": 19
          },
          {
            "a": 6,
            "b": 9,
            "distance": 0.05,
            "overlap_count": 400,
            "agreement_count": 380,
            "disagreement_count": 20
          },
          {
            "a": 6,
            "b": 10,
            "distance": 0.055,
            "overlap_count": 400,
            "agreement_count": 378,
            "disagreement_count": 22
          },
          {
            "a": 7,
            "b": 8,
            "distance": 0.03,
            "overlap_count": 400,
            "agreement_count": 388,
            "disagreement_count": 12
          },
          {
            "a": 7,
            "b": 9,
            "distance": 0.0175,
            "overlap_count": 400,
            "agreement_count": 393,
            "disagreement_count": 7
          },
          {
            "a": 7,
            "b": 10,
            "distance": 0.0075,
            "overlap_count": 400,
            "agreement_count": 397,
            "disagreement_count": 3
          },
          {
            "a": 8,
            "b": 9,
            "distance": 0.0325,
            "overlap_count": 400,
            "agreement_count": 387,
            "disagreement_count": 13
          },
          {
            "a": 8,
            "b": 10,
            "distance": 0.0325,
            "overlap_count": 400,
            "agreement_count": 387,
            "disagreement_count": 13
          },
          {
            "a": 9,
            "b": 10,
            "distance": 0.015,
            "overlap_count": 400,
            "agreement_count": 394,
            "disagreement_count": 6
          }
        ],
        "linkage": [
          [
            3,
            10,
            0.0075,
            2
          ],
          [
            7,
            11,
            0.00875,
            3
          ],
          [
            4,
            9,
            0.01,
            2
          ],
          [
            0,
            13,
            0.0125,
            3
          ],
          [
            12,
            14,
            0.014722222222222223,
            6
          ],
          [
            1,
            5,
            0.015037593984962405,
            2
          ],
          [
            15,
            16,
            0.0175203634085213,
            8
          ],
          [
            8,
            17,
            0.03282268170426065,
            9
          ],
          [
            6,
            18,
            0.050569479253689785,
            10
          ],
          [
            2,
            19,
            0.05651315789473684,
            11
          ]
        ],
        "linkage_complete": true
      },
      {
        "group_id": "bc91b92d621b9f2f",
        "cluster_scope": "cross_model",
        "representative_policy": "latest",
        "task_fingerprint": "e0860bb13f8109f7854720ca263054673c978e0e327536fcf1d0e1226f2388db",
        "normalized_tag_key": "lemmatization;morphology;old english;v3",
        "task_name_display": "OE lemmatization",
        "task_names_seen": [
          "OE lemmatization"
        ],
        "tags_display": "Old English; lemmatization; morphology; v3",
        "model_count": 8,
        "distance_metric": "nominal_disagreement_rate",
        "linkage_method": "average",
        "representative_run_stems": [
          "ycoe3__einfra__deepseekv32thinking__2026-03-18-23-50",
          "ycoe3__einfra__glm47__2026-03-20-10-02",
          "ycoe3__einfra__glm5__2026-03-31-20-38",
          "ycoe3__einfra__gptoss120b__2026-03-20-15-32",
          "ycoe3__einfra__kimik25__2026-03-19-21-22",
          "ycoe3__openai__gpt54mini__2026-03-18-17-04",
          "ycoe3__requesty__anthropicclaudehaiku45__2026-03-23-01-29",
          "ycoe3__vertex__gemini3flashpreview__2026-03-18-01-11"
        ],
        "comparable_pair_count": 28,
        "representatives": [
          {
            "provider": "e-infra",
            "model": "deepseek-v3.2-thinking",
            "run_stem": "ycoe3__einfra__deepseekv32thinking__2026-03-18-23-50",
            "metrics_file": "ycoe3__einfra__deepseekv32thinking__2026-03-18-23-50__metrics.json",
            "timestamp": "2026-03-18T22:50:33.957474Z",
            "accuracy": 0.809961144471918,
            "cohen_kappa": 0.8067742518245767
          },
          {
            "provider": "e-infra",
            "model": "glm-4.7",
            "run_stem": "ycoe3__einfra__glm47__2026-03-20-10-02",
            "metrics_file": "ycoe3__einfra__glm47__2026-03-20-10-02__metrics.json",
            "timestamp": "2026-03-20T09:02:57.831985Z",
            "accuracy": 0.8212645708230307,
            "cohen_kappa": 0.8182681322919435
          },
          {
            "provider": "e-infra",
            "model": "glm-5",
            "run_stem": "ycoe3__einfra__glm5__2026-03-31-20-38",
            "metrics_file": "ycoe3__einfra__glm5__2026-03-31-20-38__metrics.json",
            "timestamp": "2026-03-31T18:38:48.544821Z",
            "accuracy": 0.8205581066760862,
            "cohen_kappa": 0.8175208636693767
          },
          {
            "provider": "e-infra",
            "model": "gpt-oss-120b",
            "run_stem": "ycoe3__einfra__gptoss120b__2026-03-20-15-32",
            "metrics_file": "ycoe3__einfra__gptoss120b__2026-03-20-15-32__metrics.json",
            "timestamp": "2026-03-20T14:32:56.669315Z",
            "accuracy": 0.7004592016955139,
            "cohen_kappa": 0.6961615957069811
          },
          {
            "provider": "e-infra",
            "model": "kimi-k2.5",
            "run_stem": "ycoe3__einfra__kimik25__2026-03-19-21-22",
            "metrics_file": "ycoe3__einfra__kimik25__2026-03-19-21-22__metrics.json",
            "timestamp": "2026-03-19T20:22:24.560641Z",
            "accuracy": 0.8265630519251148,
            "cohen_kappa": 0.8235506107595435
          },
          {
            "provider": "openai",
            "model": "gpt-5.4-mini",
            "run_stem": "ycoe3__openai__gpt54mini__2026-03-18-17-04",
            "metrics_file": "ycoe3__openai__gpt54mini__2026-03-18-17-04__metrics.json",
            "timestamp": "2026-03-18T16:04:56.311644Z",
            "accuracy": 0.6944542564464854,
            "cohen_kappa": 0.6896664555799817
          },
          {
            "provider": "requesty",
            "model": "claude-haiku-4-5",
            "run_stem": "ycoe3__requesty__anthropicclaudehaiku45__2026-03-23-01-29",
            "metrics_file": "ycoe3__requesty__anthropicclaudehaiku45__2026-03-23-01-29__metrics.json",
            "timestamp": "2026-03-23T00:29:26.469275Z",
            "accuracy": 0.6937477922995408,
            "cohen_kappa": 0.6901560734270713
          },
          {
            "provider": "vertex",
            "model": "gemini-3-flash-preview",
            "run_stem": "ycoe3__vertex__gemini3flashpreview__2026-03-18-01-11",
            "metrics_file": "ycoe3__vertex__gemini3flashpreview__2026-03-18-01-11__metrics.json",
            "timestamp": "2026-03-18T00:11:38.185791Z",
            "accuracy": 0.8322147651006712,
            "cohen_kappa": 0.8292449559819026
          }
        ],
        "pairwise": [
          {
            "a": 0,
            "b": 1,
            "distance": 0.1279971791255289,
            "overlap_count": 2836,
            "agreement_count": 2473,
            "disagreement_count": 363
          },
          {
            "a": 0,
            "b": 2,
            "distance": 0.1229739252995067,
            "overlap_count": 2838,
            "agreement_count": 2489,
            "disagreement_count": 349
          },
          {
            "a": 0,
            "b": 3,
            "distance": 0.22532720198089848,
            "overlap_count": 2827,
            "agreement_count": 2190,
            "disagreement_count": 637
          },
          {
            "a": 0,
            "b": 4,
            "distance": 0.1346018322762509,
            "overlap_count": 2838,
            "agreement_count": 2456,
            "disagreement_count": 382
          },
          {
            "a": 0,
            "b": 5,
            "distance": 0.2690677966101695,
            "overlap_count": 2832,
            "agreement_count": 2070,
            "disagreement_count": 762
          },
          {
            "a": 0,
            "b": 6,
            "distance": 0.28621783574198095,
            "overlap_count": 2837,
            "agreement_count": 2025,
            "disagreement_count": 812
          },
          {
            "a": 0,
            "b": 7,
            "distance": 0.11557434813248767,
            "overlap_count": 2838,
            "agreement_count": 2510,
            "disagreement_count": 328
          },
          {
            "a": 1,
            "b": 2,
            "distance": 0.09026798307475317,
            "overlap_count": 2836,
            "agreement_count": 2580,
            "disagreement_count": 256
          },
          {
            "a": 1,
            "b": 3,
            "distance": 0.20601769911504425,
            "overlap_count": 2825,
            "agreement_count": 2243,
            "disagreement_count": 582
          },
          {
            "a": 1,
            "b": 4,
            "distance": 0.11636107193229901,
            "overlap_count": 2836,
            "agreement_count": 2506,
            "disagreement_count": 330
          },
          {
            "a": 1,
            "b": 5,
            "distance": 0.25830388692579503,
            "overlap_count": 2830,
            "agreement_count": 2099,
            "disagreement_count": 731
          },
          {
            "a": 1,
            "b": 6,
            "distance": 0.272310405643739,
            "overlap_count": 2835,
            "agreement_count": 2063,
            "disagreement_count": 772
          },
          {
            "a": 1,
            "b": 7,
            "distance": 0.12306064880112835,
            "overlap_count": 2836,
            "agreement_count": 2487,
            "disagreement_count": 349
          },
          {
            "a": 2,
            "b": 3,
            "distance": 0.20551821719136895,
            "overlap_count": 2827,
            "agreement_count": 2246,
            "disagreement_count": 581
          },
          {
            "a": 2,
            "b": 4,
            "distance": 0.11804087385482734,
            "overlap_count": 2838,
            "agreement_count": 2503,
            "disagreement_count": 335
          },
          {
            "a": 2,
            "b": 5,
            "distance": 0.2542372881355932,
            "overlap_count": 2832,
            "agreement_count": 2112,
            "disagreement_count": 720
          },
          {
            "a": 2,
            "b": 6,
            "distance": 0.2749383151216073,
            "overlap_count": 2837,
            "agreement_count": 2057,
            "disagreement_count": 780
          },
          {
            "a": 2,
            "b": 7,
            "distance": 0.12050739957716702,
            "overlap_count": 2838,
            "agreement_count": 2496,
            "disagreement_count": 342
          },
          {
            "a": 3,
            "b": 4,
            "distance": 0.23523169437566324,
            "overlap_count": 2827,
            "agreement_count": 2162,
            "disagreement_count": 665
          },
          {
            "a": 3,
            "b": 5,
            "distance": 0.30297661233167966,
            "overlap_count": 2822,
            "agreement_count": 1967,
            "disagreement_count": 855
          },
          {
            "a": 3,
            "b": 6,
            "distance": 0.2699929228591649,
            "overlap_count": 2826,
            "agreement_count": 2063,
            "disagreement_count": 763
          },
          {
            "a": 3,
            "b": 7,
            "distance": 0.2263883975946233,
            "overlap_count": 2827,
            "agreement_count": 2187,
            "disagreement_count": 640
          },
          {
            "a": 4,
            "b": 5,
            "distance": 0.2874293785310734,
            "overlap_count": 2832,
            "agreement_count": 2018,
            "disagreement_count": 814
          },
          {
            "a": 4,
            "b": 6,
            "distance": 0.3119492421572083,
            "overlap_count": 2837,
            "agreement_count": 1952,
            "disagreement_count": 885
          },
          {
            "a": 4,
            "b": 7,
            "distance": 0.1269338959212377,
            "overlap_count": 2844,
            "agreement_count": 2483,
            "disagreement_count": 361
          },
          {
            "a": 5,
            "b": 6,
            "distance": 0.32426704344754503,
            "overlap_count": 2831,
            "agreement_count": 1913,
            "disagreement_count": 918
          },
          {
            "a": 5,
            "b": 7,
            "distance": 0.2680084745762712,
            "overlap_count": 2832,
            "agreement_count": 2073,
            "disagreement_count": 759
          },
          {
            "a": 6,
            "b": 7,
            "distance": 0.2982023264011279,
            "overlap_count": 2837,
            "agreement_count": 1991,
            "disagreement_count": 846
          }
        ],
        "linkage": [
          [
            1,
            2,
            0.09026798307475317,
            2
          ],
          [
            0,
            7,
            0.11557434813248767,
            2
          ],
          [
            4,
            8,
            0.11720097289356318,
            3
          ],
          [
            9,
            10,
            0.12601248016680325,
            5
          ],
          [
            3,
            11,
            0.21969664205151967,
            6
          ],
          [
            5,
            12,
            0.2733372395184303,
            7
          ],
          [
            6,
            13,
            0.2911254416246248,
            8
          ]
        ],
        "linkage_complete": true
      },
      {
        "group_id": "34dd61f01b51cdcd",
        "cluster_scope": "cross_model",
        "representative_policy": "latest",
        "task_fingerprint": "eca5b779052ec243fcc76cdc818bae503645042de8cdc27356517265352b7c24",
        "normalized_tag_key": "lemmatization;morphology;old english;v4;validator",
        "task_name_display": "OE lemmatization",
        "task_names_seen": [
          "OE lemmatization"
        ],
        "tags_display": "Old English; lemmatization; morphology; v4; validator",
        "model_count": 3,
        "distance_metric": "nominal_disagreement_rate",
        "linkage_method": "average",
        "representative_run_stems": [
          "ycoe3__einfra__glm51__2026-04-27-22-51",
          "ycoe3__einfra__kimik26__2026-04-29-01-25",
          "ycoe3__vertex__gemini3flashpreview__2026-04-30-21-56"
        ],
        "comparable_pair_count": 3,
        "representatives": [
          {
            "provider": "e-infra",
            "model": "glm-5.1",
            "run_stem": "ycoe3__einfra__glm51__2026-04-27-22-51",
            "metrics_file": "ycoe3__einfra__glm51__2026-04-27-22-51__metrics.json",
            "timestamp": "2026-04-27T20:51:24.742584Z",
            "accuracy": 0.9739985945186226,
            "cohen_kappa": 0.9735352590737089
          },
          {
            "provider": "e-infra",
            "model": "kimi-k2.6",
            "run_stem": "ycoe3__einfra__kimik26__2026-04-29-01-25",
            "metrics_file": "ycoe3__einfra__kimik26__2026-04-29-01-25__metrics.json",
            "timestamp": "2026-04-28T23:25:53.664337Z",
            "accuracy": 0.9739985945186226,
            "cohen_kappa": 0.973533010784025
          },
          {
            "provider": "vertex",
            "model": "gemini-3-flash-preview",
            "run_stem": "ycoe3__vertex__gemini3flashpreview__2026-04-30-21-56",
            "metrics_file": "ycoe3__vertex__gemini3flashpreview__2026-04-30-21-56__metrics.json",
            "timestamp": "2026-04-30T19:56:09.797765Z",
            "accuracy": 0.9873506676036542,
            "cohen_kappa": 0.9871231511429811
          }
        ],
        "pairwise": [
          {
            "a": 0,
            "b": 1,
            "distance": 0.023313316849169905,
            "overlap_count": 2831,
            "agreement_count": 2765,
            "disagreement_count": 66
          },
          {
            "a": 0,
            "b": 2,
            "distance": 0.021441124780316345,
            "overlap_count": 2845,
            "agreement_count": 2784,
            "disagreement_count": 61
          },
          {
            "a": 1,
            "b": 2,
            "distance": 0.015536723163841809,
            "overlap_count": 2832,
            "agreement_count": 2788,
            "disagreement_count": 44
          }
        ],
        "linkage": [
          [
            1,
            2,
            0.015536723163841809,
            2
          ],
          [
            0,
            3,
            0.022377220814743123,
            3
          ]
        ],
        "linkage_complete": true
      },
      {
        "group_id": "59ce725da15f2d39",
        "cluster_scope": "cross_model",
        "representative_policy": "latest",
        "task_fingerprint": "5d2775386b7429a15d795e64be76f5d0829aff4b3b67fc8acae71e4adebccffd",
        "normalized_tag_key": "morphology;number;old english",
        "task_name_display": "OE number",
        "task_names_seen": [
          "OE number"
        ],
        "tags_display": "Old English; morphology; number",
        "model_count": 19,
        "distance_metric": "nominal_disagreement_rate",
        "linkage_method": "average",
        "representative_run_stems": [
          "OE_number____deepseekv32thinking__2026-02-21-02-26",
          "OE_number____gemini3flashpreview__2026-02-25-01-10",
          "OE_number____gemini31propreview__2026-02-21-02-26",
          "OE_number____glm47__2026-02-22-16-17",
          "OE_number____gptoss120b__2026-02-21-18-49",
          "OE_number____gpt51__2026-02-24-17-09",
          "OE_number____gpt52pro__2026-02-21-02-26",
          "OE_number____gpt5mini__2026-02-24-01-18",
          "OE_number____kimik25__2026-02-21-23-36",
          "OE_number__einfra__glm5__2026-04-02-01-04",
          "OE_number__inception__mercury2__2026-03-04-23-43",
          "OE_number__openai__gpt54mini__2026-03-20-18-21",
          "OE_number__openai__gpt54pro__2026-03-10-15-22",
          "OE_number__openai__qwen35__2026-02-23-02-19",
          "OE_number__openrouter__qwenqwen36plusfree__2026-04-04-00-50",
          "OE_number__requesty__anthropicclaudehaiku45__2026-03-23-01-17",
          "OE_number__requesty__claudesonnet46__2026-02-21-20-17",
          "OE_number__openai__gpt54__2026-03-10-14-52",
          "OE_number__vertex__gemini3flashpreview__2026-03-11-00-23"
        ],
        "comparable_pair_count": 171,
        "representatives": [
          {
            "provider": "",
            "model": "deepseek-v3.2-thinking",
            "run_stem": "OE_number____deepseekv32thinking__2026-02-21-02-26",
            "metrics_file": "OE_number____deepseekv32thinking__2026-02-21-02-26__metrics.json",
            "timestamp": "2026-02-21T01:26:38.907718Z",
            "accuracy": 0.9633333333333334,
            "cohen_kappa": 0.8979509004673386
          },
          {
            "provider": "",
            "model": "gemini-3-flash-preview",
            "run_stem": "OE_number____gemini3flashpreview__2026-02-25-01-10",
            "metrics_file": "OE_number____gemini3flashpreview__2026-02-25-01-10__metrics.json",
            "timestamp": "2026-02-25T00:10:24.527172Z",
            "accuracy": 0.9808333333333333,
            "cohen_kappa": 0.9449738825312014
          },
          {
            "provider": "",
            "model": "gemini-3.1-pro-preview",
            "run_stem": "OE_number____gemini31propreview__2026-02-21-02-26",
            "metrics_file": "OE_number____gemini31propreview__2026-02-21-02-26__metrics.json",
            "timestamp": "2026-02-21T01:26:31.217004Z",
            "accuracy": 0.9825,
            "cohen_kappa": 0.949889040017181
          },
          {
            "provider": "",
            "model": "glm-4.7",
            "run_stem": "OE_number____glm47__2026-02-22-16-17",
            "metrics_file": "OE_number____glm47__2026-02-22-16-17__metrics.json",
            "timestamp": "2026-02-22T15:17:02.084278Z",
            "accuracy": 0.9516666666666667,
            "cohen_kappa": 0.8702485039428794
          },
          {
            "provider": "",
            "model": "gpt-oss-120b",
            "run_stem": "OE_number____gptoss120b__2026-02-21-18-49",
            "metrics_file": "OE_number____gptoss120b__2026-02-21-18-49__metrics.json",
            "timestamp": "2026-02-21T17:49:34.964163Z",
            "accuracy": 0.8766666666666667,
            "cohen_kappa": 0.6989514152334654
          },
          {
            "provider": "",
            "model": "gpt51",
            "run_stem": "OE_number____gpt51__2026-02-24-17-09",
            "metrics_file": "OE_number____gpt51__2026-02-24-17-09__metrics.json",
            "timestamp": "2026-02-24T16:09:02.122628Z",
            "accuracy": 0.9308333333333333,
            "cohen_kappa": 0.8217653816833507
          },
          {
            "provider": "",
            "model": "gpt52pro",
            "run_stem": "OE_number____gpt52pro__2026-02-21-02-26",
            "metrics_file": "OE_number____gpt52pro__2026-02-21-02-26__metrics.json",
            "timestamp": "2026-02-21T01:26:20.586712Z",
            "accuracy": 0.9775,
            "cohen_kappa": 0.9357382563646894
          },
          {
            "provider": "",
            "model": "gpt5mini",
            "run_stem": "OE_number____gpt5mini__2026-02-24-01-18",
            "metrics_file": "OE_number____gpt5mini__2026-02-24-01-18__metrics.json",
            "timestamp": "2026-02-24T00:18:27.101111Z",
            "accuracy": 0.9175,
            "cohen_kappa": 0.7903386348715559
          },
          {
            "provider": "",
            "model": "kimi-k2.5",
            "run_stem": "OE_number____kimik25__2026-02-21-23-36",
            "metrics_file": "OE_number____kimik25__2026-02-21-23-36__metrics.json",
            "timestamp": "2026-02-21T22:37:01.106140Z",
            "accuracy": 0.9666666666666667,
            "cohen_kappa": 0.9075472279577182
          },
          {
            "provider": "e-infra",
            "model": "glm-5",
            "run_stem": "OE_number__einfra__glm5__2026-04-02-01-04",
            "metrics_file": "OE_number__einfra__glm5__2026-04-02-01-04__metrics.json",
            "timestamp": "2026-04-01T23:04:56.944335Z",
            "accuracy": 0.9641666666666666,
            "cohen_kappa": 0.8995307541034676
          },
          {
            "provider": "inception",
            "model": "mercury-2",
            "run_stem": "OE_number__inception__mercury2__2026-03-04-23-43",
            "metrics_file": "OE_number__inception__mercury2__2026-03-04-23-43__metrics.json",
            "timestamp": "2026-03-04T22:49:36.410032Z",
            "accuracy": 0.8308333333333333,
            "cohen_kappa": 0.6136240352496692
          },
          {
            "provider": "openai",
            "model": "gpt-5.4-mini",
            "run_stem": "OE_number__openai__gpt54mini__2026-03-20-18-21",
            "metrics_file": "OE_number__openai__gpt54mini__2026-03-20-18-21__metrics.json",
            "timestamp": "2026-03-20T17:21:04.759848Z",
            "accuracy": 0.9375,
            "cohen_kappa": 0.8335491030146107
          },
          {
            "provider": "openai",
            "model": "gpt-5.4-pro",
            "run_stem": "OE_number__openai__gpt54pro__2026-03-10-15-22",
            "metrics_file": "OE_number__openai__gpt54pro__2026-03-10-15-22__metrics.json",
            "timestamp": "2026-03-10T14:22:37.561603Z",
            "accuracy": 0.9791666666666666,
            "cohen_kappa": 0.9400331017278462
          },
          {
            "provider": "openai",
            "model": "qwen3.5",
            "run_stem": "OE_number__openai__qwen35__2026-02-23-02-19",
            "metrics_file": "OE_number__openai__qwen35__2026-02-23-02-19__metrics.json",
            "timestamp": "2026-02-23T01:19:56.706842Z",
            "accuracy": 0.9708333333333333,
            "cohen_kappa": 0.9179706725623618
          },
          {
            "provider": "openrouter",
            "model": "qwen3.6",
            "run_stem": "OE_number__openrouter__qwenqwen36plusfree__2026-04-04-00-50",
            "metrics_file": "OE_number__openrouter__qwenqwen36plusfree__2026-04-04-00-50__metrics.json",
            "timestamp": "2026-04-03T22:50:47.255105Z",
            "accuracy": 0.96,
            "cohen_kappa": 0.8909735156498432
          },
          {
            "provider": "requesty",
            "model": "claude-haiku-4-5",
            "run_stem": "OE_number__requesty__anthropicclaudehaiku45__2026-03-23-01-17",
            "metrics_file": "OE_number__requesty__anthropicclaudehaiku45__2026-03-23-01-17__metrics.json",
            "timestamp": "2026-03-23T00:17:54.157008Z",
            "accuracy": 0.9475,
            "cohen_kappa": 0.84400406084667
          },
          {
            "provider": "requesty",
            "model": "claude-sonnet-4-6",
            "run_stem": "OE_number__requesty__claudesonnet46__2026-02-21-20-17",
            "metrics_file": "OE_number__requesty__claudesonnet46__2026-02-21-20-17__metrics.json",
            "timestamp": "2026-02-21T19:17:16.089500Z",
            "accuracy": 0.97,
            "cohen_kappa": 0.9159480745883012
          },
          {
            "provider": "requesty",
            "model": "gpt-5.4-pro",
            "run_stem": "OE_number__openai__gpt54__2026-03-10-14-52",
            "metrics_file": "OE_number__openai__gpt54__2026-03-10-14-52__metrics.json",
            "timestamp": "2026-03-10T13:52:04.694015Z",
            "accuracy": 0.9491666666666667,
            "cohen_kappa": 0.8620278884612341
          },
          {
            "provider": "vertex",
            "model": "gemini-3-flash-preview",
            "run_stem": "OE_number__vertex__gemini3flashpreview__2026-03-11-00-23",
            "metrics_file": "OE_number__vertex__gemini3flashpreview__2026-03-11-00-23__metrics.json",
            "timestamp": "2026-03-10T23:24:01.576132Z",
            "accuracy": 0.9783333333333334,
            "cohen_kappa": 0.937877315662881
          }
        ],
        "pairwise": [
          {
            "a": 0,
            "b": 1,
            "distance": 0.02422723475355054,
            "overlap_count": 1197,
            "agreement_count": 1168,
            "disagreement_count": 29
          },
          {
            "a": 0,
            "b": 2,
            "distance": 0.025898078529657476,
            "overlap_count": 1197,
            "agreement_count": 1166,
            "disagreement_count": 31
          },
          {
            "a": 0,
            "b": 3,
            "distance": 0.05430242272347535,
            "overlap_count": 1197,
            "agreement_count": 1132,
            "disagreement_count": 65
          },
          {
            "a": 0,
            "b": 4,
            "distance": 0.06945681211041853,
            "overlap_count": 1123,
            "agreement_count": 1045,
            "disagreement_count": 78
          },
          {
            "a": 0,
            "b": 5,
            "distance": 0.07569386038687972,
            "overlap_count": 1189,
            "agreement_count": 1099,
            "disagreement_count": 90
          },
          {
            "a": 0,
            "b": 6,
            "distance": 0.029239766081871343,
            "overlap_count": 1197,
            "agreement_count": 1162,
            "disagreement_count": 35
          },
          {
            "a": 0,
            "b": 7,
            "distance": 0.05982905982905983,
            "overlap_count": 1170,
            "agreement_count": 1100,
            "disagreement_count": 70
          },
          {
            "a": 0,
            "b": 8,
            "distance": 0.040100250626566414,
            "overlap_count": 1197,
            "agreement_count": 1149,
            "disagreement_count": 48
          },
          {
            "a": 0,
            "b": 9,
            "distance": 0.03678929765886288,
            "overlap_count": 1196,
            "agreement_count": 1152,
            "disagreement_count": 44
          },
          {
            "a": 0,
            "b": 10,
            "distance": 0.13315696649029982,
            "overlap_count": 1134,
            "agreement_count": 983,
            "disagreement_count": 151
          },
          {
            "a": 0,
            "b": 11,
            "distance": 0.06683375104427736,
            "overlap_count": 1197,
            "agreement_count": 1117,
            "disagreement_count": 80
          },
          {
            "a": 0,
            "b": 12,
            "distance": 0.02756892230576441,
            "overlap_count": 1197,
            "agreement_count": 1164,
            "disagreement_count": 33
          },
          {
            "a": 0,
            "b": 13,
            "distance": 0.03341687552213868,
            "overlap_count": 1197,
            "agreement_count": 1157,
            "disagreement_count": 40
          },
          {
            "a": 0,
            "b": 14,
            "distance": 0.04344193817878028,
            "overlap_count": 1197,
            "agreement_count": 1145,
            "disagreement_count": 52
          },
          {
            "a": 0,
            "b": 15,
            "distance": 0.06098579782790309,
            "overlap_count": 1197,
            "agreement_count": 1124,
            "disagreement_count": 73
          },
          {
            "a": 0,
            "b": 16,
            "distance": 0.03842940685045948,
            "overlap_count": 1197,
            "agreement_count": 1151,
            "disagreement_count": 46
          },
          {
            "a": 0,
            "b": 17,
            "distance": 0.05434782608695652,
            "overlap_count": 1196,
            "agreement_count": 1131,
            "disagreement_count": 65
          },
          {
            "a": 0,
            "b": 18,
            "distance": 0.03007518796992481,
            "overlap_count": 1197,
            "agreement_count": 1161,
            "disagreement_count": 36
          },
          {
            "a": 1,
            "b": 2,
            "distance": 0.0016666666666666668,
            "overlap_count": 1200,
            "agreement_count": 1198,
            "disagreement_count": 2
          },
          {
            "a": 1,
            "b": 3,
            "distance": 0.05337781484570475,
            "overlap_count": 1199,
            "agreement_count": 1135,
            "disagreement_count": 64
          },
          {
            "a": 1,
            "b": 4,
            "distance": 0.06933333333333333,
            "overlap_count": 1125,
            "agreement_count": 1047,
            "disagreement_count": 78
          },
          {
            "a": 1,
            "b": 5,
            "distance": 0.06968933669185558,
            "overlap_count": 1191,
            "agreement_count": 1108,
            "disagreement_count": 83
          },
          {
            "a": 1,
            "b": 6,
            "distance": 0.013333333333333334,
            "overlap_count": 1200,
            "agreement_count": 1184,
            "disagreement_count": 16
          },
          {
            "a": 1,
            "b": 7,
            "distance": 0.06313993174061433,
            "overlap_count": 1172,
            "agreement_count": 1098,
            "disagreement_count": 74
          },
          {
            "a": 1,
            "b": 8,
            "distance": 0.030833333333333334,
            "overlap_count": 1200,
            "agreement_count": 1163,
            "disagreement_count": 37
          },
          {
            "a": 1,
            "b": 9,
            "distance": 0.03252710592160134,
            "overlap_count": 1199,
            "agreement_count": 1160,
            "disagreement_count": 39
          },
          {
            "a": 1,
            "b": 10,
            "distance": 0.12313104661389622,
            "overlap_count": 1137,
            "agreement_count": 997,
            "disagreement_count": 140
          },
          {
            "a": 1,
            "b": 11,
            "distance": 0.06666666666666667,
            "overlap_count": 1200,
            "agreement_count": 1120,
            "disagreement_count": 80
          },
          {
            "a": 1,
            "b": 12,
            "distance": 0.006666666666666667,
            "overlap_count": 1200,
            "agreement_count": 1192,
            "disagreement_count": 8
          },
          {
            "a": 1,
            "b": 13,
            "distance": 0.023333333333333334,
            "overlap_count": 1200,
            "agreement_count": 1172,
            "disagreement_count": 28
          },
          {
            "a": 1,
            "b": 14,
            "distance": 0.035833333333333335,
            "overlap_count": 1200,
            "agreement_count": 1157,
            "disagreement_count": 43
          },
          {
            "a": 1,
            "b": 15,
            "distance": 0.045,
            "overlap_count": 1200,
            "agreement_count": 1146,
            "disagreement_count": 54
          },
          {
            "a": 1,
            "b": 16,
            "distance": 0.0275,
            "overlap_count": 1200,
            "agreement_count": 1167,
            "disagreement_count": 33
          },
          {
            "a": 1,
            "b": 17,
            "distance": 0.05087572977481234,
            "overlap_count": 1199,
            "agreement_count": 1138,
            "disagreement_count": 61
          },
          {
            "a": 1,
            "b": 18,
            "distance": 0.005833333333333334,
            "overlap_count": 1200,
            "agreement_count": 1193,
            "disagreement_count": 7
          },
          {
            "a": 2,
            "b": 3,
            "distance": 0.05170975813177648,
            "overlap_count": 1199,
            "agreement_count": 1137,
            "disagreement_count": 62
          },
          {
            "a": 2,
            "b": 4,
            "distance": 0.06933333333333333,
            "overlap_count": 1125,
            "agreement_count": 1047,
            "disagreement_count": 78
          },
          {
            "a": 2,
            "b": 5,
            "distance": 0.06801007556675064,
            "overlap_count": 1191,
            "agreement_count": 1110,
            "disagreement_count": 81
          },
          {
            "a": 2,
            "b": 6,
            "distance": 0.011666666666666667,
            "overlap_count": 1200,
            "agreement_count": 1186,
            "disagreement_count": 14
          },
          {
            "a": 2,
            "b": 7,
            "distance": 0.06143344709897611,
            "overlap_count": 1172,
            "agreement_count": 1100,
            "disagreement_count": 72
          },
          {
            "a": 2,
            "b": 8,
            "distance": 0.029166666666666667,
            "overlap_count": 1200,
            "agreement_count": 1165,
            "disagreement_count": 35
          },
          {
            "a": 2,
            "b": 9,
            "distance": 0.030859049207673062,
            "overlap_count": 1199,
            "agreement_count": 1162,
            "disagreement_count": 37
          },
          {
            "a": 2,
            "b": 10,
            "distance": 0.12137203166226913,
            "overlap_count": 1137,
            "agreement_count": 999,
            "disagreement_count": 138
          },
          {
            "a": 2,
            "b": 11,
            "distance": 0.06666666666666667,
            "overlap_count": 1200,
            "agreement_count": 1120,
            "disagreement_count": 80
          },
          {
            "a": 2,
            "b": 12,
            "distance": 0.006666666666666667,
            "overlap_count": 1200,
            "agreement_count": 1192,
            "disagreement_count": 8
          },
          {
            "a": 2,
            "b": 13,
            "distance": 0.023333333333333334,
            "overlap_count": 1200,
            "agreement_count": 1172,
            "disagreement_count": 28
          },
          {
            "a": 2,
            "b": 14,
            "distance": 0.035833333333333335,
            "overlap_count": 1200,
            "agreement_count": 1157,
            "disagreement_count": 43
          },
          {
            "a": 2,
            "b": 15,
            "distance": 0.043333333333333335,
            "overlap_count": 1200,
            "agreement_count": 1148,
            "disagreement_count": 52
          },
          {
            "a": 2,
            "b": 16,
            "distance": 0.025833333333333333,
            "overlap_count": 1200,
            "agreement_count": 1169,
            "disagreement_count": 31
          },
          {
            "a": 2,
            "b": 17,
            "distance": 0.05254378648874062,
            "overlap_count": 1199,
            "agreement_count": 1136,
            "disagreement_count": 63
          },
          {
            "a": 2,
            "b": 18,
            "distance": 0.004166666666666667,
            "overlap_count": 1200,
            "agreement_count": 1195,
            "disagreement_count": 5
          },
          {
            "a": 3,
            "b": 4,
            "distance": 0.07466666666666667,
            "overlap_count": 1125,
            "agreement_count": 1041,
            "disagreement_count": 84
          },
          {
            "a": 3,
            "b": 5,
            "distance": 0.06801007556675064,
            "overlap_count": 1191,
            "agreement_count": 1110,
            "disagreement_count": 81
          },
          {
            "a": 3,
            "b": 6,
            "distance": 0.04837364470391994,
            "overlap_count": 1199,
            "agreement_count": 1141,
            "disagreement_count": 58
          },
          {
            "a": 3,
            "b": 7,
            "distance": 0.05721605465414176,
            "overlap_count": 1171,
            "agreement_count": 1104,
            "disagreement_count": 67
          },
          {
            "a": 3,
            "b": 8,
            "distance": 0.05087572977481234,
            "overlap_count": 1199,
            "agreement_count": 1138,
            "disagreement_count": 61
          },
          {
            "a": 3,
            "b": 9,
            "distance": 0.052587646076794656,
            "overlap_count": 1198,
            "agreement_count": 1135,
            "disagreement_count": 63
          },
          {
            "a": 3,
            "b": 10,
            "distance": 0.12764084507042253,
            "overlap_count": 1136,
            "agreement_count": 991,
            "disagreement_count": 145
          },
          {
            "a": 3,
            "b": 11,
            "distance": 0.07339449541284404,
            "overlap_count": 1199,
            "agreement_count": 1111,
            "disagreement_count": 88
          },
          {
            "a": 3,
            "b": 12,
            "distance": 0.05170975813177648,
            "overlap_count": 1199,
            "agreement_count": 1137,
            "disagreement_count": 62
          },
          {
            "a": 3,
            "b": 13,
            "distance": 0.05504587155963303,
            "overlap_count": 1199,
            "agreement_count": 1133,
            "disagreement_count": 66
          },
          {
            "a": 3,
            "b": 14,
            "distance": 0.05421184320266889,
            "overlap_count": 1199,
            "agreement_count": 1134,
            "disagreement_count": 65
          },
          {
            "a": 3,
            "b": 15,
            "distance": 0.08006672226855713,
            "overlap_count": 1199,
            "agreement_count": 1103,
            "disagreement_count": 96
          },
          {
            "a": 3,
            "b": 16,
            "distance": 0.0475396163469558,
            "overlap_count": 1199,
            "agreement_count": 1142,
            "disagreement_count": 57
          },
          {
            "a": 3,
            "b": 17,
            "distance": 0.0642737896494157,
            "overlap_count": 1198,
            "agreement_count": 1121,
            "disagreement_count": 77
          },
          {
            "a": 3,
            "b": 18,
            "distance": 0.05254378648874062,
            "overlap_count": 1199,
            "agreement_count": 1136,
            "disagreement_count": 63
          },
          {
            "a": 4,
            "b": 5,
            "distance": 0.07295373665480427,
            "overlap_count": 1124,
            "agreement_count": 1042,
            "disagreement_count": 82
          },
          {
            "a": 4,
            "b": 6,
            "distance": 0.07111111111111111,
            "overlap_count": 1125,
            "agreement_count": 1045,
            "disagreement_count": 80
          },
          {
            "a": 4,
            "b": 7,
            "distance": 0.0703971119133574,
            "overlap_count": 1108,
            "agreement_count": 1030,
            "disagreement_count": 78
          },
          {
            "a": 4,
            "b": 8,
            "distance": 0.07555555555555556,
            "overlap_count": 1125,
            "agreement_count": 1040,
            "disagreement_count": 85
          },
          {
            "a": 4,
            "b": 9,
            "distance": 0.0693950177935943,
            "overlap_count": 1124,
            "agreement_count": 1046,
            "disagreement_count": 78
          },
          {
            "a": 4,
            "b": 10,
            "distance": 0.12017937219730941,
            "overlap_count": 1115,
            "agreement_count": 981,
            "disagreement_count": 134
          },
          {
            "a": 4,
            "b": 11,
            "distance": 0.08444444444444445,
            "overlap_count": 1125,
            "agreement_count": 1030,
            "disagreement_count": 95
          },
          {
            "a": 4,
            "b": 12,
            "distance": 0.06933333333333333,
            "overlap_count": 1125,
            "agreement_count": 1047,
            "disagreement_count": 78
          },
          {
            "a": 4,
            "b": 13,
            "distance": 0.072,
            "overlap_count": 1125,
            "agreement_count": 1044,
            "disagreement_count": 81
          },
          {
            "a": 4,
            "b": 14,
            "distance": 0.07377777777777778,
            "overlap_count": 1125,
            "agreement_count": 1042,
            "disagreement_count": 83
          },
          {
            "a": 4,
            "b": 15,
            "distance": 0.09244444444444444,
            "overlap_count": 1125,
            "agreement_count": 1021,
            "disagreement_count": 104
          },
          {
            "a": 4,
            "b": 16,
            "distance": 0.06577777777777778,
            "overlap_count": 1125,
            "agreement_count": 1051,
            "disagreement_count": 74
          },
          {
            "a": 4,
            "b": 17,
            "distance": 0.08444444444444445,
            "overlap_count": 1125,
            "agreement_count": 1030,
            "disagreement_count": 95
          },
          {
            "a": 4,
            "b": 18,
            "distance": 0.06933333333333333,
            "overlap_count": 1125,
            "agreement_count": 1047,
            "disagreement_count": 78
          },
          {
            "a": 5,
            "b": 6,
            "distance": 0.07304785894206549,
            "overlap_count": 1191,
            "agreement_count": 1104,
            "disagreement_count": 87
          },
          {
            "a": 5,
            "b": 7,
            "distance": 0.061855670103092786,
            "overlap_count": 1164,
            "agreement_count": 1092,
            "disagreement_count": 72
          },
          {
            "a": 5,
            "b": 8,
            "distance": 0.07136859781696053,
            "overlap_count": 1191,
            "agreement_count": 1106,
            "disagreement_count": 85
          },
          {
            "a": 5,
            "b": 9,
            "distance": 0.07394957983193277,
            "overlap_count": 1190,
            "agreement_count": 1102,
            "disagreement_count": 88
          },
          {
            "a": 5,
            "b": 10,
            "distance": 0.13204225352112675,
            "overlap_count": 1136,
            "agreement_count": 986,
            "disagreement_count": 150
          },
          {
            "a": 5,
            "b": 11,
            "distance": 0.07472712006717044,
            "overlap_count": 1191,
            "agreement_count": 1102,
            "disagreement_count": 89
          },
          {
            "a": 5,
            "b": 12,
            "distance": 0.07304785894206549,
            "overlap_count": 1191,
            "agreement_count": 1104,
            "disagreement_count": 87
          },
          {
            "a": 5,
            "b": 13,
            "distance": 0.07556675062972293,
            "overlap_count": 1191,
            "agreement_count": 1101,
            "disagreement_count": 90
          },
          {
            "a": 5,
            "b": 14,
            "distance": 0.06633081444164568,
            "overlap_count": 1191,
            "agreement_count": 1112,
            "disagreement_count": 79
          },
          {
            "a": 5,
            "b": 15,
            "distance": 0.08648194794290512,
            "overlap_count": 1191,
            "agreement_count": 1088,
            "disagreement_count": 103
          },
          {
            "a": 5,
            "b": 16,
            "distance": 0.06381192275398824,
            "overlap_count": 1191,
            "agreement_count": 1115,
            "disagreement_count": 76
          },
          {
            "a": 5,
            "b": 17,
            "distance": 0.06633081444164568,
            "overlap_count": 1191,
            "agreement_count": 1112,
            "disagreement_count": 79
          },
          {
            "a": 5,
            "b": 18,
            "distance": 0.0688497061293031,
            "overlap_count": 1191,
            "agreement_count": 1109,
            "disagreement_count": 82
          },
          {
            "a": 6,
            "b": 7,
            "distance": 0.059726962457337884,
            "overlap_count": 1172,
            "agreement_count": 1102,
            "disagreement_count": 70
          },
          {
            "a": 6,
            "b": 8,
            "distance": 0.024166666666666666,
            "overlap_count": 1200,
            "agreement_count": 1171,
            "disagreement_count": 29
          },
          {
            "a": 6,
            "b": 9,
            "distance": 0.030859049207673062,
            "overlap_count": 1199,
            "agreement_count": 1162,
            "disagreement_count": 37
          },
          {
            "a": 6,
            "b": 10,
            "distance": 0.12576956904133685,
            "overlap_count": 1137,
            "agreement_count": 994,
            "disagreement_count": 143
          },
          {
            "a": 6,
            "b": 11,
            "distance": 0.06833333333333333,
            "overlap_count": 1200,
            "agreement_count": 1118,
            "disagreement_count": 82
          },
          {
            "a": 6,
            "b": 12,
            "distance": 0.008333333333333333,
            "overlap_count": 1200,
            "agreement_count": 1190,
            "disagreement_count": 10
          },
          {
            "a": 6,
            "b": 13,
            "distance": 0.021666666666666667,
            "overlap_count": 1200,
            "agreement_count": 1174,
            "disagreement_count": 26
          },
          {
            "a": 6,
            "b": 14,
            "distance": 0.04083333333333333,
            "overlap_count": 1200,
            "agreement_count": 1151,
            "disagreement_count": 49
          },
          {
            "a": 6,
            "b": 15,
            "distance": 0.045,
            "overlap_count": 1200,
            "agreement_count": 1146,
            "disagreement_count": 54
          },
          {
            "a": 6,
            "b": 16,
            "distance": 0.0225,
            "overlap_count": 1200,
            "agreement_count": 1173,
            "disagreement_count": 27
          },
          {
            "a": 6,
            "b": 17,
            "distance": 0.05587989991659716,
            "overlap_count": 1199,
            "agreement_count": 1132,
            "disagreement_count": 67
          },
          {
            "a": 6,
            "b": 18,
            "distance": 0.010833333333333334,
            "overlap_count": 1200,
            "agreement_count": 1187,
            "disagreement_count": 13
          },
          {
            "a": 7,
            "b": 8,
            "distance": 0.06313993174061433,
            "overlap_count": 1172,
            "agreement_count": 1098,
            "disagreement_count": 74
          },
          {
            "a": 7,
            "b": 9,
            "distance": 0.0623398804440649,
            "overlap_count": 1171,
            "agreement_count": 1098,
            "disagreement_count": 73
          },
          {
            "a": 7,
            "b": 10,
            "distance": 0.1275831087151842,
            "overlap_count": 1113,
            "agreement_count": 971,
            "disagreement_count": 142
          },
          {
            "a": 7,
            "b": 11,
            "distance": 0.07849829351535836,
            "overlap_count": 1172,
            "agreement_count": 1080,
            "disagreement_count": 92
          },
          {
            "a": 7,
            "b": 12,
            "distance": 0.06143344709897611,
            "overlap_count": 1172,
            "agreement_count": 1100,
            "disagreement_count": 72
          },
          {
            "a": 7,
            "b": 13,
            "distance": 0.060580204778156996,
            "overlap_count": 1172,
            "agreement_count": 1101,
            "disagreement_count": 71
          },
          {
            "a": 7,
            "b": 14,
            "distance": 0.05631399317406143,
            "overlap_count": 1172,
            "agreement_count": 1106,
            "disagreement_count": 66
          },
          {
            "a": 7,
            "b": 15,
            "distance": 0.08447098976109214,
            "overlap_count": 1172,
            "agreement_count": 1073,
            "disagreement_count": 99
          },
          {
            "a": 7,
            "b": 16,
            "distance": 0.06313993174061433,
            "overlap_count": 1172,
            "agreement_count": 1098,
            "disagreement_count": 74
          },
          {
            "a": 7,
            "b": 17,
            "distance": 0.06831767719897523,
            "overlap_count": 1171,
            "agreement_count": 1091,
            "disagreement_count": 80
          },
          {
            "a": 7,
            "b": 18,
            "distance": 0.05887372013651877,
            "overlap_count": 1172,
            "agreement_count": 1103,
            "disagreement_count": 69
          },
          {
            "a": 8,
            "b": 9,
            "distance": 0.041701417848206836,
            "overlap_count": 1199,
            "agreement_count": 1149,
            "disagreement_count": 50
          },
          {
            "a": 8,
            "b": 10,
            "distance": 0.13016710642040458,
            "overlap_count": 1137,
            "agreement_count": 989,
            "disagreement_count": 148
          },
          {
            "a": 8,
            "b": 11,
            "distance": 0.0725,
            "overlap_count": 1200,
            "agreement_count": 1113,
            "disagreement_count": 87
          },
          {
            "a": 8,
            "b": 12,
            "distance": 0.029166666666666667,
            "overlap_count": 1200,
            "agreement_count": 1165,
            "disagreement_count": 35
          },
          {
            "a": 8,
            "b": 13,
            "distance": 0.034166666666666665,
            "overlap_count": 1200,
            "agreement_count": 1159,
            "disagreement_count": 41
          },
          {
            "a": 8,
            "b": 14,
            "distance": 0.043333333333333335,
            "overlap_count": 1200,
            "agreement_count": 1148,
            "disagreement_count": 52
          },
          {
            "a": 8,
            "b": 15,
            "distance": 0.059166666666666666,
            "overlap_count": 1200,
            "agreement_count": 1129,
            "disagreement_count": 71
          },
          {
            "a": 8,
            "b": 16,
            "distance": 0.03333333333333333,
            "overlap_count": 1200,
            "agreement_count": 1160,
            "disagreement_count": 40
          },
          {
            "a": 8,
            "b": 17,
            "distance": 0.0567139282735613,
            "overlap_count": 1199,
            "agreement_count": 1131,
            "disagreement_count": 68
          },
          {
            "a": 8,
            "b": 18,
            "distance": 0.03,
            "overlap_count": 1200,
            "agreement_count": 1164,
            "disagreement_count": 36
          },
          {
            "a": 9,
            "b": 10,
            "distance": 0.12411971830985916,
            "overlap_count": 1136,
            "agreement_count": 995,
            "disagreement_count": 141
          },
          {
            "a": 9,
            "b": 11,
            "distance": 0.07172643869891576,
            "overlap_count": 1199,
            "agreement_count": 1113,
            "disagreement_count": 86
          },
          {
            "a": 9,
            "b": 12,
            "distance": 0.029190992493744787,
            "overlap_count": 1199,
            "agreement_count": 1164,
            "disagreement_count": 35
          },
          {
            "a": 9,
            "b": 13,
            "distance": 0.03753127606338615,
            "overlap_count": 1199,
            "agreement_count": 1154,
            "disagreement_count": 45
          },
          {
            "a": 9,
            "b": 14,
            "distance": 0.04837364470391994,
            "overlap_count": 1199,
            "agreement_count": 1141,
            "disagreement_count": 58
          },
          {
            "a": 9,
            "b": 15,
            "distance": 0.05921601334445371,
            "overlap_count": 1199,
            "agreement_count": 1128,
            "disagreement_count": 71
          },
          {
            "a": 9,
            "b": 16,
            "distance": 0.030025020850708923,
            "overlap_count": 1199,
            "agreement_count": 1163,
            "disagreement_count": 36
          },
          {
            "a": 9,
            "b": 17,
            "distance": 0.06343906510851419,
            "overlap_count": 1198,
            "agreement_count": 1122,
            "disagreement_count": 76
          },
          {
            "a": 9,
            "b": 18,
            "distance": 0.030025020850708923,
            "overlap_count": 1199,
            "agreement_count": 1163,
            "disagreement_count": 36
          },
          {
            "a": 10,
            "b": 11,
            "distance": 0.1363236587510994,
            "overlap_count": 1137,
            "agreement_count": 982,
            "disagreement_count": 155
          },
          {
            "a": 10,
            "b": 12,
            "distance": 0.12313104661389622,
            "overlap_count": 1137,
            "agreement_count": 997,
            "disagreement_count": 140
          },
          {
            "a": 10,
            "b": 13,
            "distance": 0.11961301671064203,
            "overlap_count": 1137,
            "agreement_count": 1001,
            "disagreement_count": 136
          },
          {
            "a": 10,
            "b": 14,
            "distance": 0.12752858399296393,
            "overlap_count": 1137,
            "agreement_count": 992,
            "disagreement_count": 145
          },
          {
            "a": 10,
            "b": 15,
            "distance": 0.1442392260334213,
            "overlap_count": 1137,
            "agreement_count": 973,
            "disagreement_count": 164
          },
          {
            "a": 10,
            "b": 16,
            "distance": 0.1248900615655233,
            "overlap_count": 1137,
            "agreement_count": 995,
            "disagreement_count": 142
          },
          {
            "a": 10,
            "b": 17,
            "distance": 0.13192612137203166,
            "overlap_count": 1137,
            "agreement_count": 987,
            "disagreement_count": 150
          },
          {
            "a": 10,
            "b": 18,
            "distance": 0.12137203166226913,
            "overlap_count": 1137,
            "agreement_count": 999,
            "disagreement_count": 138
          },
          {
            "a": 11,
            "b": 12,
            "distance": 0.06833333333333333,
            "overlap_count": 1200,
            "agreement_count": 1118,
            "disagreement_count": 82
          },
          {
            "a": 11,
            "b": 13,
            "distance": 0.06666666666666667,
            "overlap_count": 1200,
            "agreement_count": 1120,
            "disagreement_count": 80
          },
          {
            "a": 11,
            "b": 14,
            "distance": 0.07416666666666667,
            "overlap_count": 1200,
            "agreement_count": 1111,
            "disagreement_count": 89
          },
          {
            "a": 11,
            "b": 15,
            "distance": 0.08333333333333333,
            "overlap_count": 1200,
            "agreement_count": 1100,
            "disagreement_count": 100
          },
          {
            "a": 11,
            "b": 16,
            "distance": 0.0725,
            "overlap_count": 1200,
            "agreement_count": 1113,
            "disagreement_count": 87
          },
          {
            "a": 11,
            "b": 17,
            "distance": 0.07589658048373644,
            "overlap_count": 1199,
            "agreement_count": 1108,
            "disagreement_count": 91
          },
          {
            "a": 11,
            "b": 18,
            "distance": 0.0675,
            "overlap_count": 1200,
            "agreement_count": 1119,
            "disagreement_count": 81
          },
          {
            "a": 12,
            "b": 13,
            "distance": 0.021666666666666667,
            "overlap_count": 1200,
            "agreement_count": 1174,
            "disagreement_count": 26
          },
          {
            "a": 12,
            "b": 14,
            "distance": 0.03916666666666667,
            "overlap_count": 1200,
            "agreement_count": 1153,
            "disagreement_count": 47
          },
          {
            "a": 12,
            "b": 15,
            "distance": 0.043333333333333335,
            "overlap_count": 1200,
            "agreement_count": 1148,
            "disagreement_count": 52
          },
          {
            "a": 12,
            "b": 16,
            "distance": 0.025833333333333333,
            "overlap_count": 1200,
            "agreement_count": 1169,
            "disagreement_count": 31
          },
          {
            "a": 12,
            "b": 17,
            "distance": 0.05421184320266889,
            "overlap_count": 1199,
            "agreement_count": 1134,
            "disagreement_count": 65
          },
          {
            "a": 12,
            "b": 18,
            "distance": 0.0075,
            "overlap_count": 1200,
            "agreement_count": 1191,
            "disagreement_count": 9
          },
          {
            "a": 13,
            "b": 14,
            "distance": 0.035833333333333335,
            "overlap_count": 1200,
            "agreement_count": 1157,
            "disagreement_count": 43
          },
          {
            "a": 13,
            "b": 15,
            "distance": 0.06,
            "overlap_count": 1200,
            "agreement_count": 1128,
            "disagreement_count": 72
          },
          {
            "a": 13,
            "b": 16,
            "distance": 0.0325,
            "overlap_count": 1200,
            "agreement_count": 1161,
            "disagreement_count": 39
          },
          {
            "a": 13,
            "b": 17,
            "distance": 0.05587989991659716,
            "overlap_count": 1199,
            "agreement_count": 1132,
            "disagreement_count": 67
          },
          {
            "a": 13,
            "b": 18,
            "distance": 0.0225,
            "overlap_count": 1200,
            "agreement_count": 1173,
            "disagreement_count": 27
          },
          {
            "a": 14,
            "b": 15,
            "distance": 0.07416666666666667,
            "overlap_count": 1200,
            "agreement_count": 1111,
            "disagreement_count": 89
          },
          {
            "a": 14,
            "b": 16,
            "distance": 0.04,
            "overlap_count": 1200,
            "agreement_count": 1152,
            "disagreement_count": 48
          },
          {
            "a": 14,
            "b": 17,
            "distance": 0.060050041701417846,
            "overlap_count": 1199,
            "agreement_count": 1127,
            "disagreement_count": 72
          },
          {
            "a": 14,
            "b": 18,
            "distance": 0.03666666666666667,
            "overlap_count": 1200,
            "agreement_count": 1156,
            "disagreement_count": 44
          },
          {
            "a": 15,
            "b": 16,
            "distance": 0.05416666666666667,
            "overlap_count": 1200,
            "agreement_count": 1135,
            "disagreement_count": 65
          },
          {
            "a": 15,
            "b": 17,
            "distance": 0.0725604670558799,
            "overlap_count": 1199,
            "agreement_count": 1112,
            "disagreement_count": 87
          },
          {
            "a": 15,
            "b": 18,
            "distance": 0.0425,
            "overlap_count": 1200,
            "agreement_count": 1149,
            "disagreement_count": 51
          },
          {
            "a": 16,
            "b": 17,
            "distance": 0.05504587155963303,
            "overlap_count": 1199,
            "agreement_count": 1133,
            "disagreement_count": 66
          },
          {
            "a": 16,
            "b": 18,
            "distance": 0.02666666666666667,
            "overlap_count": 1200,
            "agreement_count": 1168,
            "disagreement_count": 32
          },
          {
            "a": 17,
            "b": 18,
            "distance": 0.05504587155963303,
            "overlap_count": 1199,
            "agreement_count": 1133,
            "disagreement_count": 66
          }
        ],
        "linkage": [
          [
            1,
            2,
            0.0016666666666666668,
            2
          ],
          [
            18,
            19,
            0.005,
            3
          ],
          [
            12,
            20,
            0.006944444444444445,
            4
          ],
          [
            6,
            21,
            0.011041666666666667,
            5
          ],
          [
            13,
            22,
            0.0225,
            6
          ],
          [
            16,
            23,
            0.026805555555555555,
            7
          ],
          [
            0,
            24,
            0.029836496001909536,
            8
          ],
          [
            8,
            25,
            0.031366697994987466,
            9
          ],
          [
            9,
            26,
            0.033278692233618436,
            10
          ],
          [
            14,
            27,
            0.039931558288270025,
            11
          ],
          [
            3,
            28,
            0.052025262907841666,
            12
          ],
          [
            15,
            29,
            0.05557793334229838,
            13
          ],
          [
            17,
            30,
            0.057759078484186745,
            14
          ],
          [
            5,
            7,
            0.061855670103092786,
            2
          ],
          [
            31,
            32,
            0.06714801542634556,
            16
          ],
          [
            11,
            33,
            0.07173395911806056,
            17
          ],
          [
            4,
            34,
            0.07375048435457235,
            18
          ],
          [
            10,
            35,
            0.12745476470799752,
            19
          ]
        ],
        "linkage_complete": true
      },
      {
        "group_id": "46e4f495cf8a7116",
        "cluster_scope": "cross_model",
        "representative_policy": "latest",
        "task_fingerprint": "2c0d314be63ddff932b8c577820159b00f71219bdb5eba31e8cbc0736a1243f5",
        "normalized_tag_key": "english;pragmatics;semantics;sentiment analysis",
        "task_name_display": "sentiment analysis",
        "task_names_seen": [
          "sentiment analysis"
        ],
        "tags_display": "sentiment analysis; English; semantics; pragmatics",
        "model_count": 2,
        "distance_metric": "nominal_disagreement_rate",
        "linkage_method": "average",
        "representative_run_stems": [
          "sentiment__vertex__gemini3flashpreview__2026-04-30-00-04",
          "sentiment__vertex__gemini31flashlitepreview__2026-04-29-23-18"
        ],
        "comparable_pair_count": 1,
        "representatives": [
          {
            "provider": "vertex",
            "model": "gemini-3-flash-preview",
            "run_stem": "sentiment__vertex__gemini3flashpreview__2026-04-30-00-04",
            "metrics_file": "sentiment__vertex__gemini3flashpreview__2026-04-30-00-04__metrics.json",
            "timestamp": "2026-04-29T22:04:02.485410Z",
            "accuracy": 0.9425,
            "cohen_kappa": 0.885
          },
          {
            "provider": "vertex",
            "model": "gemini-3.1-flash-lite-preview",
            "run_stem": "sentiment__vertex__gemini31flashlitepreview__2026-04-29-23-18",
            "metrics_file": "sentiment__vertex__gemini31flashlitepreview__2026-04-29-23-18__metrics.json",
            "timestamp": "2026-04-29T21:18:08.340230Z",
            "accuracy": 0.9475,
            "cohen_kappa": 0.8952618453865336
          }
        ],
        "pairwise": [
          {
            "a": 0,
            "b": 1,
            "distance": 0.017543859649122806,
            "overlap_count": 399,
            "agreement_count": 392,
            "disagreement_count": 7
          }
        ],
        "linkage": [
          [
            0,
            1,
            0.017543859649122806,
            2
          ]
        ],
        "linkage_complete": true
      }
    ],
    "best_accuracy": [
      {
        "group_id": "ea2931fcb792fdbc",
        "cluster_scope": "cross_model",
        "representative_policy": "best_accuracy",
        "task_fingerprint": "7326c5be7cb61f6e9585d417047cb29a4d0b0ca63102404c9692e99eeae036b2",
        "normalized_tag_key": "*like*;discourse;english;pragmatics;v1",
        "task_name_display": "*like* discourse/pragm",
        "task_names_seen": [
          "*like* discourse/pragm"
        ],
        "tags_display": "*like*; discourse; pragmatics; English; v1",
        "model_count": 6,
        "distance_metric": "nominal_disagreement_rate",
        "linkage_method": "average",
        "representative_run_stems": [
          "like____gptoss120b__2025-11-09-17-09",
          "like____gpt5__2025-11-09-02-03",
          "like__requesty__anthropicclaudesonnet46__2026-03-12-14-40",
          "like__requesty__novitadeepseekdeepseekv32__2026-03-12-16-52",
          "like__vertex__gemini3flashpreview__2026-03-12-01-31",
          "like__vertex__gemini31propreview__2026-03-12-18-10"
        ],
        "comparable_pair_count": 15,
        "representatives": [
          {
            "provider": "openai",
            "model": "gpt-oss-120b",
            "run_stem": "like____gptoss120b__2025-11-09-17-09",
            "metrics_file": "like____gptoss120b__2025-11-09-17-09__metrics.json",
            "timestamp": "2025-11-09T16:09:03.246413Z",
            "accuracy": 0.6416416416416416,
            "cohen_kappa": 0.5149610698897534
          },
          {
            "provider": "openai",
            "model": "gpt5",
            "run_stem": "like____gpt5__2025-11-09-02-03",
            "metrics_file": "like____gpt5__2025-11-09-02-03__metrics.json",
            "timestamp": "2025-11-09T01:03:38.395018Z",
            "accuracy": 0.8398398398398398,
            "cohen_kappa": 0.7709513457898124
          },
          {
            "provider": "requesty",
            "model": "claude-sonnet-4-6",
            "run_stem": "like__requesty__anthropicclaudesonnet46__2026-03-12-14-40",
            "metrics_file": "like__requesty__anthropicclaudesonnet46__2026-03-12-14-40__metrics.json",
            "timestamp": "2026-03-12T13:40:58.748047Z",
            "accuracy": 0.8298298298298298,
            "cohen_kappa": 0.7574231404297621
          },
          {
            "provider": "requesty",
            "model": "deepseek-v3.2",
            "run_stem": "like__requesty__novitadeepseekdeepseekv32__2026-03-12-16-52",
            "metrics_file": "like__requesty__novitadeepseekdeepseekv32__2026-03-12-16-52__metrics.json",
            "timestamp": "2026-03-12T15:52:04.876466Z",
            "accuracy": 0.6446446446446447,
            "cohen_kappa": 0.47974932556430355
          },
          {
            "provider": "vertex",
            "model": "gemini-3-flash-preview",
            "run_stem": "like__vertex__gemini3flashpreview__2026-03-12-01-31",
            "metrics_file": "like__vertex__gemini3flashpreview__2026-03-12-01-31__metrics.json",
            "timestamp": "2026-03-12T00:31:13.838640Z",
            "accuracy": 0.8838838838838838,
            "cohen_kappa": 0.832653167320839
          },
          {
            "provider": "vertex",
            "model": "gemini-3.1-pro-preview",
            "run_stem": "like__vertex__gemini31propreview__2026-03-12-18-10",
            "metrics_file": "like__vertex__gemini31propreview__2026-03-12-18-10__metrics.json",
            "timestamp": "2026-03-12T17:10:58.353313Z",
            "accuracy": 0.8948948948948949,
            "cohen_kappa": 0.8484215728492612
          }
        ],
        "pairwise": [
          {
            "a": 0,
            "b": 1,
            "distance": 0.22894424673784106,
            "overlap_count": 843,
            "agreement_count": 650,
            "disagreement_count": 193
          },
          {
            "a": 0,
            "b": 2,
            "distance": 0.24823529411764705,
            "overlap_count": 850,
            "agreement_count": 639,
            "disagreement_count": 211
          },
          {
            "a": 0,
            "b": 3,
            "distance": 0.36823529411764705,
            "overlap_count": 850,
            "agreement_count": 537,
            "disagreement_count": 313
          },
          {
            "a": 0,
            "b": 4,
            "distance": 0.21672555948174324,
            "overlap_count": 849,
            "agreement_count": 665,
            "disagreement_count": 184
          },
          {
            "a": 0,
            "b": 5,
            "distance": 0.2429245283018868,
            "overlap_count": 848,
            "agreement_count": 642,
            "disagreement_count": 206
          },
          {
            "a": 1,
            "b": 2,
            "distance": 0.19461697722567287,
            "overlap_count": 966,
            "agreement_count": 778,
            "disagreement_count": 188
          },
          {
            "a": 1,
            "b": 3,
            "distance": 0.33436853002070394,
            "overlap_count": 966,
            "agreement_count": 643,
            "disagreement_count": 323
          },
          {
            "a": 1,
            "b": 4,
            "distance": 0.08799171842650104,
            "overlap_count": 966,
            "agreement_count": 881,
            "disagreement_count": 85
          },
          {
            "a": 1,
            "b": 5,
            "distance": 0.09326424870466321,
            "overlap_count": 965,
            "agreement_count": 875,
            "disagreement_count": 90
          },
          {
            "a": 2,
            "b": 3,
            "distance": 0.35923309788092833,
            "overlap_count": 991,
            "agreement_count": 635,
            "disagreement_count": 356
          },
          {
            "a": 2,
            "b": 4,
            "distance": 0.1505050505050505,
            "overlap_count": 990,
            "agreement_count": 841,
            "disagreement_count": 149
          },
          {
            "a": 2,
            "b": 5,
            "distance": 0.16396761133603238,
            "overlap_count": 988,
            "agreement_count": 826,
            "disagreement_count": 162
          },
          {
            "a": 3,
            "b": 4,
            "distance": 0.32727272727272727,
            "overlap_count": 990,
            "agreement_count": 666,
            "disagreement_count": 324
          },
          {
            "a": 3,
            "b": 5,
            "distance": 0.3360323886639676,
            "overlap_count": 988,
            "agreement_count": 656,
            "disagreement_count": 332
          },
          {
            "a": 4,
            "b": 5,
            "distance": 0.053643724696356275,
            "overlap_count": 988,
            "agreement_count": 935,
            "disagreement_count": 53
          }
        ],
        "linkage": [
          [
            4,
            5,
            0.053643724696356275,
            2
          ],
          [
            1,
            6,
            0.09062798356558213,
            3
          ],
          [
            2,
            7,
            0.16969654635558526,
            4
          ],
          [
            0,
            8,
            0.23420740715977953,
            5
          ],
          [
            3,
            9,
            0.34502840759119485,
            6
          ]
        ],
        "linkage_complete": true
      },
      {
        "group_id": "1247c033faa2b147",
        "cluster_scope": "cross_model",
        "representative_policy": "best_accuracy",
        "task_fingerprint": "7326c5be7cb61f6e9585d417047cb29a4d0b0ca63102404c9692e99eeae036b2",
        "normalized_tag_key": "*like*;discourse;english;pragmatics;v2",
        "task_name_display": "*like* discourse/pragm",
        "task_names_seen": [
          "*like* discourse/pragm"
        ],
        "tags_display": "*like*; discourse; pragmatics; English; v2",
        "model_count": 4,
        "distance_metric": "nominal_disagreement_rate",
        "linkage_method": "average",
        "representative_run_stems": [
          "like__vertex__gemini3flashpreview__2026-03-13-02-32",
          "like__vertex__gemini3propreview__2026-03-13-03-28",
          "like__vertex__gemini31flashlitepreview__2026-03-13-03-29",
          "like__vertex__gemini31propreview__2026-03-13-12-02"
        ],
        "comparable_pair_count": 6,
        "representatives": [
          {
            "provider": "vertex",
            "model": "gemini-3-flash-preview",
            "run_stem": "like__vertex__gemini3flashpreview__2026-03-13-02-32",
            "metrics_file": "like__vertex__gemini3flashpreview__2026-03-13-02-32__metrics.json",
            "timestamp": "2026-03-13T01:32:26.042657Z",
            "accuracy": 0.908908908908909,
            "cohen_kappa": 0.8684070726005488
          },
          {
            "provider": "vertex",
            "model": "gemini-3-pro-preview",
            "run_stem": "like__vertex__gemini3propreview__2026-03-13-03-28",
            "metrics_file": "like__vertex__gemini3propreview__2026-03-13-03-28__metrics.json",
            "timestamp": "2026-03-13T02:29:02.578184Z",
            "accuracy": 0.9019019019019019,
            "cohen_kappa": 0.8586077115714009
          },
          {
            "provider": "vertex",
            "model": "gemini-3.1-flash-lite-preview",
            "run_stem": "like__vertex__gemini31flashlitepreview__2026-03-13-03-29",
            "metrics_file": "like__vertex__gemini31flashlitepreview__2026-03-13-03-29__metrics.json",
            "timestamp": "2026-03-13T02:29:25.545069Z",
            "accuracy": 0.7877877877877878,
            "cohen_kappa": 0.6900339840354066
          },
          {
            "provider": "vertex",
            "model": "gemini-3.1-pro-preview",
            "run_stem": "like__vertex__gemini31propreview__2026-03-13-12-02",
            "metrics_file": "like__vertex__gemini31propreview__2026-03-13-12-02__metrics.json",
            "timestamp": "2026-03-13T11:02:34.724184Z",
            "accuracy": 0.914914914914915,
            "cohen_kappa": 0.876989156966848
          }
        ],
        "pairwise": [
          {
            "a": 0,
            "b": 1,
            "distance": 0.06666666666666667,
            "overlap_count": 990,
            "agreement_count": 924,
            "disagreement_count": 66
          },
          {
            "a": 0,
            "b": 2,
            "distance": 0.1717479674796748,
            "overlap_count": 984,
            "agreement_count": 815,
            "disagreement_count": 169
          },
          {
            "a": 0,
            "b": 3,
            "distance": 0.06060606060606061,
            "overlap_count": 990,
            "agreement_count": 930,
            "disagreement_count": 60
          },
          {
            "a": 1,
            "b": 2,
            "distance": 0.17682926829268292,
            "overlap_count": 984,
            "agreement_count": 810,
            "disagreement_count": 174
          },
          {
            "a": 1,
            "b": 3,
            "distance": 0.046464646464646465,
            "overlap_count": 990,
            "agreement_count": 944,
            "disagreement_count": 46
          },
          {
            "a": 2,
            "b": 3,
            "distance": 0.17073170731707318,
            "overlap_count": 984,
            "agreement_count": 816,
            "disagreement_count": 168
          }
        ],
        "linkage": [
          [
            1,
            3,
            0.046464646464646465,
            2
          ],
          [
            0,
            4,
            0.06363636363636363,
            3
          ],
          [
            2,
            5,
            0.17310298102981028,
            4
          ]
        ],
        "linkage_complete": true
      },
      {
        "group_id": "e5971c73350ad936",
        "cluster_scope": "cross_model",
        "representative_policy": "best_accuracy",
        "task_fingerprint": "cd60855ae86dc7a8391248627a68bbc5a77fabf2d75d0b967118c09aec3d8faa",
        "normalized_tag_key": "*like*;discourse;english;pragmatics;v3",
        "task_name_display": "*like* discourse/pragm",
        "task_names_seen": [
          "*like* discourse/pragm"
        ],
        "tags_display": "*like*; discourse; pragmatics; English; v3",
        "model_count": 17,
        "distance_metric": "nominal_disagreement_rate",
        "linkage_method": "average",
        "representative_run_stems": [
          "like_interrater__einfra__deepseekv32thinking__2026-03-16-23-13",
          "like_interrater__einfra__glm47__2026-03-17-00-08",
          "like_interrater__einfra__glm5__2026-03-31-17-53",
          "like_interrater__einfra__glm51__2026-04-24-16-04",
          "like_interrater__einfra__kimik25__2026-03-19-14-20",
          "like_interrater__einfra__kimik26__2026-04-21-18-06",
          "like_interrater__einfra__qwen35__2026-03-16-23-35",
          "like_interrater__google__modelsgemma426ba4bit__2026-04-04-01-49",
          "like_interrater__openai__gpt54__2026-03-16-23-16",
          "like_interrater__openai__gpt54mini__2026-03-20-18-10",
          "like_interrater__openrouter__qwenqwen36plusfree__2026-04-03-16-35",
          "like_interrater__requesty__anthropicclaudehaiku45__2026-03-23-00-11",
          "like_interrater__requesty__anthropicclaudeopus46__2026-03-21-02-24",
          "like_interrater__requesty__nebiuszaiorgglm47__2026-03-17-15-44",
          "like_interrater__requesty__moonshotkimik25__2026-03-17-15-43",
          "like_interrater__vertex__gemini3flashpreview__2026-03-16-20-22",
          "like_interrater__vertex__gemini31propreview__2026-03-16-23-17"
        ],
        "comparable_pair_count": 136,
        "representatives": [
          {
            "provider": "e-infra",
            "model": "deepseek-v3.2-thinking",
            "run_stem": "like_interrater__einfra__deepseekv32thinking__2026-03-16-23-13",
            "metrics_file": "like_interrater__einfra__deepseekv32thinking__2026-03-16-23-13__metrics.json",
            "timestamp": "2026-03-16T22:13:59.292606Z",
            "accuracy": 0.7565217391304347,
            "cohen_kappa": 0.6858536585365853
          },
          {
            "provider": "e-infra",
            "model": "glm-4.7",
            "run_stem": "like_interrater__einfra__glm47__2026-03-17-00-08",
            "metrics_file": "like_interrater__einfra__glm47__2026-03-17-00-08__metrics.json",
            "timestamp": "2026-03-16T23:08:13.298000Z",
            "accuracy": 0.8260869565217391,
            "cohen_kappa": 0.7717121588089332
          },
          {
            "provider": "e-infra",
            "model": "glm-5",
            "run_stem": "like_interrater__einfra__glm5__2026-03-31-17-53",
            "metrics_file": "like_interrater__einfra__glm5__2026-03-31-17-53__metrics.json",
            "timestamp": "2026-03-31T15:53:31.126778Z",
            "accuracy": 0.8869565217391304,
            "cohen_kappa": 0.8496732026143791
          },
          {
            "provider": "e-infra",
            "model": "glm-5.1",
            "run_stem": "like_interrater__einfra__glm51__2026-04-24-16-04",
            "metrics_file": "like_interrater__einfra__glm51__2026-04-24-16-04__metrics.json",
            "timestamp": "2026-04-24T14:04:41.439134Z",
            "accuracy": 0.8782608695652174,
            "cohen_kappa": 0.8386773547094188
          },
          {
            "provider": "e-infra",
            "model": "kimi-k2.5",
            "run_stem": "like_interrater__einfra__kimik25__2026-03-19-14-20",
            "metrics_file": "like_interrater__einfra__kimik25__2026-03-19-14-20__metrics.json",
            "timestamp": "2026-03-19T13:20:12.743909Z",
            "accuracy": 0.8434782608695652,
            "cohen_kappa": 0.7933100349475787
          },
          {
            "provider": "e-infra",
            "model": "kimi-k2.6",
            "run_stem": "like_interrater__einfra__kimik26__2026-04-21-18-06",
            "metrics_file": "like_interrater__einfra__kimik26__2026-04-21-18-06__metrics.json",
            "timestamp": "2026-04-25T10:26:44.480602Z",
            "accuracy": 0.808695652173913,
            "cohen_kappa": 0.7486338797814208
          },
          {
            "provider": "e-infra",
            "model": "qwen3.5",
            "run_stem": "like_interrater__einfra__qwen35__2026-03-16-23-35",
            "metrics_file": "like_interrater__einfra__qwen35__2026-03-16-23-35__metrics.json",
            "timestamp": "2026-03-16T22:35:09.822527Z",
            "accuracy": 0.8956521739130435,
            "cohen_kappa": 0.8637037037037038
          },
          {
            "provider": "google",
            "model": "gemma-4-26b-a4b-it",
            "run_stem": "like_interrater__google__modelsgemma426ba4bit__2026-04-04-01-49",
            "metrics_file": "like_interrater__google__modelsgemma426ba4bit__2026-04-04-01-49__metrics.json",
            "timestamp": "2026-04-03T23:49:56.734262Z",
            "accuracy": 0.8173913043478261,
            "cohen_kappa": 0.7577733199598797
          },
          {
            "provider": "openai",
            "model": "gpt-5.4",
            "run_stem": "like_interrater__openai__gpt54__2026-03-16-23-16",
            "metrics_file": "like_interrater__openai__gpt54__2026-03-16-23-16__metrics.json",
            "timestamp": "2026-03-16T22:16:19.942954Z",
            "accuracy": 0.6869565217391305,
            "cohen_kappa": 0.6090651558073654
          },
          {
            "provider": "openai",
            "model": "gpt-5.4-mini",
            "run_stem": "like_interrater__openai__gpt54mini__2026-03-20-18-10",
            "metrics_file": "like_interrater__openai__gpt54mini__2026-03-20-18-10__metrics.json",
            "timestamp": "2026-03-20T17:11:01.726403Z",
            "accuracy": 0.6086956521739131,
            "cohen_kappa": 0.48964497041420124
          },
          {
            "provider": "openrouter",
            "model": "qwen3.6",
            "run_stem": "like_interrater__openrouter__qwenqwen36plusfree__2026-04-03-16-35",
            "metrics_file": "like_interrater__openrouter__qwenqwen36plusfree__2026-04-03-16-35__metrics.json",
            "timestamp": "2026-04-03T14:35:02.774817Z",
            "accuracy": 0.8869565217391304,
            "cohen_kappa": 0.8521266073194856
          },
          {
            "provider": "requesty",
            "model": "claude-haiku-4-5",
            "run_stem": "like_interrater__requesty__anthropicclaudehaiku45__2026-03-23-00-11",
            "metrics_file": "like_interrater__requesty__anthropicclaudehaiku45__2026-03-23-00-11__metrics.json",
            "timestamp": "2026-03-22T23:11:59.214488Z",
            "accuracy": 0.5478260869565217,
            "cohen_kappa": 0.40909090909090906
          },
          {
            "provider": "requesty",
            "model": "claude-opus-4-6",
            "run_stem": "like_interrater__requesty__anthropicclaudeopus46__2026-03-21-02-24",
            "metrics_file": "like_interrater__requesty__anthropicclaudeopus46__2026-03-21-02-24__metrics.json",
            "timestamp": "2026-03-21T01:24:28.930846Z",
            "accuracy": 0.8173913043478261,
            "cohen_kappa": 0.7521806054386866
          },
          {
            "provider": "requesty",
            "model": "glm-4.7",
            "run_stem": "like_interrater__requesty__nebiuszaiorgglm47__2026-03-17-15-44",
            "metrics_file": "like_interrater__requesty__nebiuszaiorgglm47__2026-03-17-15-44__metrics.json",
            "timestamp": "2026-03-17T14:44:34.806095Z",
            "accuracy": 0.8,
            "cohen_kappa": 0.7374689826302729
          },
          {
            "provider": "requesty",
            "model": "kimi-k2.5",
            "run_stem": "like_interrater__requesty__moonshotkimik25__2026-03-17-15-43",
            "metrics_file": "like_interrater__requesty__moonshotkimik25__2026-03-17-15-43__metrics.json",
            "timestamp": "2026-03-17T14:43:35.190186Z",
            "accuracy": 0.8608695652173913,
            "cohen_kappa": 0.817279046673287
          },
          {
            "provider": "vertex",
            "model": "gemini-3-flash-preview",
            "run_stem": "like_interrater__vertex__gemini3flashpreview__2026-03-16-20-22",
            "metrics_file": "like_interrater__vertex__gemini3flashpreview__2026-03-16-20-22__metrics.json",
            "timestamp": "2026-03-16T19:22:29.614294Z",
            "accuracy": 0.9217391304347826,
            "cohen_kappa": 0.8961885656970913
          },
          {
            "provider": "vertex",
            "model": "gemini-3.1-pro-preview",
            "run_stem": "like_interrater__vertex__gemini31propreview__2026-03-16-23-17",
            "metrics_file": "like_interrater__vertex__gemini31propreview__2026-03-16-23-17__metrics.json",
            "timestamp": "2026-03-16T22:17:19.752371Z",
            "accuracy": 0.9130434782608695,
            "cohen_kappa": 0.8857426726279185
          }
        ],
        "pairwise": [
          {
            "a": 0,
            "b": 1,
            "distance": 0.20388349514563106,
            "overlap_count": 103,
            "agreement_count": 82,
            "disagreement_count": 21
          },
          {
            "a": 0,
            "b": 2,
            "distance": 0.20952380952380953,
            "overlap_count": 105,
            "agreement_count": 83,
            "disagreement_count": 22
          },
          {
            "a": 0,
            "b": 3,
            "distance": 0.20952380952380953,
            "overlap_count": 105,
            "agreement_count": 83,
            "disagreement_count": 22
          },
          {
            "a": 0,
            "b": 4,
            "distance": 0.16346153846153846,
            "overlap_count": 104,
            "agreement_count": 87,
            "disagreement_count": 17
          },
          {
            "a": 0,
            "b": 5,
            "distance": 0.13333333333333333,
            "overlap_count": 105,
            "agreement_count": 91,
            "disagreement_count": 14
          },
          {
            "a": 0,
            "b": 6,
            "distance": 0.17142857142857143,
            "overlap_count": 105,
            "agreement_count": 87,
            "disagreement_count": 18
          },
          {
            "a": 0,
            "b": 7,
            "distance": 0.2403846153846154,
            "overlap_count": 104,
            "agreement_count": 79,
            "disagreement_count": 25
          },
          {
            "a": 0,
            "b": 8,
            "distance": 0.38095238095238093,
            "overlap_count": 105,
            "agreement_count": 65,
            "disagreement_count": 40
          },
          {
            "a": 0,
            "b": 9,
            "distance": 0.30097087378640774,
            "overlap_count": 103,
            "agreement_count": 72,
            "disagreement_count": 31
          },
          {
            "a": 0,
            "b": 10,
            "distance": 0.19047619047619047,
            "overlap_count": 105,
            "agreement_count": 85,
            "disagreement_count": 20
          },
          {
            "a": 0,
            "b": 11,
            "distance": 0.33980582524271846,
            "overlap_count": 103,
            "agreement_count": 68,
            "disagreement_count": 35
          },
          {
            "a": 0,
            "b": 12,
            "distance": 0.2857142857142857,
            "overlap_count": 105,
            "agreement_count": 75,
            "disagreement_count": 30
          },
          {
            "a": 0,
            "b": 13,
            "distance": 0.1619047619047619,
            "overlap_count": 105,
            "agreement_count": 88,
            "disagreement_count": 17
          },
          {
            "a": 0,
            "b": 14,
            "distance": 0.14705882352941177,
            "overlap_count": 102,
            "agreement_count": 87,
            "disagreement_count": 15
          },
          {
            "a": 0,
            "b": 15,
            "distance": 0.17142857142857143,
            "overlap_count": 105,
            "agreement_count": 87,
            "disagreement_count": 18
          },
          {
            "a": 0,
            "b": 16,
            "distance": 0.2,
            "overlap_count": 105,
            "agreement_count": 84,
            "disagreement_count": 21
          },
          {
            "a": 1,
            "b": 2,
            "distance": 0.0660377358490566,
            "overlap_count": 106,
            "agreement_count": 99,
            "disagreement_count": 7
          },
          {
            "a": 1,
            "b": 3,
            "distance": 0.09433962264150944,
            "overlap_count": 106,
            "agreement_count": 96,
            "disagreement_count": 10
          },
          {
            "a": 1,
            "b": 4,
            "distance": 0.10476190476190476,
            "overlap_count": 105,
            "agreement_count": 94,
            "disagreement_count": 11
          },
          {
            "a": 1,
            "b": 5,
            "distance": 0.09345794392523364,
            "overlap_count": 107,
            "agreement_count": 97,
            "disagreement_count": 10
          },
          {
            "a": 1,
            "b": 6,
            "distance": 0.08571428571428572,
            "overlap_count": 105,
            "agreement_count": 96,
            "disagreement_count": 9
          },
          {
            "a": 1,
            "b": 7,
            "distance": 0.13333333333333333,
            "overlap_count": 105,
            "agreement_count": 91,
            "disagreement_count": 14
          },
          {
            "a": 1,
            "b": 8,
            "distance": 0.29906542056074764,
            "overlap_count": 107,
            "agreement_count": 75,
            "disagreement_count": 32
          },
          {
            "a": 1,
            "b": 9,
            "distance": 0.2857142857142857,
            "overlap_count": 105,
            "agreement_count": 75,
            "disagreement_count": 30
          },
          {
            "a": 1,
            "b": 10,
            "distance": 0.14018691588785046,
            "overlap_count": 107,
            "agreement_count": 92,
            "disagreement_count": 15
          },
          {
            "a": 1,
            "b": 11,
            "distance": 0.3333333333333333,
            "overlap_count": 105,
            "agreement_count": 70,
            "disagreement_count": 35
          },
          {
            "a": 1,
            "b": 12,
            "distance": 0.20754716981132076,
            "overlap_count": 106,
            "agreement_count": 84,
            "disagreement_count": 22
          },
          {
            "a": 1,
            "b": 13,
            "distance": 0.1320754716981132,
            "overlap_count": 106,
            "agreement_count": 92,
            "disagreement_count": 14
          },
          {
            "a": 1,
            "b": 14,
            "distance": 0.11650485436893204,
            "overlap_count": 103,
            "agreement_count": 91,
            "disagreement_count": 12
          },
          {
            "a": 1,
            "b": 15,
            "distance": 0.11214953271028037,
            "overlap_count": 107,
            "agreement_count": 95,
            "disagreement_count": 12
          },
          {
            "a": 1,
            "b": 16,
            "distance": 0.12380952380952381,
            "overlap_count": 105,
            "agreement_count": 92,
            "disagreement_count": 13
          },
          {
            "a": 2,
            "b": 3,
            "distance": 0.05504587155963303,
            "overlap_count": 109,
            "agreement_count": 103,
            "disagreement_count": 6
          },
          {
            "a": 2,
            "b": 4,
            "distance": 0.09259259259259259,
            "overlap_count": 108,
            "agreement_count": 98,
            "disagreement_count": 10
          },
          {
            "a": 2,
            "b": 5,
            "distance": 0.09090909090909091,
            "overlap_count": 110,
            "agreement_count": 100,
            "disagreement_count": 10
          },
          {
            "a": 2,
            "b": 6,
            "distance": 0.056074766355140186,
            "overlap_count": 107,
            "agreement_count": 101,
            "disagreement_count": 6
          },
          {
            "a": 2,
            "b": 7,
            "distance": 0.11926605504587157,
            "overlap_count": 109,
            "agreement_count": 96,
            "disagreement_count": 13
          },
          {
            "a": 2,
            "b": 8,
            "distance": 0.29357798165137616,
            "overlap_count": 109,
            "agreement_count": 77,
            "disagreement_count": 32
          },
          {
            "a": 2,
            "b": 9,
            "distance": 0.2962962962962963,
            "overlap_count": 108,
            "agreement_count": 76,
            "disagreement_count": 32
          },
          {
            "a": 2,
            "b": 10,
            "distance": 0.10185185185185185,
            "overlap_count": 108,
            "agreement_count": 97,
            "disagreement_count": 11
          },
          {
            "a": 2,
            "b": 11,
            "distance": 0.3425925925925926,
            "overlap_count": 108,
            "agreement_count": 71,
            "disagreement_count": 37
          },
          {
            "a": 2,
            "b": 12,
            "distance": 0.1559633027522936,
            "overlap_count": 109,
            "agreement_count": 92,
            "disagreement_count": 17
          },
          {
            "a": 2,
            "b": 13,
            "distance": 0.10091743119266056,
            "overlap_count": 109,
            "agreement_count": 98,
            "disagreement_count": 11
          },
          {
            "a": 2,
            "b": 14,
            "distance": 0.09523809523809523,
            "overlap_count": 105,
            "agreement_count": 95,
            "disagreement_count": 10
          },
          {
            "a": 2,
            "b": 15,
            "distance": 0.07272727272727272,
            "overlap_count": 110,
            "agreement_count": 102,
            "disagreement_count": 8
          },
          {
            "a": 2,
            "b": 16,
            "distance": 0.06481481481481481,
            "overlap_count": 108,
            "agreement_count": 101,
            "disagreement_count": 7
          },
          {
            "a": 3,
            "b": 4,
            "distance": 0.11214953271028037,
            "overlap_count": 107,
            "agreement_count": 95,
            "disagreement_count": 12
          },
          {
            "a": 3,
            "b": 5,
            "distance": 0.11009174311926606,
            "overlap_count": 109,
            "agreement_count": 97,
            "disagreement_count": 12
          },
          {
            "a": 3,
            "b": 6,
            "distance": 0.06542056074766354,
            "overlap_count": 107,
            "agreement_count": 100,
            "disagreement_count": 7
          },
          {
            "a": 3,
            "b": 7,
            "distance": 0.10185185185185185,
            "overlap_count": 108,
            "agreement_count": 97,
            "disagreement_count": 11
          },
          {
            "a": 3,
            "b": 8,
            "distance": 0.28440366972477066,
            "overlap_count": 109,
            "agreement_count": 78,
            "disagreement_count": 31
          },
          {
            "a": 3,
            "b": 9,
            "distance": 0.308411214953271,
            "overlap_count": 107,
            "agreement_count": 74,
            "disagreement_count": 33
          },
          {
            "a": 3,
            "b": 10,
            "distance": 0.08333333333333333,
            "overlap_count": 108,
            "agreement_count": 99,
            "disagreement_count": 9
          },
          {
            "a": 3,
            "b": 11,
            "distance": 0.34579439252336447,
            "overlap_count": 107,
            "agreement_count": 70,
            "disagreement_count": 37
          },
          {
            "a": 3,
            "b": 12,
            "distance": 0.1574074074074074,
            "overlap_count": 108,
            "agreement_count": 91,
            "disagreement_count": 17
          },
          {
            "a": 3,
            "b": 13,
            "distance": 0.14678899082568808,
            "overlap_count": 109,
            "agreement_count": 93,
            "disagreement_count": 16
          },
          {
            "a": 3,
            "b": 14,
            "distance": 0.11428571428571428,
            "overlap_count": 105,
            "agreement_count": 93,
            "disagreement_count": 12
          },
          {
            "a": 3,
            "b": 15,
            "distance": 0.07339449541284404,
            "overlap_count": 109,
            "agreement_count": 101,
            "disagreement_count": 8
          },
          {
            "a": 3,
            "b": 16,
            "distance": 0.102803738317757,
            "overlap_count": 107,
            "agreement_count": 96,
            "disagreement_count": 11
          },
          {
            "a": 4,
            "b": 5,
            "distance": 0.05555555555555555,
            "overlap_count": 108,
            "agreement_count": 102,
            "disagreement_count": 6
          },
          {
            "a": 4,
            "b": 6,
            "distance": 0.08490566037735849,
            "overlap_count": 106,
            "agreement_count": 97,
            "disagreement_count": 9
          },
          {
            "a": 4,
            "b": 7,
            "distance": 0.19626168224299065,
            "overlap_count": 107,
            "agreement_count": 86,
            "disagreement_count": 21
          },
          {
            "a": 4,
            "b": 8,
            "distance": 0.3364485981308411,
            "overlap_count": 107,
            "agreement_count": 71,
            "disagreement_count": 36
          },
          {
            "a": 4,
            "b": 9,
            "distance": 0.3584905660377358,
            "overlap_count": 106,
            "agreement_count": 68,
            "disagreement_count": 38
          },
          {
            "a": 4,
            "b": 10,
            "distance": 0.1308411214953271,
            "overlap_count": 107,
            "agreement_count": 93,
            "disagreement_count": 14
          },
          {
            "a": 4,
            "b": 11,
            "distance": 0.39622641509433965,
            "overlap_count": 106,
            "agreement_count": 64,
            "disagreement_count": 42
          },
          {
            "a": 4,
            "b": 12,
            "distance": 0.2037037037037037,
            "overlap_count": 108,
            "agreement_count": 86,
            "disagreement_count": 22
          },
          {
            "a": 4,
            "b": 13,
            "distance": 0.16822429906542055,
            "overlap_count": 107,
            "agreement_count": 89,
            "disagreement_count": 18
          },
          {
            "a": 4,
            "b": 14,
            "distance": 0.08571428571428572,
            "overlap_count": 105,
            "agreement_count": 96,
            "disagreement_count": 9
          },
          {
            "a": 4,
            "b": 15,
            "distance": 0.1111111111111111,
            "overlap_count": 108,
            "agreement_count": 96,
            "disagreement_count": 12
          },
          {
            "a": 4,
            "b": 16,
            "distance": 0.11214953271028037,
            "overlap_count": 107,
            "agreement_count": 95,
            "disagreement_count": 12
          },
          {
            "a": 5,
            "b": 6,
            "distance": 0.07476635514018691,
            "overlap_count": 107,
            "agreement_count": 99,
            "disagreement_count": 8
          },
          {
            "a": 5,
            "b": 7,
            "distance": 0.1834862385321101,
            "overlap_count": 109,
            "agreement_count": 89,
            "disagreement_count": 20
          },
          {
            "a": 5,
            "b": 8,
            "distance": 0.3333333333333333,
            "overlap_count": 111,
            "agreement_count": 74,
            "disagreement_count": 37
          },
          {
            "a": 5,
            "b": 9,
            "distance": 0.3392857142857143,
            "overlap_count": 112,
            "agreement_count": 74,
            "disagreement_count": 38
          },
          {
            "a": 5,
            "b": 10,
            "distance": 0.12844036697247707,
            "overlap_count": 109,
            "agreement_count": 95,
            "disagreement_count": 14
          },
          {
            "a": 5,
            "b": 11,
            "distance": 0.4017857142857143,
            "overlap_count": 112,
            "agreement_count": 67,
            "disagreement_count": 45
          },
          {
            "a": 5,
            "b": 12,
            "distance": 0.2,
            "overlap_count": 110,
            "agreement_count": 88,
            "disagreement_count": 22
          },
          {
            "a": 5,
            "b": 13,
            "distance": 0.14678899082568808,
            "overlap_count": 109,
            "agreement_count": 93,
            "disagreement_count": 16
          },
          {
            "a": 5,
            "b": 14,
            "distance": 0.05714285714285714,
            "overlap_count": 105,
            "agreement_count": 99,
            "disagreement_count": 6
          },
          {
            "a": 5,
            "b": 15,
            "distance": 0.11607142857142858,
            "overlap_count": 112,
            "agreement_count": 99,
            "disagreement_count": 13
          },
          {
            "a": 5,
            "b": 16,
            "distance": 0.1111111111111111,
            "overlap_count": 108,
            "agreement_count": 96,
            "disagreement_count": 12
          },
          {
            "a": 6,
            "b": 7,
            "distance": 0.1509433962264151,
            "overlap_count": 106,
            "agreement_count": 90,
            "disagreement_count": 16
          },
          {
            "a": 6,
            "b": 8,
            "distance": 0.2897196261682243,
            "overlap_count": 107,
            "agreement_count": 76,
            "disagreement_count": 31
          },
          {
            "a": 6,
            "b": 9,
            "distance": 0.3142857142857143,
            "overlap_count": 105,
            "agreement_count": 72,
            "disagreement_count": 33
          },
          {
            "a": 6,
            "b": 10,
            "distance": 0.056074766355140186,
            "overlap_count": 107,
            "agreement_count": 101,
            "disagreement_count": 6
          },
          {
            "a": 6,
            "b": 11,
            "distance": 0.3523809523809524,
            "overlap_count": 105,
            "agreement_count": 68,
            "disagreement_count": 37
          },
          {
            "a": 6,
            "b": 12,
            "distance": 0.17757009345794392,
            "overlap_count": 107,
            "agreement_count": 88,
            "disagreement_count": 19
          },
          {
            "a": 6,
            "b": 13,
            "distance": 0.1308411214953271,
            "overlap_count": 107,
            "agreement_count": 93,
            "disagreement_count": 14
          },
          {
            "a": 6,
            "b": 14,
            "distance": 0.09615384615384616,
            "overlap_count": 104,
            "agreement_count": 94,
            "disagreement_count": 10
          },
          {
            "a": 6,
            "b": 15,
            "distance": 0.037383177570093455,
            "overlap_count": 107,
            "agreement_count": 103,
            "disagreement_count": 4
          },
          {
            "a": 6,
            "b": 16,
            "distance": 0.04716981132075472,
            "overlap_count": 106,
            "agreement_count": 101,
            "disagreement_count": 5
          },
          {
            "a": 7,
            "b": 8,
            "distance": 0.3055555555555556,
            "overlap_count": 108,
            "agreement_count": 75,
            "disagreement_count": 33
          },
          {
            "a": 7,
            "b": 9,
            "distance": 0.29906542056074764,
            "overlap_count": 107,
            "agreement_count": 75,
            "disagreement_count": 32
          },
          {
            "a": 7,
            "b": 10,
            "distance": 0.14018691588785046,
            "overlap_count": 107,
            "agreement_count": 92,
            "disagreement_count": 15
          },
          {
            "a": 7,
            "b": 11,
            "distance": 0.308411214953271,
            "overlap_count": 107,
            "agreement_count": 74,
            "disagreement_count": 33
          },
          {
            "a": 7,
            "b": 12,
            "distance": 0.2037037037037037,
            "overlap_count": 108,
            "agreement_count": 86,
            "disagreement_count": 22
          },
          {
            "a": 7,
            "b": 13,
            "distance": 0.1574074074074074,
            "overlap_count": 108,
            "agreement_count": 91,
            "disagreement_count": 17
          },
          {
            "a": 7,
            "b": 14,
            "distance": 0.18269230769230768,
            "overlap_count": 104,
            "agreement_count": 85,
            "disagreement_count": 19
          },
          {
            "a": 7,
            "b": 15,
            "distance": 0.1559633027522936,
            "overlap_count": 109,
            "agreement_count": 92,
            "disagreement_count": 17
          },
          {
            "a": 7,
            "b": 16,
            "distance": 0.16822429906542055,
            "overlap_count": 107,
            "agreement_count": 89,
            "disagreement_count": 18
          },
          {
            "a": 8,
            "b": 9,
            "distance": 0.44036697247706424,
            "overlap_count": 109,
            "agreement_count": 61,
            "disagreement_count": 48
          },
          {
            "a": 8,
            "b": 10,
            "distance": 0.28440366972477066,
            "overlap_count": 109,
            "agreement_count": 78,
            "disagreement_count": 31
          },
          {
            "a": 8,
            "b": 11,
            "distance": 0.46788990825688076,
            "overlap_count": 109,
            "agreement_count": 58,
            "disagreement_count": 51
          },
          {
            "a": 8,
            "b": 12,
            "distance": 0.27522935779816515,
            "overlap_count": 109,
            "agreement_count": 79,
            "disagreement_count": 30
          },
          {
            "a": 8,
            "b": 13,
            "distance": 0.3302752293577982,
            "overlap_count": 109,
            "agreement_count": 73,
            "disagreement_count": 36
          },
          {
            "a": 8,
            "b": 14,
            "distance": 0.3142857142857143,
            "overlap_count": 105,
            "agreement_count": 72,
            "disagreement_count": 33
          },
          {
            "a": 8,
            "b": 15,
            "distance": 0.2702702702702703,
            "overlap_count": 111,
            "agreement_count": 81,
            "disagreement_count": 30
          },
          {
            "a": 8,
            "b": 16,
            "distance": 0.2897196261682243,
            "overlap_count": 107,
            "agreement_count": 76,
            "disagreement_count": 31
          },
          {
            "a": 9,
            "b": 10,
            "distance": 0.308411214953271,
            "overlap_count": 107,
            "agreement_count": 74,
            "disagreement_count": 33
          },
          {
            "a": 9,
            "b": 11,
            "distance": 0.32727272727272727,
            "overlap_count": 110,
            "agreement_count": 74,
            "disagreement_count": 36
          },
          {
            "a": 9,
            "b": 12,
            "distance": 0.37037037037037035,
            "overlap_count": 108,
            "agreement_count": 68,
            "disagreement_count": 40
          },
          {
            "a": 9,
            "b": 13,
            "distance": 0.29906542056074764,
            "overlap_count": 107,
            "agreement_count": 75,
            "disagreement_count": 32
          },
          {
            "a": 9,
            "b": 14,
            "distance": 0.30097087378640774,
            "overlap_count": 103,
            "agreement_count": 72,
            "disagreement_count": 31
          },
          {
            "a": 9,
            "b": 15,
            "distance": 0.3181818181818182,
            "overlap_count": 110,
            "agreement_count": 75,
            "disagreement_count": 35
          },
          {
            "a": 9,
            "b": 16,
            "distance": 0.330188679245283,
            "overlap_count": 106,
            "agreement_count": 71,
            "disagreement_count": 35
          },
          {
            "a": 10,
            "b": 11,
            "distance": 0.34579439252336447,
            "overlap_count": 107,
            "agreement_count": 70,
            "disagreement_count": 37
          },
          {
            "a": 10,
            "b": 12,
            "distance": 0.2037037037037037,
            "overlap_count": 108,
            "agreement_count": 86,
            "disagreement_count": 22
          },
          {
            "a": 10,
            "b": 13,
            "distance": 0.16666666666666666,
            "overlap_count": 108,
            "agreement_count": 90,
            "disagreement_count": 18
          },
          {
            "a": 10,
            "b": 14,
            "distance": 0.13333333333333333,
            "overlap_count": 105,
            "agreement_count": 91,
            "disagreement_count": 14
          },
          {
            "a": 10,
            "b": 15,
            "distance": 0.045871559633027525,
            "overlap_count": 109,
            "agreement_count": 104,
            "disagreement_count": 5
          },
          {
            "a": 10,
            "b": 16,
            "distance": 0.07476635514018691,
            "overlap_count": 107,
            "agreement_count": 99,
            "disagreement_count": 8
          },
          {
            "a": 11,
            "b": 12,
            "distance": 0.4351851851851852,
            "overlap_count": 108,
            "agreement_count": 61,
            "disagreement_count": 47
          },
          {
            "a": 11,
            "b": 13,
            "distance": 0.2897196261682243,
            "overlap_count": 107,
            "agreement_count": 76,
            "disagreement_count": 31
          },
          {
            "a": 11,
            "b": 14,
            "distance": 0.3592233009708738,
            "overlap_count": 103,
            "agreement_count": 66,
            "disagreement_count": 37
          },
          {
            "a": 11,
            "b": 15,
            "distance": 0.39090909090909093,
            "overlap_count": 110,
            "agreement_count": 67,
            "disagreement_count": 43
          },
          {
            "a": 11,
            "b": 16,
            "distance": 0.37735849056603776,
            "overlap_count": 106,
            "agreement_count": 66,
            "disagreement_count": 40
          },
          {
            "a": 12,
            "b": 13,
            "distance": 0.24074074074074073,
            "overlap_count": 108,
            "agreement_count": 82,
            "disagreement_count": 26
          },
          {
            "a": 12,
            "b": 14,
            "distance": 0.20952380952380953,
            "overlap_count": 105,
            "agreement_count": 83,
            "disagreement_count": 22
          },
          {
            "a": 12,
            "b": 15,
            "distance": 0.15454545454545454,
            "overlap_count": 110,
            "agreement_count": 93,
            "disagreement_count": 17
          },
          {
            "a": 12,
            "b": 16,
            "distance": 0.16666666666666666,
            "overlap_count": 108,
            "agreement_count": 90,
            "disagreement_count": 18
          },
          {
            "a": 13,
            "b": 14,
            "distance": 0.17142857142857143,
            "overlap_count": 105,
            "agreement_count": 87,
            "disagreement_count": 18
          },
          {
            "a": 13,
            "b": 15,
            "distance": 0.1559633027522936,
            "overlap_count": 109,
            "agreement_count": 92,
            "disagreement_count": 17
          },
          {
            "a": 13,
            "b": 16,
            "distance": 0.1588785046728972,
            "overlap_count": 107,
            "agreement_count": 90,
            "disagreement_count": 17
          },
          {
            "a": 14,
            "b": 15,
            "distance": 0.09523809523809523,
            "overlap_count": 105,
            "agreement_count": 95,
            "disagreement_count": 10
          },
          {
            "a": 14,
            "b": 16,
            "distance": 0.11538461538461539,
            "overlap_count": 104,
            "agreement_count": 92,
            "disagreement_count": 12
          },
          {
            "a": 15,
            "b": 16,
            "distance": 0.027777777777777776,
            "overlap_count": 108,
            "agreement_count": 105,
            "disagreement_count": 3
          }
        ],
        "linkage": [
          [
            15,
            16,
            0.027777777777777776,
            2
          ],
          [
            6,
            17,
            0.042276494445424084,
            3
          ],
          [
            2,
            3,
            0.05504587155963303,
            2
          ],
          [
            4,
            5,
            0.05555555555555555,
            2
          ],
          [
            10,
            18,
            0.05890422704278487,
            4
          ],
          [
            14,
            20,
            0.07142857142857142,
            3
          ],
          [
            19,
            21,
            0.07755260419508468,
            6
          ],
          [
            1,
            23,
            0.10370626943541773,
            7
          ],
          [
            22,
            24,
            0.10664276426239432,
            10
          ],
          [
            13,
            25,
            0.14785733506233265,
            11
          ],
          [
            7,
            26,
            0.15360152636707747,
            12
          ],
          [
            0,
            27,
            0.183533960011687,
            13
          ],
          [
            12,
            28,
            0.19744538782546414,
            14
          ],
          [
            8,
            29,
            0.3062314595487266,
            15
          ],
          [
            9,
            30,
            0.3246716956996757,
            16
          ],
          [
            11,
            31,
            0.3633551976411669,
            17
          ]
        ],
        "linkage_complete": true
      },
      {
        "group_id": "fa72ac2e9e024482",
        "cluster_scope": "cross_model",
        "representative_policy": "best_accuracy",
        "task_fingerprint": "9d6ea446c173cf044401c04094f095a25fb7ceec9caef163c35be7264a0bb349",
        "normalized_tag_key": "*like*;discourse;english;pragmatics;v3",
        "task_name_display": "*like* discourse/pragm",
        "task_names_seen": [
          "*like* discourse/pragm"
        ],
        "tags_display": "*like*; discourse; pragmatics; English; v3",
        "model_count": 3,
        "distance_metric": "nominal_disagreement_rate",
        "linkage_method": "average",
        "representative_run_stems": [
          "like_interrater__openai__gpt54__2026-03-13-23-51",
          "like_interrater__requesty__anthropicclaudesonnet46__2026-03-13-22-58",
          "like_interrater__vertex__gemini3flashpreview__2026-03-13-15-32"
        ],
        "comparable_pair_count": 3,
        "representatives": [
          {
            "provider": "openai",
            "model": "gpt-5.4",
            "run_stem": "like_interrater__openai__gpt54__2026-03-13-23-51",
            "metrics_file": "like_interrater__openai__gpt54__2026-03-13-23-51__metrics.json",
            "timestamp": "2026-03-13T22:51:21.706190Z",
            "accuracy": 0.5217391304347826,
            "cohen_kappa": 0.4387755102040816
          },
          {
            "provider": "requesty",
            "model": "claude-sonnet-4-6",
            "run_stem": "like_interrater__requesty__anthropicclaudesonnet46__2026-03-13-22-58",
            "metrics_file": "like_interrater__requesty__anthropicclaudesonnet46__2026-03-13-22-58__metrics.json",
            "timestamp": "2026-03-13T21:58:23.211617Z",
            "accuracy": 0.7478260869565218,
            "cohen_kappa": 0.6706172839506173
          },
          {
            "provider": "vertex",
            "model": "gemini-3-flash-preview",
            "run_stem": "like_interrater__vertex__gemini3flashpreview__2026-03-13-15-32",
            "metrics_file": "like_interrater__vertex__gemini3flashpreview__2026-03-13-15-32__metrics.json",
            "timestamp": "2026-03-13T14:32:09.952389Z",
            "accuracy": 0.808695652173913,
            "cohen_kappa": 0.7482587064676617
          }
        ],
        "pairwise": [
          {
            "a": 0,
            "b": 1,
            "distance": 0.336734693877551,
            "overlap_count": 98,
            "agreement_count": 65,
            "disagreement_count": 33
          },
          {
            "a": 0,
            "b": 2,
            "distance": 0.3333333333333333,
            "overlap_count": 99,
            "agreement_count": 66,
            "disagreement_count": 33
          },
          {
            "a": 1,
            "b": 2,
            "distance": 0.13513513513513514,
            "overlap_count": 111,
            "agreement_count": 96,
            "disagreement_count": 15
          }
        ],
        "linkage": [
          [
            1,
            2,
            0.13513513513513514,
            2
          ],
          [
            0,
            3,
            0.33503401360544216,
            3
          ]
        ],
        "linkage_complete": true
      },
      {
        "group_id": "411099465fb6c0e7",
        "cluster_scope": "cross_model",
        "representative_policy": "best_accuracy",
        "task_fingerprint": "9911e304116a3387591af8bb222b9d5c5983c625188825c9007ceff21f5910bc",
        "normalized_tag_key": "*like*;discourse;english;pragmatics;v3",
        "task_name_display": "*like* discourse/pragm",
        "task_names_seen": [
          "*like* discourse/pragm"
        ],
        "tags_display": "*like*; discourse; pragmatics; English; v3",
        "model_count": 2,
        "distance_metric": "nominal_disagreement_rate",
        "linkage_method": "average",
        "representative_run_stems": [
          "like_interrater__einfra__deepseekv32thinking__2026-03-13-20-58",
          "like_interrater__vertex__gemini31propreview"
        ],
        "comparable_pair_count": 1,
        "representatives": [
          {
            "provider": "e-infra",
            "model": "deepseek-v3.2-thinking",
            "run_stem": "like_interrater__einfra__deepseekv32thinking__2026-03-13-20-58",
            "metrics_file": "like_interrater__einfra__deepseekv32thinking__2026-03-13-20-58__metrics.json",
            "timestamp": "2026-03-13T19:58:49.743838Z",
            "accuracy": 0.7130434782608696,
            "cohen_kappa": 0.6246290801186943
          },
          {
            "provider": "vertex",
            "model": "gemini-3.1-pro-preview",
            "run_stem": "like_interrater__vertex__gemini31propreview",
            "metrics_file": "like_interrater__vertex__gemini31propreview__metrics.json",
            "timestamp": "2026-03-13T17:50:24.766352Z",
            "accuracy": 0.8260869565217391,
            "cohen_kappa": 0.7721644378405151
          }
        ],
        "pairwise": [
          {
            "a": 0,
            "b": 1,
            "distance": 0.1792452830188679,
            "overlap_count": 106,
            "agreement_count": 87,
            "disagreement_count": 19
          }
        ],
        "linkage": [
          [
            0,
            1,
            0.1792452830188679,
            2
          ]
        ],
        "linkage_complete": true
      },
      {
        "group_id": "b9abcee6b15c5c5f",
        "cluster_scope": "cross_model",
        "representative_policy": "best_accuracy",
        "task_fingerprint": "0eb7fe892594e382024c3813de8ec46058d12d33d835017ca9df11d4d238d61e",
        "normalized_tag_key": "*-ing*;adverbial;clause;english;syntax",
        "task_name_display": "ADV *-ing* clause",
        "task_names_seen": [
          "ADV *-ing* clause"
        ],
        "tags_display": "*-ing*; syntax; adverbial; clause; English",
        "model_count": 16,
        "distance_metric": "nominal_disagreement_rate",
        "linkage_method": "average",
        "representative_run_stems": [
          "adv-ing__einfra__deepseekv32thinking__2026-03-12-01-08",
          "adv-ing__einfra__glm47__2026-03-17-13-00",
          "adv-ing__einfra__glm5__2026-03-30-21-43",
          "adv-ing__einfra__kimik25__2026-03-19-15-11",
          "adv-ing__einfra__qwen35__2026-03-12-14-37",
          "adv-ing__google__modelsgemini3flashpreview__2026-03-11-15-40",
          "adv-ing__vertex__gemini31propreview__2026-03-11-10-43",
          "adv-ing__google__modelsgemma426ba4bit__2026-04-05-01-59",
          "adv-ing__openai__gpt54__2026-03-11-16-26",
          "adv-ing__openai__gpt54mini__2026-03-18-16-49",
          "adv-ing__openai__gpt54pro__2026-03-12-00-22",
          "adv-ing____gptoss120b__2025-11-08-23-28",
          "adv-ing____gpt5__2025-11-08-17-24",
          "adv-ing__openrouter__qwenqwen36plusfree__2026-04-03-14-47",
          "adv-ing__requesty__anthropicclaudehaiku45__2026-03-23-00-13",
          "adv-ing__requesty__anthropicclaudesonnet46__2026-03-11-16-11"
        ],
        "comparable_pair_count": 120,
        "representatives": [
          {
            "provider": "e-infra",
            "model": "deepseek-v3.2-thinking",
            "run_stem": "adv-ing__einfra__deepseekv32thinking__2026-03-12-01-08",
            "metrics_file": "adv-ing__einfra__deepseekv32thinking__2026-03-12-01-08__metrics.json",
            "timestamp": "2026-03-12T10:15:45.563016Z",
            "accuracy": 0.9559748427672956,
            "cohen_kappa": 0.9071687726760916
          },
          {
            "provider": "e-infra",
            "model": "glm-4.7",
            "run_stem": "adv-ing__einfra__glm47__2026-03-17-13-00",
            "metrics_file": "adv-ing__einfra__glm47__2026-03-17-13-00__metrics.json",
            "timestamp": "2026-03-17T12:00:44.101281Z",
            "accuracy": 0.940251572327044,
            "cohen_kappa": 0.8767894286063869
          },
          {
            "provider": "e-infra",
            "model": "glm-5",
            "run_stem": "adv-ing__einfra__glm5__2026-03-30-21-43",
            "metrics_file": "adv-ing__einfra__glm5__2026-03-30-21-43__metrics.json",
            "timestamp": "2026-03-30T19:44:02.491601Z",
            "accuracy": 0.949685534591195,
            "cohen_kappa": 0.895925380461463
          },
          {
            "provider": "e-infra",
            "model": "kimi-k2.5",
            "run_stem": "adv-ing__einfra__kimik25__2026-03-19-15-11",
            "metrics_file": "adv-ing__einfra__kimik25__2026-03-19-15-11__metrics.json",
            "timestamp": "2026-03-19T14:11:22.566473Z",
            "accuracy": 0.9716981132075472,
            "cohen_kappa": 0.9404382843229069
          },
          {
            "provider": "e-infra",
            "model": "qwen3.5",
            "run_stem": "adv-ing__einfra__qwen35__2026-03-12-14-37",
            "metrics_file": "adv-ing__einfra__qwen35__2026-03-12-14-37__metrics.json",
            "timestamp": "2026-03-12T13:37:29.843039Z",
            "accuracy": 0.8238993710691824,
            "cohen_kappa": 0.670716148000222
          },
          {
            "provider": "google",
            "model": "gemini-3-flash-preview",
            "run_stem": "adv-ing__google__modelsgemini3flashpreview__2026-03-11-15-40",
            "metrics_file": "adv-ing__google__modelsgemini3flashpreview__2026-03-11-15-40__metrics.json",
            "timestamp": "2026-03-11T14:40:24.117684Z",
            "accuracy": 0.9811320754716981,
            "cohen_kappa": 0.9596241747079736
          },
          {
            "provider": "google",
            "model": "gemini-3.1-pro-preview",
            "run_stem": "adv-ing__vertex__gemini31propreview__2026-03-11-10-43",
            "metrics_file": "adv-ing__vertex__gemini31propreview__2026-03-11-10-43__metrics.json",
            "timestamp": "2026-03-11T09:43:27.624333Z",
            "accuracy": 0.9748427672955975,
            "cohen_kappa": 0.9461655662772981
          },
          {
            "provider": "google",
            "model": "gemma-4-26b-a4b-it",
            "run_stem": "adv-ing__google__modelsgemma426ba4bit__2026-04-05-01-59",
            "metrics_file": "adv-ing__google__modelsgemma426ba4bit__2026-04-05-01-59__metrics.json",
            "timestamp": "2026-04-04T23:59:13.642882Z",
            "accuracy": 0.9559748427672956,
            "cohen_kappa": 0.9069825749028457
          },
          {
            "provider": "openai",
            "model": "gpt-5.4",
            "run_stem": "adv-ing__openai__gpt54__2026-03-11-16-26",
            "metrics_file": "adv-ing__openai__gpt54__2026-03-11-16-26__metrics.json",
            "timestamp": "2026-03-11T15:26:11.802357Z",
            "accuracy": 0.9182389937106918,
            "cohen_kappa": 0.8348513902205178
          },
          {
            "provider": "openai",
            "model": "gpt-5.4-mini",
            "run_stem": "adv-ing__openai__gpt54mini__2026-03-18-16-49",
            "metrics_file": "adv-ing__openai__gpt54mini__2026-03-18-16-49__metrics.json",
            "timestamp": "2026-03-18T15:49:58.142001Z",
            "accuracy": 0.7704402515723271,
            "cohen_kappa": 0.6185170577795307
          },
          {
            "provider": "openai",
            "model": "gpt-5.4-pro",
            "run_stem": "adv-ing__openai__gpt54pro__2026-03-12-00-22",
            "metrics_file": "adv-ing__openai__gpt54pro__2026-03-12-00-22__metrics.json",
            "timestamp": "2026-03-11T23:22:39.487962Z",
            "accuracy": 0.8459119496855346,
            "cohen_kappa": 0.7156776877600176
          },
          {
            "provider": "openai",
            "model": "gpt-oss-120b",
            "run_stem": "adv-ing____gptoss120b__2025-11-08-23-28",
            "metrics_file": "adv-ing____gptoss120b__2025-11-08-23-28__metrics.json",
            "timestamp": "2025-11-08T22:28:31.410308Z",
            "accuracy": 0.9308176100628931,
            "cohen_kappa": 0.857107843137255
          },
          {
            "provider": "openai",
            "model": "gpt5",
            "run_stem": "adv-ing____gpt5__2025-11-08-17-24",
            "metrics_file": "adv-ing____gpt5__2025-11-08-17-24__metrics.json",
            "timestamp": "2025-11-08T16:24:26.999467Z",
            "accuracy": 0.9245283018867925,
            "cohen_kappa": 0.8476190476190476
          },
          {
            "provider": "openrouter",
            "model": "qwen3.6",
            "run_stem": "adv-ing__openrouter__qwenqwen36plusfree__2026-04-03-14-47",
            "metrics_file": "adv-ing__openrouter__qwenqwen36plusfree__2026-04-03-14-47__metrics.json",
            "timestamp": "2026-04-03T12:47:18.635821Z",
            "accuracy": 0.9654088050314465,
            "cohen_kappa": 0.9257655822244859
          },
          {
            "provider": "requesty",
            "model": "claude-haiku-4-5",
            "run_stem": "adv-ing__requesty__anthropicclaudehaiku45__2026-03-23-00-13",
            "metrics_file": "adv-ing__requesty__anthropicclaudehaiku45__2026-03-23-00-13__metrics.json",
            "timestamp": "2026-03-22T23:13:14.236246Z",
            "accuracy": 0.4748427672955975,
            "cohen_kappa": 0.28195351478521885
          },
          {
            "provider": "requesty",
            "model": "claude-sonnet-4-6",
            "run_stem": "adv-ing__requesty__anthropicclaudesonnet46__2026-03-11-16-11",
            "metrics_file": "adv-ing__requesty__anthropicclaudesonnet46__2026-03-11-16-11__metrics.json",
            "timestamp": "2026-03-11T15:11:34.763226Z",
            "accuracy": 0.9308176100628931,
            "cohen_kappa": 0.8585638039786512
          }
        ],
        "pairwise": [
          {
            "a": 0,
            "b": 1,
            "distance": 0.02572347266881029,
            "overlap_count": 311,
            "agreement_count": 303,
            "disagreement_count": 8
          },
          {
            "a": 0,
            "b": 2,
            "distance": 0.01929260450160772,
            "overlap_count": 311,
            "agreement_count": 305,
            "disagreement_count": 6
          },
          {
            "a": 0,
            "b": 3,
            "distance": 0.028481012658227847,
            "overlap_count": 316,
            "agreement_count": 307,
            "disagreement_count": 9
          },
          {
            "a": 0,
            "b": 4,
            "distance": 0.011194029850746268,
            "overlap_count": 268,
            "agreement_count": 265,
            "disagreement_count": 3
          },
          {
            "a": 0,
            "b": 5,
            "distance": 0.03164556962025317,
            "overlap_count": 316,
            "agreement_count": 306,
            "disagreement_count": 10
          },
          {
            "a": 0,
            "b": 6,
            "distance": 0.0189873417721519,
            "overlap_count": 316,
            "agreement_count": 310,
            "disagreement_count": 6
          },
          {
            "a": 0,
            "b": 7,
            "distance": 0.01904761904761905,
            "overlap_count": 315,
            "agreement_count": 309,
            "disagreement_count": 6
          },
          {
            "a": 0,
            "b": 8,
            "distance": 0.032362459546925564,
            "overlap_count": 309,
            "agreement_count": 299,
            "disagreement_count": 10
          },
          {
            "a": 0,
            "b": 9,
            "distance": 0.2253968253968254,
            "overlap_count": 315,
            "agreement_count": 244,
            "disagreement_count": 71
          },
          {
            "a": 0,
            "b": 10,
            "distance": 0.007272727272727273,
            "overlap_count": 275,
            "agreement_count": 273,
            "disagreement_count": 2
          },
          {
            "a": 0,
            "b": 11,
            "distance": 0.036303630363036306,
            "overlap_count": 303,
            "agreement_count": 292,
            "disagreement_count": 11
          },
          {
            "a": 0,
            "b": 12,
            "distance": 0.013157894736842105,
            "overlap_count": 304,
            "agreement_count": 300,
            "disagreement_count": 4
          },
          {
            "a": 0,
            "b": 13,
            "distance": 0.022151898734177215,
            "overlap_count": 316,
            "agreement_count": 309,
            "disagreement_count": 7
          },
          {
            "a": 0,
            "b": 14,
            "distance": 0.10714285714285714,
            "overlap_count": 168,
            "agreement_count": 150,
            "disagreement_count": 18
          },
          {
            "a": 0,
            "b": 15,
            "distance": 0.042071197411003236,
            "overlap_count": 309,
            "agreement_count": 296,
            "disagreement_count": 13
          },
          {
            "a": 1,
            "b": 2,
            "distance": 0.032679738562091505,
            "overlap_count": 306,
            "agreement_count": 296,
            "disagreement_count": 10
          },
          {
            "a": 1,
            "b": 3,
            "distance": 0.028938906752411574,
            "overlap_count": 311,
            "agreement_count": 302,
            "disagreement_count": 9
          },
          {
            "a": 1,
            "b": 4,
            "distance": 0.02631578947368421,
            "overlap_count": 266,
            "agreement_count": 259,
            "disagreement_count": 7
          },
          {
            "a": 1,
            "b": 5,
            "distance": 0.03858520900321544,
            "overlap_count": 311,
            "agreement_count": 299,
            "disagreement_count": 12
          },
          {
            "a": 1,
            "b": 6,
            "distance": 0.02572347266881029,
            "overlap_count": 311,
            "agreement_count": 303,
            "disagreement_count": 8
          },
          {
            "a": 1,
            "b": 7,
            "distance": 0.03225806451612903,
            "overlap_count": 310,
            "agreement_count": 300,
            "disagreement_count": 10
          },
          {
            "a": 1,
            "b": 8,
            "distance": 0.05263157894736842,
            "overlap_count": 304,
            "agreement_count": 288,
            "disagreement_count": 16
          },
          {
            "a": 1,
            "b": 9,
            "distance": 0.23225806451612904,
            "overlap_count": 310,
            "agreement_count": 238,
            "disagreement_count": 72
          },
          {
            "a": 1,
            "b": 10,
            "distance": 0.022058823529411766,
            "overlap_count": 272,
            "agreement_count": 266,
            "disagreement_count": 6
          },
          {
            "a": 1,
            "b": 11,
            "distance": 0.030100334448160536,
            "overlap_count": 299,
            "agreement_count": 290,
            "disagreement_count": 9
          },
          {
            "a": 1,
            "b": 12,
            "distance": 0.02666666666666667,
            "overlap_count": 300,
            "agreement_count": 292,
            "disagreement_count": 8
          },
          {
            "a": 1,
            "b": 13,
            "distance": 0.028938906752411574,
            "overlap_count": 311,
            "agreement_count": 302,
            "disagreement_count": 9
          },
          {
            "a": 1,
            "b": 14,
            "distance": 0.10714285714285714,
            "overlap_count": 168,
            "agreement_count": 150,
            "disagreement_count": 18
          },
          {
            "a": 1,
            "b": 15,
            "distance": 0.04918032786885246,
            "overlap_count": 305,
            "agreement_count": 290,
            "disagreement_count": 15
          },
          {
            "a": 2,
            "b": 3,
            "distance": 0.012861736334405145,
            "overlap_count": 311,
            "agreement_count": 307,
            "disagreement_count": 4
          },
          {
            "a": 2,
            "b": 4,
            "distance": 0.0037593984962406013,
            "overlap_count": 266,
            "agreement_count": 265,
            "disagreement_count": 1
          },
          {
            "a": 2,
            "b": 5,
            "distance": 0.022508038585209004,
            "overlap_count": 311,
            "agreement_count": 304,
            "disagreement_count": 7
          },
          {
            "a": 2,
            "b": 6,
            "distance": 0.01607717041800643,
            "overlap_count": 311,
            "agreement_count": 306,
            "disagreement_count": 5
          },
          {
            "a": 2,
            "b": 7,
            "distance": 0.01935483870967742,
            "overlap_count": 310,
            "agreement_count": 304,
            "disagreement_count": 6
          },
          {
            "a": 2,
            "b": 8,
            "distance": 0.04276315789473684,
            "overlap_count": 304,
            "agreement_count": 291,
            "disagreement_count": 13
          },
          {
            "a": 2,
            "b": 9,
            "distance": 0.2129032258064516,
            "overlap_count": 310,
            "agreement_count": 244,
            "disagreement_count": 66
          },
          {
            "a": 2,
            "b": 10,
            "distance": 0.007326007326007326,
            "overlap_count": 273,
            "agreement_count": 271,
            "disagreement_count": 2
          },
          {
            "a": 2,
            "b": 11,
            "distance": 0.030100334448160536,
            "overlap_count": 299,
            "agreement_count": 290,
            "disagreement_count": 9
          },
          {
            "a": 2,
            "b": 12,
            "distance": 0.019933554817275746,
            "overlap_count": 301,
            "agreement_count": 295,
            "disagreement_count": 6
          },
          {
            "a": 2,
            "b": 13,
            "distance": 0.022508038585209004,
            "overlap_count": 311,
            "agreement_count": 304,
            "disagreement_count": 7
          },
          {
            "a": 2,
            "b": 14,
            "distance": 0.10179640718562874,
            "overlap_count": 167,
            "agreement_count": 150,
            "disagreement_count": 17
          },
          {
            "a": 2,
            "b": 15,
            "distance": 0.04590163934426229,
            "overlap_count": 305,
            "agreement_count": 291,
            "disagreement_count": 14
          },
          {
            "a": 3,
            "b": 4,
            "distance": 0.011194029850746268,
            "overlap_count": 268,
            "agreement_count": 265,
            "disagreement_count": 3
          },
          {
            "a": 3,
            "b": 5,
            "distance": 0.02830188679245283,
            "overlap_count": 318,
            "agreement_count": 309,
            "disagreement_count": 9
          },
          {
            "a": 3,
            "b": 6,
            "distance": 0.0220125786163522,
            "overlap_count": 318,
            "agreement_count": 311,
            "disagreement_count": 7
          },
          {
            "a": 3,
            "b": 7,
            "distance": 0.022082018927444796,
            "overlap_count": 317,
            "agreement_count": 310,
            "disagreement_count": 7
          },
          {
            "a": 3,
            "b": 8,
            "distance": 0.04516129032258064,
            "overlap_count": 310,
            "agreement_count": 296,
            "disagreement_count": 14
          },
          {
            "a": 3,
            "b": 9,
            "distance": 0.21766561514195584,
            "overlap_count": 317,
            "agreement_count": 248,
            "disagreement_count": 69
          },
          {
            "a": 3,
            "b": 10,
            "distance": 0.007272727272727273,
            "overlap_count": 275,
            "agreement_count": 273,
            "disagreement_count": 2
          },
          {
            "a": 3,
            "b": 11,
            "distance": 0.02631578947368421,
            "overlap_count": 304,
            "agreement_count": 296,
            "disagreement_count": 8
          },
          {
            "a": 3,
            "b": 12,
            "distance": 0.019736842105263157,
            "overlap_count": 304,
            "agreement_count": 298,
            "disagreement_count": 6
          },
          {
            "a": 3,
            "b": 13,
            "distance": 0.0220125786163522,
            "overlap_count": 318,
            "agreement_count": 311,
            "disagreement_count": 7
          },
          {
            "a": 3,
            "b": 14,
            "distance": 0.10119047619047619,
            "overlap_count": 168,
            "agreement_count": 151,
            "disagreement_count": 17
          },
          {
            "a": 3,
            "b": 15,
            "distance": 0.04516129032258064,
            "overlap_count": 310,
            "agreement_count": 296,
            "disagreement_count": 14
          },
          {
            "a": 4,
            "b": 5,
            "distance": 0.011194029850746268,
            "overlap_count": 268,
            "agreement_count": 265,
            "disagreement_count": 3
          },
          {
            "a": 4,
            "b": 6,
            "distance": 0.007462686567164179,
            "overlap_count": 268,
            "agreement_count": 266,
            "disagreement_count": 2
          },
          {
            "a": 4,
            "b": 7,
            "distance": 0.007462686567164179,
            "overlap_count": 268,
            "agreement_count": 266,
            "disagreement_count": 2
          },
          {
            "a": 4,
            "b": 8,
            "distance": 0.03435114503816794,
            "overlap_count": 262,
            "agreement_count": 253,
            "disagreement_count": 9
          },
          {
            "a": 4,
            "b": 9,
            "distance": 0.23507462686567165,
            "overlap_count": 268,
            "agreement_count": 205,
            "disagreement_count": 63
          },
          {
            "a": 4,
            "b": 10,
            "distance": 0.0040650406504065045,
            "overlap_count": 246,
            "agreement_count": 245,
            "disagreement_count": 1
          },
          {
            "a": 4,
            "b": 11,
            "distance": 0.019230769230769232,
            "overlap_count": 260,
            "agreement_count": 255,
            "disagreement_count": 5
          },
          {
            "a": 4,
            "b": 12,
            "distance": 0.01509433962264151,
            "overlap_count": 265,
            "agreement_count": 261,
            "disagreement_count": 4
          },
          {
            "a": 4,
            "b": 13,
            "distance": 0.007462686567164179,
            "overlap_count": 268,
            "agreement_count": 266,
            "disagreement_count": 2
          },
          {
            "a": 4,
            "b": 14,
            "distance": 0.09316770186335403,
            "overlap_count": 161,
            "agreement_count": 146,
            "disagreement_count": 15
          },
          {
            "a": 4,
            "b": 15,
            "distance": 0.03787878787878788,
            "overlap_count": 264,
            "agreement_count": 254,
            "disagreement_count": 10
          },
          {
            "a": 5,
            "b": 6,
            "distance": 0.012578616352201259,
            "overlap_count": 318,
            "agreement_count": 314,
            "disagreement_count": 4
          },
          {
            "a": 5,
            "b": 7,
            "distance": 0.022082018927444796,
            "overlap_count": 317,
            "agreement_count": 310,
            "disagreement_count": 7
          },
          {
            "a": 5,
            "b": 8,
            "distance": 0.04516129032258064,
            "overlap_count": 310,
            "agreement_count": 296,
            "disagreement_count": 14
          },
          {
            "a": 5,
            "b": 9,
            "distance": 0.22712933753943218,
            "overlap_count": 317,
            "agreement_count": 245,
            "disagreement_count": 72
          },
          {
            "a": 5,
            "b": 10,
            "distance": 0.014545454545454545,
            "overlap_count": 275,
            "agreement_count": 271,
            "disagreement_count": 4
          },
          {
            "a": 5,
            "b": 11,
            "distance": 0.02631578947368421,
            "overlap_count": 304,
            "agreement_count": 296,
            "disagreement_count": 8
          },
          {
            "a": 5,
            "b": 12,
            "distance": 0.029605263157894735,
            "overlap_count": 304,
            "agreement_count": 295,
            "disagreement_count": 9
          },
          {
            "a": 5,
            "b": 13,
            "distance": 0.015723270440251572,
            "overlap_count": 318,
            "agreement_count": 313,
            "disagreement_count": 5
          },
          {
            "a": 5,
            "b": 14,
            "distance": 0.08928571428571429,
            "overlap_count": 168,
            "agreement_count": 153,
            "disagreement_count": 15
          },
          {
            "a": 5,
            "b": 15,
            "distance": 0.035483870967741936,
            "overlap_count": 310,
            "agreement_count": 299,
            "disagreement_count": 11
          },
          {
            "a": 6,
            "b": 7,
            "distance": 0.022082018927444796,
            "overlap_count": 317,
            "agreement_count": 310,
            "disagreement_count": 7
          },
          {
            "a": 6,
            "b": 8,
            "distance": 0.03225806451612903,
            "overlap_count": 310,
            "agreement_count": 300,
            "disagreement_count": 10
          },
          {
            "a": 6,
            "b": 9,
            "distance": 0.22712933753943218,
            "overlap_count": 317,
            "agreement_count": 245,
            "disagreement_count": 72
          },
          {
            "a": 6,
            "b": 10,
            "distance": 0.0036363636363636364,
            "overlap_count": 275,
            "agreement_count": 274,
            "disagreement_count": 1
          },
          {
            "a": 6,
            "b": 11,
            "distance": 0.023026315789473683,
            "overlap_count": 304,
            "agreement_count": 297,
            "disagreement_count": 7
          },
          {
            "a": 6,
            "b": 12,
            "distance": 0.01644736842105263,
            "overlap_count": 304,
            "agreement_count": 299,
            "disagreement_count": 5
          },
          {
            "a": 6,
            "b": 13,
            "distance": 0.015723270440251572,
            "overlap_count": 318,
            "agreement_count": 313,
            "disagreement_count": 5
          },
          {
            "a": 6,
            "b": 14,
            "distance": 0.09523809523809523,
            "overlap_count": 168,
            "agreement_count": 152,
            "disagreement_count": 16
          },
          {
            "a": 6,
            "b": 15,
            "distance": 0.02903225806451613,
            "overlap_count": 310,
            "agreement_count": 301,
            "disagreement_count": 9
          },
          {
            "a": 7,
            "b": 8,
            "distance": 0.045307443365695796,
            "overlap_count": 309,
            "agreement_count": 295,
            "disagreement_count": 14
          },
          {
            "a": 7,
            "b": 9,
            "distance": 0.22784810126582278,
            "overlap_count": 316,
            "agreement_count": 244,
            "disagreement_count": 72
          },
          {
            "a": 7,
            "b": 10,
            "distance": 0.01090909090909091,
            "overlap_count": 275,
            "agreement_count": 272,
            "disagreement_count": 3
          },
          {
            "a": 7,
            "b": 11,
            "distance": 0.0297029702970297,
            "overlap_count": 303,
            "agreement_count": 294,
            "disagreement_count": 9
          },
          {
            "a": 7,
            "b": 12,
            "distance": 0.026402640264026403,
            "overlap_count": 303,
            "agreement_count": 295,
            "disagreement_count": 8
          },
          {
            "a": 7,
            "b": 13,
            "distance": 0.012618296529968454,
            "overlap_count": 317,
            "agreement_count": 313,
            "disagreement_count": 4
          },
          {
            "a": 7,
            "b": 14,
            "distance": 0.09523809523809523,
            "overlap_count": 168,
            "agreement_count": 152,
            "disagreement_count": 16
          },
          {
            "a": 7,
            "b": 15,
            "distance": 0.038834951456310676,
            "overlap_count": 309,
            "agreement_count": 297,
            "disagreement_count": 12
          },
          {
            "a": 8,
            "b": 9,
            "distance": 0.22006472491909385,
            "overlap_count": 309,
            "agreement_count": 241,
            "disagreement_count": 68
          },
          {
            "a": 8,
            "b": 10,
            "distance": 0.022222222222222223,
            "overlap_count": 270,
            "agreement_count": 264,
            "disagreement_count": 6
          },
          {
            "a": 8,
            "b": 11,
            "distance": 0.05723905723905724,
            "overlap_count": 297,
            "agreement_count": 280,
            "disagreement_count": 17
          },
          {
            "a": 8,
            "b": 12,
            "distance": 0.026845637583892617,
            "overlap_count": 298,
            "agreement_count": 290,
            "disagreement_count": 8
          },
          {
            "a": 8,
            "b": 13,
            "distance": 0.041935483870967745,
            "overlap_count": 310,
            "agreement_count": 297,
            "disagreement_count": 13
          },
          {
            "a": 8,
            "b": 14,
            "distance": 0.11377245508982035,
            "overlap_count": 167,
            "agreement_count": 148,
            "disagreement_count": 19
          },
          {
            "a": 8,
            "b": 15,
            "distance": 0.03642384105960265,
            "overlap_count": 302,
            "agreement_count": 291,
            "disagreement_count": 11
          },
          {
            "a": 9,
            "b": 10,
            "distance": 0.21454545454545454,
            "overlap_count": 275,
            "agreement_count": 216,
            "disagreement_count": 59
          },
          {
            "a": 9,
            "b": 11,
            "distance": 0.23432343234323433,
            "overlap_count": 303,
            "agreement_count": 232,
            "disagreement_count": 71
          },
          {
            "a": 9,
            "b": 12,
            "distance": 0.22039473684210525,
            "overlap_count": 304,
            "agreement_count": 237,
            "disagreement_count": 67
          },
          {
            "a": 9,
            "b": 13,
            "distance": 0.23974763406940064,
            "overlap_count": 317,
            "agreement_count": 241,
            "disagreement_count": 76
          },
          {
            "a": 9,
            "b": 14,
            "distance": 0.25,
            "overlap_count": 168,
            "agreement_count": 126,
            "disagreement_count": 42
          },
          {
            "a": 9,
            "b": 15,
            "distance": 0.22653721682847897,
            "overlap_count": 309,
            "agreement_count": 239,
            "disagreement_count": 70
          },
          {
            "a": 10,
            "b": 11,
            "distance": 0.014925373134328358,
            "overlap_count": 268,
            "agreement_count": 264,
            "disagreement_count": 4
          },
          {
            "a": 10,
            "b": 12,
            "distance": 0.0072992700729927005,
            "overlap_count": 274,
            "agreement_count": 272,
            "disagreement_count": 2
          },
          {
            "a": 10,
            "b": 13,
            "distance": 0.01090909090909091,
            "overlap_count": 275,
            "agreement_count": 272,
            "disagreement_count": 3
          },
          {
            "a": 10,
            "b": 14,
            "distance": 0.10062893081761007,
            "overlap_count": 159,
            "agreement_count": 143,
            "disagreement_count": 16
          },
          {
            "a": 10,
            "b": 15,
            "distance": 0.01838235294117647,
            "overlap_count": 272,
            "agreement_count": 267,
            "disagreement_count": 5
          },
          {
            "a": 11,
            "b": 12,
            "distance": 0.02711864406779661,
            "overlap_count": 295,
            "agreement_count": 287,
            "disagreement_count": 8
          },
          {
            "a": 11,
            "b": 13,
            "distance": 0.023026315789473683,
            "overlap_count": 304,
            "agreement_count": 297,
            "disagreement_count": 7
          },
          {
            "a": 11,
            "b": 14,
            "distance": 0.08695652173913043,
            "overlap_count": 161,
            "agreement_count": 147,
            "disagreement_count": 14
          },
          {
            "a": 11,
            "b": 15,
            "distance": 0.03355704697986577,
            "overlap_count": 298,
            "agreement_count": 288,
            "disagreement_count": 10
          },
          {
            "a": 12,
            "b": 13,
            "distance": 0.023026315789473683,
            "overlap_count": 304,
            "agreement_count": 297,
            "disagreement_count": 7
          },
          {
            "a": 12,
            "b": 14,
            "distance": 0.10119047619047619,
            "overlap_count": 168,
            "agreement_count": 151,
            "disagreement_count": 17
          },
          {
            "a": 12,
            "b": 15,
            "distance": 0.03666666666666667,
            "overlap_count": 300,
            "agreement_count": 289,
            "disagreement_count": 11
          },
          {
            "a": 13,
            "b": 14,
            "distance": 0.10119047619047619,
            "overlap_count": 168,
            "agreement_count": 151,
            "disagreement_count": 17
          },
          {
            "a": 13,
            "b": 15,
            "distance": 0.03870967741935484,
            "overlap_count": 310,
            "agreement_count": 298,
            "disagreement_count": 12
          },
          {
            "a": 14,
            "b": 15,
            "distance": 0.10843373493975904,
            "overlap_count": 166,
            "agreement_count": 148,
            "disagreement_count": 18
          }
        ],
        "linkage": [
          [
            6,
            10,
            0.0036363636363636364,
            2
          ],
          [
            2,
            4,
            0.0037593984962406013,
            2
          ],
          [
            16,
            17,
            0.008732726240396109,
            4
          ],
          [
            7,
            13,
            0.012618296529968454,
            2
          ],
          [
            0,
            12,
            0.013157894736842105,
            2
          ],
          [
            3,
            18,
            0.013335268018557722,
            5
          ],
          [
            19,
            21,
            0.016050631915888996,
            7
          ],
          [
            5,
            22,
            0.018133330784822897,
            8
          ],
          [
            20,
            23,
            0.019726149856758187,
            10
          ],
          [
            11,
            24,
            0.025606593206743654,
            11
          ],
          [
            1,
            25,
            0.02890812591289117,
            12
          ],
          [
            8,
            15,
            0.03642384105960265,
            2
          ],
          [
            26,
            27,
            0.038712454091310154,
            14
          ],
          [
            14,
            28,
            0.10016962851816788,
            15
          ],
          [
            9,
            29,
            0.22740122224129922,
            16
          ]
        ],
        "linkage_complete": true
      },
      {
        "group_id": "532a75904e1d671d",
        "cluster_scope": "cross_model",
        "representative_policy": "best_accuracy",
        "task_fingerprint": "3d5a97ff902050847f35f1deb9e78aa16664eb5a88253b16a5d12e0d0981ef3d",
        "normalized_tag_key": "correction;english;error;preposition",
        "task_name_display": "Err. correct. prepositions",
        "task_names_seen": [
          "Err. correct. prepositions"
        ],
        "tags_display": "error; correction; preposition; English",
        "model_count": 5,
        "distance_metric": "nominal_disagreement_rate",
        "linkage_method": "average",
        "representative_run_stems": [
          "prepositions__einfra__deepseekv32thinking__2026-04-02-13-14",
          "prepositions__einfra__kimik25__2026-04-01-13-10",
          "prepositions__einfra__qwen35__2026-04-04-12-31",
          "prepositions__openrouter__qwenqwen36plusfree__2026-04-03-02-19",
          "prepositions__vertex__gemini3flashpreview__2026-04-01-12-27"
        ],
        "comparable_pair_count": 10,
        "representatives": [
          {
            "provider": "e-infra",
            "model": "deepseek-v3.2-thinking",
            "run_stem": "prepositions__einfra__deepseekv32thinking__2026-04-02-13-14",
            "metrics_file": "prepositions__einfra__deepseekv32thinking__2026-04-02-13-14__metrics.json",
            "timestamp": "2026-04-02T11:14:24.260967Z",
            "accuracy": 0.8924395946999221,
            "cohen_kappa": 0.7595338824791694
          },
          {
            "provider": "e-infra",
            "model": "kimi-k2.5",
            "run_stem": "prepositions__einfra__kimik25__2026-04-01-13-10",
            "metrics_file": "prepositions__einfra__kimik25__2026-04-01-13-10__metrics.json",
            "timestamp": "2026-04-01T11:10:43.580812Z",
            "accuracy": 0.9041309431021044,
            "cohen_kappa": 0.7827007734474937
          },
          {
            "provider": "e-infra",
            "model": "qwen3.5",
            "run_stem": "prepositions__einfra__qwen35__2026-04-04-12-31",
            "metrics_file": "prepositions__einfra__qwen35__2026-04-04-12-31__metrics.json",
            "timestamp": "2026-04-04T10:31:49.198333Z",
            "accuracy": 0.9033515198752923,
            "cohen_kappa": 0.7803725157791852
          },
          {
            "provider": "openrouter",
            "model": "qwen3.6",
            "run_stem": "prepositions__openrouter__qwenqwen36plusfree__2026-04-03-02-19",
            "metrics_file": "prepositions__openrouter__qwenqwen36plusfree__2026-04-03-02-19__metrics.json",
            "timestamp": "2026-04-03T00:19:27.374136Z",
            "accuracy": 0.9033515198752923,
            "cohen_kappa": 0.7755667190041489
          },
          {
            "provider": "vertex",
            "model": "gemini-3-flash-preview",
            "run_stem": "prepositions__vertex__gemini3flashpreview__2026-04-01-12-27",
            "metrics_file": "prepositions__vertex__gemini3flashpreview__2026-04-01-12-27__metrics.json",
            "timestamp": "2026-04-01T10:27:52.038487Z",
            "accuracy": 0.9025720966484801,
            "cohen_kappa": 0.7752016001861465
          }
        ],
        "pairwise": [
          {
            "a": 0,
            "b": 1,
            "distance": 0.05845674201091192,
            "overlap_count": 1283,
            "agreement_count": 1208,
            "disagreement_count": 75
          },
          {
            "a": 0,
            "b": 2,
            "distance": 0.05455962587685113,
            "overlap_count": 1283,
            "agreement_count": 1213,
            "disagreement_count": 70
          },
          {
            "a": 0,
            "b": 3,
            "distance": 0.04988308651597818,
            "overlap_count": 1283,
            "agreement_count": 1219,
            "disagreement_count": 64
          },
          {
            "a": 0,
            "b": 4,
            "distance": 0.05066250974279034,
            "overlap_count": 1283,
            "agreement_count": 1218,
            "disagreement_count": 65
          },
          {
            "a": 1,
            "b": 2,
            "distance": 0.05144193296960249,
            "overlap_count": 1283,
            "agreement_count": 1217,
            "disagreement_count": 66
          },
          {
            "a": 1,
            "b": 3,
            "distance": 0.04598597038191738,
            "overlap_count": 1283,
            "agreement_count": 1224,
            "disagreement_count": 59
          },
          {
            "a": 1,
            "b": 4,
            "distance": 0.049103663289166016,
            "overlap_count": 1283,
            "agreement_count": 1220,
            "disagreement_count": 63
          },
          {
            "a": 2,
            "b": 3,
            "distance": 0.03351519875292284,
            "overlap_count": 1283,
            "agreement_count": 1240,
            "disagreement_count": 43
          },
          {
            "a": 2,
            "b": 4,
            "distance": 0.034294621979735,
            "overlap_count": 1283,
            "agreement_count": 1239,
            "disagreement_count": 44
          },
          {
            "a": 3,
            "b": 4,
            "distance": 0.028838659392049885,
            "overlap_count": 1283,
            "agreement_count": 1246,
            "disagreement_count": 37
          }
        ],
        "linkage": [
          [
            3,
            4,
            0.028838659392049885,
            2
          ],
          [
            2,
            5,
            0.03390491036632892,
            3
          ],
          [
            1,
            6,
            0.0488438555468953,
            4
          ],
          [
            0,
            7,
            0.05339049103663289,
            5
          ]
        ],
        "linkage_complete": true
      },
      {
        "group_id": "941335089239d5c3",
        "cluster_scope": "cross_model",
        "representative_policy": "best_accuracy",
        "task_fingerprint": "167cd427ec20c9daf6197b22d9658a19f92d953c7e59d7b9d1c98a7c6a3fb3f6",
        "normalized_tag_key": "disambiguation;homonymy;middle english;semantics",
        "task_name_display": "ME disambiguation",
        "task_names_seen": [
          "ME disambiguation"
        ],
        "tags_display": "Middle English; semantics; disambiguation; homonymy",
        "model_count": 11,
        "distance_metric": "nominal_disagreement_rate",
        "linkage_method": "average",
        "representative_run_stems": [
          "ME_disambiguation__einfra__deepseekv32thinking__2026-03-29-18-13",
          "ME_disambiguation__einfra__glm5__2026-03-29-21-59",
          "ME_disambiguation__einfra__gptoss120b__2026-03-29-18-48",
          "ME_disambiguation__einfra__kimik25__2026-03-29-19-32",
          "ME_disambiguation__einfra__qwen35__2026-03-29-20-02",
          "ME_disambiguation__openai__gpt54__2026-03-29-18-03",
          "ME_disambiguation__openai__gpt54mini__2026-03-29-18-07",
          "ME_disambiguation__openrouter__qwenqwen36plusfree__2026-04-03-19-16",
          "ME_disambiguation__requesty__anthropicclaudehaiku45__2026-03-29-18-08",
          "ME_disambiguation__vertex__gemini31flashlitepreview__2026-03-29-18-01",
          "ME_disambiguation__vertex__gemini31propreview__2026-03-29-17-50"
        ],
        "comparable_pair_count": 55,
        "representatives": [
          {
            "provider": "e-infra",
            "model": "deepseek-v3.2-thinking",
            "run_stem": "ME_disambiguation__einfra__deepseekv32thinking__2026-03-29-18-13",
            "metrics_file": "ME_disambiguation__einfra__deepseekv32thinking__2026-03-29-18-13__metrics.json",
            "timestamp": "2026-03-29T16:13:50.205096Z",
            "accuracy": 0.99,
            "cohen_kappa": 0.9864222674813306
          },
          {
            "provider": "e-infra",
            "model": "glm-5",
            "run_stem": "ME_disambiguation__einfra__glm5__2026-03-29-21-59",
            "metrics_file": "ME_disambiguation__einfra__glm5__2026-03-29-21-59__metrics.json",
            "timestamp": "2026-03-29T19:59:20.519323Z",
            "accuracy": 0.98,
            "cohen_kappa": 0.9728539798610463
          },
          {
            "provider": "e-infra",
            "model": "gpt-oss-120b",
            "run_stem": "ME_disambiguation__einfra__gptoss120b__2026-03-29-18-48",
            "metrics_file": "ME_disambiguation__einfra__gptoss120b__2026-03-29-18-48__metrics.json",
            "timestamp": "2026-03-29T16:48:51.685631Z",
            "accuracy": 0.945,
            "cohen_kappa": 0.9254767792418956
          },
          {
            "provider": "e-infra",
            "model": "kimi-k2.5",
            "run_stem": "ME_disambiguation__einfra__kimik25__2026-03-29-19-32",
            "metrics_file": "ME_disambiguation__einfra__kimik25__2026-03-29-19-32__metrics.json",
            "timestamp": "2026-03-29T17:32:26.587695Z",
            "accuracy": 0.99,
            "cohen_kappa": 0.9863975651641643
          },
          {
            "provider": "e-infra",
            "model": "qwen3.5",
            "run_stem": "ME_disambiguation__einfra__qwen35__2026-03-29-20-02",
            "metrics_file": "ME_disambiguation__einfra__qwen35__2026-03-29-20-02__metrics.json",
            "timestamp": "2026-03-29T18:02:10.650671Z",
            "accuracy": 0.9925,
            "cohen_kappa": 0.9897961786689115
          },
          {
            "provider": "openai",
            "model": "gpt-5.4",
            "run_stem": "ME_disambiguation__openai__gpt54__2026-03-29-18-03",
            "metrics_file": "ME_disambiguation__openai__gpt54__2026-03-29-18-03__metrics.json",
            "timestamp": "2026-03-29T16:03:41.707544Z",
            "accuracy": 0.9825,
            "cohen_kappa": 0.9761712267563084
          },
          {
            "provider": "openai",
            "model": "gpt-5.4-mini",
            "run_stem": "ME_disambiguation__openai__gpt54mini__2026-03-29-18-07",
            "metrics_file": "ME_disambiguation__openai__gpt54mini__2026-03-29-18-07__metrics.json",
            "timestamp": "2026-03-29T16:07:03.773591Z",
            "accuracy": 0.9525,
            "cohen_kappa": 0.9356319502672121
          },
          {
            "provider": "openrouter",
            "model": "qwen3.6",
            "run_stem": "ME_disambiguation__openrouter__qwenqwen36plusfree__2026-04-03-19-16",
            "metrics_file": "ME_disambiguation__openrouter__qwenqwen36plusfree__2026-04-03-19-16__metrics.json",
            "timestamp": "2026-04-03T17:16:36.765533Z",
            "accuracy": 0.99,
            "cohen_kappa": 0.986392706490679
          },
          {
            "provider": "requesty",
            "model": "claude-haiku-4-5",
            "run_stem": "ME_disambiguation__requesty__anthropicclaudehaiku45__2026-03-29-18-08",
            "metrics_file": "ME_disambiguation__requesty__anthropicclaudehaiku45__2026-03-29-18-08__metrics.json",
            "timestamp": "2026-03-29T16:09:02.369257Z",
            "accuracy": 0.975,
            "cohen_kappa": 0.9660804233163169
          },
          {
            "provider": "vertex",
            "model": "gemini-3.1-flash-lite-preview",
            "run_stem": "ME_disambiguation__vertex__gemini31flashlitepreview__2026-03-29-18-01",
            "metrics_file": "ME_disambiguation__vertex__gemini31flashlitepreview__2026-03-29-18-01__metrics.json",
            "timestamp": "2026-03-29T16:01:28.629386Z",
            "accuracy": 0.9925,
            "cohen_kappa": 0.9897796666467941
          },
          {
            "provider": "vertex",
            "model": "gemini-3.1-pro-preview",
            "run_stem": "ME_disambiguation__vertex__gemini31propreview__2026-03-29-17-50",
            "metrics_file": "ME_disambiguation__vertex__gemini31propreview__2026-03-29-17-50__metrics.json",
            "timestamp": "2026-03-29T15:50:03.914973Z",
            "accuracy": 0.9925,
            "cohen_kappa": 0.989791317515547
          }
        ],
        "pairwise": [
          {
            "a": 0,
            "b": 1,
            "distance": 0.017543859649122806,
            "overlap_count": 399,
            "agreement_count": 392,
            "disagreement_count": 7
          },
          {
            "a": 0,
            "b": 2,
            "distance": 0.05,
            "overlap_count": 400,
            "agreement_count": 380,
            "disagreement_count": 20
          },
          {
            "a": 0,
            "b": 3,
            "distance": 0.015,
            "overlap_count": 400,
            "agreement_count": 394,
            "disagreement_count": 6
          },
          {
            "a": 0,
            "b": 4,
            "distance": 0.0125,
            "overlap_count": 400,
            "agreement_count": 395,
            "disagreement_count": 5
          },
          {
            "a": 0,
            "b": 5,
            "distance": 0.0175,
            "overlap_count": 400,
            "agreement_count": 393,
            "disagreement_count": 7
          },
          {
            "a": 0,
            "b": 6,
            "distance": 0.0475,
            "overlap_count": 400,
            "agreement_count": 381,
            "disagreement_count": 19
          },
          {
            "a": 0,
            "b": 7,
            "distance": 0.015,
            "overlap_count": 400,
            "agreement_count": 394,
            "disagreement_count": 6
          },
          {
            "a": 0,
            "b": 8,
            "distance": 0.035,
            "overlap_count": 400,
            "agreement_count": 386,
            "disagreement_count": 14
          },
          {
            "a": 0,
            "b": 9,
            "distance": 0.0125,
            "overlap_count": 400,
            "agreement_count": 395,
            "disagreement_count": 5
          },
          {
            "a": 0,
            "b": 10,
            "distance": 0.0125,
            "overlap_count": 400,
            "agreement_count": 395,
            "disagreement_count": 5
          },
          {
            "a": 1,
            "b": 2,
            "distance": 0.05263157894736842,
            "overlap_count": 399,
            "agreement_count": 378,
            "disagreement_count": 21
          },
          {
            "a": 1,
            "b": 3,
            "distance": 0.012531328320802004,
            "overlap_count": 399,
            "agreement_count": 394,
            "disagreement_count": 5
          },
          {
            "a": 1,
            "b": 4,
            "distance": 0.020050125313283207,
            "overlap_count": 399,
            "agreement_count": 391,
            "disagreement_count": 8
          },
          {
            "a": 1,
            "b": 5,
            "distance": 0.015037593984962405,
            "overlap_count": 399,
            "agreement_count": 393,
            "disagreement_count": 6
          },
          {
            "a": 1,
            "b": 6,
            "distance": 0.05012531328320802,
            "overlap_count": 399,
            "agreement_count": 379,
            "disagreement_count": 20
          },
          {
            "a": 1,
            "b": 7,
            "distance": 0.017543859649122806,
            "overlap_count": 399,
            "agreement_count": 392,
            "disagreement_count": 7
          },
          {
            "a": 1,
            "b": 8,
            "distance": 0.03258145363408521,
            "overlap_count": 399,
            "agreement_count": 386,
            "disagreement_count": 13
          },
          {
            "a": 1,
            "b": 9,
            "distance": 0.015037593984962405,
            "overlap_count": 399,
            "agreement_count": 393,
            "disagreement_count": 6
          },
          {
            "a": 1,
            "b": 10,
            "distance": 0.015037593984962405,
            "overlap_count": 399,
            "agreement_count": 393,
            "disagreement_count": 6
          },
          {
            "a": 2,
            "b": 3,
            "distance": 0.06,
            "overlap_count": 400,
            "agreement_count": 376,
            "disagreement_count": 24
          },
          {
            "a": 2,
            "b": 4,
            "distance": 0.0575,
            "overlap_count": 400,
            "agreement_count": 377,
            "disagreement_count": 23
          },
          {
            "a": 2,
            "b": 5,
            "distance": 0.0575,
            "overlap_count": 400,
            "agreement_count": 377,
            "disagreement_count": 23
          },
          {
            "a": 2,
            "b": 6,
            "distance": 0.0725,
            "overlap_count": 400,
            "agreement_count": 371,
            "disagreement_count": 29
          },
          {
            "a": 2,
            "b": 7,
            "distance": 0.065,
            "overlap_count": 400,
            "agreement_count": 374,
            "disagreement_count": 26
          },
          {
            "a": 2,
            "b": 8,
            "distance": 0.045,
            "overlap_count": 400,
            "agreement_count": 382,
            "disagreement_count": 18
          },
          {
            "a": 2,
            "b": 9,
            "distance": 0.0475,
            "overlap_count": 400,
            "agreement_count": 381,
            "disagreement_count": 19
          },
          {
            "a": 2,
            "b": 10,
            "distance": 0.0575,
            "overlap_count": 400,
            "agreement_count": 377,
            "disagreement_count": 23
          },
          {
            "a": 3,
            "b": 4,
            "distance": 0.0175,
            "overlap_count": 400,
            "agreement_count": 393,
            "disagreement_count": 7
          },
          {
            "a": 3,
            "b": 5,
            "distance": 0.0175,
            "overlap_count": 400,
            "agreement_count": 393,
            "disagreement_count": 7
          },
          {
            "a": 3,
            "b": 6,
            "distance": 0.0525,
            "overlap_count": 400,
            "agreement_count": 379,
            "disagreement_count": 21
          },
          {
            "a": 3,
            "b": 7,
            "distance": 0.01,
            "overlap_count": 400,
            "agreement_count": 396,
            "disagreement_count": 4
          },
          {
            "a": 3,
            "b": 8,
            "distance": 0.03,
            "overlap_count": 400,
            "agreement_count": 388,
            "disagreement_count": 12
          },
          {
            "a": 3,
            "b": 9,
            "distance": 0.0175,
            "overlap_count": 400,
            "agreement_count": 393,
            "disagreement_count": 7
          },
          {
            "a": 3,
            "b": 10,
            "distance": 0.0075,
            "overlap_count": 400,
            "agreement_count": 397,
            "disagreement_count": 3
          },
          {
            "a": 4,
            "b": 5,
            "distance": 0.02,
            "overlap_count": 400,
            "agreement_count": 392,
            "disagreement_count": 8
          },
          {
            "a": 4,
            "b": 6,
            "distance": 0.05,
            "overlap_count": 400,
            "agreement_count": 380,
            "disagreement_count": 20
          },
          {
            "a": 4,
            "b": 7,
            "distance": 0.0125,
            "overlap_count": 400,
            "agreement_count": 395,
            "disagreement_count": 5
          },
          {
            "a": 4,
            "b": 8,
            "distance": 0.0325,
            "overlap_count": 400,
            "agreement_count": 387,
            "disagreement_count": 13
          },
          {
            "a": 4,
            "b": 9,
            "distance": 0.01,
            "overlap_count": 400,
            "agreement_count": 396,
            "disagreement_count": 4
          },
          {
            "a": 4,
            "b": 10,
            "distance": 0.01,
            "overlap_count": 400,
            "agreement_count": 396,
            "disagreement_count": 4
          },
          {
            "a": 5,
            "b": 6,
            "distance": 0.055,
            "overlap_count": 400,
            "agreement_count": 378,
            "disagreement_count": 22
          },
          {
            "a": 5,
            "b": 7,
            "distance": 0.0225,
            "overlap_count": 400,
            "agreement_count": 391,
            "disagreement_count": 9
          },
          {
            "a": 5,
            "b": 8,
            "distance": 0.0375,
            "overlap_count": 400,
            "agreement_count": 385,
            "disagreement_count": 15
          },
          {
            "a": 5,
            "b": 9,
            "distance": 0.015,
            "overlap_count": 400,
            "agreement_count": 394,
            "disagreement_count": 6
          },
          {
            "a": 5,
            "b": 10,
            "distance": 0.02,
            "overlap_count": 400,
            "agreement_count": 392,
            "disagreement_count": 8
          },
          {
            "a": 6,
            "b": 7,
            "distance": 0.0475,
            "overlap_count": 400,
            "agreement_count": 381,
            "disagreement_count": 19
          },
          {
            "a": 6,
            "b": 8,
            "distance": 0.0475,
            "overlap_count": 400,
            "agreement_count": 381,
            "disagreement_count": 19
          },
          {
            "a": 6,
            "b": 9,
            "distance": 0.05,
            "overlap_count": 400,
            "agreement_count": 380,
            "disagreement_count": 20
          },
          {
            "a": 6,
            "b": 10,
            "distance": 0.055,
            "overlap_count": 400,
            "agreement_count": 378,
            "disagreement_count": 22
          },
          {
            "a": 7,
            "b": 8,
            "distance": 0.03,
            "overlap_count": 400,
            "agreement_count": 388,
            "disagreement_count": 12
          },
          {
            "a": 7,
            "b": 9,
            "distance": 0.0175,
            "overlap_count": 400,
            "agreement_count": 393,
            "disagreement_count": 7
          },
          {
            "a": 7,
            "b": 10,
            "distance": 0.0075,
            "overlap_count": 400,
            "agreement_count": 397,
            "disagreement_count": 3
          },
          {
            "a": 8,
            "b": 9,
            "distance": 0.0325,
            "overlap_count": 400,
            "agreement_count": 387,
            "disagreement_count": 13
          },
          {
            "a": 8,
            "b": 10,
            "distance": 0.0325,
            "overlap_count": 400,
            "agreement_count": 387,
            "disagreement_count": 13
          },
          {
            "a": 9,
            "b": 10,
            "distance": 0.015,
            "overlap_count": 400,
            "agreement_count": 394,
            "disagreement_count": 6
          }
        ],
        "linkage": [
          [
            3,
            10,
            0.0075,
            2
          ],
          [
            7,
            11,
            0.00875,
            3
          ],
          [
            4,
            9,
            0.01,
            2
          ],
          [
            0,
            13,
            0.0125,
            3
          ],
          [
            12,
            14,
            0.014722222222222223,
            6
          ],
          [
            1,
            5,
            0.015037593984962405,
            2
          ],
          [
            15,
            16,
            0.0175203634085213,
            8
          ],
          [
            8,
            17,
            0.03282268170426065,
            9
          ],
          [
            6,
            18,
            0.050569479253689785,
            10
          ],
          [
            2,
            19,
            0.05651315789473684,
            11
          ]
        ],
        "linkage_complete": true
      },
      {
        "group_id": "da35194c64831de2",
        "cluster_scope": "cross_model",
        "representative_policy": "best_accuracy",
        "task_fingerprint": "e0860bb13f8109f7854720ca263054673c978e0e327536fcf1d0e1226f2388db",
        "normalized_tag_key": "lemmatization;morphology;old english;v3",
        "task_name_display": "OE lemmatization",
        "task_names_seen": [
          "OE lemmatization"
        ],
        "tags_display": "Old English; lemmatization; morphology; v3",
        "model_count": 8,
        "distance_metric": "nominal_disagreement_rate",
        "linkage_method": "average",
        "representative_run_stems": [
          "ycoe3__einfra__deepseekv32thinking__2026-03-18-23-50",
          "ycoe3__einfra__glm47__2026-03-20-10-02",
          "ycoe3__einfra__glm5__2026-03-31-20-38",
          "ycoe3__einfra__gptoss120b__2026-03-20-15-32",
          "ycoe3__einfra__kimik25__2026-03-19-21-22",
          "ycoe3__openai__gpt54mini__2026-03-18-17-04",
          "ycoe3__requesty__anthropicclaudehaiku45__2026-03-23-01-29",
          "ycoe3__vertex__gemini3flashpreview__2026-03-18-01-11"
        ],
        "comparable_pair_count": 28,
        "representatives": [
          {
            "provider": "e-infra",
            "model": "deepseek-v3.2-thinking",
            "run_stem": "ycoe3__einfra__deepseekv32thinking__2026-03-18-23-50",
            "metrics_file": "ycoe3__einfra__deepseekv32thinking__2026-03-18-23-50__metrics.json",
            "timestamp": "2026-03-18T22:50:33.957474Z",
            "accuracy": 0.809961144471918,
            "cohen_kappa": 0.8067742518245767
          },
          {
            "provider": "e-infra",
            "model": "glm-4.7",
            "run_stem": "ycoe3__einfra__glm47__2026-03-20-10-02",
            "metrics_file": "ycoe3__einfra__glm47__2026-03-20-10-02__metrics.json",
            "timestamp": "2026-03-20T09:02:57.831985Z",
            "accuracy": 0.8212645708230307,
            "cohen_kappa": 0.8182681322919435
          },
          {
            "provider": "e-infra",
            "model": "glm-5",
            "run_stem": "ycoe3__einfra__glm5__2026-03-31-20-38",
            "metrics_file": "ycoe3__einfra__glm5__2026-03-31-20-38__metrics.json",
            "timestamp": "2026-03-31T18:38:48.544821Z",
            "accuracy": 0.8205581066760862,
            "cohen_kappa": 0.8175208636693767
          },
          {
            "provider": "e-infra",
            "model": "gpt-oss-120b",
            "run_stem": "ycoe3__einfra__gptoss120b__2026-03-20-15-32",
            "metrics_file": "ycoe3__einfra__gptoss120b__2026-03-20-15-32__metrics.json",
            "timestamp": "2026-03-20T14:32:56.669315Z",
            "accuracy": 0.7004592016955139,
            "cohen_kappa": 0.6961615957069811
          },
          {
            "provider": "e-infra",
            "model": "kimi-k2.5",
            "run_stem": "ycoe3__einfra__kimik25__2026-03-19-21-22",
            "metrics_file": "ycoe3__einfra__kimik25__2026-03-19-21-22__metrics.json",
            "timestamp": "2026-03-19T20:22:24.560641Z",
            "accuracy": 0.8265630519251148,
            "cohen_kappa": 0.8235506107595435
          },
          {
            "provider": "openai",
            "model": "gpt-5.4-mini",
            "run_stem": "ycoe3__openai__gpt54mini__2026-03-18-17-04",
            "metrics_file": "ycoe3__openai__gpt54mini__2026-03-18-17-04__metrics.json",
            "timestamp": "2026-03-18T16:04:56.311644Z",
            "accuracy": 0.6944542564464854,
            "cohen_kappa": 0.6896664555799817
          },
          {
            "provider": "requesty",
            "model": "claude-haiku-4-5",
            "run_stem": "ycoe3__requesty__anthropicclaudehaiku45__2026-03-23-01-29",
            "metrics_file": "ycoe3__requesty__anthropicclaudehaiku45__2026-03-23-01-29__metrics.json",
            "timestamp": "2026-03-23T00:29:26.469275Z",
            "accuracy": 0.6937477922995408,
            "cohen_kappa": 0.6901560734270713
          },
          {
            "provider": "vertex",
            "model": "gemini-3-flash-preview",
            "run_stem": "ycoe3__vertex__gemini3flashpreview__2026-03-18-01-11",
            "metrics_file": "ycoe3__vertex__gemini3flashpreview__2026-03-18-01-11__metrics.json",
            "timestamp": "2026-03-18T00:11:38.185791Z",
            "accuracy": 0.8322147651006712,
            "cohen_kappa": 0.8292449559819026
          }
        ],
        "pairwise": [
          {
            "a": 0,
            "b": 1,
            "distance": 0.1279971791255289,
            "overlap_count": 2836,
            "agreement_count": 2473,
            "disagreement_count": 363
          },
          {
            "a": 0,
            "b": 2,
            "distance": 0.1229739252995067,
            "overlap_count": 2838,
            "agreement_count": 2489,
            "disagreement_count": 349
          },
          {
            "a": 0,
            "b": 3,
            "distance": 0.22532720198089848,
            "overlap_count": 2827,
            "agreement_count": 2190,
            "disagreement_count": 637
          },
          {
            "a": 0,
            "b": 4,
            "distance": 0.1346018322762509,
            "overlap_count": 2838,
            "agreement_count": 2456,
            "disagreement_count": 382
          },
          {
            "a": 0,
            "b": 5,
            "distance": 0.2690677966101695,
            "overlap_count": 2832,
            "agreement_count": 2070,
            "disagreement_count": 762
          },
          {
            "a": 0,
            "b": 6,
            "distance": 0.28621783574198095,
            "overlap_count": 2837,
            "agreement_count": 2025,
            "disagreement_count": 812
          },
          {
            "a": 0,
            "b": 7,
            "distance": 0.11557434813248767,
            "overlap_count": 2838,
            "agreement_count": 2510,
            "disagreement_count": 328
          },
          {
            "a": 1,
            "b": 2,
            "distance": 0.09026798307475317,
            "overlap_count": 2836,
            "agreement_count": 2580,
            "disagreement_count": 256
          },
          {
            "a": 1,
            "b": 3,
            "distance": 0.20601769911504425,
            "overlap_count": 2825,
            "agreement_count": 2243,
            "disagreement_count": 582
          },
          {
            "a": 1,
            "b": 4,
            "distance": 0.11636107193229901,
            "overlap_count": 2836,
            "agreement_count": 2506,
            "disagreement_count": 330
          },
          {
            "a": 1,
            "b": 5,
            "distance": 0.25830388692579503,
            "overlap_count": 2830,
            "agreement_count": 2099,
            "disagreement_count": 731
          },
          {
            "a": 1,
            "b": 6,
            "distance": 0.272310405643739,
            "overlap_count": 2835,
            "agreement_count": 2063,
            "disagreement_count": 772
          },
          {
            "a": 1,
            "b": 7,
            "distance": 0.12306064880112835,
            "overlap_count": 2836,
            "agreement_count": 2487,
            "disagreement_count": 349
          },
          {
            "a": 2,
            "b": 3,
            "distance": 0.20551821719136895,
            "overlap_count": 2827,
            "agreement_count": 2246,
            "disagreement_count": 581
          },
          {
            "a": 2,
            "b": 4,
            "distance": 0.11804087385482734,
            "overlap_count": 2838,
            "agreement_count": 2503,
            "disagreement_count": 335
          },
          {
            "a": 2,
            "b": 5,
            "distance": 0.2542372881355932,
            "overlap_count": 2832,
            "agreement_count": 2112,
            "disagreement_count": 720
          },
          {
            "a": 2,
            "b": 6,
            "distance": 0.2749383151216073,
            "overlap_count": 2837,
            "agreement_count": 2057,
            "disagreement_count": 780
          },
          {
            "a": 2,
            "b": 7,
            "distance": 0.12050739957716702,
            "overlap_count": 2838,
            "agreement_count": 2496,
            "disagreement_count": 342
          },
          {
            "a": 3,
            "b": 4,
            "distance": 0.23523169437566324,
            "overlap_count": 2827,
            "agreement_count": 2162,
            "disagreement_count": 665
          },
          {
            "a": 3,
            "b": 5,
            "distance": 0.30297661233167966,
            "overlap_count": 2822,
            "agreement_count": 1967,
            "disagreement_count": 855
          },
          {
            "a": 3,
            "b": 6,
            "distance": 0.2699929228591649,
            "overlap_count": 2826,
            "agreement_count": 2063,
            "disagreement_count": 763
          },
          {
            "a": 3,
            "b": 7,
            "distance": 0.2263883975946233,
            "overlap_count": 2827,
            "agreement_count": 2187,
            "disagreement_count": 640
          },
          {
            "a": 4,
            "b": 5,
            "distance": 0.2874293785310734,
            "overlap_count": 2832,
            "agreement_count": 2018,
            "disagreement_count": 814
          },
          {
            "a": 4,
            "b": 6,
            "distance": 0.3119492421572083,
            "overlap_count": 2837,
            "agreement_count": 1952,
            "disagreement_count": 885
          },
          {
            "a": 4,
            "b": 7,
            "distance": 0.1269338959212377,
            "overlap_count": 2844,
            "agreement_count": 2483,
            "disagreement_count": 361
          },
          {
            "a": 5,
            "b": 6,
            "distance": 0.32426704344754503,
            "overlap_count": 2831,
            "agreement_count": 1913,
            "disagreement_count": 918
          },
          {
            "a": 5,
            "b": 7,
            "distance": 0.2680084745762712,
            "overlap_count": 2832,
            "agreement_count": 2073,
            "disagreement_count": 759
          },
          {
            "a": 6,
            "b": 7,
            "distance": 0.2982023264011279,
            "overlap_count": 2837,
            "agreement_count": 1991,
            "disagreement_count": 846
          }
        ],
        "linkage": [
          [
            1,
            2,
            0.09026798307475317,
            2
          ],
          [
            0,
            7,
            0.11557434813248767,
            2
          ],
          [
            4,
            8,
            0.11720097289356318,
            3
          ],
          [
            9,
            10,
            0.12601248016680325,
            5
          ],
          [
            3,
            11,
            0.21969664205151967,
            6
          ],
          [
            5,
            12,
            0.2733372395184303,
            7
          ],
          [
            6,
            13,
            0.2911254416246248,
            8
          ]
        ],
        "linkage_complete": true
      },
      {
        "group_id": "0b905fcaf17fc900",
        "cluster_scope": "cross_model",
        "representative_policy": "best_accuracy",
        "task_fingerprint": "eca5b779052ec243fcc76cdc818bae503645042de8cdc27356517265352b7c24",
        "normalized_tag_key": "lemmatization;morphology;old english;v4;validator",
        "task_name_display": "OE lemmatization",
        "task_names_seen": [
          "OE lemmatization"
        ],
        "tags_display": "Old English; lemmatization; morphology; v4; validator",
        "model_count": 3,
        "distance_metric": "nominal_disagreement_rate",
        "linkage_method": "average",
        "representative_run_stems": [
          "ycoe3__einfra__glm51__2026-04-27-22-51",
          "ycoe3__einfra__kimik26__2026-04-29-01-25",
          "ycoe3__vertex__gemini3flashpreview__2026-04-30-21-56"
        ],
        "comparable_pair_count": 3,
        "representatives": [
          {
            "provider": "e-infra",
            "model": "glm-5.1",
            "run_stem": "ycoe3__einfra__glm51__2026-04-27-22-51",
            "metrics_file": "ycoe3__einfra__glm51__2026-04-27-22-51__metrics.json",
            "timestamp": "2026-04-27T20:51:24.742584Z",
            "accuracy": 0.9739985945186226,
            "cohen_kappa": 0.9735352590737089
          },
          {
            "provider": "e-infra",
            "model": "kimi-k2.6",
            "run_stem": "ycoe3__einfra__kimik26__2026-04-29-01-25",
            "metrics_file": "ycoe3__einfra__kimik26__2026-04-29-01-25__metrics.json",
            "timestamp": "2026-04-28T23:25:53.664337Z",
            "accuracy": 0.9739985945186226,
            "cohen_kappa": 0.973533010784025
          },
          {
            "provider": "vertex",
            "model": "gemini-3-flash-preview",
            "run_stem": "ycoe3__vertex__gemini3flashpreview__2026-04-30-21-56",
            "metrics_file": "ycoe3__vertex__gemini3flashpreview__2026-04-30-21-56__metrics.json",
            "timestamp": "2026-04-30T19:56:09.797765Z",
            "accuracy": 0.9873506676036542,
            "cohen_kappa": 0.9871231511429811
          }
        ],
        "pairwise": [
          {
            "a": 0,
            "b": 1,
            "distance": 0.023313316849169905,
            "overlap_count": 2831,
            "agreement_count": 2765,
            "disagreement_count": 66
          },
          {
            "a": 0,
            "b": 2,
            "distance": 0.021441124780316345,
            "overlap_count": 2845,
            "agreement_count": 2784,
            "disagreement_count": 61
          },
          {
            "a": 1,
            "b": 2,
            "distance": 0.015536723163841809,
            "overlap_count": 2832,
            "agreement_count": 2788,
            "disagreement_count": 44
          }
        ],
        "linkage": [
          [
            1,
            2,
            0.015536723163841809,
            2
          ],
          [
            0,
            3,
            0.022377220814743123,
            3
          ]
        ],
        "linkage_complete": true
      },
      {
        "group_id": "714c54f5e2df639d",
        "cluster_scope": "cross_model",
        "representative_policy": "best_accuracy",
        "task_fingerprint": "5d2775386b7429a15d795e64be76f5d0829aff4b3b67fc8acae71e4adebccffd",
        "normalized_tag_key": "morphology;number;old english",
        "task_name_display": "OE number",
        "task_names_seen": [
          "OE number"
        ],
        "tags_display": "Old English; morphology; number",
        "model_count": 19,
        "distance_metric": "nominal_disagreement_rate",
        "linkage_method": "average",
        "representative_run_stems": [
          "OE_number____deepseekv32thinking__2026-02-21-02-26",
          "OE_number____gemini3flashpreview__2026-02-25-01-10",
          "OE_number____gemini31propreview__2026-02-21-02-26",
          "OE_number____glm47__2026-02-22-16-17",
          "OE_number____gptoss120b__2026-02-21-18-49",
          "OE_number____gpt51__2026-02-24-17-09",
          "OE_number____gpt52pro__2026-02-21-02-26",
          "OE_number____gpt5mini__2026-02-24-01-18",
          "OE_number____kimik25__2026-02-21-23-36",
          "OE_number__einfra__glm5__2026-04-02-01-04",
          "OE_number__inception__mercury2__2026-03-04-23-43",
          "OE_number__openai__gpt54mini__2026-03-20-18-21",
          "OE_number__openai__gpt54pro__2026-03-10-15-22",
          "OE_number__openai__qwen35__2026-02-23-02-19",
          "OE_number__openrouter__qwenqwen36plusfree__2026-04-04-00-50",
          "OE_number__requesty__anthropicclaudehaiku45__2026-03-23-01-17",
          "OE_number__requesty__claudesonnet46__2026-02-21-20-17",
          "OE_number__openai__gpt54__2026-03-10-14-52",
          "OE_number__vertex__gemini3flashpreview__2026-02-27-02-12"
        ],
        "comparable_pair_count": 171,
        "representatives": [
          {
            "provider": "",
            "model": "deepseek-v3.2-thinking",
            "run_stem": "OE_number____deepseekv32thinking__2026-02-21-02-26",
            "metrics_file": "OE_number____deepseekv32thinking__2026-02-21-02-26__metrics.json",
            "timestamp": "2026-02-21T01:26:38.907718Z",
            "accuracy": 0.9633333333333334,
            "cohen_kappa": 0.8979509004673386
          },
          {
            "provider": "",
            "model": "gemini-3-flash-preview",
            "run_stem": "OE_number____gemini3flashpreview__2026-02-25-01-10",
            "metrics_file": "OE_number____gemini3flashpreview__2026-02-25-01-10__metrics.json",
            "timestamp": "2026-02-25T00:10:24.527172Z",
            "accuracy": 0.9808333333333333,
            "cohen_kappa": 0.9449738825312014
          },
          {
            "provider": "",
            "model": "gemini-3.1-pro-preview",
            "run_stem": "OE_number____gemini31propreview__2026-02-21-02-26",
            "metrics_file": "OE_number____gemini31propreview__2026-02-21-02-26__metrics.json",
            "timestamp": "2026-02-21T01:26:31.217004Z",
            "accuracy": 0.9825,
            "cohen_kappa": 0.949889040017181
          },
          {
            "provider": "",
            "model": "glm-4.7",
            "run_stem": "OE_number____glm47__2026-02-22-16-17",
            "metrics_file": "OE_number____glm47__2026-02-22-16-17__metrics.json",
            "timestamp": "2026-02-22T15:17:02.084278Z",
            "accuracy": 0.9516666666666667,
            "cohen_kappa": 0.8702485039428794
          },
          {
            "provider": "",
            "model": "gpt-oss-120b",
            "run_stem": "OE_number____gptoss120b__2026-02-21-18-49",
            "metrics_file": "OE_number____gptoss120b__2026-02-21-18-49__metrics.json",
            "timestamp": "2026-02-21T17:49:34.964163Z",
            "accuracy": 0.8766666666666667,
            "cohen_kappa": 0.6989514152334654
          },
          {
            "provider": "",
            "model": "gpt51",
            "run_stem": "OE_number____gpt51__2026-02-24-17-09",
            "metrics_file": "OE_number____gpt51__2026-02-24-17-09__metrics.json",
            "timestamp": "2026-02-24T16:09:02.122628Z",
            "accuracy": 0.9308333333333333,
            "cohen_kappa": 0.8217653816833507
          },
          {
            "provider": "",
            "model": "gpt52pro",
            "run_stem": "OE_number____gpt52pro__2026-02-21-02-26",
            "metrics_file": "OE_number____gpt52pro__2026-02-21-02-26__metrics.json",
            "timestamp": "2026-02-21T01:26:20.586712Z",
            "accuracy": 0.9775,
            "cohen_kappa": 0.9357382563646894
          },
          {
            "provider": "",
            "model": "gpt5mini",
            "run_stem": "OE_number____gpt5mini__2026-02-24-01-18",
            "metrics_file": "OE_number____gpt5mini__2026-02-24-01-18__metrics.json",
            "timestamp": "2026-02-24T00:18:27.101111Z",
            "accuracy": 0.9175,
            "cohen_kappa": 0.7903386348715559
          },
          {
            "provider": "",
            "model": "kimi-k2.5",
            "run_stem": "OE_number____kimik25__2026-02-21-23-36",
            "metrics_file": "OE_number____kimik25__2026-02-21-23-36__metrics.json",
            "timestamp": "2026-02-21T22:37:01.106140Z",
            "accuracy": 0.9666666666666667,
            "cohen_kappa": 0.9075472279577182
          },
          {
            "provider": "e-infra",
            "model": "glm-5",
            "run_stem": "OE_number__einfra__glm5__2026-04-02-01-04",
            "metrics_file": "OE_number__einfra__glm5__2026-04-02-01-04__metrics.json",
            "timestamp": "2026-04-01T23:04:56.944335Z",
            "accuracy": 0.9641666666666666,
            "cohen_kappa": 0.8995307541034676
          },
          {
            "provider": "inception",
            "model": "mercury-2",
            "run_stem": "OE_number__inception__mercury2__2026-03-04-23-43",
            "metrics_file": "OE_number__inception__mercury2__2026-03-04-23-43__metrics.json",
            "timestamp": "2026-03-04T22:49:36.410032Z",
            "accuracy": 0.8308333333333333,
            "cohen_kappa": 0.6136240352496692
          },
          {
            "provider": "openai",
            "model": "gpt-5.4-mini",
            "run_stem": "OE_number__openai__gpt54mini__2026-03-20-18-21",
            "metrics_file": "OE_number__openai__gpt54mini__2026-03-20-18-21__metrics.json",
            "timestamp": "2026-03-20T17:21:04.759848Z",
            "accuracy": 0.9375,
            "cohen_kappa": 0.8335491030146107
          },
          {
            "provider": "openai",
            "model": "gpt-5.4-pro",
            "run_stem": "OE_number__openai__gpt54pro__2026-03-10-15-22",
            "metrics_file": "OE_number__openai__gpt54pro__2026-03-10-15-22__metrics.json",
            "timestamp": "2026-03-10T14:22:37.561603Z",
            "accuracy": 0.9791666666666666,
            "cohen_kappa": 0.9400331017278462
          },
          {
            "provider": "openai",
            "model": "qwen3.5",
            "run_stem": "OE_number__openai__qwen35__2026-02-23-02-19",
            "metrics_file": "OE_number__openai__qwen35__2026-02-23-02-19__metrics.json",
            "timestamp": "2026-02-23T01:19:56.706842Z",
            "accuracy": 0.9708333333333333,
            "cohen_kappa": 0.9179706725623618
          },
          {
            "provider": "openrouter",
            "model": "qwen3.6",
            "run_stem": "OE_number__openrouter__qwenqwen36plusfree__2026-04-04-00-50",
            "metrics_file": "OE_number__openrouter__qwenqwen36plusfree__2026-04-04-00-50__metrics.json",
            "timestamp": "2026-04-03T22:50:47.255105Z",
            "accuracy": 0.96,
            "cohen_kappa": 0.8909735156498432
          },
          {
            "provider": "requesty",
            "model": "claude-haiku-4-5",
            "run_stem": "OE_number__requesty__anthropicclaudehaiku45__2026-03-23-01-17",
            "metrics_file": "OE_number__requesty__anthropicclaudehaiku45__2026-03-23-01-17__metrics.json",
            "timestamp": "2026-03-23T00:17:54.157008Z",
            "accuracy": 0.9475,
            "cohen_kappa": 0.84400406084667
          },
          {
            "provider": "requesty",
            "model": "claude-sonnet-4-6",
            "run_stem": "OE_number__requesty__claudesonnet46__2026-02-21-20-17",
            "metrics_file": "OE_number__requesty__claudesonnet46__2026-02-21-20-17__metrics.json",
            "timestamp": "2026-02-21T19:17:16.089500Z",
            "accuracy": 0.97,
            "cohen_kappa": 0.9159480745883012
          },
          {
            "provider": "requesty",
            "model": "gpt-5.4-pro",
            "run_stem": "OE_number__openai__gpt54__2026-03-10-14-52",
            "metrics_file": "OE_number__openai__gpt54__2026-03-10-14-52__metrics.json",
            "timestamp": "2026-03-10T13:52:04.694015Z",
            "accuracy": 0.9491666666666667,
            "cohen_kappa": 0.8620278884612341
          },
          {
            "provider": "vertex",
            "model": "gemini-3-flash-preview",
            "run_stem": "OE_number__vertex__gemini3flashpreview__2026-02-27-02-12",
            "metrics_file": "OE_number__vertex__gemini3flashpreview__2026-02-27-02-12__metrics.json",
            "timestamp": "2026-02-27T01:12:08.626686Z",
            "accuracy": 0.9791666666666666,
            "cohen_kappa": 0.9403440952585486
          }
        ],
        "pairwise": [
          {
            "a": 0,
            "b": 1,
            "distance": 0.02422723475355054,
            "overlap_count": 1197,
            "agreement_count": 1168,
            "disagreement_count": 29
          },
          {
            "a": 0,
            "b": 2,
            "distance": 0.025898078529657476,
            "overlap_count": 1197,
            "agreement_count": 1166,
            "disagreement_count": 31
          },
          {
            "a": 0,
            "b": 3,
            "distance": 0.05430242272347535,
            "overlap_count": 1197,
            "agreement_count": 1132,
            "disagreement_count": 65
          },
          {
            "a": 0,
            "b": 4,
            "distance": 0.06945681211041853,
            "overlap_count": 1123,
            "agreement_count": 1045,
            "disagreement_count": 78
          },
          {
            "a": 0,
            "b": 5,
            "distance": 0.07569386038687972,
            "overlap_count": 1189,
            "agreement_count": 1099,
            "disagreement_count": 90
          },
          {
            "a": 0,
            "b": 6,
            "distance": 0.029239766081871343,
            "overlap_count": 1197,
            "agreement_count": 1162,
            "disagreement_count": 35
          },
          {
            "a": 0,
            "b": 7,
            "distance": 0.05982905982905983,
            "overlap_count": 1170,
            "agreement_count": 1100,
            "disagreement_count": 70
          },
          {
            "a": 0,
            "b": 8,
            "distance": 0.040100250626566414,
            "overlap_count": 1197,
            "agreement_count": 1149,
            "disagreement_count": 48
          },
          {
            "a": 0,
            "b": 9,
            "distance": 0.03678929765886288,
            "overlap_count": 1196,
            "agreement_count": 1152,
            "disagreement_count": 44
          },
          {
            "a": 0,
            "b": 10,
            "distance": 0.13315696649029982,
            "overlap_count": 1134,
            "agreement_count": 983,
            "disagreement_count": 151
          },
          {
            "a": 0,
            "b": 11,
            "distance": 0.06683375104427736,
            "overlap_count": 1197,
            "agreement_count": 1117,
            "disagreement_count": 80
          },
          {
            "a": 0,
            "b": 12,
            "distance": 0.02756892230576441,
            "overlap_count": 1197,
            "agreement_count": 1164,
            "disagreement_count": 33
          },
          {
            "a": 0,
            "b": 13,
            "distance": 0.03341687552213868,
            "overlap_count": 1197,
            "agreement_count": 1157,
            "disagreement_count": 40
          },
          {
            "a": 0,
            "b": 14,
            "distance": 0.04344193817878028,
            "overlap_count": 1197,
            "agreement_count": 1145,
            "disagreement_count": 52
          },
          {
            "a": 0,
            "b": 15,
            "distance": 0.06098579782790309,
            "overlap_count": 1197,
            "agreement_count": 1124,
            "disagreement_count": 73
          },
          {
            "a": 0,
            "b": 16,
            "distance": 0.03842940685045948,
            "overlap_count": 1197,
            "agreement_count": 1151,
            "disagreement_count": 46
          },
          {
            "a": 0,
            "b": 17,
            "distance": 0.05434782608695652,
            "overlap_count": 1196,
            "agreement_count": 1131,
            "disagreement_count": 65
          },
          {
            "a": 0,
            "b": 18,
            "distance": 0.029239766081871343,
            "overlap_count": 1197,
            "agreement_count": 1162,
            "disagreement_count": 35
          },
          {
            "a": 1,
            "b": 2,
            "distance": 0.0016666666666666668,
            "overlap_count": 1200,
            "agreement_count": 1198,
            "disagreement_count": 2
          },
          {
            "a": 1,
            "b": 3,
            "distance": 0.05337781484570475,
            "overlap_count": 1199,
            "agreement_count": 1135,
            "disagreement_count": 64
          },
          {
            "a": 1,
            "b": 4,
            "distance": 0.06933333333333333,
            "overlap_count": 1125,
            "agreement_count": 1047,
            "disagreement_count": 78
          },
          {
            "a": 1,
            "b": 5,
            "distance": 0.06968933669185558,
            "overlap_count": 1191,
            "agreement_count": 1108,
            "disagreement_count": 83
          },
          {
            "a": 1,
            "b": 6,
            "distance": 0.013333333333333334,
            "overlap_count": 1200,
            "agreement_count": 1184,
            "disagreement_count": 16
          },
          {
            "a": 1,
            "b": 7,
            "distance": 0.06313993174061433,
            "overlap_count": 1172,
            "agreement_count": 1098,
            "disagreement_count": 74
          },
          {
            "a": 1,
            "b": 8,
            "distance": 0.030833333333333334,
            "overlap_count": 1200,
            "agreement_count": 1163,
            "disagreement_count": 37
          },
          {
            "a": 1,
            "b": 9,
            "distance": 0.03252710592160134,
            "overlap_count": 1199,
            "agreement_count": 1160,
            "disagreement_count": 39
          },
          {
            "a": 1,
            "b": 10,
            "distance": 0.12313104661389622,
            "overlap_count": 1137,
            "agreement_count": 997,
            "disagreement_count": 140
          },
          {
            "a": 1,
            "b": 11,
            "distance": 0.06666666666666667,
            "overlap_count": 1200,
            "agreement_count": 1120,
            "disagreement_count": 80
          },
          {
            "a": 1,
            "b": 12,
            "distance": 0.006666666666666667,
            "overlap_count": 1200,
            "agreement_count": 1192,
            "disagreement_count": 8
          },
          {
            "a": 1,
            "b": 13,
            "distance": 0.023333333333333334,
            "overlap_count": 1200,
            "agreement_count": 1172,
            "disagreement_count": 28
          },
          {
            "a": 1,
            "b": 14,
            "distance": 0.035833333333333335,
            "overlap_count": 1200,
            "agreement_count": 1157,
            "disagreement_count": 43
          },
          {
            "a": 1,
            "b": 15,
            "distance": 0.045,
            "overlap_count": 1200,
            "agreement_count": 1146,
            "disagreement_count": 54
          },
          {
            "a": 1,
            "b": 16,
            "distance": 0.0275,
            "overlap_count": 1200,
            "agreement_count": 1167,
            "disagreement_count": 33
          },
          {
            "a": 1,
            "b": 17,
            "distance": 0.05087572977481234,
            "overlap_count": 1199,
            "agreement_count": 1138,
            "disagreement_count": 61
          },
          {
            "a": 1,
            "b": 18,
            "distance": 0.006666666666666667,
            "overlap_count": 1200,
            "agreement_count": 1192,
            "disagreement_count": 8
          },
          {
            "a": 2,
            "b": 3,
            "distance": 0.05170975813177648,
            "overlap_count": 1199,
            "agreement_count": 1137,
            "disagreement_count": 62
          },
          {
            "a": 2,
            "b": 4,
            "distance": 0.06933333333333333,
            "overlap_count": 1125,
            "agreement_count": 1047,
            "disagreement_count": 78
          },
          {
            "a": 2,
            "b": 5,
            "distance": 0.06801007556675064,
            "overlap_count": 1191,
            "agreement_count": 1110,
            "disagreement_count": 81
          },
          {
            "a": 2,
            "b": 6,
            "distance": 0.011666666666666667,
            "overlap_count": 1200,
            "agreement_count": 1186,
            "disagreement_count": 14
          },
          {
            "a": 2,
            "b": 7,
            "distance": 0.06143344709897611,
            "overlap_count": 1172,
            "agreement_count": 1100,
            "disagreement_count": 72
          },
          {
            "a": 2,
            "b": 8,
            "distance": 0.029166666666666667,
            "overlap_count": 1200,
            "agreement_count": 1165,
            "disagreement_count": 35
          },
          {
            "a": 2,
            "b": 9,
            "distance": 0.030859049207673062,
            "overlap_count": 1199,
            "agreement_count": 1162,
            "disagreement_count": 37
          },
          {
            "a": 2,
            "b": 10,
            "distance": 0.12137203166226913,
            "overlap_count": 1137,
            "agreement_count": 999,
            "disagreement_count": 138
          },
          {
            "a": 2,
            "b": 11,
            "distance": 0.06666666666666667,
            "overlap_count": 1200,
            "agreement_count": 1120,
            "disagreement_count": 80
          },
          {
            "a": 2,
            "b": 12,
            "distance": 0.006666666666666667,
            "overlap_count": 1200,
            "agreement_count": 1192,
            "disagreement_count": 8
          },
          {
            "a": 2,
            "b": 13,
            "distance": 0.023333333333333334,
            "overlap_count": 1200,
            "agreement_count": 1172,
            "disagreement_count": 28
          },
          {
            "a": 2,
            "b": 14,
            "distance": 0.035833333333333335,
            "overlap_count": 1200,
            "agreement_count": 1157,
            "disagreement_count": 43
          },
          {
            "a": 2,
            "b": 15,
            "distance": 0.043333333333333335,
            "overlap_count": 1200,
            "agreement_count": 1148,
            "disagreement_count": 52
          },
          {
            "a": 2,
            "b": 16,
            "distance": 0.025833333333333333,
            "overlap_count": 1200,
            "agreement_count": 1169,
            "disagreement_count": 31
          },
          {
            "a": 2,
            "b": 17,
            "distance": 0.05254378648874062,
            "overlap_count": 1199,
            "agreement_count": 1136,
            "disagreement_count": 63
          },
          {
            "a": 2,
            "b": 18,
            "distance": 0.006666666666666667,
            "overlap_count": 1200,
            "agreement_count": 1192,
            "disagreement_count": 8
          },
          {
            "a": 3,
            "b": 4,
            "distance": 0.07466666666666667,
            "overlap_count": 1125,
            "agreement_count": 1041,
            "disagreement_count": 84
          },
          {
            "a": 3,
            "b": 5,
            "distance": 0.06801007556675064,
            "overlap_count": 1191,
            "agreement_count": 1110,
            "disagreement_count": 81
          },
          {
            "a": 3,
            "b": 6,
            "distance": 0.04837364470391994,
            "overlap_count": 1199,
            "agreement_count": 1141,
            "disagreement_count": 58
          },
          {
            "a": 3,
            "b": 7,
            "distance": 0.05721605465414176,
            "overlap_count": 1171,
            "agreement_count": 1104,
            "disagreement_count": 67
          },
          {
            "a": 3,
            "b": 8,
            "distance": 0.05087572977481234,
            "overlap_count": 1199,
            "agreement_count": 1138,
            "disagreement_count": 61
          },
          {
            "a": 3,
            "b": 9,
            "distance": 0.052587646076794656,
            "overlap_count": 1198,
            "agreement_count": 1135,
            "disagreement_count": 63
          },
          {
            "a": 3,
            "b": 10,
            "distance": 0.12764084507042253,
            "overlap_count": 1136,
            "agreement_count": 991,
            "disagreement_count": 145
          },
          {
            "a": 3,
            "b": 11,
            "distance": 0.07339449541284404,
            "overlap_count": 1199,
            "agreement_count": 1111,
            "disagreement_count": 88
          },
          {
            "a": 3,
            "b": 12,
            "distance": 0.05170975813177648,
            "overlap_count": 1199,
            "agreement_count": 1137,
            "disagreement_count": 62
          },
          {
            "a": 3,
            "b": 13,
            "distance": 0.05504587155963303,
            "overlap_count": 1199,
            "agreement_count": 1133,
            "disagreement_count": 66
          },
          {
            "a": 3,
            "b": 14,
            "distance": 0.05421184320266889,
            "overlap_count": 1199,
            "agreement_count": 1134,
            "disagreement_count": 65
          },
          {
            "a": 3,
            "b": 15,
            "distance": 0.08006672226855713,
            "overlap_count": 1199,
            "agreement_count": 1103,
            "disagreement_count": 96
          },
          {
            "a": 3,
            "b": 16,
            "distance": 0.0475396163469558,
            "overlap_count": 1199,
            "agreement_count": 1142,
            "disagreement_count": 57
          },
          {
            "a": 3,
            "b": 17,
            "distance": 0.0642737896494157,
            "overlap_count": 1198,
            "agreement_count": 1121,
            "disagreement_count": 77
          },
          {
            "a": 3,
            "b": 18,
            "distance": 0.05004170141784821,
            "overlap_count": 1199,
            "agreement_count": 1139,
            "disagreement_count": 60
          },
          {
            "a": 4,
            "b": 5,
            "distance": 0.07295373665480427,
            "overlap_count": 1124,
            "agreement_count": 1042,
            "disagreement_count": 82
          },
          {
            "a": 4,
            "b": 6,
            "distance": 0.07111111111111111,
            "overlap_count": 1125,
            "agreement_count": 1045,
            "disagreement_count": 80
          },
          {
            "a": 4,
            "b": 7,
            "distance": 0.0703971119133574,
            "overlap_count": 1108,
            "agreement_count": 1030,
            "disagreement_count": 78
          },
          {
            "a": 4,
            "b": 8,
            "distance": 0.07555555555555556,
            "overlap_count": 1125,
            "agreement_count": 1040,
            "disagreement_count": 85
          },
          {
            "a": 4,
            "b": 9,
            "distance": 0.0693950177935943,
            "overlap_count": 1124,
            "agreement_count": 1046,
            "disagreement_count": 78
          },
          {
            "a": 4,
            "b": 10,
            "distance": 0.12017937219730941,
            "overlap_count": 1115,
            "agreement_count": 981,
            "disagreement_count": 134
          },
          {
            "a": 4,
            "b": 11,
            "distance": 0.08444444444444445,
            "overlap_count": 1125,
            "agreement_count": 1030,
            "disagreement_count": 95
          },
          {
            "a": 4,
            "b": 12,
            "distance": 0.06933333333333333,
            "overlap_count": 1125,
            "agreement_count": 1047,
            "disagreement_count": 78
          },
          {
            "a": 4,
            "b": 13,
            "distance": 0.072,
            "overlap_count": 1125,
            "agreement_count": 1044,
            "disagreement_count": 81
          },
          {
            "a": 4,
            "b": 14,
            "distance": 0.07377777777777778,
            "overlap_count": 1125,
            "agreement_count": 1042,
            "disagreement_count": 83
          },
          {
            "a": 4,
            "b": 15,
            "distance": 0.09244444444444444,
            "overlap_count": 1125,
            "agreement_count": 1021,
            "disagreement_count": 104
          },
          {
            "a": 4,
            "b": 16,
            "distance": 0.06577777777777778,
            "overlap_count": 1125,
            "agreement_count": 1051,
            "disagreement_count": 74
          },
          {
            "a": 4,
            "b": 17,
            "distance": 0.08444444444444445,
            "overlap_count": 1125,
            "agreement_count": 1030,
            "disagreement_count": 95
          },
          {
            "a": 4,
            "b": 18,
            "distance": 0.06488888888888888,
            "overlap_count": 1125,
            "agreement_count": 1052,
            "disagreement_count": 73
          },
          {
            "a": 5,
            "b": 6,
            "distance": 0.07304785894206549,
            "overlap_count": 1191,
            "agreement_count": 1104,
            "disagreement_count": 87
          },
          {
            "a": 5,
            "b": 7,
            "distance": 0.061855670103092786,
            "overlap_count": 1164,
            "agreement_count": 1092,
            "disagreement_count": 72
          },
          {
            "a": 5,
            "b": 8,
            "distance": 0.07136859781696053,
            "overlap_count": 1191,
            "agreement_count": 1106,
            "disagreement_count": 85
          },
          {
            "a": 5,
            "b": 9,
            "distance": 0.07394957983193277,
            "overlap_count": 1190,
            "agreement_count": 1102,
            "disagreement_count": 88
          },
          {
            "a": 5,
            "b": 10,
            "distance": 0.13204225352112675,
            "overlap_count": 1136,
            "agreement_count": 986,
            "disagreement_count": 150
          },
          {
            "a": 5,
            "b": 11,
            "distance": 0.07472712006717044,
            "overlap_count": 1191,
            "agreement_count": 1102,
            "disagreement_count": 89
          },
          {
            "a": 5,
            "b": 12,
            "distance": 0.07304785894206549,
            "overlap_count": 1191,
            "agreement_count": 1104,
            "disagreement_count": 87
          },
          {
            "a": 5,
            "b": 13,
            "distance": 0.07556675062972293,
            "overlap_count": 1191,
            "agreement_count": 1101,
            "disagreement_count": 90
          },
          {
            "a": 5,
            "b": 14,
            "distance": 0.06633081444164568,
            "overlap_count": 1191,
            "agreement_count": 1112,
            "disagreement_count": 79
          },
          {
            "a": 5,
            "b": 15,
            "distance": 0.08648194794290512,
            "overlap_count": 1191,
            "agreement_count": 1088,
            "disagreement_count": 103
          },
          {
            "a": 5,
            "b": 16,
            "distance": 0.06381192275398824,
            "overlap_count": 1191,
            "agreement_count": 1115,
            "disagreement_count": 76
          },
          {
            "a": 5,
            "b": 17,
            "distance": 0.06633081444164568,
            "overlap_count": 1191,
            "agreement_count": 1112,
            "disagreement_count": 79
          },
          {
            "a": 5,
            "b": 18,
            "distance": 0.06801007556675064,
            "overlap_count": 1191,
            "agreement_count": 1110,
            "disagreement_count": 81
          },
          {
            "a": 6,
            "b": 7,
            "distance": 0.059726962457337884,
            "overlap_count": 1172,
            "agreement_count": 1102,
            "disagreement_count": 70
          },
          {
            "a": 6,
            "b": 8,
            "distance": 0.024166666666666666,
            "overlap_count": 1200,
            "agreement_count": 1171,
            "disagreement_count": 29
          },
          {
            "a": 6,
            "b": 9,
            "distance": 0.030859049207673062,
            "overlap_count": 1199,
            "agreement_count": 1162,
            "disagreement_count": 37
          },
          {
            "a": 6,
            "b": 10,
            "distance": 0.12576956904133685,
            "overlap_count": 1137,
            "agreement_count": 994,
            "disagreement_count": 143
          },
          {
            "a": 6,
            "b": 11,
            "distance": 0.06833333333333333,
            "overlap_count": 1200,
            "agreement_count": 1118,
            "disagreement_count": 82
          },
          {
            "a": 6,
            "b": 12,
            "distance": 0.008333333333333333,
            "overlap_count": 1200,
            "agreement_count": 1190,
            "disagreement_count": 10
          },
          {
            "a": 6,
            "b": 13,
            "distance": 0.021666666666666667,
            "overlap_count": 1200,
            "agreement_count": 1174,
            "disagreement_count": 26
          },
          {
            "a": 6,
            "b": 14,
            "distance": 0.04083333333333333,
            "overlap_count": 1200,
            "agreement_count": 1151,
            "disagreement_count": 49
          },
          {
            "a": 6,
            "b": 15,
            "distance": 0.045,
            "overlap_count": 1200,
            "agreement_count": 1146,
            "disagreement_count": 54
          },
          {
            "a": 6,
            "b": 16,
            "distance": 0.0225,
            "overlap_count": 1200,
            "agreement_count": 1173,
            "disagreement_count": 27
          },
          {
            "a": 6,
            "b": 17,
            "distance": 0.05587989991659716,
            "overlap_count": 1199,
            "agreement_count": 1132,
            "disagreement_count": 67
          },
          {
            "a": 6,
            "b": 18,
            "distance": 0.011666666666666667,
            "overlap_count": 1200,
            "agreement_count": 1186,
            "disagreement_count": 14
          },
          {
            "a": 7,
            "b": 8,
            "distance": 0.06313993174061433,
            "overlap_count": 1172,
            "agreement_count": 1098,
            "disagreement_count": 74
          },
          {
            "a": 7,
            "b": 9,
            "distance": 0.0623398804440649,
            "overlap_count": 1171,
            "agreement_count": 1098,
            "disagreement_count": 73
          },
          {
            "a": 7,
            "b": 10,
            "distance": 0.1275831087151842,
            "overlap_count": 1113,
            "agreement_count": 971,
            "disagreement_count": 142
          },
          {
            "a": 7,
            "b": 11,
            "distance": 0.07849829351535836,
            "overlap_count": 1172,
            "agreement_count": 1080,
            "disagreement_count": 92
          },
          {
            "a": 7,
            "b": 12,
            "distance": 0.06143344709897611,
            "overlap_count": 1172,
            "agreement_count": 1100,
            "disagreement_count": 72
          },
          {
            "a": 7,
            "b": 13,
            "distance": 0.060580204778156996,
            "overlap_count": 1172,
            "agreement_count": 1101,
            "disagreement_count": 71
          },
          {
            "a": 7,
            "b": 14,
            "distance": 0.05631399317406143,
            "overlap_count": 1172,
            "agreement_count": 1106,
            "disagreement_count": 66
          },
          {
            "a": 7,
            "b": 15,
            "distance": 0.08447098976109214,
            "overlap_count": 1172,
            "agreement_count": 1073,
            "disagreement_count": 99
          },
          {
            "a": 7,
            "b": 16,
            "distance": 0.06313993174061433,
            "overlap_count": 1172,
            "agreement_count": 1098,
            "disagreement_count": 74
          },
          {
            "a": 7,
            "b": 17,
            "distance": 0.06831767719897523,
            "overlap_count": 1171,
            "agreement_count": 1091,
            "disagreement_count": 80
          },
          {
            "a": 7,
            "b": 18,
            "distance": 0.05802047781569966,
            "overlap_count": 1172,
            "agreement_count": 1104,
            "disagreement_count": 68
          },
          {
            "a": 8,
            "b": 9,
            "distance": 0.041701417848206836,
            "overlap_count": 1199,
            "agreement_count": 1149,
            "disagreement_count": 50
          },
          {
            "a": 8,
            "b": 10,
            "distance": 0.13016710642040458,
            "overlap_count": 1137,
            "agreement_count": 989,
            "disagreement_count": 148
          },
          {
            "a": 8,
            "b": 11,
            "distance": 0.0725,
            "overlap_count": 1200,
            "agreement_count": 1113,
            "disagreement_count": 87
          },
          {
            "a": 8,
            "b": 12,
            "distance": 0.029166666666666667,
            "overlap_count": 1200,
            "agreement_count": 1165,
            "disagreement_count": 35
          },
          {
            "a": 8,
            "b": 13,
            "distance": 0.034166666666666665,
            "overlap_count": 1200,
            "agreement_count": 1159,
            "disagreement_count": 41
          },
          {
            "a": 8,
            "b": 14,
            "distance": 0.043333333333333335,
            "overlap_count": 1200,
            "agreement_count": 1148,
            "disagreement_count": 52
          },
          {
            "a": 8,
            "b": 15,
            "distance": 0.059166666666666666,
            "overlap_count": 1200,
            "agreement_count": 1129,
            "disagreement_count": 71
          },
          {
            "a": 8,
            "b": 16,
            "distance": 0.03333333333333333,
            "overlap_count": 1200,
            "agreement_count": 1160,
            "disagreement_count": 40
          },
          {
            "a": 8,
            "b": 17,
            "distance": 0.0567139282735613,
            "overlap_count": 1199,
            "agreement_count": 1131,
            "disagreement_count": 68
          },
          {
            "a": 8,
            "b": 18,
            "distance": 0.029166666666666667,
            "overlap_count": 1200,
            "agreement_count": 1165,
            "disagreement_count": 35
          },
          {
            "a": 9,
            "b": 10,
            "distance": 0.12411971830985916,
            "overlap_count": 1136,
            "agreement_count": 995,
            "disagreement_count": 141
          },
          {
            "a": 9,
            "b": 11,
            "distance": 0.07172643869891576,
            "overlap_count": 1199,
            "agreement_count": 1113,
            "disagreement_count": 86
          },
          {
            "a": 9,
            "b": 12,
            "distance": 0.029190992493744787,
            "overlap_count": 1199,
            "agreement_count": 1164,
            "disagreement_count": 35
          },
          {
            "a": 9,
            "b": 13,
            "distance": 0.03753127606338615,
            "overlap_count": 1199,
            "agreement_count": 1154,
            "disagreement_count": 45
          },
          {
            "a": 9,
            "b": 14,
            "distance": 0.04837364470391994,
            "overlap_count": 1199,
            "agreement_count": 1141,
            "disagreement_count": 58
          },
          {
            "a": 9,
            "b": 15,
            "distance": 0.05921601334445371,
            "overlap_count": 1199,
            "agreement_count": 1128,
            "disagreement_count": 71
          },
          {
            "a": 9,
            "b": 16,
            "distance": 0.030025020850708923,
            "overlap_count": 1199,
            "agreement_count": 1163,
            "disagreement_count": 36
          },
          {
            "a": 9,
            "b": 17,
            "distance": 0.06343906510851419,
            "overlap_count": 1198,
            "agreement_count": 1122,
            "disagreement_count": 76
          },
          {
            "a": 9,
            "b": 18,
            "distance": 0.029190992493744787,
            "overlap_count": 1199,
            "agreement_count": 1164,
            "disagreement_count": 35
          },
          {
            "a": 10,
            "b": 11,
            "distance": 0.1363236587510994,
            "overlap_count": 1137,
            "agreement_count": 982,
            "disagreement_count": 155
          },
          {
            "a": 10,
            "b": 12,
            "distance": 0.12313104661389622,
            "overlap_count": 1137,
            "agreement_count": 997,
            "disagreement_count": 140
          },
          {
            "a": 10,
            "b": 13,
            "distance": 0.11961301671064203,
            "overlap_count": 1137,
            "agreement_count": 1001,
            "disagreement_count": 136
          },
          {
            "a": 10,
            "b": 14,
            "distance": 0.12752858399296393,
            "overlap_count": 1137,
            "agreement_count": 992,
            "disagreement_count": 145
          },
          {
            "a": 10,
            "b": 15,
            "distance": 0.1442392260334213,
            "overlap_count": 1137,
            "agreement_count": 973,
            "disagreement_count": 164
          },
          {
            "a": 10,
            "b": 16,
            "distance": 0.1248900615655233,
            "overlap_count": 1137,
            "agreement_count": 995,
            "disagreement_count": 142
          },
          {
            "a": 10,
            "b": 17,
            "distance": 0.13192612137203166,
            "overlap_count": 1137,
            "agreement_count": 987,
            "disagreement_count": 150
          },
          {
            "a": 10,
            "b": 18,
            "distance": 0.11873350923482849,
            "overlap_count": 1137,
            "agreement_count": 1002,
            "disagreement_count": 135
          },
          {
            "a": 11,
            "b": 12,
            "distance": 0.06833333333333333,
            "overlap_count": 1200,
            "agreement_count": 1118,
            "disagreement_count": 82
          },
          {
            "a": 11,
            "b": 13,
            "distance": 0.06666666666666667,
            "overlap_count": 1200,
            "agreement_count": 1120,
            "disagreement_count": 80
          },
          {
            "a": 11,
            "b": 14,
            "distance": 0.07416666666666667,
            "overlap_count": 1200,
            "agreement_count": 1111,
            "disagreement_count": 89
          },
          {
            "a": 11,
            "b": 15,
            "distance": 0.08333333333333333,
            "overlap_count": 1200,
            "agreement_count": 1100,
            "disagreement_count": 100
          },
          {
            "a": 11,
            "b": 16,
            "distance": 0.0725,
            "overlap_count": 1200,
            "agreement_count": 1113,
            "disagreement_count": 87
          },
          {
            "a": 11,
            "b": 17,
            "distance": 0.07589658048373644,
            "overlap_count": 1199,
            "agreement_count": 1108,
            "disagreement_count": 91
          },
          {
            "a": 11,
            "b": 18,
            "distance": 0.06833333333333333,
            "overlap_count": 1200,
            "agreement_count": 1118,
            "disagreement_count": 82
          },
          {
            "a": 12,
            "b": 13,
            "distance": 0.021666666666666667,
            "overlap_count": 1200,
            "agreement_count": 1174,
            "disagreement_count": 26
          },
          {
            "a": 12,
            "b": 14,
            "distance": 0.03916666666666667,
            "overlap_count": 1200,
            "agreement_count": 1153,
            "disagreement_count": 47
          },
          {
            "a": 12,
            "b": 15,
            "distance": 0.043333333333333335,
            "overlap_count": 1200,
            "agreement_count": 1148,
            "disagreement_count": 52
          },
          {
            "a": 12,
            "b": 16,
            "distance": 0.025833333333333333,
            "overlap_count": 1200,
            "agreement_count": 1169,
            "disagreement_count": 31
          },
          {
            "a": 12,
            "b": 17,
            "distance": 0.05421184320266889,
            "overlap_count": 1199,
            "agreement_count": 1134,
            "disagreement_count": 65
          },
          {
            "a": 12,
            "b": 18,
            "distance": 0.006666666666666667,
            "overlap_count": 1200,
            "agreement_count": 1192,
            "disagreement_count": 8
          },
          {
            "a": 13,
            "b": 14,
            "distance": 0.035833333333333335,
            "overlap_count": 1200,
            "agreement_count": 1157,
            "disagreement_count": 43
          },
          {
            "a": 13,
            "b": 15,
            "distance": 0.06,
            "overlap_count": 1200,
            "agreement_count": 1128,
            "disagreement_count": 72
          },
          {
            "a": 13,
            "b": 16,
            "distance": 0.0325,
            "overlap_count": 1200,
            "agreement_count": 1161,
            "disagreement_count": 39
          },
          {
            "a": 13,
            "b": 17,
            "distance": 0.05587989991659716,
            "overlap_count": 1199,
            "agreement_count": 1132,
            "disagreement_count": 67
          },
          {
            "a": 13,
            "b": 18,
            "distance": 0.02,
            "overlap_count": 1200,
            "agreement_count": 1176,
            "disagreement_count": 24
          },
          {
            "a": 14,
            "b": 15,
            "distance": 0.07416666666666667,
            "overlap_count": 1200,
            "agreement_count": 1111,
            "disagreement_count": 89
          },
          {
            "a": 14,
            "b": 16,
            "distance": 0.04,
            "overlap_count": 1200,
            "agreement_count": 1152,
            "disagreement_count": 48
          },
          {
            "a": 14,
            "b": 17,
            "distance": 0.060050041701417846,
            "overlap_count": 1199,
            "agreement_count": 1127,
            "disagreement_count": 72
          },
          {
            "a": 14,
            "b": 18,
            "distance": 0.035833333333333335,
            "overlap_count": 1200,
            "agreement_count": 1157,
            "disagreement_count": 43
          },
          {
            "a": 15,
            "b": 16,
            "distance": 0.05416666666666667,
            "overlap_count": 1200,
            "agreement_count": 1135,
            "disagreement_count": 65
          },
          {
            "a": 15,
            "b": 17,
            "distance": 0.0725604670558799,
            "overlap_count": 1199,
            "agreement_count": 1112,
            "disagreement_count": 87
          },
          {
            "a": 15,
            "b": 18,
            "distance": 0.045,
            "overlap_count": 1200,
            "agreement_count": 1146,
            "disagreement_count": 54
          },
          {
            "a": 16,
            "b": 17,
            "distance": 0.05504587155963303,
            "overlap_count": 1199,
            "agreement_count": 1133,
            "disagreement_count": 66
          },
          {
            "a": 16,
            "b": 18,
            "distance": 0.024166666666666666,
            "overlap_count": 1200,
            "agreement_count": 1171,
            "disagreement_count": 29
          },
          {
            "a": 17,
            "b": 18,
            "distance": 0.05087572977481234,
            "overlap_count": 1199,
            "agreement_count": 1138,
            "disagreement_count": 61
          }
        ],
        "linkage": [
          [
            1,
            2,
            0.0016666666666666668,
            2
          ],
          [
            12,
            18,
            0.006666666666666667,
            2
          ],
          [
            19,
            20,
            0.006666666666666667,
            4
          ],
          [
            6,
            21,
            0.01125,
            5
          ],
          [
            13,
            22,
            0.022,
            6
          ],
          [
            16,
            23,
            0.02638888888888889,
            7
          ],
          [
            0,
            24,
            0.029717150017901896,
            8
          ],
          [
            8,
            25,
            0.031262531328320804,
            9
          ],
          [
            9,
            26,
            0.033186022416177983,
            10
          ],
          [
            14,
            27,
            0.03984822495493669,
            11
          ],
          [
            3,
            28,
            0.05179780062866963,
            12
          ],
          [
            15,
            29,
            0.05578626667563172,
            13
          ],
          [
            17,
            30,
            0.057438298346892847,
            14
          ],
          [
            5,
            7,
            0.061855670103092786,
            2
          ],
          [
            31,
            32,
            0.06708755568051086,
            16
          ],
          [
            11,
            33,
            0.0717860424513939,
            17
          ],
          [
            4,
            34,
            0.07348904644607562,
            18
          ],
          [
            10,
            35,
            0.12730818012869527,
            19
          ]
        ],
        "linkage_complete": true
      },
      {
        "group_id": "a8180ff0cdc9ff74",
        "cluster_scope": "cross_model",
        "representative_policy": "best_accuracy",
        "task_fingerprint": "2c0d314be63ddff932b8c577820159b00f71219bdb5eba31e8cbc0736a1243f5",
        "normalized_tag_key": "english;pragmatics;semantics;sentiment analysis",
        "task_name_display": "sentiment analysis",
        "task_names_seen": [
          "sentiment analysis"
        ],
        "tags_display": "sentiment analysis; English; semantics; pragmatics",
        "model_count": 2,
        "distance_metric": "nominal_disagreement_rate",
        "linkage_method": "average",
        "representative_run_stems": [
          "sentiment__vertex__gemini3flashpreview__2026-04-30-00-04",
          "sentiment__vertex__gemini31flashlitepreview__2026-04-29-23-18"
        ],
        "comparable_pair_count": 1,
        "representatives": [
          {
            "provider": "vertex",
            "model": "gemini-3-flash-preview",
            "run_stem": "sentiment__vertex__gemini3flashpreview__2026-04-30-00-04",
            "metrics_file": "sentiment__vertex__gemini3flashpreview__2026-04-30-00-04__metrics.json",
            "timestamp": "2026-04-29T22:04:02.485410Z",
            "accuracy": 0.9425,
            "cohen_kappa": 0.885
          },
          {
            "provider": "vertex",
            "model": "gemini-3.1-flash-lite-preview",
            "run_stem": "sentiment__vertex__gemini31flashlitepreview__2026-04-29-23-18",
            "metrics_file": "sentiment__vertex__gemini31flashlitepreview__2026-04-29-23-18__metrics.json",
            "timestamp": "2026-04-29T21:18:08.340230Z",
            "accuracy": 0.9475,
            "cohen_kappa": 0.8952618453865336
          }
        ],
        "pairwise": [
          {
            "a": 0,
            "b": 1,
            "distance": 0.017543859649122806,
            "overlap_count": 399,
            "agreement_count": 392,
            "disagreement_count": 7
          }
        ],
        "linkage": [
          [
            0,
            1,
            0.017543859649122806,
            2
          ]
        ],
        "linkage_complete": true
      }
    ]
  }
}