{
  "model_details": {
    "provider": "requesty",
    "model_requested": "claude-haiku-4-5",
    "model_for_requests": "claude-haiku-4-5",
    "api_base_url": "https://router.requesty.ai/v1",
    "chat_completions_endpoint": "https://router.requesty.ai/v1/chat/completions"
  },
  "run_config": {
    "input": [
      "data/input/ME_disambiguation.csv"
    ],
    "labels": null,
    "task_name": "ME disambiguation",
    "task_description": "Disambiguation of four Middle English words: loaf, man, meat or son - and other homographic forms.",
    "tags": "Middle English;semantics;disambiguation;homonymy",
    "model": "claude-haiku-4-5",
    "temperature": null,
    "top_p": null,
    "top_k": null,
    "service_tier": "standard",
    "verbosity": null,
    "reasoning_effort": null,
    "thinking_level": null,
    "effort": null,
    "strict_control_acceptance": true,
    "provider": "requesty",
    "system_prompt": "You are a meticulous historical linguist disambiguating Middle English nouns. In Middle English, words are very often formally homographic due to irregular spelling - the actual lexical unit can only be recognised based on the context.\n\nYou will receive a Middle English word form and you need to decided whether it is an instance of one of the following words: \n- loaf (prototypically meaning bread, piece of bread or food; in specific or abstract sense), \n- man (prototypically meaning a male human being, a human being in general, a person; in specific or abstract sense), \n- meat (prototypically meaning food, flesh or meal; in specific or abstract sense),\n- son (prototypically meaning a male child; in specific or abstract sense) \n- or an instance of an other word, perhaps homonymous in its Middle English form, e.g. love, leaf, life, money, moon, mete (measurement), sun, sin, sound,  etc.\n\nIf it is one of the words: loaf, man, meat or son (in any meaning of those words), classify that word, i.e. \"loaf\", \"man\", \"meat\" or \"son\".\nOtherwise, classify it as \"other\".",
    "system_prompt_b64": null,
    "few_shot_examples": 0,
    "prompt_layout": "standard",
    "cache_pad_target_tokens": 0,
    "prompt_cache_key": null,
    "gemini_cached_content": null,
    "requesty_auto_cache": true,
    "vertex_auto_adc_login": null,
    "vertex_access_token_refresh_seconds": null,
    "create_gemini_cache": false,
    "gemini_cache_ttl": 3600,
    "gemini_cache_ttl_autoupdate": true,
    "keep_gemini_cache": false,
    "enable_cot": true,
    "no_explanation": false,
    "logprobs": false,
    "calibration": true,
    "confusion_heatmap": true,
    "api_key_var": "REQUESTY_API_KEY",
    "api_base_var": "REQUESTY_BASE_URL",
    "max_retries": 3,
    "retry_delay": 5.0,
    "request_interval_ms": 0,
    "request_timeout_seconds": 30.0,
    "threads": 4,
    "prompt_log_detail": "full",
    "flush_rows": 100,
    "flush_seconds": 2.0,
    "validator_cmd": null,
    "validator_args": "",
    "validator_timeout": 5.0,
    "validator_prompt_max_candidates": 50,
    "validator_prompt_max_chars": 8000,
    "validator_exhausted_policy": "accept_blank_confidence",
    "validator_debug": false,
    "log_level": "INFO"
  },
  "source_input_csv": "data/input/ME_disambiguation.csv",
  "source_output_csv": "data/output/ME_disambiguation__requesty__anthropicclaudehaiku45__2026-03-29-18-08.csv",
  "source_labels_csv": "",
  "cache_padding": {
    "enabled": false,
    "target_shared_prefix_tokens": 0,
    "calibration_shared_prefix_tokens": null,
    "target_prompt_tokens": 0,
    "calibration_prompt_tokens": null,
    "calibration_example_id": null,
    "applied_padding_tokens_estimate": 0,
    "examples_with_padding_applied": 0
  },
  "request_control_summary": {
    "configured": {
      "requesty_auto_cache": true
    },
    "attempts_total": 402,
    "attempts_with_control_telemetry": 402,
    "per_control": {
      "reasoning_effort": {
        "configured_value": null,
        "requested_attempts": 0,
        "sent_attempts": 0,
        "accepted_attempts": 0,
        "rejected_attempts": 0,
        "missing_from_final_request_attempts": 0,
        "acceptance_rate": null,
        "rejected_reasons": {},
        "rejected_example_ids": []
      },
      "thinking_level": {
        "configured_value": null,
        "requested_attempts": 0,
        "sent_attempts": 0,
        "accepted_attempts": 0,
        "rejected_attempts": 0,
        "missing_from_final_request_attempts": 0,
        "acceptance_rate": null,
        "rejected_reasons": {},
        "rejected_example_ids": []
      },
      "effort": {
        "configured_value": null,
        "requested_attempts": 0,
        "sent_attempts": 0,
        "accepted_attempts": 0,
        "rejected_attempts": 0,
        "missing_from_final_request_attempts": 0,
        "acceptance_rate": null,
        "rejected_reasons": {},
        "rejected_example_ids": []
      },
      "verbosity": {
        "configured_value": null,
        "requested_attempts": 0,
        "sent_attempts": 0,
        "accepted_attempts": 0,
        "rejected_attempts": 0,
        "missing_from_final_request_attempts": 0,
        "acceptance_rate": null,
        "rejected_reasons": {},
        "rejected_example_ids": []
      },
      "prompt_cache_key": {
        "configured_value": null,
        "requested_attempts": 0,
        "sent_attempts": 0,
        "accepted_attempts": 0,
        "rejected_attempts": 0,
        "missing_from_final_request_attempts": 0,
        "acceptance_rate": null,
        "rejected_reasons": {},
        "rejected_example_ids": []
      },
      "gemini_cached_content": {
        "configured_value": null,
        "requested_attempts": 0,
        "sent_attempts": 0,
        "accepted_attempts": 0,
        "rejected_attempts": 0,
        "missing_from_final_request_attempts": 0,
        "acceptance_rate": null,
        "rejected_reasons": {},
        "rejected_example_ids": []
      },
      "requesty_auto_cache": {
        "configured_value": true,
        "requested_attempts": 402,
        "sent_attempts": 402,
        "accepted_attempts": 402,
        "rejected_attempts": 0,
        "missing_from_final_request_attempts": 0,
        "acceptance_rate": 1.0,
        "rejected_reasons": {},
        "rejected_example_ids": []
      }
    }
  },
  "usage_metadata_summary": {
    "attempts_total": 402,
    "attempts_with_usage_metadata": 402,
    "attempts_with_cached_token_signals": 402,
    "cached_tokens_total_estimate": 0,
    "cache_read_tokens_total": 0,
    "cache_write_tokens_total": 0,
    "cache_token_fields_totals": {
      "usage.prompt_tokens_details.cached_tokens": 0
    },
    "attempts_with_gemini_cached_content_token_signals": 0,
    "gemini_cached_content_token_count_total": 0,
    "gemini_cached_content_token_fields_totals": {}
  },
  "token_usage_totals": {
    "attempts_total": 402,
    "attempts_with_token_usage": 402,
    "attempts_with_output_tokens": 402,
    "attempts_with_cached_input_tokens": 0,
    "attempts_with_thinking_tokens": 0,
    "input_tokens_total": 486990,
    "cached_input_tokens_total": 0,
    "non_cached_input_tokens_total": 486990,
    "output_tokens_total": 89568,
    "thinking_tokens_total": 0,
    "output_tokens_definition": "total_tokens - prompt_tokens (or completion_tokens + thinking_tokens fallback)"
  },
  "truth_label_count": 400,
  "prediction_count": 400,
  "evaluated_example_count": 400,
  "calibration_metrics": {
    "available": true,
    "sample_count": 400,
    "bin_count": 10,
    "ece": 0.018949999999998222,
    "mce": 0.43999999999999984,
    "brier_score": 0.01647600000000005
  },
  "first_prompt_timestamp": "2026-03-29T16:09:02.369257Z",
  "last_prompt_timestamp": "2026-03-29T16:13:42.085274Z",
  "overall_time_seconds": 279.716017,
  "overall_time_human": "4m 40s",
  "accuracy": 0.975,
  "cohen_kappa": 0.9660804233163169,
  "macro_precision": 0.9755669554004429,
  "macro_recall": 0.9845878136200718,
  "macro_f1": 0.9796861920875205,
  "per_label": {
    "loaf": {
      "precision": 1.0,
      "recall": 1.0,
      "f1": 1.0,
      "support": 5
    },
    "man": {
      "precision": 1.0,
      "recall": 1.0,
      "f1": 1.0,
      "support": 97
    },
    "meat": {
      "precision": 0.9680851063829787,
      "recall": 0.978494623655914,
      "f1": 0.9732620320855615,
      "support": 93
    },
    "other": {
      "precision": 0.9855072463768116,
      "recall": 0.9444444444444444,
      "f1": 0.9645390070921985,
      "support": 144
    },
    "son": {
      "precision": 0.9242424242424242,
      "recall": 1.0,
      "f1": 0.9606299212598425,
      "support": 61
    }
  },
  "labels": [
    "loaf",
    "man",
    "meat",
    "other",
    "son"
  ],
  "label_count": 5,
  "total_examples": 400,
  "confusion_matrix": {
    "loaf": {
      "loaf": 5,
      "man": 0,
      "meat": 0,
      "other": 0,
      "son": 0
    },
    "man": {
      "loaf": 0,
      "man": 97,
      "meat": 0,
      "other": 0,
      "son": 0
    },
    "meat": {
      "loaf": 0,
      "man": 0,
      "meat": 91,
      "other": 2,
      "son": 0
    },
    "other": {
      "loaf": 0,
      "man": 0,
      "meat": 3,
      "other": 136,
      "son": 5
    },
    "son": {
      "loaf": 0,
      "man": 0,
      "meat": 0,
      "other": 0,
      "son": 61
    }
  },
  "label_metrics_available": true
}
