# Project Verity-H v0.3 — Environment Variables
#
# One KEY=value assignment per line; lines starting with "#" are comments.
# Keep comments on their own lines — text after a value may be read as part
# of the value by some parsers.

# ── LLM mode ──────────────────────────────────────────────────────────
# "mock"   — canned responses for offline tests (no API key needed)
# "api"    — OpenAI-compatible endpoint
# "hf_api" — HuggingFace Inference API (recommended for Qwen3, etc.)
LLM_MODE=mock

# ── OpenAI-compatible API (LLM_MODE=api) ──────────────────────────────
# Placeholder value — replace with your own key; never commit a real key.
OPENAI_API_KEY=sk-your-key-here
OPENAI_BASE_URL=https://api.openai.com/v1

# ── HuggingFace Inference API (LLM_MODE=hf_api) ──────────────────────
# Placeholder value — replace with your own key; never commit a real key.
HF_API_KEY=hf_your-key-here

# ── Model settings (used by all API modes) ────────────────────────────
MODEL_NAME=Qwen/Qwen3-4B-Instruct-2507
LLM_TEMPERATURE=0.0
LLM_MAX_TOKENS=2048

# ── Rate limiting (all API modes) ─────────────────────────────────────
# Delay in seconds between LLM calls (applies to ALL API modes).
#   Default: 2.0  (safe for HF Serverless Inference API)
#   Faster:  0.5  (try this first; 100 cases in ~20 min)
#   Fastest: 0.0  (may hit rate limits; use only for small batches)
# If you get 429 errors, increase this delay or switch to a dedicated
# endpoint.
LLM_CALL_DELAY=2

# Per-minute rate limit (0 = unlimited). HF Serverless often enforces
# a per-minute ceiling even if per-second spacing is low. This adds a
# global call tracker that pauses when the limit is approached.
#   Default: 30  (conservative for most free/paid HF endpoints)
LLM_MAX_CALLS_PER_MINUTE=30