| # Project Verity-H v0.3 — Environment Variables | |
| # ── LLM mode ────────────────────────────────────────────────────────── | |
| # "mock" — canned responses for offline tests (no API key needed) | |
| # "api" — OpenAI-compatible endpoint | |
| # "hf_api" — HuggingFace Inference API (recommended for Qwen3, etc.) | |
| LLM_MODE=mock | |
| # ── OpenAI-compatible API (LLM_MODE=api) ────────────────────────────── | |
| OPENAI_API_KEY=sk-your-key-here | |
| OPENAI_BASE_URL=https://api.openai.com/v1 | |
| # ── HuggingFace Inference API (LLM_MODE=hf_api) ────────────────────── | |
| HF_API_KEY=hf_your-key-here | |
| # ── Model settings (used by all API modes) ──────────────────────────── | |
| MODEL_NAME=Qwen/Qwen3-4B-Instruct-2507 | |
| LLM_TEMPERATURE=0.0 | |
| LLM_MAX_TOKENS=2048 | |
| # ── Rate limiting (all API modes) ───────────────────────────────────── | |
| # Delay in seconds between LLM calls (applies to ALL API modes). | |
| # Default: 2.0 (safe for HF Serverless Inference API) | |
| # Faster: 0.5 (try this first; 100 cases in ~20 min) | |
| # Fastest: 0.0 (may hit rate limits; use only for small batches) | |
| # If you get 429 errors, increase this delay or switch to a dedicated endpoint. | |
| LLM_CALL_DELAY=2 | |
| # Per-minute rate limit (0 = unlimited). HF Serverless often enforces | |
| # a per-minute ceiling even if per-second spacing is low. This adds a | |
| # global call tracker that pauses when the limit is approached. | |
| # Default: 30 (conservative for most free/paid HF endpoints) | |
| LLM_MAX_CALLS_PER_MINUTE=30 | |