verity-h-prototype / .env.example
Sravanth18's picture
Upload .env.example
202fe07 verified
# Project Verity-H v0.3 — Environment Variables
# ── LLM mode ──────────────────────────────────────────────────────────
# "mock" β€” canned responses for offline tests (no API key needed)
# "api" β€” OpenAI-compatible endpoint
# "hf_api" β€” HuggingFace Inference API (recommended for Qwen3, etc.)
LLM_MODE=mock
# ── OpenAI-compatible API (LLM_MODE=api) ──────────────────────────────
OPENAI_API_KEY=sk-your-key-here
OPENAI_BASE_URL=https://api.openai.com/v1
# ── HuggingFace Inference API (LLM_MODE=hf_api) ──────────────────────
HF_API_KEY=hf_your-key-here
# ── Model settings (used by all API modes) ────────────────────────────
MODEL_NAME=Qwen/Qwen3-4B-Instruct-2507
LLM_TEMPERATURE=0.0
LLM_MAX_TOKENS=2048
# ── Rate limiting (all API modes) ─────────────────────────────────────
# Delay in seconds between LLM calls (applies to ALL API modes).
# Default: 2.0 (safe for HF Serverless Inference API)
# Faster: 0.5 (try this first; 100 cases in ~20 min)
# Fastest: 0.0 (may hit rate limits; use only for small batches)
# If you get 429 errors, increase this delay or switch to a dedicated endpoint.
LLM_CALL_DELAY=2
# Per-minute rate limit (0 = unlimited). HF Serverless often enforces
# a per-minute ceiling even if per-second spacing is low. This adds a
# global call tracker that pauses when the limit is approached.
# Default: 30 (conservative for most free/paid HF endpoints)
LLM_MAX_CALLS_PER_MINUTE=30