Sravanth18
/

verity-h-prototype

Model card Files Files and versions

verity-h-prototype / .env.example

Sravanth18's picture

Upload .env.example

202fe07 verified about 1 month ago

history blame contribute delete

1.79 kB

	# Project Verity-H v0.3 — Environment Variables

	# ── LLM mode ──────────────────────────────────────────────────────────
	# "mock" — canned responses for offline tests (no API key needed)
	# "api" — OpenAI-compatible endpoint
	# "hf_api" — HuggingFace Inference API (recommended for Qwen3, etc.)
	LLM_MODE=mock

	# ── OpenAI-compatible API (LLM_MODE=api) ──────────────────────────────
	OPENAI_API_KEY=sk-your-key-here
	OPENAI_BASE_URL=https://api.openai.com/v1

	# ── HuggingFace Inference API (LLM_MODE=hf_api) ──────────────────────
	HF_API_KEY=hf_your-key-here

	# ── Model settings (used by all API modes) ────────────────────────────
	MODEL_NAME=Qwen/Qwen3-4B-Instruct-2507
	LLM_TEMPERATURE=0.0
	LLM_MAX_TOKENS=2048

	# ── Rate limiting (all API modes) ─────────────────────────────────────
	# Delay in seconds between LLM calls (applies to ALL API modes).
	# Default: 2.0 (safe for HF Serverless Inference API)
	# Faster: 0.5 (first value to try when speeding up; 100 cases in ~20 min)
	# Fastest: 0.0 (may hit rate limits; use only for small batches)
	# If you get 429 errors, increase this delay or switch to a dedicated endpoint.
	LLM_CALL_DELAY=2

	# Per-minute rate limit (0 = unlimited). HF Serverless often enforces
	# a per-minute ceiling in addition to any per-call spacing. A non-zero
	# value enables a global call tracker that pauses when the limit is approached.
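	# Default: 30 (conservative for most free/paid HF endpoints).
	# Note: 30 calls/minute averages one call every 2 s, so lowering
	# LLM_CALL_DELAY below 2 has little sustained effect unless this limit
	# is raised as well.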
	LLM_MAX_CALLS_PER_MINUTE=30