# llm-hub/config/litellm_config.yaml
---
# Model deployments. Entries sharing a model_name form a load-balanced pool
# that the router spreads traffic across (e.g. the two "fast-tier" Groq keys).
model_list:
  # --- Free tier aggregation -------------------------------------------------
  - model_name: fast-tier
    litellm_params:
      model: groq/llama-3.3-70b-versatile
      api_key: os.environ/GROQ_API_KEY_1
      # LiteLLM's router reads `rpm`/`tpm` inside litellm_params;
      # `rpm_limit`/`tpm_limit` are virtual-key settings and are ignored here.
      rpm: 20
  - model_name: fast-tier
    litellm_params:
      model: groq/llama-3.1-8b-instant
      api_key: os.environ/GROQ_API_KEY_2
      rpm: 20
  - model_name: volume-tier
    litellm_params:
      model: mistral/mistral-small-latest
      api_key: os.environ/MISTRAL_API_KEY
      tpm: 500000
  # --- Claude — quality / cheap fallback -------------------------------------
  - model_name: quality-tier
    litellm_params:
      model: anthropic/claude-3-5-sonnet-20240620
      api_key: os.environ/ANTHROPIC_API_KEY
      rpm: 5
  - model_name: claude-haiku
    litellm_params:
      model: anthropic/claude-3-haiku-20240307
      api_key: os.environ/ANTHROPIC_API_KEY
      rpm: 10
  # --- Kimi via OpenRouter (free tier) ---------------------------------------
  - model_name: reasoning-tier
    litellm_params:
      # Quoted: a plain scalar containing ':' is fragile in YAML.
      model: "openrouter/moonshotai/kimi-k2:free"
      api_key: os.environ/OPENROUTER_API_KEY
  # --- Kimi direct (ultra-cheap) — second deployment in the same pool --------
  - model_name: reasoning-tier
    litellm_params:
      model: moonshot/kimi-k2-0711-preview
      api_key: os.environ/MOONSHOT_API_KEY
      tpm: 100000
  - model_name: deepseek
    litellm_params:
      model: deepseek/deepseek-chat
      api_key: os.environ/DEEPSEEK_API_KEY
  # --- Embeddings ------------------------------------------------------------
  - model_name: embeddings
    litellm_params:
      model: cohere/embed-english-v3.0
      api_key: os.environ/COHERE_API_KEY
  # --- Local fallback (no api_key: Ollama is unauthenticated) ----------------
  - model_name: local-llama
    litellm_params:
      model: "ollama/llama3.1:8b"
      api_base: "http://ollama:11434"
router_settings:
  # Prefer the deployment with the lowest current usage among same-named models.
  routing_strategy: "usage-based-routing"
  timeout: 30         # seconds per request
  num_retries: 3      # retries before falling back
  allowed_fails: 2    # failures before a deployment is cooled down
  cooldown_time: 60   # seconds a failed deployment is skipped
  # Fallback chains: when the keyed model group is exhausted/failing,
  # try the listed groups in order.
  fallbacks:
    - fast-tier: ["volume-tier", "reasoning-tier"]
    - volume-tier: ["reasoning-tier", "local-llama"]
    - quality-tier: ["claude-haiku", "reasoning-tier"]
general_settings:
  # Admin/master key for the proxy API; resolved from the environment at startup.
  master_key: os.environ/LITELLM_MASTER_KEY
  # NOTE(review): LiteLLM documents response caching under `litellm_settings`
  # (cache / cache_params), not `general_settings` — confirm these are honored here.
  cache: true
  cache_params:
    type: redis
    host: redis       # docker-compose service name
    port: 6379
    ttl: 3600         # cache entry lifetime, seconds
  # NOTE(review): LiteLLM documents retry_policy under `router_settings` with
  # `<Error>Retries` keys (e.g. TimeoutErrorRetries) — verify this shape/placement.
  retry_policy:
    TimeoutError: 3
    RateLimitError: 5
  log_level: info
  log_file: /app/logs/litellm.log