# File-viewer header (not part of the config): YAML, 88 lines, 2.2 KiB
# Deployments served by the LiteLLM proxy. Entries that share a `model_name`
# are treated as one alias and load-balanced by the router.
#
# Per-deployment limits inside `litellm_params` are spelled `rpm` / `tpm`
# (requests / tokens per minute); these are the values the router reads when
# deciding whether a deployment still has budget.
model_list:
  # FREE TIER AGGREGATION
  - model_name: fast-tier
    litellm_params:
      model: groq/llama-3.3-70b-versatile
      api_key: os.environ/GROQ_API_KEY_1
      rpm: 20

  - model_name: fast-tier
    litellm_params:
      model: groq/llama-3.1-8b-instant
      api_key: os.environ/GROQ_API_KEY_2
      rpm: 20

  - model_name: volume-tier
    litellm_params:
      model: mistral/mistral-small-latest
      api_key: os.environ/MISTRAL_API_KEY
      tpm: 500000

  # CLAUDE & KIMI - Quality/Reasoning
  - model_name: quality-tier
    litellm_params:
      model: anthropic/claude-3-5-sonnet-20240620
      api_key: os.environ/ANTHROPIC_API_KEY
      rpm: 5

  # Shares ANTHROPIC_API_KEY with quality-tier.
  - model_name: claude-haiku
    litellm_params:
      model: anthropic/claude-3-haiku-20240307
      api_key: os.environ/ANTHROPIC_API_KEY
      rpm: 10

  # Kimi via OpenRouter (Free tier)
  - model_name: reasoning-tier
    litellm_params:
      model: openrouter/moonshotai/kimi-k2:free
      api_key: os.environ/OPENROUTER_API_KEY

  # Kimi Direct (Ultra-cheap)
  - model_name: reasoning-tier
    litellm_params:
      model: moonshot/kimi-k2-0711-preview
      api_key: os.environ/MOONSHOT_API_KEY
      tpm: 100000

  - model_name: deepseek
    litellm_params:
      model: deepseek/deepseek-chat
      api_key: os.environ/DEEPSEEK_API_KEY

  # Embeddings & Tools
  - model_name: embeddings
    litellm_params:
      model: cohere/embed-english-v3.0
      api_key: os.environ/COHERE_API_KEY

  # Local Fallback
  # No api_key: this entry only sets the Ollama endpoint via api_base.
  - model_name: local-llama
    litellm_params:
      model: ollama/llama3.1:8b
      api_base: http://ollama:11434

# Router behaviour: how a deployment is chosen within an alias, and what
# happens when a call fails.
router_settings:
  routing_strategy: "usage-based-routing"
  timeout: 30
  num_retries: 3
  allowed_fails: 2
  cooldown_time: 60
  # Per-exception retry counts. The router reads this policy from
  # router_settings, and its keys take the form `<ExceptionName>Retries`.
  retry_policy:
    TimeoutErrorRetries: 3
    RateLimitErrorRetries: 5
  # Each entry maps an alias to the ordered list of aliases to try next.
  fallbacks:
    - fast-tier: ["volume-tier", "reasoning-tier"]
    - volume-tier: ["reasoning-tier", "local-llama"]
    - quality-tier: ["claude-haiku", "reasoning-tier"]

# Response caching is configured under litellm_settings, not general_settings.
litellm_settings:
  cache: true
  cache_params:
    type: redis
    host: redis
    port: 6379
    ttl: 3600

general_settings:
  master_key: os.environ/LITELLM_MASTER_KEY
  # NOTE(review): log_level / log_file do not appear to be documented
  # general_settings keys (verbosity is normally set through the LITELLM_LOG
  # environment variable) - kept as-is; confirm they have any effect.
  log_level: info
  log_file: /app/logs/litellm.log