---
# LiteLLM proxy configuration: multi-provider model aggregation with
# usage-based routing, tiered fallbacks, Redis caching, and a local Ollama
# escape hatch.
#
# NOTE(review): the original file was collapsed onto a single physical line,
# which made everything after the first "#" a comment and the whole file parse
# as `model_list: null`. Reconstructed into block YAML below.

model_list:
  # FREE TIER AGGREGATION
  # Duplicate model_name entries (e.g. two "fast-tier" deployments) are
  # intentional: LiteLLM load-balances across deployments sharing a name.
  - model_name: fast-tier
    litellm_params:
      model: groq/llama-3.3-70b-versatile
      api_key: os.environ/GROQ_API_KEY_1
      # Deployment-level requests/min. LiteLLM reads `rpm` (not `rpm_limit`)
      # inside litellm_params; unknown keys are forwarded to the provider call.
      rpm: 20
  - model_name: fast-tier
    litellm_params:
      model: groq/llama-3.1-8b-instant
      api_key: os.environ/GROQ_API_KEY_2
      rpm: 20
  - model_name: volume-tier
    litellm_params:
      model: mistral/mistral-small-latest
      api_key: os.environ/MISTRAL_API_KEY
      # Deployment-level tokens/min (`tpm`, not `tpm_limit` — see note above).
      tpm: 500000

  # CLAUDE & KIMI - Quality/Reasoning
  - model_name: quality-tier
    litellm_params:
      model: anthropic/claude-3-5-sonnet-20240620
      api_key: os.environ/ANTHROPIC_API_KEY
      rpm: 5
  - model_name: claude-haiku
    litellm_params:
      model: anthropic/claude-3-haiku-20240307
      api_key: os.environ/ANTHROPIC_API_KEY
      rpm: 10

  # Kimi via OpenRouter (Free tier)
  - model_name: reasoning-tier
    litellm_params:
      model: "openrouter/moonshotai/kimi-k2:free"  # quoted: value contains ':'
      api_key: os.environ/OPENROUTER_API_KEY

  # Kimi Direct (Ultra-cheap)
  - model_name: reasoning-tier
    litellm_params:
      model: moonshot/kimi-k2-0711-preview
      api_key: os.environ/MOONSHOT_API_KEY
      tpm: 100000

  - model_name: deepseek
    litellm_params:
      model: deepseek/deepseek-chat
      api_key: os.environ/DEEPSEEK_API_KEY

  # Embeddings & Tools
  - model_name: embeddings
    litellm_params:
      model: cohere/embed-english-v3.0
      api_key: os.environ/COHERE_API_KEY

  # Local Fallback
  - model_name: local-llama
    litellm_params:
      model: "ollama/llama3.1:8b"       # quoted: tag contains ':'
      api_base: "http://ollama:11434"   # quoted: URL contains ':'

router_settings:
  routing_strategy: usage-based-routing
  timeout: 30        # per-request timeout, seconds
  num_retries: 3
  allowed_fails: 2   # failures before a deployment is cooled down
  cooldown_time: 60  # seconds a failing deployment is skipped
  # Fallback chains: tried left-to-right when the primary deployment fails.
  fallbacks:
    - fast-tier: ["volume-tier", "reasoning-tier"]
    - volume-tier: ["reasoning-tier", "local-llama"]
    - quality-tier: ["claude-haiku", "reasoning-tier"]

general_settings:
  master_key: os.environ/LITELLM_MASTER_KEY
  cache: true
  cache_params:
    type: redis
    host: redis
    port: 6379
    ttl: 3600  # cache entry lifetime, seconds
  # NOTE(review): LiteLLM documents per-exception retry policy under
  # router_settings with keys like `TimeoutErrorRetries` /
  # `RateLimitErrorRetries` — confirm this placement and key naming are
  # honored by the deployed LiteLLM version.
  retry_policy:
    TimeoutError: 3
    RateLimitError: 5
  # NOTE(review): log_level / log_file are not standard general_settings keys
  # in all LiteLLM versions (logging is often configured via litellm_settings
  # or env vars) — verify against the deployed version's config schema.
  log_level: info
  log_file: /app/logs/litellm.log