Initial commit: Agentic LLM Hub with multi-reasoning, MCP tools, and IDE
commit 2cafb31cb4

@@ -0,0 +1,69 @@
# Master Key for API Access (generate strong key)
MASTER_KEY=sk-agent-$(openssl rand -hex 8)

# ==========================================
# FREE TIER API KEYS (Add your keys below)
# ==========================================

# Groq - https://console.groq.com (20 RPM free, create multiple accounts)
GROQ_API_KEY_1=gsk_your_first_groq_key_here
GROQ_API_KEY_2=gsk_your_second_groq_key_here

# Mistral - https://console.mistral.ai (1B tokens/month free)
MISTRAL_API_KEY=your_mistral_key_here

# Anthropic Claude - https://console.anthropic.com ($5 trial, $500 student)
ANTHROPIC_API_KEY=sk-ant-your_claude_key_here

# Moonshot Kimi - https://platform.moonshot.ai ($5 signup bonus)
MOONSHOT_API_KEY=sk-your_moonshot_key_here

# OpenRouter - https://openrouter.ai (50 req/day free, access to Kimi free)
OPENROUTER_API_KEY=sk-or-your_openrouter_key_here

# Cohere - https://cohere.com (1K calls/month free, good for embeddings)
COHERE_API_KEY=your_cohere_key_here

# DeepSeek - https://platform.deepseek.com (cheap rates)
DEEPSEEK_API_KEY=sk-your_deepseek_key_here

# GitHub Token (for MCP Git tools)
GITHUB_TOKEN=ghp_your_github_token_here

# ==========================================
# AGENT SETTINGS
# ==========================================

# Default reasoning: react, plan_execute, reflexion, or auto
DEFAULT_REASONING=auto

# Enable self-reflection (true/false)
ENABLE_REFLECTION=true

# Maximum iterations per request
MAX_ITERATIONS=10

# ==========================================
# UI/IDE SETTINGS
# ==========================================

# Code-Server passwords (change these!)
IDE_PASSWORD=secure-ide-password-123
IDE_SUDO_PASSWORD=admin-password-456

# Optional: Domain for reverse proxy
# IDE_DOMAIN=code.yourdomain.com

# Web UI settings
WEBUI_SECRET_KEY=$(openssl rand -hex 32)

# ==========================================
# ADVANCED MEMORY SETTINGS
# ==========================================

# Enable knowledge graph (Neo4j) - requires more RAM
ENABLE_KNOWLEDGE_GRAPH=false
NEO4J_AUTH=neo4j/password

# ChromaDB settings
CHROMA_STORAGE_PATH=/data/chroma

@@ -0,0 +1,31 @@
# Environment variables
.env
.env.local
.env.*.local

# Data directories (persisted volumes)
data/
workspace/*
!workspace/.gitkeep
logs/
*.db
*.sqlite3

# IDE
.idea/
.vscode/
*.swp
*.swo

# OS
.DS_Store
Thumbs.db

# Docker
.docker/
docker-compose.override.yml

# Temporary
*.tmp
*.bak
*.log

@@ -0,0 +1,52 @@
.PHONY: help setup start start-ide stop logs status update backup clean

help:
	@echo "Agentic LLM Hub Management"
	@echo "=========================="
	@echo "make setup     - Initial setup"
	@echo "make start     - Start all services (full profile)"
	@echo "make start-ide - Start with IDE only"
	@echo "make stop      - Stop all services"
	@echo "make logs      - View logs"
	@echo "make status    - Check service status"
	@echo "make update    - Pull latest and update images"
	@echo "make backup    - Backup data directories"
	@echo "make clean     - Remove containers (data preserved)"

setup:
	@chmod +x *.sh scripts/*.sh 2>/dev/null || true
	@./setup.sh

start:
	@./start.sh full

start-ide:
	@./start.sh ide

stop:
	@docker-compose down

logs:
	@docker-compose logs -f --tail=100

status:
	@echo "Container Status:"
	@docker-compose ps
	@echo ""
	@echo "API Health:"
	@curl -s http://localhost:8080/health | python3 -m json.tool 2>/dev/null || echo "API not responding"

update:
	@git pull
	@docker-compose pull
	@docker-compose up -d

backup:
	@mkdir -p backup/$(shell date +%Y%m%d)
	@cp -r data backup/$(shell date +%Y%m%d)/
	@cp .env backup/$(shell date +%Y%m%d)/
	@echo "Backup created: backup/$(shell date +%Y%m%d)/"

clean:
	@docker-compose down -v
	@echo "Containers removed. Data preserved in ./data/"

@@ -0,0 +1,60 @@
# 🤖 Agentic LLM Hub

Self-hosted AI agent platform with multi-provider LLM aggregation, reasoning engines (ReAct, Plan-and-Execute, Reflexion), MCP tools, and a web IDE.

## 🚀 Quick Start

```bash
# 1. Clone from your Gitea
git clone https://gitea.yourdomain.com/youruser/llm-hub.git
cd llm-hub

# 2. Configure
cp .env.example .env
nano .env  # Add your API keys

# 3. Deploy
./setup.sh && ./start.sh
```

## 📡 Access Points

| Service | URL | Description |
|---------|-----|-------------|
| VS Code IDE | `http://your-ip:8443` | Full IDE with Continue.dev |
| Agent API | `http://your-ip:8080/v1` | Main API endpoint |
| LiteLLM | `http://your-ip:4000` | LLM Gateway |
| MCP Tools | `http://your-ip:8001/docs` | Tool OpenAPI docs |
| ChromaDB | `http://your-ip:8000` | Vector memory |
| Web UI | `http://your-ip:3000` | Chat interface |

## 🔧 Supported Providers

- **Groq** (Free tier, fast)
- **Mistral** (1B tokens/month free)
- **Anthropic Claude** (Trial credits)
- **Moonshot Kimi** ($5 signup bonus)
- **OpenRouter** (Free tier access)
- **Cohere** (1K calls/month)
- **DeepSeek** (Cheap reasoning)

## 🧠 Reasoning Modes

- `react` - Fast iterative reasoning
- `plan_execute` - Complex multi-step tasks
- `reflexion` - Self-correcting with verification
- `auto` - Automatic selection (see the sketch below)
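
Every mode is just a `reasoning_mode` value on the same endpoint. A minimal sketch (assumes the `requests` package and a placeholder `sk-agent-xxx` key; see the [API Reference](docs/API.md) for the full schema):

```python
import requests

# Try each reasoning mode against the agent API; the metadata block
# reports which model tier the router actually used.
for mode in ("react", "plan_execute", "reflexion", "auto"):
    r = requests.post(
        "http://localhost:8080/v1/chat/completions",
        headers={"Authorization": "Bearer sk-agent-xxx"},
        json={"message": "Summarize this repository", "reasoning_mode": mode},
        timeout=120,
    )
    print(mode, "->", r.json()["metadata"]["model_used"])
```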

## 📚 Documentation

- [Setup Guide](docs/SETUP.md)
- [API Reference](docs/API.md)
- [Provider Guide](docs/PROVIDERS.md)

## 🔄 Updates

```bash
git pull origin main
docker-compose pull
docker-compose up -d
```

@@ -0,0 +1,54 @@
name: LLM Hub IDE
version: 1.0.0
schema: v1

models:
  - name: Groq Llama 3.3 70B
    provider: openai
    model: fast-tier
    apiBase: http://agent-core:8080/v1
    apiKey: sk-agent
    roles: [chat, edit, apply]

  - name: Claude 3.5 Sonnet
    provider: openai
    model: quality-tier
    apiBase: http://agent-core:8080/v1
    apiKey: sk-agent
    roles: [chat, edit, apply]

  - name: Kimi K2
    provider: openai
    model: reasoning-tier
    apiBase: http://agent-core:8080/v1
    apiKey: sk-agent
    roles: [chat, edit, apply]

  - name: Mistral Small
    provider: openai
    model: volume-tier
    apiBase: http://agent-core:8080/v1
    apiKey: sk-agent
    roles: [chat, edit]

tabAutocompleteModel:
  name: Mistral Autocomplete
  provider: openai
  model: volume-tier
  apiBase: http://litellm:4000/v1
  apiKey: sk-agent

embeddingsProvider:
  provider: openai
  model: embeddings
  apiBase: http://litellm:4000/v1
  apiKey: sk-agent

context:
  - provider: code
  - provider: docs
  - provider: diff
  - provider: terminal
  - provider: problems
  - provider: folder
  - provider: codebase

@@ -0,0 +1,87 @@
model_list:
  # FREE TIER AGGREGATION
  - model_name: fast-tier
    litellm_params:
      model: groq/llama-3.3-70b-versatile
      api_key: os.environ/GROQ_API_KEY_1
      rpm: 20

  - model_name: fast-tier
    litellm_params:
      model: groq/llama-3.1-8b-instant
      api_key: os.environ/GROQ_API_KEY_2
      rpm: 20

  - model_name: volume-tier
    litellm_params:
      model: mistral/mistral-small-latest
      api_key: os.environ/MISTRAL_API_KEY
      tpm: 500000

  # CLAUDE & KIMI - Quality/Reasoning
  - model_name: quality-tier
    litellm_params:
      model: anthropic/claude-3-5-sonnet-20240620
      api_key: os.environ/ANTHROPIC_API_KEY
      rpm: 5

  - model_name: claude-haiku
    litellm_params:
      model: anthropic/claude-3-haiku-20240307
      api_key: os.environ/ANTHROPIC_API_KEY
      rpm: 10

  # Kimi via OpenRouter (Free tier)
  - model_name: reasoning-tier
    litellm_params:
      model: openrouter/moonshotai/kimi-k2:free
      api_key: os.environ/OPENROUTER_API_KEY

  # Kimi Direct (Ultra-cheap)
  - model_name: reasoning-tier
    litellm_params:
      model: moonshot/kimi-k2-0711-preview
      api_key: os.environ/MOONSHOT_API_KEY
      tpm: 100000

  - model_name: deepseek
    litellm_params:
      model: deepseek/deepseek-chat
      api_key: os.environ/DEEPSEEK_API_KEY

  # Embeddings & Tools
  - model_name: embeddings
    litellm_params:
      model: cohere/embed-english-v3.0
      api_key: os.environ/COHERE_API_KEY

  # Local Fallback
  - model_name: local-llama
    litellm_params:
      model: ollama/llama3.1:8b
      api_base: http://ollama:11434

router_settings:
  routing_strategy: "usage-based-routing"
  timeout: 30
  num_retries: 3
  allowed_fails: 2
  cooldown_time: 60
  fallbacks:
    - fast-tier: ["volume-tier", "reasoning-tier"]
    - volume-tier: ["reasoning-tier", "local-llama"]
    - quality-tier: ["claude-haiku", "reasoning-tier"]

# Cache settings live under litellm_settings in LiteLLM's config schema
litellm_settings:
  cache: true
  cache_params:
    type: redis
    host: redis
    port: 6379
    ttl: 3600

general_settings:
  master_key: os.environ/LITELLM_MASTER_KEY
  retry_policy:
    TimeoutError: 3
    RateLimitError: 5
  log_level: info
  log_file: /app/logs/litellm.log

@@ -0,0 +1,35 @@
{
  "mcpServers": {
    "filesystem": {
      "command": "npx",
      "args": ["-y", "@modelcontextprotocol/server-filesystem", "/workspace"]
    },
    "git": {
      "command": "uvx",
      "args": ["mcp-server-git"]
    },
    "github": {
      "command": "npx",
      "args": ["-y", "@modelcontextprotocol/server-github"],
      "env": {
        "GITHUB_PERSONAL_ACCESS_TOKEN": "${GITHUB_TOKEN}"
      }
    },
    "fetch": {
      "command": "uvx",
      "args": ["mcp-server-fetch"]
    },
    "sqlite": {
      "command": "uvx",
      "args": ["mcp-server-sqlite", "/workspace/data.db"]
    },
    "memory": {
      "command": "npx",
      "args": ["-y", "@modelcontextprotocol/server-memory"]
    },
    "sequential-thinking": {
      "command": "npx",
      "args": ["-y", "@modelcontextprotocol/server-sequential-thinking"]
    }
  }
}

@@ -0,0 +1,151 @@
version: '3.8'

services:
  # Core Infrastructure
  redis:
    image: redis:7-alpine
    container_name: agent-redis
    restart: unless-stopped
    volumes:
      - ./data/redis:/data
    command: redis-server --appendonly yes --maxmemory 512mb --maxmemory-policy allkeys-lru
    networks:
      - agent-network

  chromadb:
    image: chromadb/chroma:latest
    container_name: agent-memory-vector
    restart: unless-stopped
    ports:
      - "8000:8000"
    volumes:
      - ./data/chroma:/chroma/chroma
    environment:
      - IS_PERSISTENT=TRUE
      - PERSIST_DIRECTORY=/chroma/chroma
      - ANONYMIZED_TELEMETRY=FALSE
    networks:
      - agent-network

  # LLM Gateway
  litellm:
    image: ghcr.io/berriai/litellm:main-latest
    container_name: agent-gateway
    restart: unless-stopped
    ports:
      - "4000:4000"
    volumes:
      - ./config/litellm_config.yaml:/app/config.yaml
      - ./logs:/app/logs
    environment:
      - DATABASE_URL=sqlite:///app/db.sqlite3
      - LITELLM_MASTER_KEY=${MASTER_KEY:-sk-agent}
      - REDIS_HOST=redis
      - REDIS_PORT=6379
    command: --config /app/config.yaml --port 4000
    networks:
      - agent-network

  # Agent Core with Reasoning Engines
  agent-core:
    build:
      context: ./services/agent-core
      dockerfile: Dockerfile
    container_name: agent-core
    restart: unless-stopped
    ports:
      - "8080:8080"
    volumes:
      - ./workspace:/workspace
      - ./config/agent:/app/config
      - ./data/agent:/app/data
    environment:
      - LLM_API_BASE=http://litellm:4000/v1
      - LLM_API_KEY=${MASTER_KEY:-sk-agent}
      - REDIS_URL=redis://redis:6379/0
      - CHROMA_URL=http://chromadb:8000
      - DEFAULT_REASONING_MODE=${DEFAULT_REASONING:-auto}
    depends_on:
      - litellm
      - redis
      - chromadb
    networks:
      - agent-network

  # MCP Tool Gateway
  mcpo:
    build:
      context: ./services/mcpo
      dockerfile: Dockerfile
    container_name: agent-mcp-gateway
    restart: unless-stopped
    ports:
      - "8001:8000"
    volumes:
      - ./workspace:/workspace:ro
      - ./config/mcp:/app/config
    networks:
      - agent-network
    profiles:
      - mcp

  # VS Code Server with AI Assistant
  code-server:
    image: lscr.io/linuxserver/code-server:latest
    container_name: agent-ide
    restart: unless-stopped
    ports:
      - "8443:8443"
    environment:
      - PUID=1000
      - PGID=1000
      - TZ=Etc/UTC
      - PASSWORD=${IDE_PASSWORD:-code}
      - SUDO_PASSWORD=${IDE_SUDO_PASSWORD:-sudo}
      - DEFAULT_WORKSPACE=/workspace
    volumes:
      - ./workspace:/workspace
      - ./data/code-server:/config
      - ./config/continue:/config/.continue:ro
    networks:
      - agent-network
    profiles:
      - ide

  # Web UI
  open-webui:
    image: ghcr.io/open-webui/open-webui:main
    container_name: agent-ui
    restart: unless-stopped
    ports:
      - "3000:8080"
    volumes:
      - ./data/open-webui:/app/backend/data
    environment:
      - OPENAI_API_BASE_URL=http://agent-core:8080/v1
      - OPENAI_API_KEY=${MASTER_KEY:-sk-agent}
      - ENABLE_SIGNUP=false
      - DEFAULT_MODELS=agent/orchestrator
    depends_on:
      - agent-core
    networks:
      - agent-network
    profiles:
      - ui

  # Auto-updater
  watchtower:
    image: containrrr/watchtower
    container_name: agent-watchtower
    restart: unless-stopped
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
    environment:
      - WATCHTOWER_POLL_INTERVAL=86400
      - WATCHTOWER_CLEANUP=true
    networks:
      - agent-network

networks:
  agent-network:
    driver: bridge

@@ -0,0 +1,88 @@
# API Reference

## Base URL
```
http://your-server-ip:8080/v1
```

## Authentication
All requests require a Bearer token:
```
Authorization: Bearer sk-agent-your-key
```

## Endpoints

### POST /chat/completions
Main agent endpoint.

**Request:**
```json
{
  "message": "Create a Python script to fetch weather data",
  "reasoning_mode": "plan_execute",
  "session_id": "unique-session-id",
  "max_iterations": 10
}
```

**Response:**
```json
{
  "response": "Here's the Python script...",
  "reasoning_mode": "plan_execute",
  "session_id": "unique-session-id",
  "steps": [
    {"step_number": 1, "type": "plan", "content": "..."},
    {"step_number": 2, "type": "action", "content": "..."}
  ],
  "metadata": {
    "model_used": "volume-tier",
    "auto_selected": true,
    "timestamp": "2024-..."
  }
}
```

### Reasoning Modes

| Mode | Use Case | Speed | Accuracy |
|------|----------|-------|----------|
| `react` | Simple Q&A, debugging | Fast | Medium |
| `plan_execute` | Complex multi-step tasks | Medium | High |
| `reflexion` | Code review, critical tasks | Slow | Very High |
| `auto` | Let system decide | Variable | Adaptive |
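
In `auto` mode the agent core scores the message against a list of complexity markers before dispatching. A condensed sketch of that heuristic (taken from the agent core's `determine_reasoning_mode`):

```python
def determine_reasoning_mode(message: str, requested: str) -> str:
    """Condensed from the agent core: score complexity markers, then route."""
    if requested != "auto":
        return requested
    markers = ["plan", "design", "architecture", "steps", "implement",
               "build", "create", "project", "complex", "multi-step"]
    msg = message.lower()
    # Many markers or a long prompt -> plan first; review language -> reflect.
    if sum(1 for m in markers if m in msg) >= 3 or len(message) > 500:
        return "plan_execute"
    if any(w in msg for w in ("review", "check", "verify")):
        return "reflexion"
    return "react"

print(determine_reasoning_mode("Review and verify this patch", "auto"))  # reflexion
```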

### GET /models
List available models.
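
A quick sketch of listing the advertised agent models (placeholder key):

```python
import requests

models = requests.get(
    "http://localhost:8080/v1/models",
    headers={"Authorization": "Bearer sk-agent-xxx"},
).json()
print([m["id"] for m in models["data"]])
# ['agent/orchestrator', 'agent/react', 'agent/plan-execute', 'agent/reflexion']
```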

### GET /health
Check system status.

### GET /sessions/{id}/history
Retrieve conversation history.

## Examples

### Python
```python
import requests

response = requests.post(
    "http://localhost:8080/v1/chat/completions",
    headers={"Authorization": "Bearer sk-agent-xxx"},
    json={
        "message": "Refactor this code",
        "reasoning_mode": "reflexion"
    }
)
print(response.json()["response"])
```
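
Fetching a session's stored history works the same way; note that this route lives at the server root, not under `/v1` (a sketch, using the example session ID from above):

```python
import requests

history = requests.get(
    "http://localhost:8080/sessions/unique-session-id/history",
    params={"limit": 5},  # server-side default is 10
).json()
print(history["count"], "messages:", history["history"])
```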

### cURL
```bash
curl -X POST http://localhost:8080/v1/chat/completions \
  -H "Authorization: Bearer sk-agent-xxx" \
  -H "Content-Type: application/json" \
  -d '{"message":"Hello","reasoning_mode":"auto"}'
```

@@ -0,0 +1,66 @@
# Provider Setup Guide

## Free Tier Providers

### Groq (Fastest)
- **URL**: https://console.groq.com
- **Free Tier**: 20 RPM, variable TPM
- **Models**: Llama 3.3 70B, Llama 3.1 8B
- **Best For**: Speed, quick coding tasks
- **Tip**: Create multiple accounts with different phones for load balancing

### Mistral (High Volume)
- **URL**: https://console.mistral.ai
- **Free Tier**: 1 billion tokens/month
- **Models**: Mistral Small, Medium
- **Best For**: High-volume processing, chatbots

### OpenRouter (Universal Access)
- **URL**: https://openrouter.ai
- **Free Tier**: 50 requests/day
- **Access**: Kimi K2:free, Gemini Flash:free
- **Best For**: Testing, fallback access

### Cohere (Embeddings)
- **URL**: https://cohere.com
- **Free Tier**: 1,000 calls/month
- **Best For**: Embeddings, RAG systems

## Trial/Cheap Providers

### Anthropic Claude (Highest Quality)
- **URL**: https://console.anthropic.com
- **Trial**: $5 free credits (new users)
- **Student**: $500 credits (apply with .edu)
- **Cost**: $3/M input (Sonnet), $0.25/M (Haiku)
- **Best For**: Complex reasoning, analysis, code review

### Moonshot Kimi (Best Value)
- **URL**: https://platform.moonshot.ai
- **Bonus**: $5 signup credit
- **Cost**: $0.60/M input, $2.50/M output
- **Context**: 128K tokens
- **Best For**: Coding, long documents, Chinese content

### DeepSeek (Cheapest Reasoning)
- **URL**: https://platform.deepseek.com
- **Cost**: $0.14/M input, $0.28/M output
- **Best For**: Reasoning tasks, math, code

## Configuration Priority

The system routes requests in this priority:

1. **Fast tasks** → Groq (free, instant)
2. **High volume** → Mistral (1B tokens)
3. **Complex coding** → Kimi (cheap, 128K context)
4. **Quality critical** → Claude (expensive but best)
5. **Fallback** → OpenRouter free tier

## Rate Limit Management

The router automatically:
- Tracks RPM/TPM across all providers
- Distributes load (multiple Groq accounts)
- Falls back when limits are approached
- Caches responses to reduce API calls (see the sketch after this list)
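
From the client side all of this is invisible: you call a single alias on the LiteLLM gateway and the router handles key rotation, fallback, and caching. A minimal sketch, assuming the `openai` Python package and the default `sk-agent` master key:

```python
from openai import OpenAI

# Point the standard OpenAI client at the LiteLLM gateway (port 4000).
client = OpenAI(base_url="http://localhost:4000/v1", api_key="sk-agent")

# "fast-tier" is load-balanced across both Groq keys; on rate limits the
# router falls back to volume-tier, then reasoning-tier (litellm_config.yaml).
reply = client.chat.completions.create(
    model="fast-tier",
    messages=[{"role": "user", "content": "One-line summary of ReAct?"}],
)
print(reply.choices[0].message.content)
```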

@@ -0,0 +1,98 @@
# Setup Guide

## Prerequisites

- **OS**: Debian 12, Ubuntu 22.04+, or Proxmox LXC
- **RAM**: 4GB minimum (8GB recommended for IDE)
- **Storage**: 20GB free space
- **Network**: Internet access for API calls

## Quick Install

```bash
# 1. Clone from your Gitea
git clone https://gitea.yourdomain.com/username/llm-hub.git
cd llm-hub

# 2. Run setup
chmod +x setup.sh && ./setup.sh

# 3. Configure API keys
nano .env

# 4. Start
./start.sh full
```

## Proxmox LXC Setup

On the Proxmox host, create an optimized container:

```bash
pct create 100 local:vztmpl/debian-12-standard_12.7-1_amd64.tar.zst \
  --hostname llm-hub \
  --memory 8192 \
  --swap 1024 \
  --cores 4 \
  --rootfs local-lvm:20 \
  --features nesting=1,keyctl=1 \
  --net0 name=eth0,bridge=vmbr0,ip=dhcp

# Add to /etc/pve/lxc/100.conf:
cat >> /etc/pve/lxc/100.conf << EOF
lxc.cgroup.relative = 0
lxc.apparmor.profile = unconfined
lxc.cgroup.devices.allow = a
EOF

pct start 100
pct exec 100 -- bash -c "apt update && apt install -y curl git"
# Inside the container, follow the Quick Install steps above.
```

## Configuration

Edit the `.env` file:

```bash
# Required: At least one LLM provider
GROQ_API_KEY_1=gsk_xxx
MISTRAL_API_KEY=your_key

# Recommended: Multiple providers for redundancy
ANTHROPIC_API_KEY=sk-ant-xxx
MOONSHOT_API_KEY=sk-xxx
OPENROUTER_API_KEY=sk-or-xxx

# UI Security
IDE_PASSWORD=strong-password-here
```

## Verification

```bash
# Check health
curl http://localhost:8080/health

# Test agent
curl -X POST http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-agent-xxx" \
  -d '{"message":"Hello","reasoning_mode":"react"}'
```

## Troubleshooting

**Docker not starting in LXC:**
```bash
# On the Proxmox host, check the config
pct config 100 | grep features
# Should show: features: nesting=1,keyctl=1
```

**Permission denied on workspace:**
```bash
chown -R 1000:1000 workspace/
```

**Port conflicts:**
Edit `docker-compose.yml` to change port mappings (e.g., `8081:8080`).

@@ -0,0 +1,57 @@
#!/bin/bash

ENV_FILE=".env"
CONFIG_FILE="config/litellm_config.yaml"

echo "🔌 Add Provider to LLM Hub"
echo "=========================="
echo ""
echo "1. Groq (Fast)"
echo "2. Mistral (Volume)"
echo "3. Anthropic Claude (Quality)"
echo "4. Moonshot Kimi (Cheap/128K)"
echo "5. OpenRouter (Free tier access)"
echo "6. Cohere (Embeddings)"
echo "7. DeepSeek (Cheap reasoning)"
echo "8. Exit"
read -p "Select (1-8): " choice

# Bail out before prompting for a key
[ "$choice" = "8" ] && exit 0

read -p "Enter API Key: " api_key

case $choice in
  1)
    read -p "Instance number (1,2,3...): " num
    var="GROQ_API_KEY_$num"
    echo "$var=$api_key" >> "$ENV_FILE"
    echo "✅ Added Groq key as $var"
    ;;
  2)
    echo "MISTRAL_API_KEY=$api_key" >> "$ENV_FILE"
    echo "✅ Added Mistral"
    ;;
  3)
    echo "ANTHROPIC_API_KEY=$api_key" >> "$ENV_FILE"
    echo "✅ Added Claude (remember: expensive, use sparingly)"
    ;;
  4)
    echo "MOONSHOT_API_KEY=$api_key" >> "$ENV_FILE"
    echo "✅ Added Kimi (great for coding!)"
    ;;
  5)
    echo "OPENROUTER_API_KEY=$api_key" >> "$ENV_FILE"
    echo "✅ Added OpenRouter (access free tier models)"
    ;;
  6)
    echo "COHERE_API_KEY=$api_key" >> "$ENV_FILE"
    echo "✅ Added Cohere (embeddings)"
    ;;
  7)
    echo "DEEPSEEK_API_KEY=$api_key" >> "$ENV_FILE"
    echo "✅ Added DeepSeek (cheap reasoning)"
    ;;
  *) echo "Invalid choice"; exit 1 ;;
esac

read -p "Restart services to apply? (y/N): " restart
[[ $restart =~ ^[Yy]$ ]] && docker-compose restart

@@ -0,0 +1,37 @@
#!/bin/bash

echo "🧠 LLM Hub Status"
echo "================="
echo ""

# Container status
echo "📦 Containers:"
docker-compose ps --services --filter "status=running" 2>/dev/null | while read service; do
  status=$(docker-compose ps -q "$service" | xargs docker inspect -f '{{.State.Status}}' 2>/dev/null)
  echo "  $service: $status"
done

echo ""
echo "🔍 Health Checks:"

# API health
if curl -s http://localhost:8080/health | grep -q "healthy"; then
  echo "  ✅ Agent Core: Healthy"
else
  echo "  ❌ Agent Core: Not responding"
fi

# LiteLLM
if curl -s http://localhost:4000/health/liveliness | grep -q "true"; then
  echo "  ✅ LiteLLM: Running"
else
  echo "  ❌ LiteLLM: Not responding"
fi

echo ""
echo "📊 Router Stats:"
curl -s http://localhost:8080/health 2>/dev/null | python3 -m json.tool 2>/dev/null || echo "  Unable to fetch stats"

echo ""
echo "💾 Memory Usage:"
docker stats --no-stream --format "table {{.Name}}\t{{.MemUsage}}" | grep -E "(agent-|NAME)" || true

@@ -0,0 +1,20 @@
FROM python:3.11-slim

WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y \
    build-essential \
    git \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application
COPY main.py .

EXPOSE 8080

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]

@@ -0,0 +1,357 @@
"""
Agentic AI Core - Multi-Reasoning Engine
Supports: ReAct, Plan-and-Execute, Reflexion
"""
import os
import json
from typing import List, Dict, Any, Literal, Optional
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field
from datetime import datetime
import httpx
import redis

app = FastAPI(
    title="Agentic AI Core",
    version="2.0.0",
    description="Multi-reasoning agent platform with memory and MCP integration"
)

# Configuration
LLM_API_BASE = os.getenv("LLM_API_BASE", "http://litellm:4000/v1")
LLM_API_KEY = os.getenv("LLM_API_KEY", "sk-agent")
DEFAULT_REASONING = os.getenv("DEFAULT_REASONING_MODE", "auto")

# Redis for short-term memory (fall back to no memory if unavailable)
try:
    redis_client = redis.from_url(
        os.getenv("REDIS_URL", "redis://redis:6379"),
        decode_responses=True
    )
except Exception:
    redis_client = None

# ==========================================
# DATA MODELS
# ==========================================

class AgentRequest(BaseModel):
    message: str
    session_id: str = Field(default="default", description="Conversation thread ID")
    reasoning_mode: Literal["react", "plan_execute", "reflexion", "auto"] = DEFAULT_REASONING
    context_files: Optional[List[str]] = Field(default_factory=list)
    enable_memory: bool = True
    max_iterations: int = 10

class AgentStep(BaseModel):
    step_number: int
    type: Literal["thought", "action", "observation", "reflection", "plan"]
    content: str
    timestamp: datetime = Field(default_factory=datetime.now)

class AgentResponse(BaseModel):
    response: str
    reasoning_mode: str
    session_id: str
    steps: List[AgentStep] = Field(default_factory=list)
    metadata: Dict[str, Any] = Field(default_factory=dict)

# ==========================================
# UTILITY FUNCTIONS
# ==========================================

async def call_llm(messages: List[Dict], model: str = "auto", tools: Optional[List] = None) -> Dict:
    """Call LLM through LiteLLM gateway"""
    async with httpx.AsyncClient() as client:
        payload = {
            "model": model,
            "messages": messages,
            "temperature": 0.7,
            "max_tokens": 4000
        }
        if tools:
            payload["tools"] = tools

        response = await client.post(
            f"{LLM_API_BASE}/chat/completions",
            headers={"Authorization": f"Bearer {LLM_API_KEY}"},
            json=payload,
            timeout=60.0
        )
        return response.json()

def determine_reasoning_mode(message: str, requested: str) -> str:
    """Auto-select reasoning mode based on task complexity"""
    if requested != "auto":
        return requested

    # Complexity indicators
    complexity_markers = [
        "plan", "design", "architecture", "steps", "implement",
        "build", "create", "project", "complex", "multi-step"
    ]

    msg_lower = message.lower()
    score = sum(1 for marker in complexity_markers if marker in msg_lower)

    if score >= 3 or len(message) > 500:
        return "plan_execute"
    elif "review" in msg_lower or "check" in msg_lower or "verify" in msg_lower:
        return "reflexion"
    else:
        return "react"

# ==========================================
# REASONING ENGINES
# ==========================================

class ReActEngine:
    """ReAct: Reasoning + Acting in interleaved steps"""

    async def run(self, message: str, session_id: str) -> Dict:
        steps = []

        # Initial thought (currently a single LLM pass; the interleaved
        # tool-calling loop is not wired in yet)
        messages = [
            {"role": "system", "content": "You are a ReAct agent. Think step by step and act."},
            {"role": "user", "content": message}
        ]

        response = await call_llm(messages, model="fast-tier")

        steps.append(AgentStep(
            step_number=1,
            type="thought",
            content="Initial analysis and reasoning"
        ))

        content = response["choices"][0]["message"]["content"]

        return {
            "response": content,
            "steps": steps,
            "model_used": "fast-tier"
        }

class PlanAndExecuteEngine:
    """Plan first, then execute step by step"""

    async def run(self, message: str, session_id: str) -> Dict:
        steps = []

        # Planning phase
        plan_messages = [
            {"role": "system", "content": "Create a step-by-step plan to accomplish the task."},
            {"role": "user", "content": f"Create a detailed plan for: {message}"}
        ]

        plan_response = await call_llm(plan_messages, model="volume-tier")
        plan = plan_response["choices"][0]["message"]["content"]

        steps.append(AgentStep(
            step_number=1,
            type="plan",
            content=plan
        ))

        # Execution phase
        exec_messages = [
            {"role": "system", "content": "Execute the task following the provided plan."},
            {"role": "user", "content": f"Task: {message}\n\nPlan: {plan}\n\nExecute this plan:"}
        ]

        exec_response = await call_llm(exec_messages, model="reasoning-tier")
        result = exec_response["choices"][0]["message"]["content"]

        steps.append(AgentStep(
            step_number=2,
            type="action",
            content="Execution completed"
        ))

        return {
            "response": result,
            "steps": steps,
            "model_used": "reasoning-tier",
            "plan": plan
        }

class ReflexionEngine:
    """Execute with self-reflection and correction"""

    async def run(self, message: str, session_id: str, max_iterations: int = 2) -> Dict:
        steps = []

        # Initial execution
        messages = [
            {"role": "system", "content": "Solve the problem carefully."},
            {"role": "user", "content": message}
        ]

        response = await call_llm(messages, model="quality-tier")
        answer = response["choices"][0]["message"]["content"]

        steps.append(AgentStep(
            step_number=1,
            type="action",
            content="Initial solution generated"
        ))

        # Reflection phase: critique, then revise, up to max_iterations times
        for i in range(max_iterations):
            reflect_messages = [
                {"role": "system", "content": "Critically evaluate the solution for errors or improvements."},
                {"role": "user", "content": f"Problem: {message}\n\nProposed Solution: {answer}\n\nIdentify any issues or improvements:"}
            ]

            reflect_response = await call_llm(reflect_messages, model="claude-haiku")
            reflection = reflect_response["choices"][0]["message"]["content"]

            if "correct" in reflection.lower() and "no issues" in reflection.lower():
                break

            steps.append(AgentStep(
                step_number=2 + i,
                type="reflection",
                content=reflection
            ))

            # Improve based on reflection
            improve_messages = [
                {"role": "system", "content": "Improve the solution based on the critique."},
                {"role": "user", "content": f"Original: {answer}\n\nIssues found: {reflection}\n\nProvide improved solution:"}
            ]

            improve_response = await call_llm(improve_messages, model="quality-tier")
            answer = improve_response["choices"][0]["message"]["content"]

        return {
            "response": answer,
            "steps": steps,
            "model_used": "quality-tier",
            "iterations": len(steps)
        }

# ==========================================
# API ENDPOINTS
# ==========================================

@app.post("/v1/chat/completions", response_model=AgentResponse)
async def agent_endpoint(request: AgentRequest):
    """
    Main agent endpoint with multiple reasoning strategies:
    - react: Fast iterative reasoning (good for simple tasks)
    - plan_execute: Plan then execute (good for complex tasks)
    - reflexion: Self-correcting (good for accuracy-critical tasks)
    - auto: Automatically select based on task complexity
    """

    # Determine reasoning mode
    mode = determine_reasoning_mode(request.message, request.reasoning_mode)

    # Store message in memory if enabled
    if request.enable_memory and redis_client:
        key = f"session:{request.session_id}:history"
        redis_client.lpush(key, request.message)
        redis_client.ltrim(key, 0, 99)  # Keep last 100 messages

    try:
        # Route to appropriate reasoning engine
        if mode == "react":
            result = await ReActEngine().run(request.message, request.session_id)
        elif mode == "plan_execute":
            result = await PlanAndExecuteEngine().run(request.message, request.session_id)
        elif mode == "reflexion":
            result = await ReflexionEngine().run(
                request.message,
                request.session_id,
                max_iterations=2
            )
        else:
            # Default fallback
            result = await ReActEngine().run(request.message, request.session_id)

        return AgentResponse(
            response=result["response"],
            reasoning_mode=mode,
            session_id=request.session_id,
            steps=result.get("steps", []),
            metadata={
                "model_used": result.get("model_used", "unknown"),
                "auto_selected": request.reasoning_mode == "auto",
                "timestamp": datetime.now().isoformat()
            }
        )

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@app.get("/v1/models")
async def list_models():
    """List available agent models"""
    return {
        "object": "list",
        "data": [
            {
                "id": "agent/orchestrator",
                "object": "model",
                "created": 1700000000,
                "owned_by": "llm-hub",
                "description": "Auto-selecting orchestrator"
            },
            {
                "id": "agent/react",
                "object": "model",
                "created": 1700000000,
                "owned_by": "llm-hub",
                "description": "ReAct reasoning - fast iterative"
            },
            {
                "id": "agent/plan-execute",
                "object": "model",
                "created": 1700000000,
                "owned_by": "llm-hub",
                "description": "Plan-and-Execute - complex tasks"
            },
            {
                "id": "agent/reflexion",
                "object": "model",
                "created": 1700000000,
                "owned_by": "llm-hub",
                "description": "Reflexion - self-correcting with verification"
            }
        ]
    }

@app.get("/health")
async def health():
    """Health check endpoint"""
    try:
        redis_status = "connected" if redis_client and redis_client.ping() else "disconnected"
    except redis.RedisError:
        # ping() raises if Redis is down; report that instead of failing the check
        redis_status = "disconnected"

    return {
        "status": "healthy",
        "version": "2.0.0",
        "capabilities": ["react", "plan_execute", "reflexion", "mcp", "memory"],
        "default_mode": DEFAULT_REASONING,
        "redis": redis_status,
        "timestamp": datetime.now().isoformat()
    }

@app.get("/sessions/{session_id}/history")
async def get_session_history(session_id: str, limit: int = 10):
    """Retrieve conversation history for a session"""
    if not redis_client:
        return {"error": "Redis not available"}

    key = f"session:{session_id}:history"
    history = redis_client.lrange(key, 0, limit - 1)

    return {
        "session_id": session_id,
        "history": history,
        "count": len(history)
    }

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8080)

@@ -0,0 +1,13 @@
fastapi==0.104.1
uvicorn[standard]==0.24.0
langgraph==0.2.53
langchain==0.3.0
langchain-openai==0.2.0
langchain-chroma==0.1.4
chromadb==0.5.0
redis==5.0.1
httpx==0.25.2
tiktoken==0.5.1
pydantic==2.5.0
python-multipart==0.0.6
aiofiles==23.2.1

@@ -0,0 +1,11 @@
FROM python:3.11-slim

WORKDIR /app

# Node.js is needed because several configured MCP servers launch via npx
RUN apt-get update && apt-get install -y nodejs npm \
    && rm -rf /var/lib/apt/lists/*

RUN pip install mcpo uv

COPY servers.json .

EXPOSE 8000

CMD ["mcpo", "--config", "servers.json", "--host", "0.0.0.0", "--port", "8000"]

@@ -0,0 +1,24 @@
{
  "mcpServers": {
    "filesystem": {
      "command": "npx",
      "args": ["-y", "@modelcontextprotocol/server-filesystem", "/workspace"]
    },
    "git": {
      "command": "uvx",
      "args": ["mcp-server-git"]
    },
    "fetch": {
      "command": "uvx",
      "args": ["mcp-server-fetch"]
    },
    "memory": {
      "command": "npx",
      "args": ["-y", "@modelcontextprotocol/server-memory"]
    },
    "sequential-thinking": {
      "command": "npx",
      "args": ["-y", "@modelcontextprotocol/server-sequential-thinking"]
    }
  }
}

@@ -0,0 +1,84 @@
#!/bin/bash
set -e

GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

INSTALL_DIR="$(pwd)"

echo -e "${GREEN}🧠 Agentic AI Hub Setup${NC}"
echo "======================="

# Detect Proxmox LXC
if [ -f /proc/1/environ ] && grep -q "container=lxc" /proc/1/environ 2>/dev/null; then
    echo -e "${YELLOW}✓ LXC container detected${NC}"
    if ! grep -q "lxc.cgroup.relative" /etc/pve/lxc/*.conf 2>/dev/null; then
        echo -e "${YELLOW}⚠️  Tip: For LXC with Docker, add to /etc/pve/lxc/XXX.conf:${NC}"
        echo "  lxc.cgroup.relative = 0"
        echo "  lxc.apparmor.profile = unconfined"
        echo "  lxc.cgroup.devices.allow = a"
    fi
fi

# Check/install Docker
if ! command -v docker &> /dev/null; then
    echo -e "${YELLOW}Installing Docker...${NC}"
    curl -fsSL https://get.docker.com | sh
    usermod -aG docker $USER || true
    systemctl enable docker
    systemctl start docker
fi

if ! command -v docker-compose &> /dev/null && ! docker compose version &> /dev/null; then
    echo -e "${YELLOW}Installing Docker Compose...${NC}"
    apt-get update && apt-get install -y docker-compose-plugin
fi

# Install Node.js for MCP
if ! command -v node &> /dev/null || [ "$(node -v | cut -d'v' -f2 | cut -d'.' -f1)" -lt 20 ]; then
    echo -e "${YELLOW}Installing Node.js 20...${NC}"
    curl -fsSL https://deb.nodesource.com/setup_20.x | bash -
    apt-get install -y nodejs
fi

# Install uv for Python tools
if ! command -v uv &> /dev/null; then
    pip install uv || pip3 install uv
fi

# Create directories (matching the bind mounts in docker-compose.yml)
echo -e "${BLUE}Creating directories...${NC}"
mkdir -p data/{redis,chroma,agent/sessions,code-server,open-webui,neo4j} workspace logs
mkdir -p services/{agent-core,mcpo}

# Set permissions
chown -R 1000:1000 workspace data || true
chmod +x *.sh scripts/*.sh 2>/dev/null || true

# Create .env if not exists
if [ ! -f .env ]; then
    echo -e "${YELLOW}Creating .env file...${NC}"
    cp .env.example .env
    # Generate random keys
    sed -i "s/MASTER_KEY=.*/MASTER_KEY=sk-agent-$(openssl rand -hex 8)/" .env
    sed -i "s/WEBUI_SECRET_KEY=.*/WEBUI_SECRET_KEY=$(openssl rand -hex 32)/" .env
    echo -e "${GREEN}✓ .env created. Edit it to add your API keys.${NC}"
fi

# Create workspace gitkeep
touch workspace/.gitkeep

echo ""
echo -e "${GREEN}✅ Setup complete!${NC}"
echo ""
echo "Next steps:"
echo "1. Edit .env file: nano .env"
echo "2. Add your API keys (Groq, Mistral, etc.)"
echo "3. Start services: ./start.sh"
echo ""
echo "Documentation:"
echo "  - Setup: docs/SETUP.md"
echo "  - API: docs/API.md"
echo "  - Providers: docs/PROVIDERS.md"

@@ -0,0 +1,66 @@
#!/bin/bash

cd "$(dirname "$0")"

GREEN='\033[0;32m'
BLUE='\033[0;34m'
NC='\033[0m'

if [ ! -f .env ]; then
    echo "Error: .env file not found. Run ./setup.sh first."
    exit 1
fi

source .env

echo -e "${GREEN}🚀 Starting Agentic LLM Hub...${NC}"
echo ""

# Determine profile
PROFILE=${1:-full}

if [ "$PROFILE" = "minimal" ]; then
    echo "Mode: Minimal (core services only)"
    docker-compose up -d redis chromadb litellm agent-core
elif [ "$PROFILE" = "ide" ]; then
    echo "Mode: Standard + IDE"
    docker-compose --profile ide up -d
elif [ "$PROFILE" = "full" ]; then
    echo "Mode: Full (all services including MCP tools)"
    docker-compose --profile ide --profile mcp --profile ui up -d
else
    echo "Usage: ./start.sh [minimal|ide|full]"
    echo "  minimal - Core services only (lowest resources)"
    echo "  ide     - Core + VS Code IDE"
    echo "  full    - Everything including MCP tools and Web UI"
    exit 1
fi

echo ""
echo "Waiting for services..."
sleep 5

# Get IP
IP=$(hostname -I | awk '{print $1}')

echo ""
echo -e "${GREEN}✅ Services started!${NC}"
echo ""
echo "Access Points:"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
printf "${BLUE}%-22s${NC} %s\n" "Agent API:" "http://$IP:8080/v1"
printf "${BLUE}%-22s${NC} %s\n" "VS Code IDE:" "http://$IP:8443"
printf "${BLUE}%-22s${NC} %s\n" "LiteLLM Gateway:" "http://$IP:4000"
printf "${BLUE}%-22s${NC} %s\n" "MCP Tools:" "http://$IP:8001/docs"
printf "${BLUE}%-22s${NC} %s\n" "Web UI:" "http://$IP:3000"
printf "${BLUE}%-22s${NC} %s\n" "Vector DB:" "http://$IP:8000"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo ""
echo "Test command:"
# Heredoc avoids the quoting pitfalls of echoing a multi-line command
cat << EOF
curl -X POST http://$IP:8080/v1/chat/completions \\
  -H 'Content-Type: application/json' \\
  -H 'Authorization: Bearer ${MASTER_KEY:0:20}...' \\
  -d '{"message":"Hello","reasoning_mode":"react"}'
EOF
echo ""
echo "View logs: docker-compose logs -f"
echo "Stop: docker-compose down"