commit 2cafb31cb47eb46de27e5270b7e62928cd714e9f Author: ImpulsiveFPS Date: Sun Feb 1 15:11:31 2026 +0100 Initial commit: Agentic LLM Hub with multi-reasoning, MCP tools, and IDE diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..b733ebe --- /dev/null +++ b/.env.example @@ -0,0 +1,69 @@ +# Master Key for API Access (generate strong key) +MASTER_KEY=sk-agent-$(openssl rand -hex 8) + +# ========================================== +# FREE TIER API KEYS (Add your keys below) +# ========================================== + +# Groq - https://console.groq.com (20 RPM free, create multiple accounts) +GROQ_API_KEY_1=gsk_your_first_groq_key_here +GROQ_API_KEY_2=gsk_your_second_groq_key_here + +# Mistral - https://console.mistral.ai (1B tokens/month free) +MISTRAL_API_KEY=your_mistral_key_here + +# Anthropic Claude - https://console.anthropic.com ($5 trial, $500 student) +ANTHROPIC_API_KEY=sk-ant-your_claude_key_here + +# Moonshot Kimi - https://platform.moonshot.ai ($5 signup bonus) +MOONSHOT_API_KEY=sk-your_moonshot_key_here + +# OpenRouter - https://openrouter.ai (50 req/day free, access to Kimi free) +OPENROUTER_API_KEY=sk-or-your_openrouter_key_here + +# Cohere - https://cohere.com (1K calls/month free, good for embeddings) +COHERE_API_KEY=your_cohere_key_here + +# DeepSeek - https://platform.deepseek.com (cheap rates) +DEEPSEEK_API_KEY=sk-your_deepseek_key_here + +# GitHub Token (for MCP Git tools) +GITHUB_TOKEN=ghp_your_github_token_here + +# ========================================== +# AGENT SETTINGS +# ========================================== + +# Default reasoning: react, plan_execute, reflexion, or auto +DEFAULT_REASONING=auto + +# Enable self-reflection (true/false) +ENABLE_REFLECTION=true + +# Maximum iterations per request +MAX_ITERATIONS=10 + +# ========================================== +# UI/IDE SETTINGS +# ========================================== + +# Code-Server passwords (change these!) 
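+# Tip: generate a strong value yourself, e.g. with: openssl rand -hex 16 (any hard-to-guess string works)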
+IDE_PASSWORD=secure-ide-password-123 +IDE_SUDO_PASSWORD=admin-password-456 + +# Optional: Domain for reverse proxy +# IDE_DOMAIN=code.yourdomain.com + +# Web UI settings +WEBUI_SECRET_KEY=$(openssl rand -hex 32) + +# ========================================== +# ADVANCED MEMORY SETTINGS +# ========================================== + +# Enable knowledge graph (Neo4j) - requires more RAM +ENABLE_KNOWLEDGE_GRAPH=false +NEO4J_AUTH=neo4j/password + +# ChromaDB settings +CHROMA_STORAGE_PATH=/data/chroma diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..882fd9e --- /dev/null +++ b/.gitignore @@ -0,0 +1,31 @@ +# Environment variables +.env +.env.local +.env.*.local + +# Data directories (persisted volumes) +data/ +workspace/* +!workspace/.gitkeep +logs/ +*.db +*.sqlite3 + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# OS +.DS_Store +Thumbs.db + +# Docker +.docker/ +docker-compose.override.yml + +# Temporary +*.tmp +*.bak +*.log diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..e347014 --- /dev/null +++ b/Makefile @@ -0,0 +1,52 @@ +.PHONY: help setup start stop logs status update backup clean + +help: + @echo "Agentic LLM Hub Management" + @echo "==========================" + @echo "make setup - Initial setup" + @echo "make start - Start all services (full profile)" + @echo "make start-ide - Start with IDE only" + @echo "make stop - Stop all services" + @echo "make logs - View logs" + @echo "make status - Check service status" + @echo "make update - Pull latest and update images" + @echo "make backup - Backup data directories" + @echo "make clean - Remove containers (data preserved)" + +setup: + @chmod +x *.sh scripts/*.sh 2>/dev/null || true + @./setup.sh + +start: + @./start.sh full + +start-ide: + @./start.sh ide + +stop: + @docker-compose down + +logs: + @docker-compose logs -f --tail=100 + +status: + @echo "Container Status:" + @docker-compose ps + @echo "" + @echo "API Health:" + @curl -s http://localhost:8080/health | python3 -m json.tool 2>/dev/null || echo "API not responding" + +update: + @git pull + @docker-compose pull + @docker-compose up -d + +backup: + @mkdir -p backup/$(shell date +%Y%m%d) + @cp -r data backup/$(shell date +%Y%m%d)/ + @cp .env backup/$(shell date +%Y%m%d)/ + @echo "Backup created: backup/$(shell date +%Y%m%d)/" + +clean: + @docker-compose down -v + @echo "Containers removed. Data preserved in ./data/" diff --git a/README.md b/README.md new file mode 100644 index 0000000..763056b --- /dev/null +++ b/README.md @@ -0,0 +1,60 @@ +# 🤖 Agentic LLM Hub + +Self-hosted AI agent platform with multi-provider LLM aggregation, reasoning engines (ReAct, Plan-and-Execute, Reflexion), MCP tools, and web IDE. + +## 🚀 Quick Start + +```bash +# 1. Clone from your Gitea +git clone https://gitea.yourdomain.com/youruser/llm-hub.git +cd llm-hub + +# 2. Configure +cp .env.example .env +nano .env # Add your API keys + +# 3. 
Deploy +./setup.sh && ./start.sh +``` + +## 📡 Access Points + +| Service | URL | Description | +|---------|-----|-------------| +| VS Code IDE | `http://your-ip:8443` | Full IDE with Continue.dev | +| Agent API | `http://your-ip:8080/v1` | Main API endpoint | +| LiteLLM | `http://your-ip:4000` | LLM Gateway | +| MCP Tools | `http://your-ip:8001/docs` | Tool OpenAPI docs | +| ChromaDB | `http://your-ip:8000` | Vector memory | +| Web UI | `http://your-ip:3000` | Chat interface | + +## 🔧 Supported Providers + +- **Groq** (Free tier, fast) +- **Mistral** (1B tokens/month free) +- **Anthropic Claude** (Trial credits) +- **Moonshot Kimi** ($5 signup bonus) +- **OpenRouter** (Free tier access) +- **Cohere** (1K calls/month) +- **DeepSeek** (Cheap reasoning) + +## 🧠 Reasoning Modes + +- `react` - Fast iterative reasoning +- `plan_execute` - Complex multi-step tasks +- `reflexion` - Self-correcting with verification +- `auto` - Automatic selection + +## 📚 Documentation + +- [Setup Guide](docs/SETUP.md) +- [API Reference](docs/API.md) +- [Provider Guide](docs/PROVIDERS.md) + +## 🔄 Updates + +```bash +git pull origin main +docker-compose pull +docker-compose up -d +``` diff --git a/config/agent/.gitkeep b/config/agent/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/config/continue/config.yaml b/config/continue/config.yaml new file mode 100644 index 0000000..931a52c --- /dev/null +++ b/config/continue/config.yaml @@ -0,0 +1,54 @@ +name: LLM Hub IDE +version: 1.0.0 +schema: v1 + +models: + - name: Groq Llama 3.3 70B + provider: openai + model: fast-tier + apiBase: http://agent-core:8080/v1 + apiKey: sk-agent + roles: [chat, edit, apply] + + - name: Claude 3.5 Sonnet + provider: openai + model: quality-tier + apiBase: http://agent-core:8080/v1 + apiKey: sk-agent + roles: [chat, edit, apply] + + - name: Kimi K2 + provider: openai + model: reasoning-tier + apiBase: http://agent-core:8080/v1 + apiKey: sk-agent + roles: [chat, edit, apply] + + - name: Mistral Small + provider: openai + model: volume-tier + apiBase: http://agent-core:8080/v1 + apiKey: sk-agent + roles: [chat, edit] + +tabAutocompleteModel: + name: Mistral Autocomplete + provider: openai + model: volume-tier + apiBase: http://litellm:4000/v1 + apiKey: sk-agent + +embeddingsProvider: + provider: openai + model: embeddings + apiBase: http://litellm:4000/v1 + apiKey: sk-agent + +context: + - provider: code + - provider: docs + - provider: diff + - provider: terminal + - provider: problems + - provider: folder + - provider: codebase diff --git a/config/litellm_config.yaml b/config/litellm_config.yaml new file mode 100644 index 0000000..91a93e0 --- /dev/null +++ b/config/litellm_config.yaml @@ -0,0 +1,87 @@ +model_list: + # FREE TIER AGGREGATION + - model_name: fast-tier + litellm_params: + model: groq/llama-3.3-70b-versatile + api_key: os.environ/GROQ_API_KEY_1 + rpm_limit: 20 + + - model_name: fast-tier + litellm_params: + model: groq/llama-3.1-8b-instant + api_key: os.environ/GROQ_API_KEY_2 + rpm_limit: 20 + + - model_name: volume-tier + litellm_params: + model: mistral/mistral-small-latest + api_key: os.environ/MISTRAL_API_KEY + tpm_limit: 500000 + + # CLAUDE & KIMI - Quality/Reasoning + - model_name: quality-tier + litellm_params: + model: anthropic/claude-3-5-sonnet-20240620 + api_key: os.environ/ANTHROPIC_API_KEY + rpm_limit: 5 + + - model_name: claude-haiku + litellm_params: + model: anthropic/claude-3-haiku-20240307 + api_key: os.environ/ANTHROPIC_API_KEY + rpm_limit: 10 + + # Kimi via OpenRouter (Free tier) + - 
model_name: reasoning-tier + litellm_params: + model: openrouter/moonshotai/kimi-k2:free + api_key: os.environ/OPENROUTER_API_KEY + + # Kimi Direct (Ultra-cheap) + - model_name: reasoning-tier + litellm_params: + model: moonshot/kimi-k2-0711-preview + api_key: os.environ/MOONSHOT_API_KEY + tpm_limit: 100000 + + - model_name: deepseek + litellm_params: + model: deepseek/deepseek-chat + api_key: os.environ/DEEPSEEK_API_KEY + + # Embeddings & Tools + - model_name: embeddings + litellm_params: + model: cohere/embed-english-v3.0 + api_key: os.environ/COHERE_API_KEY + + # Local Fallback + - model_name: local-llama + litellm_params: + model: ollama/llama3.1:8b + api_base: http://ollama:11434 + +router_settings: + routing_strategy: "usage-based-routing" + timeout: 30 + num_retries: 3 + allowed_fails: 2 + cooldown_time: 60 + fallbacks: + - fast-tier: ["volume-tier", "reasoning-tier"] + - volume-tier: ["reasoning-tier", "local-llama"] + - quality-tier: ["claude-haiku", "reasoning-tier"] + +general_settings: + master_key: os.environ/LITELLM_MASTER_KEY + cache: true + cache_params: + type: redis + host: redis + port: 6379 + ttl: 3600 + retry_policy: + TimeoutError: 3 + RateLimitError: 5 + log_level: info + log_file: /app/logs/litellm.log diff --git a/config/mcp/servers.json b/config/mcp/servers.json new file mode 100644 index 0000000..728ea3b --- /dev/null +++ b/config/mcp/servers.json @@ -0,0 +1,35 @@ +{ + "mcpServers": { + "filesystem": { + "command": "npx", + "args": ["-y", "@modelcontextprotocol/server-filesystem", "/workspace"] + }, + "git": { + "command": "uvx", + "args": ["mcp-server-git"] + }, + "github": { + "command": "npx", + "args": ["-y", "@modelcontextprotocol/server-github"], + "env": { + "GITHUB_PERSONAL_ACCESS_TOKEN": "${GITHUB_TOKEN}" + } + }, + "fetch": { + "command": "uvx", + "args": ["mcp-server-fetch"] + }, + "sqlite": { + "command": "uvx", + "args": ["mcp-server-sqlite", "/workspace/data.db"] + }, + "memory": { + "command": "npx", + "args": ["-y", "@modelcontextprotocol/server-memory"] + }, + "sequential-thinking": { + "command": "npx", + "args": ["-y", "@modelcontextprotocol/server-sequential-thinking"] + } + } +} diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..4436080 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,151 @@ +version: '3.8' + +services: + # Core Infrastructure + redis: + image: redis:7-alpine + container_name: agent-redis + restart: unless-stopped + volumes: + - ./data/redis:/data + command: redis-server --appendonly yes --maxmemory 512mb --maxmemory-policy allkeys-lru + networks: + - agent-network + + chromadb: + image: chromadb/chroma:latest + container_name: agent-memory-vector + restart: unless-stopped + ports: + - "8000:8000" + volumes: + - ./data/chroma:/chroma/chroma + environment: + - IS_PERSISTENT=TRUE + - PERSIST_DIRECTORY=/chroma/chroma + - ANONYMIZED_TELEMETRY=FALSE + networks: + - agent-network + + # LLM Gateway + litellm: + image: ghcr.io/berriai/litellm:main-latest + container_name: agent-gateway + restart: unless-stopped + ports: + - "4000:4000" + volumes: + - ./config/litellm_config.yaml:/app/config.yaml + - ./logs:/app/logs + environment: + - DATABASE_URL=sqlite:///app/db.sqlite3 + - LITELLM_MASTER_KEY=${MASTER_KEY:-sk-agent} + - REDIS_HOST=redis + - REDIS_PORT=6379 + command: --config /app/config.yaml --port 4000 + networks: + - agent-network + + # Agent Core with Reasoning Engines + agent-core: + build: + context: ./services/agent-core + dockerfile: Dockerfile + container_name: agent-core + restart: 
unless-stopped + ports: + - "8080:8080" + volumes: + - ./workspace:/workspace + - ./config/agent:/app/config + - ./data/agent:/app/data + environment: + - LLM_API_BASE=http://litellm:4000/v1 + - LLM_API_KEY=${MASTER_KEY:-sk-agent} + - REDIS_URL=redis://redis:6379/0 + - CHROMA_URL=http://chromadb:8000 + - DEFAULT_REASONING_MODE=${DEFAULT_REASONING:-auto} + depends_on: + - litellm + - redis + - chromadb + networks: + - agent-network + + # MCP Tool Gateway + mcpo: + build: + context: ./services/mcpo + dockerfile: Dockerfile + container_name: agent-mcp-gateway + restart: unless-stopped + ports: + - "8001:8000" + volumes: + - ./workspace:/workspace:ro + - ./config/mcp:/app/config + networks: + - agent-network + profiles: + - mcp + + # VS Code Server with AI Assistant + code-server: + image: lscr.io/linuxserver/code-server:latest + container_name: agent-ide + restart: unless-stopped + ports: + - "8443:8443" + environment: + - PUID=1000 + - PGID=1000 + - TZ=Etc/UTC + - PASSWORD=${IDE_PASSWORD:-code} + - SUDO_PASSWORD=${IDE_SUDO_PASSWORD:-sudo} + - DEFAULT_WORKSPACE=/workspace + volumes: + - ./workspace:/workspace + - ./data/code-server:/config + - ./config/continue:/config/.continue:ro + networks: + - agent-network + profiles: + - ide + + # Web UI + open-webui: + image: ghcr.io/open-webui/open-webui:main + container_name: agent-ui + restart: unless-stopped + ports: + - "3000:8080" + volumes: + - ./data/open-webui:/app/backend/data + environment: + - OPENAI_API_BASE_URL=http://agent-core:8080/v1 + - OPENAI_API_KEY=${MASTER_KEY:-sk-agent} + - ENABLE_SIGNUP=false + - DEFAULT_MODELS=agent/orchestrator + depends_on: + - agent-core + networks: + - agent-network + profiles: + - ui + + # Auto-updater + watchtower: + image: containrrr/watchtower + container_name: agent-watchtower + restart: unless-stopped + volumes: + - /var/run/docker.sock:/var/run/docker.sock + environment: + - WATCHTOWER_POLL_INTERVAL=86400 + - WATCHTOWER_CLEANUP=true + networks: + - agent-network + +networks: + agent-network: + driver: bridge diff --git a/docs/API.md b/docs/API.md new file mode 100644 index 0000000..5e9fa22 --- /dev/null +++ b/docs/API.md @@ -0,0 +1,88 @@ +# API Reference + +## Base URL +``` +http://your-server-ip:8080/v1 +``` + +## Authentication +All requests require Bearer token: +``` +Authorization: Bearer sk-agent-your-key +``` + +## Endpoints + +### POST /chat/completions +Main agent endpoint. + +**Request:** +```json +{ + "message": "Create a Python script to fetch weather data", + "reasoning_mode": "plan_execute", + "session_id": "unique-session-id", + "max_iterations": 10 +} +``` + +**Response:** +```json +{ + "response": "Here\'s the Python script...", + "reasoning_mode": "plan_execute", + "session_id": "unique-session-id", + "steps": [ + {"step_number": 1, "type": "plan", "content": "..."}, + {"step_number": 2, "type": "action", "content": "..."} + ], + "metadata": { + "model_used": "volume-tier", + "auto_selected": true, + "timestamp": "2024-..." + } +} +``` + +### Reasoning Modes + +| Mode | Use Case | Speed | Accuracy | +|------|----------|-------|----------| +| `react` | Simple Q&A, debugging | Fast | Medium | +| `plan_execute` | Complex multi-step tasks | Medium | High | +| `reflexion` | Code review, critical tasks | Slow | Very High | +| `auto` | Let system decide | Variable | Adaptive | + +### GET /models +List available models. + +### GET /health +Check system status. + +### GET /sessions/{id}/history +Retrieve conversation history. 
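+
+Quick checks from the shell (host, port, and key below are placeholders for your own deployment; note that the history route is served without the `/v1` prefix in `services/agent-core/main.py`):
+
+```bash
+# Liveness and capability report
+curl -s http://localhost:8080/health
+
+# List the agent models exposed by the hub
+curl -s -H "Authorization: Bearer sk-agent-xxx" http://localhost:8080/v1/models
+
+# Last messages of a session (limit is optional, default 10)
+curl -s -H "Authorization: Bearer sk-agent-xxx" \
+  "http://localhost:8080/sessions/unique-session-id/history?limit=10"
+```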
+ +## Examples + +### Python +```python +import requests + +response = requests.post( + "http://localhost:8080/v1/chat/completions", + headers={"Authorization": "Bearer sk-agent-xxx"}, + json={ + "message": "Refactor this code", + "reasoning_mode": "reflexion" + } +) +print(response.json()["response"]) +``` + +### cURL +```bash +curl -X POST http://localhost:8080/v1/chat/completions \ + -H "Authorization: Bearer sk-agent-xxx" \ + -H "Content-Type: application/json" \ + -d '{"message":"Hello","reasoning_mode":"auto"}' +``` diff --git a/docs/PROVIDERS.md b/docs/PROVIDERS.md new file mode 100644 index 0000000..133264a --- /dev/null +++ b/docs/PROVIDERS.md @@ -0,0 +1,66 @@ +# Provider Setup Guide + +## Free Tier Providers + +### Groq (Fastest) +- **URL**: https://console.groq.com +- **Free Tier**: 20 RPM, variable TPM +- **Models**: Llama 3.3 70B, Llama 3.1 8B +- **Best For**: Speed, quick coding tasks +- **Tip**: Create multiple accounts with different phones for load balancing + +### Mistral (High Volume) +- **URL**: https://console.mistral.ai +- **Free Tier**: 1 billion tokens/month +- **Models**: Mistral Small, Medium +- **Best For**: High-volume processing, chatbots + +### OpenRouter (Universal Access) +- **URL**: https://openrouter.ai +- **Free Tier**: 50 requests/day +- **Access**: Kimi K2:free, Gemini Flash:free +- **Best For**: Testing, fallback access + +### Cohere (Embeddings) +- **URL**: https://cohere.com +- **Free Tier**: 1,000 calls/month +- **Best For**: Embeddings, RAG systems + +## Trial/Cheap Providers + +### Anthropic Claude (Highest Quality) +- **URL**: https://console.anthropic.com +- **Trial**: $5 free credits (new users) +- **Student**: $500 credits (apply with .edu) +- **Cost**: $3/M input (Sonnet), $0.25/M (Haiku) +- **Best For**: Complex reasoning, analysis, code review + +### Moonshot Kimi (Best Value) +- **URL**: https://platform.moonshot.ai +- **Bonus**: $5 signup credit +- **Cost**: $0.60/M input, $2.50/M output +- **Context**: 128K tokens +- **Best For**: Coding, long documents, Chinese content + +### DeepSeek (Cheapest Reasoning) +- **URL**: https://platform.deepseek.com +- **Cost**: $0.14/M input, $0.28/M output +- **Best For**: Reasoning tasks, math, code + +## Configuration Priority + +The system routes requests in this priority: + +1. **Fast tasks** → Groq (free, instant) +2. **High volume** → Mistral (1B tokens) +3. **Complex coding** → Kimi (cheap, 128K context) +4. **Quality critical** → Claude (expensive but best) +5. **Fallback** → OpenRouter free tier + +## Rate Limit Management + +The router automatically: +- Tracks RPM/TPM across all providers +- Distributes load (multiple Groq accounts) +- Falls back when limits approached +- Caches responses to reduce API calls diff --git a/docs/SETUP.md b/docs/SETUP.md new file mode 100644 index 0000000..341e781 --- /dev/null +++ b/docs/SETUP.md @@ -0,0 +1,98 @@ +# Setup Guide + +## Prerequisites + +- **OS**: Debian 12, Ubuntu 22.04+, or Proxmox LXC +- **RAM**: 4GB minimum (8GB recommended for IDE) +- **Storage**: 20GB free space +- **Network**: Internet access for API calls + +## Quick Install + +```bash +# 1. Clone from your Gitea +git clone https://gitea.yourdomain.com/username/llm-hub.git +cd llm-hub + +# 2. Run setup +chmod +x setup.sh && ./setup.sh + +# 3. Configure API keys +nano .env + +# 4. 
Start
+./start.sh full
+```
+
+## Proxmox LXC Setup
+
+On Proxmox host, create optimized container:
+
+```bash
+pct create 100 local:vztmpl/debian-12-standard_12.7-1_amd64.tar.zst \
+  --hostname llm-hub \
+  --memory 8192 \
+  --swap 1024 \
+  --cores 4 \
+  --rootfs local-lvm:20 \
+  --features nesting=1,keyctl=1 \
+  --net0 name=eth0,bridge=vmbr0,ip=dhcp
+
+# Add to /etc/pve/lxc/100.conf:
+cat >> /etc/pve/lxc/100.conf << EOF
+lxc.cgroup.relative = 0
+lxc.apparmor.profile = unconfined
+lxc.cgroup.devices.allow = a
+EOF
+
+pct start 100
+pct exec 100 -- bash -c "apt update && apt install -y curl git && git clone https://gitea.yourdomain.com/username/llm-hub.git && cd llm-hub && ./setup.sh"
+```
+
+## Configuration
+
+Edit `.env` file:
+
+```bash
+# Required: At least one LLM provider
+GROQ_API_KEY_1=gsk_xxx
+MISTRAL_API_KEY=your_key
+
+# Recommended: Multiple providers for redundancy
+ANTHROPIC_API_KEY=sk-ant-xxx
+MOONSHOT_API_KEY=sk-xxx
+OPENROUTER_API_KEY=sk-or-xxx
+
+# UI Security
+IDE_PASSWORD=strong-password-here
+```
+
+## Verification
+
+```bash
+# Check health
+curl http://localhost:8080/health
+
+# Test agent
+curl -X POST http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer sk-agent-xxx" \
+  -d '{"message":"Hello","reasoning_mode":"react"}'
+```
+
+## Troubleshooting
+
+**Docker not starting in LXC:**
+```bash
+# On Proxmox host, check config
+pct config 100 | grep features
+# Should show: features: nesting=1,keyctl=1
+```
+
+**Permission denied on workspace:**
+```bash
+chown -R 1000:1000 workspace/
+```
+
+**Port conflicts:**
+Edit `docker-compose.yml` to change port mappings (e.g., `8081:8080`)
diff --git a/scripts/add-provider.sh b/scripts/add-provider.sh
new file mode 100644
index 0000000..10e5d57
--- /dev/null
+++ b/scripts/add-provider.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+ENV_FILE=".env"
+CONFIG_FILE="config/litellm_config.yaml"
+
+echo "🔌 Add Provider to LLM Hub"
+echo "=========================="
+echo ""
+echo "1. Groq (Fast)"
+echo "2. Mistral (Volume)"
+echo "3. Anthropic Claude (Quality)"
+echo "4. Moonshot Kimi (Cheap/128K)"
+echo "5. OpenRouter (Free tier access)"
+echo "6. Cohere (Embeddings)"
+echo "7. DeepSeek (Cheap reasoning)"
+echo "8. Exit"
+read -p "Select (1-8): " choice
+
+read -p "Enter API Key: " api_key
+
+case $choice in
+    1)
+        read -p "Instance number (1,2,3...): " num
+        var="GROQ_API_KEY_$num"
+        echo "$var=$api_key" >> "$ENV_FILE"
+        echo "✅ Added Groq key as $var"
+        ;;
+    2)
+        echo "MISTRAL_API_KEY=$api_key" >> "$ENV_FILE"
+        echo "✅ Added Mistral"
+        ;;
+    3)
+        echo "ANTHROPIC_API_KEY=$api_key" >> "$ENV_FILE"
+        echo "✅ Added Claude (remember: expensive, use sparingly)"
+        ;;
+    4)
+        echo "MOONSHOT_API_KEY=$api_key" >> "$ENV_FILE"
+        echo "✅ Added Kimi (great for coding!)"
+        ;;
+    5)
+        echo "OPENROUTER_API_KEY=$api_key" >> "$ENV_FILE"
+        echo "✅ Added OpenRouter (access free tier models)"
+        ;;
+    6)
+        echo "COHERE_API_KEY=$api_key" >> "$ENV_FILE"
+        echo "✅ Added Cohere (embeddings)"
+        ;;
+    7)
+        echo "DEEPSEEK_API_KEY=$api_key" >> "$ENV_FILE"
+        echo "✅ Added DeepSeek (cheap reasoning)"
+        ;;
+    8) exit 0 ;;
+    *) echo "Invalid choice" ; exit 1 ;;
+esac
+
+read -p "Restart services to apply? 
(y/N): " restart +[[ $restart =~ ^[Yy]$ ]] && docker-compose restart diff --git a/scripts/status.sh b/scripts/status.sh new file mode 100644 index 0000000..34e53e8 --- /dev/null +++ b/scripts/status.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +echo "🧠 LLM Hub Status" +echo "=================" +echo "" + +# Container status +echo "📦 Containers:" +docker-compose ps --services --filter "status=running" 2>/dev/null | while read service; do + status=$(docker-compose ps -q "$service" | xargs docker inspect -f '{{.State.Status}}' 2>/dev/null) + echo " $service: $status" +done + +echo "" +echo "🔍 Health Checks:" + +# API health +if curl -s http://localhost:8080/health | grep -q "healthy"; then + echo " ✅ Agent Core: Healthy" +else + echo " ❌ Agent Core: Not responding" +fi + +# LiteLLM +if curl -s http://localhost:4000/health/liveliness | grep -q "true"; then + echo " ✅ LiteLLM: Running" +else + echo " ❌ LiteLLM: Not responding" +fi + +echo "" +echo "📊 Router Stats:" +curl -s http://localhost:8080/health 2>/dev/null | python3 -m json.tool 2>/dev/null || echo " Unable to fetch stats" + +echo "" +echo "💾 Memory Usage:" +docker stats --no-stream --format "table {{.Name}}\t{{.MemUsage}}" | grep -E "(agent-|NAME)" || true diff --git a/services/agent-core/Dockerfile b/services/agent-core/Dockerfile new file mode 100644 index 0000000..a0ae6a2 --- /dev/null +++ b/services/agent-core/Dockerfile @@ -0,0 +1,20 @@ +FROM python:3.11-slim + +WORKDIR /app + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + build-essential \ + git \ + && rm -rf /var/lib/apt/lists/* + +# Install Python dependencies +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy application +COPY main.py . + +EXPOSE 8080 + +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"] diff --git a/services/agent-core/main.py b/services/agent-core/main.py new file mode 100644 index 0000000..f9e6fa7 --- /dev/null +++ b/services/agent-core/main.py @@ -0,0 +1,357 @@ +""" +Agentic AI Core - Multi-Reasoning Engine +Supports: ReAct, Plan-and-Execute, Reflexion +""" +import os +import json +from typing import List, Dict, Any, Literal, Optional +from fastapi import FastAPI, HTTPException +from pydantic import BaseModel, Field +from datetime import datetime +import httpx +import redis + +app = FastAPI( + title="Agentic AI Core", + version="2.0.0", + description="Multi-reasoning agent platform with memory and MCP integration" +) + +# Configuration +LLM_API_BASE = os.getenv("LLM_API_BASE", "http://litellm:4000/v1") +LLM_API_KEY = os.getenv("LLM_API_KEY", "sk-agent") +DEFAULT_REASONING = os.getenv("DEFAULT_REASONING_MODE", "auto") + +# Redis for short-term memory +try: + redis_client = redis.from_url( + os.getenv("REDIS_URL", "redis://redis:6379"), + decode_responses=True + ) +except: + redis_client = None + +# ========================================== +# DATA MODELS +# ========================================== + +class AgentRequest(BaseModel): + message: str + session_id: str = Field(default="default", description="Conversation thread ID") + reasoning_mode: Literal["react", "plan_execute", "reflexion", "auto"] = DEFAULT_REASONING + context_files: Optional[List[str]] = Field(default_factory=list) + enable_memory: bool = True + max_iterations: int = 10 + +class AgentStep(BaseModel): + step_number: int + type: Literal["thought", "action", "observation", "reflection", "plan"] + content: str + timestamp: datetime = Field(default_factory=datetime.now) + +class AgentResponse(BaseModel): + response: str 
+ reasoning_mode: str + session_id: str + steps: List[AgentStep] = Field(default_factory=list) + metadata: Dict[str, Any] = Field(default_factory=dict) + +# ========================================== +# UTILITY FUNCTIONS +# ========================================== + +async def call_llm(messages: List[Dict], model: str = "auto", tools: Optional[List] = None) -> Dict: + """Call LLM through LiteLLM gateway""" + async with httpx.AsyncClient() as client: + payload = { + "model": model, + "messages": messages, + "temperature": 0.7, + "max_tokens": 4000 + } + if tools: + payload["tools"] = tools + + response = await client.post( + f"{LLM_API_BASE}/chat/completions", + headers={"Authorization": f"Bearer {LLM_API_KEY}"}, + json=payload, + timeout=60.0 + ) + return response.json() + +def determine_reasoning_mode(message: str, requested: str) -> str: + """Auto-select reasoning mode based on task complexity""" + if requested != "auto": + return requested + + # Complexity indicators + complexity_markers = [ + "plan", "design", "architecture", "steps", "implement", + "build", "create", "project", "complex", "multi-step" + ] + + msg_lower = message.lower() + score = sum(1 for marker in complexity_markers if marker in msg_lower) + + if score >= 3 or len(message) > 500: + return "plan_execute" + elif "review" in msg_lower or "check" in msg_lower or "verify" in msg_lower: + return "reflexion" + else: + return "react" + +# ========================================== +# REASONING ENGINES +# ========================================== + +class ReActEngine: + """ReAct: Reasoning + Acting in interleaved steps""" + + async def run(self, message: str, session_id: str) -> Dict: + steps = [] + + # Initial thought + messages = [ + {"role": "system", "content": "You are a ReAct agent. 
Think step by step and act."}, + {"role": "user", "content": message} + ] + + response = await call_llm(messages, model="fast-tier") + + steps.append(AgentStep( + step_number=1, + type="thought", + content="Initial analysis and reasoning" + )) + + content = response["choices"][0]["message"]["content"] + + return { + "response": content, + "steps": steps, + "model_used": "fast-tier" + } + +class PlanAndExecuteEngine: + """Plan first, then execute step by step""" + + async def run(self, message: str, session_id: str) -> Dict: + steps = [] + + # Planning phase + plan_messages = [ + {"role": "system", "content": "Create a step-by-step plan to accomplish the task."}, + {"role": "user", "content": f"Create a detailed plan for: {message}"} + ] + + plan_response = await call_llm(plan_messages, model="volume-tier") + plan = plan_response["choices"][0]["message"]["content"] + + steps.append(AgentStep( + step_number=1, + type="plan", + content=plan + )) + + # Execution phase + exec_messages = [ + {"role": "system", "content": "Execute the task following the provided plan."}, + {"role": "user", "content": f"Task: {message}\n\nPlan: {plan}\n\nExecute this plan:"} + ] + + exec_response = await call_llm(exec_messages, model="reasoning-tier") + result = exec_response["choices"][0]["message"]["content"] + + steps.append(AgentStep( + step_number=2, + type="action", + content="Execution completed" + )) + + return { + "response": result, + "steps": steps, + "model_used": "reasoning-tier", + "plan": plan + } + +class ReflexionEngine: + """Execute with self-reflection and correction""" + + async def run(self, message: str, session_id: str, max_iterations: int = 2) -> Dict: + steps = [] + + # Initial execution + messages = [ + {"role": "system", "content": "Solve the problem carefully."}, + {"role": "user", "content": message} + ] + + response = await call_llm(messages, model="quality-tier") + answer = response["choices"][0]["message"]["content"] + + steps.append(AgentStep( + step_number=1, + type="action", + content="Initial solution generated" + )) + + # Reflection phase + for i in range(max_iterations): + reflect_messages = [ + {"role": "system", "content": "Critically evaluate the solution for errors or improvements."}, + {"role": "user", "content": f"Problem: {message}\n\nProposed Solution: {answer}\n\nIdentify any issues or improvements:"} + ] + + reflect_response = await call_llm(reflect_messages, model="claude-haiku") + reflection = reflect_response["choices"][0]["message"]["content"] + + if "correct" in reflection.lower() and "no issues" in reflection.lower(): + break + + steps.append(AgentStep( + step_number=2+i, + type="reflection", + content=reflection + )) + + # Improve based on reflection + improve_messages = [ + {"role": "system", "content": "Improve the solution based on the critique."}, + {"role": "user", "content": f"Original: {answer}\n\nIssues found: {reflection}\n\nProvide improved solution:"} + ] + + improve_response = await call_llm(improve_messages, model="quality-tier") + answer = improve_response["choices"][0]["message"]["content"] + + return { + "response": answer, + "steps": steps, + "model_used": "quality-tier", + "iterations": len(steps) + } + +# ========================================== +# API ENDPOINTS +# ========================================== + +@app.post("/v1/chat/completions", response_model=AgentResponse) +async def agent_endpoint(request: AgentRequest): + """ + Main agent endpoint with multiple reasoning strategies: + - react: Fast iterative reasoning (good for simple 
tasks) + - plan_execute: Plan then execute (good for complex tasks) + - reflexion: Self-correcting (good for accuracy-critical tasks) + - auto: Automatically select based on task complexity + """ + + # Determine reasoning mode + mode = determine_reasoning_mode(request.message, request.reasoning_mode) + + # Store message in memory if enabled + if request.enable_memory and redis_client: + key = f"session:{request.session_id}:history" + redis_client.lpush(key, request.message) + redis_client.ltrim(key, 0, 99) # Keep last 100 messages + + try: + # Route to appropriate reasoning engine + if mode == "react": + result = await ReActEngine().run(request.message, request.session_id) + elif mode == "plan_execute": + result = await PlanAndExecuteEngine().run(request.message, request.session_id) + elif mode == "reflexion": + result = await ReflexionEngine().run( + request.message, + request.session_id, + max_iterations=2 + ) + else: + # Default fallback + result = await ReActEngine().run(request.message, request.session_id) + + return AgentResponse( + response=result["response"], + reasoning_mode=mode, + session_id=request.session_id, + steps=result.get("steps", []), + metadata={ + "model_used": result.get("model_used", "unknown"), + "auto_selected": request.reasoning_mode == "auto", + "timestamp": datetime.now().isoformat() + } + ) + + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/v1/models") +async def list_models(): + """List available agent models""" + return { + "object": "list", + "data": [ + { + "id": "agent/orchestrator", + "object": "model", + "created": 1700000000, + "owned_by": "llm-hub", + "description": "Auto-selecting orchestrator" + }, + { + "id": "agent/react", + "object": "model", + "created": 1700000000, + "owned_by": "llm-hub", + "description": "ReAct reasoning - fast iterative" + }, + { + "id": "agent/plan-execute", + "object": "model", + "created": 1700000000, + "owned_by": "llm-hub", + "description": "Plan-and-Execute - complex tasks" + }, + { + "id": "agent/reflexion", + "object": "model", + "created": 1700000000, + "owned_by": "llm-hub", + "description": "Reflexion - self-correcting with verification" + } + ] + } + +@app.get("/health") +async def health(): + """Health check endpoint""" + redis_status = "connected" if redis_client and redis_client.ping() else "disconnected" + + return { + "status": "healthy", + "version": "2.0.0", + "capabilities": ["react", "plan_execute", "reflexion", "mcp", "memory"], + "default_mode": DEFAULT_REASONING, + "redis": redis_status, + "timestamp": datetime.now().isoformat() + } + +@app.get("/sessions/{session_id}/history") +async def get_session_history(session_id: str, limit: int = 10): + """Retrieve conversation history for a session""" + if not redis_client: + return {"error": "Redis not available"} + + key = f"session:{session_id}:history" + history = redis_client.lrange(key, 0, limit - 1) + + return { + "session_id": session_id, + "history": history, + "count": len(history) + } + +if __name__ == "__main__": + import uvicorn + uvicorn.run(app, host="0.0.0.0", port=8080) diff --git a/services/agent-core/requirements.txt b/services/agent-core/requirements.txt new file mode 100644 index 0000000..23e5777 --- /dev/null +++ b/services/agent-core/requirements.txt @@ -0,0 +1,13 @@ +fastapi==0.104.1 +uvicorn[standard]==0.24.0 +langgraph==0.2.53 +langchain==0.3.0 +langchain-openai==0.2.0 +langchain-chroma==0.1.4 +chromadb==0.5.0 +redis==5.0.1 +httpx==0.25.2 +tiktoken==0.5.1 +pydantic==2.5.0 
+python-multipart==0.0.6 +aiofiles==23.2.1 diff --git a/services/mcpo/Dockerfile b/services/mcpo/Dockerfile new file mode 100644 index 0000000..4433752 --- /dev/null +++ b/services/mcpo/Dockerfile @@ -0,0 +1,11 @@ +FROM python:3.11-slim + +WORKDIR /app + +RUN pip install mcpo uv + +COPY servers.json . + +EXPOSE 8000 + +CMD ["mcpo", "--config", "servers.json", "--host", "0.0.0.0", "--port", "8000"] diff --git a/services/mcpo/servers.json b/services/mcpo/servers.json new file mode 100644 index 0000000..0e678f1 --- /dev/null +++ b/services/mcpo/servers.json @@ -0,0 +1,24 @@ +{ + "mcpServers": { + "filesystem": { + "command": "npx", + "args": ["-y", "@modelcontextprotocol/server-filesystem", "/workspace"] + }, + "git": { + "command": "uvx", + "args": ["mcp-server-git"] + }, + "fetch": { + "command": "uvx", + "args": ["mcp-server-fetch"] + }, + "memory": { + "command": "npx", + "args": ["-y", "@modelcontextprotocol/server-memory"] + }, + "sequential-thinking": { + "command": "npx", + "args": ["-y", "@modelcontextprotocol/server-sequential-thinking"] + } + } +} diff --git a/setup.sh b/setup.sh new file mode 100644 index 0000000..95f6c5e --- /dev/null +++ b/setup.sh @@ -0,0 +1,84 @@ +#!/bin/bash +set -e + +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' + +INSTALL_DIR="$(pwd)" + +echo -e "${GREEN}🧠 Agentic AI Hub Setup${NC}" +echo "=======================" + +# Detect Proxmox LXC +if [ -f /proc/1/environ ] && grep -q "container=lxc" /proc/1/environ 2>/dev/null; then + echo -e "${YELLOW}✓ LXC container detected${NC}" + if ! grep -q "lxc.cgroup.relative" /etc/pve/lxc/*.conf 2>/dev/null; then + echo -e "${YELLOW}⚠️ Tip: For LXC with Docker, add to /etc/pve/lxc/XXX.conf:${NC}" + echo " lxc.cgroup.relative = 0" + echo " lxc.apparmor.profile = unconfined" + echo " lxc.cgroup.devices.allow = a" + fi +fi + +# Check/install Docker +if ! command -v docker &> /dev/null; then + echo -e "${YELLOW}Installing Docker...${NC}" + curl -fsSL https://get.docker.com | sh + usermod -aG docker $USER || true + systemctl enable docker + systemctl start docker +fi + +if ! command -v docker-compose &> /dev/null && ! docker compose version &> /dev/null; then + echo -e "${YELLOW}Installing Docker Compose...${NC}" + apt-get update && apt-get install -y docker-compose-plugin +fi + +# Install Node.js for MCP +if ! command -v node &> /dev/null || [ "$(node -v | cut -d'v' -f2 | cut -d'.' -f1)" != "20" ]; then + echo -e "${YELLOW}Installing Node.js 20...${NC}" + curl -fsSL https://deb.nodesource.com/setup_20.x | bash - + apt-get install -y nodejs +fi + +# Install uv for Python tools +if ! command -v uv &> /dev/null; then + pip install uv || pip3 install uv +fi + +# Create directories +echo -e "${BLUE}Creating directories...${NC}" +mkdir -p {data/{redis,chroma,agent/{sessions,code-server,open-webui},neo4j},workspace,logs} +mkdir -p services/{agent-core,mcpo} + +# Set permissions +chown -R 1000:1000 workspace data || true +chmod +x *.sh scripts/*.sh 2>/dev/null || true + +# Create .env if not exists +if [ ! -f .env ]; then + echo -e "${YELLOW}Creating .env file...${NC}" + cp .env.example .env + # Generate random keys + sed -i "s/MASTER_KEY=.*/MASTER_KEY=sk-agent-$(openssl rand -hex 8)/" .env + sed -i "s/WEBUI_SECRET_KEY=.*/WEBUI_SECRET_KEY=$(openssl rand -hex 32)/" .env + echo -e "${GREEN}✓ .env created. Edit it to add your API keys.${NC}" +fi + +# Create workspace gitkeep +touch workspace/.gitkeep + +echo "" +echo -e "${GREEN}✅ Setup complete!${NC}" +echo "" +echo "Next steps:" +echo "1. 
Edit .env file: nano .env"
+echo "2. Add your API keys (Groq, Mistral, etc.)"
+echo "3. Start services: ./start.sh"
+echo ""
+echo "Documentation:"
+echo " - Setup: docs/SETUP.md"
+echo " - API: docs/API.md"
+echo " - Providers: docs/PROVIDERS.md"
diff --git a/start.sh b/start.sh
new file mode 100644
index 0000000..73ee028
--- /dev/null
+++ b/start.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+
+cd "$(dirname "$0")"
+
+GREEN='\033[0;32m'
+BLUE='\033[0;34m'
+NC='\033[0m'
+
+if [ ! -f .env ]; then
+    echo "Error: .env file not found. Run ./setup.sh first."
+    exit 1
+fi
+
+source .env
+
+echo -e "${GREEN}🚀 Starting Agentic LLM Hub...${NC}"
+echo ""
+
+# Determine profile
+PROFILE=${1:-full}
+
+if [ "$PROFILE" = "minimal" ]; then
+    echo "Mode: Minimal (core services only)"
+    docker-compose up -d redis chromadb litellm agent-core
+elif [ "$PROFILE" = "ide" ]; then
+    echo "Mode: Standard + IDE"
+    docker-compose --profile ide up -d
+elif [ "$PROFILE" = "full" ]; then
+    echo "Mode: Full (all services including MCP tools)"
+    docker-compose --profile ide --profile mcp --profile ui up -d
+else
+    echo "Usage: ./start.sh [minimal|ide|full]"
+    echo "  minimal - Core services only (lowest resources)"
+    echo "  ide - Core + VS Code IDE"
+    echo "  full - Everything including MCP tools and Web UI"
+    exit 1
+fi
+
+echo ""
+echo "Waiting for services..."
+sleep 5
+
+# Get IP
+IP=$(hostname -I | awk '{print $1}')
+
+echo ""
+echo -e "${GREEN}✅ Services started!${NC}"
+echo ""
+echo "Access Points:"
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+printf "${BLUE}%-22s${NC} %s\n" "Agent API:" "http://$IP:8080/v1"
+printf "${BLUE}%-22s${NC} %s\n" "VS Code IDE:" "http://$IP:8443"
+printf "${BLUE}%-22s${NC} %s\n" "LiteLLM Gateway:" "http://$IP:4000"
+printf "${BLUE}%-22s${NC} %s\n" "MCP Tools:" "http://$IP:8001/docs"
+printf "${BLUE}%-22s${NC} %s\n" "Web UI:" "http://$IP:3000"
+printf "${BLUE}%-22s${NC} %s\n" "Vector DB:" "http://$IP:8000"
+echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
+echo ""
+echo "Test command:"
+echo "curl -X POST http://$IP:8080/v1/chat/completions \\"
+echo "  -H 'Content-Type: application/json' \\"
+echo "  -H 'Authorization: Bearer ${MASTER_KEY:0:20}...' \\"
+echo "  -d '{\"message\":\"Hello\",\"reasoning_mode\":\"react\"}'"
+echo ""
+echo "View logs: docker-compose logs -f"
+echo "Stop: docker-compose down"
diff --git a/workspace/.gitkeep b/workspace/.gitkeep
new file mode 100644
index 0000000..e69de29