Initial commit: Agentic LLM Hub with multi-reasoning, MCP tools, and IDE
commit 2cafb31cb4

@@ -0,0 +1,69 @@
# Master Key for API Access (generate strong key)
MASTER_KEY=sk-agent-$(openssl rand -hex 8)

# ==========================================
# FREE TIER API KEYS (Add your keys below)
# ==========================================

# Groq - https://console.groq.com (20 RPM free, create multiple accounts)
GROQ_API_KEY_1=gsk_your_first_groq_key_here
GROQ_API_KEY_2=gsk_your_second_groq_key_here

# Mistral - https://console.mistral.ai (1B tokens/month free)
MISTRAL_API_KEY=your_mistral_key_here

# Anthropic Claude - https://console.anthropic.com ($5 trial, $500 student)
ANTHROPIC_API_KEY=sk-ant-your_claude_key_here

# Moonshot Kimi - https://platform.moonshot.ai ($5 signup bonus)
MOONSHOT_API_KEY=sk-your_moonshot_key_here

# OpenRouter - https://openrouter.ai (50 req/day free, access to Kimi free)
OPENROUTER_API_KEY=sk-or-your_openrouter_key_here

# Cohere - https://cohere.com (1K calls/month free, good for embeddings)
COHERE_API_KEY=your_cohere_key_here

# DeepSeek - https://platform.deepseek.com (cheap rates)
DEEPSEEK_API_KEY=sk-your_deepseek_key_here

# GitHub Token (for MCP Git tools)
GITHUB_TOKEN=ghp_your_github_token_here

# ==========================================
# AGENT SETTINGS
# ==========================================

# Default reasoning: react, plan_execute, reflexion, or auto
DEFAULT_REASONING=auto

# Enable self-reflection (true/false)
ENABLE_REFLECTION=true

# Maximum iterations per request
MAX_ITERATIONS=10

# ==========================================
# UI/IDE SETTINGS
# ==========================================

# Code-Server passwords (change these!)
IDE_PASSWORD=secure-ide-password-123
IDE_SUDO_PASSWORD=admin-password-456

# Optional: Domain for reverse proxy
# IDE_DOMAIN=code.yourdomain.com

# Web UI settings
WEBUI_SECRET_KEY=$(openssl rand -hex 32)

# ==========================================
# ADVANCED MEMORY SETTINGS
# ==========================================

# Enable knowledge graph (Neo4j) - requires more RAM
ENABLE_KNOWLEDGE_GRAPH=false
NEO4J_AUTH=neo4j/password

# ChromaDB settings
CHROMA_STORAGE_PATH=/data/chroma

@@ -0,0 +1,31 @@
# Environment variables
.env
.env.local
.env.*.local

# Data directories (persisted volumes)
data/
workspace/*
!workspace/.gitkeep
logs/
*.db
*.sqlite3

# IDE
.idea/
.vscode/
*.swp
*.swo

# OS
.DS_Store
Thumbs.db

# Docker
.docker/
docker-compose.override.yml

# Temporary
*.tmp
*.bak
*.log

@@ -0,0 +1,52 @@
.PHONY: help setup start start-ide stop logs status update backup clean

help:
	@echo "Agentic LLM Hub Management"
	@echo "=========================="
	@echo "make setup     - Initial setup"
	@echo "make start     - Start all services (full profile)"
	@echo "make start-ide - Start with IDE only"
	@echo "make stop      - Stop all services"
	@echo "make logs      - View logs"
	@echo "make status    - Check service status"
	@echo "make update    - Pull latest and update images"
	@echo "make backup    - Backup data directories"
	@echo "make clean     - Remove containers (data preserved)"

setup:
	@chmod +x *.sh scripts/*.sh 2>/dev/null || true
	@./setup.sh

start:
	@./start.sh full

start-ide:
	@./start.sh ide

stop:
	@docker-compose down

logs:
	@docker-compose logs -f --tail=100

status:
	@echo "Container Status:"
	@docker-compose ps
	@echo ""
	@echo "API Health:"
	@curl -s http://localhost:8080/health | python3 -m json.tool 2>/dev/null || echo "API not responding"

update:
	@git pull
	@docker-compose pull
	@docker-compose up -d

backup:
	@mkdir -p backup/$(shell date +%Y%m%d)
	@cp -r data backup/$(shell date +%Y%m%d)/
	@cp .env backup/$(shell date +%Y%m%d)/
	@echo "Backup created: backup/$(shell date +%Y%m%d)/"

clean:
	@docker-compose down -v
	@echo "Containers removed. Data preserved in ./data/"

@@ -0,0 +1,60 @@
# 🤖 Agentic LLM Hub

Self-hosted AI agent platform with multi-provider LLM aggregation, reasoning engines (ReAct, Plan-and-Execute, Reflexion), MCP tools, and a web IDE.

## 🚀 Quick Start

```bash
# 1. Clone from your Gitea
git clone https://gitea.yourdomain.com/youruser/llm-hub.git
cd llm-hub

# 2. Configure
cp .env.example .env
nano .env  # Add your API keys

# 3. Deploy
./setup.sh && ./start.sh
```

## 📡 Access Points

| Service | URL | Description |
|---------|-----|-------------|
| VS Code IDE | `http://your-ip:8443` | Full IDE with Continue.dev |
| Agent API | `http://your-ip:8080/v1` | Main API endpoint |
| LiteLLM | `http://your-ip:4000` | LLM Gateway |
| MCP Tools | `http://your-ip:8001/docs` | Tool OpenAPI docs |
| ChromaDB | `http://your-ip:8000` | Vector memory |
| Web UI | `http://your-ip:3000` | Chat interface |

## 🔧 Supported Providers

- **Groq** (Free tier, fast)
- **Mistral** (1B tokens/month free)
- **Anthropic Claude** (Trial credits)
- **Moonshot Kimi** ($5 signup bonus)
- **OpenRouter** (Free tier access)
- **Cohere** (1K calls/month)
- **DeepSeek** (Cheap reasoning)

## 🧠 Reasoning Modes

- `react` - Fast iterative reasoning
- `plan_execute` - Complex multi-step tasks
- `reflexion` - Self-correcting with verification
- `auto` - Automatic selection (see the sketch below)
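
Every mode is just a `reasoning_mode` value on the same endpoint. A minimal sketch (assumes the `requests` package and a placeholder `sk-agent-xxx` key; see the [API Reference](docs/API.md) for the full schema):

```python
import requests

# Try each reasoning mode against the agent API; the metadata block
# reports which model tier the router actually used.
for mode in ("react", "plan_execute", "reflexion", "auto"):
    r = requests.post(
        "http://localhost:8080/v1/chat/completions",
        headers={"Authorization": "Bearer sk-agent-xxx"},
        json={"message": "Summarize this repository", "reasoning_mode": mode},
        timeout=120,
    )
    print(mode, "->", r.json()["metadata"]["model_used"])
```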

## 📚 Documentation

- [Setup Guide](docs/SETUP.md)
- [API Reference](docs/API.md)
- [Provider Guide](docs/PROVIDERS.md)

## 🔄 Updates

```bash
git pull origin main
docker-compose pull
docker-compose up -d
```

@@ -0,0 +1,54 @@
name: LLM Hub IDE
version: 1.0.0
schema: v1

models:
  - name: Groq Llama 3.3 70B
    provider: openai
    model: fast-tier
    apiBase: http://agent-core:8080/v1
    apiKey: sk-agent
    roles: [chat, edit, apply]

  - name: Claude 3.5 Sonnet
    provider: openai
    model: quality-tier
    apiBase: http://agent-core:8080/v1
    apiKey: sk-agent
    roles: [chat, edit, apply]

  - name: Kimi K2
    provider: openai
    model: reasoning-tier
    apiBase: http://agent-core:8080/v1
    apiKey: sk-agent
    roles: [chat, edit, apply]

  - name: Mistral Small
    provider: openai
    model: volume-tier
    apiBase: http://agent-core:8080/v1
    apiKey: sk-agent
    roles: [chat, edit]

tabAutocompleteModel:
  name: Mistral Autocomplete
  provider: openai
  model: volume-tier
  apiBase: http://litellm:4000/v1
  apiKey: sk-agent

embeddingsProvider:
  provider: openai
  model: embeddings
  apiBase: http://litellm:4000/v1
  apiKey: sk-agent

context:
  - provider: code
  - provider: docs
  - provider: diff
  - provider: terminal
  - provider: problems
  - provider: folder
  - provider: codebase

@@ -0,0 +1,87 @@
model_list:
  # FREE TIER AGGREGATION
  - model_name: fast-tier
    litellm_params:
      model: groq/llama-3.3-70b-versatile
      api_key: os.environ/GROQ_API_KEY_1
      rpm: 20

  - model_name: fast-tier
    litellm_params:
      model: groq/llama-3.1-8b-instant
      api_key: os.environ/GROQ_API_KEY_2
      rpm: 20

  - model_name: volume-tier
    litellm_params:
      model: mistral/mistral-small-latest
      api_key: os.environ/MISTRAL_API_KEY
      tpm: 500000

  # CLAUDE & KIMI - Quality/Reasoning
  - model_name: quality-tier
    litellm_params:
      model: anthropic/claude-3-5-sonnet-20240620
      api_key: os.environ/ANTHROPIC_API_KEY
      rpm: 5

  - model_name: claude-haiku
    litellm_params:
      model: anthropic/claude-3-haiku-20240307
      api_key: os.environ/ANTHROPIC_API_KEY
      rpm: 10

  # Kimi via OpenRouter (Free tier)
  - model_name: reasoning-tier
    litellm_params:
      model: openrouter/moonshotai/kimi-k2:free
      api_key: os.environ/OPENROUTER_API_KEY

  # Kimi Direct (Ultra-cheap)
  - model_name: reasoning-tier
    litellm_params:
      model: moonshot/kimi-k2-0711-preview
      api_key: os.environ/MOONSHOT_API_KEY
      tpm: 100000

  - model_name: deepseek
    litellm_params:
      model: deepseek/deepseek-chat
      api_key: os.environ/DEEPSEEK_API_KEY

  # Embeddings & Tools
  - model_name: embeddings
    litellm_params:
      model: cohere/embed-english-v3.0
      api_key: os.environ/COHERE_API_KEY

  # Local Fallback
  - model_name: local-llama
    litellm_params:
      model: ollama/llama3.1:8b
      api_base: http://ollama:11434

router_settings:
  routing_strategy: "usage-based-routing"
  timeout: 30
  num_retries: 3
  allowed_fails: 2
  cooldown_time: 60
  fallbacks:
    - fast-tier: ["volume-tier", "reasoning-tier"]
    - volume-tier: ["reasoning-tier", "local-llama"]
    - quality-tier: ["claude-haiku", "reasoning-tier"]

# Cache settings live under litellm_settings in LiteLLM's config schema
litellm_settings:
  cache: true
  cache_params:
    type: redis
    host: redis
    port: 6379
    ttl: 3600

general_settings:
  master_key: os.environ/LITELLM_MASTER_KEY
  retry_policy:
    TimeoutError: 3
    RateLimitError: 5
  log_level: info
  log_file: /app/logs/litellm.log

@@ -0,0 +1,35 @@
{
  "mcpServers": {
    "filesystem": {
      "command": "npx",
      "args": ["-y", "@modelcontextprotocol/server-filesystem", "/workspace"]
    },
    "git": {
      "command": "uvx",
      "args": ["mcp-server-git"]
    },
    "github": {
      "command": "npx",
      "args": ["-y", "@modelcontextprotocol/server-github"],
      "env": {
        "GITHUB_PERSONAL_ACCESS_TOKEN": "${GITHUB_TOKEN}"
      }
    },
    "fetch": {
      "command": "uvx",
      "args": ["mcp-server-fetch"]
    },
    "sqlite": {
      "command": "uvx",
      "args": ["mcp-server-sqlite", "/workspace/data.db"]
    },
    "memory": {
      "command": "npx",
      "args": ["-y", "@modelcontextprotocol/server-memory"]
    },
    "sequential-thinking": {
      "command": "npx",
      "args": ["-y", "@modelcontextprotocol/server-sequential-thinking"]
    }
  }
}

@@ -0,0 +1,151 @@
version: '3.8'

services:
  # Core Infrastructure
  redis:
    image: redis:7-alpine
    container_name: agent-redis
    restart: unless-stopped
    volumes:
      - ./data/redis:/data
    command: redis-server --appendonly yes --maxmemory 512mb --maxmemory-policy allkeys-lru
    networks:
      - agent-network

  chromadb:
    image: chromadb/chroma:latest
    container_name: agent-memory-vector
    restart: unless-stopped
    ports:
      - "8000:8000"
    volumes:
      - ./data/chroma:/chroma/chroma
    environment:
      - IS_PERSISTENT=TRUE
      - PERSIST_DIRECTORY=/chroma/chroma
      - ANONYMIZED_TELEMETRY=FALSE
    networks:
      - agent-network

  # LLM Gateway
  litellm:
    image: ghcr.io/berriai/litellm:main-latest
    container_name: agent-gateway
    restart: unless-stopped
    ports:
      - "4000:4000"
    volumes:
      - ./config/litellm_config.yaml:/app/config.yaml
      - ./logs:/app/logs
    environment:
      - DATABASE_URL=sqlite:///app/db.sqlite3
      - LITELLM_MASTER_KEY=${MASTER_KEY:-sk-agent}
      - REDIS_HOST=redis
      - REDIS_PORT=6379
    command: --config /app/config.yaml --port 4000
    networks:
      - agent-network

  # Agent Core with Reasoning Engines
  agent-core:
    build:
      context: ./services/agent-core
      dockerfile: Dockerfile
    container_name: agent-core
    restart: unless-stopped
    ports:
      - "8080:8080"
    volumes:
      - ./workspace:/workspace
      - ./config/agent:/app/config
      - ./data/agent:/app/data
    environment:
      - LLM_API_BASE=http://litellm:4000/v1
      - LLM_API_KEY=${MASTER_KEY:-sk-agent}
      - REDIS_URL=redis://redis:6379/0
      - CHROMA_URL=http://chromadb:8000
      - DEFAULT_REASONING_MODE=${DEFAULT_REASONING:-auto}
    depends_on:
      - litellm
      - redis
      - chromadb
    networks:
      - agent-network

  # MCP Tool Gateway
  mcpo:
    build:
      context: ./services/mcpo
      dockerfile: Dockerfile
    container_name: agent-mcp-gateway
    restart: unless-stopped
    ports:
      - "8001:8000"
    volumes:
      - ./workspace:/workspace:ro
      - ./config/mcp:/app/config
    networks:
      - agent-network
    profiles:
      - mcp

  # VS Code Server with AI Assistant
  code-server:
    image: lscr.io/linuxserver/code-server:latest
    container_name: agent-ide
    restart: unless-stopped
    ports:
      - "8443:8443"
    environment:
      - PUID=1000
      - PGID=1000
      - TZ=Etc/UTC
      - PASSWORD=${IDE_PASSWORD:-code}
      - SUDO_PASSWORD=${IDE_SUDO_PASSWORD:-sudo}
      - DEFAULT_WORKSPACE=/workspace
    volumes:
      - ./workspace:/workspace
      - ./data/code-server:/config
      - ./config/continue:/config/.continue:ro
    networks:
      - agent-network
    profiles:
      - ide

  # Web UI
  open-webui:
    image: ghcr.io/open-webui/open-webui:main
    container_name: agent-ui
    restart: unless-stopped
    ports:
      - "3000:8080"
    volumes:
      - ./data/open-webui:/app/backend/data
    environment:
      - OPENAI_API_BASE_URL=http://agent-core:8080/v1
      - OPENAI_API_KEY=${MASTER_KEY:-sk-agent}
      - ENABLE_SIGNUP=false
      - DEFAULT_MODELS=agent/orchestrator
    depends_on:
      - agent-core
    networks:
      - agent-network
    profiles:
      - ui

  # Auto-updater
  watchtower:
    image: containrrr/watchtower
    container_name: agent-watchtower
    restart: unless-stopped
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
    environment:
      - WATCHTOWER_POLL_INTERVAL=86400
      - WATCHTOWER_CLEANUP=true
    networks:
      - agent-network

networks:
  agent-network:
    driver: bridge

@@ -0,0 +1,88 @@
# API Reference

## Base URL
```
http://your-server-ip:8080/v1
```

## Authentication
All requests require a Bearer token:
```
Authorization: Bearer sk-agent-your-key
```

## Endpoints

### POST /chat/completions
Main agent endpoint.

**Request:**
```json
{
  "message": "Create a Python script to fetch weather data",
  "reasoning_mode": "plan_execute",
  "session_id": "unique-session-id",
  "max_iterations": 10
}
```

**Response:**
```json
{
  "response": "Here's the Python script...",
  "reasoning_mode": "plan_execute",
  "session_id": "unique-session-id",
  "steps": [
    {"step_number": 1, "type": "plan", "content": "..."},
    {"step_number": 2, "type": "action", "content": "..."}
  ],
  "metadata": {
    "model_used": "volume-tier",
    "auto_selected": true,
    "timestamp": "2024-..."
  }
}
```

### Reasoning Modes

| Mode | Use Case | Speed | Accuracy |
|------|----------|-------|----------|
| `react` | Simple Q&A, debugging | Fast | Medium |
| `plan_execute` | Complex multi-step tasks | Medium | High |
| `reflexion` | Code review, critical tasks | Slow | Very High |
| `auto` | Let system decide | Variable | Adaptive |
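
In `auto` mode the agent core scores the message against a list of complexity markers before dispatching. A condensed sketch of that heuristic (taken from the agent core's `determine_reasoning_mode`):

```python
def determine_reasoning_mode(message: str, requested: str) -> str:
    """Condensed from the agent core: score complexity markers, then route."""
    if requested != "auto":
        return requested
    markers = ["plan", "design", "architecture", "steps", "implement",
               "build", "create", "project", "complex", "multi-step"]
    msg = message.lower()
    # Many markers or a long prompt -> plan first; review language -> reflect.
    if sum(1 for m in markers if m in msg) >= 3 or len(message) > 500:
        return "plan_execute"
    if any(w in msg for w in ("review", "check", "verify")):
        return "reflexion"
    return "react"

print(determine_reasoning_mode("Review and verify this patch", "auto"))  # reflexion
```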

### GET /models
List available models.
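
A quick sketch of listing the advertised agent models (placeholder key):

```python
import requests

models = requests.get(
    "http://localhost:8080/v1/models",
    headers={"Authorization": "Bearer sk-agent-xxx"},
).json()
print([m["id"] for m in models["data"]])
# ['agent/orchestrator', 'agent/react', 'agent/plan-execute', 'agent/reflexion']
```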

### GET /health
Check system status.

### GET /sessions/{id}/history
Retrieve conversation history.

## Examples

### Python
```python
import requests

response = requests.post(
    "http://localhost:8080/v1/chat/completions",
    headers={"Authorization": "Bearer sk-agent-xxx"},
    json={
        "message": "Refactor this code",
        "reasoning_mode": "reflexion"
    }
)
print(response.json()["response"])
```
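
Fetching a session's stored history works the same way; note that this route lives at the server root, not under `/v1` (a sketch, using the example session ID from above):

```python
import requests

history = requests.get(
    "http://localhost:8080/sessions/unique-session-id/history",
    params={"limit": 5},  # server-side default is 10
).json()
print(history["count"], "messages:", history["history"])
```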

### cURL
```bash
curl -X POST http://localhost:8080/v1/chat/completions \
  -H "Authorization: Bearer sk-agent-xxx" \
  -H "Content-Type: application/json" \
  -d '{"message":"Hello","reasoning_mode":"auto"}'
```

@@ -0,0 +1,66 @@
# Provider Setup Guide

## Free Tier Providers

### Groq (Fastest)
- **URL**: https://console.groq.com
- **Free Tier**: 20 RPM, variable TPM
- **Models**: Llama 3.3 70B, Llama 3.1 8B
- **Best For**: Speed, quick coding tasks
- **Tip**: Create multiple accounts with different phones for load balancing

### Mistral (High Volume)
- **URL**: https://console.mistral.ai
- **Free Tier**: 1 billion tokens/month
- **Models**: Mistral Small, Medium
- **Best For**: High-volume processing, chatbots

### OpenRouter (Universal Access)
- **URL**: https://openrouter.ai
- **Free Tier**: 50 requests/day
- **Access**: Kimi K2:free, Gemini Flash:free
- **Best For**: Testing, fallback access

### Cohere (Embeddings)
- **URL**: https://cohere.com
- **Free Tier**: 1,000 calls/month
- **Best For**: Embeddings, RAG systems

## Trial/Cheap Providers

### Anthropic Claude (Highest Quality)
- **URL**: https://console.anthropic.com
- **Trial**: $5 free credits (new users)
- **Student**: $500 credits (apply with .edu)
- **Cost**: $3/M input (Sonnet), $0.25/M (Haiku)
- **Best For**: Complex reasoning, analysis, code review

### Moonshot Kimi (Best Value)
- **URL**: https://platform.moonshot.ai
- **Bonus**: $5 signup credit
- **Cost**: $0.60/M input, $2.50/M output
- **Context**: 128K tokens
- **Best For**: Coding, long documents, Chinese content

### DeepSeek (Cheapest Reasoning)
- **URL**: https://platform.deepseek.com
- **Cost**: $0.14/M input, $0.28/M output
- **Best For**: Reasoning tasks, math, code

## Configuration Priority

The system routes requests in this priority:

1. **Fast tasks** → Groq (free, instant)
2. **High volume** → Mistral (1B tokens)
3. **Complex coding** → Kimi (cheap, 128K context)
4. **Quality critical** → Claude (expensive but best)
5. **Fallback** → OpenRouter free tier

## Rate Limit Management

The router automatically:
- Tracks RPM/TPM across all providers
- Distributes load (multiple Groq accounts)
- Falls back when limits are approached
- Caches responses to reduce API calls (see the sketch after this list)
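
From the client side all of this is invisible: you call a single alias on the LiteLLM gateway and the router handles key rotation, fallback, and caching. A minimal sketch, assuming the `openai` Python package and the default `sk-agent` master key:

```python
from openai import OpenAI

# Point the standard OpenAI client at the LiteLLM gateway (port 4000).
client = OpenAI(base_url="http://localhost:4000/v1", api_key="sk-agent")

# "fast-tier" is load-balanced across both Groq keys; on rate limits the
# router falls back to volume-tier, then reasoning-tier (litellm_config.yaml).
reply = client.chat.completions.create(
    model="fast-tier",
    messages=[{"role": "user", "content": "One-line summary of ReAct?"}],
)
print(reply.choices[0].message.content)
```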

@@ -0,0 +1,98 @@
# Setup Guide

## Prerequisites

- **OS**: Debian 12, Ubuntu 22.04+, or Proxmox LXC
- **RAM**: 4GB minimum (8GB recommended for IDE)
- **Storage**: 20GB free space
- **Network**: Internet access for API calls

## Quick Install

```bash
# 1. Clone from your Gitea
git clone https://gitea.yourdomain.com/username/llm-hub.git
cd llm-hub

# 2. Run setup
chmod +x setup.sh && ./setup.sh

# 3. Configure API keys
nano .env

# 4. Start
./start.sh full
```

## Proxmox LXC Setup

On the Proxmox host, create an optimized container:

```bash
pct create 100 local:vztmpl/debian-12-standard_12.7-1_amd64.tar.zst \
  --hostname llm-hub \
  --memory 8192 \
  --swap 1024 \
  --cores 4 \
  --rootfs local-lvm:20 \
  --features nesting=1,keyctl=1 \
  --net0 name=eth0,bridge=vmbr0,ip=dhcp

# Add to /etc/pve/lxc/100.conf:
cat >> /etc/pve/lxc/100.conf << EOF
lxc.cgroup.relative = 0
lxc.apparmor.profile = unconfined
lxc.cgroup.devices.allow = a
EOF

pct start 100
pct exec 100 -- bash -c "apt update && apt install -y curl git"
# Inside the container, follow the Quick Install steps above.
```

## Configuration

Edit the `.env` file:

```bash
# Required: At least one LLM provider
GROQ_API_KEY_1=gsk_xxx
MISTRAL_API_KEY=your_key

# Recommended: Multiple providers for redundancy
ANTHROPIC_API_KEY=sk-ant-xxx
MOONSHOT_API_KEY=sk-xxx
OPENROUTER_API_KEY=sk-or-xxx

# UI Security
IDE_PASSWORD=strong-password-here
```

## Verification

```bash
# Check health
curl http://localhost:8080/health

# Test agent
curl -X POST http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-agent-xxx" \
  -d '{"message":"Hello","reasoning_mode":"react"}'
```

## Troubleshooting

**Docker not starting in LXC:**
```bash
# On the Proxmox host, check the config
pct config 100 | grep features
# Should show: features: nesting=1,keyctl=1
```

**Permission denied on workspace:**
```bash
chown -R 1000:1000 workspace/
```

**Port conflicts:**
Edit `docker-compose.yml` to change port mappings (e.g., `8081:8080`).

@@ -0,0 +1,57 @@
#!/bin/bash

ENV_FILE=".env"
CONFIG_FILE="config/litellm_config.yaml"

echo "🔌 Add Provider to LLM Hub"
echo "=========================="
echo ""
echo "1. Groq (Fast)"
echo "2. Mistral (Volume)"
echo "3. Anthropic Claude (Quality)"
echo "4. Moonshot Kimi (Cheap/128K)"
echo "5. OpenRouter (Free tier access)"
echo "6. Cohere (Embeddings)"
echo "7. DeepSeek (Cheap reasoning)"
echo "8. Exit"
read -p "Select (1-8): " choice

# Bail out before prompting for a key
[ "$choice" = "8" ] && exit 0

read -p "Enter API Key: " api_key

case $choice in
  1)
    read -p "Instance number (1,2,3...): " num
    var="GROQ_API_KEY_$num"
    echo "$var=$api_key" >> "$ENV_FILE"
    echo "✅ Added Groq key as $var"
    ;;
  2)
    echo "MISTRAL_API_KEY=$api_key" >> "$ENV_FILE"
    echo "✅ Added Mistral"
    ;;
  3)
    echo "ANTHROPIC_API_KEY=$api_key" >> "$ENV_FILE"
    echo "✅ Added Claude (remember: expensive, use sparingly)"
    ;;
  4)
    echo "MOONSHOT_API_KEY=$api_key" >> "$ENV_FILE"
    echo "✅ Added Kimi (great for coding!)"
    ;;
  5)
    echo "OPENROUTER_API_KEY=$api_key" >> "$ENV_FILE"
    echo "✅ Added OpenRouter (access free tier models)"
    ;;
  6)
    echo "COHERE_API_KEY=$api_key" >> "$ENV_FILE"
    echo "✅ Added Cohere (embeddings)"
    ;;
  7)
    echo "DEEPSEEK_API_KEY=$api_key" >> "$ENV_FILE"
    echo "✅ Added DeepSeek (cheap reasoning)"
    ;;
  *) echo "Invalid choice"; exit 1 ;;
esac

read -p "Restart services to apply? (y/N): " restart
[[ $restart =~ ^[Yy]$ ]] && docker-compose restart

@@ -0,0 +1,37 @@
#!/bin/bash

echo "🧠 LLM Hub Status"
echo "================="
echo ""

# Container status
echo "📦 Containers:"
docker-compose ps --services --filter "status=running" 2>/dev/null | while read service; do
  status=$(docker-compose ps -q "$service" | xargs docker inspect -f '{{.State.Status}}' 2>/dev/null)
  echo "  $service: $status"
done

echo ""
echo "🔍 Health Checks:"

# API health
if curl -s http://localhost:8080/health | grep -q "healthy"; then
  echo "  ✅ Agent Core: Healthy"
else
  echo "  ❌ Agent Core: Not responding"
fi

# LiteLLM
if curl -s http://localhost:4000/health/liveliness | grep -q "true"; then
  echo "  ✅ LiteLLM: Running"
else
  echo "  ❌ LiteLLM: Not responding"
fi

echo ""
echo "📊 Router Stats:"
curl -s http://localhost:8080/health 2>/dev/null | python3 -m json.tool 2>/dev/null || echo "  Unable to fetch stats"

echo ""
echo "💾 Memory Usage:"
docker stats --no-stream --format "table {{.Name}}\t{{.MemUsage}}" | grep -E "(agent-|NAME)" || true

@@ -0,0 +1,20 @@
FROM python:3.11-slim

WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y \
    build-essential \
    git \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application
COPY main.py .

EXPOSE 8080

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]

@@ -0,0 +1,357 @@
"""
Agentic AI Core - Multi-Reasoning Engine
Supports: ReAct, Plan-and-Execute, Reflexion
"""
import os
import json
from typing import List, Dict, Any, Literal, Optional
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field
from datetime import datetime
import httpx
import redis

app = FastAPI(
    title="Agentic AI Core",
    version="2.0.0",
    description="Multi-reasoning agent platform with memory and MCP integration"
)

# Configuration
LLM_API_BASE = os.getenv("LLM_API_BASE", "http://litellm:4000/v1")
LLM_API_KEY = os.getenv("LLM_API_KEY", "sk-agent")
DEFAULT_REASONING = os.getenv("DEFAULT_REASONING_MODE", "auto")

# Redis for short-term memory (fall back to no memory if unavailable)
try:
    redis_client = redis.from_url(
        os.getenv("REDIS_URL", "redis://redis:6379"),
        decode_responses=True
    )
except Exception:
    redis_client = None

# ==========================================
# DATA MODELS
# ==========================================

class AgentRequest(BaseModel):
    message: str
    session_id: str = Field(default="default", description="Conversation thread ID")
    reasoning_mode: Literal["react", "plan_execute", "reflexion", "auto"] = DEFAULT_REASONING
    context_files: Optional[List[str]] = Field(default_factory=list)
    enable_memory: bool = True
    max_iterations: int = 10

class AgentStep(BaseModel):
    step_number: int
    type: Literal["thought", "action", "observation", "reflection", "plan"]
    content: str
    timestamp: datetime = Field(default_factory=datetime.now)

class AgentResponse(BaseModel):
    response: str
    reasoning_mode: str
    session_id: str
    steps: List[AgentStep] = Field(default_factory=list)
    metadata: Dict[str, Any] = Field(default_factory=dict)

# ==========================================
# UTILITY FUNCTIONS
# ==========================================

async def call_llm(messages: List[Dict], model: str = "auto", tools: Optional[List] = None) -> Dict:
    """Call LLM through LiteLLM gateway"""
    async with httpx.AsyncClient() as client:
        payload = {
            "model": model,
            "messages": messages,
            "temperature": 0.7,
            "max_tokens": 4000
        }
        if tools:
            payload["tools"] = tools

        response = await client.post(
            f"{LLM_API_BASE}/chat/completions",
            headers={"Authorization": f"Bearer {LLM_API_KEY}"},
            json=payload,
            timeout=60.0
        )
        return response.json()

def determine_reasoning_mode(message: str, requested: str) -> str:
    """Auto-select reasoning mode based on task complexity"""
    if requested != "auto":
        return requested

    # Complexity indicators
    complexity_markers = [
        "plan", "design", "architecture", "steps", "implement",
        "build", "create", "project", "complex", "multi-step"
    ]

    msg_lower = message.lower()
    score = sum(1 for marker in complexity_markers if marker in msg_lower)

    if score >= 3 or len(message) > 500:
        return "plan_execute"
    elif "review" in msg_lower or "check" in msg_lower or "verify" in msg_lower:
        return "reflexion"
    else:
        return "react"

# ==========================================
# REASONING ENGINES
# ==========================================

class ReActEngine:
    """ReAct: Reasoning + Acting in interleaved steps"""

    async def run(self, message: str, session_id: str) -> Dict:
        steps = []

        # Initial thought (currently a single LLM pass; the interleaved
        # tool-calling loop is not wired in yet)
        messages = [
            {"role": "system", "content": "You are a ReAct agent. Think step by step and act."},
            {"role": "user", "content": message}
        ]

        response = await call_llm(messages, model="fast-tier")

        steps.append(AgentStep(
            step_number=1,
            type="thought",
            content="Initial analysis and reasoning"
        ))

        content = response["choices"][0]["message"]["content"]

        return {
            "response": content,
            "steps": steps,
            "model_used": "fast-tier"
        }

class PlanAndExecuteEngine:
    """Plan first, then execute step by step"""

    async def run(self, message: str, session_id: str) -> Dict:
        steps = []

        # Planning phase
        plan_messages = [
            {"role": "system", "content": "Create a step-by-step plan to accomplish the task."},
            {"role": "user", "content": f"Create a detailed plan for: {message}"}
        ]

        plan_response = await call_llm(plan_messages, model="volume-tier")
        plan = plan_response["choices"][0]["message"]["content"]

        steps.append(AgentStep(
            step_number=1,
            type="plan",
            content=plan
        ))

        # Execution phase
        exec_messages = [
            {"role": "system", "content": "Execute the task following the provided plan."},
            {"role": "user", "content": f"Task: {message}\n\nPlan: {plan}\n\nExecute this plan:"}
        ]

        exec_response = await call_llm(exec_messages, model="reasoning-tier")
        result = exec_response["choices"][0]["message"]["content"]

        steps.append(AgentStep(
            step_number=2,
            type="action",
            content="Execution completed"
        ))

        return {
            "response": result,
            "steps": steps,
            "model_used": "reasoning-tier",
            "plan": plan
        }

class ReflexionEngine:
    """Execute with self-reflection and correction"""

    async def run(self, message: str, session_id: str, max_iterations: int = 2) -> Dict:
        steps = []

        # Initial execution
        messages = [
            {"role": "system", "content": "Solve the problem carefully."},
            {"role": "user", "content": message}
        ]

        response = await call_llm(messages, model="quality-tier")
        answer = response["choices"][0]["message"]["content"]

        steps.append(AgentStep(
            step_number=1,
            type="action",
            content="Initial solution generated"
        ))

        # Reflection phase: critique, then revise, up to max_iterations times
        for i in range(max_iterations):
            reflect_messages = [
                {"role": "system", "content": "Critically evaluate the solution for errors or improvements."},
                {"role": "user", "content": f"Problem: {message}\n\nProposed Solution: {answer}\n\nIdentify any issues or improvements:"}
            ]

            reflect_response = await call_llm(reflect_messages, model="claude-haiku")
            reflection = reflect_response["choices"][0]["message"]["content"]

            if "correct" in reflection.lower() and "no issues" in reflection.lower():
                break

            steps.append(AgentStep(
                step_number=2 + i,
                type="reflection",
                content=reflection
            ))

            # Improve based on reflection
            improve_messages = [
                {"role": "system", "content": "Improve the solution based on the critique."},
                {"role": "user", "content": f"Original: {answer}\n\nIssues found: {reflection}\n\nProvide improved solution:"}
            ]

            improve_response = await call_llm(improve_messages, model="quality-tier")
            answer = improve_response["choices"][0]["message"]["content"]

        return {
            "response": answer,
            "steps": steps,
            "model_used": "quality-tier",
            "iterations": len(steps)
        }

# ==========================================
# API ENDPOINTS
# ==========================================

@app.post("/v1/chat/completions", response_model=AgentResponse)
async def agent_endpoint(request: AgentRequest):
    """
    Main agent endpoint with multiple reasoning strategies:
    - react: Fast iterative reasoning (good for simple tasks)
    - plan_execute: Plan then execute (good for complex tasks)
    - reflexion: Self-correcting (good for accuracy-critical tasks)
    - auto: Automatically select based on task complexity
    """

    # Determine reasoning mode
    mode = determine_reasoning_mode(request.message, request.reasoning_mode)

    # Store message in memory if enabled
    if request.enable_memory and redis_client:
        key = f"session:{request.session_id}:history"
        redis_client.lpush(key, request.message)
        redis_client.ltrim(key, 0, 99)  # Keep last 100 messages

    try:
        # Route to appropriate reasoning engine
        if mode == "react":
            result = await ReActEngine().run(request.message, request.session_id)
        elif mode == "plan_execute":
            result = await PlanAndExecuteEngine().run(request.message, request.session_id)
        elif mode == "reflexion":
            result = await ReflexionEngine().run(
                request.message,
                request.session_id,
                max_iterations=2
            )
        else:
            # Default fallback
            result = await ReActEngine().run(request.message, request.session_id)

        return AgentResponse(
            response=result["response"],
            reasoning_mode=mode,
            session_id=request.session_id,
            steps=result.get("steps", []),
            metadata={
                "model_used": result.get("model_used", "unknown"),
                "auto_selected": request.reasoning_mode == "auto",
                "timestamp": datetime.now().isoformat()
            }
        )

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@app.get("/v1/models")
async def list_models():
    """List available agent models"""
    return {
        "object": "list",
        "data": [
            {
                "id": "agent/orchestrator",
                "object": "model",
                "created": 1700000000,
                "owned_by": "llm-hub",
                "description": "Auto-selecting orchestrator"
            },
            {
                "id": "agent/react",
                "object": "model",
                "created": 1700000000,
                "owned_by": "llm-hub",
                "description": "ReAct reasoning - fast iterative"
            },
            {
                "id": "agent/plan-execute",
                "object": "model",
                "created": 1700000000,
                "owned_by": "llm-hub",
                "description": "Plan-and-Execute - complex tasks"
            },
            {
                "id": "agent/reflexion",
                "object": "model",
                "created": 1700000000,
                "owned_by": "llm-hub",
                "description": "Reflexion - self-correcting with verification"
            }
        ]
    }

@app.get("/health")
async def health():
    """Health check endpoint"""
    try:
        redis_status = "connected" if redis_client and redis_client.ping() else "disconnected"
    except redis.RedisError:
        # ping() raises if Redis is down; report that instead of failing the check
        redis_status = "disconnected"

    return {
        "status": "healthy",
        "version": "2.0.0",
        "capabilities": ["react", "plan_execute", "reflexion", "mcp", "memory"],
        "default_mode": DEFAULT_REASONING,
        "redis": redis_status,
        "timestamp": datetime.now().isoformat()
    }

@app.get("/sessions/{session_id}/history")
async def get_session_history(session_id: str, limit: int = 10):
    """Retrieve conversation history for a session"""
    if not redis_client:
        return {"error": "Redis not available"}

    key = f"session:{session_id}:history"
    history = redis_client.lrange(key, 0, limit - 1)

    return {
        "session_id": session_id,
        "history": history,
        "count": len(history)
    }

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8080)

@@ -0,0 +1,13 @@
fastapi==0.104.1
uvicorn[standard]==0.24.0
langgraph==0.2.53
langchain==0.3.0
langchain-openai==0.2.0
langchain-chroma==0.1.4
chromadb==0.5.0
redis==5.0.1
httpx==0.25.2
tiktoken==0.5.1
pydantic==2.5.0
python-multipart==0.0.6
aiofiles==23.2.1

@@ -0,0 +1,11 @@
FROM python:3.11-slim

WORKDIR /app

# Node.js is needed because several configured MCP servers launch via npx
RUN apt-get update && apt-get install -y nodejs npm \
    && rm -rf /var/lib/apt/lists/*

RUN pip install mcpo uv

COPY servers.json .

EXPOSE 8000

CMD ["mcpo", "--config", "servers.json", "--host", "0.0.0.0", "--port", "8000"]

@@ -0,0 +1,24 @@
{
  "mcpServers": {
    "filesystem": {
      "command": "npx",
      "args": ["-y", "@modelcontextprotocol/server-filesystem", "/workspace"]
    },
    "git": {
      "command": "uvx",
      "args": ["mcp-server-git"]
    },
    "fetch": {
      "command": "uvx",
      "args": ["mcp-server-fetch"]
    },
    "memory": {
      "command": "npx",
      "args": ["-y", "@modelcontextprotocol/server-memory"]
    },
    "sequential-thinking": {
      "command": "npx",
      "args": ["-y", "@modelcontextprotocol/server-sequential-thinking"]
    }
  }
}

@@ -0,0 +1,84 @@
#!/bin/bash
set -e

GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

INSTALL_DIR="$(pwd)"

echo -e "${GREEN}🧠 Agentic AI Hub Setup${NC}"
echo "======================="

# Detect Proxmox LXC
if [ -f /proc/1/environ ] && grep -q "container=lxc" /proc/1/environ 2>/dev/null; then
    echo -e "${YELLOW}✓ LXC container detected${NC}"
    if ! grep -q "lxc.cgroup.relative" /etc/pve/lxc/*.conf 2>/dev/null; then
        echo -e "${YELLOW}⚠️  Tip: For LXC with Docker, add to /etc/pve/lxc/XXX.conf:${NC}"
        echo "  lxc.cgroup.relative = 0"
        echo "  lxc.apparmor.profile = unconfined"
        echo "  lxc.cgroup.devices.allow = a"
    fi
fi

# Check/install Docker
if ! command -v docker &> /dev/null; then
    echo -e "${YELLOW}Installing Docker...${NC}"
    curl -fsSL https://get.docker.com | sh
    usermod -aG docker $USER || true
    systemctl enable docker
    systemctl start docker
fi

if ! command -v docker-compose &> /dev/null && ! docker compose version &> /dev/null; then
    echo -e "${YELLOW}Installing Docker Compose...${NC}"
    apt-get update && apt-get install -y docker-compose-plugin
fi

# Install Node.js for MCP
if ! command -v node &> /dev/null || [ "$(node -v | cut -d'v' -f2 | cut -d'.' -f1)" -lt 20 ]; then
    echo -e "${YELLOW}Installing Node.js 20...${NC}"
    curl -fsSL https://deb.nodesource.com/setup_20.x | bash -
    apt-get install -y nodejs
fi

# Install uv for Python tools
if ! command -v uv &> /dev/null; then
    pip install uv || pip3 install uv
fi

# Create directories (matching the bind mounts in docker-compose.yml)
echo -e "${BLUE}Creating directories...${NC}"
mkdir -p data/{redis,chroma,agent/sessions,code-server,open-webui,neo4j} workspace logs
mkdir -p services/{agent-core,mcpo}

# Set permissions
chown -R 1000:1000 workspace data || true
chmod +x *.sh scripts/*.sh 2>/dev/null || true

# Create .env if not exists
if [ ! -f .env ]; then
    echo -e "${YELLOW}Creating .env file...${NC}"
    cp .env.example .env
    # Generate random keys
    sed -i "s/MASTER_KEY=.*/MASTER_KEY=sk-agent-$(openssl rand -hex 8)/" .env
    sed -i "s/WEBUI_SECRET_KEY=.*/WEBUI_SECRET_KEY=$(openssl rand -hex 32)/" .env
    echo -e "${GREEN}✓ .env created. Edit it to add your API keys.${NC}"
fi

# Create workspace gitkeep
touch workspace/.gitkeep

echo ""
echo -e "${GREEN}✅ Setup complete!${NC}"
echo ""
echo "Next steps:"
echo "1. Edit .env file: nano .env"
echo "2. Add your API keys (Groq, Mistral, etc.)"
echo "3. Start services: ./start.sh"
echo ""
echo "Documentation:"
echo "  - Setup: docs/SETUP.md"
echo "  - API: docs/API.md"
echo "  - Providers: docs/PROVIDERS.md"

@@ -0,0 +1,66 @@
#!/bin/bash

cd "$(dirname "$0")"

GREEN='\033[0;32m'
BLUE='\033[0;34m'
NC='\033[0m'

if [ ! -f .env ]; then
    echo "Error: .env file not found. Run ./setup.sh first."
    exit 1
fi

source .env

echo -e "${GREEN}🚀 Starting Agentic LLM Hub...${NC}"
echo ""

# Determine profile
PROFILE=${1:-full}

if [ "$PROFILE" = "minimal" ]; then
    echo "Mode: Minimal (core services only)"
    docker-compose up -d redis chromadb litellm agent-core
elif [ "$PROFILE" = "ide" ]; then
    echo "Mode: Standard + IDE"
    docker-compose --profile ide up -d
elif [ "$PROFILE" = "full" ]; then
    echo "Mode: Full (all services including MCP tools)"
    docker-compose --profile ide --profile mcp --profile ui up -d
else
    echo "Usage: ./start.sh [minimal|ide|full]"
    echo "  minimal - Core services only (lowest resources)"
    echo "  ide     - Core + VS Code IDE"
    echo "  full    - Everything including MCP tools and Web UI"
    exit 1
fi

echo ""
echo "Waiting for services..."
sleep 5

# Get IP
IP=$(hostname -I | awk '{print $1}')

echo ""
echo -e "${GREEN}✅ Services started!${NC}"
echo ""
echo "Access Points:"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
printf "${BLUE}%-22s${NC} %s\n" "Agent API:" "http://$IP:8080/v1"
printf "${BLUE}%-22s${NC} %s\n" "VS Code IDE:" "http://$IP:8443"
printf "${BLUE}%-22s${NC} %s\n" "LiteLLM Gateway:" "http://$IP:4000"
printf "${BLUE}%-22s${NC} %s\n" "MCP Tools:" "http://$IP:8001/docs"
printf "${BLUE}%-22s${NC} %s\n" "Web UI:" "http://$IP:3000"
printf "${BLUE}%-22s${NC} %s\n" "Vector DB:" "http://$IP:8000"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo ""
echo "Test command:"
# Heredoc avoids the quoting pitfalls of echoing a multi-line command
cat << EOF
curl -X POST http://$IP:8080/v1/chat/completions \\
  -H 'Content-Type: application/json' \\
  -H 'Authorization: Bearer ${MASTER_KEY:0:20}...' \\
  -d '{"message":"Hello","reasoning_mode":"react"}'
EOF
echo ""
echo "View logs: docker-compose logs -f"
echo "Stop: docker-compose down"