Initial commit: Agentic LLM Hub with multi-reasoning, MCP tools, and IDE

ImpulsiveFPS 2026-02-01 15:11:31 +01:00
commit 2cafb31cb4
22 changed files with 1460 additions and 0 deletions

.env.example
# Master Key for API Access (generate strong key)
MASTER_KEY=sk-agent-$(openssl rand -hex 8)
# ==========================================
# FREE TIER API KEYS (Add your keys below)
# ==========================================
# Groq - https://console.groq.com (20 RPM free, create multiple accounts)
GROQ_API_KEY_1=gsk_your_first_groq_key_here
GROQ_API_KEY_2=gsk_your_second_groq_key_here
# Mistral - https://console.mistral.ai (1B tokens/month free)
MISTRAL_API_KEY=your_mistral_key_here
# Anthropic Claude - https://console.anthropic.com ($5 trial, $500 student)
ANTHROPIC_API_KEY=sk-ant-your_claude_key_here
# Moonshot Kimi - https://platform.moonshot.ai ($5 signup bonus)
MOONSHOT_API_KEY=sk-your_moonshot_key_here
# OpenRouter - https://openrouter.ai (50 req/day free, access to Kimi free)
OPENROUTER_API_KEY=sk-or-your_openrouter_key_here
# Cohere - https://cohere.com (1K calls/month free, good for embeddings)
COHERE_API_KEY=your_cohere_key_here
# DeepSeek - https://platform.deepseek.com (cheap rates)
DEEPSEEK_API_KEY=sk-your_deepseek_key_here
# GitHub Token (for MCP Git tools)
GITHUB_TOKEN=ghp_your_github_token_here
# ==========================================
# AGENT SETTINGS
# ==========================================
# Default reasoning: react, plan_execute, reflexion, or auto
DEFAULT_REASONING=auto
# Enable self-reflection (true/false)
ENABLE_REFLECTION=true
# Maximum iterations per request
MAX_ITERATIONS=10
# ==========================================
# UI/IDE SETTINGS
# ==========================================
# Code-Server passwords (change these!)
IDE_PASSWORD=secure-ide-password-123
IDE_SUDO_PASSWORD=admin-password-456
# Optional: Domain for reverse proxy
# IDE_DOMAIN=code.yourdomain.com
# Web UI settings
WEBUI_SECRET_KEY=$(openssl rand -hex 32)
# ==========================================
# ADVANCED MEMORY SETTINGS
# ==========================================
# Enable knowledge graph (Neo4j) - requires more RAM
ENABLE_KNOWLEDGE_GRAPH=false
NEO4J_AUTH=neo4j/password
# ChromaDB settings
CHROMA_STORAGE_PATH=/data/chroma

.gitignore
# Environment variables
.env
.env.local
.env.*.local
# Data directories (persisted volumes)
data/
workspace/*
!workspace/.gitkeep
logs/
*.db
*.sqlite3
# IDE
.idea/
.vscode/
*.swp
*.swo
# OS
.DS_Store
Thumbs.db
# Docker
.docker/
docker-compose.override.yml
# Temporary
*.tmp
*.bak
*.log

Makefile
.PHONY: help setup start start-ide stop logs status update backup clean
help:
@echo "Agentic LLM Hub Management"
@echo "=========================="
@echo "make setup - Initial setup"
@echo "make start - Start all services (full profile)"
@echo "make start-ide - Start with IDE only"
@echo "make stop - Stop all services"
@echo "make logs - View logs"
@echo "make status - Check service status"
@echo "make update - Pull latest and update images"
@echo "make backup - Backup data directories"
@echo "make clean - Remove containers (data preserved)"
setup:
@chmod +x *.sh scripts/*.sh 2>/dev/null || true
@./setup.sh
start:
@./start.sh full
start-ide:
@./start.sh ide
stop:
@docker-compose down
logs:
@docker-compose logs -f --tail=100
status:
@echo "Container Status:"
@docker-compose ps
@echo ""
@echo "API Health:"
@curl -s http://localhost:8080/health | python3 -m json.tool 2>/dev/null || echo "API not responding"
update:
@git pull
@docker-compose pull
@docker-compose up -d
backup:
@mkdir -p backup/$(shell date +%Y%m%d)
@cp -r data backup/$(shell date +%Y%m%d)/
@cp .env backup/$(shell date +%Y%m%d)/
@echo "Backup created: backup/$(shell date +%Y%m%d)/"
clean:
@docker-compose down -v
@echo "Containers removed. Data preserved in ./data/"

README.md
# 🤖 Agentic LLM Hub
Self-hosted AI agent platform with multi-provider LLM aggregation, reasoning engines (ReAct, Plan-and-Execute, Reflexion), MCP tools, and web IDE.
## 🚀 Quick Start
```bash
# 1. Clone from your Gitea
git clone https://gitea.yourdomain.com/youruser/llm-hub.git
cd llm-hub
# 2. Configure
cp .env.example .env
nano .env # Add your API keys
# 3. Deploy
./setup.sh && ./start.sh
```
## 📡 Access Points
| Service | URL | Description |
|---------|-----|-------------|
| VS Code IDE | `http://your-ip:8443` | Full IDE with Continue.dev |
| Agent API | `http://your-ip:8080/v1` | Main API endpoint |
| LiteLLM | `http://your-ip:4000` | LLM Gateway |
| MCP Tools | `http://your-ip:8001/docs` | Tool OpenAPI docs |
| ChromaDB | `http://your-ip:8000` | Vector memory |
| Web UI | `http://your-ip:3000` | Chat interface |
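After `./start.sh`, a quick smoke test of the endpoints above (replace `your-ip`; `/health` and `/health/liveliness` are the same checks `scripts/status.sh` uses):
```python
# Minimal smoke test for the access points above (uses the requests library).
import requests

checks = {
    "Agent Core": "http://your-ip:8080/health",
    "LiteLLM":    "http://your-ip:4000/health/liveliness",
}
for name, url in checks.items():
    try:
        print(name, requests.get(url, timeout=5).status_code)
    except requests.RequestException as exc:
        print(name, "unreachable:", type(exc).__name__)
```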
## 🔧 Supported Providers
- **Groq** (Free tier, fast)
- **Mistral** (1B tokens/month free)
- **Anthropic Claude** (Trial credits)
- **Moonshot Kimi** ($5 signup bonus)
- **OpenRouter** (Free tier access)
- **Cohere** (1K calls/month)
- **DeepSeek** (Cheap reasoning)
## 🧠 Reasoning Modes
- `react` - Fast iterative reasoning
- `plan_execute` - Complex multi-step tasks
- `reflexion` - Self-correcting with verification
- `auto` - Automatic selection
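A minimal sketch of selecting a mode per request, assuming the fields documented in `docs/API.md` (host and key are placeholders):
```python
import requests

# Ask the agent to plan before executing; other valid modes: react, reflexion, auto.
resp = requests.post(
    "http://your-ip:8080/v1/chat/completions",
    headers={"Authorization": "Bearer sk-agent-your-key"},
    json={
        "message": "Build a small CLI that converts CSV to JSON",
        "reasoning_mode": "plan_execute",
        "session_id": "readme-demo",
    },
    timeout=120,
)
data = resp.json()
print(data["reasoning_mode"], "->", data["response"][:200])
```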
## 📚 Documentation
- [Setup Guide](docs/SETUP.md)
- [API Reference](docs/API.md)
- [Provider Guide](docs/PROVIDERS.md)
## 🔄 Updates
```bash
git pull origin main
docker-compose pull
docker-compose up -d
```

config/agent/.gitkeep

config/continue/config.yaml
name: LLM Hub IDE
version: 1.0.0
schema: v1
models:
- name: Groq Llama 3.3 70B
provider: openai
model: fast-tier
apiBase: http://agent-core:8080/v1
apiKey: sk-agent
roles: [chat, edit, apply]
- name: Claude 3.5 Sonnet
provider: openai
model: quality-tier
apiBase: http://agent-core:8080/v1
apiKey: sk-agent
roles: [chat, edit, apply]
- name: Kimi K2
provider: openai
model: reasoning-tier
apiBase: http://agent-core:8080/v1
apiKey: sk-agent
roles: [chat, edit, apply]
- name: Mistral Small
provider: openai
model: volume-tier
apiBase: http://agent-core:8080/v1
apiKey: sk-agent
roles: [chat, edit]
tabAutocompleteModel:
name: Mistral Autocomplete
provider: openai
model: volume-tier
apiBase: http://litellm:4000/v1
apiKey: sk-agent
embeddingsProvider:
provider: openai
model: embeddings
apiBase: http://litellm:4000/v1
apiKey: sk-agent
context:
- provider: code
- provider: docs
- provider: diff
- provider: terminal
- provider: problems
- provider: folder
- provider: codebase

config/litellm_config.yaml
model_list:
# FREE TIER AGGREGATION
- model_name: fast-tier
litellm_params:
model: groq/llama-3.3-70b-versatile
api_key: os.environ/GROQ_API_KEY_1
rpm_limit: 20
- model_name: fast-tier
litellm_params:
model: groq/llama-3.1-8b-instant
api_key: os.environ/GROQ_API_KEY_2
rpm_limit: 20
- model_name: volume-tier
litellm_params:
model: mistral/mistral-small-latest
api_key: os.environ/MISTRAL_API_KEY
tpm_limit: 500000
# CLAUDE & KIMI - Quality/Reasoning
- model_name: quality-tier
litellm_params:
model: anthropic/claude-3-5-sonnet-20240620
api_key: os.environ/ANTHROPIC_API_KEY
rpm_limit: 5
- model_name: claude-haiku
litellm_params:
model: anthropic/claude-3-haiku-20240307
api_key: os.environ/ANTHROPIC_API_KEY
rpm_limit: 10
# Kimi via OpenRouter (Free tier)
- model_name: reasoning-tier
litellm_params:
model: openrouter/moonshotai/kimi-k2:free
api_key: os.environ/OPENROUTER_API_KEY
# Kimi Direct (Ultra-cheap)
- model_name: reasoning-tier
litellm_params:
model: moonshot/kimi-k2-0711-preview
api_key: os.environ/MOONSHOT_API_KEY
tpm_limit: 100000
- model_name: deepseek
litellm_params:
model: deepseek/deepseek-chat
api_key: os.environ/DEEPSEEK_API_KEY
# Embeddings & Tools
- model_name: embeddings
litellm_params:
model: cohere/embed-english-v3.0
api_key: os.environ/COHERE_API_KEY
# Local Fallback
- model_name: local-llama
litellm_params:
model: ollama/llama3.1:8b
api_base: http://ollama:11434
router_settings:
routing_strategy: "usage-based-routing"
timeout: 30
num_retries: 3
allowed_fails: 2
cooldown_time: 60
fallbacks:
- fast-tier: ["volume-tier", "reasoning-tier"]
- volume-tier: ["reasoning-tier", "local-llama"]
- quality-tier: ["claude-haiku", "reasoning-tier"]
general_settings:
master_key: os.environ/LITELLM_MASTER_KEY
cache: true
cache_params:
type: redis
host: redis
port: 6379
ttl: 3600
retry_policy:
TimeoutError: 3
RateLimitError: 5
log_level: info
log_file: /app/logs/litellm.log

config/mcp/servers.json
{
"mcpServers": {
"filesystem": {
"command": "npx",
"args": ["-y", "@modelcontextprotocol/server-filesystem", "/workspace"]
},
"git": {
"command": "uvx",
"args": ["mcp-server-git"]
},
"github": {
"command": "npx",
"args": ["-y", "@modelcontextprotocol/server-github"],
"env": {
"GITHUB_PERSONAL_ACCESS_TOKEN": "${GITHUB_TOKEN}"
}
},
"fetch": {
"command": "uvx",
"args": ["mcp-server-fetch"]
},
"sqlite": {
"command": "uvx",
"args": ["mcp-server-sqlite", "/workspace/data.db"]
},
"memory": {
"command": "npx",
"args": ["-y", "@modelcontextprotocol/server-memory"]
},
"sequential-thinking": {
"command": "npx",
"args": ["-y", "@modelcontextprotocol/server-sequential-thinking"]
}
}
}
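The servers defined above are exposed as REST tools by the mcpo gateway. The exact routes depend on how mcpo mounts each server, so rather than hard-coding paths, read the generated spec behind `http://your-ip:8001/docs`; a minimal sketch, assuming the standard `openapi.json` route of a FastAPI app:
```python
# List the tool routes the MCP gateway generated from servers.json
# (your-ip is a placeholder; check /docs for the interactive version).
import requests

spec = requests.get("http://your-ip:8001/openapi.json", timeout=10).json()
for path, ops in sorted(spec.get("paths", {}).items()):
    for method in ops:
        print(method.upper(), path)
```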

docker-compose.yml
version: '3.8'
services:
# Core Infrastructure
redis:
image: redis:7-alpine
container_name: agent-redis
restart: unless-stopped
volumes:
- ./data/redis:/data
command: redis-server --appendonly yes --maxmemory 512mb --maxmemory-policy allkeys-lru
networks:
- agent-network
chromadb:
image: chromadb/chroma:latest
container_name: agent-memory-vector
restart: unless-stopped
ports:
- "8000:8000"
volumes:
- ./data/chroma:/chroma/chroma
environment:
- IS_PERSISTENT=TRUE
- PERSIST_DIRECTORY=/chroma/chroma
- ANONYMIZED_TELEMETRY=FALSE
networks:
- agent-network
# LLM Gateway
litellm:
image: ghcr.io/berriai/litellm:main-latest
container_name: agent-gateway
restart: unless-stopped
ports:
- "4000:4000"
volumes:
- ./config/litellm_config.yaml:/app/config.yaml
- ./logs:/app/logs
environment:
- DATABASE_URL=sqlite:///app/db.sqlite3
- LITELLM_MASTER_KEY=${MASTER_KEY:-sk-agent}
- REDIS_HOST=redis
- REDIS_PORT=6379
command: --config /app/config.yaml --port 4000
networks:
- agent-network
# Agent Core with Reasoning Engines
agent-core:
build:
context: ./services/agent-core
dockerfile: Dockerfile
container_name: agent-core
restart: unless-stopped
ports:
- "8080:8080"
volumes:
- ./workspace:/workspace
- ./config/agent:/app/config
- ./data/agent:/app/data
environment:
- LLM_API_BASE=http://litellm:4000/v1
- LLM_API_KEY=${MASTER_KEY:-sk-agent}
- REDIS_URL=redis://redis:6379/0
- CHROMA_URL=http://chromadb:8000
- DEFAULT_REASONING_MODE=${DEFAULT_REASONING:-auto}
depends_on:
- litellm
- redis
- chromadb
networks:
- agent-network
# MCP Tool Gateway
mcpo:
build:
context: ./services/mcpo
dockerfile: Dockerfile
container_name: agent-mcp-gateway
restart: unless-stopped
ports:
- "8001:8000"
volumes:
- ./workspace:/workspace:ro
- ./config/mcp:/app/config
networks:
- agent-network
profiles:
- mcp
# VS Code Server with AI Assistant
code-server:
image: lscr.io/linuxserver/code-server:latest
container_name: agent-ide
restart: unless-stopped
ports:
- "8443:8443"
environment:
- PUID=1000
- PGID=1000
- TZ=Etc/UTC
- PASSWORD=${IDE_PASSWORD:-code}
- SUDO_PASSWORD=${IDE_SUDO_PASSWORD:-sudo}
- DEFAULT_WORKSPACE=/workspace
volumes:
- ./workspace:/workspace
- ./data/code-server:/config
- ./config/continue:/config/.continue:ro
networks:
- agent-network
profiles:
- ide
# Web UI
open-webui:
image: ghcr.io/open-webui/open-webui:main
container_name: agent-ui
restart: unless-stopped
ports:
- "3000:8080"
volumes:
- ./data/open-webui:/app/backend/data
environment:
- OPENAI_API_BASE_URL=http://agent-core:8080/v1
- OPENAI_API_KEY=${MASTER_KEY:-sk-agent}
- ENABLE_SIGNUP=false
- DEFAULT_MODELS=agent/orchestrator
depends_on:
- agent-core
networks:
- agent-network
profiles:
- ui
# Auto-updater
watchtower:
image: containrrr/watchtower
container_name: agent-watchtower
restart: unless-stopped
volumes:
- /var/run/docker.sock:/var/run/docker.sock
environment:
- WATCHTOWER_POLL_INTERVAL=86400
- WATCHTOWER_CLEANUP=true
networks:
- agent-network
networks:
agent-network:
driver: bridge

docs/API.md
# API Reference
## Base URL
```
http://your-server-ip:8080/v1
```
## Authentication
All requests require Bearer token:
```
Authorization: Bearer sk-agent-your-key
```
## Endpoints
### POST /chat/completions
Main agent endpoint.
**Request:**
```json
{
"message": "Create a Python script to fetch weather data",
"reasoning_mode": "plan_execute",
"session_id": "unique-session-id",
"max_iterations": 10
}
```
**Response:**
```json
{
"response": "Here\'s the Python script...",
"reasoning_mode": "plan_execute",
"session_id": "unique-session-id",
"steps": [
{"step_number": 1, "type": "plan", "content": "..."},
{"step_number": 2, "type": "action", "content": "..."}
],
"metadata": {
"model_used": "volume-tier",
"auto_selected": true,
"timestamp": "2024-..."
}
}
```
### Reasoning Modes
| Mode | Use Case | Speed | Accuracy |
|------|----------|-------|----------|
| `react` | Simple Q&A, debugging | Fast | Medium |
| `plan_execute` | Complex multi-step tasks | Medium | High |
| `reflexion` | Code review, critical tasks | Slow | Very High |
| `auto` | Let system decide | Variable | Adaptive |
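When `auto` is requested, agent-core picks a mode with a simple heuristic; the sketch below condenses `determine_reasoning_mode` from `services/agent-core/main.py`:
```python
# Simplified from determine_reasoning_mode(): several complexity keywords or a
# very long prompt -> plan_execute, review-style verbs -> reflexion, else react.
MARKERS = ["plan", "design", "architecture", "steps", "implement",
           "build", "create", "project", "complex", "multi-step"]

def auto_mode(message: str) -> str:
    msg = message.lower()
    score = sum(marker in msg for marker in MARKERS)
    if score >= 3 or len(message) > 500:
        return "plan_execute"
    if "review" in msg or "check" in msg or "verify" in msg:
        return "reflexion"
    return "react"

print(auto_mode("Please review this function"))                          # reflexion
print(auto_mode("Design and implement the build steps for a project"))   # plan_execute
```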
### GET /models
List available models.
### GET /health
Check system status. Served at the server root (`http://your-server-ip:8080/health`), not under `/v1`.
### GET /sessions/{id}/history
Retrieve conversation history for a session. Also served at the server root.
## Examples
### Python
```python
import requests
response = requests.post(
"http://localhost:8080/v1/chat/completions",
headers={"Authorization": "Bearer sk-agent-xxx"},
json={
"message": "Refactor this code",
"reasoning_mode": "reflexion"
}
)
print(response.json()["response"])
```
### cURL
```bash
curl -X POST http://localhost:8080/v1/chat/completions \
-H "Authorization: Bearer sk-agent-xxx" \
-H "Content-Type: application/json" \
-d '{"message":"Hello","reasoning_mode":"auto"}'
```
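### Session history
The same pattern works for `GET /sessions/{id}/history`; a minimal sketch (the session id is whatever you passed to `/chat/completions`, and the limit defaults to 10 server-side):
```python
import requests

r = requests.get(
    "http://localhost:8080/sessions/unique-session-id/history",
    headers={"Authorization": "Bearer sk-agent-xxx"},
    params={"limit": 5},
)
print(r.json())
```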

docs/PROVIDERS.md
# Provider Setup Guide
## Free Tier Providers
### Groq (Fastest)
- **URL**: https://console.groq.com
- **Free Tier**: 20 RPM, variable TPM
- **Models**: Llama 3.3 70B, Llama 3.1 8B
- **Best For**: Speed, quick coding tasks
- **Tip**: Create multiple accounts with different phones for load balancing
### Mistral (High Volume)
- **URL**: https://console.mistral.ai
- **Free Tier**: 1 billion tokens/month
- **Models**: Mistral Small, Medium
- **Best For**: High-volume processing, chatbots
### OpenRouter (Universal Access)
- **URL**: https://openrouter.ai
- **Free Tier**: 50 requests/day
- **Access**: Kimi K2:free, Gemini Flash:free
- **Best For**: Testing, fallback access
### Cohere (Embeddings)
- **URL**: https://cohere.com
- **Free Tier**: 1,000 calls/month
- **Best For**: Embeddings, RAG systems
## Trial/Cheap Providers
### Anthropic Claude (Highest Quality)
- **URL**: https://console.anthropic.com
- **Trial**: $5 free credits (new users)
- **Student**: $500 credits (apply with .edu)
- **Cost**: $3/M input (Sonnet), $0.25/M (Haiku)
- **Best For**: Complex reasoning, analysis, code review
### Moonshot Kimi (Best Value)
- **URL**: https://platform.moonshot.ai
- **Bonus**: $5 signup credit
- **Cost**: $0.60/M input, $2.50/M output
- **Context**: 128K tokens
- **Best For**: Coding, long documents, Chinese content
### DeepSeek (Cheapest Reasoning)
- **URL**: https://platform.deepseek.com
- **Cost**: $0.14/M input, $0.28/M output
- **Best For**: Reasoning tasks, math, code
## Configuration Priority
The system routes requests in this priority:
1. **Fast tasks** → Groq (free, instant)
2. **High volume** → Mistral (1B tokens)
3. **Complex coding** → Kimi (cheap, 128K context)
4. **Quality critical** → Claude (expensive but best)
5. **Fallback** → OpenRouter free tier
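To override this priority for a single call, you can address a tier directly through the LiteLLM gateway; a minimal sketch using the model names defined in `config/litellm_config.yaml` (host and key are placeholders):
```python
# Pin the "quality-tier" (Claude) for one request instead of letting the
# router/agent choose; any model_name from config/litellm_config.yaml works.
import requests

r = requests.post(
    "http://your-ip:4000/v1/chat/completions",
    headers={"Authorization": "Bearer sk-agent-your-key"},
    json={
        "model": "quality-tier",
        "messages": [{"role": "user", "content": "Review this function for bugs: ..."}],
    },
    timeout=60,
)
print(r.json()["choices"][0]["message"]["content"])
```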
## Rate Limit Management
The router automatically:
- Tracks RPM/TPM across all providers
- Distributes load (multiple Groq accounts)
- Falls back when rate limits are approached
- Caches responses to reduce API calls
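For intuition, here is a toy sketch of usage-based routing with fallbacks. It is purely illustrative, not LiteLLM's implementation; the fallback chain mirrors `config/litellm_config.yaml`, while the numeric limits are made up for the example:
```python
# Toy model: prefer the least-used deployment still under its RPM limit,
# otherwise walk its fallback chain.
import time
from collections import defaultdict, deque

RPM_LIMITS = {"fast-tier": 20, "volume-tier": 60, "reasoning-tier": 30}
FALLBACKS = {"fast-tier": ["volume-tier", "reasoning-tier"]}
_recent_calls = defaultdict(deque)  # deployment name -> call timestamps (last minute)

def _rpm(name: str) -> int:
    """Requests made against `name` in the trailing 60 seconds."""
    cutoff = time.time() - 60
    q = _recent_calls[name]
    while q and q[0] < cutoff:
        q.popleft()
    return len(q)

def pick_deployment(requested: str) -> str:
    candidates = [requested] + FALLBACKS.get(requested, [])
    usable = [c for c in candidates if _rpm(c) < RPM_LIMITS.get(c, 1)]
    if not usable:
        raise RuntimeError("all candidate deployments are at their rate limit")
    choice = min(usable, key=_rpm)          # least-loaded candidate wins
    _recent_calls[choice].append(time.time())
    return choice

print(pick_deployment("fast-tier"))  # -> "fast-tier" while under 20 requests/minute
```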

docs/SETUP.md
# Setup Guide
## Prerequisites
- **OS**: Debian 12, Ubuntu 22.04+, or Proxmox LXC
- **RAM**: 4GB minimum (8GB recommended for IDE)
- **Storage**: 20GB free space
- **Network**: Internet access for API calls
## Quick Install
```bash
# 1. Clone from your Gitea
git clone https://gitea.yourdomain.com/username/llm-hub.git
cd llm-hub
# 2. Run setup
chmod +x setup.sh && ./setup.sh
# 3. Configure API keys
nano .env
# 4. Start
./start.sh full
```
## Proxmox LXC Setup
On Proxmox host, create optimized container:
```bash
pct create 100 local:vztmpl/debian-12-standard_12.7-1_amd64.tar.zst \
--hostname llm-hub \
--memory 8192 \
--swap 1024 \
--cores 4 \
--rootfs local-lvm:20 \
--features nesting=1,keyctl=1 \
--net0 name=eth0,bridge=vmbr0,ip=dhcp
# Add to /etc/pve/lxc/100.conf:
cat >> /etc/pve/lxc/100.conf << EOF
lxc.cgroup.relative = 0
lxc.apparmor.profile = unconfined
lxc.cgroup.devices.allow = a
EOF
pct start 100
pct exec 100 -- bash -c "apt update && apt install -y curl git && git clone https://gitea.yourdomain.com/username/llm-hub.git && cd llm-hub && ./setup.sh"
```
## Configuration
Edit `.env` file:
```bash
# Required: At least one LLM provider
GROQ_API_KEY_1=gsk_xxx
MISTRAL_API_KEY=your_key
# Recommended: Multiple providers for redundancy
ANTHROPIC_API_KEY=sk-ant-xxx
MOONSHOT_API_KEY=sk-xxx
OPENROUTER_API_KEY=sk-or-xxx
# UI Security
IDE_PASSWORD=strong-password-here
```
## Verification
```bash
# Check health
curl http://localhost:8080/health
# Test agent
curl -X POST http://localhost:8080/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer sk-agent-xxx" \
-d '{"message":"Hello","reasoning_mode":"react"}'
```
## Troubleshooting
**Docker not starting in LXC:**
```bash
# On Proxmox host, check config
pct config 100 | grep features
# Should show: features: nesting=1,keyctl=1
```
**Permission denied on workspace:**
```bash
chown -R 1000:1000 workspace/
```
**Port conflicts:**
Edit `docker-compose.yml` to change port mappings (e.g., `8081:8080`)

scripts/add-provider.sh
#!/bin/bash
ENV_FILE=".env"
CONFIG_FILE="config/litellm_config.yaml"
echo "🔌 Add Provider to LLM Hub"
echo "=========================="
echo ""
echo "1. Groq (Fast)"
echo "2. Mistral (Volume)"
echo "3. Anthropic Claude (Quality)"
echo "4. Moonshot Kimi (Cheap/128K)"
echo "5. OpenRouter (Free tier access)"
echo "6. Cohere (Embeddings)"
echo "7. DeepSeek (Cheap reasoning)"
echo "8. Exit"
read -p "Select (1-8): " choice
read -p "Enter API Key: " api_key
case $choice in
1)
read -p "Instance number (1,2,3...): " num
var="GROQ_API_KEY_$num"
echo "$var=$api_key" >> "$ENV_FILE"
echo "✅ Added Groq key as $var"
;;
2)
echo "MISTRAL_API_KEY=$api_key" >> "$ENV_FILE"
echo "✅ Added Mistral"
;;
3)
echo "ANTHROPIC_API_KEY=$api_key" >> "$ENV_FILE"
echo "✅ Added Claude (remember: expensive, use sparingly)"
;;
4)
echo "MOONSHOT_API_KEY=$api_key" >> "$ENV_FILE"
echo "✅ Added Kimi (great for coding!)"
;;
5)
echo "OPENROUTER_API_KEY=$api_key" >> "$ENV_FILE"
echo "✅ Added OpenRouter (access free tier models)"
;;
6)
echo "COHERE_API_KEY=$api_key" >> "$ENV_FILE"
echo "✅ Added Cohere (embeddings)"
;;
7)
echo "DEEPSEEK_API_KEY=$api_key" >> "$ENV_FILE"
echo "✅ Added DeepSeek (cheap reasoning)"
;;
8) exit 0 ;;
*) echo "Invalid choice" ; exit 1 ;;
esac
read -p "Restart services to apply? (y/N): " restart
[[ $restart =~ ^[Yy]$ ]] && docker-compose restart

scripts/status.sh
#!/bin/bash
echo "🧠 LLM Hub Status"
echo "================="
echo ""
# Container status
echo "📦 Containers:"
docker-compose ps --services --filter "status=running" 2>/dev/null | while read service; do
status=$(docker-compose ps -q "$service" | xargs docker inspect -f '{{.State.Status}}' 2>/dev/null)
echo " $service: $status"
done
echo ""
echo "🔍 Health Checks:"
# API health
if curl -s http://localhost:8080/health | grep -q "healthy"; then
echo " ✅ Agent Core: Healthy"
else
echo " ❌ Agent Core: Not responding"
fi
# LiteLLM
if curl -sf http://localhost:4000/health/liveliness > /dev/null; then
echo " ✅ LiteLLM: Running"
else
echo " ❌ LiteLLM: Not responding"
fi
echo ""
echo "📊 Router Stats:"
curl -s http://localhost:8080/health 2>/dev/null | python3 -m json.tool 2>/dev/null || echo " Unable to fetch stats"
echo ""
echo "💾 Memory Usage:"
docker stats --no-stream --format "table {{.Name}}\t{{.MemUsage}}" | grep -E "(agent-|NAME)" || true

services/agent-core/Dockerfile
FROM python:3.11-slim
WORKDIR /app
# Install system dependencies
RUN apt-get update && apt-get install -y \
build-essential \
git \
&& rm -rf /var/lib/apt/lists/*
# Install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy application
COPY main.py .
EXPOSE 8080
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]

services/agent-core/main.py
"""
Agentic AI Core - Multi-Reasoning Engine
Supports: ReAct, Plan-and-Execute, Reflexion
"""
import os
import json
from typing import List, Dict, Any, Literal, Optional
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field
from datetime import datetime
import httpx
import redis
app = FastAPI(
title="Agentic AI Core",
version="2.0.0",
description="Multi-reasoning agent platform with memory and MCP integration"
)
# Configuration
LLM_API_BASE = os.getenv("LLM_API_BASE", "http://litellm:4000/v1")
LLM_API_KEY = os.getenv("LLM_API_KEY", "sk-agent")
DEFAULT_REASONING = os.getenv("DEFAULT_REASONING_MODE", "auto")
# Redis for short-term memory
try:
redis_client = redis.from_url(
os.getenv("REDIS_URL", "redis://redis:6379"),
decode_responses=True
)
except Exception:
redis_client = None
# ==========================================
# DATA MODELS
# ==========================================
class AgentRequest(BaseModel):
message: str
session_id: str = Field(default="default", description="Conversation thread ID")
reasoning_mode: Literal["react", "plan_execute", "reflexion", "auto"] = DEFAULT_REASONING
context_files: Optional[List[str]] = Field(default_factory=list)
enable_memory: bool = True
max_iterations: int = 10
class AgentStep(BaseModel):
step_number: int
type: Literal["thought", "action", "observation", "reflection", "plan"]
content: str
timestamp: datetime = Field(default_factory=datetime.now)
class AgentResponse(BaseModel):
response: str
reasoning_mode: str
session_id: str
steps: List[AgentStep] = Field(default_factory=list)
metadata: Dict[str, Any] = Field(default_factory=dict)
# ==========================================
# UTILITY FUNCTIONS
# ==========================================
async def call_llm(messages: List[Dict], model: str = "auto", tools: Optional[List] = None) -> Dict:
"""Call LLM through LiteLLM gateway"""
async with httpx.AsyncClient() as client:
payload = {
"model": model,
"messages": messages,
"temperature": 0.7,
"max_tokens": 4000
}
if tools:
payload["tools"] = tools
response = await client.post(
f"{LLM_API_BASE}/chat/completions",
headers={"Authorization": f"Bearer {LLM_API_KEY}"},
json=payload,
timeout=60.0
)
return response.json()
def determine_reasoning_mode(message: str, requested: str) -> str:
"""Auto-select reasoning mode based on task complexity"""
if requested != "auto":
return requested
# Complexity indicators
complexity_markers = [
"plan", "design", "architecture", "steps", "implement",
"build", "create", "project", "complex", "multi-step"
]
msg_lower = message.lower()
score = sum(1 for marker in complexity_markers if marker in msg_lower)
if score >= 3 or len(message) > 500:
return "plan_execute"
elif "review" in msg_lower or "check" in msg_lower or "verify" in msg_lower:
return "reflexion"
else:
return "react"
# ==========================================
# REASONING ENGINES
# ==========================================
class ReActEngine:
"""ReAct: Reasoning + Acting in interleaved steps"""
async def run(self, message: str, session_id: str) -> Dict:
steps = []
# Initial thought
messages = [
{"role": "system", "content": "You are a ReAct agent. Think step by step and act."},
{"role": "user", "content": message}
]
response = await call_llm(messages, model="fast-tier")
steps.append(AgentStep(
step_number=1,
type="thought",
content="Initial analysis and reasoning"
))
content = response["choices"][0]["message"]["content"]
return {
"response": content,
"steps": steps,
"model_used": "fast-tier"
}
class PlanAndExecuteEngine:
"""Plan first, then execute step by step"""
async def run(self, message: str, session_id: str) -> Dict:
steps = []
# Planning phase
plan_messages = [
{"role": "system", "content": "Create a step-by-step plan to accomplish the task."},
{"role": "user", "content": f"Create a detailed plan for: {message}"}
]
plan_response = await call_llm(plan_messages, model="volume-tier")
plan = plan_response["choices"][0]["message"]["content"]
steps.append(AgentStep(
step_number=1,
type="plan",
content=plan
))
# Execution phase
exec_messages = [
{"role": "system", "content": "Execute the task following the provided plan."},
{"role": "user", "content": f"Task: {message}\n\nPlan: {plan}\n\nExecute this plan:"}
]
exec_response = await call_llm(exec_messages, model="reasoning-tier")
result = exec_response["choices"][0]["message"]["content"]
steps.append(AgentStep(
step_number=2,
type="action",
content="Execution completed"
))
return {
"response": result,
"steps": steps,
"model_used": "reasoning-tier",
"plan": plan
}
class ReflexionEngine:
"""Execute with self-reflection and correction"""
async def run(self, message: str, session_id: str, max_iterations: int = 2) -> Dict:
steps = []
# Initial execution
messages = [
{"role": "system", "content": "Solve the problem carefully."},
{"role": "user", "content": message}
]
response = await call_llm(messages, model="quality-tier")
answer = response["choices"][0]["message"]["content"]
steps.append(AgentStep(
step_number=1,
type="action",
content="Initial solution generated"
))
# Reflection phase
for i in range(max_iterations):
reflect_messages = [
{"role": "system", "content": "Critically evaluate the solution for errors or improvements."},
{"role": "user", "content": f"Problem: {message}\n\nProposed Solution: {answer}\n\nIdentify any issues or improvements:"}
]
reflect_response = await call_llm(reflect_messages, model="claude-haiku")
reflection = reflect_response["choices"][0]["message"]["content"]
if "correct" in reflection.lower() and "no issues" in reflection.lower():
break
steps.append(AgentStep(
step_number=2+i,
type="reflection",
content=reflection
))
# Improve based on reflection
improve_messages = [
{"role": "system", "content": "Improve the solution based on the critique."},
{"role": "user", "content": f"Original: {answer}\n\nIssues found: {reflection}\n\nProvide improved solution:"}
]
improve_response = await call_llm(improve_messages, model="quality-tier")
answer = improve_response["choices"][0]["message"]["content"]
return {
"response": answer,
"steps": steps,
"model_used": "quality-tier",
"iterations": len(steps)
}
# ==========================================
# API ENDPOINTS
# ==========================================
@app.post("/v1/chat/completions", response_model=AgentResponse)
async def agent_endpoint(request: AgentRequest):
"""
Main agent endpoint with multiple reasoning strategies:
- react: Fast iterative reasoning (good for simple tasks)
- plan_execute: Plan then execute (good for complex tasks)
- reflexion: Self-correcting (good for accuracy-critical tasks)
- auto: Automatically select based on task complexity
"""
# Determine reasoning mode
mode = determine_reasoning_mode(request.message, request.reasoning_mode)
# Store message in memory if enabled
if request.enable_memory and redis_client:
key = f"session:{request.session_id}:history"
redis_client.lpush(key, request.message)
redis_client.ltrim(key, 0, 99) # Keep last 100 messages
try:
# Route to appropriate reasoning engine
if mode == "react":
result = await ReActEngine().run(request.message, request.session_id)
elif mode == "plan_execute":
result = await PlanAndExecuteEngine().run(request.message, request.session_id)
elif mode == "reflexion":
result = await ReflexionEngine().run(
request.message,
request.session_id,
max_iterations=2
)
else:
# Default fallback
result = await ReActEngine().run(request.message, request.session_id)
return AgentResponse(
response=result["response"],
reasoning_mode=mode,
session_id=request.session_id,
steps=result.get("steps", []),
metadata={
"model_used": result.get("model_used", "unknown"),
"auto_selected": request.reasoning_mode == "auto",
"timestamp": datetime.now().isoformat()
}
)
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@app.get("/v1/models")
async def list_models():
"""List available agent models"""
return {
"object": "list",
"data": [
{
"id": "agent/orchestrator",
"object": "model",
"created": 1700000000,
"owned_by": "llm-hub",
"description": "Auto-selecting orchestrator"
},
{
"id": "agent/react",
"object": "model",
"created": 1700000000,
"owned_by": "llm-hub",
"description": "ReAct reasoning - fast iterative"
},
{
"id": "agent/plan-execute",
"object": "model",
"created": 1700000000,
"owned_by": "llm-hub",
"description": "Plan-and-Execute - complex tasks"
},
{
"id": "agent/reflexion",
"object": "model",
"created": 1700000000,
"owned_by": "llm-hub",
"description": "Reflexion - self-correcting with verification"
}
]
}
@app.get("/health")
async def health():
"""Health check endpoint"""
    try:
        redis_status = "connected" if redis_client and redis_client.ping() else "disconnected"
    except Exception:
        # A dead Redis should degrade the report, not turn /health into a 500
        redis_status = "disconnected"
return {
"status": "healthy",
"version": "2.0.0",
"capabilities": ["react", "plan_execute", "reflexion", "mcp", "memory"],
"default_mode": DEFAULT_REASONING,
"redis": redis_status,
"timestamp": datetime.now().isoformat()
}
@app.get("/sessions/{session_id}/history")
async def get_session_history(session_id: str, limit: int = 10):
"""Retrieve conversation history for a session"""
if not redis_client:
return {"error": "Redis not available"}
key = f"session:{session_id}:history"
history = redis_client.lrange(key, 0, limit - 1)
return {
"session_id": session_id,
"history": history,
"count": len(history)
}
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=8080)

services/agent-core/requirements.txt
fastapi==0.104.1
uvicorn[standard]==0.24.0
langgraph==0.2.53
langchain==0.3.0
langchain-openai==0.2.0
langchain-chroma==0.1.4
chromadb==0.5.0
redis==5.0.1
httpx==0.25.2
tiktoken==0.5.1
pydantic==2.5.0
python-multipart==0.0.6
aiofiles==23.2.1

services/mcpo/Dockerfile
FROM python:3.11-slim
WORKDIR /app
# Node.js/npm are needed for the npx-based MCP servers in servers.json; git for mcp-server-git
RUN apt-get update && apt-get install -y --no-install-recommends nodejs npm git \
    && rm -rf /var/lib/apt/lists/*
RUN pip install --no-cache-dir mcpo uv
COPY servers.json .
EXPOSE 8000
CMD ["mcpo", "--config", "servers.json", "--host", "0.0.0.0", "--port", "8000"]

services/mcpo/servers.json
{
"mcpServers": {
"filesystem": {
"command": "npx",
"args": ["-y", "@modelcontextprotocol/server-filesystem", "/workspace"]
},
"git": {
"command": "uvx",
"args": ["mcp-server-git"]
},
"fetch": {
"command": "uvx",
"args": ["mcp-server-fetch"]
},
"memory": {
"command": "npx",
"args": ["-y", "@modelcontextprotocol/server-memory"]
},
"sequential-thinking": {
"command": "npx",
"args": ["-y", "@modelcontextprotocol/server-sequential-thinking"]
}
}
}

setup.sh
#!/bin/bash
set -e
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
INSTALL_DIR="$(pwd)"
echo -e "${GREEN}🧠 Agentic AI Hub Setup${NC}"
echo "======================="
# Detect Proxmox LXC
if [ -f /proc/1/environ ] && grep -q "container=lxc" /proc/1/environ 2>/dev/null; then
echo -e "${YELLOW}✓ LXC container detected${NC}"
if ! grep -q "lxc.cgroup.relative" /etc/pve/lxc/*.conf 2>/dev/null; then
echo -e "${YELLOW}⚠️ Tip: For LXC with Docker, add to /etc/pve/lxc/XXX.conf:${NC}"
echo " lxc.cgroup.relative = 0"
echo " lxc.apparmor.profile = unconfined"
echo " lxc.cgroup.devices.allow = a"
fi
fi
# Check/install Docker
if ! command -v docker &> /dev/null; then
echo -e "${YELLOW}Installing Docker...${NC}"
curl -fsSL https://get.docker.com | sh
usermod -aG docker $USER || true
systemctl enable docker
systemctl start docker
fi
if ! command -v docker-compose &> /dev/null && ! docker compose version &> /dev/null; then
echo -e "${YELLOW}Installing Docker Compose...${NC}"
apt-get update && apt-get install -y docker-compose-plugin
fi
# Install Node.js for MCP
if ! command -v node &> /dev/null || [ "$(node -v | cut -d'v' -f2 | cut -d'.' -f1)" != "20" ]; then
echo -e "${YELLOW}Installing Node.js 20...${NC}"
curl -fsSL https://deb.nodesource.com/setup_20.x | bash -
apt-get install -y nodejs
fi
# Install uv for Python tools
if ! command -v uv &> /dev/null; then
pip install uv || pip3 install uv
fi
# Create directories
echo -e "${BLUE}Creating directories...${NC}"
mkdir -p data/{redis,chroma,agent/sessions,code-server,open-webui,neo4j} workspace logs
mkdir -p services/{agent-core,mcpo}
# Set permissions
chown -R 1000:1000 workspace data || true
chmod +x *.sh scripts/*.sh 2>/dev/null || true
# Create .env if not exists
if [ ! -f .env ]; then
echo -e "${YELLOW}Creating .env file...${NC}"
cp .env.example .env
# Generate random keys
sed -i "s/MASTER_KEY=.*/MASTER_KEY=sk-agent-$(openssl rand -hex 8)/" .env
sed -i "s/WEBUI_SECRET_KEY=.*/WEBUI_SECRET_KEY=$(openssl rand -hex 32)/" .env
echo -e "${GREEN}✓ .env created. Edit it to add your API keys.${NC}"
fi
# Create workspace gitkeep
touch workspace/.gitkeep
echo ""
echo -e "${GREEN}✅ Setup complete!${NC}"
echo ""
echo "Next steps:"
echo "1. Edit .env file: nano .env"
echo "2. Add your API keys (Groq, Mistral, etc.)"
echo "3. Start services: ./start.sh"
echo ""
echo "Documentation:"
echo " - Setup: docs/SETUP.md"
echo " - API: docs/API.md"
echo " - Providers: docs/PROVIDERS.md"

start.sh
#!/bin/bash
cd "$(dirname "$0")"
GREEN='\033[0;32m'
BLUE='\033[0;34m'
NC='\033[0m'
if [ ! -f .env ]; then
echo "Error: .env file not found. Run ./setup.sh first."
exit 1
fi
source .env
echo -e "${GREEN}🚀 Starting Agentic LLM Hub...${NC}"
echo ""
# Determine profile
PROFILE=${1:-full}
if [ "$PROFILE" = "minimal" ]; then
echo "Mode: Minimal (core services only)"
docker-compose up -d redis chromadb litellm agent-core
elif [ "$PROFILE" = "ide" ]; then
echo "Mode: Standard + IDE"
docker-compose --profile ide up -d
elif [ "$PROFILE" = "full" ]; then
echo "Mode: Full (all services including MCP tools)"
docker-compose --profile ide --profile mcp --profile ui up -d
else
echo "Usage: ./start.sh [minimal|ide|full]"
echo " minimal - Core services only (lowest resources)"
echo " ide - Core + VS Code IDE"
echo " full - Everything including MCP tools and Web UI"
exit 1
fi
echo ""
echo "Waiting for services..."
sleep 5
# Get IP
IP=$(hostname -I | awk '{print $1}')
echo ""
echo -e "${GREEN}✅ Services started!${NC}"
echo ""
echo "Access Points:"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
printf "${BLUE}%-22s${NC} %s\n" "Agent API:" "http://$IP:8080/v1"
printf "${BLUE}%-22s${NC} %s\n" "VS Code IDE:" "http://$IP:8443"
printf "${BLUE}%-22s${NC} %s\n" "LiteLLM Gateway:" "http://$IP:4000"
printf "${BLUE}%-22s${NC} %s\n" "MCP Tools:" "http://$IP:8001/docs"
printf "${BLUE}%-22s${NC} %s\n" "Web UI:" "http://$IP:3000"
printf "${BLUE}%-22s${NC} %s\n" "Vector DB:" "http://$IP:8000"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo ""
echo "Test command:"
echo "curl -X POST http://$IP:8080/v1/chat/completions \"
echo " -H 'Content-Type: application/json' \"
echo " -H 'Authorization: Bearer ${MASTER_KEY:0:20}...' \"
echo " -d '{"message":"Hello","reasoning_mode":"react"}'"
echo ""
echo "View logs: docker-compose logs -f"
echo "Stop: docker-compose down"

workspace/.gitkeep