Add local LLM setup script using Ollama

- Auto-detects GPU (NVIDIA/AMD/Intel/CPU)
- Installs appropriate models based on VRAM
- Creates helper commands: llm-start, llm-stop, llm-list, llm-chat
- Sets up systemd service for auto-start
- API endpoint at localhost:11434 for integration
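
The endpoint exposes Ollama's standard HTTP API, so other tools can query the local models directly. A minimal sketch of such a call (assuming the llama3.2:1b model pulled by the script has finished downloading; the prompt text is only illustrative):

# One-shot completion against the local Ollama API
curl -s http://localhost:11434/api/generate -d '{
  "model": "llama3.2:1b",
  "prompt": "Reply with a one-line greeting.",
  "stream": false
}'
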
devmatrix 2026-02-18 13:58:27 +00:00
parent f77ea9a05d
commit 1bd38de10b
1 changed file with 172 additions and 0 deletions

ai/setup-local-llm.sh Executable file

@@ -0,0 +1,172 @@
#!/bin/bash
# Local LLM Setup Script for DevMatrix
# Supports CPU and GPU (CUDA/ROCm)
# Source: https://git.lemonlink.eu/devmatrix/devmatrix-scripts
set -e
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
log() { echo -e "${BLUE}[LLM-SETUP]${NC} $1"; }
success() { echo -e "${GREEN}[✓]${NC} $1"; }
warning() { echo -e "${YELLOW}[!]${NC} $1"; }
error() { echo -e "${RED}[✗]${NC} $1"; }
log "🤖 Setting up Local LLM Environment"
log "===================================="
# Detect GPU
log "Detecting GPU..."
if command -v nvidia-smi &> /dev/null; then
    GPU_TYPE="nvidia"
    GPU_INFO=$(nvidia-smi --query-gpu=name,memory.total --format=csv,noheader 2>/dev/null || echo "Unknown")
    success "NVIDIA GPU detected: $GPU_INFO"
# Restrict lspci matches to display-class devices so AMD/Intel CPUs and chipsets
# don't trigger a false "GPU detected"
elif lspci | grep -iE 'vga|3d|display' | grep -qi amd; then
    GPU_TYPE="amd"
    success "AMD GPU detected"
elif lspci | grep -iE 'vga|3d|display' | grep -qi intel; then
    GPU_TYPE="intel"
    success "Intel GPU detected"
else
    GPU_TYPE="cpu"
    warning "No dedicated GPU detected, will use CPU"
fi
# Install Ollama
log "Installing Ollama..."
if ! command -v ollama &> /dev/null; then
    curl -fsSL https://ollama.com/install.sh | sh
    success "Ollama installed"
else
    success "Ollama already installed"
fi
# Setup Ollama service
log "Setting up Ollama service..."
sudo systemctl enable ollama 2>/dev/null || true
sudo systemctl start ollama 2>/dev/null || true
# Wait for service
sleep 3
# Pull recommended models based on GPU
log "Downloading recommended models..."
if [ "$GPU_TYPE" == "nvidia" ]; then
# Check VRAM
VRAM=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits 2>/dev/null | head -1)
log "Detected VRAM: ${VRAM}MB"
if [ "$VRAM" -gt 8000 ]; then
# 8GB+ VRAM - can run larger models
log "8GB+ VRAM detected - installing larger models"
ollama pull llama3.2:3b
ollama pull qwen2.5:3b
ollama pull phi3:medium
elif [ "$VRAM" -gt 4000 ]; then
# 4-8GB VRAM
log "4-8GB VRAM detected - installing medium models"
ollama pull llama3.2:3b
ollama pull qwen2.5:3b
ollama pull gemma2:2b
else
# Less than 4GB
log "Under 4GB VRAM - installing lightweight models"
ollama pull llama3.2:1b
ollama pull qwen2.5:0.5b
fi
else
# CPU-only
log "CPU-only mode - installing lightweight models"
ollama pull llama3.2:1b
ollama pull qwen2.5:0.5b
fi
# Create helper scripts
log "Creating helper scripts..."
# Start Ollama
# /usr/local/bin needs root, so write the helpers with sudo tee
sudo tee /usr/local/bin/llm-start > /dev/null << 'EOF'
#!/bin/bash
sudo systemctl start ollama
echo "Ollama started. Available models:"
ollama list
EOF
sudo chmod +x /usr/local/bin/llm-start
# Stop Ollama
sudo tee /usr/local/bin/llm-stop > /dev/null << 'EOF'
#!/bin/bash
sudo systemctl stop ollama
echo "Ollama stopped"
EOF
sudo chmod +x /usr/local/bin/llm-stop
# List models
sudo tee /usr/local/bin/llm-list > /dev/null << 'EOF'
#!/bin/bash
echo "Available models:"
ollama list
echo ""
echo "Quick start:"
echo " ollama run llama3.2:1b # Fast, basic"
echo " ollama run llama3.2:3b # Better quality"
echo " ollama run qwen2.5:3b # Good for coding"
EOF
sudo chmod +x /usr/local/bin/llm-list
# Chat with model
sudo tee /usr/local/bin/llm-chat > /dev/null << 'EOF'
#!/bin/bash
MODEL=${1:-llama3.2:1b}
echo "Starting chat with $MODEL (Ctrl+D to exit)"
ollama run "$MODEL"
EOF
sudo chmod +x /usr/local/bin/llm-chat
# Create systemd service for auto-start
cat > /tmp/ollama.service << 'EOF'
[Unit]
Description=Ollama Local LLM Service
After=network.target
[Service]
Type=simple
User=devmatrix
Group=devmatrix
ExecStart=/usr/local/bin/ollama serve
Restart=always
RestartSec=3
Environment="HOME=/home/devmatrix"
[Install]
WantedBy=multi-user.target
EOF
sudo mv /tmp/ollama.service /etc/systemd/system/ollama.service
sudo systemctl daemon-reload
sudo systemctl enable ollama
# Success summary
success "🎉 Local LLM setup complete!"
echo ""
echo "Available commands:"
echo " llm-start - Start Ollama service"
echo " llm-stop - Stop Ollama service"
echo " llm-list - List available models"
echo " llm-chat - Start chat (default: llama3.2:1b)"
echo ""
echo "Installed models:"
ollama list 2>/dev/null || echo " (Ollama service not reachable yet - run llm-list once it is up)"
echo ""
echo "Quick start:"
echo " ollama run llama3.2:1b"
echo ""
echo "API endpoint: http://localhost:11434"
echo ""
echo "To integrate with Mission Control, set:"
echo " OLLAMA_URL=http://localhost:11434"