#!/bin/bash
# Local LLM Setup Script for DevMatrix
# Supports CPU and GPU (CUDA/ROCm)
# Source: https://git.lemonlink.eu/devmatrix/devmatrix-scripts

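# Usage: run with sudo rights available. The script installs Ollama, pulls
# models sized to the detected GPU, creates llm-* helper commands, and
# installs an ollama systemd unit.
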
set -e

RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

log() { echo -e "${BLUE}[LLM-SETUP]${NC} $1"; }
success() { echo -e "${GREEN}[✓]${NC} $1"; }
warning() { echo -e "${YELLOW}[!]${NC} $1"; }
error() { echo -e "${RED}[✗]${NC} $1"; }

log "🤖 Setting up Local LLM Environment"
|
|
log "===================================="
|
|
|
|
# Detect GPU
log "Detecting GPU..."
if command -v nvidia-smi &> /dev/null; then
    GPU_TYPE="nvidia"
    GPU_INFO=$(nvidia-smi --query-gpu=name,memory.total --format=csv,noheader 2>/dev/null || echo "Unknown")
    success "NVIDIA GPU detected: $GPU_INFO"
elif lspci 2>/dev/null | grep -iE 'vga|3d|display' | grep -qi amd; then
    # Match display-class devices only, so AMD chipset entries are not mistaken for a GPU
    GPU_TYPE="amd"
    success "AMD GPU detected"
elif lspci 2>/dev/null | grep -iE 'vga|3d|display' | grep -qi intel; then
    GPU_TYPE="intel"
    success "Intel GPU detected"
else
    GPU_TYPE="cpu"
    warning "No dedicated GPU detected, will use CPU"
fi

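# NOTE: GPU_TYPE is only used below to size the model downloads; the GPU
# runtime itself (CUDA or ROCm) is expected to be handled by the upstream
# Ollama installer.
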
# Install Ollama
log "Installing Ollama..."
if ! command -v ollama &> /dev/null; then
    curl -fsSL https://ollama.com/install.sh | sh
    success "Ollama installed"
else
    success "Ollama already installed"
fi

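# Note: on systemd systems the upstream installer usually creates and starts
# an "ollama" service already, so the enable/start below may be a no-op.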
# Setup Ollama service
log "Setting up Ollama service..."
sudo systemctl enable ollama 2>/dev/null || true
sudo systemctl start ollama 2>/dev/null || true

# Wait for the Ollama API to answer before pulling models (up to ~30s)
for _ in 1 2 3 4 5 6 7 8 9 10; do
    curl -fsS http://localhost:11434/api/tags > /dev/null 2>&1 && break
    sleep 3
done

# Pull recommended models based on GPU
log "Downloading recommended models..."

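# 'ollama pull' only fetches layers that are missing locally, so re-running
# this script does not re-download models that are already installed.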
if [ "$GPU_TYPE" == "nvidia" ]; then
|
|
# Check VRAM
|
|
VRAM=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits 2>/dev/null | head -1)
|
|
log "Detected VRAM: ${VRAM}MB"
|
|
|
|
if [ "$VRAM" -gt 8000 ]; then
|
|
# 8GB+ VRAM - can run larger models
|
|
log "8GB+ VRAM detected - installing larger models"
|
|
ollama pull llama3.2:3b
|
|
ollama pull qwen2.5:3b
|
|
ollama pull phi3:medium
|
|
elif [ "$VRAM" -gt 4000 ]; then
|
|
# 4-8GB VRAM
|
|
log "4-8GB VRAM detected - installing medium models"
|
|
ollama pull llama3.2:3b
|
|
ollama pull qwen2.5:3b
|
|
ollama pull gemma2:2b
|
|
else
|
|
# Less than 4GB
|
|
log "Under 4GB VRAM - installing lightweight models"
|
|
ollama pull llama3.2:1b
|
|
ollama pull qwen2.5:0.5b
|
|
fi
|
|
else
|
|
# CPU-only
|
|
log "CPU-only mode - installing lightweight models"
|
|
ollama pull llama3.2:1b
|
|
ollama pull qwen2.5:0.5b
|
|
fi
|
|
|
|
# Create helper scripts
log "Creating helper scripts..."

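# Each helper below is a small wrapper around systemctl and the ollama CLI,
# installed to /usr/local/bin so it is on the default PATH.
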
# Start Ollama (written via sudo tee so the script does not have to run as root)
sudo tee /usr/local/bin/llm-start > /dev/null << 'EOF'
#!/bin/bash
sudo systemctl start ollama
echo "Ollama started. Available models:"
ollama list
EOF
sudo chmod +x /usr/local/bin/llm-start

# Stop Ollama
sudo tee /usr/local/bin/llm-stop > /dev/null << 'EOF'
#!/bin/bash
sudo systemctl stop ollama
echo "Ollama stopped"
EOF
sudo chmod +x /usr/local/bin/llm-stop

# List models
sudo tee /usr/local/bin/llm-list > /dev/null << 'EOF'
#!/bin/bash
echo "Available models:"
ollama list
echo ""
echo "Quick start:"
echo "  ollama run llama3.2:1b    # Fast, basic"
echo "  ollama run llama3.2:3b    # Better quality"
echo "  ollama run qwen2.5:3b     # Good for coding"
EOF
sudo chmod +x /usr/local/bin/llm-list

# Chat with model
sudo tee /usr/local/bin/llm-chat > /dev/null << 'EOF'
#!/bin/bash
MODEL=${1:-llama3.2:1b}
echo "Starting chat with $MODEL (Ctrl+D to exit)"
ollama run "$MODEL"
EOF
sudo chmod +x /usr/local/bin/llm-chat

# Create systemd service for auto-start
cat > /tmp/ollama.service << 'EOF'
[Unit]
Description=Ollama Local LLM Service
After=network.target

[Service]
Type=simple
User=devmatrix
Group=devmatrix
ExecStart=/usr/local/bin/ollama serve
Restart=always
RestartSec=3
Environment="HOME=/home/devmatrix"

[Install]
WantedBy=multi-user.target
EOF

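# NOTE: this unit replaces the service definition created by the Ollama
# installer. With HOME=/home/devmatrix, the server looks for models under
# /home/devmatrix/.ollama, so models pulled while the installer's service
# (user "ollama") was active may need to be pulled again.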
sudo mv /tmp/ollama.service /etc/systemd/system/ollama.service
sudo systemctl daemon-reload
sudo systemctl enable ollama

# Success summary
success "🎉 Local LLM setup complete!"
echo ""
echo "Available commands:"
echo "  llm-start  - Start Ollama service"
echo "  llm-stop   - Stop Ollama service"
echo "  llm-list   - List available models"
echo "  llm-chat   - Start chat (default: llama3.2:1b)"
echo ""
echo "Installed models:"
ollama list 2>/dev/null || echo "  (Models downloading in background)"
echo ""
echo "Quick start:"
echo "  ollama run llama3.2:1b"
echo ""
echo "API endpoint: http://localhost:11434"
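# Minimal example of calling the local REST API (generate endpoint)
echo ""
echo "Example API call:"
echo "  curl http://localhost:11434/api/generate -d '{\"model\": \"llama3.2:1b\", \"prompt\": \"Hello\"}'"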
echo ""
|
|
echo "To integrate with Mission Control, set:"
|
|
echo " OLLAMA_URL=http://localhost:11434"
|