From 1bd38de10b066b5f8b671de76713c0650ac227b2 Mon Sep 17 00:00:00 2001
From: devmatrix
Date: Wed, 18 Feb 2026 13:58:27 +0000
Subject: [PATCH] Add local LLM setup script using Ollama

- Auto-detects GPU (NVIDIA/AMD/Intel/CPU)
- Installs appropriate models based on VRAM
- Creates helper commands: llm-start, llm-stop, llm-list, llm-chat
- Sets up systemd service for auto-start
- API endpoint at localhost:11434 for integration
---
 ai/setup-local-llm.sh | 182 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 182 insertions(+)
 create mode 100755 ai/setup-local-llm.sh

diff --git a/ai/setup-local-llm.sh b/ai/setup-local-llm.sh
new file mode 100755
index 0000000..e981a0d
--- /dev/null
+++ b/ai/setup-local-llm.sh
@@ -0,0 +1,182 @@
+#!/bin/bash
+# Local LLM Setup Script for DevMatrix
+# Supports CPU and GPU (CUDA/ROCm)
+# Source: https://git.lemonlink.eu/devmatrix/devmatrix-scripts
+
+set -e
+
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m'
+
+log() { echo -e "${BLUE}[LLM-SETUP]${NC} $1"; }
+success() { echo -e "${GREEN}[✓]${NC} $1"; }
+warning() { echo -e "${YELLOW}[!]${NC} $1"; }
+error() { echo -e "${RED}[✗]${NC} $1"; }
+
+log "🤖 Setting up Local LLM Environment"
+log "===================================="
+
+# Detect GPU (match display controllers only, so AMD/Intel chipsets don't false-positive)
+log "Detecting GPU..."
+if command -v nvidia-smi &> /dev/null; then
+    GPU_TYPE="nvidia"
+    GPU_INFO=$(nvidia-smi --query-gpu=name,memory.total --format=csv,noheader 2>/dev/null || echo "Unknown")
+    success "NVIDIA GPU detected: $GPU_INFO"
+elif lspci | grep -iE 'vga|3d|display' | grep -qi amd; then
+    GPU_TYPE="amd"
+    success "AMD GPU detected"
+elif lspci | grep -iE 'vga|3d|display' | grep -qi intel; then
+    GPU_TYPE="intel"
+    success "Intel GPU detected"
+else
+    GPU_TYPE="cpu"
+    warning "No dedicated GPU detected, will use CPU"
+fi
+
+# Install Ollama
+log "Installing Ollama..."
+if ! command -v ollama &> /dev/null; then
+    curl -fsSL https://ollama.com/install.sh | sh
+    success "Ollama installed"
+else
+    success "Ollama already installed"
+fi
+
+# Setup Ollama service
+log "Setting up Ollama service..."
+sudo systemctl enable ollama 2>/dev/null || true
+sudo systemctl start ollama 2>/dev/null || true
+
+# Wait for service
+sleep 3
+
+# Pull recommended models based on GPU
+log "Downloading recommended models..."
+
+if [ "$GPU_TYPE" == "nvidia" ]; then
+    # Check VRAM (fall back to 0 if the query fails)
+    VRAM=$(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits 2>/dev/null | head -1)
+    VRAM=${VRAM:-0}
+    log "Detected VRAM: ${VRAM}MB"
+
+    if [ "$VRAM" -gt 8000 ]; then
+        # 8GB+ VRAM - can run larger models
+        log "8GB+ VRAM detected - installing larger models"
+        ollama pull llama3.2:3b
+        ollama pull qwen2.5:3b
+        ollama pull phi3:medium
+    elif [ "$VRAM" -gt 4000 ]; then
+        # 4-8GB VRAM
+        log "4-8GB VRAM detected - installing medium models"
+        ollama pull llama3.2:3b
+        ollama pull qwen2.5:3b
+        ollama pull gemma2:2b
+    else
+        # Less than 4GB
+        log "Under 4GB VRAM - installing lightweight models"
+        ollama pull llama3.2:1b
+        ollama pull qwen2.5:0.5b
+    fi
+else
+    # CPU-only
+    log "CPU-only mode - installing lightweight models"
+    ollama pull llama3.2:1b
+    ollama pull qwen2.5:0.5b
+fi
+
+# Create helper scripts (via sudo tee: /usr/local/bin is not writable as a regular user)
+log "Creating helper scripts..."
+
+# Start Ollama
+sudo tee /usr/local/bin/llm-start > /dev/null << 'EOF'
+#!/bin/bash
+sudo systemctl start ollama
+echo "Ollama started. Available models:"
+ollama list
+EOF
+sudo chmod +x /usr/local/bin/llm-start
+
+# Stop Ollama
+sudo tee /usr/local/bin/llm-stop > /dev/null << 'EOF'
+#!/bin/bash
+sudo systemctl stop ollama
+echo "Ollama stopped"
+EOF
+sudo chmod +x /usr/local/bin/llm-stop
+
+# List models
+sudo tee /usr/local/bin/llm-list > /dev/null << 'EOF'
+#!/bin/bash
+echo "Available models:"
+ollama list
+echo ""
+echo "Quick start:"
+echo "  ollama run llama3.2:1b    # Fast, basic"
+echo "  ollama run llama3.2:3b    # Better quality"
+echo "  ollama run qwen2.5:3b     # Good for coding"
+EOF
+sudo chmod +x /usr/local/bin/llm-list
+
+# Chat with model
+sudo tee /usr/local/bin/llm-chat > /dev/null << 'EOF'
+#!/bin/bash
+MODEL=${1:-llama3.2:1b}
+echo "Starting chat with $MODEL (Ctrl+D to exit)"
+ollama run "$MODEL"
+EOF
+sudo chmod +x /usr/local/bin/llm-chat
+
+# Create systemd service for auto-start
+cat > /tmp/ollama.service << 'EOF'
+[Unit]
+Description=Ollama Local LLM Service
+After=network.target
+
+[Service]
+Type=simple
+User=devmatrix
+Group=devmatrix
+ExecStart=/usr/local/bin/ollama serve
+Restart=always
+RestartSec=3
+Environment="HOME=/home/devmatrix"
+
+[Install]
+WantedBy=multi-user.target
+EOF
+
+sudo mv /tmp/ollama.service /etc/systemd/system/ollama.service
+sudo systemctl daemon-reload
+sudo systemctl enable ollama
+sudo systemctl restart ollama  # pick up the replacement unit
+
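+# Quick API smoke test: /api/tags is Ollama's endpoint for listing
+# installed models, and 11434 is its default port (adjust if customized).
+if curl -fsS http://localhost:11434/api/tags > /dev/null 2>&1; then
+    success "Ollama API reachable at http://localhost:11434"
+else
+    warning "Ollama API not responding yet - it may still be starting"
+fi
+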
+# Success summary
+success "🎉 Local LLM setup complete!"
+echo ""
+echo "Available commands:"
+echo "  llm-start  - Start Ollama service"
+echo "  llm-stop   - Stop Ollama service"
+echo "  llm-list   - List available models"
+echo "  llm-chat   - Start chat (default: llama3.2:1b)"
+echo ""
+echo "Installed models:"
+ollama list 2>/dev/null || echo "  (Models downloading in background)"
+echo ""
+echo "Quick start:"
+echo "  ollama run llama3.2:1b"
+echo ""
+echo "API endpoint: http://localhost:11434"
+echo ""
+echo "To integrate with Mission Control, set:"
+echo "  OLLAMA_URL=http://localhost:11434"
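
Smoke-testing the API endpoint: /api/generate is Ollama's standard completion
route, so once the service is up, a single curl call (using the llama3.2:1b
tag the script pulls on low-VRAM and CPU-only machines) exercises the same
integration path Mission Control would use:

  curl -s http://localhost:11434/api/generate \
    -d '{"model": "llama3.2:1b", "prompt": "Say hello", "stream": false}'

With "stream": false the reply arrives as one JSON object whose "response"
field carries the generated text; Mission Control itself needs nothing beyond
OLLAMA_URL=http://localhost:11434 pointing at the same host.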