🎉 DevOps Interview Prep Bundle is live — 1000+ Q&A across 20 topics. Get it →
All Articles

Build an AI DevOps Assistant with MCP and Claude API

Use the Model Context Protocol (MCP) with Claude API to build a DevOps assistant that can read Kubernetes state, run Terraform commands, and query metrics via natural language.

DevOpsBoys · 5 min read
Share: Tweet

MCP (Model Context Protocol) is an open standard that lets Claude connect to external data sources and tools. Instead of building custom tool integrations, you write an MCP server and Claude connects to it automatically. Here's how to build a DevOps MCP server.

What is MCP?

MCP lets you expose capabilities to Claude through a standardized interface:

  • Resources — data Claude can read (cluster state, config files, dashboards)
  • Tools — actions Claude can execute (run kubectl, apply terraform, query prometheus)
  • Prompts — reusable templates for common tasks

Claude Desktop, the Claude API, and Claude Code can all use MCP servers.

Setup

bash
# MCP SDK
pip install mcp anthropic
 
# DevOps tools
pip install kubernetes prometheus-api-client

Build the DevOps MCP Server

python
#!/usr/bin/env python3
"""
DevOps MCP Server - exposes Kubernetes and monitoring tools to Claude
"""
 
import asyncio
import json
import subprocess
from typing import Any
 
from mcp.server import Server
from mcp.server.models import InitializationOptions
from mcp.server.stdio import stdio_server
from mcp.types import (
    Resource,
    Tool,
    TextContent,
    EmbeddedResource,
    INVALID_PARAMS,
    INTERNAL_ERROR,
)
from kubernetes import client, config
 
# Initialize the Kubernetes client configuration.
# Try the in-cluster configuration first and fall back to the local kubeconfig
# when the process is not running inside a cluster.
try:
    config.load_incluster_config()
except config.ConfigException:
    # Only the "not in a cluster" failure triggers the fallback; any other
    # error is allowed to propagate instead of being silently masked.
    config.load_kube_config()

# Shared API clients. k8s_core is used by read_resource; k8s_apps is created
# here but not referenced by the visible handlers.
k8s_core = client.CoreV1Api()
k8s_apps = client.AppsV1Api()

# MCP server instance that the handler decorators register against.
server = Server("devops-assistant")
 
 
# ── RESOURCES ──────────────────────────────────────────────
 
@server.list_resources()
async def list_resources() -> list[Resource]:
    """Advertise the Kubernetes resources this server can serve as JSON."""
    # One (uri, name, description) row per advertised resource.
    catalog = (
        (
            "k8s://cluster/overview",
            "Kubernetes Cluster Overview",
            "Current state of all namespaces and key resources",
        ),
        (
            "k8s://nodes",
            "Kubernetes Nodes",
            "Node status and resource capacity",
        ),
    )
    return [
        Resource(
            uri=resource_uri,
            name=title,
            description=summary,
            mimeType="application/json",
        )
        for resource_uri, title, summary in catalog
    ]
 
 
@server.read_resource()
async def read_resource(uri: str) -> str:
    """Return the JSON document for one of the advertised resource URIs.

    Args:
        uri: Resource URI to read. It is normalised with ``str()`` before
            comparison: the MCP SDK hands this handler a pydantic URL object
            rather than a plain string, and such an object does not compare
            equal to a string literal.

    Returns:
        A JSON string indented by two spaces. For ``k8s://cluster/overview``
        it holds all namespace names plus a per-namespace pod summary for the
        first ten namespaces; for ``k8s://nodes`` it holds one entry per node.

    Raises:
        ValueError: If ``uri`` is not one of the known resource URIs.
    """
    uri_text = str(uri)

    if uri_text == "k8s://cluster/overview":
        namespaces = k8s_core.list_namespace()
        ns_names = [ns.metadata.name for ns in namespaces.items]

        overview = {"namespaces": ns_names, "summary": {}}
        for ns in ns_names[:10]:  # limit for size
            try:
                pods = k8s_core.list_namespaced_pod(ns)
                running = sum(1 for p in pods.items if p.status.phase == "Running")
                total = len(pods.items)
                overview["summary"][ns] = {"pods_running": running, "pods_total": total}
            except Exception as exc:
                # Best effort: one unreadable namespace must not fail the whole
                # overview, but report why it is missing instead of hiding it.
                overview["summary"][ns] = {
                    "error": getattr(exc, "reason", None) or str(exc)
                }

        return json.dumps(overview, indent=2)

    if uri_text == "k8s://nodes":
        nodes = k8s_core.list_node()
        node_info = []
        for node in nodes.items:
            conditions = {c.type: c.status for c in (node.status.conditions or [])}
            capacity = node.status.capacity or {}
            node_info.append({
                "name": node.metadata.name,
                "ready": conditions.get("Ready") == "True",
                "cpu": capacity.get("cpu"),
                "memory": capacity.get("memory"),
                # Only the first five labels are included to keep the payload small.
                "labels": dict(list((node.metadata.labels or {}).items())[:5]),
            })
        return json.dumps(node_info, indent=2)

    raise ValueError(f"Unknown resource: {uri}")
 
 
# ── TOOLS ──────────────────────────────────────────────────
 
@server.list_tools()
async def list_tools() -> list[Tool]:
    """Describe every tool this server exposes, with its JSON input schema."""

    def object_schema(properties: dict, required: list) -> dict:
        # Every tool takes a JSON object; only the fields and required keys vary.
        return {"type": "object", "properties": properties, "required": required}

    def text_field(description=None) -> dict:
        # A fresh dict per call so no schema shares a mutable field definition.
        field = {"type": "string"}
        if description is not None:
            field["description"] = description
        return field

    kubectl_get = Tool(
        name="kubectl_get",
        description="Run kubectl get to list Kubernetes resources",
        inputSchema=object_schema(
            {
                "resource": text_field("Resource type (pods, deployments, services, etc.)"),
                "namespace": text_field("Namespace (default: all namespaces)"),
                "label_selector": text_field("Label selector like 'app=myapp'"),
            },
            ["resource"],
        ),
    )

    kubectl_describe = Tool(
        name="kubectl_describe",
        description="Describe a specific Kubernetes resource",
        inputSchema=object_schema(
            {
                "resource": text_field(),
                "name": text_field(),
                "namespace": text_field(),
            },
            ["resource", "name"],
        ),
    )

    kubectl_logs = Tool(
        name="kubectl_logs",
        description="Get logs from a pod",
        inputSchema=object_schema(
            {
                "pod_name": text_field(),
                "namespace": {"type": "string", "default": "default"},
                "tail": {"type": "integer", "default": 50},
                "container": text_field(),
            },
            ["pod_name"],
        ),
    )

    terraform_plan = Tool(
        name="terraform_plan",
        description="Run terraform plan in a directory and return the output",
        inputSchema=object_schema(
            {
                "directory": text_field("Path to terraform directory"),
                "var_file": text_field("Optional var file path"),
            },
            ["directory"],
        ),
    )

    run_safe_command = Tool(
        name="run_safe_command",
        description="Run a read-only shell command (no writes, no destructive ops)",
        inputSchema=object_schema(
            {
                "command": text_field("Command to run (read-only operations only)"),
            },
            ["command"],
        ),
    )

    return [
        kubectl_get,
        kubectl_describe,
        kubectl_logs,
        terraform_plan,
        run_safe_command,
    ]
 
 
def _required_positional(arguments: dict, key: str) -> str:
    """Return required argument *key*, refusing values that look like CLI flags."""
    value = str(arguments[key])
    if value.startswith("-"):
        # A positional value starting with '-' would be parsed by the CLI as an
        # option, letting the caller smuggle in arbitrary flags.
        raise ValueError(f"Invalid {key} {value!r}: must not start with '-'")
    return value


async def _run_subprocess(cmd: list, *, timeout: int, cwd=None) -> subprocess.CompletedProcess:
    """Run *cmd* (no shell) in a worker thread and capture its text output."""
    # subprocess.run blocks; running it in a thread keeps the event loop free
    # to service other requests while the command executes.
    return await asyncio.to_thread(
        subprocess.run,
        cmd,
        capture_output=True,
        text=True,
        timeout=timeout,
        cwd=cwd,
    )


def _text_result(text: str, limit=None) -> list[TextContent]:
    """Wrap *text* (truncated to *limit* characters, if given) as one text item."""
    return [TextContent(type="text", text=text[:limit])]


@server.call_tool()
async def call_tool(name: str, arguments: dict) -> list[TextContent]:
    """Execute the named tool and return its (truncated) output as text.

    Args:
        name: One of ``kubectl_get``, ``kubectl_describe``, ``kubectl_logs``,
            ``terraform_plan`` or ``run_safe_command``.
        arguments: Tool arguments as described by the tool's input schema.

    Returns:
        A single-item list with the command output. Failures (unknown tool,
        missing or invalid argument, timeout, any other exception) are
        reported as text in the same shape rather than raised.
    """
    import shlex  # used only by run_safe_command; imported locally

    arguments = arguments or {}
    try:
        if name == "kubectl_get":
            cmd = ["kubectl", "get", _required_positional(arguments, "resource"), "-o", "wide"]
            if arguments.get("namespace"):
                cmd.extend(["-n", arguments["namespace"]])
            else:
                # No namespace given: list across all namespaces.
                cmd.append("-A")
            if arguments.get("label_selector"):
                cmd.extend(["-l", arguments["label_selector"]])

            result = await _run_subprocess(cmd, timeout=30)
            return _text_result(result.stdout or result.stderr, 5000)

        if name == "kubectl_describe":
            cmd = [
                "kubectl",
                "describe",
                _required_positional(arguments, "resource"),
                _required_positional(arguments, "name"),
            ]
            if arguments.get("namespace"):
                cmd.extend(["-n", arguments["namespace"]])

            result = await _run_subprocess(cmd, timeout=30)
            return _text_result(result.stdout or result.stderr, 8000)

        if name == "kubectl_logs":
            cmd = ["kubectl", "logs", _required_positional(arguments, "pod_name")]
            if arguments.get("namespace"):
                cmd.extend(["-n", arguments["namespace"]])
            if arguments.get("container"):
                cmd.extend(["-c", arguments["container"]])
            cmd.extend(["--tail", str(arguments.get("tail", 50))])

            result = await _run_subprocess(cmd, timeout=30)
            return _text_result(result.stdout or result.stderr, 5000)

        if name == "terraform_plan":
            directory = arguments["directory"]
            cmd = ["terraform", "plan", "-no-color"]
            if arguments.get("var_file"):
                cmd.append(f"-var-file={arguments['var_file']}")

            result = await _run_subprocess(cmd, timeout=120, cwd=directory)
            return _text_result(result.stdout + result.stderr, 8000)

        if name == "run_safe_command":
            command = arguments["command"]

            # Safety check - block destructive commands
            blocked = ["rm ", "delete", "drop ", "truncate", "format", "> /", "dd if"]
            if any(b in command.lower() for b in blocked):
                return _text_result("Error: Destructive commands are not allowed")

            # Split into argv and run WITHOUT a shell, so pipes, redirects,
            # ';'/'&&' chaining and $(...) substitution are never interpreted
            # and cannot be used to slip past the check above.
            # NOTE(review): a substring denylist cannot guarantee read-only
            # behaviour; consider an explicit allowlist of executables.
            argv = shlex.split(command)
            if not argv:
                return _text_result("Error: Empty command")

            result = await _run_subprocess(argv, timeout=30)
            return _text_result(result.stdout + result.stderr, 5000)

        return _text_result(f"Unknown tool: {name}")

    except subprocess.TimeoutExpired as exc:
        # Report the timeout that actually applied (30s or 120s).
        return _text_result(f"Command timed out after {exc.timeout} seconds")
    except KeyError as exc:
        return _text_result(f"Error: Missing required argument: {exc.args[0]}")
    except Exception as exc:
        return _text_result(f"Error: {exc}")
 
 
async def main():
    """Run the MCP server over the stdio transport."""
    # Imported locally; it lives in the same package as Server.
    from mcp.server import NotificationOptions

    async with stdio_server() as (read_stream, write_stream):
        await server.run(
            read_stream,
            write_stream,
            InitializationOptions(
                server_name="devops-assistant",
                server_version="1.0.0",
                capabilities=server.get_capabilities(
                    # get_capabilities reads attributes from this object, so
                    # passing None fails at startup; use the default options.
                    notification_options=NotificationOptions(),
                    experimental_capabilities={},
                ),
            ),
        )


if __name__ == "__main__":
    asyncio.run(main())

Configure Claude Desktop to Use Your Server

json
// ~/Library/Application Support/Claude/claude_desktop_config.json
{
  "mcpServers": {
    "devops-assistant": {
      "command": "python",
      "args": ["/path/to/devops_mcp_server.py"],
      "env": {
        "KUBECONFIG": "/path/to/.kube/config"
      }
    }
  }
}

Now in Claude Desktop, you can ask:

"What pods are failing in the production namespace?" "Show me the logs from the nginx pod" "Run terraform plan in /infra/prod and tell me if there are any risky changes"

Claude will automatically call your MCP tools and synthesize the results.

Use via Claude API (Programmatic)

python
import anthropic
import json
 
# API client shared by run_devops_assistant.
client = anthropic.Anthropic()

# Tool definitions sent with each request so the model can ask for them.
tools = [
    dict(
        name="kubectl_get",
        description="List Kubernetes resources",
        input_schema=dict(
            type="object",
            properties=dict(
                resource=dict(type="string"),
                namespace=dict(type="string"),
            ),
            required=["resource"],
        ),
    ),
]
 
def run_devops_assistant(question: str) -> str:
    """Answer *question*, executing requested tools until the model finishes.

    Each turn sends the conversation to the model. While the model stops with
    ``tool_use``, every requested tool is executed and its result is sent back
    in the next user message; otherwise the model's text is returned.

    Args:
        question: The user's natural-language question.

    Returns:
        The concatenated text blocks of the final response (an empty string
        if the response contains no text).
    """
    messages = [{"role": "user", "content": question}]

    while True:
        response = client.messages.create(
            model="claude-opus-4-8",
            max_tokens=2000,
            tools=tools,
            messages=messages,
        )

        if response.stop_reason != "tool_use":
            # Do not assume the first content block is text.
            return "".join(
                block.text for block in response.content if block.type == "text"
            )

        # A single turn may request several tools; each tool_use block needs
        # its own tool_result in the following user message.
        # NOTE(review): execute_tool is not defined in the visible source and
        # must be provided elsewhere.
        tool_results = [
            {
                "type": "tool_result",
                "tool_use_id": block.id,
                "content": execute_tool(block.name, block.input),
            }
            for block in response.content
            if block.type == "tool_use"
        ]

        messages.append({"role": "assistant", "content": response.content})
        messages.append({"role": "user", "content": tool_results})
 
 
# Example usage: ask one question and print the assistant's final answer.
result = run_devops_assistant("Are there any pods in CrashLoopBackOff state?")
print(result)

MCP makes it trivially easy to extend Claude with your own infrastructure tools. Once the server is written, any MCP-compatible client — Claude Desktop, Claude Code, or the API — can use it.

Resources: MCP docs | Anthropic MCP guide

🔧

Today I Fixed

Short real fixes from production — posted daily

Browse fixes
Newsletter

Stay ahead of the curve

Get the latest DevOps, Kubernetes, AWS, and AI/ML guides delivered straight to your inbox. No spam — just practical engineering content.

Related Articles

Comments