Build a Kubernetes Cost Optimization Bot with AI in 2026
Build an AI-powered bot that analyzes your Kubernetes cluster, finds idle resources, oversized pods, and unused namespaces — and gives cost-cutting recommendations.
Your Kubernetes bill is growing faster than your traffic. You have pods running at 5% CPU utilization, namespaces nobody uses, and oversized node groups provisioned for a peak that never came.
Let's build a bot that scans your cluster, identifies waste, and generates actionable recommendations using Claude AI.
What We're Building
A Python script that:
- Queries your cluster's resource usage via Kubernetes API + Metrics Server
- Identifies cost waste: idle pods, oversized requests, unused namespaces
- Sends the findings to Claude API for human-readable recommendations
- Outputs a prioritized action list
No SaaS required. Runs anywhere.
Prerequisites
pip install kubernetes anthropic requests

You need:
- kubectl configured with cluster access
- Metrics Server running in your cluster (kubectl top pods works)
- Anthropic API key
Step 1: Collect Cluster Data
# cost_bot.py
import json
from datetime import date

import anthropic
from kubernetes import client, config
def _resource_dict(spec):
    """Normalize a requests/limits mapping to {"cpu", "memory"} with "not set" defaults."""
    if not spec:
        return {"cpu": "not set", "memory": "not set"}
    return {
        "cpu": spec.get("cpu", "not set"),
        "memory": spec.get("memory", "not set"),
    }

def collect_cluster_data():
    """Snapshot namespaces, pods, deployments, and nodes via the Kubernetes API.

    Loads the local kubeconfig, so it needs a reachable cluster.

    Returns:
        dict with keys "namespaces" (names), "pods" (per-container
        requests/limits), "deployments" (desired vs ready replicas),
        and "nodes" (allocatable CPU/memory, instance type).
    """
    config.load_kube_config()
    v1 = client.CoreV1Api()
    apps_v1 = client.AppsV1Api()

    data = {"namespaces": [], "pods": [], "deployments": [], "nodes": []}

    # All namespace names.
    data["namespaces"] = [ns.metadata.name for ns in v1.list_namespace().items]

    # Every pod with its per-container resource requests/limits.
    for pod in v1.list_pod_for_all_namespaces().items:
        data["pods"].append({
            "name": pod.metadata.name,
            "namespace": pod.metadata.namespace,
            "phase": pod.status.phase,
            "containers": [
                {
                    "name": c.name,
                    "requests": _resource_dict(c.resources.requests),
                    "limits": _resource_dict(c.resources.limits),
                }
                for c in pod.spec.containers
            ],
        })

    # Desired vs ready replicas per deployment.
    for deploy in apps_v1.list_deployment_for_all_namespaces().items:
        data["deployments"].append({
            "name": deploy.metadata.name,
            "namespace": deploy.metadata.namespace,
            "replicas": deploy.spec.replicas,
            "ready_replicas": deploy.status.ready_replicas or 0,
        })

    # Node capacity and instance type.
    for node in v1.list_node().items:
        allocatable = node.status.allocatable
        labels = node.metadata.labels or {}  # metadata.labels can be None
        data["nodes"].append({
            "name": node.metadata.name,
            "cpu_allocatable": allocatable.get("cpu", "unknown"),
            "memory_allocatable": allocatable.get("memory", "unknown"),
            # Fall back to the deprecated beta label on older clusters.
            "instance_type": labels.get(
                "node.kubernetes.io/instance-type",
                labels.get("beta.kubernetes.io/instance-type", "unknown"),
            ),
        })
    return data

# Step 2: Find Obvious Waste
def analyze_waste(data):
    """Flag obvious cost waste in the collected cluster snapshot.

    Args:
        data: dict produced by collect_cluster_data().

    Returns:
        List of issue dicts: {"type", "severity", "resource", "detail"},
        with severity one of "HIGH", "MEDIUM", "LOW".
    """
    issues = []

    # Running pods whose containers have no resource requests: the scheduler
    # can't bin-pack them and they often land on oversized nodes.
    for pod in data["pods"]:
        if pod["phase"] != "Running":
            continue
        for container in pod["containers"]:
            requests = container["requests"]
            # BUG FIX: the original only looked at CPU; a missing memory
            # request is just as harmful, so flag either omission.
            if requests["cpu"] == "not set" or requests["memory"] == "not set":
                issues.append({
                    "type": "no_resource_requests",
                    "severity": "HIGH",
                    "resource": f"{pod['namespace']}/{pod['name']}/{container['name']}",
                    "detail": "No CPU/memory requests set — scheduler can't optimize placement"
                })

    non_prod_keywords = ["dev", "staging", "test", "qa", "demo"]
    for deploy in data["deployments"]:
        replicas = deploy["replicas"] or 0  # spec.replicas may be None
        # Desired replicas but none ready — compute reserved for nothing.
        if replicas > 0 and deploy["ready_replicas"] == 0:
            issues.append({
                "type": "unhealthy_deployment",
                "severity": "MEDIUM",
                "resource": f"{deploy['namespace']}/{deploy['name']}",
                "detail": f"Deployment has {deploy['replicas']} replicas but 0 ready — wasting compute"
            })
        # High replica counts in non-prod namespaces rarely earn their cost.
        ns = deploy["namespace"].lower()
        if any(kw in ns for kw in non_prod_keywords) and replicas > 2:
            issues.append({
                "type": "over_replicated_non_prod",
                "severity": "MEDIUM",
                "resource": f"{deploy['namespace']}/{deploy['name']}",
                "detail": f"{deploy['replicas']} replicas in non-prod namespace — likely should be 1"
            })

    # Lots of namespaces usually means abandoned experiments.
    if len(data["namespaces"]) > 20:
        issues.append({
            "type": "namespace_sprawl",
            "severity": "LOW",
            "resource": "cluster",
            "detail": f"{len(data['namespaces'])} namespaces — review for unused ones"
        })
    return issues

# Step 3: Ask Claude for Recommendations
def get_ai_recommendations(cluster_data, issues):
    """Ask Claude for a prioritized, human-readable cost-optimization plan.

    Args:
        cluster_data: dict from collect_cluster_data().
        issues: issue list from analyze_waste().

    Returns:
        The model's text response (markdown-formatted recommendations).
    """
    llm = anthropic.Anthropic()  # reads ANTHROPIC_API_KEY from the environment
    prompt = f"""You are a Kubernetes cost optimization expert.
Analyze this cluster data and the identified issues, then provide:
1. A prioritized list of cost-saving actions (most impactful first)
2. Estimated savings potential for each action
3. The exact kubectl/Terraform commands to implement each fix
4. Risk level for each change
Cluster Summary:
- Nodes: {len(cluster_data['nodes'])} nodes
- Node types: {list(set(n['instance_type'] for n in cluster_data['nodes']))}
- Total pods: {len(cluster_data['pods'])}
- Total namespaces: {len(cluster_data['namespaces'])}
- Total deployments: {len(cluster_data['deployments'])}
Identified Issues:
{json.dumps(issues, indent=2)}
Keep recommendations practical and immediately actionable. Format with clear headers."""
    conversation = [{"role": "user", "content": prompt}]
    response = llm.messages.create(
        model="claude-sonnet-4-6",
        max_tokens=2000,
        messages=conversation,
    )
    return response.content[0].text

# Step 4: Generate the Report
def run_cost_bot():
    """Run the full pipeline: collect data, flag waste, get AI advice, write a report.

    Prints progress to stdout and saves cost_report.md in the working
    directory. Returns early (no report) when no issues are found.
    """
    print("🔍 Collecting cluster data...")
    cluster_data = collect_cluster_data()
    print(f"📊 Found {len(cluster_data['pods'])} pods, {len(cluster_data['nodes'])} nodes")

    print("🔎 Analyzing for waste...")
    issues = analyze_waste(cluster_data)
    print(f"⚠️ Found {len(issues)} potential issues")
    if not issues:
        print("✅ No obvious waste found!")
        return

    # Show HIGH first, then MEDIUM, then LOW.
    severity_rank = {"HIGH": 0, "MEDIUM": 1, "LOW": 2}
    print("\n--- ISSUES FOUND ---")
    for issue in sorted(issues, key=lambda x: severity_rank[x["severity"]]):
        print(f"[{issue['severity']}] {issue['type']}: {issue['resource']}")
        print(f" {issue['detail']}\n")

    print("\n🤖 Getting AI recommendations...")
    recommendations = get_ai_recommendations(cluster_data, issues)
    print("\n--- AI RECOMMENDATIONS ---")
    print(recommendations)

    # Best-effort cluster name: strip the node-specific suffix off a node name.
    cluster_name = cluster_data["nodes"][0]["name"].rsplit("-", 2)[0] if cluster_data["nodes"] else "unknown"
    with open("cost_report.md", "w", encoding="utf-8") as f:
        f.write("# Kubernetes Cost Optimization Report\n\n")
        f.write(f"**Cluster:** {cluster_name}\n")
        # BUG FIX: the date was hard-coded ("2026-04-17"); stamp the run date.
        f.write(f"**Date:** {date.today().isoformat()}\n\n")
        f.write("## Issues Found\n\n")
        for issue in issues:
            f.write(f"- **[{issue['severity']}]** {issue['type']}: {issue['resource']}\n")
            f.write(f" - {issue['detail']}\n")
        f.write("\n## AI Recommendations\n\n")
        f.write(recommendations)
    print("\n📄 Report saved to cost_report.md")
if __name__ == "__main__":
    run_cost_bot()

Running It
export ANTHROPIC_API_KEY="your-key-here"
python cost_bot.py

Sample output:
🔍 Collecting cluster data...
📊 Found 247 pods, 8 nodes
🔎 Analyzing for waste...
⚠️ Found 12 potential issues
--- ISSUES FOUND ---
[HIGH] no_resource_requests: staging/payment-api/app
No CPU/memory requests set — scheduler can't optimize placement
[MEDIUM] over_replicated_non_prod: dev/frontend/app
5 replicas in non-prod namespace — likely should be 1
--- AI RECOMMENDATIONS ---
## Priority 1: Set Resource Requests (Saves ~30% on node costs)
...
Adding Real Metrics (kubectl top)
For actual CPU/memory usage vs requests, call Metrics Server:
import subprocess
import json
def get_actual_usage():
result = subprocess.run(
["kubectl", "top", "pods", "--all-namespaces", "--no-headers", "-o", "json"],
capture_output=True, text=True
)
# Parse and return usage data
return result.stdoutRunning as a Kubernetes CronJob
Schedule the bot to run weekly and send results to Slack:
apiVersion: batch/v1
kind: CronJob
metadata:
  name: cost-optimization-bot
  namespace: monitoring
spec:
  schedule: "0 9 * * 1"  # Every Monday 9am
  jobTemplate:
    spec:
      template:
        spec:
          # Service account needs RBAC to list namespaces/pods/deployments/nodes.
          serviceAccountName: cost-bot-sa
          containers:
            - name: bot
              image: your-registry/cost-bot:latest
              env:
                - name: ANTHROPIC_API_KEY
                  valueFrom:
                    secretKeyRef:
                      name: ai-secrets
                      key: anthropic-key
                - name: SLACK_WEBHOOK
                  valueFrom:
                    secretKeyRef:
                      name: ai-secrets
                      key: slack-webhook
          restartPolicy: OnFailure

# Next Steps
- Add Prometheus metrics queries for actual CPU/memory utilization over time
- Compare requests vs actual usage to find oversized pods
- Integrate with Slack for weekly cost reports
- Add rightsizing recommendations using VPA data
Resources
- Anthropic Claude API Docs — API reference and pricing
- Kubernetes Python Client — official K8s SDK
- DevOpsBoys FinOps Guide — broader FinOps strategy
- KEDA Event-Driven Autoscaling — scale to zero for cost savings
- Karpenter Guide — intelligent node provisioning
A bot that finds $5,000/month in waste is worth more than 10 dashboards. Build it once, run it forever.
Stay ahead of the curve
Get the latest DevOps, Kubernetes, AWS, and AI/ML guides delivered straight to your inbox. No spam — just practical engineering content.
Related Articles
AI-Driven Capacity Planning for Kubernetes Clusters (2026)
How to use AI and machine learning for Kubernetes capacity planning. Covers predictive autoscaling, cost optimization, tools like StormForge and Kubecost, and building custom ML models for resource forecasting.
FinOps for DevOps Engineers: How to Cut Cloud Bills by 40% in 2026
Cloud costs are out of control at most companies. FinOps is the discipline that fixes it — and DevOps engineers are the most important people in any FinOps implementation. Here is everything you need to know.
Karpenter Complete Guide 2026: Smarter Kubernetes Node Autoscaling
Karpenter replaces Cluster Autoscaler with faster, more cost-efficient node provisioning. Learn architecture, NodePools, disruption budgets, Spot integration, and production best practices.