Build an AI Kubernetes Cost Optimizer with Python and Claude API
Use AI to automatically analyze your Kubernetes resource usage, detect waste, and generate optimization recommendations. Full Python project with Claude API.
Kubernetes costs spiral fast when teams overprovision resources. Most orgs have dozens of pods requesting 10x the CPU/memory they actually use. In this project, we'll build an AI-powered tool that analyzes your cluster, detects waste, and generates specific recommendations using the Claude API.
What We're Building
A Python script that:
- Pulls live resource requests vs actual usage from your cluster
- Identifies over-provisioned, idle, and rightsizing candidates
- Sends the data to Claude API
- Returns specific, actionable recommendations in plain English
- Outputs a markdown report you can share with your team
Stack: Python, kubectl/Kubernetes Python client, Anthropic Claude API
Prerequisites
pip install kubernetes anthropic tabulate python-dotenv
Create .env:
ANTHROPIC_API_KEY=sk-ant-...
KUBECONFIG=/path/to/kubeconfig # optional if using default
Step 1: Collect Resource Data
# k8s_collector.py
from kubernetes import client, config
from dataclasses import dataclass
from typing import Optional
import subprocess
import json
@dataclass
class PodResource:
    """Resource settings and observed usage for one container of a pod.

    All quantities are kept as the raw strings reported by the cluster
    (for example "250m" or "512Mi"); parsing happens in the analyzer.
    """

    # Location of the container.
    namespace: str
    pod_name: str
    container_name: str
    # Requested resources. The collector stores the literal string
    # 'not set' when the container declares no request.
    cpu_request: str
    memory_request: str
    # Limits. None when the container declares no limit.
    cpu_limit: Optional[str]
    memory_limit: Optional[str]
    # Actual usage (from metrics-server). Stays None until
    # enrich_with_usage() fills it in.
    cpu_usage: Optional[str] = None
    memory_usage: Optional[str] = None
def load_k8s_config():
    """Load Kubernetes client configuration.

    Tries the in-cluster (service account) configuration first and falls
    back to the local kubeconfig file when the in-cluster configuration is
    not available.

    Raises:
        config.ConfigException: if the kubeconfig fallback cannot be loaded
            either.
    """
    try:
        config.load_incluster_config()
    except config.ConfigException:
        # Only a missing in-cluster environment should trigger the fallback;
        # anything else (KeyboardInterrupt, programming errors) must surface.
        config.load_kube_config()
def get_pod_resources() -> list[PodResource]:
    """Collect requests and limits for every container in the cluster.

    Pods in the 'kube-system' and 'kube-public' namespaces are ignored, as
    are pods whose phase is neither 'Running' nor 'Pending'.

    Returns:
        One PodResource per container, with usage fields left unset.
    """
    load_k8s_config()
    core_api = client.CoreV1Api()
    pod_list = core_api.list_pod_for_all_namespaces(watch=False)

    collected = []
    for pod in pod_list.items:
        meta = pod.metadata
        # System namespaces are out of scope for the report.
        if meta.namespace in ['kube-system', 'kube-public']:
            continue
        # Completed or failed pods no longer hold resources.
        if pod.status.phase not in ['Running', 'Pending']:
            continue

        for container in pod.spec.containers:
            requests = container.resources.requests or {}
            limits = container.resources.limits or {}
            entry = PodResource(
                namespace=meta.namespace,
                pod_name=meta.name,
                container_name=container.name,
                cpu_request=requests.get('cpu', 'not set'),
                memory_request=requests.get('memory', 'not set'),
                cpu_limit=limits.get('cpu'),
                memory_limit=limits.get('memory'),
            )
            collected.append(entry)
    return collected
def get_actual_usage() -> dict:
    """Get actual CPU/memory usage via kubectl top.

    Runs ``kubectl top pods --all-namespaces --no-headers`` and parses the
    first four whitespace-separated columns of each output line.

    Returns:
        A dict mapping "namespace/pod" to ``{'cpu': ..., 'memory': ...}``,
        with the values kept as the raw strings printed by kubectl. The
        dict is empty when kubectl cannot be run, times out, or exits with
        a non-zero status; a warning is printed in those cases.
    """
    usage = {}
    install_hint = "Install metrics-server: kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml"

    try:
        result = subprocess.run(
            ['kubectl', 'top', 'pods', '--all-namespaces', '--no-headers'],
            capture_output=True, text=True, timeout=30,
        )
    except (OSError, subprocess.SubprocessError) as e:
        # kubectl missing / not executable, or the call timed out.
        print(f"Warning: Could not get usage data: {e}")
        print(install_hint)
        return usage

    # subprocess.run does not raise on a failing command, so a missing
    # metrics-server would otherwise pass unnoticed as "no usage data".
    if result.returncode != 0:
        reason = result.stderr.strip() or f"kubectl exited with status {result.returncode}"
        print(f"Warning: Could not get usage data: {reason}")
        print(install_hint)
        return usage

    for line in result.stdout.splitlines():
        parts = line.split()
        if len(parts) >= 4:
            namespace, pod, cpu, memory = parts[:4]
            usage[f"{namespace}/{pod}"] = {
                'cpu': cpu,
                'memory': memory,
            }
    return usage
def enrich_with_usage(resources: list[PodResource], usage: dict) -> list[PodResource]:
    """Attach measured usage to the collected resource entries, in place.

    Entries are matched on "namespace/pod_name", so every entry that shares
    that key receives the same usage values. Entries without a match are
    left untouched.

    Returns:
        The same ``resources`` list that was passed in.
    """
    for entry in resources:
        lookup_key = f"{entry.namespace}/{entry.pod_name}"
        if lookup_key not in usage:
            continue
        sample = usage[lookup_key]
        entry.cpu_usage = sample['cpu']
        entry.memory_usage = sample['memory']
    return resources
# Step 2: Analyze and Detect Issues
# analyzer.py
from dataclasses import dataclass
from typing import Literal, Optional
@dataclass
class Issue:
    """A single finding about one container, as produced by detect_issues()."""

    # Priority bucket; the report sorts findings HIGH -> MEDIUM -> LOW.
    severity: Literal['HIGH', 'MEDIUM', 'LOW']
    # Machine-readable category, e.g. 'NO_REQUESTS' or 'CPU_OVERPROVISIONED'.
    issue_type: str
    # Location of the affected container.
    namespace: str
    pod_name: str
    container_name: str
    # Human-readable description of what was observed.
    detail: str
    # Suggested basic fix, also forwarded to the AI prompt.
    recommendation: str
def parse_cpu_millicores(cpu_str: str) -> Optional[int]:
    """Convert a Kubernetes CPU quantity string to whole millicores.

    Supported forms are plain cores ("2", "0.5"), millicores ("250m") and
    the finer-grained "u" (micro) and "n" (nano) suffixes. Fractions of a
    millicore are truncated.

    Args:
        cpu_str: The quantity string; may be empty, None or 'not set'.

    Returns:
        The value in millicores, or None when the input is missing or
        cannot be parsed.
    """
    if not cpu_str or cpu_str == 'not set':
        return None

    text = cpu_str.strip()
    if not text:
        return None

    # How many of each sub-core unit make up one millicore. Dividing (rather
    # than multiplying by 1e-6) keeps exact inputs exact in floating point.
    units_per_millicore = {'m': 1, 'u': 1_000, 'n': 1_000_000}

    try:
        divisor = units_per_millicore.get(text[-1])
        if divisor is not None:
            return int(float(text[:-1]) / divisor)
        # No suffix: the value is expressed in whole cores.
        return int(float(text) * 1000)
    except (ValueError, OverflowError):
        # Malformed number ("abcm"), NaN or infinity.
        return None
def parse_memory_mi(mem_str: str) -> Optional[int]:
    """Convert a Kubernetes memory quantity string to whole MiB.

    Supported forms are plain bytes ("1048576"), binary suffixes
    (Ki, Mi, Gi, Ti, Pi, Ei) and decimal suffixes (k, M, G, T, P, E).
    Fractions of a MiB are truncated.

    Args:
        mem_str: The quantity string; may be empty, None or 'not set'.

    Returns:
        The value in MiB, or None when the input is missing or cannot be
        parsed.
    """
    if not mem_str or mem_str == 'not set':
        return None

    text = mem_str.strip()
    bytes_per_mib = 1024 ** 2

    # Bytes per unit. Binary suffixes are listed first so that "Mi" is not
    # mistaken for the decimal "M" with a stray trailing character.
    suffixes = (
        ('Ki', 1024), ('Mi', 1024 ** 2), ('Gi', 1024 ** 3),
        ('Ti', 1024 ** 4), ('Pi', 1024 ** 5), ('Ei', 1024 ** 6),
        ('k', 1000), ('M', 1000 ** 2), ('G', 1000 ** 3),
        ('T', 1000 ** 4), ('P', 1000 ** 5), ('E', 1000 ** 6),
    )

    number, unit_bytes = text, 1  # no suffix means plain bytes
    for suffix, size in suffixes:
        if text.endswith(suffix):
            number, unit_bytes = text[:-len(suffix)], size
            break

    try:
        return int(float(number) * unit_bytes / bytes_per_mib)
    except (ValueError, OverflowError):
        # Malformed number ("junkMi"), unknown suffix, NaN or infinity.
        return None
def detect_issues(resources, overprovision_ratio: float = 0.1, headroom: int = 2) -> list[Issue]:
    """Scan collected container resources for configuration and waste issues.

    Per container, in order:
      1. NO_REQUESTS (HIGH) when the CPU or memory request is 'not set';
         no further checks run for that container.
      2. NO_LIMITS (MEDIUM) when the CPU or memory limit is missing.
      3. CPU_OVERPROVISIONED / MEMORY_OVERPROVISIONED (HIGH) when usage data
         is present and usage / request is below ``overprovision_ratio``.

    Args:
        resources: Iterable of objects exposing the PodResource attributes.
        overprovision_ratio: Utilization below which a request counts as
            over-provisioned (default 0.1, i.e. under 10% of the request).
        headroom: Multiplier applied to actual usage when suggesting a new
            request (default 2).

    Returns:
        The list of detected issues, in the order they were found.
    """
    issues = []
    for r in resources:
        # Issue 1: No resource requests set
        if r.cpu_request == 'not set' or r.memory_request == 'not set':
            issues.append(Issue(
                severity='HIGH',
                issue_type='NO_REQUESTS',
                namespace=r.namespace,
                pod_name=r.pod_name,
                container_name=r.container_name,
                detail=f"CPU request: {r.cpu_request}, Memory request: {r.memory_request}",
                recommendation="Set explicit resource requests to enable proper scheduling and autoscaling"
            ))
            continue

        # Issue 2: No limits set (with requests set)
        if not r.cpu_limit or not r.memory_limit:
            issues.append(Issue(
                severity='MEDIUM',
                issue_type='NO_LIMITS',
                namespace=r.namespace,
                pod_name=r.pod_name,
                container_name=r.container_name,
                detail=f"CPU limit: {r.cpu_limit or 'not set'}, Memory limit: {r.memory_limit or 'not set'}",
                recommendation="Set memory limits to prevent OOMKill surprises. CPU limits are optional but recommended."
            ))

        # Issue 3: Over-provisioned (if we have usage data). CPU and memory
        # follow the same rule and differ only in parser, label and unit.
        checks = (
            ('CPU_OVERPROVISIONED', 'CPU', r.cpu_request, r.cpu_usage, parse_cpu_millicores, 'm'),
            ('MEMORY_OVERPROVISIONED', 'memory', r.memory_request, r.memory_usage, parse_memory_mi, 'Mi'),
        )
        for issue_type, label, request, usage, parse, unit in checks:
            if not usage or not request:
                continue
            requested = parse(request)
            used = parse(usage)
            # Compare against None explicitly: a parsed usage of 0 is a valid
            # reading (an idle container) and is the most wasteful case, so it
            # must not be discarded by a truthiness test.
            if requested is None or used is None or requested <= 0:
                continue
            ratio = used / requested
            if ratio >= overprovision_ratio:
                continue
            if used == 0:
                advice = (
                    f"No measurable {label} usage observed; reduce the request "
                    f"to a minimal value or remove the workload if it is idle"
                )
            else:
                advice = f"Reduce {label} request to ~{used * headroom}{unit} ({headroom}x actual usage as buffer)"
            issues.append(Issue(
                severity='HIGH',
                issue_type=issue_type,
                namespace=r.namespace,
                pod_name=r.pod_name,
                container_name=r.container_name,
                detail=f"Requested: {request}, Actual: {usage} ({ratio*100:.1f}% utilization)",
                recommendation=advice
            ))
    return issues
# Step 3: AI Analysis with Claude
# ai_optimizer.py
import anthropic
import os
from dotenv import load_dotenv
# Read variables from a local .env file into the process environment.
load_dotenv()
# Module-level API client shared by the functions below; the key is taken
# from the ANTHROPIC_API_KEY environment variable.
client = anthropic.Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))
def get_ai_recommendations(issues: list, cluster_summary: dict) -> str:
    """Send issues to Claude for intelligent analysis.

    Args:
        issues: Issue objects (severity, issue_type, namespace, pod_name,
            container_name, detail, recommendation).
        cluster_summary: Dict with 'total_pods', 'total_issues',
            'high_count' and 'medium_count'; 'low_count' is used when
            present.

    Returns:
        The model's markdown answer (all text blocks of the response
        concatenated).
    """
    max_issues = 50  # Limit to avoid token limits
    severity_rank = {'HIGH': 0, 'MEDIUM': 1, 'LOW': 2}

    # Sort before truncating so that, when there are more than max_issues,
    # the least severe findings are the ones dropped. sorted() is stable, so
    # detection order is kept within each severity.
    ranked = sorted(issues, key=lambda i: severity_rank.get(i.severity, len(severity_rank)))
    shown = ranked[:max_issues]

    # Format issues for the prompt
    issues_text = "\n".join(
        f"[{i.severity}] {i.issue_type} - {i.namespace}/{i.pod_name}/{i.container_name}\n"
        f" Detail: {i.detail}\n"
        f" Basic fix: {i.recommendation}"
        for i in shown
    )
    if len(issues) > len(shown):
        # Tell the model the list is partial so the summary counts still add up.
        issues_text += f"\n(Only the {len(shown)} most severe of {len(issues)} issues are listed.)"

    prompt = f"""You are a Kubernetes cost optimization expert. Analyze the following cluster issues and provide actionable recommendations.
CLUSTER SUMMARY:
- Total pods analyzed: {cluster_summary['total_pods']}
- Issues found: {cluster_summary['total_issues']}
- High severity: {cluster_summary['high_count']}
- Medium severity: {cluster_summary['medium_count']}
- Low severity: {cluster_summary.get('low_count', 0)}
DETECTED ISSUES:
{issues_text}
Please provide:
1. A brief executive summary (2-3 sentences) of the cost optimization opportunity
2. Top 5 highest-impact recommendations with estimated cost savings
3. A priority order for tackling these issues
4. Any patterns you notice across the issues (e.g., specific namespace or team that needs attention)
5. Quick wins (changes that take <30 minutes and have immediate impact)
Format your response as markdown. Be specific and practical — include actual kubectl/Helm commands where relevant."""

    message = client.messages.create(
        model="claude-opus-4-7",
        max_tokens=2000,
        messages=[{"role": "user", "content": prompt}]
    )
    # The response content is a list of blocks; keep only the text ones
    # instead of assuming the first block is text.
    return "".join(
        block.text for block in message.content
        if getattr(block, "type", None) == "text"
    )
# Step 4: Generate Report
# report.py
from datetime import datetime
from tabulate import tabulate
def generate_report(resources, issues, ai_analysis: str) -> str:
    """Render the markdown cost-optimization report.

    Args:
        resources: Collected container entries (used for the unique pod count).
        issues: Detected issues; listed in the table ordered HIGH, MEDIUM, LOW.
        ai_analysis: Markdown text inserted under the "AI Analysis" heading.

    Returns:
        The full report as a single markdown string.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    severity_order = ['HIGH', 'MEDIUM', 'LOW']

    def count_with(level):
        # Number of issues carrying the given severity.
        return sum(1 for issue in issues if issue.severity == level)

    # Summary stats
    unique_pods = {f"{r.namespace}/{r.pod_name}" for r in resources}
    total_pods = len(unique_pods)
    high = count_with('HIGH')
    medium = count_with('MEDIUM')
    low = count_with('LOW')

    # Issues table; long pod/container names are clipped to keep it readable.
    ordered = sorted(issues, key=lambda issue: severity_order.index(issue.severity))
    table_data = []
    for entry in ordered:
        table_data.append([
            entry.severity,
            entry.issue_type,
            entry.namespace,
            entry.pod_name[:30],
            entry.container_name[:20],
        ])
    issues_table = tabulate(
        table_data,
        headers=['Severity', 'Type', 'Namespace', 'Pod', 'Container'],
        tablefmt='pipe',
    )
    issue_total = len(issues)

    report = f"""# Kubernetes Cost Optimization Report
**Generated:** {timestamp}
## Summary
| Metric | Count |
|--------|-------|
| Pods Analyzed | {total_pods} |
| High Severity Issues | {high} |
| Medium Severity Issues | {medium} |
| Low Severity Issues | {low} |
| Total Issues | {issue_total} |
## AI Analysis
{ai_analysis}
## Detailed Issues
{issues_table}
## Next Steps
1. Fix HIGH severity issues first (no resource requests = unpredictable scheduling)
2. Review MEMORY_OVERPROVISIONED pods in dev/staging (safe to reduce)
3. Set up VPA (Vertical Pod Autoscaler) for automatic rightsizing
4. Schedule this script as a weekly CronJob for ongoing monitoring
"""
    return report
# Step 5: Main Entry Point
# main.py
from k8s_collector import get_pod_resources, get_actual_usage, enrich_with_usage
from analyzer import detect_issues
from ai_optimizer import get_ai_recommendations
from report import generate_report
def main():
    """Run the full pipeline: collect, enrich, analyze, ask the AI, write the report.

    Prints progress to stdout and writes the report to
    ``k8s-cost-report.md`` in the current directory. Returns early, without
    calling the AI or writing a file, when no issues are detected.
    """
    print("🔍 Collecting cluster resource data...")
    resources = get_pod_resources()

    print("📊 Getting actual usage metrics...")
    usage = get_actual_usage()
    resources = enrich_with_usage(resources, usage)

    print("🔎 Detecting issues...")
    issues = detect_issues(resources)
    if not issues:
        print("✅ No issues found! Your cluster looks well-optimized.")
        return

    print(f"⚠️ Found {len(issues)} issues. Sending to AI for analysis...")
    cluster_summary = {
        'total_pods': len(set(f"{r.namespace}/{r.pod_name}" for r in resources)),
        'total_issues': len(issues),
        'high_count': sum(1 for i in issues if i.severity == 'HIGH'),
        'medium_count': sum(1 for i in issues if i.severity == 'MEDIUM'),
        'low_count': sum(1 for i in issues if i.severity == 'LOW'),
    }
    ai_analysis = get_ai_recommendations(issues, cluster_summary)

    print("📝 Generating report...")
    report = generate_report(resources, issues, ai_analysis)

    # Save report. The encoding is explicit because the AI text may contain
    # non-ASCII characters that the platform default encoding cannot represent.
    with open("k8s-cost-report.md", "w", encoding="utf-8") as f:
        f.write(report)

    print("\n✅ Report saved to k8s-cost-report.md")
    print("\n--- AI RECOMMENDATIONS PREVIEW ---")
    # Show at most the first 500 characters of the analysis.
    preview = ai_analysis[:500] + "..." if len(ai_analysis) > 500 else ai_analysis
    print(preview)


if __name__ == "__main__":
    main()
# Run It
python main.py
Output:
🔍 Collecting cluster resource data...
📊 Getting actual usage metrics...
🔎 Detecting issues...
⚠️ Found 23 issues. Sending to AI for analysis...
📝 Generating report...
✅ Report saved to k8s-cost-report.md
Run as a Kubernetes CronJob
# Weekly CronJob that runs the cost optimizer inside the cluster.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: cost-optimizer
  namespace: monitoring
spec:
  schedule: "0 9 * * 1" # Every Monday at 9am
  jobTemplate:
    spec:
      template:
        spec:
          # NOTE(review): this service account presumably needs permission to
          # list pods cluster-wide and read pod metrics — confirm the RBAC setup.
          serviceAccountName: cost-optimizer-sa
          containers:
          - name: optimizer
            # NOTE(review): the stock python image contains neither the script
            # nor its dependencies, and no command is set here; build an image
            # that includes them (or mount the code) before using this manifest.
            image: python:3.12-slim
            env:
            - name: ANTHROPIC_API_KEY
              valueFrom:
                secretKeyRef:
                  name: anthropic-secret
                  key: api-key
          restartPolicy: OnFailure
# What's Next
- Add Slack notification with weekly cost report
- Track recommendations over time (did someone fix it?)
- Add cost estimates per namespace (node instance prices × resource waste)
- Build a simple dashboard with Streamlit
Anthropic Claude API — claude-opus-4-7 is best for analysis tasks like this. Sign up for API access.
Spacelift — if you want drift detection and cost policy enforcement for the Terraform that provisions the cluster, Spacelift is the tool.
Today I Fixed
Short real fixes from production — posted daily
Stay ahead of the curve
Get the latest DevOps, Kubernetes, AWS, and AI/ML guides delivered straight to your inbox. No spam — just practical engineering content.
Related Articles
Build a Kubernetes Cost Optimization Bot with AI in 2026
Build an AI-powered bot that analyzes your Kubernetes cluster, finds idle resources, oversized pods, and unused namespaces — and gives cost-cutting recommendations.
AI-Driven Capacity Planning for Kubernetes Clusters (2026)
How to use AI and machine learning for Kubernetes capacity planning. Covers predictive autoscaling, cost optimization, tools like StormForge and Kubecost, and building custom ML models for resource forecasting.
Build an AI-Powered SLO Budget Tracker with Python + Claude (2026)
Track your error budget automatically and get AI-generated burn rate alerts and incident summaries. Build a real SLO monitoring tool with Python, Prometheus, and Claude API.