mirror of
https://github.com/dat515-2025/Group-8.git
synced 2026-03-22 15:12:08 +01:00
fix(infrastructure): prometheus
This commit is contained in:
14
tofu/modules/prometheus/grafana-ui.yaml
Normal file
14
tofu/modules/prometheus/grafana-ui.yaml
Normal file
@@ -0,0 +1,14 @@
|
||||
# Cloudflare TunnelBinding that publishes the in-cluster Grafana service under
# the "grafana" subdomain of the base domain passed in by the template caller.
apiVersion: networking.cfargotunnel.com/v1alpha1
kind: TunnelBinding
metadata:
  name: grafana-tunnel-binding
  namespace: monitoring
subjects:
  - name: grafana
    spec:
      # Origin: Grafana service of the kube-prometheus-stack release, reached
      # over plain HTTP inside the cluster.
      target: http://kube-prometheus-stack-grafana.monitoring.svc.cluster.local
      fqdn: grafana.${base_domain}
      noTlsVerify: true
# Tunnel this binding attaches to.
tunnelRef:
  kind: ClusterTunnel
  name: cluster-tunnel
|
||||
66
tofu/modules/prometheus/main.tf
Normal file
66
tofu/modules/prometheus/main.tf
Normal file
@@ -0,0 +1,66 @@
|
||||
# Provider requirements for the prometheus module. Every provider is pinned to
# an exact version.
terraform {
  required_providers {
    helm = {
      source  = "hashicorp/helm"
      version = "3.0.2"
    }

    kubectl = {
      source  = "gavinbunney/kubectl"
      version = "1.19.0"
    }

    kubernetes = {
      source  = "hashicorp/kubernetes"
      version = "2.38.0"
    }

    kustomization = {
      source  = "kbst/kustomization"
      version = "0.9.6"
    }

    time = {
      source  = "hashicorp/time"
      version = "0.13.1"
    }
  }
}
|
||||
|
||||
# Namespace that hosts the monitoring stack.
resource "kubernetes_namespace" "monitoring" {
  metadata {
    name = "monitoring"

    # Pod Security Admission: enforce the "privileged" level in this namespace.
    # NOTE(review): presumably required by node-exporter's host access - confirm.
    labels = {
      "pod-security.kubernetes.io/enforce" = "privileged"
    }
  }
}
|
||||
|
||||
# kube-prometheus-stack Helm release, installed into the monitoring namespace
# and configured from values.yaml in this module directory.
resource "helm_release" "kube_prometheus_stack" {
  name       = "kube-prometheus-stack"
  namespace  = kubernetes_namespace.monitoring.metadata[0].name
  repository = "https://prometheus-community.github.io/helm-charts"
  chart      = "kube-prometheus-stack"
  version    = "67.2.1" # Check for latest version

  # Block until the release's resources report ready, for at most 600 seconds.
  wait    = true
  timeout = 600

  force_update  = false
  recreate_pods = false

  # Chart values come from the values file next to this module.
  values = [
    file("${path.module}/values.yaml"),
  ]

  # Explicit ordering; the namespace reference above already implies it.
  depends_on = [
    kubernetes_namespace.monitoring,
  ]
}
|
||||
|
||||
# Applies the Grafana TunnelBinding rendered from grafana-ui.yaml, after the
# kube-prometheus-stack Helm release has been created.
# NOTE(review): the resource label says "argocd" although the manifest binds
# Grafana - looks like a copy-paste leftover. Rename it (with a `moved` block)
# once it is confirmed that nothing else references this address.
resource "kubectl_manifest" "argocd-tunnel-bind" {
  yaml_body = templatefile("${path.module}/grafana-ui.yaml", {
    base_domain = var.cloudflare_domain
  })

  depends_on = [helm_release.kube_prometheus_stack]
}
|
||||
189
tofu/modules/prometheus/values.yaml
Normal file
189
tofu/modules/prometheus/values.yaml
Normal file
@@ -0,0 +1,189 @@
|
||||
# Prometheus server settings
prometheus:
  prometheusSpec:
    # Keep data for 30 days or until 45GB is used; the size cap sits below the
    # 50Gi volume requested underneath.
    retention: 30d
    retentionSize: "45GB"

    # Persistent storage for the time-series database
    storageSpec:
      volumeClaimTemplate:
        spec:
          accessModes:
            - ReadWriteOnce
          resources:
            requests:
              storage: 50Gi
          # storageClassName: "your-storage-class"  # Uncomment and specify if needed

    # CPU / memory requests and limits
    resources:
      requests:
        cpu: 500m
        memory: 2Gi
      limits:
        cpu: 2000m
        memory: 4Gi

    # How often targets are scraped and rules are evaluated
    scrapeInterval: 30s
    evaluationInterval: 30s

  # Cluster-internal service
  service:
    type: ClusterIP
    port: 9090

  # Ingress stays off; uncomment and adapt the example to enable it
  ingress:
    enabled: false
    # ingressClassName: nginx
    # hosts:
    #   - prometheus.example.com
    # tls:
    #   - secretName: prometheus-tls
    #     hosts:
    #       - prometheus.example.com
|
||||
|
||||
# Grafana settings
grafana:
  enabled: true

  # Admin credentials
  # NOTE(review): hard-coded, well-known password committed to version
  # control, while grafana-ui.yaml in this module binds Grafana to a
  # Cloudflare tunnel hostname. Supply it from a secret or a sensitive
  # variable instead.
  adminPassword: "admin"  # CHANGE THIS IN PRODUCTION!

  # Keep Grafana state on a persistent volume
  persistence:
    enabled: true
    size: 10Gi
    # storageClassName: "your-storage-class"  # Uncomment and specify if needed

  # CPU / memory requests and limits
  resources:
    requests:
      cpu: 100m
      memory: 256Mi
    limits:
      cpu: 500m
      memory: 512Mi

  # Cluster-internal service
  service:
    type: ClusterIP
    port: 80

  # Ingress stays off; uncomment and adapt the example to enable it
  ingress:
    enabled: false
    # ingressClassName: nginx
    # hosts:
    #   - grafana.example.com
    # tls:
    #   - secretName: grafana-tls
    #     hosts:
    #       - grafana.example.com

  # Ship the default dashboards, rendered in the Europe/Prague timezone
  defaultDashboardsEnabled: true
  defaultDashboardsTimezone: Europe/Prague
|
||||
|
||||
# Alertmanager settings
alertmanager:
  enabled: true

  alertmanagerSpec:
    # Persistent storage
    storage:
      volumeClaimTemplate:
        spec:
          accessModes:
            - ReadWriteOnce
          resources:
            requests:
              storage: 10Gi
          # storageClassName: "your-storage-class"  # Uncomment and specify if needed

    # CPU / memory requests and limits
    resources:
      requests:
        cpu: 100m
        memory: 128Mi
      limits:
        cpu: 500m
        memory: 256Mi

  # Cluster-internal service
  service:
    type: ClusterIP
    port: 9093

  # Ingress stays off; uncomment and adapt the example to enable it
  ingress:
    enabled: false
    # ingressClassName: nginx
    # hosts:
    #   - alertmanager.example.com
    # tls:
    #   - secretName: alertmanager-tls
    #     hosts:
    #       - alertmanager.example.com

  # Alertmanager routing configuration
  config:
    global:
      resolve_timeout: 5m

    route:
      group_by:
        - alertname
        - cluster
        - service
      group_wait: 10s
      group_interval: 10s
      repeat_interval: 12h
      # The receiver name is the string 'null', hence the quotes: unquoted it
      # would parse as a YAML null.
      receiver: 'null'
      routes:
        # Send the Watchdog alert to the 'null' receiver. Uses `matchers`,
        # the replacement for the deprecated `match` form.
        - matchers:
            - 'alertname = "Watchdog"'
          receiver: 'null'

    receivers:
      # Receiver with no integrations configured.
      - name: 'null'
      # Add your receivers here (email, slack, pagerduty, etc.)
      # - name: 'slack'
      #   slack_configs:
      #     - api_url: 'YOUR_SLACK_WEBHOOK_URL'
      #       channel: '#alerts'
      #       title: '{{ range .Alerts }}{{ .Annotations.summary }}\n{{ end }}'
      #       text: '{{ range .Alerts }}{{ .Annotations.description }}\n{{ end }}'
|
||||
|
||||
# Node Exporter component
nodeExporter:
  enabled: true

# kube-state-metrics component
kubeStateMetrics:
  enabled: true
|
||||
|
||||
# Prometheus Operator
prometheusOperator:
  enabled: true

  # CPU / memory requests and limits for the operator
  resources:
    requests:
      cpu: 100m
      memory: 128Mi
    limits:
      cpu: 500m
      memory: 256Mi
|
||||
|
||||
# Service Monitors
# Turn on the monitor of the prometheus-node-exporter subchart so its metrics
# are picked up.
prometheus-node-exporter:
  prometheus:
    monitor:
      enabled: true

# Further ServiceMonitors can be declared here
# additionalServiceMonitors: []
|
||||
|
||||
# Chart-wide settings
global:
  # Create the RBAC resources
  rbac:
    create: true
|
||||
5
tofu/modules/prometheus/variables.tf
Normal file
5
tofu/modules/prometheus/variables.tf
Normal file
@@ -0,0 +1,5 @@
|
||||
# Base domain under which the Grafana tunnel hostname is created; it is passed
# to the grafana-ui.yaml template as `base_domain`.
#
# The explanatory sentence used to be assigned to `default`, so omitting the
# variable produced a nonsensical hostname. It is now the description, and
# the variable has no default: callers must supply a real domain.
variable "cloudflare_domain" {
  description = "Base cloudflare domain, e.g. example.com"
  type        = string
  nullable    = false
}
|
||||
Reference in New Issue
Block a user