mirror of
https://github.com/dat515-2025/Group-8.git
synced 2026-03-22 15:12:08 +01:00
fix(infrastructure): prometheus
This commit is contained in:
33
tofu/main.tf
33
tofu/main.tf
@@ -54,6 +54,7 @@ module "loadbalancer" {
|
||||
|
||||
module "cert-manager" {
|
||||
source = "${path.module}/modules/cert-manager"
|
||||
depends_on = [module.loadbalancer]
|
||||
}
|
||||
|
||||
module "cloudflare" {
|
||||
@@ -67,10 +68,16 @@ module "cloudflare" {
|
||||
cloudflare_account_id = var.cloudflare_account_id
|
||||
}
|
||||
|
||||
module "monitoring" {
|
||||
source = "${path.module}/modules/prometheus"
|
||||
depends_on = [module.cloudflare]
|
||||
cloudflare_domain = var.cloudflare_domain
|
||||
}
|
||||
|
||||
|
||||
module "database" {
|
||||
source = "${path.module}/modules/maxscale"
|
||||
depends_on = [module.storage, module.loadbalancer, module.cloudflare]
|
||||
depends_on = [module.monitoring]
|
||||
|
||||
mariadb_password = var.mariadb_password
|
||||
mariadb_root_password = var.mariadb_root_password
|
||||
@@ -87,23 +94,23 @@ module "database" {
|
||||
cloudflare_domain = var.cloudflare_domain
|
||||
}
|
||||
|
||||
module "argocd" {
|
||||
source = "${path.module}/modules/argocd"
|
||||
depends_on = [module.storage, module.loadbalancer, module.cloudflare]
|
||||
#module "argocd" {
|
||||
# source = "${path.module}/modules/argocd"
|
||||
# depends_on = [module.storage, module.loadbalancer, module.cloudflare]
|
||||
|
||||
argocd_admin_password = var.argocd_admin_password
|
||||
cloudflare_domain = var.cloudflare_domain
|
||||
}
|
||||
# argocd_admin_password = var.argocd_admin_password
|
||||
# cloudflare_domain = var.cloudflare_domain
|
||||
#}
|
||||
|
||||
module "redis" {
|
||||
source = "${path.module}/modules/redis"
|
||||
depends_on = [module.storage]
|
||||
cloudflare_base_domain = var.cloudflare_domain
|
||||
}
|
||||
#module "redis" {
|
||||
# source = "${path.module}/modules/redis"
|
||||
# depends_on = [module.storage]
|
||||
# cloudflare_base_domain = var.cloudflare_domain
|
||||
#}
|
||||
|
||||
module "rabbitmq" {
|
||||
source = "${path.module}/modules/rabbitmq"
|
||||
depends_on = [module.storage]
|
||||
depends_on = [module.database]
|
||||
base_domain = var.cloudflare_domain
|
||||
rabbitmq-password = var.rabbitmq-password
|
||||
}
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
apiVersion: networking.cfargotunnel.com/v1alpha2
|
||||
kind: ClusterTunnel
|
||||
metadata:
|
||||
name: cluster-tunnel # The ClusterTunnel Custom Resource Name
|
||||
name: cluster-tunnel
|
||||
spec:
|
||||
newTunnel:
|
||||
name: ${cloudflare_tunnel_name} # Name of your new tunnel on Cloudflare
|
||||
name: ${cloudflare_tunnel_name}
|
||||
cloudflare:
|
||||
email: ${cloudflare_email}
|
||||
domain: ${cloudflare_domain}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
apiVersion: v2
|
||||
name: maxscale-helm
|
||||
version: 1.0.2
|
||||
version: 1.0.7
|
||||
description: Helm chart for MaxScale related Kubernetes manifests
|
||||
|
||||
@@ -54,6 +54,12 @@ spec:
|
||||
|
||||
metrics:
|
||||
enabled: true
|
||||
serviceMonitor:
|
||||
enabled: true
|
||||
interval: 30s
|
||||
scrapeTimeout: 10s
|
||||
prometheusRelease: kube-prometheus-stack
|
||||
jobLabel: mariadb-monitoring
|
||||
|
||||
tls:
|
||||
enabled: true
|
||||
@@ -106,7 +112,17 @@ spec:
|
||||
key: dsn
|
||||
|
||||
affinity:
|
||||
antiAffinityEnabled: true
|
||||
podAntiAffinity:
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- weight: 100
|
||||
podAffinityTerm:
|
||||
labelSelector:
|
||||
matchExpressions:
|
||||
- key: app.kubernetes.io/name
|
||||
operator: In
|
||||
values:
|
||||
- mariadb-repl
|
||||
topologyKey: kubernetes.io/hostname
|
||||
|
||||
tolerations:
|
||||
- key: "k8s.mariadb.com/ha"
|
||||
@@ -149,6 +165,12 @@ spec:
|
||||
|
||||
metrics:
|
||||
enabled: true
|
||||
serviceMonitor:
|
||||
enabled: true
|
||||
interval: 30s
|
||||
scrapeTimeout: 10s
|
||||
prometheusRelease: kube-prometheus-stack
|
||||
jobLabel: mariadb-monitoring
|
||||
|
||||
tls:
|
||||
enabled: true
|
||||
|
||||
@@ -33,7 +33,7 @@ spec:
|
||||
value: "3306"
|
||||
- name: PHPMYADMIN_ALLOW_NO_PASSWORD
|
||||
value: "false"
|
||||
image: "docker.io/bitnami/phpmyadmin:5.2.2"
|
||||
image: "bitnamilegacy/phpmyadmin:5.2.2"
|
||||
imagePullPolicy: IfNotPresent
|
||||
livenessProbe:
|
||||
failureThreshold: 3
|
||||
|
||||
@@ -58,7 +58,7 @@ resource "helm_release" "mariadb-operator" {
|
||||
resource "helm_release" "maxscale_helm" {
|
||||
name = "maxscale-helm"
|
||||
chart = "${path.module}/charts/maxscale-helm"
|
||||
version = "1.0.2"
|
||||
version = "1.0.7"
|
||||
depends_on = [ helm_release.mariadb-operator-crds, kubectl_manifest.secrets ]
|
||||
timeout = 3600
|
||||
|
||||
|
||||
14
tofu/modules/prometheus/grafana-ui.yaml
Normal file
14
tofu/modules/prometheus/grafana-ui.yaml
Normal file
@@ -0,0 +1,14 @@
|
||||
# TunnelBinding that publishes the in-cluster Grafana service through the
# shared ClusterTunnel. This file is rendered with templatefile(); the
# base_domain template variable is substituted before the manifest is applied.
apiVersion: networking.cfargotunnel.com/v1alpha1
kind: TunnelBinding
metadata:
  name: grafana-tunnel-binding
  namespace: monitoring
# Tunnel that carries the traffic.
tunnelRef:
  kind: ClusterTunnel
  name: cluster-tunnel
# Services exposed through the tunnel.
subjects:
  - name: grafana
    spec:
      # Public hostname -> in-cluster Grafana service.
      fqdn: "grafana.${base_domain}"
      target: "http://kube-prometheus-stack-grafana.monitoring.svc.cluster.local"
      # NOTE(review): the target is plain http, so this flag likely has no
      # effect - confirm before relying on it.
      noTlsVerify: true
|
||||
66
tofu/modules/prometheus/main.tf
Normal file
66
tofu/modules/prometheus/main.tf
Normal file
@@ -0,0 +1,66 @@
|
||||
# Provider requirements for the monitoring module; every version is pinned
# to an exact release.
# NOTE(review): resources in this file only use helm, kubectl and kubernetes;
# kustomization and time look unused here - confirm before dropping them.
terraform {
  required_providers {
    helm          = { source = "hashicorp/helm", version = "3.0.2" }
    kubectl       = { source = "gavinbunney/kubectl", version = "1.19.0" }
    kubernetes    = { source = "hashicorp/kubernetes", version = "2.38.0" }
    kustomization = { source = "kbst/kustomization", version = "0.9.6" }
    time          = { source = "hashicorp/time", version = "0.13.1" }
  }
}
|
||||
|
||||
# Namespace that hosts the monitoring stack. Pod Security admission is set to
# the "privileged" level for this namespace.
# NOTE(review): presumably needed for node-exporter's host access - confirm.
resource "kubernetes_namespace" "monitoring" {
  metadata {
    name   = "monitoring"
    labels = { "pod-security.kubernetes.io/enforce" = "privileged" }
  }
}
|
||||
|
||||
# Installs the kube-prometheus-stack chart into the monitoring namespace,
# configured by the values.yaml that sits next to this file.
resource "helm_release" "kube_prometheus_stack" {
  name      = "kube-prometheus-stack"
  namespace = kubernetes_namespace.monitoring.metadata[0].name

  # Chart coordinates; the version is pinned and should be bumped deliberately.
  repository = "https://prometheus-community.github.io/helm-charts"
  chart      = "kube-prometheus-stack"
  version    = "67.2.1"

  # Chart configuration
  values = [file("${path.module}/values.yaml")]

  # Block until the release's resources are ready (up to 600 seconds).
  wait    = true
  timeout = 600

  # Never force resource updates or recreate pods on upgrade.
  force_update  = false
  recreate_pods = false

  depends_on = [kubernetes_namespace.monitoring]
}
|
||||
|
||||
# Exposes Grafana through the Cloudflare tunnel by applying the rendered
# grafana-ui.yaml TunnelBinding once the chart release exists.
#
# The resource used to be called "argocd-tunnel-bind" (a copy-paste leftover
# from the ArgoCD module); it binds Grafana, so it is named accordingly.
resource "kubectl_manifest" "grafana_tunnel_bind" {
  depends_on = [helm_release.kube_prometheus_stack]

  # base_domain is the only variable the template consumes.
  yaml_body = templatefile("${path.module}/grafana-ui.yaml", {
    base_domain = var.cloudflare_domain
  })
}

# Carries any existing state over from the old address so the rename does not
# destroy and recreate the binding.
moved {
  from = kubectl_manifest.argocd-tunnel-bind
  to   = kubectl_manifest.grafana_tunnel_bind
}
|
||||
189
tofu/modules/prometheus/values.yaml
Normal file
189
tofu/modules/prometheus/values.yaml
Normal file
@@ -0,0 +1,189 @@
|
||||
# Prometheus server settings
prometheus:
  # In-cluster service only.
  service:
    type: ClusterIP
    port: 9090

  # No Ingress is created; enable it and fill in the sample below if needed.
  ingress:
    enabled: false
    # ingressClassName: nginx
    # hosts:
    #   - prometheus.example.com
    # tls:
    #   - secretName: prometheus-tls
    #     hosts:
    #       - prometheus.example.com

  prometheusSpec:
    # Scrape and rule-evaluation cadence
    scrapeInterval: 30s
    evaluationInterval: 30s

    # Retention limits: 30 days of data, capped at 45GB on disk.
    retention: 30d
    retentionSize: "45GB"

    # CPU / memory requests and limits
    resources:
      requests:
        cpu: 500m
        memory: 2Gi
      limits:
        cpu: 2000m
        memory: 4Gi

    # Persistent volume claim for the time-series database
    storageSpec:
      volumeClaimTemplate:
        spec:
          accessModes:
            - ReadWriteOnce
          resources:
            requests:
              storage: 50Gi
          # storageClassName: "your-storage-class"  # Uncomment and specify if needed
|
||||
|
||||
# Grafana settings
grafana:
  enabled: true

  # SECURITY: placeholder admin credential. This module's TunnelBinding
  # publishes Grafana on a public grafana.<domain> hostname, so replace this
  # value before the stack is reachable from outside.
  adminPassword: "admin"

  # In-cluster service only; external access goes through the tunnel binding.
  service:
    type: ClusterIP
    port: 80

  # No Ingress is created; enable it and fill in the sample below if needed.
  ingress:
    enabled: false
    # ingressClassName: nginx
    # hosts:
    #   - grafana.example.com
    # tls:
    #   - secretName: grafana-tls
    #     hosts:
    #       - grafana.example.com

  # Keep Grafana data on a persistent volume.
  persistence:
    enabled: true
    size: 10Gi
    # storageClassName: "your-storage-class"  # Uncomment and specify if needed

  # CPU / memory requests and limits
  resources:
    requests:
      cpu: 100m
      memory: 256Mi
    limits:
      cpu: 500m
      memory: 512Mi

  # Ship the chart's default dashboards, displayed in the given timezone.
  defaultDashboardsEnabled: true
  defaultDashboardsTimezone: Europe/Prague
|
||||
|
||||
# Alertmanager settings
alertmanager:
  enabled: true

  alertmanagerSpec:
    # Persistent volume claim for Alertmanager data
    storage:
      volumeClaimTemplate:
        spec:
          accessModes:
            - ReadWriteOnce
          resources:
            requests:
              storage: 10Gi
          # storageClassName: "your-storage-class"  # Uncomment and specify if needed

    # CPU / memory requests and limits
    resources:
      requests:
        cpu: 100m
        memory: 128Mi
      limits:
        cpu: 500m
        memory: 256Mi

  # In-cluster service only.
  service:
    type: ClusterIP
    port: 9093

  # No Ingress is created; enable it and fill in the sample below if needed.
  ingress:
    enabled: false
    # ingressClassName: nginx
    # hosts:
    #   - alertmanager.example.com
    # tls:
    #   - secretName: alertmanager-tls
    #     hosts:
    #       - alertmanager.example.com

  # Alertmanager routing configuration
  config:
    global:
      resolve_timeout: 5m

    route:
      group_by:
        - alertname
        - cluster
        - service
      group_wait: 10s
      group_interval: 10s
      repeat_interval: 12h
      # Everything is routed to the 'null' receiver until a real receiver
      # is configured below.
      receiver: 'null'
      routes:
        # `matchers` replaces the deprecated `match` key.
        - matchers:
            - alertname = "Watchdog"
          receiver: 'null'

    receivers:
      - name: 'null'
      # Add your receivers here (email, slack, pagerduty, etc.)
      # Double quotes are required so that \n is an actual newline.
      # - name: 'slack'
      #   slack_configs:
      #     - api_url: 'YOUR_SLACK_WEBHOOK_URL'
      #       channel: '#alerts'
      #       title: "{{ range .Alerts }}{{ .Annotations.summary }}\n{{ end }}"
      #       text: "{{ range .Alerts }}{{ .Annotations.description }}\n{{ end }}"
|
||||
|
||||
# Node Exporter toggle
nodeExporter:
  enabled: true

# kube-state-metrics toggle
kubeStateMetrics:
  enabled: true
|
||||
|
||||
# Prometheus Operator settings
prometheusOperator:
  enabled: true

  # CPU / memory requests and limits for the operator
  resources:
    requests:
      cpu: 100m
      memory: 128Mi
    limits:
      cpu: 500m
      memory: 256Mi
|
||||
|
||||
# Service Monitors
# Enables the monitor shipped with the node-exporter sub-chart so that
# services carrying the appropriate labels are discovered automatically.
prometheus-node-exporter:
  prometheus:
    monitor:
      enabled: true

# Additional ServiceMonitors can be defined here
# additionalServiceMonitors: []
|
||||
|
||||
# Chart-wide settings: create the RBAC resources.
global:
  rbac:
    create: true
|
||||
5
tofu/modules/prometheus/variables.tf
Normal file
5
tofu/modules/prometheus/variables.tf
Normal file
@@ -0,0 +1,5 @@
|
||||
# Base domain under which the Grafana hostname is published
# (rendered into grafana-ui.yaml as grafana.<domain>).
#
# The explanatory text previously sat in `default`, which made it the actual
# value whenever the caller omitted the input. It is a description, so the
# variable is now required and must be supplied by the calling module.
variable "cloudflare_domain" {
  type        = string
  description = "Base cloudflare domain, e.g. example.com"
  nullable    = false
}
|
||||
@@ -65,7 +65,11 @@ resource "helm_release" "rabbitmq" {
|
||||
{
|
||||
name = "podAntiAffinityPreset"
|
||||
value = "soft"
|
||||
}
|
||||
},
|
||||
{
|
||||
name = "image.repository"
|
||||
value = "bitnamilegacy/rabbitmq"
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user