# 创建命名空间 (create the namespace)

# Namespace used by the logging stack resources (Loki, Promtail, Grafana).
kind: Namespace
apiVersion: v1
metadata:
  labels:
    name: logging
  name: logging

---
# 部署 Loki (deploy Loki)

# Loki server configuration, delivered as the file loki.yaml.
# The loki StatefulSet in this file mounts this ConfigMap at /etc/loki and
# starts Loki with -config.file=/etc/loki/loki.yaml.
apiVersion: v1
kind: ConfigMap
metadata:
  name: loki-config
  namespace: logging
data:
  # Summary of the embedded settings (the text below is handed to Loki as-is):
  #   - auth_enabled is false; HTTP listens on 3100 and gRPC on 9096.
  #   - Ring kvstore is inmemory with replication_factor 1.
  #   - Index (boltdb-shipper), chunks, compactor and ruler data all live
  #     under /loki on the local filesystem.
  #   - Samples older than 168h are rejected; table_manager retention deletes
  #     are disabled.
  # NOTE(review): no retention/deletion appears to be configured, so stored
  # data presumably grows until the volume is full - confirm this is intended.
  loki.yaml: |
    auth_enabled: false

    server:
      http_listen_port: 3100
      grpc_listen_port: 9096
    
    common:
      path_prefix: /loki
      replication_factor: 1
      ring:
        instance_addr: 127.0.0.1
        kvstore:
          store: inmemory
    
    query_range:
      results_cache:
        cache:
          embedded_cache:
            enabled: true
            max_size_mb: 100
    
    schema_config:
      configs:
        - from: 2020-10-24
          store: boltdb-shipper
          object_store: filesystem
          schema: v11
          index:
            prefix: index_
            period: 24h
    
    storage_config:
      boltdb_shipper:
        active_index_directory: /loki/index
        cache_location: /loki/boltdb-cache
        shared_store: filesystem
      filesystem:
        directory: /loki/chunks
    
    compactor:
      working_directory: /loki/compactor
      shared_store: filesystem
    
    limits_config:
      reject_old_samples: true
      reject_old_samples_max_age: 168h
      max_query_length: 721h
      max_query_parallelism: 32
    
    chunk_store_config:
      max_look_back_period: 0s
    
    table_manager:
      retention_deletes_enabled: false
      retention_period: 0s
    
    ruler:
      storage:
        type: local
        local:
          directory: /loki/rules
      rule_path: /loki/rules-temp
      ring:
        kvstore:
          store: inmemory
      enable_api: true
---
# Single-replica Loki server.
# Reads its configuration from the loki-config ConfigMap (mounted at /etc/loki)
# and keeps all data on the loki-storage PersistentVolumeClaim (mounted at
# /loki).
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: loki
  namespace: logging
  labels:
    app: loki
spec:
  serviceName: loki
  replicas: 1
  selector:
    matchLabels:
      app: loki
  template:
    metadata:
      labels:
        app: loki
    spec:
      # The grafana/loki image runs as the non-root user/group 10001. Pinning
      # the IDs and setting fsGroup lets that user write to the mounted /loki
      # volume on storage back-ends that honour fsGroup.
      # NOTE(review): some NFS provisioners ignore fsGroup - if so, make sure
      # the export is writable by UID/GID 10001.
      securityContext:
        fsGroup: 10001
        runAsGroup: 10001
        runAsUser: 10001
        runAsNonRoot: true
      containers:
      - name: loki
        image: grafana/loki:2.9.0
        imagePullPolicy: IfNotPresent
        args:
        - -config.file=/etc/loki/loki.yaml
        ports:
        - containerPort: 3100
          name: http
        # gRPC listener configured in loki.yaml (grpc_listen_port: 9096) and
        # targeted by the loki Service; declared here so the two agree.
        - containerPort: 9096
          name: grpc
        readinessProbe:
          httpGet:
            path: /ready
            port: 3100
          initialDelaySeconds: 10
          timeoutSeconds: 1
        # /ready answers non-200 until Loki has finished starting. Give the
        # liveness probe a longer head start than the readiness probe so a
        # slow start-up is reported as "not ready" instead of getting the
        # container killed and restarted.
        livenessProbe:
          httpGet:
            path: /ready
            port: 3100
          initialDelaySeconds: 45
          timeoutSeconds: 1
        volumeMounts:
        - name: config
          mountPath: /etc/loki
        - name: storage
          mountPath: /loki
        resources:
          requests:
            memory: "512Mi"
            cpu: "500m"
          limits:
            memory: "2Gi"
            cpu: "2000m"
      volumes:
      - name: config
        configMap:
          name: loki-config
      - name: storage
        persistentVolumeClaim:
          claimName: loki-storage
---
# Storage claim for Loki data; the loki StatefulSet mounts it at /loki.
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  namespace: logging
  name: loki-storage
spec:
  # Adjust to your storage class; delete this line to use the default class.
  storageClassName: nfs
  resources:
    requests:
      storage: 100Gi
  accessModes:
    - ReadWriteOnce
---
# Cluster-internal Service in front of the Loki pod(s) labelled app=loki.
# Exposes the HTTP API on 3100 and gRPC on 9096.
kind: Service
apiVersion: v1
metadata:
  namespace: logging
  name: loki
  labels:
    app: loki
spec:
  type: ClusterIP
  selector:
    app: loki
  ports:
    - name: http
      protocol: TCP
      port: 3100
      targetPort: 3100
    - name: grpc
      protocol: TCP
      port: 9096
      targetPort: 9096

---
# 部署 Promtail (deploy Promtail)

# Promtail configuration, delivered as the file promtail.yaml.
# The promtail DaemonSet in this file mounts it at /etc/promtail.
# It discovers pods through the Kubernetes API, tails their log files under
# /var/log/pods and pushes them to the Loki Service at loki.logging:3100.
apiVersion: v1
kind: ConfigMap
metadata:
  name: promtail-config
  namespace: logging
data:
  promtail.yaml: |
    server:
      http_listen_port: 9080
      grpc_listen_port: 0

    positions:
      filename: /tmp/positions.yaml

    clients:
    - url: http://loki.logging:3100/loki/api/v1/push
      backoff_config:
        min_period: 100ms
        max_period: 10s
        max_retries: 10

    scrape_configs:
    - job_name: pod-logs
      kubernetes_sd_configs:
        - role: pod
      pipeline_stages:
        - docker: {}
      relabel_configs:
        # Promtail keeps only targets whose __host__ label equals its own
        # hostname (the HOSTNAME env var, set to the node name by the
        # DaemonSet). Without this rule every Promtail pod tracks the pods of
        # every node even though it can only read files on its own node.
        - source_labels:
            - __meta_kubernetes_pod_node_name
          target_label: __host__
        - source_labels:
            - __meta_kubernetes_pod_node_name
          target_label: host
        - action: labelmap
          regex: __meta_kubernetes_pod_label_(.+)
        - action: replace
          replacement: $1
          separator: /
          source_labels:
            - __meta_kubernetes_namespace
            - __meta_kubernetes_pod_name
          target_label: job
        - action: replace
          source_labels:
            - __meta_kubernetes_namespace
          target_label: namespace
        - action: replace
          source_labels:
            - __meta_kubernetes_pod_name
          target_label: pod
        - action: replace
          source_labels:
            - __meta_kubernetes_pod_container_name
          target_label: container
        - replacement: /var/log/pods/*$1/*.log
          separator: /
          source_labels:
            - __meta_kubernetes_pod_uid
            - __meta_kubernetes_pod_container_name
          target_label: __path__
---
# Identity the promtail DaemonSet pods run as; bound to the promtail
# ClusterRole by the ClusterRoleBinding of the same name.
kind: ServiceAccount
apiVersion: v1
metadata:
  namespace: logging
  name: promtail
---
# Read-only cluster-wide access used by Promtail's Kubernetes service
# discovery.
# The former second rule (pods in the "extensions" API group) was removed:
# pods are served only by the core ("") group, so that rule granted nothing.
# NOTE(review): the scrape config in this file uses only `role: pod`
# discovery, so nodes, nodes/proxy, services and endpoints may be more than
# is needed - confirm before trimming.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: promtail
rules:
- apiGroups: [""]
  resources:
  - nodes
  - nodes/proxy
  - services
  - endpoints
  - pods
  verbs: ["get", "list", "watch"]
---
# Grants the promtail ClusterRole to the promtail ServiceAccount in the
# logging namespace.
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: promtail
subjects:
  - kind: ServiceAccount
    namespace: logging
    name: promtail
roleRef:
  kind: ClusterRole
  name: promtail
  apiGroup: rbac.authorization.k8s.io
---
# Promtail log shipper, scheduled through a DaemonSet.
# Each pod mounts the host's /var/log and /data/docker/containers read-only
# and runs with the promtail ServiceAccount; the tolerations below accept any
# NoSchedule and NoExecute taint.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: promtail
  namespace: logging
  labels:
    app: promtail
spec:
  selector:
    matchLabels:
      app: promtail
  template:
    metadata:
      labels:
        app: promtail
    spec:
      serviceAccountName: promtail
      containers:
      - name: promtail
        image: grafana/promtail:2.9.0
        imagePullPolicy: IfNotPresent
        args:
        - -config.file=/etc/promtail/promtail.yaml
        env:
        - name: 'HOSTNAME' # needed when using kubernetes_sd_configs
          valueFrom:
            fieldRef:
              fieldPath: 'spec.nodeName'
        ports:
        - containerPort: 9080
          name: http
        volumeMounts:
        - name: config
          mountPath: /etc/promtail
        - name: varlog
          mountPath: /var/log
          readOnly: true
        - name: varlibdockercontainers
          mountPath: /data/docker/containers
          readOnly: true
        # promtail-config writes its positions file to /tmp/positions.yaml, so
        # the positions volume stays mounted at /tmp inside the container.
        - name: positions
          mountPath: /tmp
        resources:
          requests:
            memory: "128Mi"
            cpu: "100m"
          limits:
            memory: "2048Mi"
            cpu: "2000m"
      volumes:
      - name: config
        configMap:
          name: promtail-config
      - name: varlog
        hostPath:
          path: /var/log
      - name: varlibdockercontainers
        hostPath:
          path: /data/docker/containers
      # Positions are kept on the node so they survive pod restarts. A
      # dedicated directory is used instead of the host's shared /tmp, which
      # is world-writable and may be wiped on reboot, and which would
      # otherwise be exposed read-write to this container.
      # NOTE(review): on first rollout the old /tmp/positions.yaml is not
      # carried over, so existing log files are re-read once; copy the file
      # into /var/lib/promtail beforehand to avoid that.
      - name: positions
        hostPath:
          path: /var/lib/promtail
          type: DirectoryOrCreate
      tolerations:
      - effect: NoSchedule
        operator: Exists
      - effect: NoExecute
        operator: Exists

---
# 部署 Grafana (deploy Grafana)

# Grafana main configuration, delivered as the file grafana.ini.
# The grafana Deployment in this file mounts it at /etc/grafana/grafana.ini
# using subPath; Kubernetes does not refresh subPath mounts when a ConfigMap
# changes, so the pod must be restarted to pick up edits.
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-config
  namespace: logging
  labels:
    app: grafana
data:
  # Settings actually set below (all other sections are empty placeholders):
  #   [paths]          provisioning directory /etc/grafana/provisioning
  #   [server]         root_url ends in /monitor/, serve_from_sub_path = true
  #   [security]       allow_embedding = true, admin user and password
  #   [users]          light theme, zh-Hans language
  #   [auth.anonymous] enabled = false, hide_version = true
  # NOTE(review): admin_password is stored here in plain text inside a
  # ConfigMap. Consider moving it to a Secret (for example exposed to the pod
  # as GF_SECURITY_ADMIN_PASSWORD) and rotating the current value.
  grafana.ini: |
    [paths]
    provisioning = /etc/grafana/provisioning
    [server]
    root_url = %(protocol)s://%(domain)s:%(http_port)s/monitor/
    serve_from_sub_path = true
    [database]
    [datasources]
    [remote_cache]
    [dataproxy]
    [analytics]
    [security]
    # 允许浏览器渲染grafana到iframe
    allow_embedding = true
    admin_user = admin
    admin_password = Vj@%{<W3BFG&AGz
    [snapshots]
    [dashboards]
    [users]
    default_theme = light
    default_language = zh-Hans
    [auth]
    [auth.anonymous]
    #开启匿名登录
    enabled = false
    #影藏版本
    hide_version = true
    [auth.github]
    [auth.gitlab]
    [auth.google]
    [auth.grafana_com]
    [auth.azuread]
    [auth.okta]
    [auth.generic_oauth]
    [auth.basic]
    [auth.proxy]
    [auth.jwt]
    [auth.ldap]
    [aws]
    [azure]
    [smtp]
    [emails]
    [log]
    [log.console]
    [log.file]
    [log.syslog]
    [log.frontend]
    [quota]
    [unified_alerting]
    [alerting]
    [annotations]
    [annotations.dashboard]
    [annotations.api]
    [explore]
    [query_history]
    [metrics]
    [metrics.environment_info]
    [metrics.graphite]
    [grafana_com]
    [tracing.jaeger]
    [tracing.opentelemetry.jaeger]
    [external_image_storage]
    [external_image_storage.s3]
    [external_image_storage.webdav]
    [external_image_storage.gcs]
    [external_image_storage.azure_blob]
    [external_image_storage.local]
    [rendering]
    [panels]
    [plugins]
    [live]
    [plugin.grafana-image-renderer]
    [enterprise]
    [feature_toggles]
    [date_formats]
    [expressions]
    [geomap]
---
# Storage claim for Grafana state; the grafana Deployment mounts it at
# /var/lib/grafana.
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  namespace: logging
  name: grafana-storage
  labels:
    app: grafana
spec:
  # Adjust to your storage class; delete this line to use the default class.
  storageClassName: nfs
  resources:
    requests:
      storage: 100Gi
  accessModes:
    - ReadWriteOnce
---
# Grafana datasource provisioning file. The grafana Deployment mounts this
# ConfigMap at /etc/grafana/provisioning/datasources; it declares a single
# Loki datasource that points at the loki Service in the logging namespace.
kind: ConfigMap
apiVersion: v1
metadata:
  namespace: logging
  name: grafana-datasources
  labels:
    app: grafana
data:
  loki-datasource.yaml: |
    apiVersion: 1
    datasources:
    - name: Loki
      type: loki
      access: proxy
      url: http://loki.logging:3100
      isDefault: false
      version: 1
      editable: true
---
# Grafana web UI, single replica.
# State is kept on the grafana-storage claim, grafana.ini comes from the
# grafana-config ConfigMap and datasource provisioning from the
# grafana-datasources ConfigMap.
kind: Deployment
apiVersion: apps/v1
metadata:
  namespace: logging
  name: grafana
  labels:
    app: grafana
spec:
  replicas: 1
  # Recreate: the running pod is removed before its replacement is created.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: grafana
  template:
    metadata:
      labels:
        app: grafana
    spec:
      # Run as UID/GID 472 and apply the same group to mounted volumes.
      securityContext:
        runAsUser: 472
        runAsGroup: 472
        fsGroup: 472
      volumes:
        - name: grafana-storage
          persistentVolumeClaim:
            claimName: grafana-storage
        - name: grafana-config
          configMap:
            name: grafana-config
        - name: grafana-datasources
          configMap:
            name: grafana-datasources
      containers:
        - name: grafana
          image: grafana/grafana:10.4.5
          imagePullPolicy: IfNotPresent
          env:
            # Comma-separated plugin ids handed to Grafana via GF_INSTALL_PLUGINS.
            - name: GF_INSTALL_PLUGINS
              value: "grafana-clock-panel,grafana-simple-json-datasource"
          ports:
            - name: http
              protocol: TCP
              containerPort: 3000
          volumeMounts:
            - name: grafana-storage
              mountPath: /var/lib/grafana
            # Single-file mount: only grafana.ini is projected into /etc/grafana.
            - name: grafana-config
              mountPath: /etc/grafana/grafana.ini
              subPath: grafana.ini
            - name: grafana-datasources
              mountPath: /etc/grafana/provisioning/datasources
          resources:
            limits:
              cpu: "2000m"
              memory: "2048Mi"
            requests:
              cpu: "250m"
              memory: "256Mi"
          # Both probes call /api/health on the named http port; the liveness
          # probe starts five seconds later than the readiness probe.
          readinessProbe:
            initialDelaySeconds: 10
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
            httpGet:
              path: /api/health
              port: http
          livenessProbe:
            initialDelaySeconds: 15
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
            httpGet:
              path: /api/health
              port: http
---
# Cluster-internal Service for Grafana: port 3000 forwards to the container
# port named "http" on pods labelled app=grafana.
kind: Service
apiVersion: v1
metadata:
  namespace: logging
  name: grafana
  labels:
    app: grafana
spec:
  type: ClusterIP
  selector:
    app: grafana
  ports:
    - name: http
      protocol: TCP
      port: 3000
      targetPort: http