7. Monitoreo y Observabilidad

7. Monitoreo y Observabilidad

Esta sección cubre el monitoreo completo del sistema DTEM, incluyendo métricas, logging, alerting, tracing y troubleshooting.

7.1. Arquitectura de Monitoreo

7.1.1. Stack de Observabilidad

Monitoring Stack Architecture

┌─────────────────────────────────────────────────────────────┐
│                  OBSERVABILITY STACK                        │
├─────────────────────────────────────────────────────────────┤
│  ┌─────────────┐  ┌─────────────┐  ┌─────────────┐        │
│  │  Grafana    │  │  Kibana     │  │  Jaeger     │        │
│  │ Dashboard   │  │   Logs      │  │  Tracing    │        │
│  └─────────────┘  └─────────────┘  └─────────────┘        │
│         │                │                │               │
│  ┌─────────────────────────────────────────────────────┐   │
│  │              PROMETHEUS                              │   │
│  │  ┌─────────────┬─────────────┬─────────────┐     │   │
│  │  │   Metrics   │   Alerting  │   Storage   │     │   │
│  │  └─────────────┴─────────────┴─────────────┘     │   │
│  └─────────────────────────────────────────────────────┘   │
│         │                │                │               │
│  ┌─────────────────────────────────────────────────────┐   │
│  │              ELASTICSEARCH                           │   │
│  │  ┌─────────────┬─────────────┬─────────────┐     │   │
│  │  │    Logs     │   Metrics   │   Traces    │     │   │
│  │  └─────────────┴─────────────┴─────────────┘     │   │
│  └─────────────────────────────────────────────────────┘   │
└─────────────────────────────────────────────────────────────┘

Data Flow

flowchart TD
    A[Applications] --> B[Metrics Exporters]
    A --> C[Log Collectors]
    A --> D[Trace Collectors]
    
    B --> E[Prometheus]
    C --> F[Logstash]
    D --> G[Jaeger Collector]
    
    E --> H[Grafana]
    F --> I[Elasticsearch]
    G --> I
    
    I --> J[Kibana]
    E --> K[AlertManager]
    
    K --> L[Slack/Email/PagerDuty]

7.2. Métricas y Monitoring

7.2.1. Application Metrics

Custom Metrics Implementation

// Metrics Collection
const prometheus = require('prom-client');

// Dedicated registry so /metrics exposes exactly the metrics we register here.
const register = new prometheus.Registry();

// Process-level defaults (CPU, memory, event-loop lag, GC, ...).
prometheus.collectDefaultMetrics({ register });

// --- Custom application metrics ---------------------------------------------

// Request latency, labelled per method/route/status.
const httpRequestDuration = new prometheus.Histogram({
    name: 'http_request_duration_seconds',
    help: 'Duration of HTTP requests in seconds',
    labelNames: ['method', 'route', 'status_code'],
    buckets: [0.1, 0.3, 0.5, 0.7, 1, 3, 5, 7, 10]
});

// Raw request count, same label set as the latency histogram.
const httpRequestTotal = new prometheus.Counter({
    name: 'http_requests_total',
    help: 'Total number of HTTP requests',
    labelNames: ['method', 'route', 'status_code']
});

// Live WebSocket connection gauge.
const activeConnections = new prometheus.Gauge({
    name: 'websocket_connections_active',
    help: 'Number of active WebSocket connections'
});

// End-to-end DTE processing time; wider buckets than HTTP since signing and
// SII round-trips can take tens of seconds.
const dteProcessingTime = new prometheus.Histogram({
    name: 'dte_processing_duration_seconds',
    help: 'Time spent processing DTEs',
    labelNames: ['document_type', 'operation'],
    buckets: [0.5, 1, 2, 5, 10, 30, 60]
});

// Sampled at scrape time by updateDynamicMetrics().
const databaseConnections = new prometheus.Gauge({
    name: 'database_connections_active',
    help: 'Number of active database connections',
    labelNames: ['database']
});

// Sampled at scrape time by updateDynamicMetrics().
const queueSize = new prometheus.Gauge({
    name: 'message_queue_size',
    help: 'Number of messages in queue',
    labelNames: ['queue_name']
});

// Register every custom metric with the dedicated registry.
for (const metric of [
    httpRequestDuration,
    httpRequestTotal,
    activeConnections,
    dteProcessingTime,
    databaseConnections,
    queueSize
]) {
    register.registerMetric(metric);
}

/**
 * Express middleware that records one latency observation and one request
 * count per completed response, labelled by method, route and status code.
 * Uses the route template (req.route.path) when Express has resolved one, so
 * parameterized URLs don't explode label cardinality.
 */
function metricsMiddleware(req, res, next) {
    const startedAt = Date.now();

    res.on('finish', () => {
        const elapsedSeconds = (Date.now() - startedAt) / 1000;
        const route = req.route ? req.route.path : req.path;
        const labels = {
            method: req.method,
            route,
            status_code: res.statusCode.toString()
        };

        httpRequestDuration.observe(labels, elapsedSeconds);
        httpRequestTotal.inc(labels);
    });

    next();
}

// Prometheus scrape endpoint: refreshes the scrape-time gauges, then
// serializes the registry in the Prometheus exposition format.
app.get('/metrics', async (req, res) => {
    try {
        await updateDynamicMetrics();
        const payload = await register.metrics();
        res.set('Content-Type', register.contentType);
        res.end(payload);
    } catch (error) {
        console.error('Error generating metrics:', error);
        res.status(500).end('Error generating metrics');
    }
});

/**
 * Refresh gauges whose values must be sampled at scrape time
 * (active DB connections and per-queue message counts).
 */
async function updateDynamicMetrics() {
    // Fix: pg returns count(*) (a bigint) as a string; Gauge.set requires a
    // number, so coerce explicitly.
    const dbConnections = await pool.query('SELECT count(*) FROM pg_stat_activity');
    databaseConnections.set({ database: 'postgres' }, Number(dbConnections.rows[0].count));

    // getQueueSizes() is expected to return { queueName: messageCount }.
    const queueSizes = await getQueueSizes();
    for (const [queue, size] of Object.entries(queueSizes)) {
        queueSize.set({ queue_name: queue }, size);
    }
}

Business Metrics

/**
 * Business-level KPIs for the DTE pipeline: documents created, sent to SII,
 * approved by SII, SII latency, and CAF folio consumption. Metrics are
 * created on the default prom-client registry.
 */
class BusinessMetricsCollector {
    constructor() {
        this.dteCreated = new prometheus.Counter({
            name: 'dte_created_total',
            help: 'Total number of DTEs created',
            labelNames: ['document_type', 'company_id']
        });

        this.dteSent = new prometheus.Counter({
            name: 'dte_sent_total',
            help: 'Total number of DTEs sent to SII',
            labelNames: ['document_type', 'company_id']
        });

        this.dteApproved = new prometheus.Counter({
            name: 'dte_approved_total',
            help: 'Total number of DTEs approved by SII',
            labelNames: ['document_type', 'company_id']
        });

        this.siiResponseTime = new prometheus.Histogram({
            name: 'sii_response_duration_seconds',
            help: 'SII API response time',
            labelNames: ['operation'],
            buckets: [1, 2, 5, 10, 30, 60, 120]
        });

        this.cafUsage = new prometheus.Gauge({
            name: 'caf_folios_used',
            help: 'Number of CAF folios used',
            labelNames: ['document_type', 'company_id']
        });
    }

    /** Count one newly created DTE. */
    recordDTECreated(documentType, companyId) {
        const labels = { document_type: documentType, company_id: companyId };
        this.dteCreated.inc(labels);
    }

    /** Count one DTE submitted to SII. */
    recordDTESent(documentType, companyId) {
        const labels = { document_type: documentType, company_id: companyId };
        this.dteSent.inc(labels);
    }

    /** Count one DTE accepted by SII. */
    recordDTEApproved(documentType, companyId) {
        const labels = { document_type: documentType, company_id: companyId };
        this.dteApproved.inc(labels);
    }

    /** Record one SII round-trip duration (seconds). */
    recordSIIResponse(operation, duration) {
        this.siiResponseTime.observe({ operation }, duration);
    }

    /** Set the current number of consumed folios for a CAF. */
    updateCAFUsage(documentType, companyId, usedFolios) {
        const labels = { document_type: documentType, company_id: companyId };
        this.cafUsage.set(labels, usedFolios);
    }
}

7.2.2. Infrastructure Metrics

Node Exporter Configuration

# node-exporter-config.yaml
# One node-exporter pod per cluster node, exposing host-level metrics
# (CPU, memory, filesystem, network) on port 9100.
# Fix: DaemonSet belongs to the apps/v1 API group; `apiVersion: v1` is
# rejected by the Kubernetes API server.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-exporter
  namespace: monitoring
spec:
  selector:
    matchLabels:
      app: node-exporter
  template:
    metadata:
      labels:
        app: node-exporter
    spec:
      containers:
      - name: node-exporter
        # NOTE(review): consider pinning a version tag instead of :latest
        # for reproducible rollouts.
        image: prom/node-exporter:latest
        args:
          # Read host metrics from the mounted root filesystem.
          - --path.rootfs=/host
          # Skip pseudo-filesystems and per-container docker mounts.
          - --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|var/lib/docker/.+)($|/)
          # Skip virtual veth interfaces created per container.
          - --collector.netdev.device-exclude=^(veth.*)$
        ports:
        - containerPort: 9100
          name: metrics
        volumeMounts:
        - name: rootfs
          mountPath: /host
          readOnly: true
        resources:
          requests:
            memory: 30Mi
            cpu: 100m
          limits:
            memory: 50Mi
            cpu: 200m
      volumes:
      - name: rootfs
        hostPath:
          path: /

Database Metrics Exporter

# postgres-exporter.yaml
# Exports PostgreSQL server metrics to Prometheus on port 9187.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: postgres-exporter
  namespace: monitoring
spec:
  replicas: 1
  selector:
    matchLabels:
      app: postgres-exporter
  template:
    metadata:
      labels:
        app: postgres-exporter
    spec:
      containers:
      - name: postgres-exporter
        # NOTE(review): pin an explicit image tag instead of :latest.
        image: prometheuscommunity/postgres-exporter:latest
        env:
          # Fix: the full connection string (including the database password)
          # was hard-coded in the manifest. Source the DSN from a Secret so
          # credentials never live in version-controlled YAML.
          - name: DATA_SOURCE_NAME
            valueFrom:
              secretKeyRef:
                name: postgres-exporter-dsn
                key: data-source-name
          - name: PG_EXPORTER_EXTEND_QUERY_PATH
            value: "/etc/postgres-exporter/queries.yaml"
        ports:
        - containerPort: 9187
          name: metrics
        volumeMounts:
          - name: queries
            mountPath: /etc/postgres-exporter
        resources:
          requests:
            memory: 50Mi
            cpu: 100m
          limits:
            memory: 100Mi
            cpu: 200m
      volumes:
      - name: queries
        configMap:
          name: postgres-exporter-queries

7.3. Logging Centralizado

7.3.1. Structured Logging

Logging Configuration

// Winston Logger Configuration
const winston = require('winston');
const { ElasticsearchTransport } = require('winston-elasticsearch');

// Custom log format.
// Fix: the original chained winston.format.json() before printf(); printf
// produces the final output string, so json()'s serialization was always
// discarded. Removed the dead format.
const logFormat = winston.format.combine(
    winston.format.timestamp(),
    winston.format.errors({ stack: true }),
    winston.format.printf(({ timestamp, level, message, ...meta }) => {
        return JSON.stringify({
            timestamp,
            level,
            message,
            service: 'dtem-api',
            version: process.env.APP_VERSION || '3.0.0',
            environment: process.env.NODE_ENV || 'development',
            traceId: meta.traceId,
            userId: meta.userId,
            requestId: meta.requestId,
            ...meta
        });
    })
);

// Logger with three sinks: colorized console for development, size-rotated
// files for local persistence, and Elasticsearch for centralized search.
const logger = winston.createLogger({
    level: process.env.LOG_LEVEL || 'info',
    format: logFormat,
    defaultMeta: {
        service: 'dtem-api',
        version: process.env.APP_VERSION || '3.0.0'
    },
    transports: [
        // Console transport for development
        new winston.transports.Console({
            format: winston.format.combine(
                winston.format.colorize(),
                winston.format.simple()
            )
        }),

        // File transports for production; capped size with rotation.
        new winston.transports.File({
            filename: 'logs/error.log',
            level: 'error',
            maxsize: 10485760, // 10MB
            maxFiles: 5
        }),

        new winston.transports.File({
            filename: 'logs/combined.log',
            maxsize: 10485760, // 10MB
            maxFiles: 5
        }),

        // Elasticsearch transport feeding the Kibana dashboards.
        new ElasticsearchTransport({
            level: 'info',
            clientOpts: {
                node: process.env.ELASTICSEARCH_URL || 'http://elasticsearch:9200',
                auth: {
                    username: process.env.ELASTICSEARCH_USER,
                    password: process.env.ELASTICSEARCH_PASSWORD
                }
            },
            index: 'dtem-logs',
            // Maps the winston record to the ES document shape.
            // NOTE(review): `fields: logData` re-embeds the whole record,
            // duplicating the top-level keys — confirm the dashboards rely on
            // it before removing.
            transformer: (logData) => {
                return {
                    '@timestamp': logData.timestamp,
                    level: logData.level,
                    message: logData.message,
                    service: logData.service,
                    version: logData.version,
                    environment: logData.environment,
                    traceId: logData.traceId,
                    userId: logData.userId,
                    requestId: logData.requestId,
                    fields: logData
                };
            }
        })
    ]
});

// Request logging middleware: tags every request with correlation IDs
// (honoring inbound X-Request-Id / X-Trace-Id headers) and logs both the
// inbound request and the outbound response.
function requestLogger(req, res, next) {
    const requestId = req.headers['x-request-id'] || crypto.randomUUID();
    const traceId = req.headers['x-trace-id'] || crypto.randomUUID();

    req.requestId = requestId;
    req.traceId = traceId;
    // Fix: the response log computed Date.now() - req.startTime, but nothing
    // ever set req.startTime, so responseTime was always NaN. Record it here.
    req.startTime = Date.now();

    // Log request
    logger.info('HTTP Request', {
        requestId,
        traceId,
        method: req.method,
        url: req.url,
        userAgent: req.get('User-Agent'),
        ip: req.ip,
        userId: req.user?.id
    });

    // Override res.end to log the response just before it is sent.
    const originalEnd = res.end;
    res.end = function (chunk, encoding) {
        logger.info('HTTP Response', {
            requestId,
            traceId,
            statusCode: res.statusCode,
            responseTime: Date.now() - req.startTime,
            userId: req.user?.id
        });

        originalEnd.call(this, chunk, encoding);
    };

    next();
}

Log Aggregation with Filebeat

# filebeat.yml
# Ships application log files and Docker container logs to Elasticsearch.
filebeat.inputs:
# Plain-file input: tails the application's own log files.
- type: log
  enabled: true
  paths:
    - /app/logs/*.log
  fields:
    service: dtem-api
    environment: ${ENVIRONMENT:development}
  fields_under_root: true
  # Join multi-line events (stack traces): a new event starts only when the
  # line begins with an ISO date (YYYY-MM-DD).
  multiline.pattern: '^\d{4}-\d{2}-\d{2}'
  multiline.negate: true
  multiline.match: after

# Container input: collects stdout/stderr of every Docker container and
# enriches events with container metadata via the Docker socket.
- type: container
  enabled: true
  paths:
    - '/var/lib/docker/containers/*/*.log'
  processors:
    - add_docker_metadata:
        host: "unix:///var/run/docker.sock"

# One index per day keeps retention/cleanup simple.
output.elasticsearch:
  hosts: ["${ELASTICSEARCH_HOSTS:elasticsearch:9200}"]
  username: "${ELASTICSEARCH_USER}"
  password: "${ELASTICSEARCH_PASSWORD}"
  index: "dtem-logs-%{+yyyy.MM.dd}"
  template.name: "dtem"
  template.pattern: "dtem-*"

setup.kibana:
  host: "${KIBANA_HOST:kibana:5601}"

# Filebeat's own operational logging (not the shipped application logs).
logging.level: info
logging.to_files: true
logging.files:
  path: /var/log/filebeat
  name: filebeat
  keepfiles: 7
  permissions: 0644

7.3.2. Log Analysis

Kibana Dashboards

{
  "dashboard": {
    "title": "DTEM Application Logs",
    "panels": [
      {
        "title": "Log Levels Over Time",
        "type": "histogram",
        "query": {
          "bool": {
            "must": [
              {"range": {"@timestamp": {"gte": "now-24h"}}}
            ]
          }
        },
        "timeField": "@timestamp",
        "interval": "1h"
      },
      {
        "title": "Error Rate",
        "type": "metric",
        "query": {
          "bool": {
            "must": [
              {"term": {"level": "error"}},
              {"range": {"@timestamp": {"gte": "now-1h"}}}
            ]
          }
        }
      },
      {
        "title": "Top Error Messages",
        "type": "table",
        "query": {
          "bool": {
            "must": [
              {"term": {"level": "error"}},
              {"range": {"@timestamp": {"gte": "now-24h"}}}
            ]
          }
        },
        "bucketAggs": [
          {
            "name": "message",
            "type": "terms",
            "field": "message.keyword",
            "size": 10
          }
        ]
      }
    ]
  }
}

7.4. Distributed Tracing

7.4.1. OpenTelemetry Implementation

Tracing Configuration

// OpenTelemetry Setup
const opentelemetry = require('@opentelemetry/api');
const { NodeSDK } = require('@opentelemetry/sdk-node');
const { getNodeAutoInstrumentations } = require('@opentelemetry/auto-instrumentations-node');
const { Resource } = require('@opentelemetry/resources');
const { SemanticResourceAttributes } = require('@opentelemetry/semantic-conventions');
const { TraceIdRatioBasedSampler } = require('@opentelemetry/sdk-trace-base');
// NOTE(review): JaegerExporter was required but never used in the original;
// kept for compatibility — remove if nothing else in the file needs it.
const { JaegerExporter } = require('@opentelemetry/exporter-jaeger');
const { OTLPTraceExporter } = require('@opentelemetry/exporter-otlp-grpc');

// Initialize OpenTelemetry.
// Fix: the original passed `spanProcessor: new BatchSpanProcessor(traceExporter)`
// where neither BatchSpanProcessor nor traceExporter was in scope — a
// ReferenceError at startup. NodeSDK already wraps `traceExporter` in a batch
// processor, so the explicit spanProcessor entry is removed.
const sdk = new NodeSDK({
    resource: new Resource({
        [SemanticResourceAttributes.SERVICE_NAME]: 'dtem-api',
        [SemanticResourceAttributes.SERVICE_VERSION]: process.env.APP_VERSION || '3.0.0',
        [SemanticResourceAttributes.DEPLOYMENT_ENVIRONMENT]: process.env.NODE_ENV || 'development'
    }),

    instrumentations: [getNodeAutoInstrumentations()],

    traceExporter: new OTLPTraceExporter({
        url: process.env.JAEGER_ENDPOINT || 'http://jaeger:4317'
    }),

    // Sample 100% of traces. NOTE(review): fine for dev; lower the ratio in
    // production to control trace volume.
    sampler: new TraceIdRatioBasedSampler(1.0)
});

sdk.start();

// Shared tracer for manual (custom) spans.
const tracer = opentelemetry.trace.getTracer('dtem-api');

/**
 * Helpers that wrap DTE and SII operations in OpenTelemetry spans, recording
 * success/failure, exceptions, and elapsed time as span attributes.
 */
class DTETracing {
    /**
     * Runs `callback(span)` inside a span named `dte.<operation>`, awaiting
     * its result. The span always ends, and carries success/timing attributes.
     */
    static async traceDTEProcessing(operation, dteData, callback) {
        // Fix: the original computed Date.now() - span.startTime[0], mixing an
        // epoch-milliseconds clock with the seconds half of an hrtime tuple.
        // Capture our own start timestamp instead (elapsed in milliseconds).
        const startedAt = Date.now();
        const span = tracer.startSpan(`dte.${operation}`, {
            attributes: {
                'dte.document_type': dteData.documentType,
                'dte.folio': dteData.folio,
                'dte.company_id': dteData.companyId
            }
        });

        try {
            const result = await callback(span);

            span.setAttributes({
                'dte.processing_time': Date.now() - startedAt,
                'dte.success': true
            });

            return result;
        } catch (error) {
            span.recordException(error);
            span.setAttributes({
                'dte.success': false,
                'error.message': error.message
            });
            throw error;
        } finally {
            span.end();
        }
    }

    /**
     * Synchronous variant for SII calls.
     * NOTE(review): if `callback` returns a promise, the span ends before the
     * work completes — confirm callers are synchronous, or use the awaiting
     * pattern from traceDTEProcessing.
     */
    static traceSIIIntegration(operation, data, callback) {
        const startedAt = Date.now();
        const span = tracer.startSpan(`sii.${operation}`, {
            attributes: {
                'sii.operation': operation,
                'sii.data_size': JSON.stringify(data).length
            }
        });

        try {
            const result = callback(span);

            span.setAttributes({
                'sii.response_time': Date.now() - startedAt,
                'sii.success': true
            });

            return result;
        } catch (error) {
            span.recordException(error);
            span.setAttributes({
                'sii.success': false,
                'error.message': error.message
            });
            throw error;
        } finally {
            span.end();
        }
    }
}

Tracing Middleware

// Express tracing middleware: opens a span per HTTP request and closes it
// (with status attributes) when the response is written.
function tracingMiddleware(req, res, next) {
    // Fix: the original computed Date.now() - req.startTime, but req.startTime
    // was never assigned anywhere, yielding NaN. Capture the start time here.
    const startedAt = Date.now();
    const span = tracer.startSpan('http.request', {
        attributes: {
            'http.method': req.method,
            'http.url': req.url,
            'http.user_agent': req.get('User-Agent'),
            'http.remote_addr': req.ip,
            'user.id': req.user?.id
        }
    });

    // Expose the span so handlers can attach child spans / attributes.
    req.span = span;

    // Override res.end to capture the response outcome before it is sent.
    const originalEnd = res.end;
    res.end = function (chunk, encoding) {
        span.setAttributes({
            'http.status_code': res.statusCode,
            'http.response_time': Date.now() - startedAt
        });

        if (res.statusCode >= 400) {
            span.setStatus({
                code: SpanStatusCode.ERROR,
                message: `HTTP ${res.statusCode}`
            });
        }

        span.end();
        originalEnd.call(this, chunk, encoding);
    };

    next();
}

7.5. Alerting y Notificaciones

7.5.1. Prometheus Alerting Rules

Application Alerts

# application-alerts.yml
# Prometheus alerting rules for the DTEM application layer.
groups:
  - name: dtem-application
    rules:
      # >5% of requests returning 5xx over 5 minutes, sustained for 2 minutes.
      - alert: HighErrorRate
        expr: rate(http_requests_total{status_code=~"5.."}[5m]) / rate(http_requests_total[5m]) > 0.05
        for: 2m
        labels:
          severity: critical
          service: dtem-api
        annotations:
          summary: "High error rate detected"
          description: "Error rate is {{ $value | humanizePercentage }} for the last 5 minutes"

      # p95 latency above 2 seconds for 5 minutes.
      - alert: HighResponseTime
        expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m])) > 2
        for: 5m
        labels:
          severity: warning
          service: dtem-api
        annotations:
          summary: "High response time detected"
          description: "95th percentile response time is {{ $value }}s"

      # More than 1000 pending messages in the DTE processing queue.
      - alert: DTEProcessingBacklog
        expr: message_queue_size{queue_name="dte.processing"} > 1000
        for: 10m
        labels:
          severity: warning
          service: dtem-api
        annotations:
          summary: "DTE processing backlog detected"
          description: "DTE processing queue has {{ $value }} messages"

      # NOTE(review): `sii_requests_failed_total` is not among the metrics the
      # application code above exports — confirm it exists or this never fires.
      - alert: SIIIntegrationFailure
        expr: rate(sii_requests_failed_total[5m]) > 0.1
        for: 1m
        labels:
          severity: critical
          service: dtem-api
        annotations:
          summary: "SII integration failures detected"
          description: "SII request failure rate is {{ $value | humanizePercentage }}"

      # NOTE(review): only `database_connections_active` is exported by the
      # application; `database_connections_max` must come from another exporter
      # or this rule will never evaluate — verify.
      - alert: DatabaseConnectionPoolExhaustion
        expr: database_connections_active / database_connections_max > 0.9
        for: 5m
        labels:
          severity: critical
          service: dtem-api
        annotations:
          summary: "Database connection pool nearly exhausted"
          description: "{{ $value | humanizePercentage }} of database connections are in use"

Infrastructure Alerts

# infrastructure-alerts.yml
# Prometheus alerting rules for host (node-exporter) and Kubernetes metrics.
groups:
  - name: dtem-infrastructure
    rules:
      # CPU busy (100 - idle) above 80% per instance for 5 minutes.
      - alert: HighCPUUsage
        expr: 100 - (avg by(instance) (irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80
        for: 5m
        labels:
          severity: warning
          service: infrastructure
        annotations:
          summary: "High CPU usage detected"
          description: "CPU usage is {{ $value }}% on {{ $labels.instance }}"

      # Memory in use above 85% of total (based on MemAvailable).
      - alert: HighMemoryUsage
        expr: (1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100 > 85
        for: 5m
        labels:
          severity: warning
          service: infrastructure
        annotations:
          summary: "High memory usage detected"
          description: "Memory usage is {{ $value }}% on {{ $labels.instance }}"

      # Any filesystem above 90% usage.
      - alert: DiskSpaceLow
        expr: (1 - (node_filesystem_avail_bytes / node_filesystem_size_bytes)) * 100 > 90
        for: 5m
        labels:
          severity: critical
          service: infrastructure
        annotations:
          summary: "Low disk space detected"
          description: "Disk usage is {{ $value }}% on {{ $labels.instance }}:{{ $labels.mountpoint }}"

      # Any container restarting within the last 15 minutes (requires
      # kube-state-metrics for kube_pod_container_status_restarts_total).
      - alert: PodCrashLooping
        expr: rate(kube_pod_container_status_restarts_total[15m]) > 0
        for: 5m
        labels:
          severity: critical
          service: kubernetes
        annotations:
          summary: "Pod is crash looping"
          description: "Pod {{ $labels.pod }} in namespace {{ $labels.namespace }} is crash looping"

7.5.2. AlertManager Configuration

AlertManager Setup

# alertmanager.yml
# Routes Prometheus alerts to email / Slack / PagerDuty by severity and team.
global:
  # SMTP relay used by all email_configs below.
  smtp_smarthost: 'smtp.empresa.cl:587'
  smtp_from: 'alerts@dtem.empresa.cl'
  smtp_auth_username: 'alerts@dtem.empresa.cl'
  smtp_auth_password: '${SMTP_PASSWORD}'

# Routing tree: first matching child route wins; unmatched alerts fall
# through to the 'default' receiver.
route:
  group_by: ['alertname', 'service']
  group_wait: 10s
  # NOTE(review): group_interval of 10s is unusually aggressive (new alerts
  # joining a group re-notify every 10 seconds) — confirm this is intended.
  group_interval: 10s
  repeat_interval: 1h
  receiver: 'default'
  routes:
    # Critical alerts: page immediately and re-page every 5 minutes.
    - match:
        severity: critical
      receiver: 'critical-alerts'
      group_wait: 0s
      repeat_interval: 5m

    - match:
        severity: warning
      receiver: 'warning-alerts'
      repeat_interval: 2h

    - match:
        service: dtem-api
      receiver: 'dtem-team'

    - match:
        service: infrastructure
      receiver: 'ops-team'

receivers:
  - name: 'default'
    email_configs:
      - to: 'ops-team@empresa.cl'
        subject: '[DTEM] {{ .GroupLabels.alertname }}'

  # Critical path fans out to email + Slack + PagerDuty simultaneously.
  - name: 'critical-alerts'
    email_configs:
      - to: 'oncall@empresa.cl'
        subject: '[CRITICAL] DTEM Alert: {{ .GroupLabels.alertname }}'
    slack_configs:
      - api_url: '${SLACK_WEBHOOK_URL}'
        channel: '#dtem-alerts'
        title: 'Critical DTEM Alert'
        text: '{{ range .Alerts }}{{ .Annotations.summary }}{{ end }}'
    pagerduty_configs:
      - service_key: '${PAGERDUTY_SERVICE_KEY}'
        severity: 'critical'

  - name: 'warning-alerts'
    email_configs:
      - to: 'dtem-team@empresa.cl'
        subject: '[WARNING] DTEM Alert: {{ .GroupLabels.alertname }}'
    slack_configs:
      - api_url: '${SLACK_WEBHOOK_URL}'
        channel: '#dtem-warnings'
        title: 'DTEM Warning'
        text: '{{ range .Alerts }}{{ .Annotations.summary }}{{ end }}'

  - name: 'dtem-team'
    email_configs:
      - to: 'dtem-team@empresa.cl'
        subject: '[DTEM] {{ .GroupLabels.alertname }}'

  - name: 'ops-team'
    email_configs:
      - to: 'ops-team@empresa.cl'
        subject: '[INFRA] {{ .GroupLabels.alertname }}'

# Suppress warning-level notifications when a critical alert with the same
# alertname/service is already firing.
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'service']

7.6. Troubleshooting Guide

7.6.1. Common Issues and Solutions

Performance Issues

# Check high CPU usage
# (pgrep -f may return several PIDs; top -p accepts a comma-separated list)
top -p $(pgrep -f "node.*dtem")

# Check memory usage (node processes sorted by resident memory, descending)
ps aux --sort=-%mem | grep node

# Check database connections (currently executing sessions only)
psql -U postgres -d dtem_prod -c "SELECT * FROM pg_stat_activity WHERE state = 'active';"

# Check slow queries
# NOTE(review): requires the pg_stat_statements extension; on PostgreSQL 13+
# the column was renamed to mean_exec_time — confirm the server version.
psql -U postgres -d dtem_prod -c "SELECT query, mean_time, calls FROM pg_stat_statements ORDER BY mean_time DESC LIMIT 10;"

Database Issues

# Check database size
psql -U postgres -d dtem_prod -c "SELECT pg_size_pretty(pg_database_size('dtem_prod'));"

# Check table sizes
# Fix: the original ordered by "pg_total_relation_size DESC", which is not a
# column (the query fails); order by the computed size expression instead.
psql -U postgres -d dtem_prod -c "SELECT schemaname, tablename, pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) FROM pg_tables ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC;"

# Check index usage (low idx_scan suggests unused indexes)
# Fix: pg_stat_user_indexes exposes relname/indexrelname, not
# tablename/indexname; alias them for readability.
psql -U postgres -d dtem_prod -c "SELECT schemaname, relname AS tablename, indexrelname AS indexname, idx_scan FROM pg_stat_user_indexes ORDER BY idx_scan ASC;"

# Check locks (lists blocked sessions together with the session blocking them)
psql -U postgres -d dtem_prod -c "SELECT blocked_locks.pid AS blocked_pid, blocked_activity.usename AS blocked_user, blocking_locks.pid AS blocking_pid, blocking_activity.usename AS blocking_user, blocked_activity.query AS blocked_statement, blocking_activity.query AS current_statement_in_blocking_process FROM pg_catalog.pg_locks blocked_locks JOIN pg_catalog.pg_stat_activity blocked_activity ON blocked_activity.pid = blocked_locks.pid JOIN pg_catalog.pg_locks blocking_locks ON blocking_locks.locktype = blocked_locks.locktype JOIN pg_catalog.pg_stat_activity blocking_activity ON blocking_activity.pid = blocking_locks.pid WHERE NOT blocked_locks.granted;"

Application Issues

# Check application logs (follows the live log stream of the deployment's pods)
kubectl logs -f deployment/dtem-api -n dtem-prod

# Check pod status
kubectl get pods -n dtem-prod

# Check events (sorted so the most recent events appear last)
kubectl get events -n dtem-prod --sort-by='.lastTimestamp'

# Check resource usage (requires metrics-server to be installed in the cluster)
kubectl top pods -n dtem-prod

# Check service connectivity from inside a pod of the deployment
kubectl exec -it deployment/dtem-api -n dtem-prod -- curl -s http://localhost:3000/health

7.6.2. Debugging Tools

Health Check Endpoints

// Comprehensive health check: probes each dependency and reports a per-check
// breakdown. Returns 200 when healthy, 503 when DB or Redis is down.
app.get('/health/detailed', async (req, res) => {
    const health = {
        status: 'healthy',
        timestamp: new Date().toISOString(),
        version: process.env.APP_VERSION,
        checks: {}
    };

    try {
        // Database health.
        // Fix: pg query results have no `duration` property, so the original
        // always reported responseTime 0. Measure the round-trip explicitly.
        const dbStart = Date.now();
        await pool.query('SELECT 1');
        health.checks.database = {
            status: 'healthy',
            responseTime: Date.now() - dbStart
        };
    } catch (error) {
        health.checks.database = {
            status: 'unhealthy',
            error: error.message
        };
        health.status = 'unhealthy';
    }

    try {
        // Redis health: PING should answer PONG.
        const redisResult = await redis.ping();
        health.checks.redis = {
            status: redisResult === 'PONG' ? 'healthy' : 'unhealthy',
            response: redisResult
        };
    } catch (error) {
        health.checks.redis = {
            status: 'unhealthy',
            error: error.message
        };
        health.status = 'unhealthy';
    }

    try {
        // SII connectivity (same fix as above: measure response time ourselves).
        const siiStart = Date.now();
        await axios.get(`${process.env.SII_API_URL}/health`, {
            timeout: 5000
        });
        health.checks.sii = {
            status: 'healthy',
            responseTime: Date.now() - siiStart
        };
    } catch (error) {
        // An SII outage is reported but does not flip the overall status —
        // preserved from the original; presumably deliberate since SII is an
        // external dependency. TODO(review): confirm.
        health.checks.sii = {
            status: 'unhealthy',
            error: error.message
        };
    }

    const statusCode = health.status === 'healthy' ? 200 : 503;
    res.status(statusCode).json(health);
});

Próxima sección: 8. Mantenimiento y Operaciones