diff --git a/.cursor b/.cursor index 80f2889..735d2bf 160000 --- a/.cursor +++ b/.cursor @@ -1 +1 @@ -Subproject commit 80f2889769e32f560029ac5ed62baca45b67fcbd +Subproject commit 735d2bfaffdf25528dd53cc1ccc782bd4979852c diff --git a/.gitignore b/.gitignore index 6eeeb56..ced3b08 100644 --- a/.gitignore +++ b/.gitignore @@ -28,6 +28,27 @@ pids *.seed *.pid.lock +# Python artifacts +__pycache__/ +*.py[cod] + +# Python virtual environments +.venv/ +venv/ + +# Node artifacts +node_modules/ +cli/node_modules/ +cli/lib/node_modules/ + +# TypeScript build output +cli/dist/ +cli/**/*.js +cli/**/*.js.map +cli/**/*.d.ts +cli/**/*.d.ts.map +!cli/jest.config.js + # Generated config files (these are created by the sources-generator) config/pgwatch-postgres/sources.yml config/pgwatch-prometheus/sources.yml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000..64055bc --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,196 @@ +stages: + - test + +reporter:tests: + stage: test + image: python:3.11-bullseye + variables: + GIT_STRATEGY: fetch + PIP_DISABLE_PIP_VERSION_CHECK: "1" + PIP_NO_CACHE_DIR: "1" + before_script: + - python --version + - pip install --upgrade pip + - apt-get update + - apt-get install -y --no-install-recommends postgresql postgresql-client && rm -rf /var/lib/apt/lists/* + - pip install -r reporter/requirements-dev.txt + script: + - chown -R postgres:postgres "$CI_PROJECT_DIR" + - su - postgres -c "cd \"$CI_PROJECT_DIR\" && python -m pytest --run-integration tests/reporter" + rules: + - if: '$CI_COMMIT_BRANCH' + +cli:smoke:test: + stage: test + image: alpine:3.20 + variables: + GIT_STRATEGY: fetch + before_script: + - apk add --no-cache bash curl git docker-cli docker-compose + script: + - bash -n ./postgres_ai + - | + set -euo pipefail + out=$(./postgres_ai help | tr -d "\r") + echo "$out" | grep -q "Postgres AI CLI" + echo "$out" | grep -q "COMMANDS:" + rules: + - if: '$CI_COMMIT_BRANCH' + +cli:e2e:dind: + stage: test + image: alpine:3.20 + services: + - name: docker:24-dind + command: ["--tls=false"] + variables: + DOCKER_HOST: tcp://docker:2375 + DOCKER_TLS_CERTDIR: "" + DOCKER_API_VERSION: "1.43" + GIT_STRATEGY: fetch + before_script: + - apk add --no-cache bash curl git coreutils docker-cli docker-compose openssl + - docker version + script: + - set -euo pipefail + - bash -n ./postgres_ai + - ./postgres_ai check || true + - ./postgres_ai quickstart --demo -y + - timeout 60 ./postgres_ai status + - timeout 10 ./postgres_ai logs grafana || true + - ./postgres_ai config + - ./postgres_ai update-config + - ./postgres_ai list-instances || true + - ./postgres_ai add-key "test_key_123" + - ./postgres_ai show-key + - ./postgres_ai remove-key + - ./postgres_ai generate-grafana-password || true + - ./postgres_ai show-grafana-credentials || true + - ./postgres_ai add-instance "postgresql://postgres:postgres@target-db:5432/target_database" "ci-demo" + - ./postgres_ai test-instance "ci-demo" || true + - printf "y\n" | ./postgres_ai reset sink-postgres + - ./postgres_ai restart + - ./postgres_ai stop + - ./postgres_ai start + - printf "y\n" | ./postgres_ai reset + - ./postgres_ai clean + after_script: + - docker ps -a || true + - docker system prune -af || true + rules: + - if: '$CI_COMMIT_BRANCH' +cli:node:smoke: + stage: test + image: node:20-alpine + variables: + GIT_STRATEGY: fetch + before_script: + - corepack enable || true + script: + - node -v && npm -v + - npm --prefix cli install --no-audit --no-fund + - node ./cli/dist/bin/postgres-ai.js --help + - node 
./cli/dist/bin/postgres-ai.js mon status --help + - node ./cli/dist/bin/postgres-ai.js mon targets list --help + - npm install -g ./cli + - echo "prefix=$(npm config get prefix)" && echo "PATH=$PATH" + - command -v postgres-ai && postgres-ai --help + - command -v pgai && pgai --help + - rm -f .pgwatch-config + - node ./cli/dist/bin/postgres-ai.js add-key "test_key_1234567890" + - node ./cli/dist/bin/postgres-ai.js show-key | grep -E "\*{2,}|[0-9]{4}$" + - test -f ~/.config/postgresai/config.json + - grep -q 'test_key' ~/.config/postgresai/config.json + - node ./cli/dist/bin/postgres-ai.js remove-key + - if grep -q 'apiKey' ~/.config/postgresai/config.json; then echo 'key not removed' && exit 1; fi + - node ./cli/dist/bin/postgres-ai.js mon targets list | head -n 1 || true + - node ./cli/dist/bin/postgres-ai.js mon targets add 'postgresql://user:pass@host:5432/db' ci-test || true + - node ./cli/dist/bin/postgres-ai.js mon targets remove ci-test || true + rules: + - if: '$CI_COMMIT_BRANCH' + +cli:node:e2e:dind: + stage: test + image: node:20-alpine + services: + - name: docker:24-dind + command: ["--tls=false"] + variables: + DOCKER_HOST: tcp://docker:2375 + DOCKER_TLS_CERTDIR: "" + DOCKER_API_VERSION: "1.43" + GIT_STRATEGY: fetch + before_script: + - corepack enable || true + - apk add --no-cache bash docker-cli docker-compose openssl postgresql-client + - node -v && npm -v && docker version + - npm --prefix cli install --no-audit --no-fund + script: + - ./tests/e2e.cli.sh + after_script: + - docker ps -a || true + rules: + - if: '$CI_COMMIT_BRANCH' + +cli:node:full:dind: + stage: test + image: node:20-alpine + services: + - name: docker:24-dind + command: ["--tls=false"] + variables: + DOCKER_HOST: tcp://docker:2375 + DOCKER_TLS_CERTDIR: "" + DOCKER_API_VERSION: "1.43" + GIT_STRATEGY: fetch + before_script: + - corepack enable || true + - apk add --no-cache bash git docker-cli docker-compose openssl postgresql-client + - node -v && npm -v && docker version + - npm --prefix cli install --no-audit --no-fund + script: + - echo "=== Testing quickstart (demo mode) ===" + - node ./cli/dist/bin/postgres-ai.js mon quickstart --demo + - sleep 10 + - node ./cli/dist/bin/postgres-ai.js mon status + - echo "" + - echo "=== Testing shell command ===" + - echo "SELECT 1;" | node ./cli/dist/bin/postgres-ai.js mon shell target-db || true + - echo "" + - echo "=== Testing complete workflow ===" + - node ./cli/dist/bin/postgres-ai.js mon targets add "postgresql://monitor:monitor_pass@target-db:5432/target_database" demo-test + - node ./cli/dist/bin/postgres-ai.js mon targets list + - node ./cli/dist/bin/postgres-ai.js mon targets test demo-test || true + - node ./cli/dist/bin/postgres-ai.js mon health --wait 120 + - node ./cli/dist/bin/postgres-ai.js mon show-grafana-credentials + - echo "" + - echo "=== Cleanup ===" + - node ./cli/dist/bin/postgres-ai.js mon stop + - node ./cli/dist/bin/postgres-ai.js mon clean || true + after_script: + - docker ps -a || true + rules: + - if: '$CI_COMMIT_BRANCH == "main" || $CI_COMMIT_BRANCH =~ /^feature\//' + allow_failure: false + +cli:node:integration: + stage: test + image: node:20-alpine + variables: + GIT_STRATEGY: fetch + before_script: + - corepack enable || true + - node -v && npm -v + - npm --prefix cli install --no-audit --no-fund + script: + - | + set -euo pipefail + : "${PGAI_API_KEY:?PGAI_API_KEY is required for integration tests}" + BASE_URL="${PGAI_BASE_URL:-https://v2.postgres.ai/api/general/}" + echo "Using BASE_URL=$BASE_URL" + # Placeholder: run CLI 
help until API-backed commands are implemented + node ./cli/dist/bin/postgres-ai.js --help + rules: + - if: '$PGAI_API_KEY' + + diff --git a/Formula/postgresai.rb b/Formula/postgresai.rb new file mode 100644 index 0000000..126260b --- /dev/null +++ b/Formula/postgresai.rb @@ -0,0 +1,24 @@ +# typed: false +# frozen_string_literal: true + +class Postgresai < Formula + desc "postgres_ai CLI (Node.js)" + homepage "https://gitlab.com/postgres-ai/postgres_ai" + url "https://registry.npmjs.org/postgresai/-/postgresai-0.11.0-alpha.8.tgz" + sha256 "" # Will be calculated after publishing to npm + license "Apache-2.0" + + depends_on "node" + + def install + system "npm", "install", *Language::Node.std_npm_install_args(libexec) + bin.install_symlink Dir["#{libexec}/bin/*"] + end + + test do + assert_match version.to_s, shell_output("#{bin}/postgres-ai --version") + assert_match "PostgresAI CLI", shell_output("#{bin}/postgres-ai --help") + assert_match version.to_s, shell_output("#{bin}/pgai --version") + end +end + diff --git a/README.md b/README.md index 82f66b8..fa8a73b 100644 --- a/README.md +++ b/README.md @@ -95,13 +95,9 @@ create user postgres_ai_mon with password ''; grant connect on database to postgres_ai_mon; grant pg_monitor to postgres_ai_mon; -grant select on pg_stat_statements to postgres_ai_mon; -grant select on pg_stat_database to postgres_ai_mon; -grant select on pg_stat_user_tables to postgres_ai_mon; -grant select on pg_stat_user_indexes to postgres_ai_mon; grant select on pg_index to postgres_ai_mon; --- Create a public view for pg_statistic access (required for bloat metrics on user schemas) +-- Create a public view for pg_statistic access (optional, for bloat analysis) create view public.pg_statistic as select n.nspname as schemaname, @@ -116,11 +112,29 @@ join pg_namespace n on n.oid = c.relnamespace join pg_attribute a on a.attrelid = s.starelid and a.attnum = s.staattnum where a.attnum > 0 and not a.attisdropped; -grant select on public.pg_statistic to pg_monitor; +grant select on public.pg_statistic to postgres_ai_mon; alter user postgres_ai_mon set search_path = "$user", public, pg_catalog; commit; ``` +### Optional permissions to analyze risks of certain performance cliffs + +For RDS Postgres and Aurora: + +```sql +create extension if not exists rds_tools; +grant execute on function rds_tools.pg_ls_multixactdir() to postgres_ai_mon; +``` + +For self-managed Postgres: + +```sql +grant execute on function pg_stat_file(text) to postgres_ai_mon; +grant execute on function pg_stat_file(text, boolean) to postgres_ai_mon; +grant execute on function pg_ls_dir(text) to postgres_ai_mon; +grant execute on function pg_ls_dir(text, boolean, boolean) to postgres_ai_mon; +``` + **One command setup:** ```bash @@ -188,6 +202,52 @@ Get a complete monitoring setup with demo data in under 2 minutes. ./postgres_ai health ``` +## ๐Ÿ“‹ Checkup reports + +postgres_ai monitoring generates automated health check reports based on [postgres-checkup](https://gitlab.com/postgres-ai/postgres-checkup). Each report has a unique check ID and title: + +### A. General / Infrastructural +| Check ID | Title | +|----------|-------| +| A001 | System information | +| A002 | Version information | +| A003 | Postgres settings | +| A004 | Cluster information | +| A005 | Extensions | +| A006 | Postgres setting deviations | +| A007 | Altered settings | +| A008 | Disk usage and file system type | + +### D. 
Monitoring / Troubleshooting +| Check ID | Title | +|----------|-------| +| D004 | pg_stat_statements and pg_stat_kcache settings | + +### F. Autovacuum, Bloat +| Check ID | Title | +|----------|-------| +| F001 | Autovacuum: current settings | +| F004 | Autovacuum: heap bloat (estimated) | +| F005 | Autovacuum: index bloat (estimated) | + +### G. Performance / Connections / Memory-related settings +| Check ID | Title | +|----------|-------| +| G001 | Memory-related settings | + +### H. Index analysis +| Check ID | Title | +|----------|-------| +| H001 | Invalid indexes | +| H002 | Unused indexes | +| H004 | Redundant indexes | + +### K. SQL query analysis +| Check ID | Title | +|----------|-------| +| K001 | Globally aggregated query metrics | +| K003 | Top-50 queries by total_time | + ## ๐ŸŒ Access points After running quickstart: @@ -205,6 +265,24 @@ Technical URLs (for advanced users): ./postgres_ai help ``` +### Node.js CLI (early preview) + +```bash +# run without install +node ./cli/bin/postgres-ai.js --help + +# local dev: install aliases into PATH +npm --prefix cli install --no-audit --no-fund +npm link ./cli +postgres-ai --help +pgai --help + +# or install globally after publish (planned) +# npm i -g @postgresai/cli +# postgres-ai --help +# pgai --help +``` + ## ๐Ÿ”‘ PostgresAI access token Get your access token at [PostgresAI](https://postgres.ai) for automated report uploads and advanced analysis. @@ -216,6 +294,54 @@ Get your access token at [PostgresAI](https://postgres.ai) for automated report - Query plan analysis and automated recommendations - Enhanced AI integration capabilities +## ๐Ÿงช Testing + +Python-based report generation lives under `reporter/` and now ships with a pytest suite. + +### Installation + +Install dev dependencies (includes `pytest`, `pytest-postgresql`, `psycopg`, etc.): +```bash +python3 -m pip install -r reporter/requirements-dev.txt +``` + +### Running Tests + +#### Unit Tests Only (Fast, No External Services Required) + +Run only unit tests with mocked Prometheus interactions: +```bash +pytest tests/reporter +``` + +This automatically skips integration tests. Or run specific test files: +```bash +pytest tests/reporter/test_generators_unit.py -v +pytest tests/reporter/test_formatters.py -v +``` + +#### All Tests: Unit + Integration (Requires PostgreSQL) + +Run the complete test suite (both unit and integration tests): +```bash +pytest tests/reporter --run-integration +``` + +Integration tests create a temporary PostgreSQL instance automatically and require PostgreSQL binaries (`initdb`, `postgres`) on your PATH. No manual database setup or environment variables are required - the tests create and destroy their own temporary PostgreSQL instances. + +**Summary:** +- `pytest tests/reporter` โ†’ **Unit tests only** (integration tests skipped) +- `pytest tests/reporter --run-integration` โ†’ **Both unit and integration tests** + +### Test Coverage + +Generate coverage report: +```bash +pytest tests/reporter -m unit --cov=reporter --cov-report=html +``` + +View the coverage report by opening `htmlcov/index.html` in your browser. + ## ๐Ÿค Contributing We welcome contributions from Postgres experts! Please check our [GitLab repository](https://gitlab.com/postgres-ai/postgres_ai) for: diff --git a/cli/README.md b/cli/README.md new file mode 100644 index 0000000..c1eeb72 --- /dev/null +++ b/cli/README.md @@ -0,0 +1,268 @@ +# PostgresAI CLI + +Command-line interface for PostgresAI monitoring and database management. 
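+
+For example, a typical first session looks like this (illustrative only; every command used here is documented in the sections below):
+
+```bash
+# authenticate in the browser and save an API key to ~/.config/postgresai/config.json
+pgai auth
+
+# start the monitoring stack with the bundled demo database
+postgres-ai mon quickstart --demo
+
+# add your own Postgres as a monitoring target and wait for services to become healthy
+postgres-ai mon targets add 'postgresql://user:pass@host:5432/db' my-db
+postgres-ai mon health --wait 120
+
+# look up the generated Grafana login
+postgres-ai mon show-grafana-credentials
+```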
+ +## Installation + +### From npm + +```bash +npm install -g postgresai +``` + +Or install the latest alpha release explicitly: +```bash +npm install -g postgresai@alpha +``` + +### From Homebrew (macOS) + +```bash +# Add the PostgresAI tap +brew tap postgres-ai/tap https://gitlab.com/postgres-ai/homebrew-tap.git + +# Install postgresai +brew install postgresai +``` + +## Usage + +The CLI provides three command aliases: +```bash +postgres-ai --help +postgresai --help +pgai --help # short alias +``` + +## Quick start + +### Authentication + +Authenticate via browser to obtain API key: +```bash +pgai auth +``` + +This will: +- Open your browser for authentication +- Prompt you to select an organization +- Automatically save your API key to `~/.config/postgresai/config.json` + +### Start monitoring + +Start monitoring with demo database: +```bash +postgres-ai mon quickstart --demo +``` + +Start monitoring with your own database: +```bash +postgres-ai mon quickstart --db-url postgresql://user:pass@host:5432/db +``` + +Complete automated setup with API key and database: +```bash +postgres-ai mon quickstart --api-key your_key --db-url postgresql://user:pass@host:5432/db -y +``` + +This will: +- Configure API key for automated report uploads (if provided) +- Add PostgreSQL instance to monitor (if provided) +- Generate secure Grafana password +- Start all monitoring services +- Open Grafana at http://localhost:3000 + +## Commands + +### Monitoring services management (`mon` group) + +#### Service lifecycle +```bash +# Complete setup with various options +postgres-ai mon quickstart # Interactive setup for production +postgres-ai mon quickstart --demo # Demo mode with sample database +postgres-ai mon quickstart --api-key # Setup with API key +postgres-ai mon quickstart --db-url # Setup with database URL +postgres-ai mon quickstart --api-key --db-url # Complete automated setup +postgres-ai mon quickstart -y # Auto-accept all defaults + +# Service management +postgres-ai mon start # Start monitoring services +postgres-ai mon stop # Stop monitoring services +postgres-ai mon restart [service] # Restart all or specific monitoring service +postgres-ai mon status # Show monitoring services status +postgres-ai mon health [--wait ] # Check monitoring services health +``` + +##### Quickstart options +- `--demo` - Demo mode with sample database (testing only, cannot use with --api-key) +- `--api-key ` - Postgres AI API key for automated report uploads +- `--db-url ` - PostgreSQL connection URL to monitor (format: `postgresql://user:pass@host:port/db`) +- `-y, --yes` - Accept all defaults and skip interactive prompts + +#### Monitoring target databases (`mon targets` subgroup) +```bash +postgres-ai mon targets list # List databases to monitor +postgres-ai mon targets add # Add database to monitor +postgres-ai mon targets remove # Remove monitoring target +postgres-ai mon targets test # Test target connectivity +``` + +#### Configuration and maintenance +```bash +postgres-ai mon config # Show monitoring configuration +postgres-ai mon update-config # Apply configuration changes +postgres-ai mon update # Update monitoring stack +postgres-ai mon reset [service] # Reset service data +postgres-ai mon clean # Cleanup artifacts +postgres-ai mon check # System readiness check +postgres-ai mon shell # Open shell to monitoring service +``` + +### MCP server (`mcp` group) + +```bash +pgai mcp start # Start MCP stdio server exposing tools +``` + +Cursor configuration example (Settings โ†’ MCP): + +```json +{ + 
"mcpServers": { + "PostgresAI": { + "command": "pgai", + "args": ["mcp", "start"], + "env": { + "PGAI_API_BASE_URL": "https://postgres.ai/api/general/" + } + } + } +} +``` + +Tools exposed: +- list_issues: returns the same JSON as `pgai issues list`. +- view_issue: view a single issue with its comments (args: { issue_id, debug? }) +- post_issue_comment: post a comment (args: { issue_id, content, parent_comment_id?, debug? }) + +### Issues management (`issues` group) + +```bash +pgai issues list # List issues (shows: id, title, status, created_at) +pgai issues view # View issue details and comments +pgai issues post_comment # Post a comment to an issue +# Options: +# --parent Parent comment ID (for replies) +# --debug Enable debug output +# --json Output raw JSON (overrides default YAML) +``` + +#### Output format for issues commands + +By default, issues commands print human-friendly YAML when writing to a terminal. For scripting, you can: + +- Use `--json` to force JSON output: + +```bash +pgai issues list --json | jq '.[] | {id, title}' +``` + +- Rely on auto-detection: when stdout is not a TTY (e.g., piped or redirected), output is JSON automatically: + +```bash +pgai issues view > issue.json +``` + +#### Grafana management +```bash +postgres-ai mon generate-grafana-password # Generate new Grafana password +postgres-ai mon show-grafana-credentials # Show Grafana credentials +``` + +### Authentication and API key management +```bash +postgres-ai auth # Authenticate via browser (recommended) +postgres-ai add-key # Manually store API key +postgres-ai show-key # Show stored key (masked) +postgres-ai remove-key # Remove stored key +``` + +## Configuration + +The CLI stores configuration in `~/.config/postgresai/config.json` including: +- API key +- Base URL +- Organization ID + +### Configuration priority + +API key resolution order: +1. Command line option (`--api-key`) +2. Environment variable (`PGAI_API_KEY`) +3. User config file (`~/.config/postgresai/config.json`) +4. Legacy project config (`.pgwatch-config`) + +Base URL resolution order: +- API base URL (`apiBaseUrl`): + 1. Command line option (`--api-base-url`) + 2. Environment variable (`PGAI_API_BASE_URL`) + 3. User config file `baseUrl` (`~/.config/postgresai/config.json`) + 4. Default: `https://postgres.ai/api/general/` +- UI base URL (`uiBaseUrl`): + 1. Command line option (`--ui-base-url`) + 2. Environment variable (`PGAI_UI_BASE_URL`) + 3. Default: `https://console.postgres.ai` + +Normalization: +- A single trailing `/` is removed to ensure consistent path joining. 
+ +### Environment variables + +- `PGAI_API_KEY` - API key for PostgresAI services +- `PGAI_API_BASE_URL` - API endpoint for backend RPC (default: `https://postgres.ai/api/general/`) +- `PGAI_UI_BASE_URL` - UI endpoint for browser routes (default: `https://console.postgres.ai`) + +### CLI options + +- `--api-base-url ` - overrides `PGAI_API_BASE_URL` +- `--ui-base-url ` - overrides `PGAI_UI_BASE_URL` + +### Examples + +Linux/macOS (bash/zsh): + +```bash +export PGAI_API_BASE_URL=https://v2.postgres.ai/api/general/ +export PGAI_UI_BASE_URL=https://console-dev.postgres.ai +pgai auth --debug +``` + +Windows PowerShell: + +```powershell +$env:PGAI_API_BASE_URL = "https://v2.postgres.ai/api/general/" +$env:PGAI_UI_BASE_URL = "https://console-dev.postgres.ai" +pgai auth --debug +``` + +Via CLI options (overrides env): + +```bash +pgai auth --debug \ + --api-base-url https://v2.postgres.ai/api/general/ \ + --ui-base-url https://console-dev.postgres.ai +``` + +Notes: +- If `PGAI_UI_BASE_URL` is not set, the default is `https://console.postgres.ai`. + +## Requirements + +- Node.js 18 or higher +- Docker and Docker Compose + +## Learn more + +- Documentation: https://postgres.ai/docs +- Issues: https://gitlab.com/postgres-ai/postgres_ai/-/issues diff --git a/cli/bin/postgres-ai.ts b/cli/bin/postgres-ai.ts new file mode 100644 index 0000000..a408d50 --- /dev/null +++ b/cli/bin/postgres-ai.ts @@ -0,0 +1,1771 @@ +#!/usr/bin/env node + +import { Command } from "commander"; +import * as pkg from "../package.json"; +import * as config from "../lib/config"; +import * as yaml from "js-yaml"; +import * as fs from "fs"; +import * as path from "path"; +import * as os from "os"; +import { spawn, spawnSync, exec, execFile } from "child_process"; +import { promisify } from "util"; +import * as readline from "readline"; +import * as http from "https"; +import { URL } from "url"; +import { startMcpServer } from "../lib/mcp-server"; +import { fetchIssues, fetchIssueComments, createIssueComment, fetchIssue } from "../lib/issues"; +import { resolveBaseUrls } from "../lib/util"; + +const execPromise = promisify(exec); +const execFilePromise = promisify(execFile); + +/** + * CLI configuration options + */ +interface CliOptions { + apiKey?: string; + apiBaseUrl?: string; + uiBaseUrl?: string; +} + +/** + * Configuration result + */ +interface ConfigResult { + apiKey: string; +} + +/** + * Instance configuration + */ +interface Instance { + name: string; + conn_str?: string; + preset_metrics?: string; + custom_metrics?: any; + is_enabled?: boolean; + group?: string; + custom_tags?: Record; +} + +/** + * Path resolution result + */ +interface PathResolution { + fs: typeof fs; + path: typeof path; + projectDir: string; + composeFile: string; + instancesFile: string; +} + +/** + * Get configuration from various sources + * @param opts - Command line options + * @returns Configuration object + */ +function getConfig(opts: CliOptions): ConfigResult { + // Priority order: + // 1. Command line option (--api-key) + // 2. Environment variable (PGAI_API_KEY) + // 3. User-level config file (~/.config/postgresai/config.json) + // 4. 
Legacy project-local config (.pgwatch-config) + + let apiKey = opts.apiKey || process.env.PGAI_API_KEY || ""; + + // Try config file if not provided via CLI or env + if (!apiKey) { + const fileConfig = config.readConfig(); + if (!apiKey) apiKey = fileConfig.apiKey || ""; + } + + return { apiKey }; +} + +// Human-friendly output helper: YAML for TTY by default, JSON when --json or non-TTY +function printResult(result: unknown, json?: boolean): void { + if (typeof result === "string") { + process.stdout.write(result); + if (!/\n$/.test(result)) console.log(); + return; + } + if (json || !process.stdout.isTTY) { + console.log(JSON.stringify(result, null, 2)); + } else { + let text = yaml.dump(result as any); + if (Array.isArray(result)) { + text = text.replace(/\n- /g, "\n\n- "); + } + console.log(text); + } +} + +const program = new Command(); + +program + .name("postgres-ai") + .description("PostgresAI CLI") + .version(pkg.version) + .option("--api-key ", "API key (overrides PGAI_API_KEY)") + .option( + "--api-base-url ", + "API base URL for backend RPC (overrides PGAI_API_BASE_URL)" + ) + .option( + "--ui-base-url ", + "UI base URL for browser routes (overrides PGAI_UI_BASE_URL)" + ); + +/** + * Stub function for not implemented commands + */ +const stub = (name: string) => async (): Promise => { + // Temporary stubs until Node parity is implemented + console.error(`${name}: not implemented in Node CLI yet; use bash CLI for now`); + process.exitCode = 2; +}; + +/** + * Resolve project paths + */ +function resolvePaths(): PathResolution { + const startDir = process.cwd(); + let currentDir = startDir; + + while (true) { + const composeFile = path.resolve(currentDir, "docker-compose.yml"); + if (fs.existsSync(composeFile)) { + const instancesFile = path.resolve(currentDir, "instances.yml"); + return { fs, path, projectDir: currentDir, composeFile, instancesFile }; + } + + const parentDir = path.dirname(currentDir); + if (parentDir === currentDir) break; + currentDir = parentDir; + } + + throw new Error( + `docker-compose.yml not found. 
Run monitoring commands from the PostgresAI project directory or one of its subdirectories (starting search from ${startDir}).` + ); +} + +/** + * Check if Docker daemon is running + */ +function isDockerRunning(): boolean { + try { + const result = spawnSync("docker", ["info"], { stdio: "pipe" }); + return result.status === 0; + } catch { + return false; + } +} + +/** + * Get docker compose command + */ +function getComposeCmd(): string[] | null { + const tryCmd = (cmd: string, args: string[]): boolean => + spawnSync(cmd, args, { stdio: "ignore" }).status === 0; + if (tryCmd("docker-compose", ["version"])) return ["docker-compose"]; + if (tryCmd("docker", ["compose", "version"])) return ["docker", "compose"]; + return null; +} + +/** + * Check if monitoring containers are already running + */ +function checkRunningContainers(): { running: boolean; containers: string[] } { + try { + const result = spawnSync( + "docker", + ["ps", "--filter", "name=grafana-with-datasources", "--filter", "name=pgwatch", "--format", "{{.Names}}"], + { stdio: "pipe", encoding: "utf8" } + ); + + if (result.status === 0 && result.stdout) { + const containers = result.stdout.trim().split("\n").filter(Boolean); + return { running: containers.length > 0, containers }; + } + return { running: false, containers: [] }; + } catch { + return { running: false, containers: [] }; + } +} + +/** + * Run docker compose command + */ +async function runCompose(args: string[]): Promise { + let composeFile: string; + let projectDir: string; + try { + ({ composeFile, projectDir } = resolvePaths()); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + console.error(message); + process.exitCode = 1; + return 1; + } + + // Check if Docker daemon is running + if (!isDockerRunning()) { + console.error("Docker is not running. 
Please start Docker and try again"); + process.exitCode = 1; + return 1; + } + + const cmd = getComposeCmd(); + if (!cmd) { + console.error("docker compose not found (need docker-compose or docker compose)"); + process.exitCode = 1; + return 1; + } + + // Read Grafana password from .pgwatch-config and pass to Docker Compose + const env = { ...process.env }; + const cfgPath = path.resolve(projectDir, ".pgwatch-config"); + if (fs.existsSync(cfgPath)) { + try { + const stats = fs.statSync(cfgPath); + if (!stats.isDirectory()) { + const content = fs.readFileSync(cfgPath, "utf8"); + const match = content.match(/^grafana_password=([^\r\n]+)/m); + if (match) { + env.GF_SECURITY_ADMIN_PASSWORD = match[1].trim(); + } + } + } catch (err) { + // If we can't read the config, continue without setting the password + } + } + + return new Promise((resolve) => { + const child = spawn(cmd[0], [...cmd.slice(1), "-f", composeFile, ...args], { + stdio: "inherit", + env: env + }); + child.on("close", (code) => resolve(code || 0)); + }); +} + +program.command("help", { isDefault: true }).description("show help").action(() => { + program.outputHelp(); +}); + +// Monitoring services management +const mon = program.command("mon").description("monitoring services management"); + +mon + .command("quickstart") + .description("complete setup (generate config, start monitoring services)") + .option("--demo", "demo mode with sample database", false) + .option("--api-key ", "Postgres AI API key for automated report uploads") + .option("--db-url ", "PostgreSQL connection URL to monitor") + .option("-y, --yes", "accept all defaults and skip interactive prompts", false) + .action(async (opts: { demo: boolean; apiKey?: string; dbUrl?: string; yes: boolean }) => { + console.log("\n================================="); + console.log(" PostgresAI Monitoring Quickstart"); + console.log("=================================\n"); + console.log("This will install, configure, and start the monitoring system\n"); + + // Validate conflicting options + if (opts.demo && opts.dbUrl) { + console.log("โš  Both --demo and --db-url provided. 
Demo mode includes its own database."); + console.log("โš  The --db-url will be ignored in demo mode.\n"); + opts.dbUrl = undefined; + } + + if (opts.demo && opts.apiKey) { + console.error("โœ— Cannot use --api-key with --demo mode"); + console.error("โœ— Demo mode is for testing only and does not support API key integration"); + console.error("\nUse demo mode without API key: postgres-ai mon quickstart --demo"); + console.error("Or use production mode with API key: postgres-ai mon quickstart --api-key=your_key"); + process.exitCode = 1; + return; + } + + // Check if containers are already running + const { running, containers } = checkRunningContainers(); + if (running) { + console.log(`โš  Monitoring services are already running: ${containers.join(", ")}`); + console.log("Use 'postgres-ai mon restart' to restart them\n"); + return; + } + + // Step 1: API key configuration (only in production mode) + if (!opts.demo) { + console.log("Step 1: Postgres AI API Configuration (Optional)"); + console.log("An API key enables automatic upload of PostgreSQL reports to Postgres AI\n"); + + if (opts.apiKey) { + console.log("Using API key provided via --api-key parameter"); + config.writeConfig({ apiKey: opts.apiKey }); + console.log("โœ“ API key saved\n"); + } else if (opts.yes) { + // Auto-yes mode without API key - skip API key setup + console.log("Auto-yes mode: no API key provided, skipping API key setup"); + console.log("โš  Reports will be generated locally only"); + console.log("You can add an API key later with: postgres-ai add-key \n"); + } else { + const rl = readline.createInterface({ + input: process.stdin, + output: process.stdout + }); + + const question = (prompt: string): Promise => + new Promise((resolve) => rl.question(prompt, resolve)); + + try { + const answer = await question("Do you have a Postgres AI API key? (Y/n): "); + const proceedWithApiKey = !answer || answer.toLowerCase() === "y"; + + if (proceedWithApiKey) { + while (true) { + const inputApiKey = await question("Enter your Postgres AI API key: "); + const trimmedKey = inputApiKey.trim(); + + if (trimmedKey) { + config.writeConfig({ apiKey: trimmedKey }); + console.log("โœ“ API key saved\n"); + break; + } + + console.log("โš  API key cannot be empty"); + const retry = await question("Try again or skip API key setup, retry? 
(Y/n): "); + if (retry.toLowerCase() === "n") { + console.log("โš  Skipping API key setup - reports will be generated locally only"); + console.log("You can add an API key later with: postgres-ai add-key \n"); + break; + } + } + } else { + console.log("โš  Skipping API key setup - reports will be generated locally only"); + console.log("You can add an API key later with: postgres-ai add-key \n"); + } + } finally { + rl.close(); + } + } + } else { + console.log("Step 1: Demo mode - API key configuration skipped"); + console.log("Demo mode is for testing only and does not support API key integration\n"); + } + + // Step 2: Add PostgreSQL instance (if not demo mode) + if (!opts.demo) { + console.log("Step 2: Add PostgreSQL Instance to Monitor\n"); + + // Clear instances.yml in production mode (start fresh) + const instancesPath = path.resolve(process.cwd(), "instances.yml"); + const emptyInstancesContent = "# PostgreSQL instances to monitor\n# Add your instances using: postgres-ai mon targets add\n\n"; + fs.writeFileSync(instancesPath, emptyInstancesContent, "utf8"); + + if (opts.dbUrl) { + console.log("Using database URL provided via --db-url parameter"); + console.log(`Adding PostgreSQL instance from: ${opts.dbUrl}\n`); + + const match = opts.dbUrl.match(/^postgresql:\/\/[^@]+@([^:/]+)/); + const autoInstanceName = match ? match[1] : "db-instance"; + + const connStr = opts.dbUrl; + const m = connStr.match(/^postgresql:\/\/([^:]+):([^@]+)@([^:\/]+)(?::(\d+))?\/(.+)$/); + + if (!m) { + console.error("โœ— Invalid connection string format"); + process.exitCode = 1; + return; + } + + const host = m[3]; + const db = m[5]; + const instanceName = `${host}-${db}`.replace(/[^a-zA-Z0-9-]/g, "-"); + + const body = `- name: ${instanceName}\n conn_str: ${connStr}\n preset_metrics: full\n custom_metrics:\n is_enabled: true\n group: default\n custom_tags:\n env: production\n cluster: default\n node_name: ${instanceName}\n sink_type: ~sink_type~\n`; + fs.appendFileSync(instancesPath, body, "utf8"); + console.log(`โœ“ Monitoring target '${instanceName}' added\n`); + + // Test connection + console.log("Testing connection to the added instance..."); + try { + const { Client } = require("pg"); + const client = new Client({ connectionString: connStr }); + await client.connect(); + const result = await client.query("select version();"); + console.log("โœ“ Connection successful"); + console.log(`${result.rows[0].version}\n`); + await client.end(); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + console.error(`โœ— Connection failed: ${message}\n`); + } + } else if (opts.yes) { + // Auto-yes mode without database URL - skip database setup + console.log("Auto-yes mode: no database URL provided, skipping database setup"); + console.log("โš  No PostgreSQL instance added"); + console.log("You can add one later with: postgres-ai mon targets add\n"); + } else { + const rl = readline.createInterface({ + input: process.stdin, + output: process.stdout + }); + + const question = (prompt: string): Promise => + new Promise((resolve) => rl.question(prompt, resolve)); + + try { + console.log("You need to add at least one PostgreSQL instance to monitor"); + const answer = await question("Do you want to add a PostgreSQL instance now? (Y/n): "); + const proceedWithInstance = !answer || answer.toLowerCase() === "y"; + + if (proceedWithInstance) { + console.log("\nYou can provide either:"); + console.log(" 1. 
A full connection string: postgresql://user:pass@host:port/database"); + console.log(" 2. Press Enter to skip for now\n"); + + const connStr = await question("Enter connection string (or press Enter to skip): "); + + if (connStr.trim()) { + const m = connStr.match(/^postgresql:\/\/([^:]+):([^@]+)@([^:\/]+)(?::(\d+))?\/(.+)$/); + if (!m) { + console.error("โœ— Invalid connection string format"); + console.log("โš  Continuing without adding instance\n"); + } else { + const host = m[3]; + const db = m[5]; + const instanceName = `${host}-${db}`.replace(/[^a-zA-Z0-9-]/g, "-"); + + const body = `- name: ${instanceName}\n conn_str: ${connStr}\n preset_metrics: full\n custom_metrics:\n is_enabled: true\n group: default\n custom_tags:\n env: production\n cluster: default\n node_name: ${instanceName}\n sink_type: ~sink_type~\n`; + fs.appendFileSync(instancesPath, body, "utf8"); + console.log(`โœ“ Monitoring target '${instanceName}' added\n`); + + // Test connection + console.log("Testing connection to the added instance..."); + try { + const { Client } = require("pg"); + const client = new Client({ connectionString: connStr }); + await client.connect(); + const result = await client.query("select version();"); + console.log("โœ“ Connection successful"); + console.log(`${result.rows[0].version}\n`); + await client.end(); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + console.error(`โœ— Connection failed: ${message}\n`); + } + } + } else { + console.log("โš  No PostgreSQL instance added - you can add one later with: postgres-ai mon targets add\n"); + } + } else { + console.log("โš  No PostgreSQL instance added - you can add one later with: postgres-ai mon targets add\n"); + } + } finally { + rl.close(); + } + } + } else { + console.log("Step 2: Demo mode enabled - using included demo PostgreSQL database\n"); + } + + // Step 3: Update configuration + console.log(opts.demo ? "Step 3: Updating configuration..." : "Step 3: Updating configuration..."); + const code1 = await runCompose(["run", "--rm", "sources-generator"]); + if (code1 !== 0) { + process.exitCode = code1; + return; + } + console.log("โœ“ Configuration updated\n"); + + // Step 4: Ensure Grafana password is configured + console.log(opts.demo ? "Step 4: Configuring Grafana security..." 
: "Step 4: Configuring Grafana security..."); + const cfgPath = path.resolve(process.cwd(), ".pgwatch-config"); + let grafanaPassword = ""; + + try { + if (fs.existsSync(cfgPath)) { + const stats = fs.statSync(cfgPath); + if (!stats.isDirectory()) { + const content = fs.readFileSync(cfgPath, "utf8"); + const match = content.match(/^grafana_password=([^\r\n]+)/m); + if (match) { + grafanaPassword = match[1].trim(); + } + } + } + + if (!grafanaPassword) { + console.log("Generating secure Grafana password..."); + const { stdout: password } = await execPromise("openssl rand -base64 12 | tr -d '\n'"); + grafanaPassword = password.trim(); + + let configContent = ""; + if (fs.existsSync(cfgPath)) { + const stats = fs.statSync(cfgPath); + if (!stats.isDirectory()) { + configContent = fs.readFileSync(cfgPath, "utf8"); + } + } + + const lines = configContent.split(/\r?\n/).filter((l) => !/^grafana_password=/.test(l)); + lines.push(`grafana_password=${grafanaPassword}`); + fs.writeFileSync(cfgPath, lines.filter(Boolean).join("\n") + "\n", "utf8"); + } + + console.log("โœ“ Grafana password configured\n"); + } catch (error) { + console.log("โš  Could not generate Grafana password automatically"); + console.log("Using default password: demo\n"); + grafanaPassword = "demo"; + } + + // Step 5: Start services + console.log(opts.demo ? "Step 5: Starting monitoring services..." : "Step 5: Starting monitoring services..."); + const code2 = await runCompose(["up", "-d", "--force-recreate"]); + if (code2 !== 0) { + process.exitCode = code2; + return; + } + console.log("โœ“ Services started\n"); + + // Final summary + console.log("================================="); + console.log(" ๐ŸŽ‰ Quickstart setup completed!"); + console.log("=================================\n"); + + console.log("What's running:"); + if (opts.demo) { + console.log(" โœ… Demo PostgreSQL database (monitoring target)"); + } + console.log(" โœ… PostgreSQL monitoring infrastructure"); + console.log(" โœ… Grafana dashboards (with secure password)"); + console.log(" โœ… Prometheus metrics storage"); + console.log(" โœ… Flask API backend"); + console.log(" โœ… Automated report generation (every 24h)"); + console.log(" โœ… Host stats monitoring (CPU, memory, disk, I/O)\n"); + + if (!opts.demo) { + console.log("Next steps:"); + console.log(" โ€ข Add more PostgreSQL instances: postgres-ai mon targets add"); + console.log(" โ€ข View configured instances: postgres-ai mon targets list"); + console.log(" โ€ข Check service health: postgres-ai mon health\n"); + } else { + console.log("Demo mode next steps:"); + console.log(" โ€ข Explore Grafana dashboards at http://localhost:3000"); + console.log(" โ€ข Connect to demo database: postgresql://postgres:postgres@localhost:55432/target_database"); + console.log(" โ€ข Generate some load on the demo database to see metrics\n"); + } + + console.log("โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”"); + console.log("๐Ÿš€ MAIN ACCESS POINT - Start here:"); + console.log(" Grafana Dashboard: http://localhost:3000"); + console.log(` Login: monitor / ${grafanaPassword}`); + console.log("โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”\n"); + }); + +mon + .command("start") + .description("start monitoring services") + .action(async () => { + // Check if containers 
are already running + const { running, containers } = checkRunningContainers(); + if (running) { + console.log(`Monitoring services are already running: ${containers.join(", ")}`); + console.log("Use 'postgres-ai mon restart' to restart them"); + return; + } + + const code = await runCompose(["up", "-d"]); + if (code !== 0) process.exitCode = code; + }); + +mon + .command("stop") + .description("stop monitoring services") + .action(async () => { + const code = await runCompose(["down"]); + if (code !== 0) process.exitCode = code; + }); + +mon + .command("restart [service]") + .description("restart all monitoring services or specific service") + .action(async (service?: string) => { + const args = ["restart"]; + if (service) args.push(service); + const code = await runCompose(args); + if (code !== 0) process.exitCode = code; + }); + +mon + .command("status") + .description("show monitoring services status") + .action(async () => { + const code = await runCompose(["ps"]); + if (code !== 0) process.exitCode = code; + }); + +mon + .command("logs [service]") + .option("-f, --follow", "follow logs", false) + .option("--tail ", "number of lines to show from the end of logs", "all") + .description("show logs for all or specific monitoring service") + .action(async (service: string | undefined, opts: { follow: boolean; tail: string }) => { + const args: string[] = ["logs"]; + if (opts.follow) args.push("-f"); + if (opts.tail) args.push("--tail", opts.tail); + if (service) args.push(service); + const code = await runCompose(args); + if (code !== 0) process.exitCode = code; + }); +mon + .command("health") + .description("health check for monitoring services") + .option("--wait ", "wait time in seconds for services to become healthy", parseInt, 0) + .action(async (opts: { wait: number }) => { + const services = [ + { name: "Grafana", container: "grafana-with-datasources" }, + { name: "Prometheus", container: "sink-prometheus" }, + { name: "PGWatch (Postgres)", container: "pgwatch-postgres" }, + { name: "PGWatch (Prometheus)", container: "pgwatch-prometheus" }, + { name: "Target DB", container: "target-db" }, + { name: "Sink Postgres", container: "sink-postgres" }, + ]; + + const waitTime = opts.wait || 0; + const maxAttempts = waitTime > 0 ? 
Math.ceil(waitTime / 5) : 1; + + console.log("Checking service health...\n"); + + let allHealthy = false; + for (let attempt = 1; attempt <= maxAttempts; attempt++) { + if (attempt > 1) { + console.log(`Retrying (attempt ${attempt}/${maxAttempts})...\n`); + await new Promise(resolve => setTimeout(resolve, 5000)); + } + + allHealthy = true; + for (const service of services) { + try { + const { execSync } = require("child_process"); + const status = execSync(`docker inspect -f '{{.State.Status}}' ${service.container} 2>/dev/null`, { + encoding: 'utf8', + stdio: ['pipe', 'pipe', 'pipe'] + }).trim(); + + if (status === 'running') { + console.log(`โœ“ ${service.name}: healthy`); + } else { + console.log(`โœ— ${service.name}: unhealthy (status: ${status})`); + allHealthy = false; + } + } catch (error) { + console.log(`โœ— ${service.name}: unreachable`); + allHealthy = false; + } + } + + if (allHealthy) { + break; + } + } + + console.log(""); + if (allHealthy) { + console.log("All services are healthy"); + } else { + console.log("Some services are unhealthy"); + process.exitCode = 1; + } + }); +mon + .command("config") + .description("show monitoring services configuration") + .action(async () => { + let projectDir: string; + let composeFile: string; + let instancesFile: string; + try { + ({ projectDir, composeFile, instancesFile } = resolvePaths()); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + console.error(message); + process.exitCode = 1; + return; + } + console.log(`Project Directory: ${projectDir}`); + console.log(`Docker Compose File: ${composeFile}`); + console.log(`Instances File: ${instancesFile}`); + if (fs.existsSync(instancesFile)) { + console.log("\nInstances configuration:\n"); + const text = fs.readFileSync(instancesFile, "utf8"); + process.stdout.write(text); + if (!/\n$/.test(text)) console.log(); + } + }); +mon + .command("update-config") + .description("apply monitoring services configuration (generate sources)") + .action(async () => { + const code = await runCompose(["run", "--rm", "sources-generator"]); + if (code !== 0) process.exitCode = code; + }); +mon + .command("update") + .description("update monitoring stack") + .action(async () => { + console.log("Updating PostgresAI monitoring stack...\n"); + + try { + // Check if we're in a git repo + const gitDir = path.resolve(process.cwd(), ".git"); + if (!fs.existsSync(gitDir)) { + console.error("Not a git repository. Cannot update."); + process.exitCode = 1; + return; + } + + // Fetch latest changes + console.log("Fetching latest changes..."); + await execPromise("git fetch origin"); + + // Check current branch + const { stdout: branch } = await execPromise("git rev-parse --abbrev-ref HEAD"); + const currentBranch = branch.trim(); + console.log(`Current branch: ${currentBranch}`); + + // Pull latest changes + console.log("Pulling latest changes..."); + const { stdout: pullOut } = await execPromise("git pull origin " + currentBranch); + console.log(pullOut); + + // Update Docker images + console.log("\nUpdating Docker images..."); + const code = await runCompose(["pull"]); + + if (code === 0) { + console.log("\nโœ“ Update completed successfully"); + console.log("\nTo apply updates, restart monitoring services:"); + console.log(" postgres-ai mon restart"); + } else { + console.error("\nโœ— Docker image update failed"); + process.exitCode = 1; + } + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + console.error(`Update failed: ${message}`); + process.exitCode = 1; + } + }); +mon + .command("reset [service]") + .description("reset all or specific monitoring service") + .action(async (service?: string) => { + const rl = readline.createInterface({ + input: process.stdin, + output: process.stdout, + }); + + const question = (prompt: string): Promise => + new Promise((resolve) => rl.question(prompt, resolve)); + + try { + if (service) { + // Reset specific service + console.log(`\nThis will stop '${service}', remove its volume, and restart it.`); + console.log("All data for this service will be lost!\n"); + + const answer = await question("Continue? (y/N): "); + if (answer.toLowerCase() !== "y") { + console.log("Cancelled"); + rl.close(); + return; + } + + console.log(`\nStopping ${service}...`); + await runCompose(["stop", service]); + + console.log(`Removing volume for ${service}...`); + await runCompose(["rm", "-f", "-v", service]); + + console.log(`Restarting ${service}...`); + const code = await runCompose(["up", "-d", service]); + + if (code === 0) { + console.log(`\nโœ“ Service '${service}' has been reset`); + } else { + console.error(`\nโœ— Failed to restart '${service}'`); + process.exitCode = 1; + } + } else { + // Reset all services + console.log("\nThis will stop all services and remove all data!"); + console.log("Volumes, networks, and containers will be deleted.\n"); + + const answer = await question("Continue? (y/N): "); + if (answer.toLowerCase() !== "y") { + console.log("Cancelled"); + rl.close(); + return; + } + + console.log("\nStopping services and removing data..."); + const downCode = await runCompose(["down", "-v"]); + + if (downCode === 0) { + console.log("โœ“ Environment reset completed - all containers and data removed"); + } else { + console.error("โœ— Reset failed"); + process.exitCode = 1; + } + } + + rl.close(); + } catch (error) { + rl.close(); + const message = error instanceof Error ? error.message : String(error); + console.error(`Reset failed: ${message}`); + process.exitCode = 1; + } + }); +mon + .command("clean") + .description("cleanup monitoring services artifacts") + .action(async () => { + console.log("Cleaning up Docker resources...\n"); + + try { + // Remove stopped containers + const { stdout: containers } = await execFilePromise("docker", ["ps", "-aq", "--filter", "status=exited"]); + if (containers.trim()) { + const containerIds = containers.trim().split('\n'); + await execFilePromise("docker", ["rm", ...containerIds]); + console.log("โœ“ Removed stopped containers"); + } else { + console.log("โœ“ No stopped containers to remove"); + } + + // Remove unused volumes + await execFilePromise("docker", ["volume", "prune", "-f"]); + console.log("โœ“ Removed unused volumes"); + + // Remove unused networks + await execFilePromise("docker", ["network", "prune", "-f"]); + console.log("โœ“ Removed unused networks"); + + // Remove dangling images + await execFilePromise("docker", ["image", "prune", "-f"]); + console.log("โœ“ Removed dangling images"); + + console.log("\nCleanup completed"); + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + console.error(`Error during cleanup: ${message}`); + process.exitCode = 1; + } + }); +mon + .command("shell ") + .description("open shell to monitoring service") + .action(async (service: string) => { + const code = await runCompose(["exec", service, "/bin/sh"]); + if (code !== 0) process.exitCode = code; + }); +mon + .command("check") + .description("monitoring services system readiness check") + .action(async () => { + const code = await runCompose(["ps"]); + if (code !== 0) process.exitCode = code; + }); + +// Monitoring targets (databases to monitor) +const targets = mon.command("targets").description("manage databases to monitor"); + +targets + .command("list") + .description("list monitoring target databases") + .action(async () => { + const instancesPath = path.resolve(process.cwd(), "instances.yml"); + if (!fs.existsSync(instancesPath)) { + console.error(`instances.yml not found in ${process.cwd()}`); + process.exitCode = 1; + return; + } + + try { + const content = fs.readFileSync(instancesPath, "utf8"); + const instances = yaml.load(content) as Instance[] | null; + + if (!instances || !Array.isArray(instances) || instances.length === 0) { + console.log("No monitoring targets configured"); + console.log(""); + console.log("To add a monitoring target:"); + console.log(" postgres-ai mon targets add "); + console.log(""); + console.log("Example:"); + console.log(" postgres-ai mon targets add 'postgresql://user:pass@host:5432/db' my-db"); + return; + } + + // Filter out disabled instances (e.g., demo placeholders) + const filtered = instances.filter((inst) => inst.name && inst.is_enabled !== false); + + if (filtered.length === 0) { + console.log("No monitoring targets configured"); + console.log(""); + console.log("To add a monitoring target:"); + console.log(" postgres-ai mon targets add "); + console.log(""); + console.log("Example:"); + console.log(" postgres-ai mon targets add 'postgresql://user:pass@host:5432/db' my-db"); + return; + } + + for (const inst of filtered) { + console.log(`Target: ${inst.name}`); + } + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + console.error(`Error parsing instances.yml: ${message}`); + process.exitCode = 1; + } + }); +targets + .command("add [connStr] [name]") + .description("add monitoring target database") + .action(async (connStr?: string, name?: string) => { + const file = path.resolve(process.cwd(), "instances.yml"); + if (!connStr) { + console.error("Connection string required: postgresql://user:pass@host:port/db"); + process.exitCode = 1; + return; + } + const m = connStr.match(/^postgresql:\/\/([^:]+):([^@]+)@([^:\/]+)(?::(\d+))?\/(.+)$/); + if (!m) { + console.error("Invalid connection string format"); + process.exitCode = 1; + return; + } + const host = m[3]; + const db = m[5]; + const instanceName = name && name.trim() ? name.trim() : `${host}-${db}`.replace(/[^a-zA-Z0-9-]/g, "-"); + + // Check if instance already exists + try { + if (fs.existsSync(file)) { + const content = fs.readFileSync(file, "utf8"); + const instances = yaml.load(content) as Instance[] | null || []; + if (Array.isArray(instances)) { + const exists = instances.some((inst) => inst.name === instanceName); + if (exists) { + console.error(`Monitoring target '${instanceName}' already exists`); + process.exitCode = 1; + return; + } + } + } + } catch (err) { + // If YAML parsing fails, fall back to simple check + const content = fs.existsSync(file) ? 
fs.readFileSync(file, "utf8") : ""; + if (new RegExp(`^- name: ${instanceName}$`, "m").test(content)) { + console.error(`Monitoring target '${instanceName}' already exists`); + process.exitCode = 1; + return; + } + } + + // Add new instance + const body = `- name: ${instanceName}\n conn_str: ${connStr}\n preset_metrics: full\n custom_metrics:\n is_enabled: true\n group: default\n custom_tags:\n env: production\n cluster: default\n node_name: ${instanceName}\n sink_type: ~sink_type~\n`; + const content = fs.existsSync(file) ? fs.readFileSync(file, "utf8") : ""; + fs.appendFileSync(file, (content && !/\n$/.test(content) ? "\n" : "") + body, "utf8"); + console.log(`Monitoring target '${instanceName}' added`); + }); +targets + .command("remove ") + .description("remove monitoring target database") + .action(async (name: string) => { + const file = path.resolve(process.cwd(), "instances.yml"); + if (!fs.existsSync(file)) { + console.error("instances.yml not found"); + process.exitCode = 1; + return; + } + + try { + const content = fs.readFileSync(file, "utf8"); + const instances = yaml.load(content) as Instance[] | null; + + if (!instances || !Array.isArray(instances)) { + console.error("Invalid instances.yml format"); + process.exitCode = 1; + return; + } + + const filtered = instances.filter((inst) => inst.name !== name); + + if (filtered.length === instances.length) { + console.error(`Monitoring target '${name}' not found`); + process.exitCode = 1; + return; + } + + fs.writeFileSync(file, yaml.dump(filtered), "utf8"); + console.log(`Monitoring target '${name}' removed`); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + console.error(`Error processing instances.yml: ${message}`); + process.exitCode = 1; + } + }); +targets + .command("test ") + .description("test monitoring target database connectivity") + .action(async (name: string) => { + const instancesPath = path.resolve(process.cwd(), "instances.yml"); + if (!fs.existsSync(instancesPath)) { + console.error("instances.yml not found"); + process.exitCode = 1; + return; + } + + try { + const content = fs.readFileSync(instancesPath, "utf8"); + const instances = yaml.load(content) as Instance[] | null; + + if (!instances || !Array.isArray(instances)) { + console.error("Invalid instances.yml format"); + process.exitCode = 1; + return; + } + + const instance = instances.find((inst) => inst.name === name); + + if (!instance) { + console.error(`Monitoring target '${name}' not found`); + process.exitCode = 1; + return; + } + + if (!instance.conn_str) { + console.error(`Connection string not found for monitoring target '${name}'`); + process.exitCode = 1; + return; + } + + console.log(`Testing connection to monitoring target '${name}'...`); + + // Use native pg client instead of requiring psql to be installed + const { Client } = require('pg'); + const client = new Client({ connectionString: instance.conn_str }); + + try { + await client.connect(); + const result = await client.query('select version();'); + console.log(`โœ“ Connection successful`); + console.log(result.rows[0].version); + } finally { + await client.end(); + } + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + console.error(`โœ— Connection failed: ${message}`); + process.exitCode = 1; + } + }); + +// Authentication and API key management +program + .command("auth") + .description("authenticate via browser and obtain API key") + .option("--port ", "local callback server port (default: random)", parseInt) + .option("--debug", "enable debug output") + .action(async (opts: { port?: number; debug?: boolean }) => { + const pkce = require("../lib/pkce"); + const authServer = require("../lib/auth-server"); + + console.log("Starting authentication flow...\n"); + + // Generate PKCE parameters + const params = pkce.generatePKCEParams(); + + const rootOpts = program.opts(); + const cfg = config.readConfig(); + const { apiBaseUrl, uiBaseUrl } = resolveBaseUrls(rootOpts, cfg); + + if (opts.debug) { + console.log(`Debug: Resolved API base URL: ${apiBaseUrl}`); + console.log(`Debug: Resolved UI base URL: ${uiBaseUrl}`); + } + + try { + // Step 1: Start local callback server FIRST to get actual port + console.log("Starting local callback server..."); + const requestedPort = opts.port || 0; // 0 = OS assigns available port + const callbackServer = authServer.createCallbackServer(requestedPort, params.state, 120000); // 2 minute timeout + + // Wait a bit for server to start and get port + await new Promise(resolve => setTimeout(resolve, 100)); + const actualPort = callbackServer.getPort(); + const redirectUri = `http://localhost:${actualPort}/callback`; + + console.log(`Callback server listening on port ${actualPort}`); + + // Step 2: Initialize OAuth session on backend + console.log("Initializing authentication session..."); + const initData = JSON.stringify({ + client_type: "cli", + state: params.state, + code_challenge: params.codeChallenge, + code_challenge_method: params.codeChallengeMethod, + redirect_uri: redirectUri, + }); + + // Build init URL by appending to the API base path (keep /api/general) + const initUrl = new URL(`${apiBaseUrl}/rpc/oauth_init`); + + if (opts.debug) { + console.log(`Debug: Trying to POST to: ${initUrl.toString()}`); + console.log(`Debug: Request data: ${initData}`); + } + + const initReq = http.request( + initUrl, + { + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(initData), + }, + }, + (res) => { + let data = ""; + res.on("data", (chunk) => (data += chunk)); + res.on("end", async () => { + if (res.statusCode !== 200) { + console.error(`Failed to initialize auth session: ${res.statusCode}`); + + // Check if response is HTML (common for 404 pages) + if (data.trim().startsWith(" { + console.log("\n\nAuthentication cancelled by user."); + callbackServer.server.close(); + process.exit(130); // Standard exit code for SIGINT + }; + process.on("SIGINT", cancelHandler); + + try { + const { code } = await callbackServer.promise; + + // Remove the cancel handler after successful auth + process.off("SIGINT", cancelHandler); + + // Step 5: Exchange code for token + console.log("\nExchanging authorization code for API token..."); + const exchangeData = JSON.stringify({ + authorization_code: code, + code_verifier: params.codeVerifier, + state: params.state, + }); + const exchangeUrl = new URL(`${apiBaseUrl}/rpc/oauth_token_exchange`); + const exchangeReq = http.request( + exchangeUrl, + { + method: "POST", + headers: { + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(exchangeData), + }, + }, + (exchangeRes) => { + let exchangeBody = ""; + exchangeRes.on("data", (chunk) => 
(exchangeBody += chunk)); + exchangeRes.on("end", () => { + if (exchangeRes.statusCode !== 200) { + console.error(`Failed to exchange code for token: ${exchangeRes.statusCode}`); + + // Check if response is HTML (common for 404 pages) + if (exchangeBody.trim().startsWith(" { + console.error(`Exchange request failed: ${err.message}`); + process.exit(1); + }); + + exchangeReq.write(exchangeData); + exchangeReq.end(); + + } catch (err) { + // Remove the cancel handler in error case too + process.off("SIGINT", cancelHandler); + + const message = err instanceof Error ? err.message : String(err); + + // Provide more helpful error messages + if (message.includes("timeout")) { + console.error(`\nAuthentication timed out.`); + console.error(`This usually means you closed the browser window without completing authentication.`); + console.error(`Please try again and complete the authentication flow.`); + } else { + console.error(`\nAuthentication failed: ${message}`); + } + + process.exit(1); + } + }); + } + ); + + initReq.on("error", (err: Error) => { + console.error(`Failed to connect to API: ${err.message}`); + callbackServer.server.close(); + process.exit(1); + }); + + initReq.write(initData); + initReq.end(); + + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + console.error(`Authentication error: ${message}`); + process.exit(1); + } + }); + +program + .command("add-key ") + .description("store API key") + .action(async (apiKey: string) => { + config.writeConfig({ apiKey }); + console.log(`API key saved to ${config.getConfigPath()}`); + }); + +program + .command("show-key") + .description("show API key (masked)") + .action(async () => { + const cfg = config.readConfig(); + if (!cfg.apiKey) { + console.log("No API key configured"); + console.log(`\nTo authenticate, run: pgai auth`); + return; + } + const { maskSecret } = require("../lib/util"); + console.log(`Current API key: ${maskSecret(cfg.apiKey)}`); + if (cfg.orgId) { + console.log(`Organization ID: ${cfg.orgId}`); + } + console.log(`Config location: ${config.getConfigPath()}`); + }); + +program + .command("remove-key") + .description("remove API key") + .action(async () => { + // Check both new config and legacy config + const newConfigPath = config.getConfigPath(); + const hasNewConfig = fs.existsSync(newConfigPath); + const legacyPath = path.resolve(process.cwd(), ".pgwatch-config"); + const hasLegacyConfig = fs.existsSync(legacyPath) && fs.statSync(legacyPath).isFile(); + + if (!hasNewConfig && !hasLegacyConfig) { + console.log("No API key configured"); + return; + } + + // Remove from new config + if (hasNewConfig) { + config.deleteConfigKeys(["apiKey", "orgId"]); + } + + // Remove from legacy config + if (hasLegacyConfig) { + try { + const content = fs.readFileSync(legacyPath, "utf8"); + const filtered = content + .split(/\r?\n/) + .filter((l) => !/^api_key=/.test(l)) + .join("\n") + .replace(/\n+$/g, "\n"); + fs.writeFileSync(legacyPath, filtered, "utf8"); + } catch (err) { + // If we can't read/write the legacy config, just skip it + console.warn(`Warning: Could not update legacy config: ${err instanceof Error ? 
err.message : String(err)}`); + } + } + + console.log("API key removed"); + console.log(`\nTo authenticate again, run: pgai auth`); + }); +mon + .command("generate-grafana-password") + .description("generate Grafana password for monitoring services") + .action(async () => { + const cfgPath = path.resolve(process.cwd(), ".pgwatch-config"); + + try { + // Generate secure password using openssl + const { stdout: password } = await execPromise( + "openssl rand -base64 12 | tr -d '\n'" + ); + const newPassword = password.trim(); + + if (!newPassword) { + console.error("Failed to generate password"); + process.exitCode = 1; + return; + } + + // Read existing config + let configContent = ""; + if (fs.existsSync(cfgPath)) { + const stats = fs.statSync(cfgPath); + if (stats.isDirectory()) { + console.error(".pgwatch-config is a directory, expected a file. Skipping read."); + } else { + configContent = fs.readFileSync(cfgPath, "utf8"); + } + } + + // Update or add grafana_password + const lines = configContent.split(/\r?\n/).filter((l) => !/^grafana_password=/.test(l)); + lines.push(`grafana_password=${newPassword}`); + + // Write back + fs.writeFileSync(cfgPath, lines.filter(Boolean).join("\n") + "\n", "utf8"); + + console.log("โœ“ New Grafana password generated and saved"); + console.log("\nNew credentials:"); + console.log(" URL: http://localhost:3000"); + console.log(" Username: monitor"); + console.log(` Password: ${newPassword}`); + console.log("\nReset Grafana to apply new password:"); + console.log(" postgres-ai mon reset grafana"); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + console.error(`Failed to generate password: ${message}`); + console.error("\nNote: This command requires 'openssl' to be installed"); + process.exitCode = 1; + } + }); +mon + .command("show-grafana-credentials") + .description("show Grafana credentials for monitoring services") + .action(async () => { + const cfgPath = path.resolve(process.cwd(), ".pgwatch-config"); + if (!fs.existsSync(cfgPath)) { + console.error("Configuration file not found. Run 'postgres-ai mon quickstart' first."); + process.exitCode = 1; + return; + } + + const stats = fs.statSync(cfgPath); + if (stats.isDirectory()) { + console.error(".pgwatch-config is a directory, expected a file. 
Cannot read credentials."); + process.exitCode = 1; + return; + } + + const content = fs.readFileSync(cfgPath, "utf8"); + const lines = content.split(/\r?\n/); + let password = ""; + for (const line of lines) { + const m = line.match(/^grafana_password=(.+)$/); + if (m) { + password = m[1].trim(); + break; + } + } + if (!password) { + console.error("Grafana password not found in configuration"); + process.exitCode = 1; + return; + } + console.log("\nGrafana credentials:"); + console.log(" URL: http://localhost:3000"); + console.log(" Username: monitor"); + console.log(` Password: ${password}`); + console.log(""); + }); + +/** + * Interpret escape sequences in a string (e.g., \n -> newline) + * Note: In regex, to match literal backslash-n, we need \\n in the pattern + * which requires \\\\n in the JavaScript string literal + */ +function interpretEscapes(str: string): string { + // First handle double backslashes by temporarily replacing them + // Then handle other escapes, then restore double backslashes as single + return str + .replace(/\\\\/g, '\x00') // Temporarily mark double backslashes + .replace(/\\n/g, '\n') // Match literal backslash-n (\\\\n in JS string -> \\n in regex -> matches \n) + .replace(/\\t/g, '\t') + .replace(/\\r/g, '\r') + .replace(/\\"/g, '"') + .replace(/\\'/g, "'") + .replace(/\x00/g, '\\'); // Restore double backslashes as single +} + +// Issues management +const issues = program.command("issues").description("issues management"); + +issues + .command("list") + .description("list issues") + .option("--debug", "enable debug output") + .option("--json", "output raw JSON") + .action(async (opts: { debug?: boolean; json?: boolean }) => { + try { + const rootOpts = program.opts(); + const cfg = config.readConfig(); + const { apiKey } = getConfig(rootOpts); + if (!apiKey) { + console.error("API key is required. Run 'pgai auth' first or set --api-key."); + process.exitCode = 1; + return; + } + + const { apiBaseUrl } = resolveBaseUrls(rootOpts, cfg); + + const result = await fetchIssues({ apiKey, apiBaseUrl, debug: !!opts.debug }); + const trimmed = Array.isArray(result) + ? (result as any[]).map((r) => ({ + id: (r as any).id, + title: (r as any).title, + status: (r as any).status, + created_at: (r as any).created_at, + })) + : result; + printResult(trimmed, opts.json); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + console.error(message); + process.exitCode = 1; + } + }); + +issues + .command("view ") + .description("view issue details and comments") + .option("--debug", "enable debug output") + .option("--json", "output raw JSON") + .action(async (issueId: string, opts: { debug?: boolean; json?: boolean }) => { + try { + const rootOpts = program.opts(); + const cfg = config.readConfig(); + const { apiKey } = getConfig(rootOpts); + if (!apiKey) { + console.error("API key is required. Run 'pgai auth' first or set --api-key."); + process.exitCode = 1; + return; + } + + const { apiBaseUrl } = resolveBaseUrls(rootOpts, cfg); + + const issue = await fetchIssue({ apiKey, apiBaseUrl, issueId, debug: !!opts.debug }); + if (!issue) { + console.error("Issue not found"); + process.exitCode = 1; + return; + } + + const comments = await fetchIssueComments({ apiKey, apiBaseUrl, issueId, debug: !!opts.debug }); + const combined = { issue, comments }; + printResult(combined, opts.json); + } catch (err) { + const message = err instanceof Error ? 
err.message : String(err); + console.error(message); + process.exitCode = 1; + } + }); + +issues + .command("post_comment ") + .description("post a new comment to an issue") + .option("--parent ", "parent comment id") + .option("--debug", "enable debug output") + .option("--json", "output raw JSON") + .action(async (issueId: string, content: string, opts: { parent?: string; debug?: boolean; json?: boolean }) => { + try { + // Interpret escape sequences in content (e.g., \n -> newline) + if (opts.debug) { + // eslint-disable-next-line no-console + console.log(`Debug: Original content: ${JSON.stringify(content)}`); + } + content = interpretEscapes(content); + if (opts.debug) { + // eslint-disable-next-line no-console + console.log(`Debug: Interpreted content: ${JSON.stringify(content)}`); + } + + const rootOpts = program.opts(); + const cfg = config.readConfig(); + const { apiKey } = getConfig(rootOpts); + if (!apiKey) { + console.error("API key is required. Run 'pgai auth' first or set --api-key."); + process.exitCode = 1; + return; + } + + const { apiBaseUrl } = resolveBaseUrls(rootOpts, cfg); + + const result = await createIssueComment({ + apiKey, + apiBaseUrl, + issueId, + content, + parentCommentId: opts.parent, + debug: !!opts.debug, + }); + printResult(result, opts.json); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + console.error(message); + process.exitCode = 1; + } + }); + +// MCP server +const mcp = program.command("mcp").description("MCP server integration"); + +mcp + .command("start") + .description("start MCP stdio server") + .option("--debug", "enable debug output") + .action(async (opts: { debug?: boolean }) => { + const rootOpts = program.opts(); + await startMcpServer(rootOpts, { debug: !!opts.debug }); + }); + +mcp + .command("install [client]") + .description("install MCP server configuration for AI coding tool") + .action(async (client?: string) => { + const supportedClients = ["cursor", "claude-code", "windsurf", "codex"]; + + // If no client specified, prompt user to choose + if (!client) { + console.log("Available AI coding tools:"); + console.log(" 1. Cursor"); + console.log(" 2. Claude Code"); + console.log(" 3. Windsurf"); + console.log(" 4. 
Codex"); + console.log(""); + + const rl = readline.createInterface({ + input: process.stdin, + output: process.stdout + }); + + const answer = await new Promise((resolve) => { + rl.question("Select your AI coding tool (1-4): ", resolve); + }); + rl.close(); + + const choices: Record = { + "1": "cursor", + "2": "claude-code", + "3": "windsurf", + "4": "codex" + }; + + client = choices[answer.trim()]; + if (!client) { + console.error("Invalid selection"); + process.exitCode = 1; + return; + } + } + + client = client.toLowerCase(); + + if (!supportedClients.includes(client)) { + console.error(`Unsupported client: ${client}`); + console.error(`Supported clients: ${supportedClients.join(", ")}`); + process.exitCode = 1; + return; + } + + try { + // Get the path to the current pgai executable + let pgaiPath: string; + try { + const execPath = await execPromise("which pgai"); + pgaiPath = execPath.stdout.trim(); + } catch { + // Fallback to just "pgai" if which fails + pgaiPath = "pgai"; + } + + // Claude Code uses its own CLI to manage MCP servers + if (client === "claude-code") { + console.log("Installing PostgresAI MCP server for Claude Code..."); + + try { + const { stdout, stderr } = await execPromise( + `claude mcp add -s user postgresai ${pgaiPath} mcp start` + ); + + if (stdout) console.log(stdout); + if (stderr) console.error(stderr); + + console.log(""); + console.log("Successfully installed PostgresAI MCP server for Claude Code"); + console.log(""); + console.log("Next steps:"); + console.log(" 1. Restart Claude Code to load the new configuration"); + console.log(" 2. The PostgresAI MCP server will be available as 'postgresai'"); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + console.error("Failed to install MCP server using Claude CLI"); + console.error(message); + console.error(""); + console.error("Make sure the 'claude' CLI tool is installed and in your PATH"); + console.error("See: https://docs.anthropic.com/en/docs/build-with-claude/mcp"); + process.exitCode = 1; + } + return; + } + + // For other clients (Cursor, Windsurf, Codex), use JSON config editing + const homeDir = os.homedir(); + let configPath: string; + let configDir: string; + + // Determine config file location based on client + switch (client) { + case "cursor": + configPath = path.join(homeDir, ".cursor", "mcp.json"); + configDir = path.dirname(configPath); + break; + + case "windsurf": + configPath = path.join(homeDir, ".windsurf", "mcp.json"); + configDir = path.dirname(configPath); + break; + + case "codex": + configPath = path.join(homeDir, ".codex", "mcp.json"); + configDir = path.dirname(configPath); + break; + + default: + console.error(`Configuration not implemented for: ${client}`); + process.exitCode = 1; + return; + } + + // Ensure config directory exists + if (!fs.existsSync(configDir)) { + fs.mkdirSync(configDir, { recursive: true }); + } + + // Read existing config or create new one + let config: any = { mcpServers: {} }; + if (fs.existsSync(configPath)) { + try { + const content = fs.readFileSync(configPath, "utf8"); + config = JSON.parse(content); + if (!config.mcpServers) { + config.mcpServers = {}; + } + } catch (err) { + console.error(`Warning: Could not parse existing config, creating new one`); + } + } + + // Add or update PostgresAI MCP server configuration + config.mcpServers.postgresai = { + command: pgaiPath, + args: ["mcp", "start"] + }; + + // Write updated config + fs.writeFileSync(configPath, JSON.stringify(config, null, 2), "utf8"); + + 
console.log(`โœ“ PostgresAI MCP server configured for ${client}`); + console.log(` Config file: ${configPath}`); + console.log(""); + console.log("Please restart your AI coding tool to activate the MCP server"); + + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + console.error(`Failed to install MCP server: ${message}`); + process.exitCode = 1; + } + }); + +program.parseAsync(process.argv); + diff --git a/cli/lib/auth-server.ts b/cli/lib/auth-server.ts new file mode 100644 index 0000000..04442a6 --- /dev/null +++ b/cli/lib/auth-server.ts @@ -0,0 +1,267 @@ +import * as http from "http"; +import { URL } from "url"; + +/** + * OAuth callback result + */ +export interface CallbackResult { + code: string; + state: string; +} + +/** + * Callback server structure + */ +export interface CallbackServer { + server: http.Server; + promise: Promise; + getPort: () => number; +} + +/** + * Simple HTML escape utility + * @param str - String to escape + * @returns Escaped string + */ +function escapeHtml(str: string | null): string { + if (!str) return ""; + return String(str) + .replace(/&/g, "&") + .replace(//g, ">") + .replace(/"/g, """) + .replace(/'/g, "'"); +} + +/** + * Create and start callback server, returning server object and promise + * @param port - Port to listen on (0 for random available port) + * @param expectedState - Expected state parameter for CSRF protection + * @param timeoutMs - Timeout in milliseconds + * @returns Server object with promise and getPort function + */ +export function createCallbackServer( + port: number = 0, + expectedState: string | null = null, + timeoutMs: number = 300000 +): CallbackServer { + let resolved = false; + let server: http.Server | null = null; + let actualPort = port; + let resolveCallback: (value: CallbackResult) => void; + let rejectCallback: (reason: Error) => void; + + const promise = new Promise((resolve, reject) => { + resolveCallback = resolve; + rejectCallback = reject; + }); + + // Timeout handler + const timeout = setTimeout(() => { + if (!resolved) { + resolved = true; + if (server) { + server.close(); + } + rejectCallback(new Error("Authentication timeout. Please try again.")); + } + }, timeoutMs); + + // Request handler + const requestHandler = (req: http.IncomingMessage, res: http.ServerResponse): void => { + if (resolved) { + return; + } + + // Only handle /callback path + if (!req.url || !req.url.startsWith("/callback")) { + res.writeHead(404, { "Content-Type": "text/plain" }); + res.end("Not Found"); + return; + } + + try { + const url = new URL(req.url, `http://localhost:${actualPort}`); + const code = url.searchParams.get("code"); + const state = url.searchParams.get("state"); + const error = url.searchParams.get("error"); + const errorDescription = url.searchParams.get("error_description"); + + // Handle OAuth error + if (error) { + resolved = true; + clearTimeout(timeout); + + res.writeHead(400, { "Content-Type": "text/html" }); + res.end(` + + + + Authentication failed + + + +
+            <div>
+              <h1>Authentication failed</h1>
+              <p>Error: ${escapeHtml(error)}</p>
+              ${errorDescription ? `<p>Description: ${escapeHtml(errorDescription)}</p>` : ""}
+              <p>You can close this window and return to your terminal.</p>
+            </div>
+ + + `); + + if (server) { + server.close(); + } + rejectCallback(new Error(`OAuth error: ${error}${errorDescription ? ` - ${errorDescription}` : ""}`)); + return; + } + + // Validate required parameters + if (!code || !state) { + res.writeHead(400, { "Content-Type": "text/html" }); + res.end(` + + + + Authentication failed + + + +
+            <div>
+              <h1>Authentication failed</h1>
+              <p>Missing required parameters (code or state).</p>
+              <p>You can close this window and return to your terminal.</p>
+            </div>
+ + + `); + return; + } + + // Validate state (CSRF protection) + if (expectedState && state !== expectedState) { + resolved = true; + clearTimeout(timeout); + + res.writeHead(400, { "Content-Type": "text/html" }); + res.end(` + + + + Authentication failed + + + +
+            <div>
+              <h1>Authentication failed</h1>
+              <p>Invalid state parameter (possible CSRF attack).</p>
+              <p>You can close this window and return to your terminal.</p>
+            </div>
+ + + `); + + if (server) { + server.close(); + } + rejectCallback(new Error("State mismatch (possible CSRF attack)")); + return; + } + + // Success! + resolved = true; + clearTimeout(timeout); + + res.writeHead(200, { "Content-Type": "text/html" }); + res.end(` + + + + Authentication successful + + + +
+            <div>
+              <h1>Authentication successful</h1>
+              <p>You have successfully authenticated the PostgresAI CLI.</p>
+              <p>You can close this window and return to your terminal.</p>
+            </div>
+ + + `); + + if (server) { + server.close(); + } + resolveCallback({ code, state }); + } catch (err) { + if (!resolved) { + resolved = true; + clearTimeout(timeout); + res.writeHead(500, { "Content-Type": "text/plain" }); + res.end("Internal Server Error"); + if (server) { + server.close(); + } + rejectCallback(err instanceof Error ? err : new Error(String(err))); + } + } + }; + + // Create server + server = http.createServer(requestHandler); + + server.on("error", (err: Error) => { + if (!resolved) { + resolved = true; + clearTimeout(timeout); + rejectCallback(err); + } + }); + + server.listen(port, "127.0.0.1", () => { + const address = server?.address(); + if (address && typeof address === "object") { + actualPort = address.port; + } + }); + + return { + server, + promise, + getPort: () => { + const address = server?.address(); + return address && typeof address === "object" ? address.port : 0; + }, + }; +} + +/** + * Get the actual port the server is listening on + * @param server - HTTP server instance + * @returns Port number + */ +export function getServerPort(server: http.Server): number { + const address = server.address(); + return address && typeof address === "object" ? address.port : 0; +} + diff --git a/cli/lib/config.ts b/cli/lib/config.ts new file mode 100644 index 0000000..7e6f33c --- /dev/null +++ b/cli/lib/config.ts @@ -0,0 +1,161 @@ +import * as fs from "fs"; +import * as path from "path"; +import * as os from "os"; + +/** + * Configuration object structure + */ +export interface Config { + apiKey: string | null; + baseUrl: string | null; + orgId: number | null; +} + +/** + * Get the user-level config directory path + * @returns Path to ~/.config/postgresai + */ +export function getConfigDir(): string { + const configHome = process.env.XDG_CONFIG_HOME || path.join(os.homedir(), ".config"); + return path.join(configHome, "postgresai"); +} + +/** + * Get the user-level config file path + * @returns Path to ~/.config/postgresai/config.json + */ +export function getConfigPath(): string { + return path.join(getConfigDir(), "config.json"); +} + +/** + * Get the legacy project-local config file path + * @returns Path to .pgwatch-config in current directory + */ +export function getLegacyConfigPath(): string { + return path.resolve(process.cwd(), ".pgwatch-config"); +} + +/** + * Read configuration from file + * Tries user-level config first, then falls back to legacy project-local config + * @returns Configuration object with apiKey, baseUrl, orgId + */ +export function readConfig(): Config { + const config: Config = { + apiKey: null, + baseUrl: null, + orgId: null, + }; + + // Try user-level config first + const userConfigPath = getConfigPath(); + if (fs.existsSync(userConfigPath)) { + try { + const content = fs.readFileSync(userConfigPath, "utf8"); + const parsed = JSON.parse(content); + config.apiKey = parsed.apiKey || null; + config.baseUrl = parsed.baseUrl || null; + config.orgId = parsed.orgId || null; + return config; + } catch (err) { + const message = err instanceof Error ? 
err.message : String(err); + console.error(`Warning: Failed to read config from ${userConfigPath}: ${message}`); + } + } + + // Fall back to legacy project-local config + const legacyPath = getLegacyConfigPath(); + if (fs.existsSync(legacyPath)) { + try { + const stats = fs.statSync(legacyPath); + if (stats.isFile()) { + const content = fs.readFileSync(legacyPath, "utf8"); + const lines = content.split(/\r?\n/); + for (const line of lines) { + const match = line.match(/^api_key=(.+)$/); + if (match) { + config.apiKey = match[1].trim(); + break; + } + } + } + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + console.error(`Warning: Failed to read legacy config from ${legacyPath}: ${message}`); + } + } + + return config; +} + +/** + * Write configuration to user-level config file + * @param config - Configuration object with apiKey, baseUrl, orgId + */ +export function writeConfig(config: Partial): void { + const configDir = getConfigDir(); + const configPath = getConfigPath(); + + // Ensure config directory exists + if (!fs.existsSync(configDir)) { + fs.mkdirSync(configDir, { recursive: true, mode: 0o700 }); + } + + // Read existing config and merge + let existingConfig: Record = {}; + if (fs.existsSync(configPath)) { + try { + const content = fs.readFileSync(configPath, "utf8"); + existingConfig = JSON.parse(content); + } catch (err) { + // Ignore parse errors, will overwrite + } + } + + const mergedConfig = { + ...existingConfig, + ...config, + }; + + // Write config file with restricted permissions + fs.writeFileSync(configPath, JSON.stringify(mergedConfig, null, 2) + "\n", { + mode: 0o600, + }); +} + +/** + * Delete specific keys from configuration + * @param keys - Array of keys to delete (e.g., ['apiKey']) + */ +export function deleteConfigKeys(keys: string[]): void { + const configPath = getConfigPath(); + if (!fs.existsSync(configPath)) { + return; + } + + try { + const content = fs.readFileSync(configPath, "utf8"); + const config: Record = JSON.parse(content); + + for (const key of keys) { + delete config[key]; + } + + fs.writeFileSync(configPath, JSON.stringify(config, null, 2) + "\n", { + mode: 0o600, + }); + } catch (err) { + const message = err instanceof Error ? 
err.message : String(err); + console.error(`Warning: Failed to update config: ${message}`); + } +} + +/** + * Check if config file exists + * @returns True if config exists + */ +export function configExists(): boolean { + return fs.existsSync(getConfigPath()) || fs.existsSync(getLegacyConfigPath()); +} + diff --git a/cli/lib/issues.ts b/cli/lib/issues.ts new file mode 100644 index 0000000..356885c --- /dev/null +++ b/cli/lib/issues.ts @@ -0,0 +1,405 @@ +import * as https from "https"; +import { URL } from "url"; +import { maskSecret, normalizeBaseUrl } from "./util"; + +export interface IssueActionItem { + id: string; + issue_id: string; + title: string; + description: string | null; + severity: number; + is_done: boolean; + done_by: number | null; + done_at: string | null; + created_at: string; + updated_at: string; +} + +export interface Issue { + id: string; + title: string; + description: string | null; + created_at: string; + updated_at: string; + status: number; + url_main: string | null; + urls_extra: string[] | null; + data: unknown | null; + author_id: number; + org_id: number; + project_id: number | null; + is_ai_generated: boolean; + assigned_to: number[] | null; + labels: string[] | null; + is_edited: boolean; + author_display_name: string; + comment_count: number; + action_items: IssueActionItem[]; +} + +export interface IssueComment { + id: string; + issue_id: string; + author_id: number; + parent_comment_id: string | null; + content: string; + created_at: string; + updated_at: string; + data: unknown | null; +} + +export type IssueListItem = Pick; + +export type IssueDetail = Pick; +export interface FetchIssuesParams { + apiKey: string; + apiBaseUrl: string; + debug?: boolean; +} + +export async function fetchIssues(params: FetchIssuesParams): Promise { + const { apiKey, apiBaseUrl, debug } = params; + if (!apiKey) { + throw new Error("API key is required"); + } + + const base = normalizeBaseUrl(apiBaseUrl); + const url = new URL(`${base}/issues`); + url.searchParams.set("select", "id,title,status,created_at"); + + const headers: Record = { + "access-token": apiKey, + "Prefer": "return=representation", + "Content-Type": "application/json", + }; + + if (debug) { + const debugHeaders: Record = { ...headers, "access-token": maskSecret(apiKey) }; + // eslint-disable-next-line no-console + console.log(`Debug: Resolved API base URL: ${base}`); + // eslint-disable-next-line no-console + console.log(`Debug: GET URL: ${url.toString()}`); + // eslint-disable-next-line no-console + console.log(`Debug: Auth scheme: access-token`); + // eslint-disable-next-line no-console + console.log(`Debug: Request headers: ${JSON.stringify(debugHeaders)}`); + } + + return new Promise((resolve, reject) => { + const req = https.request( + url, + { + method: "GET", + headers, + }, + (res) => { + let data = ""; + res.on("data", (chunk) => (data += chunk)); + res.on("end", () => { + if (debug) { + // eslint-disable-next-line no-console + console.log(`Debug: Response status: ${res.statusCode}`); + // eslint-disable-next-line no-console + console.log(`Debug: Response headers: ${JSON.stringify(res.headers)}`); + } + if (res.statusCode && res.statusCode >= 200 && res.statusCode < 300) { + try { + const parsed = JSON.parse(data) as IssueListItem[]; + resolve(parsed); + } catch { + reject(new Error(`Failed to parse issues response: ${data}`)); + } + } else { + let errMsg = `Failed to fetch issues: HTTP ${res.statusCode}`; + if (data) { + try { + const errObj = JSON.parse(data); + errMsg += 
`\n${JSON.stringify(errObj, null, 2)}`; + } catch { + errMsg += `\n${data}`; + } + } + reject(new Error(errMsg)); + } + }); + } + ); + + req.on("error", (err: Error) => reject(err)); + req.end(); + }); +} + + +export interface FetchIssueCommentsParams { + apiKey: string; + apiBaseUrl: string; + issueId: string; + debug?: boolean; +} + +export async function fetchIssueComments(params: FetchIssueCommentsParams): Promise { + const { apiKey, apiBaseUrl, issueId, debug } = params; + if (!apiKey) { + throw new Error("API key is required"); + } + if (!issueId) { + throw new Error("issueId is required"); + } + + const base = normalizeBaseUrl(apiBaseUrl); + const url = new URL(`${base}/issue_comments?issue_id=eq.${encodeURIComponent(issueId)}`); + + const headers: Record = { + "access-token": apiKey, + "Prefer": "return=representation", + "Content-Type": "application/json", + }; + + if (debug) { + const debugHeaders: Record = { ...headers, "access-token": maskSecret(apiKey) }; + // eslint-disable-next-line no-console + console.log(`Debug: Resolved API base URL: ${base}`); + // eslint-disable-next-line no-console + console.log(`Debug: GET URL: ${url.toString()}`); + // eslint-disable-next-line no-console + console.log(`Debug: Auth scheme: access-token`); + // eslint-disable-next-line no-console + console.log(`Debug: Request headers: ${JSON.stringify(debugHeaders)}`); + } + + return new Promise((resolve, reject) => { + const req = https.request( + url, + { + method: "GET", + headers, + }, + (res) => { + let data = ""; + res.on("data", (chunk) => (data += chunk)); + res.on("end", () => { + if (debug) { + // eslint-disable-next-line no-console + console.log(`Debug: Response status: ${res.statusCode}`); + // eslint-disable-next-line no-console + console.log(`Debug: Response headers: ${JSON.stringify(res.headers)}`); + } + if (res.statusCode && res.statusCode >= 200 && res.statusCode < 300) { + try { + const parsed = JSON.parse(data) as IssueComment[]; + resolve(parsed); + } catch { + reject(new Error(`Failed to parse issue comments response: ${data}`)); + } + } else { + let errMsg = `Failed to fetch issue comments: HTTP ${res.statusCode}`; + if (data) { + try { + const errObj = JSON.parse(data); + errMsg += `\n${JSON.stringify(errObj, null, 2)}`; + } catch { + errMsg += `\n${data}`; + } + } + reject(new Error(errMsg)); + } + }); + } + ); + + req.on("error", (err: Error) => reject(err)); + req.end(); + }); +} + +export interface FetchIssueParams { + apiKey: string; + apiBaseUrl: string; + issueId: string; + debug?: boolean; +} + +export async function fetchIssue(params: FetchIssueParams): Promise { + const { apiKey, apiBaseUrl, issueId, debug } = params; + if (!apiKey) { + throw new Error("API key is required"); + } + if (!issueId) { + throw new Error("issueId is required"); + } + + const base = normalizeBaseUrl(apiBaseUrl); + const url = new URL(`${base}/issues`); + url.searchParams.set("select", "id,title,description,status,created_at,author_display_name"); + url.searchParams.set("id", `eq.${issueId}`); + url.searchParams.set("limit", "1"); + + const headers: Record = { + "access-token": apiKey, + "Prefer": "return=representation", + "Content-Type": "application/json", + }; + + if (debug) { + const debugHeaders: Record = { ...headers, "access-token": maskSecret(apiKey) }; + // eslint-disable-next-line no-console + console.log(`Debug: Resolved API base URL: ${base}`); + // eslint-disable-next-line no-console + console.log(`Debug: GET URL: ${url.toString()}`); + // eslint-disable-next-line no-console + 
console.log(`Debug: Auth scheme: access-token`); + // eslint-disable-next-line no-console + console.log(`Debug: Request headers: ${JSON.stringify(debugHeaders)}`); + } + + return new Promise((resolve, reject) => { + const req = https.request( + url, + { + method: "GET", + headers, + }, + (res) => { + let data = ""; + res.on("data", (chunk) => (data += chunk)); + res.on("end", () => { + if (debug) { + // eslint-disable-next-line no-console + console.log(`Debug: Response status: ${res.statusCode}`); + // eslint-disable-next-line no-console + console.log(`Debug: Response headers: ${JSON.stringify(res.headers)}`); + } + if (res.statusCode && res.statusCode >= 200 && res.statusCode < 300) { + try { + const parsed = JSON.parse(data); + if (Array.isArray(parsed)) { + resolve((parsed[0] as IssueDetail) ?? null); + } else { + resolve(parsed as IssueDetail); + } + } catch { + reject(new Error(`Failed to parse issue response: ${data}`)); + } + } else { + let errMsg = `Failed to fetch issue: HTTP ${res.statusCode}`; + if (data) { + try { + const errObj = JSON.parse(data); + errMsg += `\n${JSON.stringify(errObj, null, 2)}`; + } catch { + errMsg += `\n${data}`; + } + } + reject(new Error(errMsg)); + } + }); + } + ); + + req.on("error", (err: Error) => reject(err)); + req.end(); + }); +} + +export interface CreateIssueCommentParams { + apiKey: string; + apiBaseUrl: string; + issueId: string; + content: string; + parentCommentId?: string; + debug?: boolean; +} + +export async function createIssueComment(params: CreateIssueCommentParams): Promise { + const { apiKey, apiBaseUrl, issueId, content, parentCommentId, debug } = params; + if (!apiKey) { + throw new Error("API key is required"); + } + if (!issueId) { + throw new Error("issueId is required"); + } + if (!content) { + throw new Error("content is required"); + } + + const base = normalizeBaseUrl(apiBaseUrl); + const url = new URL(`${base}/rpc/issue_comment_create`); + + const bodyObj: Record = { + issue_id: issueId, + content: content, + }; + if (parentCommentId) { + bodyObj.parent_comment_id = parentCommentId; + } + const body = JSON.stringify(bodyObj); + + const headers: Record = { + "access-token": apiKey, + "Prefer": "return=representation", + "Content-Type": "application/json", + "Content-Length": Buffer.byteLength(body).toString(), + }; + + if (debug) { + const debugHeaders: Record = { ...headers, "access-token": maskSecret(apiKey) }; + // eslint-disable-next-line no-console + console.log(`Debug: Resolved API base URL: ${base}`); + // eslint-disable-next-line no-console + console.log(`Debug: POST URL: ${url.toString()}`); + // eslint-disable-next-line no-console + console.log(`Debug: Auth scheme: access-token`); + // eslint-disable-next-line no-console + console.log(`Debug: Request headers: ${JSON.stringify(debugHeaders)}`); + // eslint-disable-next-line no-console + console.log(`Debug: Request body: ${body}`); + } + + return new Promise((resolve, reject) => { + const req = https.request( + url, + { + method: "POST", + headers, + }, + (res) => { + let data = ""; + res.on("data", (chunk) => (data += chunk)); + res.on("end", () => { + if (debug) { + // eslint-disable-next-line no-console + console.log(`Debug: Response status: ${res.statusCode}`); + // eslint-disable-next-line no-console + console.log(`Debug: Response headers: ${JSON.stringify(res.headers)}`); + } + if (res.statusCode && res.statusCode >= 200 && res.statusCode < 300) { + try { + const parsed = JSON.parse(data) as IssueComment; + resolve(parsed); + } catch { + reject(new 
Error(`Failed to parse create comment response: ${data}`)); + } + } else { + let errMsg = `Failed to create issue comment: HTTP ${res.statusCode}`; + if (data) { + try { + const errObj = JSON.parse(data); + errMsg += `\n${JSON.stringify(errObj, null, 2)}`; + } catch { + errMsg += `\n${data}`; + } + } + reject(new Error(errMsg)); + } + }); + } + ); + + req.on("error", (err: Error) => reject(err)); + req.write(body); + req.end(); + }); +} + + diff --git a/cli/lib/mcp-server.ts b/cli/lib/mcp-server.ts new file mode 100644 index 0000000..5532532 --- /dev/null +++ b/cli/lib/mcp-server.ts @@ -0,0 +1,156 @@ +import * as pkg from "../package.json"; +import * as config from "./config"; +import { fetchIssues, fetchIssueComments, createIssueComment, fetchIssue } from "./issues"; +import { resolveBaseUrls } from "./util"; + +// MCP SDK imports +import { Server } from "@modelcontextprotocol/sdk/server"; +import * as path from "path"; +// Types schemas will be loaded dynamically from the SDK's CJS bundle + +interface RootOptsLike { + apiKey?: string; + apiBaseUrl?: string; +} + +export async function startMcpServer(rootOpts?: RootOptsLike, extra?: { debug?: boolean }): Promise { + // Resolve stdio transport at runtime to avoid subpath export resolution issues + const serverEntry = require.resolve("@modelcontextprotocol/sdk/server"); + const stdioPath = path.join(path.dirname(serverEntry), "stdio.js"); + // eslint-disable-next-line @typescript-eslint/no-var-requires + const { StdioServerTransport } = require(stdioPath); + // Load schemas dynamically to avoid subpath export resolution issues + const typesPath = path.resolve(path.dirname(serverEntry), "../types.js"); + // eslint-disable-next-line @typescript-eslint/no-var-requires + const { CallToolRequestSchema, ListToolsRequestSchema } = require(typesPath); + + const server = new Server( + { name: "postgresai-mcp", version: pkg.version }, + { capabilities: { tools: {} } } + ); + + // Interpret escape sequences (e.g., \n -> newline). Input comes from JSON, but + // we still normalize common escapes for consistency. 
+ const interpretEscapes = (str: string): string => + (str || "") + .replace(/\\n/g, "\n") + .replace(/\\t/g, "\t") + .replace(/\\r/g, "\r") + .replace(/\\"/g, '"') + .replace(/\\'/g, "'"); + + server.setRequestHandler(ListToolsRequestSchema, async () => { + return { + tools: [ + { + name: "list_issues", + description: "List issues from PostgresAI API (same as CLI 'issues list')", + inputSchema: { + type: "object", + properties: { + debug: { type: "boolean", description: "Enable verbose debug logs" }, + }, + additionalProperties: false, + }, + }, + { + name: "view_issue", + description: "View a specific issue with its comments", + inputSchema: { + type: "object", + properties: { + issue_id: { type: "string", description: "Issue ID (UUID)" }, + debug: { type: "boolean", description: "Enable verbose debug logs" }, + }, + required: ["issue_id"], + additionalProperties: false, + }, + }, + { + name: "post_issue_comment", + description: "Post a new comment to an issue (optionally as a reply)", + inputSchema: { + type: "object", + properties: { + issue_id: { type: "string", description: "Issue ID (UUID)" }, + content: { type: "string", description: "Comment text (supports \\n as newline)" }, + parent_comment_id: { type: "string", description: "Parent comment ID (UUID) for replies" }, + debug: { type: "boolean", description: "Enable verbose debug logs" }, + }, + required: ["issue_id", "content"], + additionalProperties: false, + }, + }, + ], + }; + }); + + server.setRequestHandler(CallToolRequestSchema, async (req: any) => { + const toolName = req.params.name; + const args = (req.params.arguments as Record) || {}; + + const cfg = config.readConfig(); + const apiKey = (rootOpts?.apiKey || process.env.PGAI_API_KEY || cfg.apiKey || "").toString(); + const { apiBaseUrl } = resolveBaseUrls(rootOpts, cfg); + + const debug = Boolean(args.debug ?? extra?.debug); + + if (!apiKey) { + return { + content: [ + { + type: "text", + text: "API key is required. Run 'pgai auth' or set PGAI_API_KEY.", + }, + ], + isError: true, + }; + } + + try { + if (toolName === "list_issues") { + const issues = await fetchIssues({ apiKey, apiBaseUrl, debug }); + return { content: [{ type: "text", text: JSON.stringify(issues, null, 2) }] }; + } + + if (toolName === "view_issue") { + const issueId = String(args.issue_id || "").trim(); + if (!issueId) { + return { content: [{ type: "text", text: "issue_id is required" }], isError: true }; + } + const issue = await fetchIssue({ apiKey, apiBaseUrl, issueId, debug }); + if (!issue) { + return { content: [{ type: "text", text: "Issue not found" }], isError: true }; + } + const comments = await fetchIssueComments({ apiKey, apiBaseUrl, issueId, debug }); + const combined = { issue, comments }; + return { content: [{ type: "text", text: JSON.stringify(combined, null, 2) }] }; + } + + if (toolName === "post_issue_comment") { + const issueId = String(args.issue_id || "").trim(); + const rawContent = String(args.content || ""); + const parentCommentId = args.parent_comment_id ? 
String(args.parent_comment_id) : undefined; + if (!issueId) { + return { content: [{ type: "text", text: "issue_id is required" }], isError: true }; + } + if (!rawContent) { + return { content: [{ type: "text", text: "content is required" }], isError: true }; + } + const content = interpretEscapes(rawContent); + const result = await createIssueComment({ apiKey, apiBaseUrl, issueId, content, parentCommentId, debug }); + return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] }; + } + + throw new Error(`Unknown tool: ${toolName}`); + } catch (err) { + const message = err instanceof Error ? err.message : String(err); + return { content: [{ type: "text", text: message }], isError: true }; + } + }); + + const transport = new StdioServerTransport(); + await server.connect(transport); +} + + diff --git a/cli/lib/pkce.ts b/cli/lib/pkce.ts new file mode 100644 index 0000000..e9838de --- /dev/null +++ b/cli/lib/pkce.ts @@ -0,0 +1,79 @@ +import * as crypto from "crypto"; + +/** + * PKCE parameters for OAuth 2.0 Authorization Code Flow with PKCE + */ +export interface PKCEParams { + codeVerifier: string; + codeChallenge: string; + codeChallengeMethod: "S256"; + state: string; +} + +/** + * Generate a cryptographically random string for PKCE + * @param length - Length of the string (43-128 characters per RFC 7636) + * @returns Base64URL-encoded random string + */ +function generateRandomString(length: number = 64): string { + const bytes = crypto.randomBytes(length); + return base64URLEncode(bytes); +} + +/** + * Base64URL encode (without padding) + * @param buffer - Buffer to encode + * @returns Base64URL-encoded string + */ +function base64URLEncode(buffer: Buffer): string { + return buffer + .toString("base64") + .replace(/\+/g, "-") + .replace(/\//g, "_") + .replace(/=/g, ""); +} + +/** + * Generate PKCE code verifier + * @returns Random code verifier (43-128 characters) + */ +export function generateCodeVerifier(): string { + return generateRandomString(32); // 32 bytes = 43 chars after base64url encoding +} + +/** + * Generate PKCE code challenge from verifier + * Uses S256 method (SHA256) + * @param verifier - Code verifier string + * @returns Base64URL-encoded SHA256 hash of verifier + */ +export function generateCodeChallenge(verifier: string): string { + const hash = crypto.createHash("sha256").update(verifier).digest(); + return base64URLEncode(hash); +} + +/** + * Generate random state for CSRF protection + * @returns Random state string + */ +export function generateState(): string { + return generateRandomString(16); // 16 bytes = 22 chars +} + +/** + * Generate complete PKCE parameters + * @returns Object with verifier, challenge, challengeMethod, and state + */ +export function generatePKCEParams(): PKCEParams { + const verifier = generateCodeVerifier(); + const challenge = generateCodeChallenge(verifier); + const state = generateState(); + + return { + codeVerifier: verifier, + codeChallenge: challenge, + codeChallengeMethod: "S256", + state: state, + }; +} + diff --git a/cli/lib/util.ts b/cli/lib/util.ts new file mode 100644 index 0000000..afa001b --- /dev/null +++ b/cli/lib/util.ts @@ -0,0 +1,60 @@ +export function maskSecret(secret: string): string { + if (!secret) return ""; + if (secret.length <= 8) return "****"; + if (secret.length <= 16) return `${secret.slice(0, 4)}${"*".repeat(secret.length - 8)}${secret.slice(-4)}`; + return `${secret.slice(0, Math.min(12, secret.length - 8))}${"*".repeat(Math.max(4, secret.length - 16))}${secret.slice(-4)}`; +} + + 
+export interface RootOptsLike { + apiBaseUrl?: string; + uiBaseUrl?: string; +} + +export interface ConfigLike { + baseUrl?: string | null; +} + +export interface ResolvedBaseUrls { + apiBaseUrl: string; + uiBaseUrl: string; +} + +/** + * Normalize a base URL by trimming a single trailing slash and validating. + * @throws Error if the URL is invalid + */ +export function normalizeBaseUrl(value: string): string { + const trimmed = (value || "").replace(/\/$/, ""); + try { + // Validate + // eslint-disable-next-line no-new + new URL(trimmed); + } catch { + throw new Error(`Invalid base URL: ${value}`); + } + return trimmed; +} + +/** + * Resolve API and UI base URLs using precedence and normalize them. + * Precedence (API): opts.apiBaseUrl โ†’ env.PGAI_API_BASE_URL โ†’ cfg.baseUrl โ†’ default + * Precedence (UI): opts.uiBaseUrl โ†’ env.PGAI_UI_BASE_URL โ†’ default + */ +export function resolveBaseUrls( + opts?: RootOptsLike, + cfg?: ConfigLike, + defaults: { apiBaseUrl?: string; uiBaseUrl?: string } = {} +): ResolvedBaseUrls { + const defApi = defaults.apiBaseUrl || "https://postgres.ai/api/general/"; + const defUi = defaults.uiBaseUrl || "https://console.postgres.ai"; + + const apiCandidate = (opts?.apiBaseUrl || process.env.PGAI_API_BASE_URL || cfg?.baseUrl || defApi) as string; + const uiCandidate = (opts?.uiBaseUrl || process.env.PGAI_UI_BASE_URL || defUi) as string; + + return { + apiBaseUrl: normalizeBaseUrl(apiCandidate), + uiBaseUrl: normalizeBaseUrl(uiCandidate), + }; +} + diff --git a/cli/package-lock.json b/cli/package-lock.json new file mode 100644 index 0000000..3c9d4cf --- /dev/null +++ b/cli/package-lock.json @@ -0,0 +1,1218 @@ +{ + "name": "postgresai", + "version": "0.12.0-beta.7", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "postgresai", + "version": "0.12.0-beta.7", + "license": "Apache-2.0", + "dependencies": { + "@modelcontextprotocol/sdk": "^1.20.2", + "commander": "^12.1.0", + "js-yaml": "^4.1.0", + "pg": "^8.16.3" + }, + "bin": { + "pgai": "dist/bin/postgres-ai.js", + "postgres-ai": "dist/bin/postgres-ai.js", + "postgresai": "dist/bin/postgres-ai.js" + }, + "devDependencies": { + "@types/js-yaml": "^4.0.9", + "@types/node": "^18.19.0", + "@types/pg": "^8.15.6", + "typescript": "^5.3.3" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@modelcontextprotocol/sdk": { + "version": "1.20.2", + "resolved": "https://registry.npmjs.org/@modelcontextprotocol/sdk/-/sdk-1.20.2.tgz", + "integrity": "sha512-6rqTdFt67AAAzln3NOKsXRmv5ZzPkgbfaebKBqUbts7vK1GZudqnrun5a8d3M/h955cam9RHZ6Jb4Y1XhnmFPg==", + "dependencies": { + "ajv": "^6.12.6", + "content-type": "^1.0.5", + "cors": "^2.8.5", + "cross-spawn": "^7.0.5", + "eventsource": "^3.0.2", + "eventsource-parser": "^3.0.0", + "express": "^5.0.1", + "express-rate-limit": "^7.5.0", + "pkce-challenge": "^5.0.0", + "raw-body": "^3.0.0", + "zod": "^3.23.8", + "zod-to-json-schema": "^3.24.1" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@types/js-yaml": { + "version": "4.0.9", + "resolved": "https://registry.npmjs.org/@types/js-yaml/-/js-yaml-4.0.9.tgz", + "integrity": "sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/node": { + "version": "18.19.130", + "resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.130.tgz", + "integrity": "sha512-GRaXQx6jGfL8sKfaIDD6OupbIHBr9jv7Jnaml9tB7l4v068PAOXqfcujMMo5PhbIs6ggR1XODELqahT2R8v0fg==", + "dev": true, + "license": 
"MIT", + "dependencies": { + "undici-types": "~5.26.4" + } + }, + "node_modules/@types/pg": { + "version": "8.15.6", + "resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.15.6.tgz", + "integrity": "sha512-NoaMtzhxOrubeL/7UZuNTrejB4MPAJ0RpxZqXQf2qXuVlTPuG6Y8p4u9dKRaue4yjmC7ZhzVO2/Yyyn25znrPQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/node": "*", + "pg-protocol": "*", + "pg-types": "^2.2.0" + } + }, + "node_modules/accepts": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/accepts/-/accepts-2.0.0.tgz", + "integrity": "sha512-5cvg6CtKwfgdmVqY1WIiXKc3Q1bkRqGLi+2W/6ao+6Y7gu/RCwRuAhGEzh5B4KlszSuTLgZYuqFqo5bImjNKng==", + "dependencies": { + "mime-types": "^3.0.0", + "negotiator": "^1.0.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/ajv": { + "version": "6.12.6", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", + "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", + "dependencies": { + "fast-deep-equal": "^3.1.1", + "fast-json-stable-stringify": "^2.0.0", + "json-schema-traverse": "^0.4.1", + "uri-js": "^4.2.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/argparse": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", + "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", + "license": "Python-2.0" + }, + "node_modules/body-parser": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-2.2.0.tgz", + "integrity": "sha512-02qvAaxv8tp7fBa/mw1ga98OGm+eCbqzJOKoRt70sLmfEEi+jyBYVTDGfCL/k06/4EMk/z01gCe7HoCH/f2LTg==", + "dependencies": { + "bytes": "^3.1.2", + "content-type": "^1.0.5", + "debug": "^4.4.0", + "http-errors": "^2.0.0", + "iconv-lite": "^0.6.3", + "on-finished": "^2.4.1", + "qs": "^6.14.0", + "raw-body": "^3.0.0", + "type-is": "^2.0.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/bytes": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz", + "integrity": "sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/call-bind-apply-helpers": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", + "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", + "dependencies": { + "es-errors": "^1.3.0", + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/call-bound": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/call-bound/-/call-bound-1.0.4.tgz", + "integrity": "sha512-+ys997U96po4Kx/ABpBCqhA9EuxJaQWDQg7295H4hBphv3IZg0boBKuwYpt4YXp6MZ5AmZQnU/tyMTlRpaSejg==", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "get-intrinsic": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/commander": { + "version": "12.1.0", + "resolved": "https://registry.npmjs.org/commander/-/commander-12.1.0.tgz", + "integrity": "sha512-Vw8qHK3bZM9y/P10u3Vib8o/DdkvA2OtPtZvD871QKjy74Wj1WSKFILMPRPSdUSx5RFK1arlJzEtA4PkFgnbuA==", + "license": "MIT", + "engines": { + "node": ">=18" + } + }, + "node_modules/content-disposition": { + "version": "1.0.0", + 
"resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-1.0.0.tgz", + "integrity": "sha512-Au9nRL8VNUut/XSzbQA38+M78dzP4D+eqg3gfJHMIHHYa3bg067xj1KxMUWj+VULbiZMowKngFFbKczUrNJ1mg==", + "dependencies": { + "safe-buffer": "5.2.1" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/content-type": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.5.tgz", + "integrity": "sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/cookie": { + "version": "0.7.2", + "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.7.2.tgz", + "integrity": "sha512-yki5XnKuf750l50uGTllt6kKILY4nQ1eNIQatoXEByZ5dWgnKqbnqmTrBE5B4N7lrMJKQ2ytWMiTO2o0v6Ew/w==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/cookie-signature": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.2.2.tgz", + "integrity": "sha512-D76uU73ulSXrD1UXF4KE2TMxVVwhsnCgfAyTg9k8P6KGZjlXKrOLe4dJQKI3Bxi5wjesZoFXJWElNWBjPZMbhg==", + "engines": { + "node": ">=6.6.0" + } + }, + "node_modules/cors": { + "version": "2.8.5", + "resolved": "https://registry.npmjs.org/cors/-/cors-2.8.5.tgz", + "integrity": "sha512-KIHbLJqu73RGr/hnbrO9uBeixNGuvSQjul/jdFvS/KFSIH1hWVd1ng7zOHx+YrEfInLG7q4n6GHQ9cDtxv/P6g==", + "dependencies": { + "object-assign": "^4", + "vary": "^1" + }, + "engines": { + "node": ">= 0.10" + } + }, + "node_modules/cross-spawn": { + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", + "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", + "dependencies": { + "path-key": "^3.1.0", + "shebang-command": "^2.0.0", + "which": "^2.0.1" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/debug": { + "version": "4.4.3", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz", + "integrity": "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==", + "dependencies": { + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, + "node_modules/depd": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz", + "integrity": "sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/dunder-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", + "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", + "dependencies": { + "call-bind-apply-helpers": "^1.0.1", + "es-errors": "^1.3.0", + "gopd": "^1.2.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/ee-first": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz", + "integrity": "sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==" + }, + "node_modules/encodeurl": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-2.0.0.tgz", + "integrity": "sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/es-define-property": { + "version": 
"1.0.1", + "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", + "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-errors": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", + "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/es-object-atoms": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", + "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", + "dependencies": { + "es-errors": "^1.3.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/escape-html": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz", + "integrity": "sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow==" + }, + "node_modules/etag": { + "version": "1.8.1", + "resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz", + "integrity": "sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/eventsource": { + "version": "3.0.7", + "resolved": "https://registry.npmjs.org/eventsource/-/eventsource-3.0.7.tgz", + "integrity": "sha512-CRT1WTyuQoD771GW56XEZFQ/ZoSfWid1alKGDYMmkt2yl8UXrVR4pspqWNEcqKvVIzg6PAltWjxcSSPrboA4iA==", + "dependencies": { + "eventsource-parser": "^3.0.1" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/eventsource-parser": { + "version": "3.0.6", + "resolved": "https://registry.npmjs.org/eventsource-parser/-/eventsource-parser-3.0.6.tgz", + "integrity": "sha512-Vo1ab+QXPzZ4tCa8SwIHJFaSzy4R6SHf7BY79rFBDf0idraZWAkYrDjDj8uWaSm3S2TK+hJ7/t1CEmZ7jXw+pg==", + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/express": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/express/-/express-5.1.0.tgz", + "integrity": "sha512-DT9ck5YIRU+8GYzzU5kT3eHGA5iL+1Zd0EutOmTE9Dtk+Tvuzd23VBU+ec7HPNSTxXYO55gPV/hq4pSBJDjFpA==", + "dependencies": { + "accepts": "^2.0.0", + "body-parser": "^2.2.0", + "content-disposition": "^1.0.0", + "content-type": "^1.0.5", + "cookie": "^0.7.1", + "cookie-signature": "^1.2.1", + "debug": "^4.4.0", + "encodeurl": "^2.0.0", + "escape-html": "^1.0.3", + "etag": "^1.8.1", + "finalhandler": "^2.1.0", + "fresh": "^2.0.0", + "http-errors": "^2.0.0", + "merge-descriptors": "^2.0.0", + "mime-types": "^3.0.0", + "on-finished": "^2.4.1", + "once": "^1.4.0", + "parseurl": "^1.3.3", + "proxy-addr": "^2.0.7", + "qs": "^6.14.0", + "range-parser": "^1.2.1", + "router": "^2.2.0", + "send": "^1.1.0", + "serve-static": "^2.2.0", + "statuses": "^2.0.1", + "type-is": "^2.0.1", + "vary": "^1.1.2" + }, + "engines": { + "node": ">= 18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/express-rate-limit": { + "version": "7.5.1", + "resolved": "https://registry.npmjs.org/express-rate-limit/-/express-rate-limit-7.5.1.tgz", + "integrity": "sha512-7iN8iPMDzOMHPUYllBEsQdWVB6fPDMPqwjBaFrgr4Jgr/+okjvzAy+UHlYYL/Vs0OsOrMkwS6PJDkFlJwoxUnw==", + "engines": { + "node": ">= 16" + }, + "funding": { + "url": "https://github.com/sponsors/express-rate-limit" + }, + 
"peerDependencies": { + "express": ">= 4.11" + } + }, + "node_modules/fast-deep-equal": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", + "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==" + }, + "node_modules/fast-json-stable-stringify": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz", + "integrity": "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==" + }, + "node_modules/finalhandler": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-2.1.0.tgz", + "integrity": "sha512-/t88Ty3d5JWQbWYgaOGCCYfXRwV1+be02WqYYlL6h0lEiUAMPM8o8qKGO01YIkOHzka2up08wvgYD0mDiI+q3Q==", + "dependencies": { + "debug": "^4.4.0", + "encodeurl": "^2.0.0", + "escape-html": "^1.0.3", + "on-finished": "^2.4.1", + "parseurl": "^1.3.3", + "statuses": "^2.0.1" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/forwarded": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz", + "integrity": "sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/fresh": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/fresh/-/fresh-2.0.0.tgz", + "integrity": "sha512-Rx/WycZ60HOaqLKAi6cHRKKI7zxWbJ31MhntmtwMoaTeF7XFH9hhBp8vITaMidfljRQ6eYWCKkaTK+ykVJHP2A==", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/function-bind": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", + "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-intrinsic": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", + "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", + "dependencies": { + "call-bind-apply-helpers": "^1.0.2", + "es-define-property": "^1.0.1", + "es-errors": "^1.3.0", + "es-object-atoms": "^1.1.1", + "function-bind": "^1.1.2", + "get-proto": "^1.0.1", + "gopd": "^1.2.0", + "has-symbols": "^1.1.0", + "hasown": "^2.0.2", + "math-intrinsics": "^1.1.0" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/get-proto": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", + "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", + "dependencies": { + "dunder-proto": "^1.0.1", + "es-object-atoms": "^1.0.0" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/gopd": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", + "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/has-symbols": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", + "integrity": 
"sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/hasown": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", + "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", + "dependencies": { + "function-bind": "^1.1.2" + }, + "engines": { + "node": ">= 0.4" + } + }, + "node_modules/http-errors": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.0.tgz", + "integrity": "sha512-FtwrG/euBzaEjYeRqOgly7G0qviiXoJWnvEH2Z1plBdXgbyjv34pHTSb9zoeHMyDy33+DWy5Wt9Wo+TURtOYSQ==", + "dependencies": { + "depd": "2.0.0", + "inherits": "2.0.4", + "setprototypeof": "1.2.0", + "statuses": "2.0.1", + "toidentifier": "1.0.1" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/http-errors/node_modules/statuses": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.1.tgz", + "integrity": "sha512-RwNA9Z/7PrK06rYLIzFMlaF+l73iwpzsqRIFgbMLbTcLD6cOao82TaWefPXQvB2fOC4AjuYSEndS7N/mTCbkdQ==", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/iconv-lite": { + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz", + "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==" + }, + "node_modules/ipaddr.js": { + "version": "1.9.1", + "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz", + "integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==", + "engines": { + "node": ">= 0.10" + } + }, + "node_modules/is-promise": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/is-promise/-/is-promise-4.0.0.tgz", + "integrity": "sha512-hvpoI6korhJMnej285dSg6nu1+e6uxs7zG3BYAm5byqDsgJNWwxzM6z6iZiAgQR4TJ30JmBTOwqZUw3WlyH3AQ==" + }, + "node_modules/isexe": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", + "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==" + }, + "node_modules/js-yaml": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz", + "integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==", + "license": "MIT", + "dependencies": { + "argparse": "^2.0.1" + }, + "bin": { + "js-yaml": "bin/js-yaml.js" + } + }, + "node_modules/json-schema-traverse": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==" + }, + "node_modules/math-intrinsics": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", + "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", + "engines": { + 
"node": ">= 0.4" + } + }, + "node_modules/media-typer": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-1.1.0.tgz", + "integrity": "sha512-aisnrDP4GNe06UcKFnV5bfMNPBUw4jsLGaWwWfnH3v02GnBuXX2MCVn5RbrWo0j3pczUilYblq7fQ7Nw2t5XKw==", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/merge-descriptors": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-2.0.0.tgz", + "integrity": "sha512-Snk314V5ayFLhp3fkUREub6WtjBfPdCPY1Ln8/8munuLuiYhsABgBVWsozAG+MWMbVEvcdcpbi9R7ww22l9Q3g==", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/mime-db": { + "version": "1.54.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.54.0.tgz", + "integrity": "sha512-aU5EJuIN2WDemCcAp2vFBfp/m4EAhWJnUNSSw0ixs7/kXbd6Pg64EmwJkNdFhB8aWt1sH2CTXrLxo/iAGV3oPQ==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-3.0.1.tgz", + "integrity": "sha512-xRc4oEhT6eaBpU1XF7AjpOFD+xQmXNB5OVKwp4tqCuBpHLS/ZbBDrc07mYTDqVMg6PfxUjjNp85O6Cd2Z/5HWA==", + "dependencies": { + "mime-db": "^1.54.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==" + }, + "node_modules/negotiator": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-1.0.0.tgz", + "integrity": "sha512-8Ofs/AUQh8MaEcrlq5xOX0CQ9ypTF5dl78mjlMNfOK08fzpgTHQRQPBxcPlEtIw0yRpws+Zo/3r+5WRby7u3Gg==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/object-assign": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", + "integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/object-inspect": { + "version": "1.13.4", + "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.4.tgz", + "integrity": "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew==", + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/on-finished": { + "version": "2.4.1", + "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.4.1.tgz", + "integrity": "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg==", + "dependencies": { + "ee-first": "1.1.1" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/once": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "dependencies": { + "wrappy": "1" + } + }, + "node_modules/parseurl": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz", + "integrity": "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/path-key": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", + "integrity": 
"sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", + "engines": { + "node": ">=8" + } + }, + "node_modules/path-to-regexp": { + "version": "8.3.0", + "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-8.3.0.tgz", + "integrity": "sha512-7jdwVIRtsP8MYpdXSwOS0YdD0Du+qOoF/AEPIt88PcCFrZCzx41oxku1jD88hZBwbNUIEfpqvuhjFaMAqMTWnA==", + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/pg": { + "version": "8.16.3", + "resolved": "https://registry.npmjs.org/pg/-/pg-8.16.3.tgz", + "integrity": "sha512-enxc1h0jA/aq5oSDMvqyW3q89ra6XIIDZgCX9vkMrnz5DFTw/Ny3Li2lFQ+pt3L6MCgm/5o2o8HW9hiJji+xvw==", + "license": "MIT", + "dependencies": { + "pg-connection-string": "^2.9.1", + "pg-pool": "^3.10.1", + "pg-protocol": "^1.10.3", + "pg-types": "2.2.0", + "pgpass": "1.0.5" + }, + "engines": { + "node": ">= 16.0.0" + }, + "optionalDependencies": { + "pg-cloudflare": "^1.2.7" + }, + "peerDependencies": { + "pg-native": ">=3.0.1" + }, + "peerDependenciesMeta": { + "pg-native": { + "optional": true + } + } + }, + "node_modules/pg-cloudflare": { + "version": "1.2.7", + "resolved": "https://registry.npmjs.org/pg-cloudflare/-/pg-cloudflare-1.2.7.tgz", + "integrity": "sha512-YgCtzMH0ptvZJslLM1ffsY4EuGaU0cx4XSdXLRFae8bPP4dS5xL1tNB3k2o/N64cHJpwU7dxKli/nZ2lUa5fLg==", + "license": "MIT", + "optional": true + }, + "node_modules/pg-connection-string": { + "version": "2.9.1", + "resolved": "https://registry.npmjs.org/pg-connection-string/-/pg-connection-string-2.9.1.tgz", + "integrity": "sha512-nkc6NpDcvPVpZXxrreI/FOtX3XemeLl8E0qFr6F2Lrm/I8WOnaWNhIPK2Z7OHpw7gh5XJThi6j6ppgNoaT1w4w==", + "license": "MIT" + }, + "node_modules/pg-int8": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/pg-int8/-/pg-int8-1.0.1.tgz", + "integrity": "sha512-WCtabS6t3c8SkpDBUlb1kjOs7l66xsGdKpIPZsg4wR+B3+u9UAum2odSsF9tnvxg80h4ZxLWMy4pRjOsFIqQpw==", + "license": "ISC", + "engines": { + "node": ">=4.0.0" + } + }, + "node_modules/pg-pool": { + "version": "3.10.1", + "resolved": "https://registry.npmjs.org/pg-pool/-/pg-pool-3.10.1.tgz", + "integrity": "sha512-Tu8jMlcX+9d8+QVzKIvM/uJtp07PKr82IUOYEphaWcoBhIYkoHpLXN3qO59nAI11ripznDsEzEv8nUxBVWajGg==", + "license": "MIT", + "peerDependencies": { + "pg": ">=8.0" + } + }, + "node_modules/pg-protocol": { + "version": "1.10.3", + "resolved": "https://registry.npmjs.org/pg-protocol/-/pg-protocol-1.10.3.tgz", + "integrity": "sha512-6DIBgBQaTKDJyxnXaLiLR8wBpQQcGWuAESkRBX/t6OwA8YsqP+iVSiond2EDy6Y/dsGk8rh/jtax3js5NeV7JQ==", + "license": "MIT" + }, + "node_modules/pg-types": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/pg-types/-/pg-types-2.2.0.tgz", + "integrity": "sha512-qTAAlrEsl8s4OiEQY69wDvcMIdQN6wdz5ojQiOy6YRMuynxenON0O5oCpJI6lshc6scgAY8qvJ2On/p+CXY0GA==", + "license": "MIT", + "dependencies": { + "pg-int8": "1.0.1", + "postgres-array": "~2.0.0", + "postgres-bytea": "~1.0.0", + "postgres-date": "~1.0.4", + "postgres-interval": "^1.1.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/pgpass": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/pgpass/-/pgpass-1.0.5.tgz", + "integrity": "sha512-FdW9r/jQZhSeohs1Z3sI1yxFQNFvMcnmfuj4WBMUTxOrAyLMaTcE1aAMBiTlbMNaXvBCQuVi0R7hd8udDSP7ug==", + "license": "MIT", + "dependencies": { + "split2": "^4.1.0" + } + }, + "node_modules/pkce-challenge": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/pkce-challenge/-/pkce-challenge-5.0.0.tgz", + "integrity": 
"sha512-ueGLflrrnvwB3xuo/uGob5pd5FN7l0MsLf0Z87o/UQmRtwjvfylfc9MurIxRAWywCYTgrvpXBcqjV4OfCYGCIQ==", + "engines": { + "node": ">=16.20.0" + } + }, + "node_modules/postgres-array": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/postgres-array/-/postgres-array-2.0.0.tgz", + "integrity": "sha512-VpZrUqU5A69eQyW2c5CA1jtLecCsN2U/bD6VilrFDWq5+5UIEVO7nazS3TEcHf1zuPYO/sqGvUvW62g86RXZuA==", + "license": "MIT", + "engines": { + "node": ">=4" + } + }, + "node_modules/postgres-bytea": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/postgres-bytea/-/postgres-bytea-1.0.0.tgz", + "integrity": "sha512-xy3pmLuQqRBZBXDULy7KbaitYqLcmxigw14Q5sj8QBVLqEwXfeybIKVWiqAXTlcvdvb0+xkOtDbfQMOf4lST1w==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/postgres-date": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/postgres-date/-/postgres-date-1.0.7.tgz", + "integrity": "sha512-suDmjLVQg78nMK2UZ454hAG+OAW+HQPZ6n++TNDUX+L0+uUlLywnoxJKDou51Zm+zTCjrCl0Nq6J9C5hP9vK/Q==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/postgres-interval": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/postgres-interval/-/postgres-interval-1.2.0.tgz", + "integrity": "sha512-9ZhXKM/rw350N1ovuWHbGxnGh/SNJ4cnxHiM0rxE4VN41wsg8P8zWn9hv/buK00RP4WvlOyr/RBDiptyxVbkZQ==", + "license": "MIT", + "dependencies": { + "xtend": "^4.0.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/proxy-addr": { + "version": "2.0.7", + "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz", + "integrity": "sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==", + "dependencies": { + "forwarded": "0.2.0", + "ipaddr.js": "1.9.1" + }, + "engines": { + "node": ">= 0.10" + } + }, + "node_modules/punycode": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", + "integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==", + "engines": { + "node": ">=6" + } + }, + "node_modules/qs": { + "version": "6.14.0", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.14.0.tgz", + "integrity": "sha512-YWWTjgABSKcvs/nWBi9PycY/JiPJqOD4JA6o9Sej2AtvSGarXxKC3OQSk4pAarbdQlKAh5D4FCQkJNkW+GAn3w==", + "dependencies": { + "side-channel": "^1.1.0" + }, + "engines": { + "node": ">=0.6" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/range-parser": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz", + "integrity": "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/raw-body": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-3.0.1.tgz", + "integrity": "sha512-9G8cA+tuMS75+6G/TzW8OtLzmBDMo8p1JRxN5AZ+LAp8uxGA8V8GZm4GQ4/N5QNQEnLmg6SS7wyuSmbKepiKqA==", + "dependencies": { + "bytes": "3.1.2", + "http-errors": "2.0.0", + "iconv-lite": "0.7.0", + "unpipe": "1.0.0" + }, + "engines": { + "node": ">= 0.10" + } + }, + "node_modules/raw-body/node_modules/iconv-lite": { + "version": "0.7.0", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.7.0.tgz", + "integrity": "sha512-cf6L2Ds3h57VVmkZe+Pn+5APsT7FpqJtEhhieDCvrE2MK5Qk9MyffgQyuxQTm6BChfeZNtcOLHp9IcWRVcIcBQ==", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": 
">=0.10.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/router": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/router/-/router-2.2.0.tgz", + "integrity": "sha512-nLTrUKm2UyiL7rlhapu/Zl45FwNgkZGaCpZbIHajDYgwlJCOzLSk+cIPAnsEqV955GjILJnKbdQC1nVPz+gAYQ==", + "dependencies": { + "debug": "^4.4.0", + "depd": "^2.0.0", + "is-promise": "^4.0.0", + "parseurl": "^1.3.3", + "path-to-regexp": "^8.0.0" + }, + "engines": { + "node": ">= 18" + } + }, + "node_modules/safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ] + }, + "node_modules/safer-buffer": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", + "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==" + }, + "node_modules/send": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/send/-/send-1.2.0.tgz", + "integrity": "sha512-uaW0WwXKpL9blXE2o0bRhoL2EGXIrZxQ2ZQ4mgcfoBxdFmQold+qWsD2jLrfZ0trjKL6vOw0j//eAwcALFjKSw==", + "dependencies": { + "debug": "^4.3.5", + "encodeurl": "^2.0.0", + "escape-html": "^1.0.3", + "etag": "^1.8.1", + "fresh": "^2.0.0", + "http-errors": "^2.0.0", + "mime-types": "^3.0.1", + "ms": "^2.1.3", + "on-finished": "^2.4.1", + "range-parser": "^1.2.1", + "statuses": "^2.0.1" + }, + "engines": { + "node": ">= 18" + } + }, + "node_modules/serve-static": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-2.2.0.tgz", + "integrity": "sha512-61g9pCh0Vnh7IutZjtLGGpTA355+OPn2TyDv/6ivP2h/AdAVX9azsoxmg2/M6nZeQZNYBEwIcsne1mJd9oQItQ==", + "dependencies": { + "encodeurl": "^2.0.0", + "escape-html": "^1.0.3", + "parseurl": "^1.3.3", + "send": "^1.2.0" + }, + "engines": { + "node": ">= 18" + } + }, + "node_modules/setprototypeof": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz", + "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==" + }, + "node_modules/shebang-command": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", + "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", + "dependencies": { + "shebang-regex": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/shebang-regex": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", + "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", + "engines": { + "node": ">=8" + } + }, + "node_modules/side-channel": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.1.0.tgz", + "integrity": "sha512-ZX99e6tRweoUXqR+VBrslhda51Nh5MTQwou5tnUDgbtyM0dBgmhEDtWGP/xbKn6hqfPRHujUNwz5fy/wbbhnpw==", + "dependencies": { + "es-errors": "^1.3.0", + "object-inspect": "^1.13.3", + "side-channel-list": "^1.0.0", + "side-channel-map": "^1.0.1", + 
"side-channel-weakmap": "^1.0.2" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-list": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/side-channel-list/-/side-channel-list-1.0.0.tgz", + "integrity": "sha512-FCLHtRD/gnpCiCHEiJLOwdmFP+wzCmDEkc9y7NsYxeF4u7Btsn1ZuwgwJGxImImHicJArLP4R0yX4c2KCrMrTA==", + "dependencies": { + "es-errors": "^1.3.0", + "object-inspect": "^1.13.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-map": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/side-channel-map/-/side-channel-map-1.0.1.tgz", + "integrity": "sha512-VCjCNfgMsby3tTdo02nbjtM/ewra6jPHmpThenkTYh8pG9ucZ/1P8So4u4FGBek/BjpOVsDCMoLA/iuBKIFXRA==", + "dependencies": { + "call-bound": "^1.0.2", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.5", + "object-inspect": "^1.13.3" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/side-channel-weakmap": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/side-channel-weakmap/-/side-channel-weakmap-1.0.2.tgz", + "integrity": "sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A==", + "dependencies": { + "call-bound": "^1.0.2", + "es-errors": "^1.3.0", + "get-intrinsic": "^1.2.5", + "object-inspect": "^1.13.3", + "side-channel-map": "^1.0.1" + }, + "engines": { + "node": ">= 0.4" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/split2": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/split2/-/split2-4.2.0.tgz", + "integrity": "sha512-UcjcJOWknrNkF6PLX83qcHM6KHgVKNkV62Y8a5uYDVv9ydGQVwAHMKqHdJje1VTWpljG0WYpCDhrCdAOYH4TWg==", + "license": "ISC", + "engines": { + "node": ">= 10.x" + } + }, + "node_modules/statuses": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.2.tgz", + "integrity": "sha512-DvEy55V3DB7uknRo+4iOGT5fP1slR8wQohVdknigZPMpMstaKJQWhwiYBACJE3Ul2pTnATihhBYnRhZQHGBiRw==", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/toidentifier": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.1.tgz", + "integrity": "sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==", + "engines": { + "node": ">=0.6" + } + }, + "node_modules/type-is": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/type-is/-/type-is-2.0.1.tgz", + "integrity": "sha512-OZs6gsjF4vMp32qrCbiVSkrFmXtG/AZhY3t0iAMrMBiAZyV9oALtXO8hsrHbMXF9x6L3grlFuwW2oAz7cav+Gw==", + "dependencies": { + "content-type": "^1.0.5", + "media-typer": "^1.1.0", + "mime-types": "^3.0.0" + }, + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/typescript": { + "version": "5.9.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", + "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/undici-types": { + "version": "5.26.5", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", + "integrity": 
"sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", + "dev": true, + "license": "MIT" + }, + "node_modules/unpipe": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz", + "integrity": "sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ==", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/uri-js": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz", + "integrity": "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==", + "dependencies": { + "punycode": "^2.1.0" + } + }, + "node_modules/vary": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz", + "integrity": "sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/which": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", + "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", + "dependencies": { + "isexe": "^2.0.0" + }, + "bin": { + "node-which": "bin/node-which" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==" + }, + "node_modules/xtend": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.2.tgz", + "integrity": "sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==", + "license": "MIT", + "engines": { + "node": ">=0.4" + } + }, + "node_modules/zod": { + "version": "3.25.76", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz", + "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + }, + "node_modules/zod-to-json-schema": { + "version": "3.24.6", + "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.24.6.tgz", + "integrity": "sha512-h/z3PKvcTcTetyjl1fkj79MHNEjm+HpD6NXheWjzOekY7kV+lwDYnHw+ivHkijnCSMz1yJaWBD9vu/Fcmk+vEg==", + "peerDependencies": { + "zod": "^3.24.1" + } + } + } +} diff --git a/cli/package.json b/cli/package.json new file mode 100644 index 0000000..6d5afb0 --- /dev/null +++ b/cli/package.json @@ -0,0 +1,45 @@ +{ + "name": "postgresai", + "version": "0.12.0-beta.7", + "description": "postgres_ai CLI (Node.js)", + "license": "Apache-2.0", + "private": false, + "repository": { + "type": "git", + "url": "git+https://gitlab.com/postgres-ai/postgres_ai.git" + }, + "homepage": "https://gitlab.com/postgres-ai/postgres_ai", + "bugs": { + "url": "https://gitlab.com/postgres-ai/postgres_ai/-/issues" + }, + "bin": { + "postgres-ai": "./dist/bin/postgres-ai.js", + "postgresai": "./dist/bin/postgres-ai.js", + "pgai": "./dist/bin/postgres-ai.js" + }, + "type": "commonjs", + "engines": { + "node": ">=18" + }, + "scripts": { + "build": "tsc", + "prepare": "npm run build", + "start": "node ./dist/bin/postgres-ai.js --help", + "dev": "tsc --watch" + }, + "dependencies": { + "@modelcontextprotocol/sdk": "^1.20.2", + "commander": "^12.1.0", + "js-yaml": "^4.1.0", + "pg": "^8.16.3" + }, + "devDependencies": { + 
"@types/js-yaml": "^4.0.9", + "@types/node": "^18.19.0", + "@types/pg": "^8.15.6", + "typescript": "^5.3.3" + }, + "publishConfig": { + "access": "public" + } +} diff --git a/cli/tsconfig.json b/cli/tsconfig.json new file mode 100644 index 0000000..8f969b6 --- /dev/null +++ b/cli/tsconfig.json @@ -0,0 +1,28 @@ +{ + "compilerOptions": { + "target": "ES2020", + "module": "node16", + "lib": ["ES2020"], + "outDir": "./dist", + "rootDir": "./", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "resolveJsonModule": true, + "declaration": true, + "declarationMap": true, + "sourceMap": true, + "moduleResolution": "node16", + "types": ["node"] + }, + "include": [ + "bin/**/*", + "lib/**/*" + ], + "exclude": [ + "node_modules", + "dist" + ] +} + diff --git a/config/grafana/dashboards/Dashboard_10_Index health.json b/config/grafana/dashboards/Dashboard_10_Index health.json index b5bf58f..f2f0566 100644 --- a/config/grafana/dashboards/Dashboard_10_Index health.json +++ b/config/grafana/dashboards/Dashboard_10_Index health.json @@ -166,7 +166,7 @@ "root_selector": "", "source": "url", "type": "csv", - "url": "http://flask-pgss-api:5000/btree_bloat/csv", + "url": "http://flask-pgss-api:8000/btree_bloat/csv", "url_options": { "data": "", "method": "GET", @@ -691,13 +691,13 @@ "text": "default", "value": "default" }, - "definition": "label_values(pgwatch_settings_configured,cluster)", + "definition": "label_values(pgwatch_db_size_size_b,cluster)", "label": "Cluster name", "name": "cluster_name", "options": [], "query": { "qryType": 1, - "query": "label_values(pgwatch_settings_configured,cluster)", + "query": "label_values(pgwatch_db_size_size_b,cluster)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -709,13 +709,13 @@ "text": "postgres_ai", "value": "postgres_ai" }, - "definition": "label_values(pgwatch_settings_configured{cluster=\"$cluster_name\"},node_name)", + "definition": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\"},node_name)", "label": "Node name", "name": "node_name", "options": [], "query": { "qryType": 1, - "query": "label_values(pgwatch_settings_configured{cluster=\"$cluster_name\"},node_name)", + "query": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\"},node_name)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -727,13 +727,13 @@ "text": "workloaddb", "value": "workloaddb" }, - "definition": "label_values(pgwatch_pg_database_wraparound_age_datfrozenxid{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", + "definition": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", "label": "DB name", "name": "db_name", "options": [], "query": { "qryType": 1, - "query": "label_values(pgwatch_pg_database_wraparound_age_datfrozenxid{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", + "query": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git a/config/grafana/dashboards/Dashboard_12_SLRU.json b/config/grafana/dashboards/Dashboard_12_SLRU.json new file mode 100644 index 0000000..3f97a99 --- /dev/null +++ b/config/grafana/dashboards/Dashboard_12_SLRU.json @@ -0,0 +1,820 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": 
"grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": 13, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 53, + "panels": [], + "title": "SLRU stats", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 46, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "editorMode": "code", + "expr": "irate(pgwatch_pg_stat_slru_blks_hit{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval])", + "interval": "10", + "legendFormat": "{{name}}", + "range": true, + "refId": "A" + } + ], + "title": "SLRU blocks hit", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 52, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": 
"irate(pgwatch_pg_stat_slru_blks_exists{cluster=\"$cluster_name\", node_name=\"$node_name\", datname=\"$db_name\"}[$__rate_interval])", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "10", + "legendFormat": "{{name}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "SLRU blocks exist", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 24, + "x": 0, + "y": 23 + }, + "id": 47, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "irate(pgwatch_pg_stat_slru_blks_read{cluster=\"$cluster_name\", node_name=\"$node_name\", datname=\"$db_name\"}[$__rate_interval])", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "10", + "legendFormat": "{{name}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "SLRU blocks read", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 24, + "x": 0, + "y": 34 + }, + "id": 48, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": 
"irate(pgwatch_pg_stat_slru_blks_written{cluster=\"$cluster_name\", node_name=\"$node_name\", datname=\"$db_name\"}[$__rate_interval])", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "10", + "legendFormat": "{{name}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "SLRU blocks written", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 24, + "x": 0, + "y": 45 + }, + "id": 49, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "code", + "expr": "irate(pgwatch_pg_stat_slru_blks_zeroed{cluster=\"$cluster_name\", node_name=\"$node_name\", datname=\"$db_name\"}[$__rate_interval])", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "10", + "legendFormat": "{{name}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "SLRU blocks zeroed", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 24, + "x": 0, + "y": 56 + }, + "id": 50, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": 
"irate(pgwatch_pg_stat_slru_truncates{cluster=\"$cluster_name\", node_name=\"$node_name\", datname=\"$db_name\"}[$__rate_interval])", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "10", + "legendFormat": "{{name}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "SLRU truncates", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 24, + "x": 0, + "y": 67 + }, + "id": 51, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "irate(pgwatch_pg_stat_slru_flushes{cluster=\"$cluster_name\", node_name=\"$node_name\", datname=\"$db_name\"}[$__rate_interval])", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "10", + "legendFormat": "{{name}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "SLRU flushes", + "type": "timeseries" + } + ], + "preload": false, + "refresh": "30s", + "schemaVersion": 41, + "tags": [], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "definition": "label_values(pgwatch_db_size_size_b,cluster)", + "description": "", + "label": "Cluster", + "name": "cluster_name", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(pgwatch_db_size_size_b,cluster)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "postgres_ai", + "value": "postgres_ai" + }, + "definition": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\"},node_name)", + "label": "Node", + "name": "node_name", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\"},node_name)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "type": "query" + }, + { + "current": { + "text": "workloaddb", + "value": "workloaddb" + }, + "definition": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", + "label": "DB name", + "name": "db_name", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", + 
"refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "12. SLRU cache stats", + "uid": "slru_stats", + "version": 2 + } \ No newline at end of file diff --git a/config/grafana/dashboards/Dashboard_13_Lock_waits.json b/config/grafana/dashboards/Dashboard_13_Lock_waits.json new file mode 100644 index 0000000..6831826 --- /dev/null +++ b/config/grafana/dashboards/Dashboard_13_Lock_waits.json @@ -0,0 +1,1114 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 10, + "panels": [], + "title": "Blocking overview", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "description": "Number of active lock conflicts (blockerโ†’blocked pairs).", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 1, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "red", + "value": 10 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 1, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "editorMode": "code", + "expr": "count(pgwatch_lock_waits_blocked_ms{cluster=\"$cluster_name\", node_name=\"$node_name\", datname=~\"$db_name\"}) or vector(0)", + "legendFormat": "Lock conflicts", + "range": true, + "refId": "A" + } + ], + "title": "Lock conflicts", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "description": "How long sessions have been waiting for locks.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, 
+ "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1000 + }, + { + "color": "red", + "value": 5000 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 2, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "mean" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "editorMode": "code", + "expr": "avg(pgwatch_lock_waits_blocked_ms{cluster=\"$cluster_name\", node_name=\"$node_name\", datname=~\"$db_name\"})", + "legendFormat": "Average wait time", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "editorMode": "code", + "expr": "max(pgwatch_lock_waits_blocked_ms{cluster=\"$cluster_name\", node_name=\"$node_name\", datname=~\"$db_name\"})", + "legendFormat": "Max wait time", + "range": true, + "refId": "B" + } + ], + "title": "Wait duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "description": "Distribution of blocking events by lock type.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 1, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 9 + }, + "id": 3, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "editorMode": "code", + "expr": "count by (blocked_locktype) (pgwatch_lock_waits_blocked_ms{cluster=\"$cluster_name\", node_name=\"$node_name\", datname=~\"$db_name\"})", + "legendFormat": "{{blocked_locktype}}", + "range": true, + "refId": "A" + } + ], + "title": "By lock type", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "description": "How long blocking transactions have been running.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + 
"axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 5000 + }, + { + "color": "red", + "value": 30000 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 17 + }, + "id": 5, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "mean" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "editorMode": "code", + "expr": "avg(pgwatch_lock_waits_blocker_tx_ms{cluster=\"$cluster_name\", node_name=\"$node_name\", datname=~\"$db_name\"})", + "legendFormat": "Avg blocker age", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "editorMode": "code", + "expr": "max(pgwatch_lock_waits_blocker_tx_ms{cluster=\"$cluster_name\", node_name=\"$node_name\", datname=~\"$db_name\"})", + "legendFormat": "Max blocker age", + "range": true, + "refId": "B" + } + ], + "title": "Blocker age", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "description": "Which tables have the most lock contention.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 1, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 17 + }, + "id": 6, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "editorMode": "code", + "expr": "count by (blocked_table) (pgwatch_lock_waits_blocked_ms{cluster=\"$cluster_name\", node_name=\"$node_name\", datname=~\"$db_name\", blocked_table!=\"\"})", + "legendFormat": "{{blocked_table}}", + "range": true, + "refId": "A" + } + 
], + "title": "By table", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 25 + }, + "id": 11, + "panels": [], + "title": "Blocking tree", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "description": "Current blocking relationships with process details.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "left", + "cellOptions": { + "type": "auto" + }, + "filterable": true, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1000 + }, + { + "color": "red", + "value": 5000 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "blocked_pid" + }, + "properties": [ + { + "id": "custom.width", + "value": 90 + }, + { + "id": "displayName", + "value": "Blocked PID" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "blocker_pid" + }, + "properties": [ + { + "id": "custom.width", + "value": 90 + }, + { + "id": "displayName", + "value": "Blocker PID" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "custom.width", + "value": 160 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "custom.width", + "value": 100 + }, + { + "id": "unit", + "value": "ms" + }, + { + "id": "displayName", + "value": "Blocked ms" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "blocked_user" + }, + "properties": [ + { + "id": "custom.width", + "value": 100 + }, + { + "id": "displayName", + "value": "Blocked User" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "blocker_user" + }, + "properties": [ + { + "id": "custom.width", + "value": 100 + }, + { + "id": "displayName", + "value": "Blocker User" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "blocked_appname" + }, + "properties": [ + { + "id": "custom.width", + "value": 120 + }, + { + "id": "displayName", + "value": "Blocked App" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "blocker_appname" + }, + "properties": [ + { + "id": "custom.width", + "value": 120 + }, + { + "id": "displayName", + "value": "Blocker App" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "blocked_locktype" + }, + "properties": [ + { + "id": "custom.width", + "value": 110 + }, + { + "id": "displayName", + "value": "Blocked Lock Type" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "blocker_locktype" + }, + "properties": [ + { + "id": "custom.width", + "value": 110 + }, + { + "id": "displayName", + "value": "Blocker Lock Type" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "blocked_mode" + }, + "properties": [ + { + "id": "custom.width", + "value": 110 + }, + { + "id": "displayName", + "value": "Blocked Mode" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "blocker_mode" + }, + "properties": [ + { + "id": "custom.width", + "value": 110 + }, + { + "id": "displayName", + "value": "Blocker Mode" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "blocked_table" + }, + "properties": [ + { + "id": "custom.width", + "value": 130 + }, + { + "id": "displayName", + "value": "Blocked Table" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "blocker_table" + }, + "properties": [ + { + "id": "custom.width", + "value": 130 + }, + { + 
"id": "displayName", + "value": "Blocker Table" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "datname" + }, + "properties": [ + { + "id": "custom.width", + "value": 120 + }, + { + "id": "displayName", + "value": "Database" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "blocked_query_id" + }, + "properties": [ + { + "id": "custom.width", + "value": 180 + }, + { + "id": "displayName", + "value": "Blocked Query ID" + }, + { + "id": "links", + "value": [ + { + "title": "View query analysis", + "url": "/d/db52944d-b025-4e18-b70b-89c0af3e7e41/03-single-queryid-analysis?var-cluster_name=${cluster_name}&var-node_name=${node_name}&var-db_name=${db_name}&var-query_id=${__value.raw}" + } + ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "blocker_query_id" + }, + "properties": [ + { + "id": "custom.width", + "value": 180 + }, + { + "id": "displayName", + "value": "Blocker Query ID" + }, + { + "id": "links", + "value": [ + { + "title": "View query analysis", + "url": "/d/db52944d-b025-4e18-b70b-89c0af3e7e41/03-single-queryid-analysis?var-cluster_name=${cluster_name}&var-node_name=${node_name}&var-db_name=${db_name}&var-query_id=${__value.raw}" + } + ] + } + ] + } + ] + }, + "gridPos": { + "h": 16, + "w": 24, + "x": 0, + "y": 26 + }, + "id": 4, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "enablePagination": true, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Blocked ms" + } + ] + }, + "pluginVersion": "10.4.7", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "editorMode": "code", + "exemplar": false, + "expr": "pgwatch_lock_waits_blocked_ms{cluster=\"$cluster_name\", node_name=\"$node_name\", datname=~\"$db_name\"}", + "format": "table", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Blocking tree", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "__name__": true, + "cluster": true, + "dbname": true, + "env": true, + "instance": true, + "job": true, + "node_name": true, + "real_dbname": true, + "sink_type": true, + "sys_id": true + }, + "indexByName": { + "Time": 0, + "blocked_pid": 1, + "blocker_pid": 2, + "Value": 3, + "blocked_user": 4, + "blocker_user": 5, + "blocked_appname": 6, + "blocker_appname": 7, + "blocked_locktype": 8, + "blocker_locktype": 9, + "blocked_mode": 10, + "blocker_mode": 11, + "blocked_table": 12, + "blocker_table": 13, + "blocked_query_id": 14, + "blocker_query_id": 15, + "datname": 16 + }, + "renameByName": {} + } + } + ], + "type": "table" + } + ], + "refresh": "30s", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "", + "value": "" + }, + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "definition": "label_values(pgwatch_db_size_size_b,cluster)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "cluster_name", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(pgwatch_db_size_size_b,cluster)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "", + "value": "" + }, + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "definition": 
"label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\"},node_name)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "node_name", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\"},node_name)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "", + "value": "" + }, + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "definition": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", + "hide": 0, + "includeAll": true, + "allValue": ".*", + "multi": false, + "name": "db_name", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m" + ] + }, + "timezone": "", + "title": "13. Lock contention", + "uid": "lock-contention", + "version": 1, + "weekStart": "" +} diff --git a/config/grafana/dashboards/Dashboard_1_Node_performance_overview.json b/config/grafana/dashboards/Dashboard_1_Node_performance_overview.json index 3857460..034e768 100644 --- a/config/grafana/dashboards/Dashboard_1_Node_performance_overview.json +++ b/config/grafana/dashboards/Dashboard_1_Node_performance_overview.json @@ -18,7 +18,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 1, - "id": 10, + "id": 3, "links": [], "panels": [ { @@ -52,9 +52,9 @@ "axisPlacement": "auto", "axisSoftMin": 0, "barAlignment": 0, - "barWidthFactor": 0.5, + "barWidthFactor": 1, "drawStyle": "bars", - "fillOpacity": 40, + "fillOpacity": 100, "gradientMode": "none", "hideFrom": { "legend": false, @@ -296,26 +296,13 @@ "title": "Active session history", "type": "timeseries" }, - { - "collapsed": true, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 19 - }, - "id": 2, - "panels": [], - "title": "Host stats", - "type": "row" - }, { "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 20 + "y": 19 }, "id": 3, "panels": [], @@ -483,7 +470,7 @@ "h": 8, "w": 12, "x": 0, - "y": 21 + "y": 20 }, "id": 4, "options": { @@ -499,7 +486,7 @@ }, "tooltip": { "hideZeros": false, - "mode": "single", + "mode": "multi", "sort": "none" } }, @@ -716,7 +703,7 @@ "h": 8, "w": 12, "x": 12, - "y": 21 + "y": 20 }, "id": 39, "options": { @@ -732,7 +719,7 @@ }, "tooltip": { "hideZeros": false, - "mode": "single", + "mode": "multi", "sort": "none" } }, @@ -772,109 +759,6 @@ "title": "Non-idle Sessions", "type": "timeseries" }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "axisSoftMin": 0, - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 100, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - 
"lineWidth": 1, - "pointSize": 1, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ops/s" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 29 - }, - "id": 6, - "options": { - "legend": { - "calcs": [ - "min", - "max", - "mean" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "12.0.2", - "targets": [ - { - "disableTextWrap": false, - "editorMode": "code", - "expr": "sum(irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", - "fullMetaSearch": false, - "includeNullMetadata": true, - "interval": "20", - "legendFormat": "Calls", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "Calls (pg_stat_statements)", - "type": "timeseries" - }, { "datasource": { "type": "prometheus", @@ -970,8 +854,8 @@ "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 29 + "x": 0, + "y": 28 }, "id": 7, "options": { @@ -983,7 +867,7 @@ }, "tooltip": { "hideZeros": false, - "mode": "single", + "mode": "multi", "sort": "none" } }, @@ -1012,7 +896,7 @@ "refId": "A" } ], - "title": "Transactions", + "title": "TPS", "type": "timeseries" }, { @@ -1035,7 +919,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 0, + "fillOpacity": 100, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1071,21 +955,26 @@ "value": 80 } ] - } + }, + "unit": "ops/s" }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 37 + "x": 12, + "y": 28 }, - "id": 9, + "id": 6, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", "placement": "bottom", "showLegend": true }, @@ -1098,10 +987,19 @@ "pluginVersion": "12.0.2", "targets": [ { - "refId": "A" + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "20", + "legendFormat": "Calls", + "range": true, + "refId": "A", + "useBackend": false } ], - "title": "TODO: Postgres logs", + "title": "QPS (pg_stat_statements)", "type": "timeseries" }, { @@ -1109,6 +1007,7 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, + "description": "To see planning time, enable pg_stat_statements.track_planning", "fieldConfig": { "defaults": { "color": { @@ -1142,14 +1041,13 @@ "spanNulls": false, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], - "max": 100, "thresholds": { "mode": "absolute", "steps": [ @@ -1162,48 +1060,17 @@ } ] }, - "unit": "percent" + "unit": "s/s" }, - "overrides": [ - { - "matcher": { - "id": "byFrameRefID", - "options": "A" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byFrameRefID", - "options": "B" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "red", - "mode": "fixed" - } - } - ] - } - ] 
+ "overrides": [] }, "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 37 + "x": 0, + "y": 36 }, - "id": 8, + "id": 10, "options": { "legend": { "calcs": [ @@ -1217,36 +1084,36 @@ }, "tooltip": { "hideZeros": false, - "mode": "single", + "mode": "multi", "sort": "none" } }, "pluginVersion": "12.0.2", "targets": [ + { + "editorMode": "code", + "expr": "sum(irate(pgwatch_pg_stat_statements_exec_time_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))/1000", + "interval": "20", + "legendFormat": "Execution time", + "range": true, + "refId": "A" + }, { "datasource": { "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, "editorMode": "code", - "expr": "sum(irate(pgwatch_db_stats_xact_rollback{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))/(sum(irate(pgwatch_db_stats_xact_commit{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))+sum(irate(pgwatch_db_stats_xact_rollback{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))) * 100", + "expr": "sum(irate(pgwatch_pg_stat_statements_plan_time_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))/1000", "hide": false, "instant": false, "interval": "20", - "legendFormat": "Rollbacks", + "legendFormat": "Planning time", "range": true, "refId": "B" - }, - { - "editorMode": "code", - "expr": "sum(irate(pgwatch_db_stats_xact_commit{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))/(sum(irate(pgwatch_db_stats_xact_commit{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))+sum(irate(pgwatch_db_stats_xact_rollback{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))) * 100", - "interval": "20", - "legendFormat": "Commits", - "range": true, - "refId": "A" } ], - "title": "Commit vs Rollback ratio", + "title": "Query total time (pg_stat_statements)", "type": "timeseries" }, { @@ -1254,6 +1121,7 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, + "description": "To see planning time, enable pg_stat_statements.track_planning", "fieldConfig": { "defaults": { "color": { @@ -1287,7 +1155,7 @@ "spanNulls": false, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" @@ -1306,17 +1174,17 @@ } ] }, - "unit": "s/s" + "unit": "s" }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 45 + "x": 12, + "y": 36 }, - "id": 10, + "id": 48, "options": { "legend": { "calcs": [ @@ -1330,7 +1198,7 @@ }, "tooltip": { "hideZeros": false, - "mode": "single", + "mode": "multi", "sort": "none" } }, @@ -1338,14 +1206,28 @@ "targets": [ { "editorMode": "code", - "expr": "(sum(irate(pgwatch_pg_stat_statements_exec_time_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval])) + sum(irate(pgwatch_pg_stat_statements_plan_time_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval])))/1000", + "expr": "sum(irate(pgwatch_pg_stat_statements_exec_time_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))/1000/ sum(irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", "interval": "20", - "legendFormat": "Statements total time", + "legendFormat": "Execution time per call", "range": true, "refId": "A" + }, + { + 
"datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "editorMode": "code", + "expr": "sum(irate(pgwatch_pg_stat_statements_plan_time_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))/1000/ sum(irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", + "hide": false, + "instant": false, + "interval": "20", + "legendFormat": "Planning time per call", + "range": true, + "refId": "B" } ], - "title": "Statements total time (pg_stat_statements)", + "title": "Query time per call (latency) (pg_stat_statements)", "type": "timeseries" }, { @@ -1368,7 +1250,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 100, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1405,17 +1287,17 @@ } ] }, - "unit": "s" + "unit": "ops" }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 45 + "x": 0, + "y": 44 }, - "id": 11, + "id": 12, "options": { "legend": { "calcs": [ @@ -1429,7 +1311,7 @@ }, "tooltip": { "hideZeros": false, - "mode": "single", + "mode": "multi", "sort": "none" } }, @@ -1438,17 +1320,34 @@ { "disableTextWrap": false, "editorMode": "code", - "expr": "(sum(irate(pgwatch_pg_stat_statements_exec_time_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval])) + sum(irate(pgwatch_pg_stat_statements_plan_time_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))) / sum(irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", + "expr": "irate(pgwatch_db_stats_tup_returned{cluster=\"$cluster_name\", node_name=\"$node_name\", datname=\"$db_name\"}[$__rate_interval])", "fullMetaSearch": false, "includeNullMetadata": true, "interval": "20", - "legendFormat": "Statements time per call", + "legendFormat": "Tuples returned", "range": true, "refId": "A", "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "irate(pgwatch_db_stats_tup_fetched{cluster=\"$cluster_name\", node_name=\"$node_name\", datname=\"$db_name\"}[$__rate_interval])", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "interval": "20", + "legendFormat": "Tuples fetched", + "range": true, + "refId": "B", + "useBackend": false } ], - "title": "Statements time per call (pg_stat_statements) aka latency", + "title": "Tuples fetched and tuples returned per second", "type": "timeseries" }, { @@ -1489,7 +1388,7 @@ "spanNulls": false, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" @@ -1507,115 +1406,34 @@ "value": 80 } ] - } + }, + "unit": "ops" }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 53 - }, - "id": 12, - "options": { - "legend": { - "calcs": [ - "min", - "max", - "mean" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "12.0.2", - "targets": [ - { - "editorMode": "code", - "expr": "sum(irate(pgwatch_pg_stat_statements_rows[$__rate_interval]))", - "interval": "20", - "legendFormat": "No. 
of rows ", - "range": true, - "refId": "A" - } - ], - "title": "Total rows (pg_stat_statements)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "axisSoftMin": 0, - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 100, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 1, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Tuples deleted" }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, + "properties": [ { - "color": "red", - "value": 80 + "id": "color", + "value": { + "fixedColor": "dark-red", + "mode": "fixed" + } } ] } - }, - "overrides": [] + ] }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 53 + "y": 44 }, - "id": 13, + "id": 47, "options": { "legend": { "calcs": [ @@ -1629,22 +1447,60 @@ }, "tooltip": { "hideZeros": false, - "mode": "single", + "mode": "multi", "sort": "none" } }, "pluginVersion": "12.0.2", "targets": [ { + "disableTextWrap": false, + "editorMode": "builder", + "expr": "irate(pgwatch_db_stats_tup_inserted{cluster=\"$cluster_name\", node_name=\"$node_name\", datname=\"$db_name\"}[$__rate_interval])", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "20", + "legendFormat": "Tuples inserted", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "disableTextWrap": false, "editorMode": "code", - "expr": "sum(irate(pgwatch_pg_stat_statements_rows{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))/sum(irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", + "expr": "irate(pgwatch_db_stats_tup_updated{cluster=\"$cluster_name\", node_name=\"$node_name\", datname=\"$db_name\"}[$__rate_interval])", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, "interval": "20", - "legendFormat": "Rows per call", + "legendFormat": "Tuples updated (HOT + non-HOT)", "range": true, - "refId": "A" + "refId": "B", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "irate(pgwatch_db_stats_tup_deleted{cluster=\"$cluster_name\", node_name=\"$node_name\", datname=\"$db_name\"}[$__rate_interval])", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "interval": "20", + "legendFormat": "Tuples deleted", + "range": true, + "refId": "C", + "useBackend": false } ], - "title": "Rows per call (pg_stat_statements)", + "title": "Tuples operations per second", "type": "timeseries" }, { @@ -1685,7 +1541,7 @@ "spanNulls": false, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" @@ -1704,7 +1560,7 @@ } ] }, - "unit": "s/s" + "unit": "binBps" }, "overrides": [] }, 
@@ -1712,7 +1568,7 @@ "h": 8, "w": 12, "x": 0, - "y": 61 + "y": 52 }, "id": 14, "options": { @@ -1728,35 +1584,126 @@ }, "tooltip": { "hideZeros": false, - "mode": "single", + "mode": "multi", "sort": "none" } }, "pluginVersion": "12.0.2", "targets": [ { - "editorMode": "code", - "expr": "sum(irate(pgwatch_pg_stat_statements_block_read_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", + "disableTextWrap": false, + "editorMode": "builder", + "expr": "irate(pgwatch_db_stats_blks_read{cluster=\"$cluster_name\", node_name=\"$node_name\", datname=\"$db_name\"}[$__rate_interval])", + "fullMetaSearch": false, + "includeNullMetadata": true, "interval": "20", - "legendFormat": "blk_read_time", + "legendFormat": "blks_read", "range": true, - "refId": "A" + "refId": "A", + "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, - "editorMode": "code", - "expr": "sum(irate(pgwatch_pg_stat_statements_block_write_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", + "disableTextWrap": false, + "editorMode": "builder", + "expr": "irate(pgwatch_db_stats_blks_hit{cluster=\"$cluster_name\", node_name=\"$node_name\", datname=\"$db_name\"}[$__rate_interval])", + "fullMetaSearch": false, "hide": false, + "includeNullMetadata": true, "interval": "20", - "legendFormat": "blk_write_time", + "legendFormat": "blks_hit", "range": true, - "refId": "B" + "refId": "B", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "editorMode": "code", + "expr": "last_over_time(pgwatch_settings_numeric_value{setting_name=\"block_size\"}[1h])", + "hide": false, + "instant": false, + "legendFormat": "block_size", + "range": true, + "refId": "C" + } + ], + "title": "blk_reads and blk_hits per second (bytes)", + "transformations": [ + { + "id": "calculateField", + "options": { + "alias": "blks_read (bytes)", + "binary": { + "left": { + "matcher": { + "id": "byName", + "options": "blks_read" + } + }, + "operator": "*", + "right": { + "matcher": { + "id": "byName", + "options": "block_size" + } + } + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + }, + "replaceFields": false + } + }, + { + "id": "calculateField", + "options": { + "alias": "blks_hit (bytes)", + "binary": { + "left": { + "matcher": { + "id": "byName", + "options": "blks_hit" + } + }, + "operator": "*", + "right": { + "matcher": { + "id": "byName", + "options": "block_size" + } + } + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + }, + "replaceFields": false + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "blks_hit": true, + "blks_read": true, + "block_size": true + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": { + "blks_hit (bytes)": "blks_hit", + "blks_read (bytes)": "blks_read" + } + } } ], - "title": "blk_read_time vs blk_write_time (s/s) (pg_stat_statements)", "type": "timeseries" }, { @@ -1764,6 +1711,7 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, + "description": "To see blk_read_time and blk_write_time, enable track_io_timing", "fieldConfig": { "defaults": { "color": { @@ -1797,7 +1745,7 @@ "spanNulls": false, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" @@ -1816,7 +1764,7 @@ } ] }, - "unit": "ms" + "unit": "s/s" }, "overrides": [] }, @@ -1824,9 +1772,9 @@ "h": 8, "w": 12, "x": 12, - "y": 61 + "y": 52 }, - "id": 15, + "id": 49, "options": { "legend": { "calcs": [ @@ 
-1840,19 +1788,23 @@ }, "tooltip": { "hideZeros": false, - "mode": "single", + "mode": "multi", "sort": "none" } }, "pluginVersion": "12.0.2", "targets": [ { + "disableTextWrap": false, "editorMode": "code", - "expr": "sum(irate(pgwatch_pg_stat_statements_block_read_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))/sum(irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", + "expr": "irate(pgwatch_db_stats_blk_read_time{cluster=\"$cluster_name\", node_name=\"$node_name\", datname=\"$db_name\"}[$__rate_interval])", + "fullMetaSearch": false, + "includeNullMetadata": true, "interval": "20", "legendFormat": "blk_read_time", "range": true, - "refId": "A" + "refId": "A", + "useBackend": false }, { "datasource": { @@ -1860,7 +1812,7 @@ "uid": "P7A0D6631BB10B34F" }, "editorMode": "code", - "expr": "sum(irate(pgwatch_pg_stat_statements_block_write_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))/sum(irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", + "expr": "irate(pgwatch_db_stats_blk_write_time{cluster=\"$cluster_name\", node_name=\"$node_name\", datname=\"$db_name\"}[$__rate_interval])", "hide": false, "interval": "20", "legendFormat": "blk_write_time", @@ -1868,7 +1820,7 @@ "refId": "B" } ], - "title": "blk_read_time vs blk_write_time per call (pg_stat_statements)", + "title": "blk_read_time and blk_write_time (s/s)", "type": "timeseries" }, { @@ -1936,9 +1888,9 @@ "h": 8, "w": 12, "x": 0, - "y": 69 + "y": 60 }, - "id": 16, + "id": 25, "options": { "legend": { "calcs": [ @@ -1948,7 +1900,7 @@ ], "displayMode": "table", "placement": "bottom", - "showLegend": false + "showLegend": true }, "tooltip": { "hideZeros": false, @@ -1961,17 +1913,17 @@ { "disableTextWrap": false, "editorMode": "code", - "expr": "sum(irate(pgwatch_pg_stat_statements_shared_bytes_hit_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", + "expr": "sum(irate(pgwatch_wal_xlog_location_b{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", "fullMetaSearch": false, "includeNullMetadata": true, "interval": "20", - "legendFormat": "shared bytes", + "legendFormat": "WAL bytes", "range": true, "refId": "A", "useBackend": false } ], - "title": "shared_blks_hit (bytes) (pg_stat_statements)", + "title": "WAL bytes per second (pg_current_wal_lsn)", "type": "timeseries" }, { @@ -2023,1547 +1975,25 @@ "mode": "absolute", "steps": [ { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 69 - }, - "id": 17, - "options": { - "legend": { - "calcs": [ - "min", - "max", - "mean" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": false - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "12.0.2", - "targets": [ - { - "disableTextWrap": false, - "editorMode": "code", - "expr": "sum(irate(pgwatch_pg_stat_statements_shared_bytes_hit_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))/sum(irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", - "fullMetaSearch": false, - "includeNullMetadata": true, - 
"interval": "20", - "legendFormat": "shared bytes per call", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "shared_blks_hit (bytes) per call (pg_stat_statements)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "axisSoftMin": 0, - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 100, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 1, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 77 - }, - "id": 18, - "options": { - "legend": { - "calcs": [ - "min", - "max", - "mean" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": false - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "12.0.2", - "targets": [ - { - "disableTextWrap": false, - "editorMode": "code", - "expr": "sum(irate(pgwatch_pg_stat_statements_shared_bytes_read_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", - "fullMetaSearch": false, - "includeNullMetadata": true, - "interval": "20", - "legendFormat": "shared bytes", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "shared_blks_read (bytes) (pg_stat_statements)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "axisSoftMin": 0, - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 100, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 1, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 77 - }, - "id": 19, - "options": { - "legend": { - "calcs": [ - "min", - "max", - "mean" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": false - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "12.0.2", - "targets": [ - { - "disableTextWrap": false, - "editorMode": "code", - "expr": 
"sum(irate(pgwatch_pg_stat_statements_shared_bytes_read_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))/sum(irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", - "fullMetaSearch": false, - "includeNullMetadata": true, - "interval": "20", - "legendFormat": "shared bytes per call", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "shared_blks_read (bytes) per call (pg_stat_statements)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "axisSoftMin": 0, - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 100, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 1, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 85 - }, - "id": 20, - "options": { - "legend": { - "calcs": [ - "min", - "max", - "mean" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": false - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "12.0.2", - "targets": [ - { - "disableTextWrap": false, - "editorMode": "code", - "expr": "sum(irate(pgwatch_pg_stat_statements_shared_bytes_written_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", - "fullMetaSearch": false, - "includeNullMetadata": true, - "interval": "20", - "legendFormat": "shared bytes", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "shared_blks_written (bytes) (pg_stat_statements)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "axisSoftMin": 0, - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 100, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 1, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 85 - }, - "id": 21, - "options": { - "legend": { - "calcs": [ - "min", - "max", - 
"mean" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": false - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "12.0.2", - "targets": [ - { - "disableTextWrap": false, - "editorMode": "code", - "expr": "sum(irate(pgwatch_pg_stat_statements_shared_bytes_written_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))/sum(irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", - "fullMetaSearch": false, - "includeNullMetadata": true, - "interval": "20", - "legendFormat": "shared bytes per call", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "shared_blks_written (bytes) per call (pg_stat_statements)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "axisSoftMin": 0, - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 100, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 1, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 93 - }, - "id": 22, - "options": { - "legend": { - "calcs": [ - "min", - "max", - "mean" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": false - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "12.0.2", - "targets": [ - { - "disableTextWrap": false, - "editorMode": "code", - "expr": "sum(irate(pgwatch_pg_stat_statements_shared_bytes_dirtied_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", - "fullMetaSearch": false, - "includeNullMetadata": true, - "interval": "20", - "legendFormat": "shared bytes", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "shared_blks_dirtied (bytes) (pg_stat_statements)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "axisSoftMin": 0, - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 100, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 1, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - 
"mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 93 - }, - "id": 23, - "options": { - "legend": { - "calcs": [ - "min", - "max", - "mean" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": false - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "12.0.2", - "targets": [ - { - "disableTextWrap": false, - "editorMode": "code", - "expr": "sum(irate(pgwatch_pg_stat_statements_shared_bytes_written_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))/sum(irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", - "fullMetaSearch": false, - "includeNullMetadata": true, - "interval": "20", - "legendFormat": "shared bytes per call", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "shared_blks_dirtied (bytes) per call (pg_stat_statements)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "axisSoftMin": 0, - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 100, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 1, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "max": 100, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 101 - }, - "id": 24, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": false - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "12.0.2", - "targets": [ - { - "editorMode": "code", - "expr": "sum(irate(pgwatch_pg_stat_statements_shared_bytes_hit_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))/(sum(irate(pgwatch_pg_stat_statements_shared_bytes_hit_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval])) + sum(irate(pgwatch_pg_stat_statements_shared_bytes_read_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))) * 100", - "interval": "20", - "legendFormat": "shared_blks_read_ratio", - "range": true, - "refId": "A" - } - ], - "title": "shared_blks_read_ratio (pg_stat_statements)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "axisSoftMin": 0, - "barAlignment": 0, - "barWidthFactor": 
0.6, - "drawStyle": "line", - "fillOpacity": 100, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 1, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 109 - }, - "id": 25, - "options": { - "legend": { - "calcs": [ - "min", - "max", - "mean" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": false - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "12.0.2", - "targets": [ - { - "disableTextWrap": false, - "editorMode": "code", - "expr": "sum(irate(pgwatch_wal_xlog_location_b{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", - "fullMetaSearch": false, - "includeNullMetadata": true, - "interval": "20", - "legendFormat": "shared bytes", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "WAL bytes (pg_current_wal_lsn)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "axisSoftMin": 0, - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 100, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 1, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 109 - }, - "id": 26, - "options": { - "legend": { - "calcs": [ - "min", - "max", - "mean" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": false - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "12.0.2", - "targets": [ - { - "disableTextWrap": false, - "editorMode": "code", - "expr": "sum(irate(pgwatch_wal_xlog_location_b{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))/sum(irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", - "fullMetaSearch": false, - "includeNullMetadata": true, - "interval": "20", - "legendFormat": "shared bytes", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "WAL bytes per call (pg_current_wal_lsn)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { 
- "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "axisSoftMin": 0, - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 100, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 1, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 117 - }, - "id": 27, - "options": { - "legend": { - "calcs": [ - "min", - "max", - "mean" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": false - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "12.0.2", - "targets": [ - { - "disableTextWrap": false, - "editorMode": "code", - "expr": "sum(irate(pgwatch_pg_stat_statements_wal_fpi{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", - "fullMetaSearch": false, - "includeNullMetadata": true, - "interval": "20", - "legendFormat": "shared bytes", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "WAL fpi (pg_stat_statements)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "axisSoftMin": 0, - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 100, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 1, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 117 - }, - "id": 28, - "options": { - "legend": { - "calcs": [ - "min", - "max", - "mean" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": false - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "12.0.2", - "targets": [ - { - "disableTextWrap": false, - "editorMode": "code", - "expr": "sum(irate(pgwatch_pg_stat_statements_wal_fpi{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))/sum(irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", - "fullMetaSearch": false, - "includeNullMetadata": true, - "interval": "20", - "legendFormat": "shared bytes", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "WAL fpi per call (pg_current_wal_lsn)", - 
"type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "axisSoftMin": 0, - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 100, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 1, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 125 - }, - "id": 29, - "options": { - "legend": { - "calcs": [ - "min", - "max", - "mean" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "12.0.2", - "targets": [ - { - "disableTextWrap": false, - "editorMode": "code", - "expr": "sum(irate(pgwatch_pg_stat_statements_temp_bytes_read{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", - "fullMetaSearch": false, - "includeNullMetadata": true, - "interval": "20", - "legendFormat": "Temp bytes read", - "range": true, - "refId": "A", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "editorMode": "code", - "expr": "sum(irate(pgwatch_pg_stat_statements_temp_bytes_written{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", - "hide": false, - "interval": "20", - "legendFormat": "Temp bytes written", - "range": true, - "refId": "B" - } - ], - "title": "temp_bytes_read vs temp_bytes_written (pg_stat_statements)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "axisSoftMin": 0, - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 100, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 1, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 125 - }, - "id": 31, - "options": { - "legend": { - "calcs": [ - "min", - "max", - "mean" - ], - "displayMode": "table", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "hideZeros": false, - "mode": "single", - "sort": "none" - } 
- }, - "pluginVersion": "12.0.2", - "targets": [ - { - "disableTextWrap": false, - "editorMode": "code", - "expr": "sum(irate(pgwatch_pg_stat_statements_temp_bytes_read{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))/sum(irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", - "fullMetaSearch": false, - "includeNullMetadata": true, - "interval": "20", - "legendFormat": "Temp bytes read", - "range": true, - "refId": "A", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "editorMode": "code", - "expr": "sum(irate(pgwatch_pg_stat_statements_temp_bytes_written{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))/sum(irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", - "hide": false, - "interval": "20", - "legendFormat": "Temp bytes written", - "range": true, - "refId": "B" - } - ], - "title": "temp_bytes_read vs temp_bytes_written per call (pg_stat_statements)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "axisSoftMin": 0, - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 100, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 1, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ops" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Inserts" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "dark-green", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Deletes" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "dark-red", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "HOT updates" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Non-HOT updates" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } + "color": "green" + }, + { + "color": "red", + "value": 80 } ] - } - ] + }, + "unit": "binBps" + }, + "overrides": [] }, "gridPos": { - "h": 13, - "w": 24, - "x": 0, - "y": 133 + "h": 8, + "w": 12, + "x": 12, + "y": 60 }, - "id": 45, + "id": 29, "options": { "legend": { "calcs": [ @@ -3573,127 +2003,30 @@ ], "displayMode": "table", "placement": "bottom", - "showLegend": true, - "sortBy": "Max", - "sortDesc": true + "showLegend": true }, "tooltip": { "hideZeros": false, - "mode": "multi", + "mode": "single", "sort": "none" } }, "pluginVersion": "12.0.2", "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" 
- }, "disableTextWrap": false, "editorMode": "code", - "expr": "sum by (node_name, dbname, cluster) (irate(pgwatch_table_stats_n_tup_ins{datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]))", + "expr": "irate(pgwatch_db_stats_temp_bytes{cluster=\"$cluster_name\", node_name=\"$node_name\", datname=\"$db_name\"}[$__rate_interval])", "fullMetaSearch": false, "includeNullMetadata": true, - "interval": "60", - "legendFormat": "Inserts", + "interval": "20", + "legendFormat": "Temp bytes", "range": true, "refId": "A", "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "sum by (node_name, dbname, cluster) (irate(pgwatch_table_stats_n_tup_upd{datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": true, - "interval": "60", - "legendFormat": "Updates", - "range": true, - "refId": "B", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "sum by (node_name, dbname, cluster) (irate(pgwatch_table_stats_n_tup_hot_upd{datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": true, - "interval": "60", - "legendFormat": "HOT updates", - "range": true, - "refId": "C", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "sum by (node_name, dbname, cluster) (irate(pgwatch_table_stats_n_tup_del{datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": true, - "interval": "60", - "legendFormat": "Deletes", - "range": true, - "refId": "D", - "useBackend": false - } - ], - "title": "Tuple operations", - "transformations": [ - { - "id": "calculateField", - "options": { - "alias": "Non-HOT updates", - "binary": { - "left": { - "matcher": { - "id": "byName", - "options": "Updates" - } - }, - "operator": "-", - "right": { - "matcher": { - "id": "byName", - "options": "HOT updates" - } - } - }, - "mode": "binary", - "reduce": { - "reducer": "sum" - } - } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Updates": true - }, - "includeByName": {}, - "indexByName": {}, - "renameByName": { - "Hot Updates": "HOT Updates" - } - } } ], + "title": "temp_bytes written per second", "type": "timeseries" }, { @@ -3764,7 +2097,7 @@ "h": 8, "w": 12, "x": 0, - "y": 146 + "y": 68 }, "id": 32, "options": { @@ -3901,7 +2234,7 @@ "h": 8, "w": 12, "x": 12, - "y": 146 + "y": 68 }, "id": 33, "options": { @@ -3999,7 +2332,7 @@ "h": 8, "w": 12, "x": 0, - "y": 154 + "y": 76 }, "id": 34, "options": { @@ -4052,7 +2385,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 0, + "fillOpacity": 100, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4096,7 +2429,7 @@ "h": 8, "w": 12, "x": 12, - "y": 154 + "y": 76 }, "id": 35, "options": { @@ -4149,7 +2482,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 0, + "fillOpacity": 100, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4164,10 +2497,10 @@ "type": "linear" }, "showPoints": "auto", - 
"spanNulls": false, + "spanNulls": true, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" @@ -4194,7 +2527,7 @@ "h": 10, "w": 24, "x": 0, - "y": 162 + "y": 84 }, "id": 41, "options": { @@ -4210,7 +2543,7 @@ }, "tooltip": { "hideZeros": false, - "mode": "single", + "mode": "multi", "sort": "none" } }, @@ -4218,12 +2551,12 @@ "targets": [ { "disableTextWrap": false, - "editorMode": "code", - "expr": "rate(pgwatch_bgwriter_buffers_checkpoint{datname=\"$db_name\"}[$__rate_interval]) * on(datname) pgwatch_settings_numeric_value{datname=\"$db_name\", setting_name=\"block_size\"}", + "editorMode": "builder", + "expr": "irate(pgwatch_checkpointer_buffers_written{datname=\"$db_name\"}[$__rate_interval]) * on(datname) pgwatch_settings_numeric_value{datname=\"$db_name\", setting_name=\"block_size\"}", "fullMetaSearch": false, "includeNullMetadata": true, "interval": "20", - "legendFormat": "Cleaned by the Checkpointer", + "legendFormat": "Cleaned by checkpointer", "range": true, "refId": "A", "useBackend": false @@ -4233,12 +2566,12 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, - "editorMode": "code", + "editorMode": "builder", "expr": "irate(pgwatch_bgwriter_buffers_clean[$__rate_interval]) * on(datname) pgwatch_settings_numeric_value{datname=\"$db_name\", setting_name=\"block_size\"}", "hide": false, "instant": false, "interval": "20", - "legendFormat": "Cleaned by the Background Writer", + "legendFormat": "Cleaned by bgwriter", "range": true, "refId": "B" }, @@ -4247,17 +2580,21 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, - "editorMode": "code", - "expr": "irate(pgwatch_bgwriter_buffers_backend[$__rate_interval]) * on(datname) pgwatch_settings_numeric_value{datname=\"$db_name\", setting_name=\"block_size\"}", + "disableTextWrap": false, + "editorMode": "builder", + "expr": "irate(pg_stat_bgwriter_buffers_backend_total[$__rate_interval]) * on(datname) pgwatch_settings_numeric_value{datname=\"$db_name\", setting_name=\"block_size\"}", + "fullMetaSearch": false, "hide": false, + "includeNullMetadata": true, "instant": false, "interval": "20", "legendFormat": "Cleaned by backends", "range": true, - "refId": "C" + "refId": "C", + "useBackend": false } ], - "title": "bgwriter and checkpointer", + "title": "Background writer and checkpointer", "type": "timeseries" }, { @@ -4356,7 +2693,7 @@ "h": 11, "w": 24, "x": 0, - "y": 172 + "y": 94 }, "id": 42, "options": { @@ -4477,7 +2814,7 @@ "h": 14, "w": 24, "x": 0, - "y": 183 + "y": 105 }, "id": 44, "options": { @@ -4588,6 +2925,145 @@ ], "type": "timeseries" }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, 
+ "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Safe threshold" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 20, + 10 + ], + "fill": "dash" + } + }, + { + "id": "color", + "value": { + "fixedColor": "dark-red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 24, + "x": 0, + "y": 119 + }, + "id": 46, + "options": { + "legend": { + "calcs": [ + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "editorMode": "code", + "expr": "last_over_time(pgwatch_multixact_size_members_bytes{cluster=\"$cluster_name\", node_name=\"$node_name\"}[1h])", + "legendFormat": "members", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "editorMode": "code", + "expr": "10737418240", + "hide": false, + "instant": false, + "legendFormat": "Safe threshold", + "range": true, + "refId": "B" + } + ], + "title": "Multixact members folder size", + "type": "timeseries" + }, { "fieldConfig": { "defaults": {}, @@ -4597,7 +3073,7 @@ "h": 3, "w": 24, "x": 0, - "y": 197 + "y": 131 }, "id": 40, "options": { @@ -4624,13 +3100,13 @@ "text": "default", "value": "default" }, - "definition": "label_values(pgwatch_settings_configured,cluster)", + "definition": "label_values(pgwatch_db_size_size_b,cluster)", "label": "Cluster name", "name": "cluster_name", "options": [], "query": { "qryType": 1, - "query": "label_values(pgwatch_settings_configured,cluster)", + "query": "label_values(pgwatch_db_size_size_b,cluster)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -4642,13 +3118,13 @@ "text": "postgres_ai", "value": "postgres_ai" }, - "definition": "label_values(pgwatch_settings_configured{cluster=\"$cluster_name\"},node_name)", + "definition": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\"},node_name)", "label": "Node name", "name": "node_name", "options": [], "query": { "qryType": 1, - "query": "label_values(pgwatch_settings_configured{cluster=\"$cluster_name\"},node_name)", + "query": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\"},node_name)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -4660,13 +3136,13 @@ "text": "workloaddb", "value": "workloaddb" }, - "definition": "label_values(pgwatch_pg_database_wraparound_age_datfrozenxid{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", + "definition": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", "label": "DB name", "name": "db_name", "options": [], "query": { "qryType": 1, - "query": "label_values(pgwatch_pg_database_wraparound_age_datfrozenxid{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", + "query": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -4683,5 +3159,5 @@ "timezone": "utc", "title": "01. 
Single node performance overview (high-level)", "uid": "f90500a0-a12e-4081-a2f0-07ed96f27915", - "version": 24 + "version": 12 } \ No newline at end of file diff --git a/config/grafana/dashboards/Dashboard_2_Aggregated_query_analysis.json b/config/grafana/dashboards/Dashboard_2_Aggregated_query_analysis.json index 812f189..cb1a041 100644 --- a/config/grafana/dashboards/Dashboard_2_Aggregated_query_analysis.json +++ b/config/grafana/dashboards/Dashboard_2_Aggregated_query_analysis.json @@ -364,7 +364,7 @@ "root_selector": "", "source": "url", "type": "csv", - "url": "http://flask-pgss-api:5000/pgss_metrics/csv", + "url": "http://flask-pgss-api:8000/pgss_metrics/csv", "url_options": { "data": "", "method": "GET", @@ -506,7 +506,8 @@ "value": 80 } ] - } + }, + "unit": "calls/s" }, "overrides": [ { @@ -560,14 +561,14 @@ "targets": [ { "editorMode": "code", - "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", + "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\"}[$__rate_interval]))", "interval": "20", "legendFormat": "{{queryid}}", "range": true, "refId": "A" } ], - "title": "Top $top_n statements by calls (pg_stat_statements)", + "title": "Top $top_n statements by calls per second (pg_stat_statements)", "type": "timeseries" }, { @@ -679,14 +680,14 @@ "targets": [ { "editorMode": "code", - "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_exec_time_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", + "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_exec_time_total{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\"}[$__rate_interval]))", "interval": "20", "legendFormat": "{{queryid}}", "range": true, "refId": "A" } ], - "title": "Top $top_n statements by execution time (pg_stat_statements)", + "title": "Top $top_n statements by execution time per second (pg_stat_statements)", "type": "timeseries" }, { @@ -800,7 +801,7 @@ "targets": [ { "editorMode": "code", - "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_exec_time_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval])/irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", + "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_exec_time_total{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\"}[$__rate_interval])/irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\"}[$__rate_interval]))", "interval": "20", "legendFormat": "{{queryid}}", "range": true, @@ -919,14 +920,14 @@ "targets": [ { "editorMode": "code", - "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_plan_time_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", + "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_plan_time_total{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\"}[$__rate_interval]))", "interval": "20", "legendFormat": "{{queryid}}", "range": true, "refId": "A" } ], - "title": "Top $top_n statements by planning time (pg_stat_statements)", + "title": "Top $top_n statements by planning time per second (pg_stat_statements)", "type": "timeseries" }, { @@ -1040,7 +1041,7 @@ "targets": [ { "editorMode": "code", - "expr": "topk($top_n, 
irate(pgwatch_pg_stat_statements_plan_time_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval])/irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", + "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_plan_time_total{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\"}[$__rate_interval])/irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\"}[$__rate_interval]))", "interval": "20", "legendFormat": "{{queryid}}", "range": true, @@ -1106,7 +1107,8 @@ "value": 80 } ] - } + }, + "unit": "rows/s" }, "overrides": [ { @@ -1160,14 +1162,14 @@ "targets": [ { "editorMode": "code", - "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_rows{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", + "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_rows{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\"}[$__rate_interval]))", "interval": "20", "legendFormat": "{{queryid}}", "range": true, "refId": "A" } ], - "title": "Top $top_n statements by rows (pg_stat_statements)", + "title": "Top $top_n statements by rows per second (pg_stat_statements)", "type": "timeseries" }, { @@ -1280,7 +1282,7 @@ "targets": [ { "editorMode": "code", - "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_rows{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval])/irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", + "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_rows{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\"}[$__rate_interval])/irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\"}[$__rate_interval]))", "interval": "20", "legendFormat": "{{queryid}}", "range": true, @@ -1399,14 +1401,14 @@ "targets": [ { "editorMode": "code", - "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_shared_bytes_hit_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", + "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_shared_bytes_hit_total{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\"}[$__rate_interval]))", "interval": "20", "legendFormat": "{{queryid}}", "range": true, "refId": "A" } ], - "title": "Top $top_n statements by shared_blks_hit (in bytes) (pg_stat_statements)", + "title": "Top $top_n statements by shared_blks_hit per second (in bytes) (pg_stat_statements)", "type": "timeseries" }, { @@ -1520,7 +1522,7 @@ "targets": [ { "editorMode": "code", - "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_shared_bytes_hit_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval])/irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", + "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_shared_bytes_hit_total{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\"}[$__rate_interval])/irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\"}[$__rate_interval]))", "interval": "20", "legendFormat": "{{queryid}}", "range": true, @@ -1639,14 +1641,14 @@ "targets": [ { "editorMode": "code", - "expr": "topk($top_n, 
irate(pgwatch_pg_stat_statements_shared_bytes_read_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", + "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_shared_bytes_read_total{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\"}[$__rate_interval]))", "interval": "20", "legendFormat": "{{queryid}}", "range": true, "refId": "A" } ], - "title": "Top $top_n statements by shared_blks_read (in bytes) (pg_stat_statements)", + "title": "Top $top_n statements by shared_blks_read per second (in bytes) (pg_stat_statements)", "type": "timeseries" }, { @@ -1760,7 +1762,7 @@ "targets": [ { "editorMode": "code", - "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_shared_bytes_read_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval])/irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", + "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_shared_bytes_read_total{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\"}[$__rate_interval])/irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\"}[$__rate_interval]))", "interval": "20", "legendFormat": "{{queryid}}", "range": true, @@ -1879,14 +1881,14 @@ "targets": [ { "editorMode": "code", - "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_shared_bytes_written_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", + "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_shared_bytes_written_total{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\"}[$__rate_interval]))", "interval": "20", "legendFormat": "{{queryid}}", "range": true, "refId": "A" } ], - "title": "Top $top_n statements by shared_blks_written (in bytes) (pg_stat_statements)", + "title": "Top $top_n statements by shared_blks_written per second (in bytes) (pg_stat_statements)", "type": "timeseries" }, { @@ -2000,7 +2002,7 @@ "targets": [ { "editorMode": "code", - "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_shared_bytes_written_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval])/irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", + "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_shared_bytes_written_total{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\"}[$__rate_interval])/irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\"}[$__rate_interval]))", "interval": "20", "legendFormat": "{{queryid}}", "range": true, @@ -2119,14 +2121,14 @@ "targets": [ { "editorMode": "code", - "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_shared_bytes_dirtied_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", + "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_shared_bytes_dirtied_total{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\"}[$__rate_interval]))", "interval": "20", "legendFormat": "{{queryid}}", "range": true, "refId": "A" } ], - "title": "Top $top_n statements by shared_blks_written (in bytes) (pg_stat_statements)", + "title": "Top $top_n statements by shared_blks_dirtied per second (in bytes) (pg_stat_statements)", "type": "timeseries" }, { @@ -2240,7 +2242,7 @@ "targets": 
[ { "editorMode": "code", - "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_shared_bytes_dirtied_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval])/irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", + "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_shared_bytes_dirtied_total{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\"}[$__rate_interval])/irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\"}[$__rate_interval]))", "interval": "20", "legendFormat": "{{queryid}}", "range": true, @@ -2359,14 +2361,14 @@ "targets": [ { "editorMode": "code", - "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_wal_bytes{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", + "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_wal_bytes{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\"}[$__rate_interval]))", "interval": "20", "legendFormat": "{{queryid}}", "range": true, "refId": "A" } ], - "title": "Top $top_n statements by WAL bytes (pg_stat_statements)", + "title": "Top $top_n statements by WAL bytes per second (pg_stat_statements)", "type": "timeseries" }, { @@ -2478,7 +2480,7 @@ "targets": [ { "editorMode": "code", - "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_wal_bytes{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval])/irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", + "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_wal_bytes{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\"}[$__rate_interval])/irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\"}[$__rate_interval]))", "interval": "20", "legendFormat": "{{queryid}}", "range": true, @@ -2597,14 +2599,14 @@ "targets": [ { "editorMode": "code", - "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_wal_fpi{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", + "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_wal_fpi{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\"}[$__rate_interval]))", "interval": "20", "legendFormat": "{{queryid}}", "range": true, "refId": "A" } ], - "title": "Top $top_n statements by WAL fpi (in bytes) (pg_stat_statements)", + "title": "Top $top_n statements by WAL fpi per second (in bytes) (pg_stat_statements)", "type": "timeseries" }, { @@ -2715,7 +2717,7 @@ "targets": [ { "editorMode": "code", - "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_wal_fpi{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval])/irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", + "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_wal_fpi{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\"}[$__rate_interval])/irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\"}[$__rate_interval]))", "interval": "20", "legendFormat": "{{queryid}}", "range": true, @@ -2834,14 +2836,14 @@ "targets": [ { "editorMode": "code", - "expr": "topk($top_n, 
irate(pgwatch_pg_stat_statements_temp_bytes_read{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", + "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_temp_bytes_read{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\"}[$__rate_interval]))", "interval": "20", "legendFormat": "{{queryid}}", "range": true, "refId": "A" } ], - "title": "Top $top_n statements by temp bytes read (pg_stat_statements)", + "title": "Top $top_n statements by temp bytes read per second (pg_stat_statements)", "type": "timeseries" }, { @@ -2953,7 +2955,7 @@ "targets": [ { "editorMode": "code", - "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_temp_bytes_read{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval])/irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", + "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_temp_bytes_read{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\"}[$__rate_interval])/irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\"}[$__rate_interval]))", "interval": "20", "legendFormat": "{{queryid}}", "range": true, @@ -3072,14 +3074,14 @@ "targets": [ { "editorMode": "code", - "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_temp_bytes_written{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", + "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_temp_bytes_written{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\"}[$__rate_interval]))", "interval": "20", "legendFormat": "{{queryid}}", "range": true, "refId": "A" } ], - "title": "Top $top_n statements by temp bytes written (pg_stat_statements)", + "title": "Top $top_n statements by temp bytes written per second (pg_stat_statements)", "type": "timeseries" }, { @@ -3191,7 +3193,7 @@ "targets": [ { "editorMode": "code", - "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_temp_bytes_written{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval])/irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", + "expr": "topk($top_n, irate(pgwatch_pg_stat_statements_temp_bytes_written{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\"}[$__rate_interval])/irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\"}[$__rate_interval]))", "interval": "20", "legendFormat": "{{queryid}}", "range": true, @@ -3263,13 +3265,13 @@ "text": "local", "value": "local" }, - "definition": "label_values(pgwatch_settings_configured,cluster)", + "definition": "label_values(pgwatch_db_size_size_b,cluster)", "label": "Cluster name", "name": "cluster_name", "options": [], "query": { "qryType": 1, - "query": "label_values(pgwatch_settings_configured,cluster)", + "query": "label_values(pgwatch_db_size_size_b,cluster)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -3281,13 +3283,13 @@ "text": "node-01", "value": "node-01" }, - "definition": "label_values(pgwatch_settings_configured{cluster=\"$cluster_name\"},node_name)", + "definition": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\"},node_name)", "label": "Node name", "name": "node_name", "options": [], "query": { "qryType": 1, - "query": 
"label_values(pgwatch_settings_configured{cluster=\"$cluster_name\"},node_name)", + "query": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\"},node_name)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -3295,17 +3297,20 @@ "type": "query" }, { + "allowCustomValue": false, "current": { - "text": "target_database", - "value": "target_database" + "text": "All", + "value": ["$__all"] }, - "definition": "label_values(pgwatch_pg_database_wraparound_age_datfrozenxid{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", + "definition": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", + "includeAll": true, "label": "DB name", + "multi": true, "name": "db_name", "options": [], "query": { "qryType": 1, - "query": "label_values(pgwatch_pg_database_wraparound_age_datfrozenxid{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", + "query": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git a/config/grafana/dashboards/Dashboard_3_Single_query_analysis.json b/config/grafana/dashboards/Dashboard_3_Single_query_analysis.json index 4dc2fab..1f6836b 100644 --- a/config/grafana/dashboards/Dashboard_3_Single_query_analysis.json +++ b/config/grafana/dashboards/Dashboard_3_Single_query_analysis.json @@ -120,9 +120,9 @@ "axisPlacement": "auto", "barAlignment": 0, "axisSoftMin": 0, - "barWidthFactor": 0.5, + "barWidthFactor": 1, "drawStyle": "bars", - "fillOpacity": 40, + "fillOpacity": 100, "gradientMode": "none", "hideFrom": { "legend": false, @@ -168,7 +168,7 @@ { "matcher": { "id": "byRegexp", - "options": "Timeout" + "options": "Timeout.*" }, "properties": [ { @@ -198,7 +198,7 @@ { "matcher": { "id": "byRegexp", - "options": "Lock" + "options": "Lock.*" }, "properties": [ { @@ -213,7 +213,7 @@ { "matcher": { "id": "byRegexp", - "options": "LWLock" + "options": "LWLock.*" }, "properties": [ { @@ -228,7 +228,7 @@ { "matcher": { "id": "byRegexp", - "options": "IO" + "options": "IO.*" }, "properties": [ { @@ -243,7 +243,7 @@ { "matcher": { "id": "byRegexp", - "options": "Client" + "options": "Client.*" }, "properties": [ { @@ -254,37 +254,6 @@ } } ] - }, - { - "__systemRef": "hideSeriesFrom", - "matcher": { - "id": "byNames", - "options": { - "mode": "exclude", - "names": [ - "CPU*", - "Lock", - "LWLock", - "IO", - "Timeout", - "BufferPin", - "Extension", - "IPC" - ], - "prefix": "All except:", - "readOnly": true - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": false, - "tooltip": false, - "viz": true - } - } - ] } ] }, @@ -316,9 +285,9 @@ "targets": [ { "editorMode": "code", - "expr": "sum by (wait_event_type) (pgwatch_wait_events_total{query_id=\"$query_id\"})", + "expr": "sum by (wait_event_type, wait_event) (pgwatch_wait_events_total{query_id=\"$query_id\"})", "hide": false, - "legendFormat": "__auto", + "legendFormat": "{{wait_event_type}}:{{wait_event}}", "range": true, "refId": "A" }, @@ -2181,13 +2150,13 @@ "text": "local", "value": "local" }, - "definition": "label_values(pgwatch_settings_configured,cluster)", + "definition": "label_values(pgwatch_db_size_size_b,cluster)", "label": "Cluster name", "name": "cluster_name", "options": [], "query": { "qryType": 1, - "query": "label_values(pgwatch_settings_configured,cluster)", + "query": 
"label_values(pgwatch_db_size_size_b,cluster)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -2199,13 +2168,13 @@ "text": "node-01", "value": "node-01" }, - "definition": "label_values(pgwatch_settings_configured{cluster=\"$cluster_name\"},node_name)", + "definition": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\"},node_name)", "label": "Node name", "name": "node_name", "options": [], "query": { "qryType": 1, - "query": "label_values(pgwatch_settings_configured{cluster=\"$cluster_name\"},node_name)", + "query": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\"},node_name)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -2217,13 +2186,13 @@ "text": "postgres", "value": "postgres" }, - "definition": "label_values(pgwatch_pg_database_wraparound_age_datfrozenxid{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", + "definition": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", "label": "DB name", "name": "db_name", "options": [], "query": { "qryType": 1, - "query": "label_values(pgwatch_pg_database_wraparound_age_datfrozenxid{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", + "query": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git a/config/grafana/dashboards/Dashboard_4_Wait_Sampling_Dashboard.json b/config/grafana/dashboards/Dashboard_4_Wait_Sampling_Dashboard.json index ededee7..269549c 100644 --- a/config/grafana/dashboards/Dashboard_4_Wait_Sampling_Dashboard.json +++ b/config/grafana/dashboards/Dashboard_4_Wait_Sampling_Dashboard.json @@ -18,7 +18,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 1, + "id": 7, "links": [], "panels": [ { @@ -37,11 +37,11 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "barAlignment": 0, "axisSoftMin": 0, - "barWidthFactor": 0.5, + "barAlignment": 0, + "barWidthFactor": 1, "drawStyle": "bars", - "fillOpacity": 40, + "fillOpacity": 100, "gradientMode": "none", "hideFrom": { "legend": false, @@ -172,6 +172,81 @@ } } ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "Activity" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "BufferPin" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "IPC" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "InjectionPoint" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "pink", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "Extension" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-purple", + "mode": "fixed" + } + } + ] } ] }, @@ -203,7 +278,7 @@ "targets": [ { "editorMode": "code", - "expr": "sum by (wait_event_type) (pgwatch_wait_events_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"})", + "expr": "sort_by_label(sum by (wait_event_type) (pgwatch_wait_events_total{cluster='$cluster_name', 
node_name='$node_name', datname=~\"$db_name\"}), 'wait_event_type')", "hide": false, "legendFormat": "__auto", "range": true, @@ -268,11 +343,11 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "barAlignment": 0, "axisSoftMin": 0, - "barWidthFactor": 0.5, + "barAlignment": 0, + "barWidthFactor": 1, "drawStyle": "bars", - "fillOpacity": 40, + "fillOpacity": 100, "gradientMode": "none", "hideFrom": { "legend": false, @@ -403,6 +478,81 @@ } } ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "Activity.*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "BufferPin.*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "IPC.*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "InjectionPoint.*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "pink", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "Extension.*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-purple", + "mode": "fixed" + } + } + ] } ] }, @@ -434,7 +584,7 @@ "targets": [ { "editorMode": "code", - "expr": "sum by (wait_event_type, wait_event) (pgwatch_wait_events_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\", wait_event_type=~\"$wait_event_type\"})", + "expr": "sort_by_label(sum by (wait_event_type, wait_event) (pgwatch_wait_events_total{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\", wait_event_type=~\"$wait_event_type\"}), 'wait_event_type')", "hide": false, "legendFormat": "{{wait_event_type}} - {{wait_event}}", "range": true, @@ -473,9 +623,9 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "barAlignment": 0, "axisSoftMin": 0, - "barWidthFactor": 0.5, + "barAlignment": 0, + "barWidthFactor": 1, "drawStyle": "bars", "fillOpacity": 100, "gradientMode": "none", @@ -609,6 +759,81 @@ } ] }, + { + "matcher": { + "id": "byRegexp", + "options": "Activity.*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "BufferPin.*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "IPC.*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "InjectionPoint.*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "pink", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "Extension.*" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-purple", + "mode": "fixed" + } + } + ] + }, { "matcher": { "id": "byFrameRefID", @@ -657,24 +882,11 @@ "targets": [ { "editorMode": "code", - "expr": "sum by (wait_event_type, wait_event, query_id) (pgwatch_wait_events_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\", wait_event_type=~\"$wait_event_type\", wait_event=~\"$wait_event\"})", + "expr": "sort_by_label(sum by (wait_event_type, wait_event, 
query_id) (pgwatch_wait_events_total{cluster='$cluster_name', node_name='$node_name', datname=~\"$db_name\", wait_event_type=~\"$wait_event_type\", wait_event=~\"$wait_event\"}), 'wait_event_type', 'query_id')", "hide": false, "legendFormat": "{{wait_event_type}} - {{wait_event}} - {{query_id}}", "range": true, "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "editorMode": "code", - "expr": "sum by (event_type) (irate(pgwatch_wait_events_sampling_total[$__rate_interval]))>0", - "hide": true, - "interval": "20", - "legendFormat": " {{event_type}}", - "range": true, - "refId": "D" } ], "title": "Active session history by event type and event", @@ -715,15 +927,13 @@ { "allowCustomValue": false, "current": { - "text": [ - "CPU*" - ], + "text": "All", "value": [ - "CPU*" + "$__all" ] }, "definition": "label_values(pgwatch_wait_events_total,wait_event_type)", - "includeAll": false, + "includeAll": true, "label": "Wait event type", "multi": true, "name": "wait_event_type", @@ -740,14 +950,13 @@ { "allowCustomValue": false, "current": { - "text": [ - "CPU*" - ], + "text": "All", "value": [ - "CPU*" + "$__all" ] }, "definition": "label_values(pgwatch_wait_events_total{wait_event_type=~\"$wait_event_type\"},wait_event)", + "includeAll": true, "label": "Wait event", "multi": true, "name": "wait_event", @@ -763,16 +972,16 @@ }, { "current": { - "text": "local", - "value": "local" + "text": "default", + "value": "default" }, - "definition": "label_values(pgwatch_settings_configured,cluster)", + "definition": "label_values(pgwatch_db_size_size_b,cluster)", "label": "Cluster name", "name": "cluster_name", "options": [], "query": { "qryType": 1, - "query": "label_values(pgwatch_settings_configured,cluster)", + "query": "label_values(pgwatch_db_size_size_b,cluster)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -781,16 +990,16 @@ }, { "current": { - "text": "node-01", - "value": "node-01" + "text": "postgres_ai", + "value": "postgres_ai" }, - "definition": "label_values(pgwatch_settings_configured{cluster=\"$cluster_name\"},node_name)", + "definition": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\"},node_name)", "label": "Node name", "name": "node_name", "options": [], "query": { "qryType": 1, - "query": "label_values(pgwatch_settings_configured{cluster=\"$cluster_name\"},node_name)", + "query": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\"},node_name)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -798,17 +1007,22 @@ "type": "query" }, { + "allowCustomValue": false, "current": { - "text": "postgres", - "value": "postgres" + "text": "All", + "value": [ + "$__all" + ] }, - "definition": "label_values(pgwatch_pg_database_wraparound_age_datfrozenxid{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", + "definition": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", + "includeAll": true, "label": "DB name", + "multi": true, "name": "db_name", "options": [], "query": { "qryType": 1, - "query": "label_values(pgwatch_pg_database_wraparound_age_datfrozenxid{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", + "query": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -818,12 
+1032,12 @@ ] }, "time": { - "from": "now-1h", + "from": "now-6h", "to": "now" }, "timepicker": {}, "timezone": "browser", - "title": "04. Wait event analysis (Active Session History)", + "title": "04. Wait event analysis (Active Session History)", "uid": "a222b233-acef-4bac-a451-1591023e4d4f", - "version": 2 -} \ No newline at end of file + "version": 11 +} diff --git a/config/grafana/dashboards/Dashboard_7_Autovacuum_and_bloat.json b/config/grafana/dashboards/Dashboard_7_Autovacuum_and_bloat.json index 70e1226..e5005a0 100644 --- a/config/grafana/dashboards/Dashboard_7_Autovacuum_and_bloat.json +++ b/config/grafana/dashboards/Dashboard_7_Autovacuum_and_bloat.json @@ -165,13 +165,13 @@ "text": "default", "value": "default" }, - "definition": "label_values(pgwatch_settings_configured,cluster)", + "definition": "label_values(pgwatch_db_size_size_b,cluster)", "label": "Cluster name", "name": "cluster_name", "options": [], "query": { "qryType": 1, - "query": "label_values(pgwatch_settings_configured,cluster)", + "query": "label_values(pgwatch_db_size_size_b,cluster)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -183,13 +183,13 @@ "text": "prod-db", "value": "prod-db" }, - "definition": "label_values(pgwatch_settings_configured{cluster=\"$cluster_name\"},node_name)", + "definition": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\"},node_name)", "label": "Node name", "name": "node_name", "options": [], "query": { "qryType": 1, - "query": "label_values(pgwatch_settings_configured{cluster=\"$cluster_name\"},node_name)", + "query": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\"},node_name)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -201,13 +201,13 @@ "text": "postgres", "value": "postgres" }, - "definition": "label_values(pgwatch_pg_database_wraparound_age_datfrozenxid{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", + "definition": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", "label": "DB name", "name": "db_name", "options": [], "query": { "qryType": 1, - "query": "label_values(pgwatch_pg_database_wraparound_age_datfrozenxid{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", + "query": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git a/config/grafana/dashboards/Dashboard_8_Table_Stats.json b/config/grafana/dashboards/Dashboard_8_Table_Stats.json index 118d312..5663097 100644 --- a/config/grafana/dashboards/Dashboard_8_Table_Stats.json +++ b/config/grafana/dashboards/Dashboard_8_Table_Stats.json @@ -22,13 +22,439 @@ "links": [], "panels": [ { - "collapsed": false, + "collapsed": true, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, + "id": 44, + "panels": [ + { + "datasource": { + "type": "yesoreyeram-infinity-datasource", + "uid": "aerffb0z8rjlsc" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total Size" + }, + "properties": [ + { + "id": "unit", + 
"value": "bytes" + }, + { + "id": "custom.width", + "value": 150 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Table Size" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + }, + { + "id": "custom.width", + "value": 140 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Index Size" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + }, + { + "id": "custom.width", + "value": 140 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "TOAST Size" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + }, + { + "id": "custom.width", + "value": 140 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Seq Scans" + }, + "properties": [ + { + "id": "unit", + "value": "short" + }, + { + "id": "custom.width", + "value": 120 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Idx Scans" + }, + "properties": [ + { + "id": "unit", + "value": "short" + }, + { + "id": "custom.width", + "value": 120 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Inserts" + }, + "properties": [ + { + "id": "unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Updates" + }, + "properties": [ + { + "id": "unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Deletes" + }, + "properties": [ + { + "id": "unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "HOT Updates" + }, + "properties": [ + { + "id": "unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Heap Blks Read" + }, + "properties": [ + { + "id": "unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Heap Blks Hit" + }, + "properties": [ + { + "id": "unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Idx Blks Read" + }, + "properties": [ + { + "id": "unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Idx Blks Hit" + }, + "properties": [ + { + "id": "unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Schema" + }, + "properties": [ + { + "id": "custom.width", + "value": 150 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Table Name" + }, + "properties": [ + { + "id": "custom.width", + "value": 300 + }, + { + "id": "links", + "value": [ + { + "targetBlank": true, + "title": "Single Table Analysis", + "url": "/d/9-single-table-analysis/9-single-table-analysis?orgId=1&var-cluster_name=${cluster_name}&var-node_name=${node_name}&var-db_name=${db_name}&var-schema_name=${__data.fields[\"Schema\"]}&var-table_name=${__data.fields[\"Table Name\"]}&from=${__from:date}&to=${__to:date}" + } + ] + } + ] + } + ] + }, + "gridPos": { + "h": 19, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 45, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Total Size" + } + ] + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "columns": [ + { + "selector": "schema", + "text": "Schema", + "type": "string" + }, + { + "selector": "table_name", + "text": "Table Name", + "type": "string" + }, + { + "selector": "total_size", + "text": "Total Size", + "type": "number" + }, + { + "selector": "table_size", + "text": "Table Size", + "type": "number" + }, + { + "selector": "index_size", + "text": "Index Size", + "type": "number" 
+ }, + { + "selector": "toast_size", + "text": "TOAST Size", + "type": "number" + }, + { + "selector": "seq_scan", + "text": "Seq Scans", + "type": "number" + }, + { + "selector": "idx_scan", + "text": "Idx Scans", + "type": "number" + }, + { + "selector": "n_tup_ins", + "text": "Inserts", + "type": "number" + }, + { + "selector": "n_tup_upd", + "text": "Updates", + "type": "number" + }, + { + "selector": "n_tup_del", + "text": "Deletes", + "type": "number" + }, + { + "selector": "n_tup_hot_upd", + "text": "HOT Updates", + "type": "number" + }, + { + "selector": "heap_blks_read", + "text": "Heap Blks Read", + "type": "number" + }, + { + "selector": "heap_blks_hit", + "text": "Heap Blks Hit", + "type": "number" + }, + { + "selector": "idx_blks_read", + "text": "Idx Blks Read", + "type": "number" + }, + { + "selector": "idx_blks_hit", + "text": "Idx Blks Hit", + "type": "number" + } + ], + "computed_columns": [], + "datasource": { + "type": "yesoreyeram-infinity-datasource", + "uid": "aerffb0z8rjlsc" + }, + "filters": [], + "format": "table", + "global_query_id": "", + "parser": "backend", + "refId": "A", + "root_selector": "", + "source": "url", + "type": "csv", + "url": "http://flask-pgss-api:8000/table_info/csv", + "url_options": { + "data": "", + "method": "GET", + "params": [ + { + "key": "db_name", + "value": "${db_name}" + }, + { + "key": "cluster_name", + "value": "${cluster_name}" + }, + { + "key": "node_name", + "value": "${node_name}" + }, + { + "key": "schemaname", + "value": "${schema_name}" + } + ] + } + } + ], + "title": "", + "transformations": [], + "type": "table" + } + ], + "title": "Detailed table view (aggregated table statistics)", + "type": "row" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1 + }, "id": 27, "panels": [], "title": "Size stats", @@ -2824,13 +3250,13 @@ "text": "default", "value": "default" }, - "definition": "label_values(pgwatch_settings_configured,cluster)", + "definition": "label_values(pgwatch_db_size_size_b,cluster)", "label": "Cluster name", "name": "cluster_name", "options": [], "query": { "qryType": 1, - "query": "label_values(pgwatch_settings_configured,cluster)", + "query": "label_values(pgwatch_db_size_size_b,cluster)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -2842,13 +3268,13 @@ "text": "postgres_ai", "value": "postgres_ai" }, - "definition": "label_values(pgwatch_settings_configured{cluster=\"$cluster_name\"},node_name)", + "definition": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\"},node_name)", "label": "Node name", "name": "node_name", "options": [], "query": { "qryType": 1, - "query": "label_values(pgwatch_settings_configured{cluster=\"$cluster_name\"},node_name)", + "query": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\"},node_name)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -2860,13 +3286,13 @@ "text": "workloaddb", "value": "workloaddb" }, - "definition": "label_values(pgwatch_pg_database_wraparound_age_datfrozenxid{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", + "definition": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", "label": "DB name", "name": "db_name", "options": [], "query": { "qryType": 1, - "query": "label_values(pgwatch_pg_database_wraparound_age_datfrozenxid{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", + "query": 
"label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git a/config/grafana/dashboards/Dashboard_9_Single_table_analysis.json b/config/grafana/dashboards/Dashboard_9_Single_table_analysis.json index a7364ec..1e1b8e3 100644 --- a/config/grafana/dashboards/Dashboard_9_Single_table_analysis.json +++ b/config/grafana/dashboards/Dashboard_9_Single_table_analysis.json @@ -1951,13 +1951,13 @@ "text": "default", "value": "default" }, - "definition": "label_values(pgwatch_settings_configured,cluster)", + "definition": "label_values(pgwatch_db_size_size_b,cluster)", "label": "Cluster name", "name": "cluster_name", "options": [], "query": { "qryType": 1, - "query": "label_values(pgwatch_settings_configured,cluster)", + "query": "label_values(pgwatch_db_size_size_b,cluster)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -1969,13 +1969,13 @@ "text": "postgres_ai", "value": "postgres_ai" }, - "definition": "label_values(pgwatch_settings_configured{cluster=\"$cluster_name\"},node_name)", + "definition": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\"},node_name)", "label": "Node name", "name": "node_name", "options": [], "query": { "qryType": 1, - "query": "label_values(pgwatch_settings_configured{cluster=\"$cluster_name\"},node_name)", + "query": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\"},node_name)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -1987,13 +1987,13 @@ "text": "workloaddb", "value": "workloaddb" }, - "definition": "label_values(pgwatch_pg_database_wraparound_age_datfrozenxid{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", + "definition": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", "label": "DB name", "name": "db_name", "options": [], "query": { "qryType": 1, - "query": "label_values(pgwatch_pg_database_wraparound_age_datfrozenxid{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", + "query": "label_values(pgwatch_db_size_size_b{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, diff --git a/config/grafana/provisioning/grafana.ini b/config/grafana/provisioning/grafana.ini index 1438d05..ecbb7cc 100644 --- a/config/grafana/provisioning/grafana.ini +++ b/config/grafana/provisioning/grafana.ini @@ -1,2 +1,12 @@ [users] home_page = /d/f90500a0-a12e-4081-a2f0-07ed96f27915/1-postgres-node-performance-overview-high-level/ + +[auth] +# When OAuth is enabled, optionally disable the basic login form +disable_login_form = false + +[auth.generic_oauth] +# OAuth is disabled by default; enable via GF_AUTH_GENERIC_OAUTH_ENABLED env var +enabled = false +name = PostgresAI +allow_sign_up = true diff --git a/config/pgwatch-postgres/metrics.yml b/config/pgwatch-postgres/metrics.yml index 7ff6f83..cecda73 100644 --- a/config/pgwatch-postgres/metrics.yml +++ b/config/pgwatch-postgres/metrics.yml @@ -8,14 +8,194 @@ metrics: queryid, query from pg_stat_statements - where queryid is not null + where + queryid is not null + and dbid = (select oid from pg_database where datname = current_database()) order by total_exec_time desc - limit 1000; gauges: - '*' + index_definitions: + description: "Index definitions for 
unused and redundant indexes only" + sqls: + 11: |- + with fk_indexes as ( + select + n.nspname as schema_name, + ci.relname as index_name, + cr.relname as table_name, + (confrelid::regclass)::text as fk_table_ref, + array_to_string(indclass, ', ') as opclasses + from pg_index i + join pg_class ci on ci.oid = i.indexrelid and ci.relkind = 'i' + join pg_class cr on cr.oid = i.indrelid and cr.relkind = 'r' + join pg_namespace n on n.oid = ci.relnamespace + join pg_constraint cn on cn.conrelid = cr.oid + left join pg_stat_all_indexes as si on si.indexrelid = i.indexrelid + where + contype = 'f' + and not i.indisunique + and conkey is not null + and ci.relpages > 5 + and si.idx_scan < 10 + ), + -- Unused indexes + table_scans as ( + select relid, + tables.idx_scan + tables.seq_scan as all_scans, + (tables.n_tup_ins + tables.n_tup_upd + tables.n_tup_del) as writes, + pg_relation_size(relid) as table_size + from pg_stat_all_tables as tables + join pg_class c on c.oid = relid + where c.relpages > 5 + ), + indexes as ( + select + i.indrelid, + i.indexrelid, + n.nspname as schema_name, + cr.relname as table_name, + ci.relname as index_name, + si.idx_scan, + pg_relation_size(i.indexrelid) as index_bytes, + ci.relpages, + (a.amname = 'btree') as idx_is_btree, + array_to_string(i.indclass, ', ') as opclasses + from pg_index i + join pg_class ci on ci.oid = i.indexrelid and ci.relkind = 'i' + join pg_class cr on cr.oid = i.indrelid and cr.relkind = 'r' + join pg_namespace n on n.oid = ci.relnamespace + join pg_am a on ci.relam = a.oid + left join pg_stat_all_indexes as si on si.indexrelid = i.indexrelid + where + not i.indisunique + and i.indisvalid + and ci.relpages > 5 + ), + unused_index_ids as ( + select + i.indexrelid as index_id, + i.schema_name, + i.table_name, + i.index_name + from indexes i + join table_scans ts on ts.relid = i.indrelid + where + i.idx_scan = 0 + and i.idx_is_btree + order by i.index_bytes desc + limit 10000 + ), + -- Redundant indexes + index_data as ( + select + *, + indkey::text as columns, + array_to_string(indclass, ', ') as opclasses + from pg_index i + join pg_class ci on ci.oid = i.indexrelid and ci.relkind = 'i' + where + indisvalid + and ci.relpages > 5 + ), + redundant_index_pairs as ( + select + i2.indexrelid as redundant_index_id, + i1.indexrelid as reason_index_id, + tnsp.nspname as schema_name, + trel.relname as table_name, + irel.relname as index_name, + pg_relation_size(i2.indexrelid) as index_size_bytes + from ( + select indrelid, indexrelid, opclasses, indclass, indexprs, indpred, indisprimary, indisunique, columns + from index_data + order by indexrelid + ) as i1 + join index_data as i2 on + i1.indrelid = i2.indrelid + and i1.indexrelid <> i2.indexrelid + inner join pg_opclass op1 on i1.indclass[0] = op1.oid + inner join pg_opclass op2 on i2.indclass[0] = op2.oid + inner join pg_am am1 on op1.opcmethod = am1.oid + inner join pg_am am2 on op2.opcmethod = am2.oid + join pg_stat_all_indexes as s on s.indexrelid = i2.indexrelid + join pg_class as trel on trel.oid = i2.indrelid + join pg_namespace as tnsp on trel.relnamespace = tnsp.oid + join pg_class as irel on irel.oid = i2.indexrelid + where + not i2.indisprimary + and not i2.indisunique + and am1.amname = am2.amname + and i1.columns like (i2.columns || '%') + and i1.opclasses like (i2.opclasses || '%') + and pg_get_expr(i1.indexprs, i1.indrelid) is not distinct from pg_get_expr(i2.indexprs, i2.indrelid) + and pg_get_expr(i1.indpred, i1.indrelid) is not distinct from pg_get_expr(i2.indpred, i2.indrelid) + ), 
+ redundant_indexes_tmp_num as ( + select row_number() over () num, rip.* + from redundant_index_pairs rip + ), + redundant_indexes_tmp_links as ( + select + ri1.*, + ri2.num as r_num + from redundant_indexes_tmp_num ri1 + left join redundant_indexes_tmp_num ri2 on + ri2.reason_index_id = ri1.redundant_index_id + and ri1.reason_index_id = ri2.redundant_index_id + ), + redundant_index_ids as ( + select distinct + index_id, + schema_name, + table_name, + index_name, + index_size_bytes + from ( + select + redundant_index_id as index_id, + schema_name, + table_name, + index_name, + index_size_bytes + from redundant_indexes_tmp_links + where num < r_num or r_num is null + union all + select + reason_index_id as index_id, + schema_name, + table_name, + index_name, + index_size_bytes + from redundant_indexes_tmp_links + where + num < r_num + or r_num is null + ) as combined + order by index_size_bytes desc + limit 10000 + ), + -- Combine unused and redundant index IDs + all_target_indexes as ( + select distinct index_id, schema_name, table_name, index_name + from unused_index_ids + union + select distinct index_id, schema_name, table_name, index_name + from redundant_index_ids + ) + select /* pgwatch_generated */ + ati.index_name as indexrelname, + ati.schema_name as schemaname, + ati.table_name as relname, + pg_get_indexdef(ati.index_id) as index_definition + from all_target_indexes ati + order by schemaname, relname, indexrelname; + gauges: + - '*' + presets: full: description: "Full metrics for PostgreSQL storage" metrics: - pgss_queryid_queries: 300 \ No newline at end of file + pgss_queryid_queries: 30 + index_definitions: 3600 \ No newline at end of file diff --git a/config/pgwatch-prometheus/metrics.yml b/config/pgwatch-prometheus/metrics.yml index a87b649..13d5116 100644 --- a/config/pgwatch-prometheus/metrics.yml +++ b/config/pgwatch-prometheus/metrics.yml @@ -127,7 +127,6 @@ metrics: blk_read_time, blk_write_time, extract(epoch from (now() - pg_postmaster_start_time()))::int8 as postmaster_uptime_s, - extract(epoch from (now() - pg_backup_start_time()))::int8 as backup_duration_s, case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int, system_identifier::text as tag_sys_id, (select count(*) from pg_index i @@ -282,7 +281,7 @@ metrics: It returns the lock mode and the count of locks for each mode. This metric helps administrators monitor lock contention and performance. sqls: 13: |- - WITH q_locks as ( + with q_locks as ( /* pgwatch_generated */ select * from @@ -389,7 +388,7 @@ metrics: sqls: 11: |- with recursive - q_root_part as ( + q_root_part as ( /* pgwatch_generated */ select c.oid, c.relkind, n.nspname root_schema, @@ -524,8 +523,8 @@ metrics: ) x order by table_size_b desc nulls last limit 300 16: |- - with recursive /* pgwatch_generated */ - q_root_part as ( + with recursive + q_root_part as ( /* pgwatch_generated */ select c.oid, c.relkind, n.nspname root_schema, @@ -712,24 +711,24 @@ metrics: providing insights into potential bottlenecks and resource contention issues. 
sqls: 11: |- - select datname as tag_datname, coalesce (wait_event, 'CPU*') as tag_wait_event, coalesce(wait_event_type, 'CPU*') as tag_wait_event_type, count(*) as total + select datname as tag_datname, coalesce (wait_event, 'CPU*') as tag_wait_event, coalesce(wait_event_type, 'CPU*') as tag_wait_event_type, count(*) as total /* pgwatch_generated */ from pg_stat_activity where state = 'active' group by tag_datname, tag_wait_event_type, tag_wait_event union select 'server_process' as tag_datname, coalesce (wait_event, 'CPU*') as tag_wait_event, coalesce(wait_event_type, 'CPU*') as tag_wait_event_type, count(*) as total from pg_stat_activity - where state = 'active' and datname IS null + where state = 'active' and datname is null group by tag_datname, tag_wait_event_type, tag_wait_event 14: |- - select datname as tag_datname, query_id::text as tag_query_id, coalesce (wait_event, 'CPU*') as tag_wait_event, coalesce(wait_event_type, 'CPU*') as tag_wait_event_type, count(*) as total + select datname as tag_datname, query_id::text as tag_query_id, coalesce (wait_event, 'CPU*') as tag_wait_event, coalesce(wait_event_type, 'CPU*') as tag_wait_event_type, count(*) as total /* pgwatch_generated */ from pg_stat_activity where state = 'active' group by tag_datname, tag_query_id, tag_wait_event_type, tag_wait_event union select 'server_process' as tag_datname, null as tag_query_id, coalesce (wait_event, 'CPU*') as tag_wait_event, coalesce(wait_event_type, 'CPU*') as tag_wait_event_type, count(*) as total from pg_stat_activity - where state = 'active' and datname IS null + where state = 'active' and datname is null group by tag_datname, tag_query_id, tag_wait_event_type, tag_wait_event gauges: - total @@ -742,11 +741,11 @@ metrics: pg_database.datname as tag_datname, tmp2.tag_application_name, tmp.tag_state, - COALESCE(count,0) as count, - COALESCE(max_tx_duration,0) as max_tx_duration + coalesce(count,0) as count, + coalesce(max_tx_duration,0) as max_tx_duration from ( - VALUES ('active'), + values ('active'), ('idle'), ('idle in transaction'), ('idle in transaction (aborted)'), @@ -760,7 +759,7 @@ metrics: application_name as tag_application_name, state as tag_state, count(*) as count, - MAX(extract(epoch from now() - xact_start))::float as max_tx_duration + max(extract(epoch from now() - xact_start))::float as max_tx_duration from pg_stat_activity group by datname, tag_application_name, tag_state ) as tmp2 @@ -774,8 +773,8 @@ metrics: pg_archiver: sqls: 11: | - WITH - current_wal_file as ( + with + current_wal_file as ( /* pgwatch_generated */ select case when not pg_is_in_recovery() then pg_walfile_name(pg_current_wal_insert_lsn()) else null end pg_walfile_name ), current_wal as ( @@ -802,7 +801,7 @@ metrics: pg_blocked: sqls: 11: |- - select + select /* pgwatch_generated */ (extract(epoch from now()) * 1e9)::int8 as epoch_ns, current_database() as tag_datname, count(*) as queries, @@ -811,16 +810,83 @@ metrics: else relation::regclass::text end as tag_table from pg_catalog.pg_locks blocked - where not blocked.granted + where + not blocked.granted + and database = (select oid from pg_database where datname = current_database()) group by locktype, relation limit 5000 gauges: - queries statement_timeout_seconds: 15 + lock_waits: + description: > + Retrieves detailed information about lock waits, including blocked and blocking processes with their queries, users, and application names. 
+ It returns blocked and blocker process IDs, lock modes and types, affected tables, queries, and wait/transaction durations. + This metric helps administrators identify and diagnose lock contention issues in detail. + sqls: + 14: |- + with sa_snapshot as ( /* pgwatch_generated */ + select * + from pg_stat_activity + where + datname = current_database() + and pid <> pg_backend_pid() + and state in ('active', 'idle in transaction', 'idle in transaction (aborted)') + ), + pid_tables as ( + select distinct on (pid) pid, relation::regclass::text as table_name + from pg_catalog.pg_locks + where relation is not null + and locktype in ('tuple', 'relation') + and relation::regclass::text not like '%_pkey' + and relation::regclass::text not like '%_idx' + order by pid, locktype + ) + select + blocked.pid as blocked_pid, + current_database() as tag_datname, + blocked_stm.usename::text as tag_blocked_user, + blocked_stm.application_name::text as tag_blocked_appname, + blocked.mode as blocked_mode, + blocked.locktype as blocked_locktype, + coalesce(blocked.relation::regclass::text, blocked_tbl.table_name, '') as tag_blocked_table, + blocked_stm.query_id::text as tag_blocked_query_id, + (extract(epoch from (clock_timestamp() - blocked_stm.state_change)) * 1000)::bigint as blocked_ms, + blocker.pid as blocker_pid, + blocker_stm.usename::text as tag_blocker_user, + blocker_stm.application_name::text as tag_blocker_appname, + blocker.mode as blocker_mode, + blocker.locktype as blocker_locktype, + coalesce(blocker.relation::regclass::text, blocker_tbl.table_name, '') as tag_blocker_table, + blocker_stm.query_id::text as tag_blocker_query_id, + (extract(epoch from (clock_timestamp() - blocker_stm.xact_start)) * 1000)::bigint as blocker_tx_ms + from pg_catalog.pg_locks as blocked + join sa_snapshot as blocked_stm on blocked_stm.pid = blocked.pid + join pg_catalog.pg_locks as blocker on + blocked.pid <> blocker.pid + and blocker.granted + and ( + (blocked.database = blocker.database) + or (blocked.database is null and blocker.database is null) + ) + and ( + blocked.relation = blocker.relation + or blocked.transactionid = blocker.transactionid + ) + join sa_snapshot as blocker_stm on blocker_stm.pid = blocker.pid + left join pid_tables as blocked_tbl on blocked_tbl.pid = blocked.pid + left join pid_tables as blocker_tbl on blocker_tbl.pid = blocker.pid + where not blocked.granted + order by blocked_ms desc + limit 10000 + gauges: + - blocked_ms + - blocker_tx_ms + statement_timeout_seconds: 15 pg_database_wraparound: sqls: 11: | - select + select /* pgwatch_generated */ (extract(epoch from now()) * 1e9)::int8 as epoch_ns, datname as tag_datname, age(d.datfrozenxid) as age_datfrozenxid, @@ -836,19 +902,23 @@ metrics: pg_long_running_transactions: sqls: 11: | - select (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + select (extract(epoch from now()) * 1e9)::int8 as epoch_ns, /* pgwatch_generated */ current_database() as tag_datname, - COUNT(*) as transactions, - COALESCE(MAX(extract(epoch from (clock_timestamp() - xact_start)))::int8, 0) as age_in_seconds + count(*) as transactions, + coalesce(max(extract(epoch from (clock_timestamp() - xact_start)))::int8, 0) as age_in_seconds from pg_catalog.pg_stat_activity - where state is distinct from 'idle' and (now() - xact_start) > '1 minutes'::interval and query not like 'autovacuum:%' + where + state is distinct from 'idle' + and datname = current_database() + and (clock_timestamp() - xact_start) > '1 minutes'::interval + and query not like 'autovacuum:%' 
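+          -- ages are measured from xact_start; autovacuum workers and other databases are excluded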
gauges: - '*' statement_timeout_seconds: 15 pg_stat_replication: sqls: 11: | - select (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + select (extract(epoch from now()) * 1e9)::int8 as epoch_ns, /* pgwatch_generated */ current_database() as tag_datname, application_name as tag_application_name, (pg_current_wal_lsn() - '0/0') % (2^52)::bigint as current_wal_lsn, @@ -869,7 +939,7 @@ metrics: pg_stat_statements: sqls: 11: | - WITH aggregated_statements as ( + with aggregated_statements as ( /* pgwatch_generated */ select pg_database.datname, pg_stat_statements.queryid, @@ -890,8 +960,8 @@ metrics: sum(current_setting('block_size')::int * pg_stat_statements.temp_blks_read) as temp_bytes_read, sum(current_setting('block_size')::int * pg_stat_statements.temp_blks_written) as temp_bytes_written from pg_stat_statements - join pg_database - on pg_database.oid = pg_stat_statements.dbid + join pg_database on pg_database.oid = pg_stat_statements.dbid + where pg_database.datname = current_database() group by pg_database.datname, pg_stat_statements.queryid ) select @@ -915,7 +985,7 @@ metrics: temp_bytes_written::int8 as temp_bytes_written from aggregated_statements 17: | - with aggregated_statements as ( + with aggregated_statements as ( /* pgwatch_generated */ select pg_database.datname, pg_stat_statements.queryid, @@ -936,8 +1006,8 @@ metrics: sum(current_setting('block_size')::int * pg_stat_statements.temp_blks_read) as temp_bytes_read, sum(current_setting('block_size')::int * pg_stat_statements.temp_blks_written) as temp_bytes_written from pg_stat_statements - join pg_database - on pg_database.oid = pg_stat_statements.dbid + join pg_database on pg_database.oid = pg_stat_statements.dbid + where pg_database.datname = current_database() group by pg_database.datname, pg_stat_statements.queryid ) select @@ -985,7 +1055,7 @@ metrics: as tags for easy filtering and querying. 
sqls: 11: | - select + select /* pgwatch_generated */ current_database() as tag_datname, n.nspname as tag_schemaname, c.relname as tag_relname, @@ -1009,7 +1079,7 @@ metrics: pg_stat_all_indexes: sqls: 11: | - select schemaname as tag_schemaname, + select schemaname as tag_schemaname, /* pgwatch_generated */ relname as tag_relname, indexrelname as tag_indexrelname, idx_scan, @@ -1026,7 +1096,7 @@ metrics: pg_stat_all_tables: sqls: 11: | - select + select /* pgwatch_generated */ current_database() as tag_datname, schemaname as tag_schemaname, relname as tag_relname, @@ -1040,8 +1110,8 @@ metrics: n_tup_hot_upd, n_live_tup, n_dead_tup, - GREATEST(last_autovacuum, last_vacuum, '1970-01-01Z') as last_vacuum, - GREATEST(last_autoanalyze, last_analyze, '1970-01-01Z') as last_analyze, + greatest(last_autovacuum, last_vacuum, '1970-01-01Z') as last_vacuum, + greatest(last_autoanalyze, last_analyze, '1970-01-01Z') as last_analyze, (vacuum_count + autovacuum_count) as vacuum_count, (analyze_count + autoanalyze_count) as analyze_count from @@ -1067,7 +1137,7 @@ metrics: pg_stat_wal_receiver: sqls: 11: | - select current_database() as tag_datname, + select current_database() as tag_datname, /* pgwatch_generated */ case status when 'stopped' then 0 when 'starting' then 1 when 'streaming' then 2 when 'waiting' then 3 when 'restarting' then 4 when 'stopping' then 5 else -1 end as tag_status, (receive_start_lsn- '0/0') % (2^52)::bigint as receive_start_lsn, receive_start_tli, @@ -1093,19 +1163,53 @@ metrics: - latest_end_time - upstream_node statement_timeout_seconds: 15 + pg_stat_slru: + description: > + Retrieves statistics from the PostgreSQL `pg_stat_slru` view for SLRU (Simple Least Recently Used) caches. + It tracks various SLRU caches like subtrans, multixact_offset, multixact_member, notify, serial, and clog. + Each cache reports blocks zeroed, hit, read, written, exists checks, flushes, truncates, and last stats reset time. + This metric helps administrators monitor SLRU cache performance and identify potential issues with transaction ID wraparound or other system caches. 
+ sqls: + 11: "; -- not available before PG 13" + 13: |- + select /* pgwatch_generated */ + current_database() as tag_datname, + name as tag_name, + blks_zeroed, + blks_hit, + blks_read, + blks_written, + blks_exists, + flushes, + truncates, + extract(epoch from stats_reset)::int8 as stats_reset_epoch + from pg_stat_slru + gauges: + - blks_zeroed + - blks_hit + - blks_read + - blks_written + - blks_exists + - flushes + - truncates + - stats_reset_epoch + statement_timeout_seconds: 15 pg_stuck_idle_in_transaction: sqls: 11: | - select current_database() as tag_datname, COUNT(*) as queries + select current_database() as tag_datname, count(*) as queries /* pgwatch_generated */ from pg_catalog.pg_stat_activity - where state = 'idle in transaction' and (now() - query_start) > '10 minutes'::interval + where + state = 'idle in transaction' + and datname = current_database() + and (clock_timestamp() - state_change) > '1 minutes'::interval gauges: - queries statement_timeout_seconds: 15 pg_total_relation_size: sqls: 11: | - select current_database() as tag_datname, + select current_database() as tag_datname, /* pgwatch_generated */ relnamespace::regnamespace as tag_schemaname, relname as tag_relname, pg_total_relation_size(oid) bytes @@ -1119,7 +1223,7 @@ metrics: pg_txid: sqls: 11: | - select + select /* pgwatch_generated */ current_database() as tag_datname, case when pg_is_in_recovery() then 'NaN'::float else txid_current() % (2^52)::bigint end as current, case when pg_is_in_recovery() then 'NaN'::float else txid_snapshot_xmin(txid_current_snapshot()) % (2^52)::bigint end as xmin, @@ -1132,7 +1236,7 @@ metrics: pg_xlog_position: sqls: 11: | - select current_database() as tag_datname, + select current_database() as tag_datname, /* pgwatch_generated */ case when pg_is_in_recovery() then (pg_last_wal_replay_lsn() - '0/0') % (2^52)::bigint @@ -1148,7 +1252,7 @@ metrics: This metric helps administrators identify indexes that may need maintenance like VACUUM FULL or index reorganization. sqls: 11: | - select current_database() as tag_datname, nspname as tag_schemaname, tblname as tag_tblname, idxname as tag_idxname, (bs*(relpages)/(1024*1024))::float as real_size_mib, + select current_database() as tag_datname, nspname as tag_schemaname, tblname as tag_tblname, idxname as tag_idxname, (bs*(relpages)/(1024*1024))::float as real_size_mib, /* pgwatch_generated */ (bs*(relpages-est_pages))::float as extra_size, 100 * (relpages-est_pages)::float / relpages as extra_pct, fillfactor, @@ -1186,7 +1290,7 @@ metrics: select n.nspname, i.tblname, i.idxname, i.reltuples, i.relpages, i.idxoid, i.fillfactor, current_setting('block_size')::numeric as bs, case -- MAXALIGN: 4 on 32bits, 8 on 64bits (and mingw32 ?) 
- when version() ~ 'mingw32' OR version() ~ '64-bit|x86_64|ppc64|ia64|amd64' then 8 + when version() ~ 'mingw32' or version() ~ '64-bit|x86_64|ppc64|ia64|amd64' then 8 else 4 end as maxalign, /* per page header, fixed size: 20 for 7.X, 24 for others */ @@ -1204,7 +1308,7 @@ metrics: from ( select ct.relname as tblname, ct.relnamespace, ic.idxname, ic.attpos, ic.indkey, ic.indkey[ic.attpos], ic.reltuples, ic.relpages, ic.tbloid, ic.idxoid, ic.fillfactor, coalesce(a1.attnum, a2.attnum) as attnum, coalesce(a1.attname, a2.attname) as attname, coalesce(a1.atttypid, a2.atttypid) as atttypid, - case when a1.attnum IS null + case when a1.attnum is null then ic.idxname else ct.relname end as attrelname @@ -1244,8 +1348,8 @@ metrics: ) as rows_data_stats ) as rows_hdr_pdg_stats ) as relation_stats - order by real_size_mib desc - limit 5000 + order by is_na = 0 desc, bloat_pct desc + limit 1000 gauges: - real_size_mib - extra_size @@ -1263,7 +1367,7 @@ metrics: This metric helps administrators identify tables that may need maintenance like VACUUM FULL or table reorganization. sqls: 11: | - select current_database() as tag_datname, schemaname as tag_schemaname, tblname as tag_tblname, (bs*tblpages)/(1024*1024)::float as real_size_mib, + select current_database() as tag_datname, schemaname as tag_schemaname, tblname as tag_tblname, (bs*tblpages)/(1024*1024)::float as real_size_mib, /* pgwatch_generated */ (tblpages-est_tblpages)*bs as extra_size, case when tblpages > 0 and tblpages - est_tblpages > 0 then 100 * (tblpages - est_tblpages)/tblpages::float @@ -1300,13 +1404,13 @@ metrics: array_to_string(tbl.reloptions, ' ') from 'fillfactor=([0-9]+)')::smallint, 100) as fillfactor, current_setting('block_size')::numeric as bs, - case when version()~'mingw32' OR version()~'64-bit|x86_64|ppc64|ia64|amd64' then 8 else 4 end as ma, + case when version()~'mingw32' or version()~'64-bit|x86_64|ppc64|ia64|amd64' then 8 else 4 end as ma, 24 as page_hdr, - 23 + case when MAX(coalesce(s.null_frac,0)) > 0 then ( 7 + count(s.attname) ) / 8 else 0::int end + 23 + case when max(coalesce(s.null_frac,0)) > 0 then ( 7 + count(s.attname) ) / 8 else 0::int end + case when bool_or(att.attname = 'oid' and att.attnum < 0) then 4 else 0 end as tpl_hdr_size, sum( (1-coalesce(s.null_frac, 0)) * coalesce(s.avg_width, 0) ) as tpl_data_size, (bool_or(att.atttypid = 'pg_catalog.name'::regtype) - OR sum(case when att.attnum > 0 then 1 else 0 end) <> count(s.attname))::int as is_na + or sum(case when att.attnum > 0 then 1 else 0 end) <> count(s.attname))::int as is_na from pg_attribute as att join pg_class as tbl on att.attrelid = tbl.oid join pg_namespace as ns on ns.oid = tbl.relnamespace @@ -1322,8 +1426,8 @@ metrics: ) as s3 -- where not is_na -- and tblpages*((pst).free_percent + (pst).dead_tuple_percent)::float4/100 >= 1 - order by real_size_mib desc - limit 5000 + order by is_na = 0 desc, bloat_pct desc + limit 1000 gauges: - real_size_mib - extra_size @@ -1341,7 +1445,7 @@ metrics: It helps administrators understand detailed storage breakdown for each table component. 
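+    # Breaks total relation size into main/FSM/VM forks for the heap, its indexes, and TOAST, plus aggregate totals.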
sqls: 11: |- - with table_sizes as ( + with table_sizes as ( /* pgwatch_generated */ select (extract(epoch from now()) * 1e9)::int8 as epoch_ns, current_database() as tag_datname, @@ -1371,7 +1475,7 @@ metrics: ) order by pg_total_relation_size(c.oid) desc ) - select /* pgwatch_generated */ + select epoch_ns, tag_datname, tag_schema, @@ -1389,6 +1493,7 @@ metrics: (toast_main_size_b + toast_fsm_size_b + toast_vm_size_b + toast_indexes_size_b) as total_toast_size_b from table_sizes where total_relation_size_b > 0 + limit 1000; gauges: - table_main_size_b - table_fsm_size_b @@ -1411,7 +1516,7 @@ metrics: This metric helps administrators identify and fix invalid indexes to improve database performance. sqls: 11: | - with fk_indexes as ( + with fk_indexes as ( /* pgwatch_generated */ select schemaname as tag_schema_name, (indexrelid::regclass)::text as tag_index_name, @@ -1465,7 +1570,8 @@ metrics: (extract(epoch from now()) * 1e9)::int8 as epoch_ns, current_database() as tag_datname, num_data.* - from num_data; + from num_data + limit 1000; gauges: - '*' statement_timeout_seconds: 15 @@ -1476,7 +1582,7 @@ metrics: and foreign key constraints. Uses the exact logic from tmp.sql with JSON aggregation and proper thresholds. sqls: 11: | - with fk_indexes as ( + with fk_indexes as ( /* pgwatch_generated */ select n.nspname as schema_name, ci.relname as index_name, @@ -1508,10 +1614,10 @@ metrics: ), redundant_indexes as ( select i2.indexrelid as index_id, - tnsp.nspname AS schema_name, - trel.relname AS table_name, + tnsp.nspname as schema_name, + trel.relname as table_name, pg_relation_size(trel.oid) as table_size_bytes, - irel.relname AS index_name, + irel.relname as index_name, am1.amname as access_method, (i1.indexrelid::regclass)::text as reason, i1.indexrelid as reason_index_id, @@ -1522,7 +1628,7 @@ metrics: s.idx_scan as index_usage, quote_ident(tnsp.nspname) as formated_schema_name, coalesce(nullif(quote_ident(tnsp.nspname), 'public') || '.', '') || quote_ident(irel.relname) as formated_index_name, - quote_ident(trel.relname) AS formated_table_name, + quote_ident(trel.relname) as formated_table_name, coalesce(nullif(quote_ident(tnsp.nspname), 'public') || '.', '') || quote_ident(trel.relname) as formated_relation_name, i2.opclasses from ( @@ -1544,9 +1650,7 @@ metrics: join pg_class as irel on irel.oid = i2.indexrelid where not i2.indisprimary -- index 1 is not primary - and not ( -- skip if index1 is (primary or uniq) and is NOT (primary and uniq) - i2.indisunique and not i1.indisprimary - ) + and not i2.indisunique -- index 1 is not unique (unique indexes serve constraint purpose) and am1.amname = am2.amname -- same access type and i1.columns like (i2.columns || '%') -- index 2 includes all columns from index 1 and i1.opclasses like (i2.opclasses || '%') @@ -1621,7 +1725,8 @@ metrics: supports_fk order by index_size_bytes desc ) - select * from redundant_indexes_grouped; + select * from redundant_indexes_grouped + limit 1000; gauges: - '*' statement_timeout_seconds: 15 @@ -1632,7 +1737,7 @@ metrics: This metric helps administrators identify and fix unused indexes to improve database performance. 
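+    # Reports non-unique B-tree indexes with zero scans; supports_fk marks indexes that back foreign-key constraints.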
sqls: 11: | - with fk_indexes as ( + with fk_indexes as ( /* pgwatch_generated */ select n.nspname as schema_name, ci.relname as index_name, @@ -1675,7 +1780,7 @@ metrics: join pg_class ci on ci.oid = i.indexrelid and ci.relkind = 'i' join pg_class cr on cr.oid = i.indrelid and cr.relkind = 'r' join pg_namespace n on n.oid = ci.relnamespace - join pg_am a ON ci.relam = a.oid + join pg_am a on ci.relam = a.oid left join pg_stat_all_indexes as si on si.indexrelid = i.indexrelid where i.indisunique = false @@ -1712,9 +1817,9 @@ metrics: select 'Never Used Indexes' as tag_reason, index_id, - schema_name AS tag_schema_name, - table_name AS tag_table_name, - index_name AS tag_index_name, + schema_name as tag_schema_name, + table_name as tag_table_name, + index_name as tag_index_name, idx_scan, all_scans, index_scan_pct, @@ -1724,13 +1829,14 @@ metrics: table_size_bytes, relpages, idx_is_btree, - opclasses AS tag_opclasses, + opclasses as tag_opclasses, supports_fk from index_ratios where idx_scan = 0 and idx_is_btree - order by index_size_bytes desc; + order by index_size_bytes desc + limit 1000; gauges: - '*' statement_timeout_seconds: 15 @@ -1741,7 +1847,7 @@ metrics: This metric helps administrators identify and fix rarely used indexes to improve database performance. sqls: 11: | - with fk_indexes as ( + with fk_indexes as ( /* pgwatch_generated */ select n.nspname as schema_name, ci.relname as index_name, @@ -1787,7 +1893,7 @@ metrics: join pg_class ci on ci.oid = i.indexrelid and ci.relkind = 'i' join pg_class cr on cr.oid = i.indrelid and cr.relkind = 'r' join pg_namespace n on n.oid = ci.relnamespace - join pg_am a ON ci.relam = a.oid + join pg_am a on ci.relam = a.oid left join pg_stat_all_indexes as si on si.indexrelid = i.indexrelid where i.indisunique = false @@ -1824,9 +1930,9 @@ metrics: select tag_reason, index_id, - schema_name AS tag_schema_name, - table_name AS tag_table_name, - index_name AS tag_index_name, + schema_name as tag_schema_name, + table_name as tag_table_name, + index_name as tag_index_name, idx_scan, all_scans, index_scan_pct, @@ -1836,7 +1942,7 @@ metrics: table_size_bytes, relpages, idx_is_btree, - opclasses AS tag_opclasses, + opclasses as tag_opclasses, supports_fk, grp from ( @@ -1874,7 +1980,8 @@ metrics: and not idx_is_btree and index_size_bytes > 100000000 ) t - order by grp, index_size_bytes desc; + order by grp, index_size_bytes desc + limit 1000; gauges: - '*' statement_timeout_seconds: 15 @@ -1887,11 +1994,10 @@ metrics: not per-index or per-table. 
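+    # Reported for the current database only (pg_stat_database.stats_reset).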
sqls: 11: | - select + select /* pgwatch_generated */ datname as tag_database_name, - stats_reset, - extract(epoch from stats_reset) as stats_reset_epoch, - extract(epoch from now() - stats_reset) as seconds_since_reset + extract(epoch from stats_reset)::int as stats_reset_epoch, + extract(epoch from now() - stats_reset)::int as seconds_since_reset from pg_stat_database where datname = current_database() and stats_reset is not null; @@ -1907,7 +2013,7 @@ metrics: sqls: 11: | -- postgresql wal archiving lag monitor - with wal_info as ( + with wal_info as ( /* pgwatch_generated */ select last_archived_wal, last_archived_time, @@ -1922,7 +2028,7 @@ metrics: from pg_stat_archiver where last_archived_wal is not null ) - select + select pg_wal_lsn_diff(pg_current_wal_lsn(), '0/0')::bigint as current_lsn_numeric, pg_wal_lsn_diff((log_id_hex || '/' || lpad(to_hex((segment_dec + 1) * wal_segment_size_bytes), 8, '0'))::pg_lsn, '0/0')::bigint as archived_wal_finish_lsn_numeric, @@ -1964,12 +2070,12 @@ metrics: pg_vacuum_progress: sqls: 11: | - select - current_database() AS tag_datname, - N.nspname as tag_schema_name, + select /* pgwatch_generated */ + current_database() as tag_datname, + n.nspname as tag_schema_name, c.relname as tag_table_name, - N.nspname || '.' || c.relname as tag_relname, - s.relid AS tag_relid, + n.nspname || '.' || c.relname as tag_relname, + s.relid as tag_relid, case when A.query ~ '^autovacuum.*(to prevent wraparound)' then 'aggressive_autovacuum' when A.query ~ '^autovacuum' then @@ -1984,22 +2090,22 @@ metrics: 1 when S.phase ~ 'scanning heap' then 2 - WHEN S.phase ~ 'vacuuming indexes' THEN + when s.phase ~ 'vacuuming indexes' then 3 - WHEN S.phase ~ 'vacuuming heap' THEN + when s.phase ~ 'vacuuming heap' then 4 - WHEN S.phase ~ 'cleaning up indexes' THEN + when s.phase ~ 'cleaning up indexes' then 5 - WHEN S.phase ~ 'truncating heap' THEN + when s.phase ~ 'truncating heap' then 6 - WHEN S.phase ~ 'final cleanup' THEN + when s.phase ~ 'final cleanup' then 7 end tag_phase, S.heap_blks_total::float, S.heap_blks_scanned::float, S.heap_blks_vacuumed::float, S.index_vacuum_count::float, - (S.max_dead_tuples / 1024 / 1024)::float as max_dead_tuples_mb, + (S.max_dead_tuples::float / 1024 / 1024) as max_dead_tuples_mb, S.num_dead_tuples::float as num_dead_item_ids from pg_stat_progress_vacuum as S @@ -2007,13 +2113,14 @@ metrics: join pg_stat_activity as A on (S.pid = A.pid) join pg_class C on (C.oid = S.relid) join pg_namespace N on (N.oid = C.relnamespace) + where D.datname = current_database() 17: | - select - current_database() AS tag_datname, - N.nspname as tag_schema_name, + select /* pgwatch_generated */ + current_database() as tag_datname, + n.nspname as tag_schema_name, c.relname as tag_table_name, - N.nspname || '.' || c.relname as tag_relname, - s.relid AS tag_relid, + n.nspname || '.' 
|| c.relname as tag_relname, + s.relid as tag_relid, case when A.query ~ '^autovacuum.*(to prevent wraparound)' then 'aggressive_autovacuum' when A.query ~ '^autovacuum' then @@ -2028,15 +2135,15 @@ metrics: 1 when S.phase ~ 'scanning heap' then 2 - WHEN S.phase ~ 'vacuuming indexes' THEN + when s.phase ~ 'vacuuming indexes' then 3 - WHEN S.phase ~ 'vacuuming heap' THEN + when s.phase ~ 'vacuuming heap' then 4 - WHEN S.phase ~ 'cleaning up indexes' THEN + when s.phase ~ 'cleaning up indexes' then 5 - WHEN S.phase ~ 'truncating heap' THEN + when s.phase ~ 'truncating heap' then 6 - WHEN S.phase ~ 'final cleanup' THEN + when s.phase ~ 'final cleanup' then 7 end tag_phase, S.heap_blks_total::float, @@ -2051,13 +2158,14 @@ metrics: join pg_stat_activity as A on (S.pid = A.pid) join pg_class C on (C.oid = S.relid) join pg_namespace N on (N.oid = C.relnamespace) + where D.datname = current_database() gauges: - '*' statement_timeout_seconds: 15 pg_index_pilot: sqls: 11: | - select + select /* pgwatch_generated */ (extract(epoch from now()) * 1e9)::int8 as epoch_ns, datname as tag_datname, schemaname as tag_schemaname, @@ -2079,7 +2187,7 @@ metrics: pg_index_pilot_config: sqls: 12: | - select + select /* pgwatch_generated */ coalesce(datname, '*') as tag_datname, coalesce(schemaname, '*') as tag_schemaname, coalesce(relname, '*') as tag_relname, @@ -2170,6 +2278,137 @@ metrics: gauges: - '*' statement_timeout_seconds: 15 + + multixact_size: + sqls: + 11: | + with env as ( /* pgwatch_generated */ + select + exists ( + select + from pg_proc p + join pg_namespace n on n.oid = p.pronamespace + where p.proname = 'pg_ls_multixactdir' and n.nspname = 'rds_tools' + ) as has_rds_fn, + exists ( + select + from pg_proc p + join pg_namespace n on n.oid = p.pronamespace + where p.proname = 'aurora_stat_file' and n.nspname = 'pg_catalog' + ) as has_aurora_fn, + exists (select from pg_proc where proname = 'pg_ls_dir') as has_pg_ls_dir_func, + exists (select from pg_proc where proname = 'pg_stat_file') as has_pg_stat_file_func + ), + can_local as ( + select (has_pg_ls_dir_func and has_pg_stat_file_func) as ok from env + ), + -- Use query_to_xml to safely execute Aurora-specific multixact query. + -- Aurora uses aurora_stat_file() function instead of rds_tools.pg_ls_multixactdir(). + aurora_probe_xml as ( + select query_to_xml($q$ + with files as ( + select filename, allocated_bytes, used_bytes + from aurora_stat_file() + where filename like 'pg_multixact/%' + ), + members as ( + select sum(used_bytes)::bigint as sz from files where filename like 'pg_multixact/members%' + ), + offsets as ( + select sum(used_bytes)::bigint as sz from files where filename like 'pg_multixact/offsets%' + ), + has_rows as ( + select exists(select 1 from files) as any_rows + ) + select + case when (select any_rows from has_rows) then coalesce((select sz from members), 0) end as members_bytes, + case when (select any_rows from has_rows) then coalesce((select sz from offsets), 0) end as offsets_bytes, + case when (select any_rows from has_rows) then 0 else 1 end as status_code + $q$, true, true, '') as x + where (select has_aurora_fn from env) + ), + -- Use query_to_xml to safely execute RDS-specific multixact directory listing query. + -- The XML wrapper allows the query to fail gracefully if rds_tools.pg_ls_multixactdir() + -- is unavailable or returns errors, preventing the entire metric from failing. 
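+      -- status_code convention shared by all probes below: 0 = multixact files found and sized,
+      -- 1 = probe ran but found no files, 2 = no probe produced a result (final fallback row).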
+ rds_probe_xml as ( + select query_to_xml($q$ + with files as ( + select name, size + from rds_tools.pg_ls_multixactdir() + ), + members as ( + select sum(size)::bigint as sz from files where name like 'pg_multixact/members%' + ), + offsets as ( + select sum(size)::bigint as sz from files where name like 'pg_multixact/offsets%' + ), + has_rows as ( + select exists(select 1 from files where name like 'pg_multixact/%') as any_rows + ) + select + case when (select any_rows from has_rows) then coalesce((select sz from members), 0) end as members_bytes, + case when (select any_rows from has_rows) then coalesce((select sz from offsets), 0) end as offsets_bytes, + case when (select any_rows from has_rows) then 0 else 1 end as status_code + $q$, true, true, '') as x + where (select has_rds_fn from env) and not (select has_aurora_fn from env) + ), + -- Use query_to_xml to safely execute standard Postgres multixact directory listing query. + -- The XML wrapper allows the query to fail gracefully if pg_stat_file() or pg_ls_dir() + -- are unavailable or return permission errors, preventing the entire metric from failing. + local_probe_xml as ( + select query_to_xml($q$ + with dirs as ( + select + (pg_stat_file('pg_multixact/members', true)).isdir as has_members, + (pg_stat_file('pg_multixact/offsets', true)).isdir as has_offsets + ), + flags as ( + select ((select has_members from dirs) or (select has_offsets from dirs)) as has_any + ), + members as ( + select sum((pg_stat_file(format('pg_multixact/members/%s', d), true)).size)::bigint as sz + from pg_ls_dir('pg_multixact/members') as d(d) + where (select has_members from dirs) + ), + offsets as ( + select sum((pg_stat_file(format('pg_multixact/offsets/%s', d), true)).size)::bigint as sz + from pg_ls_dir('pg_multixact/offsets') as d(d) + where (select has_offsets from dirs) + ) + select + case when (select has_any from flags) then coalesce((select sz from members), 0) end as members_bytes, + case when (select has_any from flags) then coalesce((select sz from offsets), 0) end as offsets_bytes, + case when (select has_any from flags) then 0 else 1 end as status_code + $q$, true, true, '') as x + where not (select has_rds_fn from env) and not (select has_aurora_fn from env) and (select ok from can_local) + ), + picked as ( + select * from aurora_probe_xml + union all + select * from rds_probe_xml + union all + select * from local_probe_xml + limit 1 + ), + parsed as ( + select + (xpath('//members_bytes/text()', x))[1]::text::bigint as members_bytes, + (xpath('//offsets_bytes/text()', x))[1]::text::bigint as offsets_bytes, + (xpath('//status_code/text()', x))[1]::text::int as status_code + from picked + ) + select * from parsed + union all + select + null::bigint as members_bytes, + null::bigint as offsets_bytes, + 2::int as status_code + where not exists (select 1 from parsed); + gauges: + - members_bytes + - offsets_bytes + - status_code + statement_timeout_seconds: 15 presets: full: @@ -2194,9 +2433,11 @@ presets: pg_stat_all_indexes: 30 pg_stat_statements: 30 pg_stat_replication: 30 + pg_stat_slru: 30 pg_statio_all_tables: 30 pg_statio_all_indexes: 30 pg_total_relation_size: 30 + lock_waits: 30 pg_blocked: 30 pg_long_running_transactions: 30 pg_stuck_idle_in_transaction: 30 @@ -2213,6 +2454,7 @@ presets: stats_reset: 3600 archive_lag: 15 pg_vacuum_progress: 30 + multixact_size: 300 pg_index_pilot: metrics: pg_index_pilot: 30 diff --git a/config/sink-postgres/00-configure-pg-hba.sh b/config/sink-postgres/00-configure-pg-hba.sh new file mode 100644 
index 0000000..961f26f --- /dev/null +++ b/config/sink-postgres/00-configure-pg-hba.sh @@ -0,0 +1,63 @@ +#!/bin/bash +# Configure pg_hba.conf to allow trust authentication from Docker networks +# +# SECURITY NOTE: This configuration uses trust authentication, which is appropriate +# for this use case because: +# 1. The sink-postgres container runs in an isolated Docker network +# 2. No ports are exposed to the host or external networks +# 3. Only other containers in the same Docker Compose network can connect +# 4. This is an internal data collection service, not a production database +# 5. The container network provides the security boundary +# +# This approach simplifies container-to-container communication while maintaining +# appropriate security isolation from external access. + +cat > ${PGDATA}/pg_hba.conf <>'queryid'; - - -- Allow NULL queryids through - IF queryid_value IS NULL THEN - RETURN NEW; - END IF; - - -- Silently skip if duplicate exists - IF EXISTS ( - SELECT 1 - FROM pgss_queryid_queries - WHERE dbname = NEW.dbname - AND data->>'queryid' = queryid_value - LIMIT 1 - ) THEN - RETURN NULL; -- Cancels INSERT silently - END IF; - - RETURN NEW; -END; -$$ LANGUAGE plpgsql; - - -CREATE OR REPLACE TRIGGER enforce_queryid_uniqueness_trigger - BEFORE INSERT - ON pgss_queryid_queries - FOR EACH ROW - EXECUTE FUNCTION enforce_queryid_uniqueness(); +alter default privileges in schema public grant all on tables to pgwatch; + +create or replace function enforce_queryid_uniqueness() +returns trigger as $$ +declare + queryid_value text; +begin + -- Extract queryid from the data JSONB + queryid_value := new.data->>'queryid'; + + -- Allow NULL queryids through + if queryid_value is null then + return new; + end if; + + -- Silently skip if duplicate exists + if exists ( + select + from pgss_queryid_queries + where + dbname = new.dbname + and data->>'queryid' = queryid_value + limit 1 + ) then + return null; -- Cancels INSERT silently + end if; + + return new; +end; +$$ language plpgsql; + + +create or replace trigger enforce_queryid_uniqueness_trigger + before insert + on pgss_queryid_queries + for each row + execute function enforce_queryid_uniqueness(); + +-- Create a partitioned table for index definitions with LIST partitioning by dbname +create table if not exists public.index_definitions ( + time timestamptz not null, + dbname text not null, + data jsonb not null, + tag_data jsonb +) partition by list (dbname); + +-- Create indexes for efficient lookups +create index if not exists index_definitions_dbname_time_idx on public.index_definitions (dbname, time); + +-- Set ownership and grant permissions to pgwatch +alter table public.index_definitions owner to pgwatch; +grant all privileges on table public.index_definitions to pgwatch; + +-- Create function to enforce index definition uniqueness +create or replace function enforce_index_definition_uniqueness() +returns trigger as $$ +declare + index_name text; + schema_name text; + table_name text; + index_definition text; +begin + -- Extract index information from the data JSONB + index_name := new.data->>'indexrelname'; + schema_name := new.data->>'schemaname'; + table_name := new.data->>'relname'; + index_definition := new.data->>'index_definition'; + + -- Allow NULL index names through + if index_name is null then + return new; + end if; + + -- Silently skip if duplicate exists + if exists ( + select 1 + from index_definitions + where dbname = new.dbname + and data->>'indexrelname' = index_name + and data->>'schemaname' = schema_name + 
and data->>'relname' = table_name + and data->>'index_definition' = index_definition + limit 1 + ) then + return null; -- Cancels INSERT silently + end if; + + return new; +end; +$$ language plpgsql; + +create or replace trigger enforce_index_definition_uniqueness_trigger + before insert + on index_definitions + for each row + execute function enforce_index_definition_uniqueness(); diff --git a/config/target-db/init.sql b/config/target-db/init.sql index 0ff6558..7f4fc90 100644 --- a/config/target-db/init.sql +++ b/config/target-db/init.sql @@ -1,49 +1,52 @@ -- Initialize target database for monitoring -- Enable pg_stat_statements extension for query monitoring -CREATE EXTENSION IF NOT EXISTS pg_stat_statements; +create extension if not exists pg_stat_statements; -- Create a sample table for demonstration -CREATE TABLE IF NOT EXISTS sample_data ( - id SERIAL PRIMARY KEY, - name VARCHAR(100), - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +create table if not exists sample_data ( + id serial primary key, + name varchar(100), + created_at timestamp default current_timestamp ); -- Insert some sample data -INSERT INTO sample_data (name) VALUES +insert into sample_data (name) values ('Sample Record 1'), ('Sample Record 2'), ('Sample Record 3'); -- Create a user for PGWatch monitoring -CREATE USER monitor WITH PASSWORD 'monitor_pass'; -GRANT CONNECT ON DATABASE target_database TO monitor; -GRANT USAGE ON SCHEMA public TO monitor; +create user monitor with password 'monitor_pass'; +grant connect on database target_database to monitor; +grant usage on schema public to monitor; -- Create a public view for pg_statistic access -CREATE OR REPLACE VIEW public.pg_statistic AS -SELECT +create or replace view public.pg_statistic as +select n.nspname as schemaname, c.relname as tablename, a.attname, s.stanullfrac as null_frac, s.stawidth as avg_width, false as inherited -FROM pg_statistic s -JOIN pg_class c ON c.oid = s.starelid -JOIN pg_namespace n ON n.oid = c.relnamespace -JOIN pg_attribute a ON a.attrelid = s.starelid AND a.attnum = s.staattnum -WHERE a.attnum > 0 AND NOT a.attisdropped; +from pg_statistic s +join pg_class c on c.oid = s.starelid +join pg_namespace n on n.oid = c.relnamespace +join pg_attribute a on a.attrelid = s.starelid and a.attnum = s.staattnum +where a.attnum > 0 and not a.attisdropped; -- Grant specific access instead of all tables -GRANT SELECT ON public.pg_statistic TO pg_monitor; +grant select on public.pg_statistic to pg_monitor; -- Grant access to monitoring views -GRANT SELECT ON pg_stat_statements TO monitor; -GRANT SELECT ON pg_stat_database TO monitor; -GRANT SELECT ON pg_stat_user_tables TO monitor; +grant select on pg_stat_statements to monitor; +grant select on pg_stat_database to monitor; +grant select on pg_stat_user_tables to monitor; -- Grant pg_monitor role to monitor user for enhanced monitoring capabilities -GRANT pg_monitor TO monitor; - +grant pg_monitor to monitor; +grant execute on function pg_stat_file(text) to monitor; +grant execute on function pg_stat_file(text, boolean) to monitor; +grant execute on function pg_ls_dir(text) to monitor; +grant execute on function pg_ls_dir(text, boolean, boolean) to monitor; -- Set search path for the monitor user -ALTER USER monitor SET search_path = "$user", public, pg_catalog; +alter user monitor set search_path = "$user", public, pg_catalog; diff --git a/docker-compose.yml b/docker-compose.yml index d60be9e..1f19b24 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -34,13 +34,15 @@ services: "-c", 
"pg_stat_statements.track=all", ] - ports: - - "${BIND_HOST:-}55432:5432" volumes: - target_db_data:/var/lib/postgresql/data - ./config/target-db/init.sql:/docker-entrypoint-initdb.d/init.sql # Postgres Sink - Storage for metrics in PostgreSQL format + # Note: pg_hba.conf is configured to allow passwordless connections (trust) + # for local connections within the Docker network. This simplifies pgwatch + # and postgres-exporter connectivity without compromising security since + # the database is not exposed externally. sink-postgres: image: postgres:15 container_name: sink-postgres @@ -48,10 +50,10 @@ services: POSTGRES_DB: postgres POSTGRES_USER: postgres POSTGRES_PASSWORD: postgres - ports: - - "${BIND_HOST:-}55433:5432" + POSTGRES_HOST_AUTH_METHOD: trust volumes: - sink_postgres_data:/var/lib/postgresql/data + - ./config/sink-postgres/00-configure-pg-hba.sh:/docker-entrypoint-initdb.d/00-configure-pg-hba.sh - ./config/sink-postgres/init.sql:/docker-entrypoint-initdb.d/init.sql # VictoriaMetrics Sink - Storage for metrics in Prometheus format @@ -79,11 +81,9 @@ services: [ "--sources=/etc/pgwatch/sources.yml", "--metrics=/etc/pgwatch/metrics.yml", - "--sink=postgresql://pgwatch:pgwatchadmin@sink-postgres:5432/measurements", + "--sink=postgresql://pgwatch@sink-postgres:5432/measurements?sslmode=disable", "--web-addr=:8080", ] - ports: - - "${BIND_HOST:-}58080:8080" depends_on: - sources-generator - sink-postgres @@ -103,9 +103,6 @@ services: "--sink=prometheus://0.0.0.0:9091/pgwatch", "--web-addr=:8089", ] - ports: - - "${BIND_HOST:-}58089:8089" - - "${BIND_HOST:-}59091:9091" depends_on: - sources-generator - sink-prometheus @@ -122,6 +119,19 @@ services: GF_SECURITY_ADMIN_USER: monitor GF_SECURITY_ADMIN_PASSWORD: ${GF_SECURITY_ADMIN_PASSWORD:-demo} GF_INSTALL_PLUGINS: yesoreyeram-infinity-datasource + # OAuth configuration (disabled by default, enabled via Ansible) + GF_AUTH_GENERIC_OAUTH_ENABLED: ${GRAFANA_OAUTH_ENABLED:-false} + GF_AUTH_GENERIC_OAUTH_NAME: ${GRAFANA_OAUTH_NAME:-PostgresAI} + GF_AUTH_GENERIC_OAUTH_ALLOW_SIGN_UP: ${GRAFANA_OAUTH_ALLOW_SIGN_UP:-true} + GF_AUTH_GENERIC_OAUTH_CLIENT_ID: ${GRAFANA_OAUTH_CLIENT_ID:-} + GF_AUTH_GENERIC_OAUTH_CLIENT_SECRET: ${GRAFANA_OAUTH_CLIENT_SECRET:-} + GF_AUTH_GENERIC_OAUTH_SCOPES: ${GRAFANA_OAUTH_SCOPES:-openid email profile} + GF_AUTH_GENERIC_OAUTH_AUTH_URL: ${GRAFANA_OAUTH_AUTH_URL:-} + GF_AUTH_GENERIC_OAUTH_TOKEN_URL: ${GRAFANA_OAUTH_TOKEN_URL:-} + GF_AUTH_GENERIC_OAUTH_API_URL: ${GRAFANA_OAUTH_API_URL:-} + # Optional: disable login form when OAuth is primary auth + GF_AUTH_DISABLE_LOGIN_FORM: ${GRAFANA_DISABLE_LOGIN_FORM:-false} + GF_SERVER_ROOT_URL: ${GF_SERVER_ROOT_URL:-} ports: - "${GRAFANA_BIND_HOST:-}3000:3000" volumes: @@ -133,22 +143,18 @@ services: - sink-postgres - sink-prometheus restart: unless-stopped - flask-backend: - build: - context: ./flask-backend - dockerfile: Dockerfile + monitoring_flask_backend: + image: postgresai/monitoring_flask_backend:latest container_name: flask-pgss-api environment: - FLASK_ENV=production - PROMETHEUS_URL=http://sink-prometheus:9090 depends_on: - sink-prometheus - ports: - - "${BIND_HOST:-}55000:5000" restart: unless-stopped # PostgreSQL Reports Generator - Runs reports after 1 hour postgres-reports: - image: python:3.11-slim + image: postgresai/reporter:1.0.2 container_name: postgres-reports working_dir: /app volumes: @@ -171,16 +177,13 @@ services: echo 'Starting PostgreSQL reports generation...' && while true; do echo 'Extracting cluster and node name from instances.yml...' 
&& - CLUSTER=$$(python3 -c \"import yaml; data=yaml.safe_load(open('instances.yml')); print(data[0]['custom_tags']['cluster'])\") && - NODE_NAME=$$(python3 -c \"import yaml; data=yaml.safe_load(open('instances.yml')); print(data[0]['custom_tags']['node_name'])\") && - echo \"Using cluster: $$CLUSTER, node: $$NODE_NAME\" && echo 'Generating PostgreSQL reports...' && if [ -f /app/.pgwatch-config ] && grep -q '^api_key=' /app/.pgwatch-config; then API_KEY=$$(grep '^api_key=' /app/.pgwatch-config | cut -d'=' -f2-) && - python postgres_reports.py --prometheus-url http://sink-prometheus:9090 --cluster \"$$CLUSTER\" --node-name \"$$NODE_NAME\" --output /app/all_reports_$$(date +%Y%m%d_%H%M%S).json --token $$API_KEY --project postgres-ai-monitoring + python postgres_reports.py --prometheus-url http://sink-prometheus:9090 --output /app/all_reports_$$(date +%Y%m%d_%H%M%S).json --token $$API_KEY --project postgres-ai-monitoring else echo 'No API key configured, generating reports without upload...' && - python postgres_reports.py --prometheus-url http://sink-prometheus:9090 --cluster \"$$CLUSTER\" --node-name \"$$NODE_NAME\" --output /app/all_reports_$$(date +%Y%m%d_%H%M%S).json --no-upload + python postgres_reports.py --prometheus-url http://sink-prometheus:9090 --output /app/all_reports_$$(date +%Y%m%d_%H%M%S).json --no-upload fi && echo 'Reports generated. Sleeping for 24 hours...' && sleep 86400 @@ -194,8 +197,6 @@ services: image: gcr.io/cadvisor/cadvisor:v0.51.0 container_name: cadvisor privileged: true - ports: - - "58081:8080" volumes: - /:/rootfs:ro - /var/run:/var/run:ro @@ -203,20 +204,18 @@ services: - /var/lib/docker/:/var/lib/docker:ro - /dev/disk/:/dev/disk:ro command: - - '--housekeeping_interval=30s' - - '--docker_only=true' - - '--disable_metrics=percpu,sched,tcp,udp,hugetlb,referenced_memory,cpu_topology,resctrl' - - '--store_container_labels=false' + - "--housekeeping_interval=30s" + - "--docker_only=true" + - "--disable_metrics=percpu,sched,tcp,udp,hugetlb,referenced_memory,cpu_topology,resctrl" + - "--store_container_labels=false" # Node Exporter - System metrics node-exporter: image: prom/node-exporter:v1.8.2 container_name: node-exporter - ports: - - "59100:9100" command: - - '--path.rootfs=/host' - - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)' + - "--path.rootfs=/host" + - "--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)" volumes: - /:/host:ro,rslave restart: unless-stopped @@ -227,8 +226,6 @@ services: container_name: postgres-exporter-sink environment: DATA_SOURCE_NAME: "postgresql://postgres:postgres@sink-postgres:5432/measurements?sslmode=disable" - ports: - - "59187:9187" depends_on: - sink-postgres restart: unless-stopped diff --git a/docs/brew-installation.md b/docs/brew-installation.md new file mode 100644 index 0000000..19ab612 --- /dev/null +++ b/docs/brew-installation.md @@ -0,0 +1,103 @@ +# Homebrew Installation for PostgresAI CLI + +This document describes how to set up and distribute the PostgresAI CLI via Homebrew. 
+ +## For Users + +### Installation + +Once the Homebrew tap is set up, users can install with: + +```bash +# Add the PostgresAI tap +brew tap postgres-ai/tap https://gitlab.com/postgres-ai/homebrew-tap.git + +# Install postgresai +brew install postgresai + +# Verify installation +pgai --version +``` + +### Updating + +```bash +brew update +brew upgrade postgresai +``` + +### Uninstalling + +```bash +brew uninstall postgresai +brew untap postgres-ai/tap +``` + +## For Maintainers + +### Creating the Homebrew Tap Repository + +1. Create a new GitLab repository named `homebrew-tap` at: + `https://gitlab.com/postgres-ai/homebrew-tap` + +2. Add the formula file `Formula/postgresai.rb` to the repository + +3. Update the formula SHA256 after each npm publish: + ```bash + # Download the tarball + curl -L https://registry.npmjs.org/postgresai/-/postgresai-VERSION.tgz -o postgresai.tgz + + # Calculate SHA256 + shasum -a 256 postgresai.tgz + + # Update the sha256 field in the formula + ``` + +### Updating the Formula + +After publishing a new version to npm: + +1. Update the `url` with the new version number +2. Calculate and update the `sha256` hash +3. Test the formula locally: + ```bash + brew install --build-from-source Formula/postgresai.rb + brew test postgresai + ``` +4. Commit and push to the homebrew-tap repository + +### Testing Locally + +Before pushing to the tap: + +```bash +# Install from local formula +brew install --build-from-source Formula/postgresai.rb + +# Run tests +brew test postgresai + +# Audit the formula +brew audit --strict postgresai + +# Uninstall +brew uninstall postgresai +``` + +## Alternative: Homebrew Core + +To submit to the main Homebrew repository (more visibility but stricter requirements): + +1. Formula must meet Homebrew's acceptance criteria +2. Project should be notable/popular +3. Follow instructions at: https://docs.brew.sh/Adding-Software-to-Homebrew + +## Automation + +Consider setting up CI/CD to automatically: +1. Calculate SHA256 from the npm tarball +2. Update the formula +3. Commit to homebrew-tap repository + +This can be done in GitLab CI after successful npm publish. + diff --git a/instances.yml b/instances.yml index c905455..9c91ce7 100644 --- a/instances.yml +++ b/instances.yml @@ -2,7 +2,7 @@ conn_str: postgresql://monitor:monitor_pass@target-db:5432/target_database preset_metrics: full custom_metrics: - is_enabled: true + is_enabled: false group: default custom_tags: env: demo diff --git a/flask-backend/Dockerfile b/monitoring_flask_backend/Dockerfile similarity index 86% rename from flask-backend/Dockerfile rename to monitoring_flask_backend/Dockerfile index ceb6e61..9ba8d26 100644 --- a/flask-backend/Dockerfile +++ b/monitoring_flask_backend/Dockerfile @@ -16,11 +16,11 @@ RUN pip install --no-cache-dir -r requirements.txt COPY app.py . 
# Expose port -EXPOSE 5000 +EXPOSE 8000 # Set environment variables ENV FLASK_APP=app.py ENV FLASK_ENV=production # Run the application -CMD ["gunicorn", "--bind", "0.0.0.0:5000", "--workers", "4", "--timeout", "120", "app:app"] \ No newline at end of file +CMD ["gunicorn", "--bind", "0.0.0.0:8000", "--workers", "4", "--timeout", "120", "app:app"] \ No newline at end of file diff --git a/flask-backend/app.py b/monitoring_flask_backend/app.py similarity index 61% rename from flask-backend/app.py rename to monitoring_flask_backend/app.py index 9580158..ba45d9b 100644 --- a/flask-backend/app.py +++ b/monitoring_flask_backend/app.py @@ -4,6 +4,7 @@ import io from datetime import datetime, timezone, timedelta import logging +import os # Configure logging logging.basicConfig(level=logging.INFO) @@ -11,8 +12,8 @@ app = Flask(__name__) -# Prometheus connection -PROMETHEUS_URL = "http://sink-prometheus:9090" +# Prometheus connection - use environment variable with fallback +PROMETHEUS_URL = os.environ.get('PROMETHEUS_URL', 'http://localhost:8428') # Metric name mapping for cleaner CSV output METRIC_NAME_MAPPING = { @@ -487,17 +488,43 @@ def get_btree_bloat_csv(): @app.route('/table_info/csv', methods=['GET']) def get_table_info_csv(): """ - Get comprehensive table information including bloat metrics, detailed size information, and I/O statistics as a CSV table. - Combines pg_table_bloat, table_size_detailed, and pg_statio_all_tables metrics for complete table analysis. + Get comprehensive table information including size metrics, tuple statistics, and I/O statistics as a CSV table. + Supports both instant queries (without time parameters) and rate calculations over a time period. + + Query parameters: + - time_start: Start time (ISO format or Unix timestamp) - optional + - time_end: End time (ISO format or Unix timestamp) - optional + - cluster_name: Cluster name filter (optional) + - node_name: Node name filter (optional) + - db_name: Database name filter (optional) + - schemaname: Schema name filter (optional, supports regex with ~) + - tblname: Table name filter (optional) """ try: # Get query parameters + time_start = request.args.get('time_start') + time_end = request.args.get('time_end') cluster_name = request.args.get('cluster_name') node_name = request.args.get('node_name') db_name = request.args.get('db_name') schemaname = request.args.get('schemaname') tblname = request.args.get('tblname') + # Determine if we should calculate rates + calculate_rates = bool(time_start and time_end) + + if calculate_rates: + # Parse time parameters + try: + start_dt = datetime.fromtimestamp(float(time_start), tz=timezone.utc) + except ValueError: + start_dt = datetime.fromisoformat(time_start.replace('Z', '+00:00')) + + try: + end_dt = datetime.fromtimestamp(float(time_end), tz=timezone.utc) + except ValueError: + end_dt = datetime.fromisoformat(time_end.replace('Z', '+00:00')) + # Build label filters filters = [] if cluster_name: @@ -505,7 +532,8 @@ def get_table_info_csv(): if node_name: filters.append(f'node_name="{node_name}"') if schemaname: - filters.append(f'schemaname="{schemaname}"') + # Support regex pattern matching with =~ + filters.append(f'schemaname=~"{schemaname}"') if tblname: filters.append(f'tblname="{tblname}"') if db_name: @@ -513,170 +541,141 @@ def get_table_info_csv(): filter_str = '{' + ','.join(filters) + '}' if filters else '' - # Metrics to fetch with last_over_time to get only the most recent value - # Include bloat metrics, detailed size metrics, and I/O metrics - 
metric_queries = [ - # Bloat metrics - f'last_over_time(pgwatch_pg_table_bloat_real_size_mib{filter_str}[1d])', - f'last_over_time(pgwatch_pg_table_bloat_extra_size{filter_str}[1d])', - f'last_over_time(pgwatch_pg_table_bloat_extra_pct{filter_str}[1d])', - f'last_over_time(pgwatch_pg_table_bloat_fillfactor{filter_str}[1d])', - f'last_over_time(pgwatch_pg_table_bloat_bloat_size{filter_str}[1d])', - f'last_over_time(pgwatch_pg_table_bloat_bloat_pct{filter_str}[1d])', - f'last_over_time(pgwatch_pg_table_bloat_is_na{filter_str}[1d])', - # Detailed size metrics - f'last_over_time(pgwatch_table_size_detailed_table_main_size_b{filter_str}[1d])', - f'last_over_time(pgwatch_table_size_detailed_table_fsm_size_b{filter_str}[1d])', - f'last_over_time(pgwatch_table_size_detailed_table_vm_size_b{filter_str}[1d])', - f'last_over_time(pgwatch_table_size_detailed_table_indexes_size_b{filter_str}[1d])', - f'last_over_time(pgwatch_table_size_detailed_toast_main_size_b{filter_str}[1d])', - f'last_over_time(pgwatch_table_size_detailed_toast_fsm_size_b{filter_str}[1d])', - f'last_over_time(pgwatch_table_size_detailed_toast_vm_size_b{filter_str}[1d])', - f'last_over_time(pgwatch_table_size_detailed_toast_indexes_size_b{filter_str}[1d])', - f'last_over_time(pgwatch_table_size_detailed_total_relation_size_b{filter_str}[1d])', - f'last_over_time(pgwatch_table_size_detailed_total_toast_size_b{filter_str}[1d])', - # I/O metrics - f'last_over_time(pgwatch_pg_statio_all_tables_heap_blks_read{filter_str}[1d])', - f'last_over_time(pgwatch_pg_statio_all_tables_heap_blks_hit{filter_str}[1d])', - f'last_over_time(pgwatch_pg_statio_all_tables_idx_blks_read{filter_str}[1d])', - f'last_over_time(pgwatch_pg_statio_all_tables_idx_blks_hit{filter_str}[1d])', - f'last_over_time(pgwatch_pg_statio_all_tables_toast_blks_read{filter_str}[1d])', - f'last_over_time(pgwatch_pg_statio_all_tables_toast_blks_hit{filter_str}[1d])', - f'last_over_time(pgwatch_pg_statio_all_tables_tidx_blks_read{filter_str}[1d])', - f'last_over_time(pgwatch_pg_statio_all_tables_tidx_blks_hit{filter_str}[1d])', - ] - prom = get_prometheus_client() - metric_results = {} - - for query in metric_queries: - try: - result = prom.custom_query(query=query) - for entry in result: - metric_labels = entry.get('metric', {}) - key = ( - metric_labels.get('datname', ''), - metric_labels.get('schemaname', ''), - metric_labels.get('tblname', ''), + + # Define base metrics to query (without last_over_time wrapper for rate calculation) + base_metrics = { + # Size metrics + 'total_size': f'pgwatch_pg_class_total_relation_size_bytes{filter_str}', + 'table_size': f'pgwatch_table_size_detailed_table_main_size_b{filter_str}', + 'index_size': f'pgwatch_table_size_detailed_table_indexes_size_b{filter_str}', + 'toast_size': f'pgwatch_table_size_detailed_total_toast_size_b{filter_str}', + # Scan statistics + 'seq_scan': f'pgwatch_pg_stat_all_tables_seq_scan{filter_str}', + 'idx_scan': f'pgwatch_pg_stat_all_tables_idx_scan{filter_str}', + # Tuple statistics + 'n_tup_ins': f'pgwatch_table_stats_n_tup_ins{filter_str}', + 'n_tup_upd': f'pgwatch_table_stats_n_tup_upd{filter_str}', + 'n_tup_del': f'pgwatch_table_stats_n_tup_del{filter_str}', + 'n_tup_hot_upd': f'pgwatch_table_stats_n_tup_hot_upd{filter_str}', + # I/O statistics + 'heap_blks_read': f'pgwatch_pg_statio_all_tables_heap_blks_read{filter_str}', + 'heap_blks_hit': f'pgwatch_pg_statio_all_tables_heap_blks_hit{filter_str}', + 'idx_blks_read': f'pgwatch_pg_statio_all_tables_idx_blks_read{filter_str}', + 'idx_blks_hit': 
f'pgwatch_pg_statio_all_tables_idx_blks_hit{filter_str}', + } + + if calculate_rates: + # Get metrics at start and end times + start_data = {} + end_data = {} + + for metric_name, metric_query in base_metrics.items(): + try: + # Get data at start time + start_result = prom.get_metric_range_data( + metric_name=metric_query, + start_time=start_dt - timedelta(minutes=1), + end_time=start_dt + timedelta(minutes=1) ) - if key not in metric_results: - metric_results[key] = { - 'database': metric_labels.get('datname', ''), - 'schemaname': metric_labels.get('schemaname', ''), - 'tblname': metric_labels.get('tblname', ''), - } - value = float(entry['value'][1]) + if start_result: + start_data[metric_name] = start_result - # Bloat metrics - if 'real_size_mib' in query: - metric_results[key]['real_size_mib'] = value - elif 'extra_size' in query and 'extra_pct' not in query: - metric_results[key]['extra_size'] = value - elif 'extra_pct' in query: - metric_results[key]['extra_pct'] = value - elif 'fillfactor' in query: - metric_results[key]['fillfactor'] = value - elif 'bloat_size' in query: - metric_results[key]['bloat_size'] = value - elif 'bloat_pct' in query: - metric_results[key]['bloat_pct'] = value - elif 'is_na' in query: - metric_results[key]['is_na'] = int(value) - - # Size metrics (convert bytes to MiB for consistency) - elif 'table_main_size_b' in query: - metric_results[key]['table_main_size_mib'] = value / (1024 * 1024) - elif 'table_fsm_size_b' in query: - metric_results[key]['table_fsm_size_mib'] = value / (1024 * 1024) - elif 'table_vm_size_b' in query: - metric_results[key]['table_vm_size_mib'] = value / (1024 * 1024) - elif 'table_indexes_size_b' in query: - metric_results[key]['table_indexes_size_mib'] = value / (1024 * 1024) - elif 'toast_main_size_b' in query: - metric_results[key]['toast_main_size_mib'] = value / (1024 * 1024) - elif 'toast_fsm_size_b' in query: - metric_results[key]['toast_fsm_size_mib'] = value / (1024 * 1024) - elif 'toast_vm_size_b' in query: - metric_results[key]['toast_vm_size_mib'] = value / (1024 * 1024) - elif 'toast_indexes_size_b' in query: - metric_results[key]['toast_indexes_size_mib'] = value / (1024 * 1024) - elif 'total_relation_size_b' in query: - metric_results[key]['total_relation_size_mib'] = value / (1024 * 1024) - elif 'total_toast_size_b' in query: - metric_results[key]['total_toast_size_mib'] = value / (1024 * 1024) - - # I/O metrics - elif 'heap_blks_read' in query: - metric_results[key]['heap_blks_read'] = int(value) - elif 'heap_blks_hit' in query: - metric_results[key]['heap_blks_hit'] = int(value) - elif 'idx_blks_read' in query: - metric_results[key]['idx_blks_read'] = int(value) - elif 'idx_blks_hit' in query: - metric_results[key]['idx_blks_hit'] = int(value) - elif 'toast_blks_read' in query: - metric_results[key]['toast_blks_read'] = int(value) - elif 'toast_blks_hit' in query: - metric_results[key]['toast_blks_hit'] = int(value) - elif 'tidx_blks_read' in query: - metric_results[key]['tidx_blks_read'] = int(value) - elif 'tidx_blks_hit' in query: - metric_results[key]['tidx_blks_hit'] = int(value) - except Exception as e: - logger.warning(f"Failed to query: {query}, error: {e}") - continue - - # Calculate I/O hit ratios - for key, row in metric_results.items(): - # Heap hit ratio - heap_total = row.get('heap_blks_read', 0) + row.get('heap_blks_hit', 0) - if heap_total > 0: - row['heap_hit_ratio'] = round(row.get('heap_blks_hit', 0) / heap_total * 100, 2) - else: - row['heap_hit_ratio'] = 0.0 - - # Index hit ratio - idx_total = 
row.get('idx_blks_read', 0) + row.get('idx_blks_hit', 0) - if idx_total > 0: - row['idx_hit_ratio'] = round(row.get('idx_blks_hit', 0) / idx_total * 100, 2) - else: - row['idx_hit_ratio'] = 0.0 - - # TOAST hit ratio - toast_total = row.get('toast_blks_read', 0) + row.get('toast_blks_hit', 0) - if toast_total > 0: - row['toast_hit_ratio'] = round(row.get('toast_blks_hit', 0) / toast_total * 100, 2) - else: - row['toast_hit_ratio'] = 0.0 - - # TOAST index hit ratio - tidx_total = row.get('tidx_blks_read', 0) + row.get('tidx_blks_hit', 0) - if tidx_total > 0: - row['tidx_hit_ratio'] = round(row.get('tidx_blks_hit', 0) / tidx_total * 100, 2) - else: - row['tidx_hit_ratio'] = 0.0 + # Get data at end time + end_result = prom.get_metric_range_data( + metric_name=metric_query, + start_time=end_dt - timedelta(minutes=1), + end_time=end_dt + timedelta(minutes=1) + ) + if end_result: + end_data[metric_name] = end_result + except Exception as e: + logger.warning(f"Failed to query metric {metric_name}: {e}") + continue + + # Process the data to calculate rates + metric_results = process_table_stats_with_rates(start_data, end_data, start_dt, end_dt) + else: + # Get instant values using last_over_time + metric_results = {} + for metric_name, metric_query in base_metrics.items(): + try: + result = prom.custom_query(query=f'last_over_time({metric_query}[1d])') + for entry in result: + metric_labels = entry.get('metric', {}) + + # Use different key depending on label names + schema_label = metric_labels.get('schemaname') or metric_labels.get('schema', '') + table_label = metric_labels.get('relname') or metric_labels.get('table_name') or metric_labels.get('tblname', '') + + key = ( + metric_labels.get('datname', ''), + schema_label, + table_label, + ) + + if key not in metric_results: + metric_results[key] = { + 'database': metric_labels.get('datname', ''), + 'schema': schema_label, + 'table_name': table_label, + } + + value = float(entry['value'][1]) + metric_results[key][metric_name] = value + except Exception as e: + logger.warning(f"Failed to query metric {metric_name}: {e}") + continue # Prepare CSV output output = io.StringIO() - fieldnames = [ - 'database', 'schemaname', 'tblname', - # Bloat metrics - 'real_size_mib', 'extra_size', 'extra_pct', 'fillfactor', - 'bloat_size', 'bloat_pct', 'is_na', - # Size metrics (all in MiB) - 'table_main_size_mib', 'table_fsm_size_mib', 'table_vm_size_mib', - 'table_indexes_size_mib', 'toast_main_size_mib', 'toast_fsm_size_mib', - 'toast_vm_size_mib', 'toast_indexes_size_mib', 'total_relation_size_mib', - 'total_toast_size_mib', - # I/O metrics - 'heap_blks_read', 'heap_blks_hit', 'heap_hit_ratio', - 'idx_blks_read', 'idx_blks_hit', 'idx_hit_ratio', - 'toast_blks_read', 'toast_blks_hit', 'toast_hit_ratio', - 'tidx_blks_read', 'tidx_blks_hit', 'tidx_hit_ratio' - ] + + if calculate_rates: + # Fields with rate calculations + fieldnames = [ + 'schema', 'table_name', + # Size metrics (bytes) + 'total_size', 'table_size', 'index_size', 'toast_size', + # Scan statistics with rates + 'seq_scans', 'seq_scans_per_sec', + 'idx_scans', 'idx_scans_per_sec', + # Tuple statistics with rates + 'inserts', 'inserts_per_sec', + 'updates', 'updates_per_sec', + 'deletes', 'deletes_per_sec', + 'hot_updates', 'hot_updates_per_sec', + # I/O statistics with rates (in bytes using block_size) + 'heap_blks_read', 'heap_blks_read_per_sec', + 'heap_blks_hit', 'heap_blks_hit_per_sec', + 'idx_blks_read', 'idx_blks_read_per_sec', + 'idx_blks_hit', 'idx_blks_hit_per_sec', + 'duration_seconds' + ] + 
else: + # Fields without rate calculations + fieldnames = [ + 'schema', 'table_name', + 'total_size', 'table_size', 'index_size', 'toast_size', + 'seq_scan', 'idx_scan', + 'n_tup_ins', 'n_tup_upd', 'n_tup_del', 'n_tup_hot_upd', + 'heap_blks_read', 'heap_blks_hit', + 'idx_blks_read', 'idx_blks_hit' + ] + + # Remove 'database' field from rows if present (not in fieldnames) + for row in metric_results.values(): + row.pop('database', None) + writer = csv.DictWriter(output, fieldnames=fieldnames) writer.writeheader() - for row in metric_results.values(): + + # Write rows (handle both dict and list) + if isinstance(metric_results, dict): + rows = metric_results.values() + else: + rows = metric_results + + for row in rows: writer.writerow(row) csv_content = output.getvalue() @@ -685,12 +684,151 @@ def get_table_info_csv(): # Create response response = make_response(csv_content) response.headers['Content-Type'] = 'text/csv' - response.headers['Content-Disposition'] = 'attachment; filename=table_info_latest.csv' + + if calculate_rates: + filename = f'table_stats_{start_dt.strftime("%Y%m%d_%H%M%S")}_{end_dt.strftime("%Y%m%d_%H%M%S")}.csv' + else: + filename = 'table_stats_latest.csv' + + response.headers['Content-Disposition'] = f'attachment; filename={filename}' return response except Exception as e: - logger.error(f"Error processing table info request: {e}") + logger.error(f"Error processing table stats request: {e}") return jsonify({"error": str(e)}), 500 +def process_table_stats_with_rates(start_data, end_data, start_time, end_time): + """ + Process table statistics and calculate rates between start and end times + """ + # Convert data to dictionaries + start_metrics = prometheus_table_to_dict(start_data, start_time) + end_metrics = prometheus_table_to_dict(end_data, end_time) + + if not start_metrics and not end_metrics: + return [] + + # Get all unique table identifiers + all_keys = set() + all_keys.update(start_metrics.keys()) + all_keys.update(end_metrics.keys()) + + result_rows = [] + + for key in all_keys: + start_metric = start_metrics.get(key, {}) + end_metric = end_metrics.get(key, {}) + + # Extract identifier components from key + db_name, schema_name, table_name = key + + # Calculate actual duration + start_timestamp = start_metric.get('timestamp') + end_timestamp = end_metric.get('timestamp') + + if start_timestamp and end_timestamp: + start_dt = datetime.fromisoformat(start_timestamp) + end_dt = datetime.fromisoformat(end_timestamp) + actual_duration = (end_dt - start_dt).total_seconds() + else: + actual_duration = (end_time - start_time).total_seconds() + + # Create result row + row = { + 'schema': schema_name, + 'table_name': table_name, + 'duration_seconds': actual_duration + } + + # Counter metrics to calculate differences and rates + counter_metrics = [ + 'seq_scan', 'idx_scan', 'n_tup_ins', 'n_tup_upd', + 'n_tup_del', 'n_tup_hot_upd', 'heap_blks_read', 'heap_blks_hit', + 'idx_blks_read', 'idx_blks_hit' + ] + + # Mapping for display names + display_names = { + 'seq_scan': 'seq_scans', + 'idx_scan': 'idx_scans', + 'n_tup_ins': 'inserts', + 'n_tup_upd': 'updates', + 'n_tup_del': 'deletes', + 'n_tup_hot_upd': 'hot_updates', + } + + # Calculate differences and rates + for metric in counter_metrics: + start_val = start_metric.get(metric, 0) + end_val = end_metric.get(metric, 0) + diff = end_val - start_val + + # Use display name if available + display_name = display_names.get(metric, metric) + + row[display_name] = diff + + # Calculate rate per second + if actual_duration > 0: + 
row[f'{display_name}_per_sec'] = diff / actual_duration + else: + row[f'{display_name}_per_sec'] = 0 + + # Size metrics (just use end values, these don't need rates) + for size_metric in ['total_size', 'table_size', 'index_size', 'toast_size']: + row[size_metric] = end_metric.get(size_metric, 0) + + result_rows.append(row) + + # Sort by total size descending + result_rows.sort(key=lambda x: x.get('total_size', 0), reverse=True) + + return result_rows + +def prometheus_table_to_dict(prom_data, timestamp): + """ + Convert Prometheus table metrics to dictionary keyed by table identifiers + """ + if not prom_data: + return {} + + metrics_dict = {} + + for metric_name, metric_results in prom_data.items(): + for metric_data in metric_results: + metric = metric_data.get('metric', {}) + values = metric_data.get('values', []) + + if not values: + continue + + # Get the closest value to our timestamp + closest_value = min(values, key=lambda x: abs(float(x[0]) - timestamp.timestamp())) + + # Handle different label names + schema_label = metric.get('schemaname') or metric.get('schema', '') + table_label = metric.get('relname') or metric.get('table_name') or metric.get('tblname', '') + + # Create unique key for this table + key = ( + metric.get('datname', ''), + schema_label, + table_label, + ) + + # Initialize metric dict if not exists + if key not in metrics_dict: + metrics_dict[key] = { + 'timestamp': datetime.fromtimestamp(float(closest_value[0]), tz=timezone.utc).isoformat(), + } + + # Add metric value + try: + metrics_dict[key][metric_name] = float(closest_value[1]) + except (ValueError, IndexError): + metrics_dict[key][metric_name] = 0 + + return metrics_dict + if __name__ == '__main__': app.run(host='0.0.0.0', port=5000, debug=True) \ No newline at end of file diff --git a/flask-backend/requirements.txt b/monitoring_flask_backend/requirements.txt similarity index 100% rename from flask-backend/requirements.txt rename to monitoring_flask_backend/requirements.txt diff --git a/postgres_ai b/postgres_ai index 53a8941..f0bbbab 100755 --- a/postgres_ai +++ b/postgres_ai @@ -1104,7 +1104,7 @@ start_services() { log_info "Starting Postgres AI monitoring services (production mode)..." log_info "Target demo database not included - add your own PostgreSQL instances to monitor" # Start all services except target-db, including host stats monitoring - $compose_cmd -f "$COMPOSE_FILE" up -d sources-generator sink-postgres sink-prometheus pgwatch-postgres pgwatch-prometheus grafana flask-backend postgres-reports cadvisor node-exporter postgres-exporter + $compose_cmd -f "$COMPOSE_FILE" up -d sources-generator sink-postgres sink-prometheus pgwatch-postgres pgwatch-prometheus grafana monitoring_flask_backend postgres-reports cadvisor node-exporter postgres-exporter fi log_success "Services started!" 
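For reference, the per-table rate columns added to the Flask backend above reduce to a counter delta divided by the measured window length (see `process_table_stats_with_rates`). Below is a minimal standalone sketch of that arithmetic; the sample values are made up for illustration and are not real pgwatch data.

```python
# Minimal sketch of the rate math used by process_table_stats_with_rates():
# cumulative counters sampled at the window start and end are differenced and
# divided by the actual duration between the two samples.
# The numbers below are illustrative only.
from datetime import datetime, timezone

start_sample = {
    "timestamp": datetime(2025, 1, 1, 12, 0, tzinfo=timezone.utc),
    "seq_scan": 1_000,
    "n_tup_ins": 50_000,
}
end_sample = {
    "timestamp": datetime(2025, 1, 1, 13, 0, tzinfo=timezone.utc),
    "seq_scan": 1_360,
    "n_tup_ins": 86_000,
}

duration = (end_sample["timestamp"] - start_sample["timestamp"]).total_seconds()

row = {"duration_seconds": duration}
for metric, display_name in [("seq_scan", "seq_scans"), ("n_tup_ins", "inserts")]:
    diff = end_sample[metric] - start_sample[metric]
    row[display_name] = diff
    # Per-second rate over the actual sampled window
    row[f"{display_name}_per_sec"] = diff / duration if duration > 0 else 0

print(row)
# {'duration_seconds': 3600.0, 'seq_scans': 360, 'seq_scans_per_sec': 0.1,
#  'inserts': 36000, 'inserts_per_sec': 10.0}
```

As in the backend code above, there is no guard against counter resets between the two samples; a stats reset inside the window would surface as a negative delta.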
diff --git a/postgres_ai_helm/.helmignore b/postgres_ai_helm/.helmignore new file mode 100644 index 0000000..517ac5f --- /dev/null +++ b/postgres_ai_helm/.helmignore @@ -0,0 +1,20 @@ +# Patterns to ignore when building packages +.DS_Store +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +*.swp +*.bak +*.tmp +*.orig +*~ +.project +.idea/ +*.tmproj +.vscode/ +*.code-workspace +*.md \ No newline at end of file diff --git a/postgres_ai_helm/Chart.lock b/postgres_ai_helm/Chart.lock new file mode 100644 index 0000000..acc19b0 --- /dev/null +++ b/postgres_ai_helm/Chart.lock @@ -0,0 +1,6 @@ +dependencies: +- name: grafana + repository: https://grafana.github.io/helm-charts + version: 10.1.4 +digest: sha256:0071bd9cc7eb403a2a58b7b0cd92eedbecafda51588ebd6a18c45dbb12416f79 +generated: "2025-11-13T12:20:55.238834+02:00" diff --git a/postgres_ai_helm/Chart.yaml b/postgres_ai_helm/Chart.yaml new file mode 100644 index 0000000..c3a6be7 --- /dev/null +++ b/postgres_ai_helm/Chart.yaml @@ -0,0 +1,25 @@ +apiVersion: v2 +appVersion: 1.0.0 +dependencies: +- condition: grafana.enabled + name: grafana + repository: https://grafana.github.io/helm-charts + version: 10.1.4 +description: PostgresAI monitoring stack with PGWatch, VictoriaMetrics, and Grafana + for Kubernetes +home: https://postgres.ai +keywords: +- postgresql +- monitoring +- pgwatch +- victoriametrics +- grafana +- observability +maintainers: +- name: PostgresAI + url: https://postgres.ai +name: postgres-ai-monitoring +sources: +- https://github.com/PostgresAI/double-pgwatch-poc +type: application +version: 0.12 diff --git a/postgres_ai_helm/INSTALLATION_GUIDE.md b/postgres_ai_helm/INSTALLATION_GUIDE.md new file mode 100644 index 0000000..a84924c --- /dev/null +++ b/postgres_ai_helm/INSTALLATION_GUIDE.md @@ -0,0 +1,178 @@ +# Postgres AI monitoring - Helm chart installation guide + +## Installation + +### 1. Download Helm chart + +### 2. Create namespace + +```bash +kubectl create namespace postgres-ai-mon +``` + +### 3. Create custom-values.yaml + +```yaml +existingSecret: + name: postgres-ai-monitoring-secrets + +global: + clusterName: my-cluster + nodeName: my-node + customTags: + env: production + +monitoredDatabases: + - name: my-db + host: db-host.example.com + port: 5432 + database: postgres + user: postgres_ai_mon + passwordSecretKey: my-db-password + presetMetrics: full + isEnabled: true + group: production + +grafana: + enabled: true + admin: + existingSecret: postgres-ai-monitoring-secrets + userKey: grafana-admin-user + passwordKey: grafana-admin-password + service: + type: ClusterIP + +ingress: + enabled: true + className: nginx + hosts: + grafana: monitoring.example.com + +storage: + postgresSize: 100Gi + victoriaMetricsSize: 200Gi + storageClassName: standard +``` + +**Customize**: `clusterName`, `monitoredDatabases`, `ingress.hosts`, and `storageClassName`. + +### 4. 
Create secret + +```bash +kubectl create secret generic postgres-ai-monitoring-secrets \ + --namespace postgres-ai-mon \ + --from-literal=postgres-password='SINK_POSTGRES_PASSWORD' \ + --from-literal=grafana-admin-user='monitor' \ + --from-literal=grafana-admin-password='GRAFANA_PASSWORD' \ + --from-literal=pgai-api-key='POSTGRES_AI_API_KEY' \ + --from-literal=db-password-my-db-password='DB_PASSWORD' +``` + +**Notes:** + +- `SINK_POSTGRES_PASSWORD` should be generated by you and is used to connect to the internal database that stores metrics +- `GRAFANA_PASSWORD` should be generated by you and is used to access Grafana +- `POSTGRES_AI_API_KEY` should be obtained from the PostgresAI platform and is used to connect to it +- Add a `--from-literal` entry for each database that you want to monitor + - Key name must be `db-password-<passwordSecretKey>`, where `<passwordSecretKey>` matches the value set in custom-values.yaml + - The value must be the password of the monitoring user in that database + +### 5. Install the Helm chart + +```bash +helm install postgres-ai-monitoring ./postgres-ai-monitoring-0.12.tgz \ + --namespace postgres-ai-mon \ + --values custom-values.yaml +``` + +### 6. Verify installation + +```bash +kubectl get pods -n postgres-ai-mon +``` + +## Access Grafana + +**Port Forward** (quick access): + +```bash +kubectl port-forward -n postgres-ai-mon svc/postgres-ai-monitoring-grafana 3000:80 +``` + +Open: `http://localhost:3000` + +**Ingress**: Access via the configured domain (e.g., `http://monitoring.example.com`) + +**Login**: Username and password from the secret (`grafana-admin-user` / `grafana-admin-password`) + +## Common tasks + +### Update configuration + +```bash +helm upgrade postgres-ai-monitoring ./postgres-ai-monitoring-0.12.tgz \ + --namespace postgres-ai-mon \ + --values custom-values.yaml +``` + +### Add database + +1. Add an entry to `monitoredDatabases` in custom-values.yaml + +2. Add its password to the secret (the key follows the same `db-password-<passwordSecretKey>` convention): + +```bash +kubectl create secret generic postgres-ai-monitoring-secrets \ + --namespace postgres-ai-mon \ + --from-literal=db-password-new-db='password' \ + --dry-run=client -o yaml | kubectl apply -f - +``` + +3. Run `helm upgrade` + +### Check logs + +```bash +kubectl logs -n postgres-ai-mon <pod-name> +``` + +## Uninstall + +### 1. Uninstall Helm release + +```bash +helm uninstall postgres-ai-monitoring --namespace postgres-ai-mon +``` + +This removes all resources created by the Helm chart, but preserves PersistentVolumeClaims and secrets. + +### 2. Delete PersistentVolumeClaims (optional) + +**Warning**: This will permanently delete all stored metrics and Grafana data. + +```bash +kubectl delete pvc -n postgres-ai-mon --all +``` + +Or delete specific PVCs: + +```bash +kubectl delete pvc -n postgres-ai-mon data-postgres-ai-monitoring-sink-postgres-0 +kubectl delete pvc -n postgres-ai-mon data-postgres-ai-monitoring-victoriametrics-0 +``` + +### 3. Delete secrets (optional) + +```bash +kubectl delete secret -n postgres-ai-mon postgres-ai-monitoring-secrets +``` + +### 4. Delete namespace (optional) + +**Warning**: This will delete all resources in the namespace, including any data stored in PersistentVolumes. + +```bash +kubectl delete namespace postgres-ai-mon +``` + +**Note**: Before deleting the namespace, ensure no other applications are using it. 
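Because the chart's init containers look up each database password under the key `db-password-<passwordSecretKey>` and substitute the corresponding `PASSWORD_PLACEHOLDER_<passwordSecretKey>` entries in the generated sources files, it can be worth sanity-checking that mapping before (or after) installing. A minimal sketch, reusing the release name, namespace, and file names from the examples above:

```bash
# Render the chart locally (no install) and list the password placeholders the
# pgwatch ConfigMap expects; there should be one per monitored database.
helm template postgres-ai-monitoring ./postgres-ai-monitoring-0.12.tgz \
  --namespace postgres-ai-mon \
  --values custom-values.yaml | grep -o 'PASSWORD_PLACEHOLDER_[A-Za-z0-9_-]*' | sort -u

# List the keys actually present in the secret; every placeholder above needs a
# matching db-password-<passwordSecretKey> key here.
kubectl describe secret postgres-ai-monitoring-secrets -n postgres-ai-mon
```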
diff --git a/postgres_ai_helm/config/grafana/dashboards/Dashboard_10_Index health.json b/postgres_ai_helm/config/grafana/dashboards/Dashboard_10_Index health.json new file mode 120000 index 0000000..104db39 --- /dev/null +++ b/postgres_ai_helm/config/grafana/dashboards/Dashboard_10_Index health.json @@ -0,0 +1 @@ +../../../../config/grafana/dashboards/Dashboard_10_Index health.json \ No newline at end of file diff --git a/postgres_ai_helm/config/grafana/dashboards/Dashboard_11_Single_index_analysis.json b/postgres_ai_helm/config/grafana/dashboards/Dashboard_11_Single_index_analysis.json new file mode 120000 index 0000000..f579c23 --- /dev/null +++ b/postgres_ai_helm/config/grafana/dashboards/Dashboard_11_Single_index_analysis.json @@ -0,0 +1 @@ +../../../../config/grafana/dashboards/Dashboard_11_Single_index_analysis.json \ No newline at end of file diff --git a/postgres_ai_helm/config/grafana/dashboards/Dashboard_12_SLRU.json b/postgres_ai_helm/config/grafana/dashboards/Dashboard_12_SLRU.json new file mode 120000 index 0000000..5225984 --- /dev/null +++ b/postgres_ai_helm/config/grafana/dashboards/Dashboard_12_SLRU.json @@ -0,0 +1 @@ +../../../../config/grafana/dashboards/Dashboard_12_SLRU.json \ No newline at end of file diff --git a/postgres_ai_helm/config/grafana/dashboards/Dashboard_13_Lock_waits.json b/postgres_ai_helm/config/grafana/dashboards/Dashboard_13_Lock_waits.json new file mode 120000 index 0000000..f01f2bd --- /dev/null +++ b/postgres_ai_helm/config/grafana/dashboards/Dashboard_13_Lock_waits.json @@ -0,0 +1 @@ +../../../../config/grafana/dashboards/Dashboard_13_Lock_waits.json \ No newline at end of file diff --git a/postgres_ai_helm/config/grafana/dashboards/Dashboard_1_Node_performance_overview.json b/postgres_ai_helm/config/grafana/dashboards/Dashboard_1_Node_performance_overview.json new file mode 120000 index 0000000..fb4537c --- /dev/null +++ b/postgres_ai_helm/config/grafana/dashboards/Dashboard_1_Node_performance_overview.json @@ -0,0 +1 @@ +../../../../config/grafana/dashboards/Dashboard_1_Node_performance_overview.json \ No newline at end of file diff --git a/postgres_ai_helm/config/grafana/dashboards/Dashboard_2_Aggregated_query_analysis.json b/postgres_ai_helm/config/grafana/dashboards/Dashboard_2_Aggregated_query_analysis.json new file mode 120000 index 0000000..49964f7 --- /dev/null +++ b/postgres_ai_helm/config/grafana/dashboards/Dashboard_2_Aggregated_query_analysis.json @@ -0,0 +1 @@ +../../../../config/grafana/dashboards/Dashboard_2_Aggregated_query_analysis.json \ No newline at end of file diff --git a/postgres_ai_helm/config/grafana/dashboards/Dashboard_3_Single_query_analysis.json b/postgres_ai_helm/config/grafana/dashboards/Dashboard_3_Single_query_analysis.json new file mode 120000 index 0000000..f823f9a --- /dev/null +++ b/postgres_ai_helm/config/grafana/dashboards/Dashboard_3_Single_query_analysis.json @@ -0,0 +1 @@ +../../../../config/grafana/dashboards/Dashboard_3_Single_query_analysis.json \ No newline at end of file diff --git a/postgres_ai_helm/config/grafana/dashboards/Dashboard_4_Wait_Sampling_Dashboard.json b/postgres_ai_helm/config/grafana/dashboards/Dashboard_4_Wait_Sampling_Dashboard.json new file mode 120000 index 0000000..125919a --- /dev/null +++ b/postgres_ai_helm/config/grafana/dashboards/Dashboard_4_Wait_Sampling_Dashboard.json @@ -0,0 +1 @@ +../../../../config/grafana/dashboards/Dashboard_4_Wait_Sampling_Dashboard.json \ No newline at end of file diff --git a/postgres_ai_helm/config/grafana/dashboards/Dashboard_5_Backup_stats.json 
b/postgres_ai_helm/config/grafana/dashboards/Dashboard_5_Backup_stats.json new file mode 120000 index 0000000..b315232 --- /dev/null +++ b/postgres_ai_helm/config/grafana/dashboards/Dashboard_5_Backup_stats.json @@ -0,0 +1 @@ +../../../../config/grafana/dashboards/Dashboard_5_Backup_stats.json \ No newline at end of file diff --git a/postgres_ai_helm/config/grafana/dashboards/Dashboard_6_Replication_and_HA.json b/postgres_ai_helm/config/grafana/dashboards/Dashboard_6_Replication_and_HA.json new file mode 120000 index 0000000..79a448f --- /dev/null +++ b/postgres_ai_helm/config/grafana/dashboards/Dashboard_6_Replication_and_HA.json @@ -0,0 +1 @@ +../../../../config/grafana/dashboards/Dashboard_6_Replication_and_HA.json \ No newline at end of file diff --git a/postgres_ai_helm/config/grafana/dashboards/Dashboard_7_Autovacuum_and_bloat.json b/postgres_ai_helm/config/grafana/dashboards/Dashboard_7_Autovacuum_and_bloat.json new file mode 120000 index 0000000..6d79eca --- /dev/null +++ b/postgres_ai_helm/config/grafana/dashboards/Dashboard_7_Autovacuum_and_bloat.json @@ -0,0 +1 @@ +../../../../config/grafana/dashboards/Dashboard_7_Autovacuum_and_bloat.json \ No newline at end of file diff --git a/postgres_ai_helm/config/grafana/dashboards/Dashboard_8_Table_Stats.json b/postgres_ai_helm/config/grafana/dashboards/Dashboard_8_Table_Stats.json new file mode 120000 index 0000000..f7ffca3 --- /dev/null +++ b/postgres_ai_helm/config/grafana/dashboards/Dashboard_8_Table_Stats.json @@ -0,0 +1 @@ +../../../../config/grafana/dashboards/Dashboard_8_Table_Stats.json \ No newline at end of file diff --git a/postgres_ai_helm/config/grafana/dashboards/Dashboard_9_Single_table_analysis.json b/postgres_ai_helm/config/grafana/dashboards/Dashboard_9_Single_table_analysis.json new file mode 120000 index 0000000..8d378b6 --- /dev/null +++ b/postgres_ai_helm/config/grafana/dashboards/Dashboard_9_Single_table_analysis.json @@ -0,0 +1 @@ +../../../../config/grafana/dashboards/Dashboard_9_Single_table_analysis.json \ No newline at end of file diff --git a/postgres_ai_helm/config/grafana/dashboards/Self_Monitoring_Dashboard.json b/postgres_ai_helm/config/grafana/dashboards/Self_Monitoring_Dashboard.json new file mode 120000 index 0000000..fdb978a --- /dev/null +++ b/postgres_ai_helm/config/grafana/dashboards/Self_Monitoring_Dashboard.json @@ -0,0 +1 @@ +../../../../config/grafana/dashboards/Self_Monitoring_Dashboard.json \ No newline at end of file diff --git a/postgres_ai_helm/config/metrics-postgres.yml b/postgres_ai_helm/config/metrics-postgres.yml new file mode 120000 index 0000000..351f016 --- /dev/null +++ b/postgres_ai_helm/config/metrics-postgres.yml @@ -0,0 +1 @@ +../../config/pgwatch-postgres/metrics.yml \ No newline at end of file diff --git a/postgres_ai_helm/config/metrics-prometheus.yml b/postgres_ai_helm/config/metrics-prometheus.yml new file mode 120000 index 0000000..7a44cad --- /dev/null +++ b/postgres_ai_helm/config/metrics-prometheus.yml @@ -0,0 +1 @@ +../../config/pgwatch-prometheus/metrics.yml \ No newline at end of file diff --git a/postgres_ai_helm/templates/_helpers.tpl b/postgres_ai_helm/templates/_helpers.tpl new file mode 100644 index 0000000..c1b3b97 --- /dev/null +++ b/postgres_ai_helm/templates/_helpers.tpl @@ -0,0 +1,193 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "postgres-ai-monitoring.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. 
+Truncate to 45 chars to leave room for component suffixes (e.g. -victoriametrics) +*/}} +{{- define "postgres-ai-monitoring.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 45 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 45 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 45 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "postgres-ai-monitoring.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "postgres-ai-monitoring.labels" -}} +helm.sh/chart: {{ include "postgres-ai-monitoring.chart" . }} +{{ include "postgres-ai-monitoring.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- with .Values.commonLabels }} +{{ toYaml . }} +{{- end }} +{{- end }} + +{{/* +Build connection string from database configuration +Password is retrieved from secrets using passwordSecretKey +*/}} +{{- define "postgres-ai-monitoring.dbConnStr" -}} +{{- $db := .db -}} +{{- $root := .root -}} +{{- if $db.connStr }} +{{- $db.connStr }} +{{- else }} +{{- $host := $db.host | default "localhost" }} +{{- $port := $db.port | default 5432 }} +{{- $database := $db.database | default "postgres" }} +{{- $user := $db.user | default "postgres" }} +{{- $passwordKey := printf "db-password-%s" $db.passwordSecretKey }} +postgresql://{{ $user }}:$(DB_PASSWORD)@{{ $host }}:{{ $port }}/{{ $database }} +{{- end }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "postgres-ai-monitoring.selectorLabels" -}} +app.kubernetes.io/name: {{ include "postgres-ai-monitoring.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "postgres-ai-monitoring.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "postgres-ai-monitoring.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + +{{/* +Namespace +*/}} +{{- define "postgres-ai-monitoring.namespace" -}} +{{- default .Release.Namespace .Values.namespaceOverride }} +{{- end }} + +{{/* +Determine effective cluster name with fallbacks. +*/}} +{{- define "postgres-ai-monitoring.effectiveClusterName" -}} +{{- if .Values.reporter.clusterName }} +{{- .Values.reporter.clusterName }} +{{- else if .Values.global.clusterName }} +{{- .Values.global.clusterName }} +{{- else }} +k8s-cluster +{{- end }} +{{- end }} + +{{/* +Determine effective node name with fallbacks. +*/}} +{{- define "postgres-ai-monitoring.effectiveNodeName" -}} +{{- if .Values.reporter.nodeName }} +{{- .Values.reporter.nodeName }} +{{- else if .Values.global.nodeName }} +{{- .Values.global.nodeName }} +{{- else }} +{{- "" }} +{{- end }} +{{- end }} + +{{/* +Get cluster name for a specific database with fallbacks. 
+*/}} +{{- define "postgres-ai-monitoring.databaseClusterName" -}} +{{- $db := .db -}} +{{- $root := .root -}} +{{- if $db.clusterName }} +{{- $db.clusterName }} +{{- else if $root.Values.reporter.clusterName }} +{{- $root.Values.reporter.clusterName }} +{{- else if $root.Values.global.clusterName }} +{{- $root.Values.global.clusterName }} +{{- else }} +{{- "k8s-cluster" }} +{{- end }} +{{- end }} + +{{/* +Get node name for a specific database with fallbacks. +*/}} +{{- define "postgres-ai-monitoring.databaseNodeName" -}} +{{- $db := .db -}} +{{- $root := .root -}} +{{- if $db.nodeName }} +{{- $db.nodeName }} +{{- else if $root.Values.reporter.nodeName }} +{{- $root.Values.reporter.nodeName }} +{{- else if $root.Values.global.nodeName }} +{{- $root.Values.global.nodeName }} +{{- else }} +{{- "" }} +{{- end }} +{{- end }} + +{{/* +Get unique cluster/node combinations from monitoredDatabases. +Returns a list of dicts with cluster and nodeName keys. +*/}} +{{- define "postgres-ai-monitoring.uniqueClusterNodeCombinations" -}} +{{- $root := . -}} +{{- $combinations := list -}} +{{- $seen := dict -}} +{{- range $db := .Values.monitoredDatabases }} + {{- $clusterName := include "postgres-ai-monitoring.databaseClusterName" (dict "db" $db "root" $root) | trim -}} + {{- $nodeName := include "postgres-ai-monitoring.databaseNodeName" (dict "db" $db "root" $root) | trim -}} + {{- $key := printf "%s|%s" $clusterName $nodeName -}} + {{- if not (hasKey $seen $key) }} + {{- $_ := set $seen $key true -}} + {{- $combinations = append $combinations (dict "cluster" $clusterName "nodeName" $nodeName) -}} + {{- end }} +{{- end }} +{{- if eq (len $combinations) 0 }} + {{- $clusterName := include "postgres-ai-monitoring.effectiveClusterName" $root | trim -}} + {{- $nodeName := include "postgres-ai-monitoring.effectiveNodeName" $root | trim -}} + {{- $combinations = append $combinations (dict "cluster" $clusterName "nodeName" $nodeName) -}} +{{- end }} +{{- $combinations | toJson }} +{{- end }} + +{{/* +Get the secret name to use. +Returns existingSecret.name if set, otherwise returns the default secret name. +*/}} +{{- define "postgres-ai-monitoring.secretName" -}} +{{- $existingSecretName := "" }} +{{- if .Values.existingSecret }} + {{- $existingSecretName = .Values.existingSecret.name | default "" }} +{{- end }} +{{- if and $existingSecretName (ne $existingSecretName "") }} +{{- $existingSecretName }} +{{- else }} +{{- printf "%s-secrets" (include "postgres-ai-monitoring.fullname" .) }} +{{- end }} +{{- end }} + + diff --git a/postgres_ai_helm/templates/cadvisor-daemonset.yaml b/postgres_ai_helm/templates/cadvisor-daemonset.yaml new file mode 100644 index 0000000..8b96b42 --- /dev/null +++ b/postgres_ai_helm/templates/cadvisor-daemonset.yaml @@ -0,0 +1,82 @@ +{{- if .Values.cadvisor.enabled }} +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: {{ include "postgres-ai-monitoring.fullname" . }}-cadvisor + namespace: {{ include "postgres-ai-monitoring.namespace" . }} + labels: + {{- include "postgres-ai-monitoring.labels" . | nindent 4 }} + app.kubernetes.io/component: cadvisor +spec: + selector: + matchLabels: + {{- include "postgres-ai-monitoring.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: cadvisor + template: + metadata: + labels: + {{- include "postgres-ai-monitoring.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: cadvisor + spec: + hostNetwork: true + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . 
| nindent 8 }} + {{- end }} + containers: + - name: cadvisor + image: {{ .Values.cadvisor.image }} + imagePullPolicy: IfNotPresent + args: + - --housekeeping_interval=30s + - --docker_only=false + - --disable_metrics=percpu,sched,tcp,udp,hugetlb,referenced_memory,cpu_topology,resctrl + - --store_container_labels=false + ports: + - name: http + containerPort: 8080 + protocol: TCP + volumeMounts: + - name: rootfs + mountPath: /rootfs + readOnly: true + - name: var-run + mountPath: /var/run + readOnly: true + - name: sys + mountPath: /sys + readOnly: true + - name: docker + mountPath: /var/lib/docker + readOnly: true + - name: disk + mountPath: /dev/disk + readOnly: true + {{- with .Values.cadvisor.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + securityContext: + privileged: true + volumes: + - name: rootfs + hostPath: + path: / + - name: var-run + hostPath: + path: /var/run + - name: sys + hostPath: + path: /sys + - name: docker + hostPath: + path: /var/lib/docker + - name: disk + hostPath: + path: /dev/disk + tolerations: + - effect: NoSchedule + operator: Exists +{{- end }} + + diff --git a/postgres_ai_helm/templates/cadvisor-service.yaml b/postgres_ai_helm/templates/cadvisor-service.yaml new file mode 100644 index 0000000..b2da9b7 --- /dev/null +++ b/postgres_ai_helm/templates/cadvisor-service.yaml @@ -0,0 +1,23 @@ +{{- if .Values.cadvisor.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "postgres-ai-monitoring.fullname" . }}-cadvisor + namespace: {{ include "postgres-ai-monitoring.namespace" . }} + labels: + {{- include "postgres-ai-monitoring.labels" . | nindent 4 }} + app.kubernetes.io/component: cadvisor +spec: + type: ClusterIP + clusterIP: None + ports: + - port: 8080 + targetPort: http + protocol: TCP + name: http + selector: + {{- include "postgres-ai-monitoring.selectorLabels" . | nindent 4 }} + app.kubernetes.io/component: cadvisor +{{- end }} + + diff --git a/postgres_ai_helm/templates/flask-deployment.yaml b/postgres_ai_helm/templates/flask-deployment.yaml new file mode 100644 index 0000000..30f2f89 --- /dev/null +++ b/postgres_ai_helm/templates/flask-deployment.yaml @@ -0,0 +1,65 @@ +{{- if .Values.flask.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "postgres-ai-monitoring.fullname" . }}-flask + namespace: {{ include "postgres-ai-monitoring.namespace" . }} + labels: + {{- include "postgres-ai-monitoring.labels" . | nindent 4 }} + app.kubernetes.io/component: flask-api +spec: + replicas: {{ .Values.flask.replicas | default 2 }} + selector: + matchLabels: + {{- include "postgres-ai-monitoring.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: flask-api + template: + metadata: + labels: + {{- include "postgres-ai-monitoring.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: flask-api + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + initContainers: + - name: wait-for-victoriametrics + image: busybox:1.36 + command: ['sh', '-c', 'until nc -z {{ include "postgres-ai-monitoring.fullname" . }}-victoriametrics {{ .Values.victoriaMetrics.service.port }}; do echo waiting for victoriametrics; sleep 2; done'] + containers: + - name: flask + image: {{ .Values.flask.image }} + imagePullPolicy: {{ .Values.flask.imagePullPolicy | default "IfNotPresent" }} + env: + - name: FLASK_ENV + value: "production" + - name: PROMETHEUS_URL + value: "http://{{ include "postgres-ai-monitoring.fullname" . 
}}-victoriametrics:{{ .Values.victoriaMetrics.service.port }}" + {{- range $key, $value := .Values.flask.env }} + - name: {{ $key }} + value: {{ $value | quote }} + {{- end }} + ports: + - name: http + containerPort: {{ .Values.flask.containerPort | default 8000 }} + protocol: TCP + {{- with .Values.flask.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + livenessProbe: + httpGet: + path: {{ .Values.flask.healthPath | default "/health" }} + port: http + initialDelaySeconds: 30 + periodSeconds: 10 + readinessProbe: + httpGet: + path: {{ .Values.flask.healthPath | default "/health" }} + port: http + initialDelaySeconds: 5 + periodSeconds: 5 +{{- end }} + + diff --git a/postgres_ai_helm/templates/flask-service.yaml b/postgres_ai_helm/templates/flask-service.yaml new file mode 100644 index 0000000..225b6b5 --- /dev/null +++ b/postgres_ai_helm/templates/flask-service.yaml @@ -0,0 +1,22 @@ +{{- if .Values.flask.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "postgres-ai-monitoring.fullname" . }}-flask + namespace: {{ include "postgres-ai-monitoring.namespace" . }} + labels: + {{- include "postgres-ai-monitoring.labels" . | nindent 4 }} + app.kubernetes.io/component: flask-api +spec: + type: {{ .Values.flask.service.type | default "ClusterIP" }} + ports: + - port: {{ .Values.flask.service.port | default 8000 }} + targetPort: http + protocol: TCP + name: http + selector: + {{- include "postgres-ai-monitoring.selectorLabels" . | nindent 4 }} + app.kubernetes.io/component: flask-api +{{- end }} + + diff --git a/postgres_ai_helm/templates/grafana-dashboards.yaml b/postgres_ai_helm/templates/grafana-dashboards.yaml new file mode 100644 index 0000000..d04ba25 --- /dev/null +++ b/postgres_ai_helm/templates/grafana-dashboards.yaml @@ -0,0 +1,19 @@ +{{- if .Values.grafana.enabled }} +{{- $files := .Files }} +{{- range $path, $_ := .Files.Glob "config/grafana/dashboards/*.json" }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "postgres-ai-monitoring.fullname" $ }}-dashboard-{{ regexReplaceAll "[^a-z0-9-]" (lower (base $path | trimSuffix ".json")) "-" }} + namespace: {{ include "postgres-ai-monitoring.namespace" $ }} + labels: + {{- include "postgres-ai-monitoring.labels" $ | nindent 4 }} + app.kubernetes.io/component: grafana + grafana_dashboard: "1" +data: + {{ base $path | replace " " "-" }}: |- +{{ $files.Get $path | indent 4 }} +{{- end }} +{{- end }} + diff --git a/postgres_ai_helm/templates/grafana-datasources.yaml b/postgres_ai_helm/templates/grafana-datasources.yaml new file mode 100644 index 0000000..26f5272 --- /dev/null +++ b/postgres_ai_helm/templates/grafana-datasources.yaml @@ -0,0 +1,48 @@ +{{- if .Values.grafana.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ .Release.Name }}-grafana-datasources + namespace: {{ include "postgres-ai-monitoring.namespace" . }} + labels: + {{- include "postgres-ai-monitoring.labels" . | nindent 4 }} + app.kubernetes.io/component: grafana + grafana_datasource: "1" +data: + datasources.yaml: | + apiVersion: 1 + datasources: + - name: VictoriaMetrics + type: prometheus + access: proxy + uid: P7A0D6631BB10B34F + url: http://{{ include "postgres-ai-monitoring.fullname" . }}-victoriametrics:{{ .Values.victoriaMetrics.service.port }} + isDefault: true + editable: true + - name: SinkPostgres + type: postgres + access: proxy + uid: P031DD592934B2F1F + url: {{ include "postgres-ai-monitoring.fullname" . 
}}-sink-postgres:5432 + user: {{ .Values.sinkPostgres.user }} + editable: true + jsonData: + database: {{ .Values.sinkPostgres.database }} + sslmode: disable + postgresVersion: 1500 + secureJsonData: + password: {{ .Values.secrets.postgres.password }} + {{- if .Values.flask.enabled }} + - name: Flask API + type: yesoreyeram-infinity-datasource + access: proxy + uid: aerffb0z8rjlsc + url: http://{{ include "postgres-ai-monitoring.fullname" . }}-flask:{{ .Values.flask.service.port }} + isDefault: false + editable: true + jsonData: + tlsSkipVerify: true + {{- end }} +{{- end }} + + diff --git a/postgres_ai_helm/templates/ingress.yaml b/postgres_ai_helm/templates/ingress.yaml new file mode 100644 index 0000000..43cbcdd --- /dev/null +++ b/postgres_ai_helm/templates/ingress.yaml @@ -0,0 +1,44 @@ +{{- if .Values.ingress.enabled }} +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ include "postgres-ai-monitoring.fullname" . }}-grafana + namespace: {{ include "postgres-ai-monitoring.namespace" . }} + labels: + {{- include "postgres-ai-monitoring.labels" . | nindent 4 }} + app.kubernetes.io/component: grafana + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- if .Values.ingress.className }} + ingressClassName: {{ .Values.ingress.className }} + {{- end }} + {{- if .Values.ingress.tls }} + tls: + {{- range .Values.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . | quote }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} + {{- end }} + rules: + {{- if .Values.ingress.hosts.grafana }} + - host: {{ .Values.ingress.hosts.grafana | quote }} + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: {{ .Release.Name }}-grafana + port: + number: 80 + {{- end }} +{{- end }} + + diff --git a/postgres_ai_helm/templates/node-exporter-daemonset.yaml b/postgres_ai_helm/templates/node-exporter-daemonset.yaml new file mode 100644 index 0000000..15ca695 --- /dev/null +++ b/postgres_ai_helm/templates/node-exporter-daemonset.yaml @@ -0,0 +1,56 @@ +{{- if .Values.nodeExporter.enabled }} +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: {{ include "postgres-ai-monitoring.fullname" . }}-node-exporter + namespace: {{ include "postgres-ai-monitoring.namespace" . }} + labels: + {{- include "postgres-ai-monitoring.labels" . | nindent 4 }} + app.kubernetes.io/component: node-exporter +spec: + selector: + matchLabels: + {{- include "postgres-ai-monitoring.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: node-exporter + template: + metadata: + labels: + {{- include "postgres-ai-monitoring.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: node-exporter + spec: + hostNetwork: true + hostPID: true + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: node-exporter + image: {{ .Values.nodeExporter.image }} + imagePullPolicy: IfNotPresent + args: + - --path.rootfs=/host + - --collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/) + ports: + - name: metrics + containerPort: 9100 + protocol: TCP + volumeMounts: + - name: root + mountPath: /host + readOnly: true + mountPropagation: HostToContainer + {{- with .Values.nodeExporter.resources }} + resources: + {{- toYaml . 
| nindent 12 }} + {{- end }} + volumes: + - name: root + hostPath: + path: / + tolerations: + - effect: NoSchedule + operator: Exists +{{- end }} + + diff --git a/postgres_ai_helm/templates/node-exporter-service.yaml b/postgres_ai_helm/templates/node-exporter-service.yaml new file mode 100644 index 0000000..f10d17f --- /dev/null +++ b/postgres_ai_helm/templates/node-exporter-service.yaml @@ -0,0 +1,23 @@ +{{- if .Values.nodeExporter.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "postgres-ai-monitoring.fullname" . }}-node-exporter + namespace: {{ include "postgres-ai-monitoring.namespace" . }} + labels: + {{- include "postgres-ai-monitoring.labels" . | nindent 4 }} + app.kubernetes.io/component: node-exporter +spec: + type: ClusterIP + clusterIP: None + ports: + - port: 9100 + targetPort: metrics + protocol: TCP + name: metrics + selector: + {{- include "postgres-ai-monitoring.selectorLabels" . | nindent 4 }} + app.kubernetes.io/component: node-exporter +{{- end }} + + diff --git a/postgres_ai_helm/templates/pgwatch-config.yaml b/postgres_ai_helm/templates/pgwatch-config.yaml new file mode 100644 index 0000000..bab8ae5 --- /dev/null +++ b/postgres_ai_helm/templates/pgwatch-config.yaml @@ -0,0 +1,125 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "postgres-ai-monitoring.fullname" . }}-pgwatch-config + namespace: {{ include "postgres-ai-monitoring.namespace" . }} + labels: + {{- include "postgres-ai-monitoring.labels" . | nindent 4 }} +data: + sources-postgres.yml: | + {{- $root := . -}} + {{- range $db := .Values.monitoredDatabases }} + - name: {{ $db.name }} + {{- if $db.connStr }} + conn_str: {{ $db.connStr }} + {{- else }} + # Password will be injected by init container from secrets + conn_str: postgresql://{{ $db.user }}:PASSWORD_PLACEHOLDER_{{ $db.passwordSecretKey }}@{{ $db.host }}:{{ $db.port | default 5432 }}/{{ $db.database }} + {{- end }} + preset_metrics: {{ $db.presetMetrics | default "full" }} + {{- if $db.customMetrics }} + custom_metrics: + {{- toYaml $db.customMetrics | nindent 8 }} + {{- end }} + is_enabled: {{ $db.isEnabled | default true }} + group: {{ $db.group | default "default" }} + {{- $tags := dict -}} + {{- with $root.Values.global.customTags }} + {{- range $key, $value := . }} + {{- $_ := set $tags $key $value }} + {{- end }} + {{- end }} + {{- with $root.Values.global.clusterName }} + {{- if ne . "" }} + {{- $_ := set $tags "cluster" . }} + {{- end }} + {{- end }} + {{- with $root.Values.global.nodeName }} + {{- if ne . "" }} + {{- $_ := set $tags "node_name" . }} + {{- end }} + {{- end }} + {{- with $db.clusterName }} + {{- if ne . "" }} + {{- $_ := set $tags "cluster" . }} + {{- end }} + {{- end }} + {{- with $db.nodeName }} + {{- if ne . "" }} + {{- $_ := set $tags "node_name" . }} + {{- end }} + {{- end }} + {{- with $db.customTags }} + {{- range $key, $value := . }} + {{- $_ := set $tags $key $value }} + {{- end }} + {{- end }} + custom_tags: + {{- if gt (len $tags) 0 }} +{{ toYaml $tags | indent 8 }} + {{- end }} + sink_type: postgresql + {{- end }} + + sources-prometheus.yml: | + {{- $root := . 
-}} + {{- range $db := .Values.monitoredDatabases }} + - name: {{ $db.name }} + {{- if $db.connStr }} + conn_str: {{ $db.connStr }} + {{- else }} + # Password will be injected by init container from secrets + conn_str: postgresql://{{ $db.user }}:PASSWORD_PLACEHOLDER_{{ $db.passwordSecretKey }}@{{ $db.host }}:{{ $db.port | default 5432 }}/{{ $db.database }} + {{- end }} + preset_metrics: {{ $db.presetMetrics | default "full" }} + {{- if $db.customMetrics }} + custom_metrics: + {{- toYaml $db.customMetrics | nindent 8 }} + {{- end }} + is_enabled: {{ $db.isEnabled | default true }} + group: {{ $db.group | default "default" }} + {{- $tags := dict -}} + {{- with $root.Values.global.customTags }} + {{- range $key, $value := . }} + {{- $_ := set $tags $key $value }} + {{- end }} + {{- end }} + {{- with $root.Values.global.clusterName }} + {{- if ne . "" }} + {{- $_ := set $tags "cluster" . }} + {{- end }} + {{- end }} + {{- with $root.Values.global.nodeName }} + {{- if ne . "" }} + {{- $_ := set $tags "node_name" . }} + {{- end }} + {{- end }} + {{- with $db.clusterName }} + {{- if ne . "" }} + {{- $_ := set $tags "cluster" . }} + {{- end }} + {{- end }} + {{- with $db.nodeName }} + {{- if ne . "" }} + {{- $_ := set $tags "node_name" . }} + {{- end }} + {{- end }} + {{- with $db.customTags }} + {{- range $key, $value := . }} + {{- $_ := set $tags $key $value }} + {{- end }} + {{- end }} + custom_tags: + {{- if gt (len $tags) 0 }} +{{ toYaml $tags | indent 8 }} + {{- end }} + sink_type: prometheus + {{- end }} + + metrics-postgres.yml: | +{{ .Files.Get "config/metrics-postgres.yml" | indent 4 }} + + metrics-prometheus.yml: | +{{ .Files.Get "config/metrics-prometheus.yml" | indent 4 }} + + diff --git a/postgres_ai_helm/templates/pgwatch-postgres-deployment.yaml b/postgres_ai_helm/templates/pgwatch-postgres-deployment.yaml new file mode 100644 index 0000000..3183113 --- /dev/null +++ b/postgres_ai_helm/templates/pgwatch-postgres-deployment.yaml @@ -0,0 +1,109 @@ +{{- if .Values.pgwatchPostgres.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "postgres-ai-monitoring.fullname" . }}-pgwatch-postgres + namespace: {{ include "postgres-ai-monitoring.namespace" . }} + labels: + {{- include "postgres-ai-monitoring.labels" . | nindent 4 }} + app.kubernetes.io/component: pgwatch-postgres +spec: + replicas: 1 + selector: + matchLabels: + {{- include "postgres-ai-monitoring.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: pgwatch-postgres + template: + metadata: + annotations: + checksum/config: {{ include (print $.Template.BasePath "/pgwatch-config.yaml") . | sha256sum }} + labels: + {{- include "postgres-ai-monitoring.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: pgwatch-postgres + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + initContainers: + - name: wait-for-sink + image: busybox:1.36 + command: ['sh', '-c', 'until nc -z {{ include "postgres-ai-monitoring.fullname" . 
}}-sink-postgres 5432; do echo waiting for postgres; sleep 2; done'] + - name: inject-passwords + image: busybox:1.36 + command: + - sh + - -c + - | + set -eu + # Copy sources file from configmap + cp /config/sources-postgres.yml /output/sources-postgres.yml + {{- range $db := .Values.monitoredDatabases }} + {{- if not $db.connStr }} + # Replace password placeholder for {{ $db.name }} + if [ -n "${DB_PASSWORD_{{ $db.passwordSecretKey | upper | replace "-" "_" }}:-}" ]; then + sed -i "s|PASSWORD_PLACEHOLDER_{{ $db.passwordSecretKey }}|${DB_PASSWORD_{{ $db.passwordSecretKey | upper | replace "-" "_" }}}|g" /output/sources-postgres.yml + fi + {{- end }} + {{- end }} + volumeMounts: + - name: config-template + mountPath: /config + - name: sources-config + mountPath: /output + env: + {{- range $db := .Values.monitoredDatabases }} + {{- if not $db.connStr }} + - name: DB_PASSWORD_{{ $db.passwordSecretKey | upper | replace "-" "_" }} + valueFrom: + secretKeyRef: + name: {{ include "postgres-ai-monitoring.secretName" $ }} + key: db-password-{{ $db.passwordSecretKey }} + {{- end }} + {{- end }} + containers: + - name: pgwatch + image: {{ .Values.pgwatchPostgres.image }} + imagePullPolicy: IfNotPresent + command: + - sh + - -c + - | + exec /pgwatch/pgwatch \ + --sources=/etc/pgwatch/sources.yml \ + --metrics=/etc/pgwatch/metrics.yml \ + --sink=postgresql://{{ .Values.sinkPostgres.user }}:${POSTGRES_PASSWORD}@{{ include "postgres-ai-monitoring.fullname" . }}-sink-postgres:5432/{{ .Values.sinkPostgres.database }}?sslmode=disable \ + --web-addr=:8080 + env: + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + name: {{ include "postgres-ai-monitoring.secretName" . }} + key: postgres-password + ports: + - name: http + containerPort: 8080 + protocol: TCP + volumeMounts: + - name: sources-config + mountPath: /etc/pgwatch/sources.yml + subPath: sources-postgres.yml + - name: metrics-config + mountPath: /etc/pgwatch/metrics.yml + subPath: metrics-postgres.yml + {{- with .Values.pgwatchPostgres.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + volumes: + - name: config-template + configMap: + name: {{ include "postgres-ai-monitoring.fullname" . }}-pgwatch-config + - name: sources-config + emptyDir: {} + - name: metrics-config + configMap: + name: {{ include "postgres-ai-monitoring.fullname" . }}-pgwatch-config +{{- end }} + + diff --git a/postgres_ai_helm/templates/pgwatch-postgres-service.yaml b/postgres_ai_helm/templates/pgwatch-postgres-service.yaml new file mode 100644 index 0000000..cfb5d2b --- /dev/null +++ b/postgres_ai_helm/templates/pgwatch-postgres-service.yaml @@ -0,0 +1,22 @@ +{{- if .Values.pgwatchPostgres.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "postgres-ai-monitoring.fullname" . }}-pgwatch-postgres + namespace: {{ include "postgres-ai-monitoring.namespace" . }} + labels: + {{- include "postgres-ai-monitoring.labels" . | nindent 4 }} + app.kubernetes.io/component: pgwatch-postgres +spec: + type: ClusterIP + ports: + - port: 8080 + targetPort: http + protocol: TCP + name: http + selector: + {{- include "postgres-ai-monitoring.selectorLabels" . 
| nindent 4 }} + app.kubernetes.io/component: pgwatch-postgres +{{- end }} + + diff --git a/postgres_ai_helm/templates/pgwatch-prometheus-deployment.yaml b/postgres_ai_helm/templates/pgwatch-prometheus-deployment.yaml new file mode 100644 index 0000000..ea92e08 --- /dev/null +++ b/postgres_ai_helm/templates/pgwatch-prometheus-deployment.yaml @@ -0,0 +1,100 @@ +{{- if .Values.pgwatchPrometheus.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "postgres-ai-monitoring.fullname" . }}-pgwatch-prometheus + namespace: {{ include "postgres-ai-monitoring.namespace" . }} + labels: + {{- include "postgres-ai-monitoring.labels" . | nindent 4 }} + app.kubernetes.io/component: pgwatch-prometheus +spec: + replicas: 1 + selector: + matchLabels: + {{- include "postgres-ai-monitoring.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: pgwatch-prometheus + template: + metadata: + annotations: + checksum/config: {{ include (print $.Template.BasePath "/pgwatch-config.yaml") . | sha256sum }} + labels: + {{- include "postgres-ai-monitoring.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: pgwatch-prometheus + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + initContainers: + - name: inject-passwords + image: busybox:1.36 + command: + - sh + - -c + - | + set -eu + # Copy sources file from configmap + cp /config/sources-prometheus.yml /output/sources-prometheus.yml + {{- range $db := .Values.monitoredDatabases }} + {{- if not $db.connStr }} + # Replace password placeholder for {{ $db.name }} + if [ -n "${DB_PASSWORD_{{ $db.passwordSecretKey | upper | replace "-" "_" }}:-}" ]; then + sed -i "s|PASSWORD_PLACEHOLDER_{{ $db.passwordSecretKey }}|${DB_PASSWORD_{{ $db.passwordSecretKey | upper | replace "-" "_" }}}|g" /output/sources-prometheus.yml + fi + {{- end }} + {{- end }} + volumeMounts: + - name: config-template + mountPath: /config + - name: sources-config + mountPath: /output + env: + {{- range $db := .Values.monitoredDatabases }} + {{- if not $db.connStr }} + - name: DB_PASSWORD_{{ $db.passwordSecretKey | upper | replace "-" "_" }} + valueFrom: + secretKeyRef: + name: {{ include "postgres-ai-monitoring.secretName" $ }} + key: db-password-{{ $db.passwordSecretKey }} + {{- end }} + {{- end }} + containers: + - name: pgwatch + image: {{ .Values.pgwatchPrometheus.image }} + imagePullPolicy: IfNotPresent + command: + - /pgwatch/pgwatch + - --sources=/etc/pgwatch/sources.yml + - --metrics=/etc/pgwatch/metrics.yml + - --sink=prometheus://0.0.0.0:9091/pgwatch + - --web-addr=:8089 + ports: + - name: http + containerPort: 8089 + protocol: TCP + - name: metrics + containerPort: 9091 + protocol: TCP + volumeMounts: + - name: sources-config + mountPath: /etc/pgwatch/sources.yml + subPath: sources-prometheus.yml + - name: metrics-config + mountPath: /etc/pgwatch/metrics.yml + subPath: metrics-prometheus.yml + {{- with .Values.pgwatchPrometheus.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + volumes: + - name: config-template + configMap: + name: {{ include "postgres-ai-monitoring.fullname" . }}-pgwatch-config + - name: sources-config + emptyDir: {} + - name: metrics-config + configMap: + name: {{ include "postgres-ai-monitoring.fullname" . 
}}-pgwatch-config +{{- end }} + + diff --git a/postgres_ai_helm/templates/pgwatch-prometheus-service.yaml b/postgres_ai_helm/templates/pgwatch-prometheus-service.yaml new file mode 100644 index 0000000..0c63955 --- /dev/null +++ b/postgres_ai_helm/templates/pgwatch-prometheus-service.yaml @@ -0,0 +1,26 @@ +{{- if .Values.pgwatchPrometheus.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "postgres-ai-monitoring.fullname" . }}-pgwatch-prometheus + namespace: {{ include "postgres-ai-monitoring.namespace" . }} + labels: + {{- include "postgres-ai-monitoring.labels" . | nindent 4 }} + app.kubernetes.io/component: pgwatch-prometheus +spec: + type: ClusterIP + ports: + - port: 8089 + targetPort: http + protocol: TCP + name: http + - port: 9091 + targetPort: metrics + protocol: TCP + name: metrics + selector: + {{- include "postgres-ai-monitoring.selectorLabels" . | nindent 4 }} + app.kubernetes.io/component: pgwatch-prometheus +{{- end }} + + diff --git a/postgres_ai_helm/templates/postgres-exporter-deployment.yaml b/postgres_ai_helm/templates/postgres-exporter-deployment.yaml new file mode 100644 index 0000000..300c15d --- /dev/null +++ b/postgres_ai_helm/templates/postgres-exporter-deployment.yaml @@ -0,0 +1,68 @@ +{{- if .Values.postgresExporter.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "postgres-ai-monitoring.fullname" . }}-postgres-exporter + namespace: {{ include "postgres-ai-monitoring.namespace" . }} + labels: + {{- include "postgres-ai-monitoring.labels" . | nindent 4 }} + app.kubernetes.io/component: postgres-exporter +spec: + replicas: 1 + selector: + matchLabels: + {{- include "postgres-ai-monitoring.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: postgres-exporter + template: + metadata: + labels: + {{- include "postgres-ai-monitoring.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: postgres-exporter + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + initContainers: + - name: wait-for-sink + image: busybox:1.36 + command: ['sh', '-c', 'until nc -z {{ include "postgres-ai-monitoring.fullname" . }}-sink-postgres 5432; do echo waiting for postgres; sleep 2; done'] + containers: + - name: postgres-exporter + image: {{ .Values.postgresExporter.image }} + imagePullPolicy: IfNotPresent + command: + - sh + - -c + - | + export DATA_SOURCE_NAME="postgresql://{{ .Values.sinkPostgres.user }}:${POSTGRES_PASSWORD}@{{ include "postgres-ai-monitoring.fullname" . }}-sink-postgres:5432/{{ .Values.sinkPostgres.database }}?sslmode=disable" + exec /bin/postgres_exporter + env: + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + name: {{ include "postgres-ai-monitoring.secretName" . }} + key: postgres-password + ports: + - name: metrics + containerPort: 9187 + protocol: TCP + {{- with .Values.postgresExporter.resources }} + resources: + {{- toYaml . 
| nindent 12 }} + {{- end }} + livenessProbe: + httpGet: + path: / + port: metrics + initialDelaySeconds: 30 + periodSeconds: 10 + readinessProbe: + httpGet: + path: / + port: metrics + initialDelaySeconds: 5 + periodSeconds: 5 +{{- end }} + + diff --git a/postgres_ai_helm/templates/postgres-exporter-service.yaml b/postgres_ai_helm/templates/postgres-exporter-service.yaml new file mode 100644 index 0000000..d6afad2 --- /dev/null +++ b/postgres_ai_helm/templates/postgres-exporter-service.yaml @@ -0,0 +1,22 @@ +{{- if .Values.postgresExporter.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "postgres-ai-monitoring.fullname" . }}-postgres-exporter + namespace: {{ include "postgres-ai-monitoring.namespace" . }} + labels: + {{- include "postgres-ai-monitoring.labels" . | nindent 4 }} + app.kubernetes.io/component: postgres-exporter +spec: + type: ClusterIP + ports: + - port: 9187 + targetPort: metrics + protocol: TCP + name: metrics + selector: + {{- include "postgres-ai-monitoring.selectorLabels" . | nindent 4 }} + app.kubernetes.io/component: postgres-exporter +{{- end }} + + diff --git a/postgres_ai_helm/templates/prometheus-configmap.yaml b/postgres_ai_helm/templates/prometheus-configmap.yaml new file mode 100644 index 0000000..ba0b21a --- /dev/null +++ b/postgres_ai_helm/templates/prometheus-configmap.yaml @@ -0,0 +1,49 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "postgres-ai-monitoring.fullname" . }}-prometheus-config + namespace: {{ include "postgres-ai-monitoring.namespace" . }} + labels: + {{- include "postgres-ai-monitoring.labels" . | nindent 4 }} +data: + prometheus.yml: | + global: + scrape_interval: {{ .Values.victoriaMetrics.scrapeInterval | default "15s" }} + evaluation_interval: 15s + + scrape_configs: + - job_name: 'pgwatch-prometheus' + static_configs: + - targets: ['{{ include "postgres-ai-monitoring.fullname" . }}-pgwatch-prometheus:9091'] + metrics_path: '/metrics' + + - job_name: 'postgres-exporter' + static_configs: + - targets: ['{{ include "postgres-ai-monitoring.fullname" . }}-postgres-exporter:9187'] + + - job_name: 'node-exporter' + kubernetes_sd_configs: + - role: pod + namespaces: + names: + - {{ include "postgres-ai-monitoring.namespace" . }} + relabel_configs: + - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_component] + action: keep + regex: node-exporter + + - job_name: 'cadvisor' + kubernetes_sd_configs: + - role: pod + namespaces: + names: + - {{ include "postgres-ai-monitoring.namespace" . }} + relabel_configs: + - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_component] + action: keep + regex: cadvisor + - source_labels: [__meta_kubernetes_pod_ip] + target_label: __address__ + replacement: $1:8080 + + diff --git a/postgres_ai_helm/templates/rbac.yaml b/postgres_ai_helm/templates/rbac.yaml new file mode 100644 index 0000000..e5ebc27 --- /dev/null +++ b/postgres_ai_helm/templates/rbac.yaml @@ -0,0 +1,41 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "postgres-ai-monitoring.fullname" . }}-discovery + labels: + {{- include "postgres-ai-monitoring.labels" . 
| nindent 4 }} +rules: + - apiGroups: [""] + resources: + - nodes + - nodes/metrics + - services + - endpoints + - pods + verbs: ["get", "list", "watch"] + - apiGroups: [""] + resources: + - configmaps + verbs: ["get"] + - nonResourceURLs: + - /metrics + verbs: ["get"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "postgres-ai-monitoring.fullname" . }}-discovery + labels: + {{- include "postgres-ai-monitoring.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ include "postgres-ai-monitoring.fullname" . }}-discovery +subjects: + - kind: ServiceAccount + name: {{ include "postgres-ai-monitoring.serviceAccountName" . }} + namespace: {{ include "postgres-ai-monitoring.namespace" . }} +{{- end }} + + diff --git a/postgres_ai_helm/templates/reporter-cronjob.yaml b/postgres_ai_helm/templates/reporter-cronjob.yaml new file mode 100644 index 0000000..da3557f --- /dev/null +++ b/postgres_ai_helm/templates/reporter-cronjob.yaml @@ -0,0 +1,172 @@ +{{- if .Values.reporter.enabled }} +{{- $root := . }} +{{- $seenCombinations := dict }} +{{- $combinations := list }} +{{- range $db := .Values.monitoredDatabases }} + {{- $clusterName := "" }} + {{- $nodeName := "" }} + {{- if $db.clusterName }} + {{- $clusterName = $db.clusterName }} + {{- else if $root.Values.reporter.clusterName }} + {{- $clusterName = $root.Values.reporter.clusterName }} + {{- else if $root.Values.global.clusterName }} + {{- $clusterName = $root.Values.global.clusterName }} + {{- else }} + {{- $clusterName = "k8s-cluster" }} + {{- end }} + {{- if $db.nodeName }} + {{- $nodeName = $db.nodeName }} + {{- else if $root.Values.reporter.nodeName }} + {{- $nodeName = $root.Values.reporter.nodeName }} + {{- else if $root.Values.global.nodeName }} + {{- $nodeName = $root.Values.global.nodeName }} + {{- end }} + {{- $key := printf "%s|%s" $clusterName $nodeName }} + {{- if not (hasKey $seenCombinations $key) }} + {{- $_ := set $seenCombinations $key true }} + {{- $combinations = append $combinations (dict "cluster" $clusterName "nodeName" $nodeName) }} + {{- end }} +{{- end }} +{{- if and (eq (len $combinations) 0) (or $root.Values.reporter.clusterName $root.Values.global.clusterName) }} + {{- $clusterName := "" }} + {{- $nodeName := "" }} + {{- if $root.Values.reporter.clusterName }} + {{- $clusterName = $root.Values.reporter.clusterName }} + {{- else if $root.Values.global.clusterName }} + {{- $clusterName = $root.Values.global.clusterName }} + {{- else }} + {{- $clusterName = "k8s-cluster" }} + {{- end }} + {{- if $root.Values.reporter.nodeName }} + {{- $nodeName = $root.Values.reporter.nodeName }} + {{- else if $root.Values.global.nodeName }} + {{- $nodeName = $root.Values.global.nodeName }} + {{- end }} + {{- $combinations = append $combinations (dict "cluster" $clusterName "nodeName" $nodeName) }} +{{- end }} +{{- range $idx, $combo := $combinations }} +{{- $clusterName := $combo.cluster }} +{{- $nodeName := $combo.nodeName }} +{{- $suffix := "" }} +{{- if gt (len $combinations) 1 }} + {{- $safeCluster := $clusterName | replace "_" "-" | replace "." "-" | lower | trunc 20 }} + {{- $safeNode := $nodeName | replace "_" "-" | replace "." 
"-" | lower | trunc 10 }} + {{- if $safeNode }} + {{- $suffix = printf "-%s-%s" $safeCluster $safeNode }} + {{- else }} + {{- $suffix = printf "-%s" $safeCluster }} + {{- end }} +{{- end }} +--- +apiVersion: batch/v1 +kind: CronJob +metadata: + name: {{ include "postgres-ai-monitoring.fullname" $root }}-reporter{{ $suffix }} + namespace: {{ include "postgres-ai-monitoring.namespace" $root }} + labels: + {{- include "postgres-ai-monitoring.labels" $root | nindent 4 }} + app.kubernetes.io/component: reporter + {{- if gt (len $combinations) 1 }} + postgres.ai/cluster: {{ $clusterName | quote }} + {{- if $nodeName }} + postgres.ai/node: {{ $nodeName | quote }} + {{- end }} + {{- end }} +spec: + schedule: {{ $root.Values.reporter.schedule | quote }} + successfulJobsHistoryLimit: {{ $root.Values.reporter.successfulJobsHistoryLimit | default 3 }} + failedJobsHistoryLimit: {{ $root.Values.reporter.failedJobsHistoryLimit | default 3 }} + jobTemplate: + spec: + template: + metadata: + labels: + {{- include "postgres-ai-monitoring.selectorLabels" $root | nindent 12 }} + app.kubernetes.io/component: reporter + {{- if gt (len $combinations) 1 }} + postgres.ai/cluster: {{ $clusterName | quote }} + {{- if $nodeName }} + postgres.ai/node: {{ $nodeName | quote }} + {{- end }} + {{- end }} + spec: + {{- with $root.Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 12 }} + {{- end }} + restartPolicy: OnFailure + initContainers: + - name: wait-for-victoriametrics + image: busybox:1.36 + command: ['sh', '-c', 'until nc -z {{ include "postgres-ai-monitoring.fullname" $root }}-victoriametrics {{ $root.Values.victoriaMetrics.service.port }}; do echo waiting for victoriametrics; sleep 2; done'] + containers: + - name: reporter + image: {{ $root.Values.reporter.image }} + imagePullPolicy: {{ $root.Values.reporter.imagePullPolicy | default "IfNotPresent" }} + command: + - /bin/sh + - -c + - | + set -eu + if [ -n "${API_KEY:-}" ]; then + exec python postgres_reports.py \ + --prometheus-url "$PROMETHEUS_URL" \ + --postgres-sink-url "postgresql://{{ $root.Values.sinkPostgres.user }}:${POSTGRES_PASSWORD}@{{ include "postgres-ai-monitoring.fullname" $root }}-sink-postgres:5432/{{ $root.Values.sinkPostgres.database }}" \ + --cluster "$CLUSTER" \ + --node-name "$NODE_NAME" \ + --output /app/reports/report.json \ + --api-url "{{ $root.Values.reporter.apiUrl | default "https://postgres.ai/api/general" }}" \ + --project "{{ $root.Values.reporter.project | default "postgres-ai-monitoring" }}" \ + --token "$API_KEY" + else + exec python postgres_reports.py \ + --prometheus-url "$PROMETHEUS_URL" \ + --postgres-sink-url "postgresql://{{ $root.Values.sinkPostgres.user }}:${POSTGRES_PASSWORD}@{{ include "postgres-ai-monitoring.fullname" $root }}-sink-postgres:5432/{{ $root.Values.sinkPostgres.database }}" \ + --cluster "$CLUSTER" \ + --node-name "$NODE_NAME" \ + --output /app/reports/report.json \ + --no-upload + fi + env: + - name: PROMETHEUS_URL + value: "http://{{ include "postgres-ai-monitoring.fullname" $root }}-victoriametrics:{{ $root.Values.victoriaMetrics.service.port }}" + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + name: {{ include "postgres-ai-monitoring.secretName" $root }} + key: postgres-password + - name: CLUSTER + value: {{ $clusterName | default "k8s-cluster" | quote }} + - name: NODE_NAME +{{- if $nodeName }} + value: {{ $nodeName | quote }} +{{- else }} + valueFrom: + fieldRef: + fieldPath: spec.nodeName +{{- end }} + {{- if $root.Values.secrets.pgwatchConfig }} + - name: 
API_KEY + valueFrom: + secretKeyRef: + name: {{ include "postgres-ai-monitoring.secretName" $root }} + key: pgai-api-key + optional: true + {{- end }} + + {{- range $key, $value := $root.Values.reporter.env }} + - name: {{ $key }} + value: {{ $value | quote }} + {{- end }} + {{- with $root.Values.reporter.resources }} + resources: + {{- toYaml . | nindent 16 }} + {{- end }} + volumeMounts: + - name: reports + mountPath: /app/reports + volumes: + - name: reports + emptyDir: {} +{{- end }} +{{- end }} diff --git a/postgres_ai_helm/templates/secret.yaml b/postgres_ai_helm/templates/secret.yaml new file mode 100644 index 0000000..cb6ab34 --- /dev/null +++ b/postgres_ai_helm/templates/secret.yaml @@ -0,0 +1,28 @@ +{{- $createFromValues := .Values.secrets.createFromValues | default false }} +{{- $existingSecretName := "" }} +{{- if .Values.existingSecret }} + {{- $existingSecretName = .Values.existingSecret.name | default "" }} +{{- end }} +{{- if and $createFromValues (or (not $existingSecretName) (eq $existingSecretName "")) }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "postgres-ai-monitoring.secretName" . }} + namespace: {{ include "postgres-ai-monitoring.namespace" . }} + labels: + {{- include "postgres-ai-monitoring.labels" . | nindent 4 }} +type: Opaque +stringData: + postgres-password: {{ .Values.secrets.postgres.password | quote }} + grafana-admin-user: {{ .Values.secrets.grafana.adminUser | quote }} + grafana-admin-password: {{ .Values.secrets.grafana.adminPassword | quote }} + {{- if .Values.secrets.pgwatchConfig }} + pgai-api-key: {{ .Values.secrets.pgwatchConfig.apiKey | default "" | quote }} + {{- end }} + {{- if .Values.secrets.monitoredDatabases }} + {{- range $key, $value := .Values.secrets.monitoredDatabases }} + db-password-{{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} +{{- end }} + diff --git a/postgres_ai_helm/templates/serviceaccount.yaml b/postgres_ai_helm/templates/serviceaccount.yaml new file mode 100644 index 0000000..e2b06a9 --- /dev/null +++ b/postgres_ai_helm/templates/serviceaccount.yaml @@ -0,0 +1,15 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "postgres-ai-monitoring.serviceAccountName" . }} + namespace: {{ include "postgres-ai-monitoring.namespace" . }} + labels: + {{- include "postgres-ai-monitoring.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} + + diff --git a/postgres_ai_helm/templates/sink-postgres-configmap.yaml b/postgres_ai_helm/templates/sink-postgres-configmap.yaml new file mode 100644 index 0000000..77982a6 --- /dev/null +++ b/postgres_ai_helm/templates/sink-postgres-configmap.yaml @@ -0,0 +1,27 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "postgres-ai-monitoring.fullname" . }}-sink-postgres-init + namespace: {{ include "postgres-ai-monitoring.namespace" . }} + labels: + {{- include "postgres-ai-monitoring.labels" . 
| nindent 4 }} + app.kubernetes.io/component: sink-postgres +data: + 00-configure-pg-hba.sh: | + #!/bin/bash + set -e + # Configure pg_hba.conf to allow passwordless connections from within cluster + cat > "$PGDATA/pg_hba.conf" <-postgres-ai-monitoring-secrets (or -secrets) + # Set admin.existingSecret manually if using custom release name or fullnameOverride + admin: + existingSecret: "" # Set to: -postgres-ai-monitoring-secrets (or configure manually) + userKey: grafana-admin-user + passwordKey: grafana-admin-password + plugins: + - yesoreyeram-infinity-datasource + + + persistence: + enabled: true + size: 5Gi + storageClassName: "" + + service: + type: ClusterIP + port: 80 + + ingress: + enabled: false + + sidecar: + datasources: + enabled: true + label: grafana_datasource + dashboards: + enabled: true + label: grafana_dashboard + + dashboardProviders: + dashboardproviders.yaml: + apiVersion: 1 + providers: + - name: default + orgId: 1 + folder: '' + type: file + disableDeletion: false + editable: true + options: + path: /tmp/dashboards \ No newline at end of file diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..d56cd31 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,8 @@ +[pytest] +addopts = -ra --import-mode=importlib +pythonpath = . +testpaths = tests +markers = + unit: Marks fast unit tests that mock external services. + integration: Marks tests that talk to real services like PostgreSQL. + requires_postgres: Alias for tests needing a live Postgres instance. diff --git a/reporter/Dockerfile b/reporter/Dockerfile new file mode 100644 index 0000000..f9fa751 --- /dev/null +++ b/reporter/Dockerfile @@ -0,0 +1,30 @@ +FROM python:3.11-slim + +ARG VERSION +RUN test -n "${VERSION}" || (echo "VERSION build arg is required" && exit 1) + +LABEL org.opencontainers.image.title="PostgresAI Reporter" +LABEL org.opencontainers.image.description="Automated Postgres health check and monitoring reports" +LABEL org.opencontainers.image.vendor="PostgresAI" +LABEL org.opencontainers.image.source="https://github.com/PostgresAI/postgres-ai-monitoring" +LABEL org.opencontainers.image.version="${VERSION}" + +# Set working directory +WORKDIR /app + +# Install dependencies +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy reporter script +COPY postgres_reports.py . + +# Make script executable +RUN chmod +x postgres_reports.py + +# Create reports directory +RUN mkdir -p /app/reports + +# Default command +CMD ["python", "postgres_reports.py"] + diff --git a/reporter/__init__.py b/reporter/__init__.py new file mode 100644 index 0000000..9e176ed --- /dev/null +++ b/reporter/__init__.py @@ -0,0 +1 @@ +"""Reporter package exposing report generation utilities.""" diff --git a/reporter/postgres_reports.py b/reporter/postgres_reports.py index 40d684d..c003146 100644 --- a/reporter/postgres_reports.py +++ b/reporter/postgres_reports.py @@ -6,26 +6,44 @@ by querying Prometheus metrics using PromQL queries. 
""" +__version__ = "1.0.2" + import requests import json import time -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from typing import Dict, List, Any, Optional import argparse import sys import os +import psycopg2 +import psycopg2.extras class PostgresReportGenerator: - def __init__(self, prometheus_url: str = "http://localhost:9090"): + # Default databases to always exclude + DEFAULT_EXCLUDED_DATABASES = {'template0', 'template1', 'rdsadmin', 'azure_maintenance', 'cloudsqladmin'} + + def __init__(self, prometheus_url: str = "http://sink-prometheus:9090", + postgres_sink_url: str = "postgresql://pgwatch@sink-postgres:5432/measurements", + excluded_databases: Optional[List[str]] = None): """ Initialize the PostgreSQL report generator. Args: - prometheus_url: URL of the Prometheus instance + prometheus_url: URL of the Prometheus instance (default: http://sink-prometheus:9090) + postgres_sink_url: Connection string for the Postgres sink database + (default: postgresql://pgwatch@sink-postgres:5432/measurements) + excluded_databases: Additional databases to exclude from reports """ self.prometheus_url = prometheus_url self.base_url = f"{prometheus_url}/api/v1" + self.postgres_sink_url = postgres_sink_url + self.pg_conn = None + # Combine default exclusions with user-provided exclusions + self.excluded_databases = self.DEFAULT_EXCLUDED_DATABASES.copy() + if excluded_databases: + self.excluded_databases.update(excluded_databases) def test_connection(self) -> bool: """Test connection to Prometheus.""" @@ -36,6 +54,84 @@ def test_connection(self) -> bool: print(f"Connection failed: {e}") return False + def connect_postgres_sink(self) -> bool: + """Connect to Postgres sink database.""" + if not self.postgres_sink_url: + return False + + try: + self.pg_conn = psycopg2.connect(self.postgres_sink_url) + return True + except Exception as e: + print(f"Postgres sink connection failed: {e}") + return False + + def close_postgres_sink(self): + """Close Postgres sink connection.""" + if self.pg_conn: + self.pg_conn.close() + self.pg_conn = None + + def get_index_definitions_from_sink(self, db_name: str = None) -> Dict[str, str]: + """ + Get index definitions from the Postgres sink database. + + Args: + db_name: Optional database name to filter results + + Returns: + Dictionary mapping index names to their definitions + """ + if not self.pg_conn: + if not self.connect_postgres_sink(): + return {} + + index_definitions = {} + + try: + with self.pg_conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor: + # Query the index_definitions table for the most recent data + # + # PERFORMANCE NOTE: This query will use a Seq Scan on index_definitions table. + # This is acceptable because: + # 1. This method is called VERY rarely (only during report generation) + # 2. The table size is expected to remain small (< 10000 rows per database) + # 3. Current latency is well under 1 second for typical workloads + # + # If the table grows significantly larger (>> 10000 rows) or latency exceeds 1s, + # consider adding a GIN index on the data JSONB column or materialized view. 
+                if db_name:
+                    query = """
+                        select distinct on (data->>'indexrelname')
+                            data->>'indexrelname' as indexrelname,
+                            data->>'index_definition' as index_definition,
+                            dbname
+                        from public.index_definitions
+                        where dbname = %s
+                        order by data->>'indexrelname', time desc
+                    """
+                    cursor.execute(query, (db_name,))
+                else:
+                    query = """
+                        select distinct on (dbname, data->>'indexrelname')
+                            data->>'indexrelname' as indexrelname,
+                            data->>'index_definition' as index_definition,
+                            dbname
+                        from public.index_definitions
+                        order by dbname, data->>'indexrelname', time desc
+                    """
+                    cursor.execute(query)
+
+                for row in cursor.fetchall():
+                    if row['indexrelname']:
+                        # Include database name in the key to avoid collisions across databases
+                        key = f"{row['dbname']}.{row['indexrelname']}" if not db_name else row['indexrelname']
+                        index_definitions[key] = row['index_definition']
+
+        except Exception as e:
+            print(f"Error fetching index definitions from Postgres sink: {e}")
+
+        return index_definitions
+
     def query_instant(self, query: str) -> Dict[str, Any]:
         """
         Execute an instant PromQL query.
@@ -59,6 +155,62 @@ def query_instant(self, query: str) -> Dict[str, Any]:
             print(f"Query error: {e}")
             return {}

+    def _get_postgres_version_info(self, cluster: str, node_name: str) -> Dict[str, str]:
+        """
+        Fetch and parse Postgres version information from pgwatch settings metrics.
+
+        Notes:
+        - This helper is intentionally defensive: it validates the returned setting_name label
+          (tests may stub query responses broadly by metric name substring).
+        - Uses a single query with a regex on setting_name to reduce roundtrips.
+        """
+        query = (
+            f'last_over_time(pgwatch_settings_configured{{'
+            f'cluster="{cluster}", node_name="{node_name}", '
+            f'setting_name=~"server_version|server_version_num"}}[3h])'
+        )
+
+        result = self.query_instant(query)
+        version_str = None
+        version_num = None
+
+        if result.get("status") == "success":
+            if result.get("data", {}).get("result"):
+                for item in result["data"]["result"]:
+                    metric = item.get("metric", {}) or {}
+                    setting_name = metric.get("setting_name", "")
+                    setting_value = metric.get("setting_value", "")
+                    if setting_name == "server_version" and setting_value:
+                        version_str = setting_value
+                    elif setting_name == "server_version_num" and setting_value:
+                        version_num = setting_value
+            else:
+                print(f"Warning: No version data found (cluster={cluster}, node_name={node_name})")
+        else:
+            print(f"Warning: Version query failed (cluster={cluster}, node_name={node_name}): status={result.get('status')}")
+
+        server_version = version_str or "Unknown"
+        version_info: Dict[str, str] = {
+            "version": server_version,
+            "server_version_num": version_num or "Unknown",
+            "server_major_ver": "Unknown",
+            "server_minor_ver": "Unknown",
+        }
+
+        if server_version != "Unknown":
+            # Handle both formats:
+            # - "15.3"
+            # - "15.3 (Ubuntu 15.3-1.pgdg20.04+1)"
+            version_parts = server_version.split()[0].split(".")
+            if len(version_parts) >= 1 and version_parts[0]:
+                version_info["server_major_ver"] = version_parts[0]
+            if len(version_parts) >= 2:
+                version_info["server_minor_ver"] = ".".join(version_parts[1:])
+            else:
+                version_info["server_minor_ver"] = "0"
+
+        return version_info
+
     def generate_a002_version_report(self, cluster: str = "local", node_name: str = "node-01") -> Dict[str, Any]:
         """
         Generate A002 Version Information report.
@@ -70,35 +222,8 @@ def generate_a002_version_report(self, cluster: str = "local", node_name: str = Returns: Dictionary containing version information """ - print("Generating A002 Version Information report...") - settings_query = f'pgwatch_settings_configured{{cluster="{cluster}", node_name="{node_name}"}}' - # Query PostgreSQL version information - - version_queries = { - 'server_version': f'pgwatch_settings_configured{{cluster="{cluster}", node_name="{node_name}", setting_name="server_version"}}', - 'server_version_num': f'pgwatch_settings_configured{{cluster="{cluster}", node_name="{node_name}", setting_name="server_version_num"}}', - 'max_connections': f'pgwatch_settings_configured{{cluster="{cluster}", node_name="{node_name}", setting_name="max_connections"}}', - 'shared_buffers': f'pgwatch_settings_configured{{cluster="{cluster}", node_name="{node_name}", setting_name="shared_buffers"}}', - 'effective_cache_size': f'pgwatch_settings_configured{{cluster="{cluster}", node_name="{node_name}", setting_name="effective_cache_size"}}', - } - - version_data = {} - for metric_name, query in version_queries.items(): - result = self.query_instant(query) - if result.get('status') == 'success' and result.get('data', {}).get('result'): - latest_value = result['data']['result'][0]['metric'].get('setting_value', None) - version_data[metric_name] = latest_value - - # Format the version data - version_info = { - "version": version_data.get('server_version', 'Unknown'), - "server_version_num": version_data.get('server_version_num', 'Unknown'), - "server_major_ver": version_data.get('server_version', '').split('.')[0] if version_data.get( - 'server_version') else 'Unknown', - "server_minor_ver": version_data.get('server_version', '').split('.', 1)[1] if version_data.get( - 'server_version') and '.' 
in version_data.get('server_version', '') else 'Unknown' - } - + print(f"Generating A002 Version Information report for cluster='{cluster}', node_name='{node_name}'...") + version_info = self._get_postgres_version_info(cluster, node_name) return self.format_report_data("A002", {"version": version_info}, node_name) def generate_a003_settings_report(self, cluster: str = "local", node_name: str = "node-01") -> Dict[str, Any]: @@ -114,17 +239,21 @@ def generate_a003_settings_report(self, cluster: str = "local", node_name: str = """ print("Generating A003 PostgreSQL Settings report...") - # Query all PostgreSQL settings using the pgwatch_settings_setting metric + # Query all PostgreSQL settings using the pgwatch_settings_configured metric with last_over_time # This metric has labels for each setting name - settings_query = f'pgwatch_settings_configured{{cluster="{cluster}", node_name="{node_name}"}}' + settings_query = f'last_over_time(pgwatch_settings_configured{{cluster="{cluster}", node_name="{node_name}"}}[3h])' result = self.query_instant(settings_query) settings_data = {} if result.get('status') == 'success' and result.get('data', {}).get('result'): for item in result['data']['result']: # Extract setting name from labels - setting_name = item['metric'].get('setting_name', 'unknown') + setting_name = item['metric'].get('setting_name', '') setting_value = item['metric'].get('setting_value', '') + + # Skip if we don't have a setting name + if not setting_name: + continue # Get additional metadata from labels category = item['metric'].get('category', 'Other') @@ -140,8 +269,12 @@ def generate_a003_settings_report(self, cluster: str = "local", node_name: str = "vartype": vartype, "pretty_value": self.format_setting_value(setting_name, setting_value, unit) } + else: + print(f"Warning: A003 - No settings data returned for cluster={cluster}, node_name={node_name}") + print(f"Query result status: {result.get('status')}") + print(f"Query result data: {result.get('data', {})}") - return self.format_report_data("A003", settings_data, node_name) + return self.format_report_data("A003", settings_data, node_name, postgres_version=self._get_postgres_version_info(cluster, node_name)) def generate_a004_cluster_report(self, cluster: str = "local", node_name: str = "node-01") -> Dict[str, Any]: """ @@ -158,16 +291,16 @@ def generate_a004_cluster_report(self, cluster: str = "local", node_name: str = # Query cluster information cluster_queries = { - 'active_connections': f'sum(pgwatch_pg_stat_activity_count{{cluster="{cluster}", node_name="{node_name}", state="active"}})', - 'idle_connections': f'sum(pgwatch_pg_stat_activity_count{{cluster="{cluster}", node_name="{node_name}", state="idle"}})', - 'total_connections': f'sum(pgwatch_pg_stat_activity_count{{cluster="{cluster}", node_name="{node_name}"}})', - 'database_size': f'sum(pgwatch_pg_database_size_bytes{{cluster="{cluster}", node_name="{node_name}"}})', - 'cache_hit_ratio': f'sum(pgwatch_db_stats_blks_hit{{cluster="{cluster}", node_name="{node_name}"}}) / (sum(pgwatch_db_stats_blks_hit{{cluster="{cluster}", node_name="{node_name}"}}) + sum(pgwatch_db_stats_blks_read{{cluster="{cluster}", node_name="{node_name}"}})) * 100', + 'active_connections': f'sum(last_over_time(pgwatch_pg_stat_activity_count{{cluster="{cluster}", node_name="{node_name}", state="active"}}[3h]))', + 'idle_connections': f'sum(last_over_time(pgwatch_pg_stat_activity_count{{cluster="{cluster}", node_name="{node_name}", state="idle"}}[3h]))', + 'total_connections': 
f'sum(last_over_time(pgwatch_pg_stat_activity_count{{cluster="{cluster}", node_name="{node_name}"}}[3h]))', + 'database_sizes': f'sum(last_over_time(pgwatch_db_size_size_b{{cluster="{cluster}", node_name="{node_name}"}}[3h]))', + 'cache_hit_ratio': f'sum(last_over_time(pgwatch_db_stats_blks_hit{{cluster="{cluster}", node_name="{node_name}"}}[3h])) / clamp_min(sum(last_over_time(pgwatch_db_stats_blks_hit{{cluster="{cluster}", node_name="{node_name}"}}[3h])) + sum(last_over_time(pgwatch_db_stats_blks_read{{cluster="{cluster}", node_name="{node_name}"}}[3h])), 1) * 100', 'transactions_per_sec': f'sum(rate(pgwatch_db_stats_xact_commit{{cluster="{cluster}", node_name="{node_name}"}}[5m])) + sum(rate(pgwatch_db_stats_xact_rollback{{cluster="{cluster}", node_name="{node_name}"}}[5m]))', 'checkpoints_per_sec': f'sum(rate(pgwatch_pg_stat_bgwriter_checkpoints_timed{{cluster="{cluster}", node_name="{node_name}"}}[5m])) + sum(rate(pgwatch_pg_stat_bgwriter_checkpoints_req{{cluster="{cluster}", node_name="{node_name}"}}[5m]))', - 'deadlocks': f'sum(pgwatch_db_stats_deadlocks{{cluster="{cluster}", node_name="{node_name}"}})', - 'temp_files': f'sum(pgwatch_db_stats_temp_files{{cluster="{cluster}", node_name="{node_name}"}})', - 'temp_bytes': f'sum(pgwatch_db_stats_temp_bytes{{cluster="{cluster}", node_name="{node_name}"}})', + 'deadlocks': f'sum(last_over_time(pgwatch_db_stats_deadlocks{{cluster="{cluster}", node_name="{node_name}"}}[3h]))', + 'temp_files': f'sum(last_over_time(pgwatch_db_stats_temp_files{{cluster="{cluster}", node_name="{node_name}"}}[3h]))', + 'temp_bytes': f'sum(last_over_time(pgwatch_db_stats_temp_bytes{{cluster="{cluster}", node_name="{node_name}"}}[3h]))', } cluster_data = {} @@ -184,7 +317,7 @@ def generate_a004_cluster_report(self, cluster: str = "local", node_name: str = } # Get database sizes - db_sizes_query = f'pgwatch_pg_database_size_bytes{{cluster="{cluster}", node_name="{node_name}"}}' + db_sizes_query = f'last_over_time(pgwatch_db_size_size_b{{cluster="{cluster}", node_name="{node_name}"}}[3h])' db_sizes_result = self.query_instant(db_sizes_query) database_sizes = {} @@ -194,10 +327,15 @@ def generate_a004_cluster_report(self, cluster: str = "local", node_name: str = size_bytes = float(result['value'][1]) database_sizes[db_name] = size_bytes - return self.format_report_data("A004", { - "general_info": cluster_data, - "database_sizes": database_sizes - }, node_name) + return self.format_report_data( + "A004", + { + "general_info": cluster_data, + "database_sizes": database_sizes, + }, + node_name, + postgres_version=self._get_postgres_version_info(cluster, node_name), + ) def generate_a007_altered_settings_report(self, cluster: str = "local", node_name: str = "node-01") -> Dict[ str, Any]: @@ -213,22 +351,24 @@ def generate_a007_altered_settings_report(self, cluster: str = "local", node_nam """ print("Generating A007 Altered Settings report...") - # Query settings by source using the pgwatch_settings_setting metric - settings_by_source_query = f'pgwatch_settings_is_default{{cluster="{cluster}", node_name="{node_name}"}} < 1' + # Query settings by source using the pgwatch_settings_is_default metric with last_over_time + # This returns settings where is_default = 0 (i.e., non-default/altered settings) + settings_by_source_query = f'last_over_time(pgwatch_settings_is_default{{cluster="{cluster}", node_name="{node_name}"}}[3h]) < 1' result = self.query_instant(settings_by_source_query) - settings_count = {} - changes = [] - + altered_settings = {} if result.get('status') == 
'success' and result.get('data', {}).get('result'): - # Group settings by source - altered_settings = {} for item in result['data']['result']: - # Extract source from labels - setting_name = item['metric'].get('setting_name', 'unknown') - value = item['metric'].get('setting_value', 'unknown') + # Extract setting information from labels + setting_name = item['metric'].get('setting_name', '') + value = item['metric'].get('setting_value', '') unit = item['metric'].get('unit', '') - category = item['metric'].get('category', 'unknown') + category = item['metric'].get('category', 'Other') + + # Skip if we don't have a setting name + if not setting_name: + continue + pretty_value = self.format_setting_value(setting_name, value, unit) altered_settings[setting_name] = { "value": value, @@ -236,8 +376,11 @@ def generate_a007_altered_settings_report(self, cluster: str = "local", node_nam "category": category, "pretty_value": pretty_value } + else: + print(f"Warning: A007 - No altered settings data returned for cluster={cluster}, node_name={node_name}") + print(f"Query result status: {result.get('status')}") - return self.format_report_data("A007", altered_settings, node_name) + return self.format_report_data("A007", altered_settings, node_name, postgres_version=self._get_postgres_version_info(cluster, node_name)) def generate_h001_invalid_indexes_report(self, cluster: str = "local", node_name: str = "node-01") -> Dict[ str, Any]: @@ -256,10 +399,21 @@ def generate_h001_invalid_indexes_report(self, cluster: str = "local", node_name # Get all databases databases = self.get_all_databases(cluster, node_name) + # Get database sizes + db_sizes_query = f'last_over_time(pgwatch_db_size_size_b{{cluster="{cluster}", node_name="{node_name}"}}[3h])' + db_sizes_result = self.query_instant(db_sizes_query) + database_sizes = {} + + if db_sizes_result.get('status') == 'success' and db_sizes_result.get('data', {}).get('result'): + for result in db_sizes_result['data']['result']: + db_name = result['metric'].get('datname', 'unknown') + size_bytes = float(result['value'][1]) + database_sizes[db_name] = size_bytes + invalid_indexes_by_db = {} for db_name in databases: # Query invalid indexes for each database - invalid_indexes_query = f'pgwatch_pg_invalid_indexes{{cluster="{cluster}", node_name="{node_name}", datname="{db_name}"}}' + invalid_indexes_query = f'last_over_time(pgwatch_pg_invalid_indexes{{cluster="{cluster}", node_name="{node_name}", datname="{db_name}"}}[3h])' result = self.query_instant(invalid_indexes_query) invalid_indexes = [] @@ -290,35 +444,82 @@ def generate_h001_invalid_indexes_report(self, cluster: str = "local", node_name invalid_indexes.append(invalid_index) total_size += index_size_bytes + db_size_bytes = database_sizes.get(db_name, 0) invalid_indexes_by_db[db_name] = { "invalid_indexes": invalid_indexes, "total_count": len(invalid_indexes), "total_size_bytes": total_size, - "total_size_pretty": self.format_bytes(total_size) + "total_size_pretty": self.format_bytes(total_size), + "database_size_bytes": db_size_bytes, + "database_size_pretty": self.format_bytes(db_size_bytes) } - return self.format_report_data("H001", invalid_indexes_by_db, node_name) + return self.format_report_data( + "H001", + invalid_indexes_by_db, + node_name, + postgres_version=self._get_postgres_version_info(cluster, node_name), + ) def generate_h002_unused_indexes_report(self, cluster: str = "local", node_name: str = "node-01") -> Dict[str, Any]: """ - Generate H002 Unused and rarely used Indexes report. 
+ Generate H002 Unused Indexes report. Args: cluster: Cluster name node_name: Node name Returns: - Dictionary containing unused and rarely used indexes information + Dictionary containing unused indexes information """ - print("Generating H002 Unused and rarely used Indexes report...") + print("Generating H002 Unused Indexes report...") # Get all databases databases = self.get_all_databases(cluster, node_name) + # Get database sizes + db_sizes_query = f'last_over_time(pgwatch_db_size_size_b{{cluster="{cluster}", node_name="{node_name}"}}[3h])' + db_sizes_result = self.query_instant(db_sizes_query) + database_sizes = {} + + if db_sizes_result.get('status') == 'success' and db_sizes_result.get('data', {}).get('result'): + for result in db_sizes_result['data']['result']: + db_name = result['metric'].get('datname', 'unknown') + size_bytes = float(result['value'][1]) + database_sizes[db_name] = size_bytes + + # Query postmaster uptime to get startup time + postmaster_uptime_query = f'last_over_time(pgwatch_db_stats_postmaster_uptime_s{{cluster="{cluster}", node_name="{node_name}"}}[3h])' + postmaster_uptime_result = self.query_instant(postmaster_uptime_query) + + postmaster_startup_time = None + postmaster_startup_epoch = None + if postmaster_uptime_result.get('status') == 'success' and postmaster_uptime_result.get('data', {}).get('result'): + uptime_seconds = float(postmaster_uptime_result['data']['result'][0]['value'][1]) if postmaster_uptime_result['data']['result'] else None + if uptime_seconds: + postmaster_startup_epoch = datetime.now().timestamp() - uptime_seconds + postmaster_startup_time = datetime.fromtimestamp(postmaster_startup_epoch).isoformat() + unused_indexes_by_db = {} for db_name in databases: - # Query unused indexes for each database - unused_indexes_query = f'pgwatch_unused_indexes_index_size_bytes{{cluster="{cluster}", node_name="{node_name}", datname="{db_name}"}}' + # Get index definitions from Postgres sink database for this specific database + index_definitions = self.get_index_definitions_from_sink(db_name) + # Query stats_reset timestamp for this database + stats_reset_query = f'last_over_time(pgwatch_stats_reset_stats_reset_epoch{{cluster="{cluster}", node_name="{node_name}", dbname="{db_name}"}}[3h])' + stats_reset_result = self.query_instant(stats_reset_query) + + stats_reset_epoch = None + days_since_reset = None + stats_reset_time = None + + if stats_reset_result.get('status') == 'success' and stats_reset_result.get('data', {}).get('result'): + stats_reset_epoch = float(stats_reset_result['data']['result'][0]['value'][1]) if stats_reset_result['data']['result'] else None + if stats_reset_epoch: + stats_reset_time = datetime.fromtimestamp(stats_reset_epoch).isoformat() + days_since_reset = (datetime.now() - datetime.fromtimestamp(stats_reset_epoch)).days + + # Query unused indexes for each database using last_over_time to get most recent value + unused_indexes_query = f'last_over_time(pgwatch_unused_indexes_index_size_bytes{{cluster="{cluster}", node_name="{node_name}", dbname="{db_name}"}}[3h])' unused_result = self.query_instant(unused_indexes_query) unused_indexes = [] @@ -333,20 +534,24 @@ def generate_h002_unused_indexes_report(self, cluster: str = "local", node_name: index_size_bytes = float(item['value'][1]) if item.get('value') else 0 # Query other related metrics for this index - idx_scan_query = f'pgwatch_unused_indexes_idx_scan{{cluster="{cluster}", node_name="{node_name}", datname="{db_name}", schema_name="{schema_name}", table_name="{table_name}", 
index_name="{index_name}"}}' + idx_scan_query = f'last_over_time(pgwatch_unused_indexes_idx_scan{{cluster="{cluster}", node_name="{node_name}", dbname="{db_name}", schema_name="{schema_name}", table_name="{table_name}", index_name="{index_name}"}}[3h])' idx_scan_result = self.query_instant(idx_scan_query) idx_scan = float(idx_scan_result['data']['result'][0]['value'][1]) if idx_scan_result.get('data', {}).get( 'result') else 0 + # Get index definition from collected metrics + index_definition = index_definitions.get(index_name, 'Definition not available') + index_data = { "schema_name": schema_name, "table_name": table_name, "index_name": index_name, + "index_definition": index_definition, "reason": reason, "idx_scan": idx_scan, "index_size_bytes": index_size_bytes, - "idx_is_btree": item['metric'].get('opclasses', '').startswith('btree'), + "idx_is_btree": item['metric'].get('idx_is_btree', 'false') == 'true', "supports_fk": bool(int(item['metric'].get('supports_fk', 0))) } @@ -354,36 +559,34 @@ def generate_h002_unused_indexes_report(self, cluster: str = "local", node_name: unused_indexes.append(index_data) - # Query rarely used indexes (note: logs show 0 rows, but we'll include the structure) - rarely_used_indexes = [] # Currently empty as per logs - - # Combine and calculate totals - all_indexes = unused_indexes + rarely_used_indexes - total_unused_size = sum(idx['index_size_bytes'] for idx in unused_indexes) - total_rarely_used_size = sum(idx['index_size_bytes'] for idx in rarely_used_indexes) - total_size = total_unused_size + total_rarely_used_size - # Sort by index size descending - all_indexes.sort(key=lambda x: x['index_size_bytes'], reverse=True) + unused_indexes.sort(key=lambda x: x['index_size_bytes'], reverse=True) + + total_unused_size = sum(idx['index_size_bytes'] for idx in unused_indexes) + db_size_bytes = database_sizes.get(db_name, 0) unused_indexes_by_db[db_name] = { "unused_indexes": unused_indexes, - "rarely_used_indexes": rarely_used_indexes, - "all_indexes": all_indexes, - "summary": { - "total_unused_count": len(unused_indexes), - "total_rarely_used_count": len(rarely_used_indexes), - "total_count": len(all_indexes), - "total_unused_size_bytes": total_unused_size, - "total_rarely_used_size_bytes": total_rarely_used_size, - "total_size_bytes": total_size, - "total_unused_size_pretty": self.format_bytes(total_unused_size), - "total_rarely_used_size_pretty": self.format_bytes(total_rarely_used_size), - "total_size_pretty": self.format_bytes(total_size) + "total_count": len(unused_indexes), + "total_size_bytes": total_unused_size, + "total_size_pretty": self.format_bytes(total_unused_size), + "database_size_bytes": db_size_bytes, + "database_size_pretty": self.format_bytes(db_size_bytes), + "stats_reset": { + "stats_reset_epoch": stats_reset_epoch, + "stats_reset_time": stats_reset_time, + "days_since_reset": days_since_reset, + "postmaster_startup_epoch": postmaster_startup_epoch, + "postmaster_startup_time": postmaster_startup_time } } - return self.format_report_data("H002", unused_indexes_by_db, node_name) + return self.format_report_data( + "H002", + unused_indexes_by_db, + node_name, + postgres_version=self._get_postgres_version_info(cluster, node_name), + ) def generate_h004_redundant_indexes_report(self, cluster: str = "local", node_name: str = "node-01") -> Dict[ str, Any]: @@ -402,10 +605,23 @@ def generate_h004_redundant_indexes_report(self, cluster: str = "local", node_na # Get all databases databases = self.get_all_databases(cluster, node_name) + # Get 
database sizes + db_sizes_query = f'last_over_time(pgwatch_db_size_size_b{{cluster="{cluster}", node_name="{node_name}"}}[3h])' + db_sizes_result = self.query_instant(db_sizes_query) + database_sizes = {} + + if db_sizes_result.get('status') == 'success' and db_sizes_result.get('data', {}).get('result'): + for result in db_sizes_result['data']['result']: + db_name = result['metric'].get('datname', 'unknown') + size_bytes = float(result['value'][1]) + database_sizes[db_name] = size_bytes + redundant_indexes_by_db = {} for db_name in databases: - # Query redundant indexes for each database - redundant_indexes_query = f'pgwatch_redundant_indexes_index_size_bytes{{cluster="{cluster}", node_name="{node_name}", datname="{db_name}"}}' + # Fetch index definitions from the sink for this database (used to aid remediation) + index_definitions = self.get_index_definitions_from_sink(db_name) + # Query redundant indexes for each database using last_over_time to get most recent value + redundant_indexes_query = f'last_over_time(pgwatch_redundant_indexes_index_size_bytes{{cluster="{cluster}", node_name="{node_name}", dbname="{db_name}"}}[3h])' result = self.query_instant(redundant_indexes_query) redundant_indexes = [] @@ -424,18 +640,18 @@ def generate_h004_redundant_indexes_report(self, cluster: str = "local", node_na index_size_bytes = float(item['value'][1]) if item.get('value') else 0 # Query other related metrics for this index - table_size_query = f'pgwatch_redundant_indexes_table_size_bytes{{cluster="{cluster}", node_name="{node_name}", datname="{db_name}", schema_name="{schema_name}", table_name="{table_name}", index_name="{index_name}"}}' + table_size_query = f'last_over_time(pgwatch_redundant_indexes_table_size_bytes{{cluster="{cluster}", node_name="{node_name}", dbname="{db_name}", schema_name="{schema_name}", table_name="{table_name}", index_name="{index_name}"}}[3h])' table_size_result = self.query_instant(table_size_query) table_size_bytes = float( table_size_result['data']['result'][0]['value'][1]) if table_size_result.get('data', {}).get( 'result') else 0 - index_usage_query = f'pgwatch_redundant_indexes_index_usage{{cluster="{cluster}", node_name="{node_name}", datname="{db_name}", schema_name="{schema_name}", table_name="{table_name}", index_name="{index_name}"}}' + index_usage_query = f'last_over_time(pgwatch_redundant_indexes_index_usage{{cluster="{cluster}", node_name="{node_name}", dbname="{db_name}", schema_name="{schema_name}", table_name="{table_name}", index_name="{index_name}"}}[3h])' index_usage_result = self.query_instant(index_usage_query) index_usage = float(index_usage_result['data']['result'][0]['value'][1]) if index_usage_result.get( 'data', {}).get('result') else 0 - supports_fk_query = f'pgwatch_redundant_indexes_supports_fk{{cluster="{cluster}", node_name="{node_name}", datname="{db_name}", schema_name="{schema_name}", table_name="{table_name}", index_name="{index_name}"}}' + supports_fk_query = f'last_over_time(pgwatch_redundant_indexes_supports_fk{{cluster="{cluster}", node_name="{node_name}", dbname="{db_name}", schema_name="{schema_name}", table_name="{table_name}", index_name="{index_name}"}}[3h])' supports_fk_result = self.query_instant(supports_fk_query) supports_fk = bool( int(supports_fk_result['data']['result'][0]['value'][1])) if supports_fk_result.get('data', @@ -453,6 +669,7 @@ def generate_h004_redundant_indexes_report(self, cluster: str = "local", node_na "table_size_bytes": table_size_bytes, "index_usage": index_usage, "supports_fk": supports_fk, + 
"index_definition": index_definitions.get(index_name, 'Definition not available'), "index_size_pretty": self.format_bytes(index_size_bytes), "table_size_pretty": self.format_bytes(table_size_bytes) } @@ -463,14 +680,22 @@ def generate_h004_redundant_indexes_report(self, cluster: str = "local", node_na # Sort by index size descending redundant_indexes.sort(key=lambda x: x['index_size_bytes'], reverse=True) + db_size_bytes = database_sizes.get(db_name, 0) redundant_indexes_by_db[db_name] = { "redundant_indexes": redundant_indexes, "total_count": len(redundant_indexes), "total_size_bytes": total_size, - "total_size_pretty": self.format_bytes(total_size) + "total_size_pretty": self.format_bytes(total_size), + "database_size_bytes": db_size_bytes, + "database_size_pretty": self.format_bytes(db_size_bytes) } - return self.format_report_data("H004", redundant_indexes_by_db, node_name) + return self.format_report_data( + "H004", + redundant_indexes_by_db, + node_name, + postgres_version=self._get_postgres_version_info(cluster, node_name), + ) def generate_d004_pgstat_settings_report(self, cluster: str = "local", node_name: str = "node-01") -> Dict[ str, Any]: @@ -501,17 +726,21 @@ def generate_d004_pgstat_settings_report(self, cluster: str = "local", node_name 'track_wal_io_timing' ] - # Query all PostgreSQL settings for pg_stat_statements and related - settings_query = f'pgwatch_settings_configured{{cluster="{cluster}", node_name="{node_name}"}}' + # Query all PostgreSQL settings for pg_stat_statements and related using last_over_time + settings_query = f'last_over_time(pgwatch_settings_configured{{cluster="{cluster}", node_name="{node_name}"}}[3h])' result = self.query_instant(settings_query) pgstat_data = {} if result.get('status') == 'success' and result.get('data', {}).get('result'): for item in result['data']['result']: - setting_name = item['metric'].get('setting_name', 'unknown') + setting_name = item['metric'].get('setting_name', '') + + # Skip if no setting name + if not setting_name: + continue # Filter for pg_stat_statements and related settings - if any(pgstat_setting in setting_name for pgstat_setting in pgstat_settings): + if setting_name in pgstat_settings: setting_value = item['metric'].get('setting_value', '') category = item['metric'].get('category', 'Statistics') unit = item['metric'].get('unit', '') @@ -526,6 +755,8 @@ def generate_d004_pgstat_settings_report(self, cluster: str = "local", node_name "vartype": vartype, "pretty_value": self.format_setting_value(setting_name, setting_value, unit) } + else: + print(f"Warning: D004 - No settings data returned for cluster={cluster}, node_name={node_name}") # Check if pg_stat_kcache extension is available and working by querying its metrics kcache_status = self._check_pg_stat_kcache_status(cluster, node_name) @@ -533,11 +764,16 @@ def generate_d004_pgstat_settings_report(self, cluster: str = "local", node_name # Check if pg_stat_statements is available and working by querying its metrics pgss_status = self._check_pg_stat_statements_status(cluster, node_name) - return self.format_report_data("D004", { - "settings": pgstat_data, - "pg_stat_statements_status": pgss_status, - "pg_stat_kcache_status": kcache_status - }, node_name) + return self.format_report_data( + "D004", + { + "settings": pgstat_data, + "pg_stat_statements_status": pgss_status, + "pg_stat_kcache_status": kcache_status, + }, + node_name, + postgres_version=self._get_postgres_version_info(cluster, node_name), + ) def _check_pg_stat_kcache_status(self, cluster: str, 
node_name: str) -> Dict[str, Any]: """ @@ -551,9 +787,9 @@ def _check_pg_stat_kcache_status(self, cluster: str, node_name: str) -> Dict[str Dictionary containing pg_stat_kcache status information """ kcache_queries = { - 'exec_user_time': f'pgwatch_pg_stat_kcache_exec_user_time{{cluster="{cluster}", node_name="{node_name}"}}', - 'exec_system_time': f'pgwatch_pg_stat_kcache_exec_system_time{{cluster="{cluster}", node_name="{node_name}"}}', - 'exec_total_time': f'pgwatch_pg_stat_kcache_exec_total_time{{cluster="{cluster}", node_name="{node_name}"}}' + 'exec_user_time': f'last_over_time(pgwatch_pg_stat_kcache_exec_user_time{{cluster="{cluster}", node_name="{node_name}"}}[3h])', + 'exec_system_time': f'last_over_time(pgwatch_pg_stat_kcache_exec_system_time{{cluster="{cluster}", node_name="{node_name}"}}[3h])', + 'exec_total_time': f'last_over_time(pgwatch_pg_stat_kcache_exec_total_time{{cluster="{cluster}", node_name="{node_name}"}}[3h])' } kcache_status = { @@ -606,7 +842,7 @@ def _check_pg_stat_statements_status(self, cluster: str, node_name: str) -> Dict Returns: Dictionary containing pg_stat_statements status information """ - pgss_query = f'pgwatch_pg_stat_statements_calls{{cluster="{cluster}", node_name="{node_name}"}}' + pgss_query = f'last_over_time(pgwatch_pg_stat_statements_calls{{cluster="{cluster}", node_name="{node_name}"}}[3h])' result = self.query_instant(pgss_query) pgss_status = { @@ -665,6 +901,7 @@ def generate_f001_autovacuum_settings_report(self, cluster: str = "local", node_ 'autovacuum_naptime', 'autovacuum_vacuum_cost_delay', 'autovacuum_vacuum_cost_limit', + 'autovacuum_vacuum_insert_scale_factor', 'autovacuum_vacuum_scale_factor', 'autovacuum_vacuum_threshold', 'autovacuum_work_mem', @@ -679,8 +916,8 @@ def generate_f001_autovacuum_settings_report(self, cluster: str = "local", node_ 'vacuum_multixact_freeze_table_age' ] - # Query all PostgreSQL settings for autovacuum - settings_query = f'pgwatch_settings_configured{{cluster="{cluster}", node_name="{node_name}"}}' + # Query all PostgreSQL settings for autovacuum using last_over_time + settings_query = f'last_over_time(pgwatch_settings_configured{{cluster="{cluster}", node_name="{node_name}"}}[3h])' result = self.query_instant(settings_query) autovacuum_data = {} @@ -705,7 +942,7 @@ def generate_f001_autovacuum_settings_report(self, cluster: str = "local", node_ "pretty_value": self.format_setting_value(setting_name, setting_value, unit) } - return self.format_report_data("F001", autovacuum_data, node_name) + return self.format_report_data("F001", autovacuum_data, node_name, postgres_version=self._get_postgres_version_info(cluster, node_name)) def generate_f005_btree_bloat_report(self, cluster: str = "local", node_name: str = "node-01") -> Dict[str, Any]: """ @@ -723,14 +960,25 @@ def generate_f005_btree_bloat_report(self, cluster: str = "local", node_name: st # Get all databases databases = self.get_all_databases(cluster, node_name) + # Get database sizes + db_sizes_query = f'last_over_time(pgwatch_db_size_size_b{{cluster="{cluster}", node_name="{node_name}"}}[3h])' + db_sizes_result = self.query_instant(db_sizes_query) + database_sizes = {} + + if db_sizes_result.get('status') == 'success' and db_sizes_result.get('data', {}).get('result'): + for result in db_sizes_result['data']['result']: + db_name = result['metric'].get('datname', 'unknown') + size_bytes = float(result['value'][1]) + database_sizes[db_name] = size_bytes + bloated_indexes_by_db = {} for db_name in databases: # Query btree bloat using multiple 
metrics for each database with last_over_time [1d] bloat_queries = { - 'extra_size': f'last_over_time(pgwatch_pg_btree_bloat_extra_size{{cluster="{cluster}", node_name="{node_name}", datname="{db_name}"}}[1d])', - 'extra_pct': f'last_over_time(pgwatch_pg_btree_bloat_extra_pct{{cluster="{cluster}", node_name="{node_name}", datname="{db_name}"}}[1d])', - 'bloat_size': f'last_over_time(pgwatch_pg_btree_bloat_bloat_size{{cluster="{cluster}", node_name="{node_name}", datname="{db_name}"}}[1d])', - 'bloat_pct': f'last_over_time(pgwatch_pg_btree_bloat_bloat_pct{{cluster="{cluster}", node_name="{node_name}", datname="{db_name}"}}[1d])', + 'extra_size': f'last_over_time(pgwatch_pg_btree_bloat_extra_size{{cluster="{cluster}", node_name="{node_name}", datname="{db_name}"}}[3h])', + 'extra_pct': f'last_over_time(pgwatch_pg_btree_bloat_extra_pct{{cluster="{cluster}", node_name="{node_name}", datname="{db_name}"}}[3h])', + 'bloat_size': f'last_over_time(pgwatch_pg_btree_bloat_bloat_size{{cluster="{cluster}", node_name="{node_name}", datname="{db_name}"}}[3h])', + 'bloat_pct': f'last_over_time(pgwatch_pg_btree_bloat_bloat_pct{{cluster="{cluster}", node_name="{node_name}", datname="{db_name}"}}[3h])', } bloated_indexes = {} @@ -773,14 +1021,22 @@ def generate_f005_btree_bloat_report(self, cluster: str = "local", node_name: st # Sort by bloat percentage descending bloated_indexes_list.sort(key=lambda x: x['bloat_pct'], reverse=True) + db_size_bytes = database_sizes.get(db_name, 0) bloated_indexes_by_db[db_name] = { "bloated_indexes": bloated_indexes_list, "total_count": len(bloated_indexes_list), "total_bloat_size_bytes": total_bloat_size, - "total_bloat_size_pretty": self.format_bytes(total_bloat_size) + "total_bloat_size_pretty": self.format_bytes(total_bloat_size), + "database_size_bytes": db_size_bytes, + "database_size_pretty": self.format_bytes(db_size_bytes) } - return self.format_report_data("F005", bloated_indexes_by_db, node_name) + return self.format_report_data( + "F005", + bloated_indexes_by_db, + node_name, + postgres_version=self._get_postgres_version_info(cluster, node_name), + ) def generate_g001_memory_settings_report(self, cluster: str = "local", node_name: str = "node-01") -> Dict[ str, Any]: @@ -823,14 +1079,18 @@ def generate_g001_memory_settings_report(self, cluster: str = "local", node_name 'max_stack_depth' ] - # Query all PostgreSQL settings for memory-related settings - settings_query = f'pgwatch_settings_configured{{cluster="{cluster}", node_name="{node_name}"}}' + # Query all PostgreSQL settings for memory-related settings using last_over_time + settings_query = f'last_over_time(pgwatch_settings_configured{{cluster="{cluster}", node_name="{node_name}"}}[3h])' result = self.query_instant(settings_query) memory_data = {} if result.get('status') == 'success' and result.get('data', {}).get('result'): for item in result['data']['result']: - setting_name = item['metric'].get('setting_name', 'unknown') + setting_name = item['metric'].get('setting_name', '') + + # Skip if no setting name + if not setting_name: + continue # Filter for memory-related settings if setting_name in memory_settings: @@ -848,14 +1108,21 @@ def generate_g001_memory_settings_report(self, cluster: str = "local", node_name "vartype": vartype, "pretty_value": self.format_setting_value(setting_name, setting_value, unit) } + else: + print(f"Warning: G001 - No settings data returned for cluster={cluster}, node_name={node_name}") # Calculate some memory usage estimates and recommendations memory_analysis = 
self._analyze_memory_settings(memory_data) - return self.format_report_data("G001", { - "settings": memory_data, - "analysis": memory_analysis - }, node_name) + return self.format_report_data( + "G001", + { + "settings": memory_data, + "analysis": memory_analysis, + }, + node_name, + postgres_version=self._get_postgres_version_info(cluster, node_name), + ) def _analyze_memory_settings(self, memory_data: Dict[str, Any]) -> Dict[str, Any]: """ @@ -961,16 +1228,31 @@ def generate_f004_heap_bloat_report(self, cluster: str = "local", node_name: str # Get all databases databases = self.get_all_databases(cluster, node_name) + + if not databases: + print("Warning: F004 - No databases found") + + # Get database sizes + db_sizes_query = f'last_over_time(pgwatch_db_size_size_b{{cluster="{cluster}", node_name="{node_name}"}}[3h])' + db_sizes_result = self.query_instant(db_sizes_query) + database_sizes = {} + + if db_sizes_result.get('status') == 'success' and db_sizes_result.get('data', {}).get('result'): + for result in db_sizes_result['data']['result']: + db_name = result['metric'].get('datname', 'unknown') + size_bytes = float(result['value'][1]) + database_sizes[db_name] = size_bytes bloated_tables_by_db = {} for db_name in databases: # Query table bloat using multiple metrics for each database + # Try with 10h window first, then fall back to instant query bloat_queries = { - 'real_size': f'last_over_time(pgwatch_pg_table_bloat_real_size{{cluster="{cluster}", node_name="{node_name}", datname="{db_name}"}}[1d])', - 'extra_size': f'last_over_time(pgwatch_pg_table_bloat_extra_size{{cluster="{cluster}", node_name="{node_name}", datname="{db_name}"}}[1d])', - 'extra_pct': f'last_over_time(pgwatch_pg_table_bloat_extra_pct{{cluster="{cluster}", node_name="{node_name}", datname="{db_name}"}}[1d])', - 'bloat_size': f'last_over_time(pgwatch_pg_table_bloat_bloat_size{{cluster="{cluster}", node_name="{node_name}", datname="{db_name}"}}[1d])', - 'bloat_pct': f'last_over_time(pgwatch_pg_table_bloat_bloat_pct{{cluster="{cluster}", node_name="{node_name}", datname="{db_name}"}}[1d])', + 'real_size': f'last_over_time(pgwatch_pg_table_bloat_real_size{{cluster="{cluster}", node_name="{node_name}", datname="{db_name}"}}[3h])', + 'extra_size': f'last_over_time(pgwatch_pg_table_bloat_extra_size{{cluster="{cluster}", node_name="{node_name}", datname="{db_name}"}}[3h])', + 'extra_pct': f'last_over_time(pgwatch_pg_table_bloat_extra_pct{{cluster="{cluster}", node_name="{node_name}", datname="{db_name}"}}[3h])', + 'bloat_size': f'last_over_time(pgwatch_pg_table_bloat_bloat_size{{cluster="{cluster}", node_name="{node_name}", datname="{db_name}"}}[3h])', + 'bloat_pct': f'last_over_time(pgwatch_pg_table_bloat_bloat_pct{{cluster="{cluster}", node_name="{node_name}", datname="{db_name}"}}[3h])', } bloated_tables = {} @@ -996,6 +1278,9 @@ def generate_f004_heap_bloat_report(self, cluster: str = "local", node_name: str value = float(item['value'][1]) if item.get('value') else 0 bloated_tables[table_key][metric_type] = value + else: + if metric_type == 'real_size': # Only log once per database + print(f"Warning: F004 - No bloat data for database {db_name}, metric {metric_type}") # Convert to list and add pretty formatting bloated_tables_list = [] @@ -1012,14 +1297,22 @@ def generate_f004_heap_bloat_report(self, cluster: str = "local", node_name: str # Sort by bloat percentage descending bloated_tables_list.sort(key=lambda x: x['bloat_pct'], reverse=True) + db_size_bytes = database_sizes.get(db_name, 0) bloated_tables_by_db[db_name] 
= { "bloated_tables": bloated_tables_list, "total_count": len(bloated_tables_list), "total_bloat_size_bytes": total_bloat_size, - "total_bloat_size_pretty": self.format_bytes(total_bloat_size) + "total_bloat_size_pretty": self.format_bytes(total_bloat_size), + "database_size_bytes": db_size_bytes, + "database_size_pretty": self.format_bytes(db_size_bytes) } - return self.format_report_data("F004", bloated_tables_by_db, node_name) + return self.format_report_data( + "F004", + bloated_tables_by_db, + node_name, + postgres_version=self._get_postgres_version_info(cluster, node_name), + ) def generate_k001_query_calls_report(self, cluster: str = "local", node_name: str = "node-01", time_range_minutes: int = 60) -> Dict[str, Any]: @@ -1038,6 +1331,9 @@ def generate_k001_query_calls_report(self, cluster: str = "local", node_name: st # Get all databases databases = self.get_all_databases(cluster, node_name) + + if not databases: + print("Warning: K001 - No databases found") # Calculate time range end_time = datetime.now() @@ -1045,9 +1341,13 @@ def generate_k001_query_calls_report(self, cluster: str = "local", node_name: st queries_by_db = {} for db_name in databases: + print(f"K001: Processing database {db_name}...") # Get pg_stat_statements metrics for this database query_metrics = self._get_pgss_metrics_data_by_db(cluster, node_name, db_name, start_time, end_time) + if not query_metrics: + print(f"Warning: K001 - No query metrics returned for database {db_name}") + # Sort by calls (descending) sorted_metrics = sorted(query_metrics, key=lambda x: x.get('calls', 0), reverse=True) @@ -1069,7 +1369,12 @@ def generate_k001_query_calls_report(self, cluster: str = "local", node_name: st } } - return self.format_report_data("K001", queries_by_db, node_name) + return self.format_report_data( + "K001", + queries_by_db, + node_name, + postgres_version=self._get_postgres_version_info(cluster, node_name), + ) def generate_k003_top_queries_report(self, cluster: str = "local", node_name: str = "node-01", time_range_minutes: int = 60, limit: int = 50) -> Dict[str, Any]: @@ -1089,6 +1394,9 @@ def generate_k003_top_queries_report(self, cluster: str = "local", node_name: st # Get all databases databases = self.get_all_databases(cluster, node_name) + + if not databases: + print("Warning: K003 - No databases found") # Calculate time range end_time = datetime.now() @@ -1096,9 +1404,13 @@ def generate_k003_top_queries_report(self, cluster: str = "local", node_name: st queries_by_db = {} for db_name in databases: + print(f"K003: Processing database {db_name}...") # Get pg_stat_statements metrics for this database query_metrics = self._get_pgss_metrics_data_by_db(cluster, node_name, db_name, start_time, end_time) + if not query_metrics: + print(f"Warning: K003 - No query metrics returned for database {db_name}") + # Sort by total_time (descending) and limit to top N per database sorted_metrics = sorted(query_metrics, key=lambda x: x.get('total_time', 0), reverse=True)[:limit] @@ -1121,13 +1433,18 @@ def generate_k003_top_queries_report(self, cluster: str = "local", node_name: st } } - return self.format_report_data("K003", queries_by_db, node_name) + return self.format_report_data( + "K003", + queries_by_db, + node_name, + postgres_version=self._get_postgres_version_info(cluster, node_name), + ) def _get_pgss_metrics_data(self, cluster: str, node_name: str, start_time: datetime, end_time: datetime) -> List[ Dict[str, Any]]: """ Get pg_stat_statements metrics data between two time points. 
- Adapted from the logic in flask-backend/app.py get_pgss_metrics_csv(). + Adapted from the logic in monitoring_flask_backend/app.py get_pgss_metrics_csv(). Args: cluster: Cluster name @@ -1234,7 +1551,7 @@ def _process_pgss_data(self, start_data: List[Dict], end_data: List[Dict], metric_mapping: Dict[str, str]) -> List[Dict[str, Any]]: """ Process pg_stat_statements data and calculate differences between start and end times. - Adapted from the logic in flask-backend/app.py process_pgss_data(). + Adapted from the logic in monitoring_flask_backend/app.py process_pgss_data(). """ # Convert Prometheus data to dictionaries start_metrics = self._prometheus_to_dict(start_data, start_time) @@ -1316,7 +1633,7 @@ def _process_pgss_data(self, start_data: List[Dict], end_data: List[Dict], def _prometheus_to_dict(self, prom_data: List[Dict], timestamp: datetime) -> Dict: """ Convert Prometheus API response to dictionary keyed by query identifiers. - Adapted from the logic in flask-backend/app.py prometheus_to_dict(). + Adapted from the logic in monitoring_flask_backend/app.py prometheus_to_dict(). """ if not prom_data: return {} @@ -1334,10 +1651,11 @@ def _prometheus_to_dict(self, prom_data: List[Dict], timestamp: datetime) -> Dic closest_value = min(values, key=lambda x: abs(float(x[0]) - timestamp.timestamp())) # Create unique key for this query + # Note: 'user' label may not exist in all metric configurations key = ( metric.get('datname', ''), metric.get('queryid', ''), - metric.get('user', ''), + metric.get('user', metric.get('tag_user', '')), # Fallback to tag_user or empty metric.get('instance', '') ) @@ -1378,36 +1696,132 @@ def format_bytes(self, bytes_value: float) -> str: else: return f"{value:.2f} {units[unit_index]}" - def format_report_data(self, check_id: str, data: Dict[str, Any], host: str = "target-database") -> Dict[str, Any]: + def format_report_data(self, check_id: str, data: Dict[str, Any], host: str = "target-database", + all_hosts: Dict[str, List[str]] = None, + postgres_version: Dict[str, str] = None) -> Dict[str, Any]: """ Format data to match template structure. 
Args: check_id: The check identifier - data: The data to format - host: Host identifier + data: The data to format (can be a dict with node keys if combining multiple nodes) + host: Primary host identifier (used if all_hosts not provided) + all_hosts: Optional dict with 'primary' and 'standbys' keys for multi-node reports + postgres_version: Optional Postgres version info to include at report level Returns: Dictionary formatted for templates """ - now = datetime.now() + now = datetime.now(timezone.utc) + + # If all_hosts is provided, use it; otherwise use the single host as primary + if all_hosts: + hosts = all_hosts + else: + hosts = { + "primary": host, + "standbys": [], + } + + # Handle both single-node and multi-node data structures + if isinstance(data, dict) and any(isinstance(v, dict) and 'data' in v for v in data.values()): + # Multi-node structure: data is already in {node_name: {"data": ...}} format + # postgres_version should already be embedded per-node; warn if passed here + if postgres_version: + print(f"Warning: postgres_version parameter ignored for multi-node data in {check_id}") + results = data + else: + # Single-node structure: wrap data in host key + node_result = {"data": data} + if postgres_version: + node_result["postgres_version"] = postgres_version + results = {host: node_result} template_data = { "checkId": check_id, + "checkTitle": self.get_check_title(check_id), "timestamptz": now.isoformat(), - "hosts": { - "master": host, - "replicas": [] - }, - "results": { - host: { - "data": data - } - } + "nodes": hosts, + "results": results } return template_data + def get_check_title(self, check_id: str) -> str: + """ + Get the human-readable title for a check ID. + + Args: + check_id: The check identifier (e.g., "H004") + + Returns: + Human-readable title for the check + """ + # Mapping based on postgres-checkup README + # https://gitlab.com/postgres-ai/postgres-checkup + check_titles = { + "A001": "System information", + "A002": "Postgres major version", + "A003": "Postgres settings", + "A004": "Cluster information", + "A005": "Extensions", + "A006": "Postgres setting deviations", + "A007": "Altered settings", + "A008": "Disk usage and file system type", + "A010": "Data checksums, wal_log_hints", + "A011": "Connection pooling. pgbouncer", + "A012": "Anti-crash checks", + "A013": "Postgres minor version", + "B001": "SLO/SLA, RPO, RTO", + "B002": "File system, mount flags", + "B003": "Full backups / incremental", + "B004": "WAL archiving", + "B005": "Restore checks, monitoring, alerting", + "C001": "SLO/SLA", + "C002": "Sync/async, Streaming / wal transfer; logical decoding", + "C003": "SPOFs; standby with traffic", + "C004": "Failover", + "C005": "Switchover", + "C006": "Delayed replica", + "C007": "Replication slots. Lags. 
Standby feedbacks", + "D001": "Logging settings", + "D002": "Useful Linux tools", + "D003": "List of monitoring metrics", + "D004": "pg_stat_statements and pg_stat_kcache settings", + "D005": "track_io_timing, auto_explain", + "D006": "Recommended DBA toolsets", + "D007": "Postgres-specific tools for troubleshooting", + "E001": "WAL/checkpoint settings, IO", + "E002": "Checkpoints, bgwriter, IO", + "F001": "Autovacuum: current settings", + "F002": "Autovacuum: transaction ID wraparound check", + "F003": "Autovacuum: dead tuples", + "F004": "Autovacuum: heap bloat (estimated)", + "F005": "Autovacuum: index bloat (estimated)", + "F006": "Precise heap bloat analysis", + "F007": "Precise index bloat analysis", + "F008": "Autovacuum: resource usage", + "G001": "Memory-related settings", + "G002": "Connections and current activity", + "G003": "Timeouts, locks, deadlocks", + "G004": "Query planner", + "G005": "I/O settings", + "G006": "Default_statistics_target", + "H001": "Invalid indexes", + "H002": "Unused indexes", + "H003": "Non-indexed foreign keys", + "H004": "Redundant indexes", + "J001": "Capacity planning", + "K001": "Globally aggregated query metrics", + "K002": "Workload type", + "K003": "Top-50 queries by total_time", + "L001": "Table sizes", + "L002": "Data types being used", + "L003": "Integer out-of-range risks in PKs", + "L004": "Tables without PK/UK", + } + return check_titles.get(check_id, f"Check {check_id}") + def get_setting_unit(self, setting_name: str) -> str: """Get the unit for a PostgreSQL setting.""" units = { @@ -1525,7 +1939,8 @@ def format_setting_value(self, setting_name: str, value: str, unit: str = "") -> return f"{val // 1024} MB" else: return f"{val} kB" - elif setting_name in ['autovacuum_analyze_scale_factor', 'autovacuum_vacuum_scale_factor']: + elif setting_name in ['autovacuum_analyze_scale_factor', 'autovacuum_vacuum_scale_factor', + 'autovacuum_vacuum_insert_scale_factor']: return f"{float(value) * 100:.1f}%" elif setting_name in ['autovacuum', 'track_activities', 'track_counts', 'track_functions', 'track_io_timing', 'track_wal_io_timing', 'pg_stat_statements.track_utility', @@ -1570,37 +1985,172 @@ def get_cluster_metric_description(self, metric_name: str) -> str: } return descriptions.get(metric_name, '') - def generate_all_reports(self, cluster: str = "local", node_name: str = "node-01") -> Dict[str, Any]: + def generate_all_reports(self, cluster: str = "local", node_name: str = None, combine_nodes: bool = True) -> Dict[str, Any]: """ Generate all reports. 
Args: cluster: Cluster name - node_name: Node name + node_name: Node name (if None and combine_nodes=True, will query all nodes) + combine_nodes: If True, combine primary and replica reports into single report Returns: Dictionary containing all reports """ reports = {} - # Generate each report - reports['A002'] = self.generate_a002_version_report(cluster, node_name) - reports['A003'] = self.generate_a003_settings_report(cluster, node_name) - reports['A004'] = self.generate_a004_cluster_report(cluster, node_name) - reports['A007'] = self.generate_a007_altered_settings_report(cluster, node_name) - reports['D004'] = self.generate_d004_pgstat_settings_report(cluster, node_name) - reports['F001'] = self.generate_f001_autovacuum_settings_report(cluster, node_name) - reports['F004'] = self.generate_f004_heap_bloat_report(cluster, node_name) - reports['F005'] = self.generate_f005_btree_bloat_report(cluster, node_name) - reports['G001'] = self.generate_g001_memory_settings_report(cluster, node_name) - reports['H001'] = self.generate_h001_invalid_indexes_report(cluster, node_name) - reports['H002'] = self.generate_h002_unused_indexes_report(cluster, node_name) - reports['H004'] = self.generate_h004_redundant_indexes_report(cluster, node_name) - reports['K001'] = self.generate_k001_query_calls_report(cluster, node_name) - reports['K003'] = self.generate_k003_top_queries_report(cluster, node_name) + # Determine which nodes to process + if combine_nodes and node_name is None: + # Get all nodes and combine them + all_nodes = self.get_all_nodes(cluster) + nodes_to_process = [] + if all_nodes["primary"]: + nodes_to_process.append(all_nodes["primary"]) + nodes_to_process.extend(all_nodes["standbys"]) + + # If no nodes found, fall back to default + if not nodes_to_process: + print(f"Warning: No nodes found in cluster '{cluster}', using default 'node-01'") + nodes_to_process = ["node-01"] + all_nodes = {"primary": "node-01", "standbys": []} + else: + print(f"Combining reports from nodes: {nodes_to_process}") + else: + # Use single node (backward compatibility) + if node_name is None: + node_name = "node-01" + nodes_to_process = [node_name] + all_nodes = {"primary": node_name, "standbys": []} + + # Generate each report type + report_types = [ + ('A002', self.generate_a002_version_report), + ('A003', self.generate_a003_settings_report), + ('A004', self.generate_a004_cluster_report), + ('A007', self.generate_a007_altered_settings_report), + ('D004', self.generate_d004_pgstat_settings_report), + ('F001', self.generate_f001_autovacuum_settings_report), + ('F004', self.generate_f004_heap_bloat_report), + ('F005', self.generate_f005_btree_bloat_report), + ('G001', self.generate_g001_memory_settings_report), + ('H001', self.generate_h001_invalid_indexes_report), + ('H002', self.generate_h002_unused_indexes_report), + ('H004', self.generate_h004_redundant_indexes_report), + ('K001', self.generate_k001_query_calls_report), + ('K003', self.generate_k003_top_queries_report), + ] + + for check_id, report_func in report_types: + if len(nodes_to_process) == 1: + # Single node - generate report normally + reports[check_id] = report_func(cluster, nodes_to_process[0]) + else: + # Multiple nodes - combine reports + combined_results = {} + for node in nodes_to_process: + print(f"Generating {check_id} report for node {node}...") + node_report = report_func(cluster, node) + # Extract the data from the node report + if 'results' in node_report and node in node_report['results']: + combined_results[node] = 
node_report['results'][node] + + # Create combined report with all nodes + reports[check_id] = self.format_report_data( + check_id, + combined_results, + all_nodes["primary"] if all_nodes["primary"] else nodes_to_process[0], + all_nodes + ) return reports + def get_all_clusters(self) -> List[str]: + """ + Get all unique cluster names (projects) from the metrics. + + Returns: + List of cluster names + """ + # Query for all clusters using last_over_time to get recent values + clusters_query = 'last_over_time(pgwatch_settings_configured[3h])' + result = self.query_instant(clusters_query) + + cluster_set = set() + + if result.get('status') == 'success' and result.get('data', {}).get('result'): + for item in result['data']['result']: + cluster_name = item['metric'].get('cluster', '') + if cluster_name: + cluster_set.add(cluster_name) + else: + # Debug output + print(f"Debug - get_all_clusters query status: {result.get('status')}") + print(f"Debug - get_all_clusters result count: {len(result.get('data', {}).get('result', []))}") + + if cluster_set: + print(f"Found {len(cluster_set)} cluster(s): {sorted(list(cluster_set))}") + + return sorted(list(cluster_set)) + + def get_all_nodes(self, cluster: str = "local") -> Dict[str, List[str]]: + """ + Get all nodes (primary and replicas) from the metrics. + Uses pgwatch_db_stats_in_recovery_int to determine primary vs standby. + + Args: + cluster: Cluster name + + Returns: + Dictionary with 'primary' and 'standbys' keys containing node names + """ + # Query for all nodes in the cluster using last_over_time + nodes_query = f'last_over_time(pgwatch_settings_configured{{cluster="{cluster}"}}[3h])' + result = self.query_instant(nodes_query) + + nodes = {"primary": None, "standbys": []} + node_set = set() + + if result.get('status') == 'success' and result.get('data', {}).get('result'): + for item in result['data']['result']: + node_name = item['metric'].get('node_name', '') + if node_name and node_name not in node_set: + node_set.add(node_name) + + # Convert to sorted list + node_list = sorted(list(node_set)) + + if node_list: + print(f" Found {len(node_list)} node(s) in cluster '{cluster}': {node_list}") + else: + print(f" Warning: No nodes found in cluster '{cluster}'") + + # Use pgwatch_db_stats_in_recovery_int to determine primary vs standby + # in_recovery = 0 means primary, in_recovery = 1 means standby + for node_name in node_list: + recovery_query = f'last_over_time(pgwatch_db_stats_in_recovery_int{{cluster="{cluster}", node_name="{node_name}"}}[3h])' + recovery_result = self.query_instant(recovery_query) + + is_standby = False + if recovery_result.get('status') == 'success' and recovery_result.get('data', {}).get('result'): + if recovery_result['data']['result']: + in_recovery_value = float(recovery_result['data']['result'][0]['value'][1]) + is_standby = (in_recovery_value > 0) + print(f" Node '{node_name}': in_recovery={int(in_recovery_value)} ({'standby' if is_standby else 'primary'})") + + if is_standby: + nodes["standbys"].append(node_name) + else: + # First non-standby node becomes primary + if nodes["primary"] is None: + nodes["primary"] = node_name + else: + # If we have multiple primaries (shouldn't happen), treat as replicas + print(f" Warning: Multiple primary nodes detected, treating '{node_name}' as replica") + nodes["standbys"].append(node_name) + + print(f" Result: primary={nodes['primary']}, replicas={nodes['standbys']}") + return nodes + def get_all_databases(self, cluster: str = "local", node_name: str = "node-01") -> List[str]: 
""" Get all databases from the metrics. @@ -1612,25 +2162,41 @@ def get_all_databases(self, cluster: str = "local", node_name: str = "node-01") Returns: List of database names """ - # Query for all databases using pg_stat_database metrics - db_query = f'pgwatch_pg_database_wraparound_age_datfrozenxid{{cluster="{cluster}", node_name="{node_name}", datname!="template1"}}' - result = self.query_instant(db_query) - - databases = [] - if result.get('status') == 'success' and result.get('data', {}).get('result'): - for item in result['data']['result']: - db_name = item['metric'].get('datname', '') - if db_name and db_name not in databases: - databases.append(db_name) - # If no databases found, try alternative query - if not databases: - db_query = f'pgwatch_pg_database_size_bytes{{cluster="{cluster}", node_name="{node_name}"}}' - result = self.query_instant(db_query) - if result.get('status') == 'success' and result.get('data', {}).get('result'): - for item in result['data']['result']: - db_name = item['metric'].get('datname', '') - if db_name and db_name not in databases: - databases.append(db_name) + # Build a source-agnostic database list by unifying labels from: + # 1) Generic per-database metric (wraparound) โ†’ datname + # 2) Custom index reports (unused/redundant) โ†’ dbname + # 3) Btree bloat (for completeness) โ†’ datname + databases: List[str] = [] + database_set = set() + + # Helper to add a name safely + def add_db(name: str) -> None: + if name and name not in self.excluded_databases and name not in database_set: + database_set.add(name) + databases.append(name) + + # 1) Generic per-database metric + wrap_q = f'last_over_time(pgwatch_pg_database_wraparound_age_datfrozenxid{{cluster="{cluster}", node_name="{node_name}"}}[3h])' + wrap_res = self.query_instant(wrap_q) + if wrap_res.get('status') == 'success' and wrap_res.get('data', {}).get('result'): + for item in wrap_res['data']['result']: + add_db(item["metric"].get("datname", "")) + + # 2) Custom reports using dbname + unused_q = f'last_over_time(pgwatch_unused_indexes_index_size_bytes{{cluster="{cluster}", node_name="{node_name}"}}[3h])' + redun_q = f'last_over_time(pgwatch_redundant_indexes_index_size_bytes{{cluster="{cluster}", node_name="{node_name}"}}[3h])' + for q in (unused_q, redun_q): + res = self.query_instant(q) + if res.get('status') == 'success' and res.get('data', {}).get('result'): + for item in res['data']['result']: + add_db(item["metric"].get("dbname", "")) + + # 3) Btree bloat family + bloat_q = f'last_over_time(pgwatch_pg_btree_bloat_bloat_pct{{cluster="{cluster}", node_name="{node_name}"}}[3h])' + bloat_res = self.query_instant(bloat_q) + if bloat_res.get('status') == 'success' and bloat_res.get('data', {}).get('result'): + for item in bloat_res['data']['result']: + add_db(item["metric"].get("datname", "")) return databases @@ -1682,6 +2248,8 @@ def _get_pgss_metrics_data_by_db(self, cluster: str, node_name: str, db_name: st # Get metrics at start and end times start_data = [] end_data = [] + + metrics_found = 0 for metric in all_metrics: metric_with_filters = f'{metric}{filter_str}' @@ -1692,6 +2260,7 @@ def _get_pgss_metrics_data_by_db(self, cluster: str, node_name: str, db_name: st start_time + timedelta(minutes=1)) if start_result: start_data.extend(start_result) + metrics_found += 1 # Query metrics around end time end_result = self.query_range(metric_with_filters, end_time - timedelta(minutes=1), @@ -1702,14 +2271,35 @@ def _get_pgss_metrics_data_by_db(self, cluster: str, node_name: str, db_name: st except 
Exception as e: print(f"Warning: Failed to query metric {metric} for database {db_name}: {e}") continue + + if metrics_found == 0: + print(f"Warning: No pg_stat_statements metrics found for database {db_name}") + print(f" Checked time range: {start_time.isoformat()} to {end_time.isoformat()}") # Process the data to calculate differences - return self._process_pgss_data(start_data, end_data, start_time, end_time, METRIC_NAME_MAPPING) + result = self._process_pgss_data(start_data, end_data, start_time, end_time, METRIC_NAME_MAPPING) + + if not result: + print(f"Warning: _process_pgss_data returned empty result for database {db_name}") + + return result - def create_report(self, api_url, token, project, epoch): + def create_report(self, api_url, token, project_name, epoch): + """ + Create a new report in the API. + + Args: + api_url: API URL + token: API token + project_name: Project name (cluster identifier) + epoch: Epoch identifier + + Returns: + Report ID + """ request_data = { "access_token": token, - "project": project, + "project": project_name, "epoch": epoch, } @@ -1736,6 +2326,7 @@ def upload_report_file(self, api_url, token, report_id, path): "filename": file_name, "data": data, "type": file_type, + "generate_issue": True } response = make_request(api_url, "/rpc/checkup_report_file_post", request_data) @@ -1751,12 +2342,17 @@ def make_request(api_url, endpoint, request_data): def main(): parser = argparse.ArgumentParser(description='Generate PostgreSQL reports using PromQL') - parser.add_argument('--prometheus-url', default='http://localhost:9090', - help='Prometheus URL (default: http://localhost:9090)') - parser.add_argument('--cluster', default='local', - help='Cluster name (default: local)') - parser.add_argument('--node-name', default='node-01', - help='Node name (default: node-01)') + parser.add_argument('--version', action='version', version=f'%(prog)s {__version__}') + parser.add_argument('--prometheus-url', default='http://sink-prometheus:9090', + help='Prometheus URL (default: http://sink-prometheus:9090)') + parser.add_argument('--postgres-sink-url', default='postgresql://pgwatch@sink-postgres:5432/measurements', + help='Postgres sink connection string (default: postgresql://pgwatch@sink-postgres:5432/measurements)') + parser.add_argument('--cluster', default=None, + help='Cluster name (default: auto-detect all clusters)') + parser.add_argument('--node-name', default=None, + help='Node name (default: auto-detect all nodes when combine-nodes is true)') + parser.add_argument('--no-combine-nodes', action='store_true', default=False, + help='Disable combining primary and replica reports into single report') parser.add_argument('--check-id', choices=['A002', 'A003', 'A004', 'A007', 'D004', 'F001', 'F004', 'F005', 'G001', 'H001', 'H002', 'H004', 'K001', 'K003', 'ALL'], @@ -1765,14 +2361,23 @@ def main(): help='Output file (default: stdout)') parser.add_argument('--api-url', default='https://postgres.ai/api/general') parser.add_argument('--token', default='') - parser.add_argument('--project', default='project-name') + parser.add_argument('--project-name', default='project-name', + help='Project name for API upload (default: project-name)') parser.add_argument('--epoch', default='1') parser.add_argument('--no-upload', action='store_true', default=False, help='Do not upload reports to the API') + parser.add_argument('--exclude-databases', type=str, default=None, + help='Comma-separated list of additional databases to exclude from reports ' + f'(default exclusions: {", 
".join(sorted(PostgresReportGenerator.DEFAULT_EXCLUDED_DATABASES))})') args = parser.parse_args() + + # Parse excluded databases + excluded_databases = None + if args.exclude_databases: + excluded_databases = [db.strip() for db in args.exclude_databases.split(',')] - generator = PostgresReportGenerator(args.prometheus_url) + generator = PostgresReportGenerator(args.prometheus_url, args.postgres_sink_url, excluded_databases) # Test connection if not generator.test_connection(): @@ -1780,65 +2385,113 @@ def main(): sys.exit(1) try: - if args.check_id == 'ALL' or args.check_id is None: - # Generate all reports - if not args.no_upload: - report_id = generator.create_report(args.api_url, args.token, args.project, args.epoch) - reports = generator.generate_all_reports(args.cluster, args.node_name) - for report in reports: - json.dump(reports[report], open(f"{report}.json", "w")) - if not args.no_upload: - generator.upload_report_file(args.api_url, args.token, report_id, f"{report}.json") - if args.output == '-': - pass - else: - with open(args.output, 'w') as f: - json.dump(reports, f, indent=2) - print(f"All reports written to {args.output}") + # Discover all clusters if not specified + clusters_to_process = [] + if args.cluster: + clusters_to_process = [args.cluster] else: - # Generate specific report - if args.check_id == 'A002': - report = generator.generate_a002_version_report(args.cluster, args.node_name) - elif args.check_id == 'A003': - report = generator.generate_a003_settings_report(args.cluster, args.node_name) - elif args.check_id == 'A004': - report = generator.generate_a004_cluster_report(args.cluster, args.node_name) - elif args.check_id == 'A007': - report = generator.generate_a007_altered_settings_report(args.cluster, args.node_name) - elif args.check_id == 'D004': - report = generator.generate_d004_pgstat_settings_report(args.cluster, args.node_name) - elif args.check_id == 'F001': - report = generator.generate_f001_autovacuum_settings_report(args.cluster, args.node_name) - elif args.check_id == 'F004': - report = generator.generate_f004_heap_bloat_report(args.cluster, args.node_name) - elif args.check_id == 'F005': - report = generator.generate_f005_btree_bloat_report(args.cluster, args.node_name) - elif args.check_id == 'G001': - report = generator.generate_g001_memory_settings_report(args.cluster, args.node_name) - elif args.check_id == 'G003': - report = generator.generate_g003_database_stats_report(args.cluster, args.node_name) - elif args.check_id == 'H001': - report = generator.generate_h001_invalid_indexes_report(args.cluster, args.node_name) - elif args.check_id == 'H002': - report = generator.generate_h002_unused_indexes_report(args.cluster, args.node_name) - elif args.check_id == 'H004': - report = generator.generate_h004_redundant_indexes_report(args.cluster, args.node_name) - elif args.check_id == 'K001': - report = generator.generate_k001_query_calls_report(args.cluster, args.node_name) - elif args.check_id == 'K003': - report = generator.generate_k003_top_queries_report(args.cluster, args.node_name) - - if args.output == '-': - print(json.dumps(report, indent=2)) + clusters_to_process = generator.get_all_clusters() + if not clusters_to_process: + print("Warning: No clusters found, using default 'local'") + clusters_to_process = ['local'] else: - with open(args.output, 'w') as f: - json.dump(report, f, indent=2) + print(f"Discovered clusters: {clusters_to_process}") + + # Process each cluster + for cluster in clusters_to_process: + print(f"\n{'='*60}") + 
print(f"Processing cluster: {cluster}") + print(f"{'='*60}\n") + + # Set default node_name if not provided and not combining nodes + combine_nodes = not args.no_combine_nodes + if args.node_name is None and not combine_nodes: + args.node_name = "node-01" + + if args.check_id == 'ALL' or args.check_id is None: + # Generate all reports for this cluster if not args.no_upload: - generator.upload_report_file(args.api_url, args.token, args.project, args.epoch, args.output) + # Use cluster name as project name if not specified + project_name = args.project_name if args.project_name != 'project-name' else cluster + report_id = generator.create_report(args.api_url, args.token, project_name, args.epoch) + + reports = generator.generate_all_reports(cluster, args.node_name, combine_nodes) + + # Save reports with cluster name prefix + for report in reports: + output_filename = f"{cluster}_{report}.json" if len(clusters_to_process) > 1 else f"{report}.json" + with open(output_filename, "w") as f: + json.dump(reports[report], f, indent=2) + print(f"Generated report: {output_filename}") + if not args.no_upload: + generator.upload_report_file(args.api_url, args.token, report_id, output_filename) + + if args.output == '-': + pass + elif len(clusters_to_process) == 1: + # Single cluster - use specified output + with open(args.output, 'w') as f: + json.dump(reports, f, indent=2) + print(f"All reports written to {args.output}") + else: + # Multiple clusters - create combined output + combined_output = f"{cluster}_all_reports.json" + with open(combined_output, 'w') as f: + json.dump(reports, f, indent=2) + print(f"All reports for cluster {cluster} written to {combined_output}") + else: + # Generate specific report - use node_name or default + if args.node_name is None: + args.node_name = "node-01" + + if args.check_id == 'A002': + report = generator.generate_a002_version_report(cluster, args.node_name) + elif args.check_id == 'A003': + report = generator.generate_a003_settings_report(cluster, args.node_name) + elif args.check_id == 'A004': + report = generator.generate_a004_cluster_report(cluster, args.node_name) + elif args.check_id == 'A007': + report = generator.generate_a007_altered_settings_report(cluster, args.node_name) + elif args.check_id == 'D004': + report = generator.generate_d004_pgstat_settings_report(cluster, args.node_name) + elif args.check_id == 'F001': + report = generator.generate_f001_autovacuum_settings_report(cluster, args.node_name) + elif args.check_id == 'F004': + report = generator.generate_f004_heap_bloat_report(cluster, args.node_name) + elif args.check_id == 'F005': + report = generator.generate_f005_btree_bloat_report(cluster, args.node_name) + elif args.check_id == 'G001': + report = generator.generate_g001_memory_settings_report(cluster, args.node_name) + elif args.check_id == 'H001': + report = generator.generate_h001_invalid_indexes_report(cluster, args.node_name) + elif args.check_id == 'H002': + report = generator.generate_h002_unused_indexes_report(cluster, args.node_name) + elif args.check_id == 'H004': + report = generator.generate_h004_redundant_indexes_report(cluster, args.node_name) + elif args.check_id == 'K001': + report = generator.generate_k001_query_calls_report(cluster, args.node_name) + elif args.check_id == 'K003': + report = generator.generate_k003_top_queries_report(cluster, args.node_name) + + output_filename = f"{cluster}_{args.check_id}.json" if len(clusters_to_process) > 1 else args.output + + if args.output == '-' and len(clusters_to_process) == 1: + 
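+                    # A hypothetical single-check invocation that reaches this stdout branch
+                    # (script path inferred from the test imports; flags are the ones defined
+                    # in main() above):
+                    #   python reporter/postgres_reports.py --check-id K003 \
+                    #       --cluster local --node-name node-01 --no-upload --output -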
print(json.dumps(report, indent=2)) + else: + with open(output_filename, 'w') as f: + json.dump(report, f, indent=2) + print(f"Report written to {output_filename}") + if not args.no_upload: + project_name = args.project_name if args.project_name != 'project-name' else cluster + report_id = generator.create_report(args.api_url, args.token, project_name, args.epoch) + generator.upload_report_file(args.api_url, args.token, report_id, output_filename) except Exception as e: print(f"Error generating reports: {e}") raise e sys.exit(1) + finally: + # Clean up postgres connection + generator.close_postgres_sink() if __name__ == "__main__": diff --git a/reporter/requirements-dev.txt b/reporter/requirements-dev.txt new file mode 100644 index 0000000..7499b74 --- /dev/null +++ b/reporter/requirements-dev.txt @@ -0,0 +1,5 @@ +-r requirements.txt +pytest==9.0.1 +pytest-postgresql==7.0.2 +coverage==7.6.10 +pytest-cov==6.0.0 diff --git a/reporter/requirements.txt b/reporter/requirements.txt index 659c37c..9a4b410 100644 --- a/reporter/requirements.txt +++ b/reporter/requirements.txt @@ -1 +1,2 @@ -requests>=2.31.0 \ No newline at end of file +requests==2.32.5 +psycopg2-binary==2.9.11 \ No newline at end of file diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/e2e.cli.sh b/tests/e2e.cli.sh new file mode 100755 index 0000000..64d3587 --- /dev/null +++ b/tests/e2e.cli.sh @@ -0,0 +1,51 @@ +#!/bin/bash +# E2E tests for postgres_ai CLI (Node.js) +# Usage: ./tests/e2e.cli.sh + +set -e + +CLI_CMD="node ./cli/dist/bin/postgres-ai.js" +MON_CMD="$CLI_CMD mon" + +echo "=== Testing service commands ===" +$MON_CMD check || true +$MON_CMD config || true +$MON_CMD update-config +$MON_CMD start +sleep 10 +$MON_CMD status +$MON_CMD logs --tail 5 grafana || true +$MON_CMD health --wait 60 || true + +echo "" +echo "=== Testing instance commands ===" +$MON_CMD targets list +$MON_CMD targets add "postgresql://monitor:monitor_pass@target-db:5432/target_database" ci-test +$MON_CMD targets list | grep -q ci-test +sleep 5 +$MON_CMD targets test ci-test || true +$MON_CMD targets remove ci-test + +echo "" +echo "=== Testing API key commands ===" +$CLI_CMD add-key "test_api_key_12345" +$CLI_CMD show-key | grep -q "test_api" +$CLI_CMD remove-key + +echo "" +echo "=== Testing Grafana commands ===" +$MON_CMD show-grafana-credentials || true +$MON_CMD generate-grafana-password || true +$MON_CMD show-grafana-credentials || true + +echo "" +echo "=== Testing service management ===" +$MON_CMD restart grafana +sleep 3 +$MON_CMD status +$MON_CMD stop +$MON_CMD clean || true + +echo "" +echo "โœ“ All E2E tests passed" + diff --git a/tests/lock_waits/README.md b/tests/lock_waits/README.md new file mode 100644 index 0000000..3838b5c --- /dev/null +++ b/tests/lock_waits/README.md @@ -0,0 +1,226 @@ +# Lock Waits Metric Testing + +This directory contains tests and scripts to verify that the `lock_waits` metric is working correctly. + +## Overview + +The `lock_waits` metric collects detailed information about lock waits in PostgreSQL, including: +- Waiting and blocking process IDs +- User names and application names +- Lock modes and types +- Affected tables +- Query IDs (PostgreSQL 14+) +- Wait durations and blocker transaction durations + +## Test Components + +### 1. 
Python Test Script (`test_lock_waits_metric.py`) + +Automated test that: +- Creates lock contention scenarios in the target database +- Waits for pgwatch to collect metrics +- Verifies the metric is collected in Prometheus/VictoriaMetrics +- Validates the metric structure and labels + +### 2. SQL Script (`create_lock_contention.sql`) + +Manual SQL script to create lock contention for testing. Can be run in multiple psql sessions. + +## Prerequisites + +1. Docker Compose stack running: + ```bash + docker-compose up -d + ``` + +2. Python dependencies: + ```bash + pip install psycopg requests + ``` + +3. Ensure `lock_waits` metric is enabled in pgwatch configuration: + - Check `config/pgwatch-prometheus/metrics.yml` includes `lock_waits` + - Verify pgwatch is collecting metrics from the target database + +## Running the Automated Test + +### Basic Usage + +```bash +# From the project root +python tests/lock_waits/test_lock_waits_metric.py +``` + +### With Custom Configuration + +```bash +python tests/lock_waits/test_lock_waits_metric.py \ + --target-db-url "postgresql://postgres:postgres@localhost:55432/target_database" \ + --prometheus-url "http://localhost:59090" \ + --test-dbname "target_database" \ + --collection-wait 90 +``` + +### Environment Variables + +You can also set these via environment variables: + +```bash +export TARGET_DB_URL="postgresql://postgres:postgres@localhost:55432/target_database" +export PROMETHEUS_URL="http://localhost:59090" +export TEST_DBNAME="target_database" +export COLLECTION_WAIT_SECONDS=90 + +python tests/lock_waits/test_lock_waits_metric.py +``` + +## Manual Testing + +### Step 1: Create Lock Contention + +Open two psql sessions to the target database: + +**Session 1 (Blocker):** +```sql +BEGIN; +SELECT * FROM lock_test_table WHERE id = 1 FOR UPDATE; +-- Keep this transaction open +``` + +**Session 2 (Waiter):** +```sql +BEGIN; +SELECT * FROM lock_test_table WHERE id = 1 FOR UPDATE; +-- This will wait for Session 1 to release the lock +``` + +### Step 2: Verify Metric Collection + +Wait for pgwatch to collect metrics (check collection interval in pgwatch config, typically 15-30 seconds), then query Prometheus: + +```bash +# Query Prometheus API for lock_waits metrics +curl "http://localhost:59090/api/v1/query?query=pgwatch_lock_waits_waiting_ms{datname=\"target_database\"}" + +# Or use PromQL in Grafana Explore +pgwatch_lock_waits_waiting_ms{datname="target_database"} +pgwatch_lock_waits_blocker_tx_ms{datname="target_database"} +``` + +### Step 3: Check Grafana Dashboard + +1. Open Grafana: http://localhost:3000 +2. Navigate to "Lock waits details" dashboard +3. Select the database from the dropdown +4. Verify that lock wait events appear in the panels + +## Expected Results + +### Successful Test Output + +``` +Setting up test environment... +โœ“ Test table created + +Creating lock contention for 30 seconds... +โœ“ Blocker transaction started (holding lock on row id=1) +โœ“ Waiter transaction started (waiting for lock on row id=1) + Holding locks for 30 seconds... +โœ“ Lock contention ended + +Verifying metric collection... + Waiting 60 seconds for pgwatch to collect metrics... + โœ“ Found 5 lock_waits records + +Validating metric structure... 
+
+  Record 1:
+    ✓ All required data fields present
+    ✓ waiting_ms is numeric: 25000 ms
+    ✓ blocker_tx_ms is numeric: 30000 ms
+
+✅ Test PASSED: lock_waits metric is working correctly
+```
+
+## Troubleshooting
+
+### No Records Found
+
+- **Check pgwatch is running**: `docker ps | grep pgwatch-prometheus`
+- **Check pgwatch logs**: `docker logs pgwatch-prometheus`
+- **Verify metric is enabled**: Check `config/pgwatch-prometheus/metrics.yml`
+- **Check Prometheus is accessible**: `curl http://localhost:59090/api/v1/status/config`
+- **Increase wait time**: Use `--collection-wait 120` to wait longer
+- **Check database name**: Ensure `--test-dbname` matches the monitored database
+- **Verify metrics exist**: `curl "http://localhost:59090/api/v1/label/__name__/values" | grep lock_waits`
+
+### Invalid Data Structure
+
+- **Check PostgreSQL version**: Metric requires PostgreSQL 14+ for query_id support
+- **Verify metric SQL**: Check the SQL query in `metrics.yml` is correct
+- **Check pgwatch version**: Ensure pgwatch version supports the metric format
+- **Check Prometheus labels**: Verify metrics have expected labels (datname, waiting_pid, blocker_pid, etc.)
+
+### Connection Errors
+
+- **Verify Docker containers**: `docker-compose ps`
+- **Check connection strings**: Verify URLs match your docker-compose configuration
+- **Check Prometheus URL**: Ensure Prometheus/VictoriaMetrics is accessible at the specified URL
+- **Check network**: Ensure containers can communicate (same Docker network)
+
+## Integration with CI/CD
+
+The test can be integrated into CI/CD pipelines:
+
+```yaml
+# Example GitLab CI
+test_lock_waits:
+  stage: test
+  script:
+    - docker-compose up -d
+    - sleep 30 # Wait for services to start
+    - pip install psycopg requests
+    - python tests/lock_waits/test_lock_waits_metric.py
+      --target-db-url "$TARGET_DB_URL"
+      --prometheus-url "$PROMETHEUS_URL"
+      --collection-wait 90
+  only:
+    - merge_requests
+    - main
+```
+
+## Additional Test Scenarios
+
+### Test Different Lock Types
+
+Modify the test to create different types of locks:
+
+```sql
+-- Table-level lock
+LOCK TABLE lock_test_table IN EXCLUSIVE MODE;
+
+-- Advisory lock
+SELECT pg_advisory_lock(12345);
+```
+
+### Test Multiple Concurrent Waits
+
+Create multiple waiting transactions to test the LIMIT clause:
+
+```sql
+-- Session 1: Blocker
+BEGIN;
+SELECT * FROM lock_test_table WHERE id = 1 FOR UPDATE;
+
+-- Sessions 2-10: Multiple waiters
+-- Each in separate psql session
+BEGIN;
+SELECT * FROM lock_test_table WHERE id = 1 FOR UPDATE;
+```
+
+## Related Files
+
+- `config/pgwatch-prometheus/metrics.yml` - Metric definition
+- `config/grafana/dashboards/Dashboard_13_Lock_waits.json` - Grafana dashboard
+- `workload_examples/lock_wait_test.sql` - Basic lock test SQL
+
diff --git a/tests/lock_waits/__init__.py b/tests/lock_waits/__init__.py
new file mode 100644
index 0000000..228403c
--- /dev/null
+++ b/tests/lock_waits/__init__.py
@@ -0,0 +1,2 @@
+# Lock waits metric testing package
+
diff --git a/tests/lock_waits/create_lock_contention.sql b/tests/lock_waits/create_lock_contention.sql
new file mode 100644
index 0000000..5e5da7a
--- /dev/null
+++ b/tests/lock_waits/create_lock_contention.sql
@@ -0,0 +1,73 @@
+-- SQL script to manually create lock contention for testing lock_waits metric
+--
+-- Usage:
+-- 1. Run this script in Session 1 (blocker)
+-- 2. Run the same script in Session 2 (waiter) - it will wait
+-- 3. Check the sink database for lock_waits records
+-- 4.
Commit or rollback Session 1 to release the lock + +-- Create test table if it doesn't exist +drop table if exists lock_test_table cascade; +create table lock_test_table ( + id int8 generated always as identity primary key, + name text not null, + value numeric(10, 2), + created_at timestamptz default now() +); + +insert into lock_test_table (name, value) +values + ('Item 1', 100.50), + ('Item 2', 200.75), + ('Item 3', 300.25); + +-- ============================================ +-- SESSION 1 (BLOCKER) - Run this first +-- ============================================ +begin; + +-- Acquire exclusive lock on row id=1 +-- Keep this transaction open to hold the lock +select * from lock_test_table where id = 1 for update; + +-- Transaction is now holding the lock +-- DO NOT COMMIT YET - keep this session open + +-- ============================================ +-- SESSION 2 (WAITER) - Run this in another psql session +-- ============================================ +begin; + +-- This will wait for Session 1 to release the lock +select * from lock_test_table where id = 1 for update; + +-- This query will block until Session 1 commits or rolls back +-- You should see it waiting in pg_stat_activity + +-- ============================================ +-- To release the lock, commit or rollback Session 1: +-- ============================================ +-- commit; -- or rollback; + +-- ============================================ +-- Alternative: Test with different lock types +-- ============================================ + +-- Test with table-level lock +-- SESSION 1: +-- begin; +-- lock table lock_test_table in exclusive mode; + +-- SESSION 2: +-- begin; +-- select * from lock_test_table; -- Will wait + +-- Test with advisory lock +-- SESSION 1: +-- begin; +-- select pg_advisory_lock(12345); + +-- SESSION 2: +-- begin; +-- select pg_advisory_lock(12345); -- Will wait + diff --git a/tests/lock_waits/run_test.sh b/tests/lock_waits/run_test.sh new file mode 100755 index 0000000..de45803 --- /dev/null +++ b/tests/lock_waits/run_test.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# Simple wrapper script to run the lock_waits metric test + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" + +# Default values (can be overridden by environment variables) +TARGET_DB_URL="${TARGET_DB_URL:-postgresql://postgres:postgres@localhost:55432/target_database}" +PROMETHEUS_URL="${PROMETHEUS_URL:-http://localhost:59090}" +TEST_DBNAME="${TEST_DBNAME:-target_database}" +COLLECTION_WAIT="${COLLECTION_WAIT_SECONDS:-60}" + +echo "==========================================" +echo "Lock Waits Metric Test" +echo "==========================================" +echo "" +echo "Configuration:" +echo " Target DB: $TARGET_DB_URL" +echo " Prometheus URL: $PROMETHEUS_URL" +echo " Test DB Name: $TEST_DBNAME" +echo " Collection Wait: ${COLLECTION_WAIT}s" +echo "" + +# Check if required packages are installed +if ! python3 -c "import psycopg" 2>/dev/null; then + echo "Installing psycopg..." + pip3 install psycopg +fi + +if ! python3 -c "import requests" 2>/dev/null; then + echo "Installing requests..." 
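+    # The `python3 -c "import <pkg>"` probes above are cheap presence checks: pip3 only
+    # runs when the import fails, so re-running this script does not reinstall anything.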
+ pip3 install requests +fi + +# Run the test +cd "$PROJECT_ROOT" +python3 tests/lock_waits/test_lock_waits_metric.py \ + --target-db-url "$TARGET_DB_URL" \ + --prometheus-url "$PROMETHEUS_URL" \ + --test-dbname "$TEST_DBNAME" \ + --collection-wait "$COLLECTION_WAIT" + diff --git a/tests/lock_waits/test_lock_waits_metric.py b/tests/lock_waits/test_lock_waits_metric.py new file mode 100644 index 0000000..b4bbaca --- /dev/null +++ b/tests/lock_waits/test_lock_waits_metric.py @@ -0,0 +1,426 @@ +""" +Test script to verify lock_waits metric collection. + +This script: +1. Creates lock contention scenarios in the target database +2. Waits for pgwatch to collect metrics +3. Verifies the lock_waits metric is collected in Prometheus +4. Validates the data structure and content +""" + +import json +import os +import threading +import time +from datetime import datetime, timezone, timedelta +from typing import Dict, List, Optional + +import psycopg +import requests + + +class LockWaitsTest: + def __init__( + self, + target_db_url: str, + prometheus_url: str, + test_dbname: str = "target_database", + collection_wait_seconds: int = 60, + ): + """ + Initialize the test. + + Args: + target_db_url: Connection string for the target database being monitored + prometheus_url: URL for Prometheus/VictoriaMetrics API + test_dbname: Name of the database being monitored + collection_wait_seconds: How long to wait for pgwatch to collect metrics + """ + self.target_db_url = target_db_url + self.prometheus_url = prometheus_url.rstrip("/") + self.test_dbname = test_dbname + self.collection_wait_seconds = collection_wait_seconds + self.target_conn: Optional[psycopg.Connection] = None + self.blocker_conn: Optional[psycopg.Connection] = None + + def setup(self): + """Set up database connections and test table.""" + print("Setting up test environment...") + + # Connect to target database + self.target_conn = psycopg.connect(self.target_db_url) + self.target_conn.autocommit = True + + # Verify Prometheus is accessible + try: + response = requests.get(f"{self.prometheus_url}/api/v1/status/config", timeout=5) + response.raise_for_status() + print("โœ“ Prometheus connection verified") + except Exception as e: + print(f"โš  Warning: Could not verify Prometheus connection: {e}") + + # Create test table + with self.target_conn.cursor() as cur: + cur.execute( + """ + drop table if exists lock_test_table cascade; + create table lock_test_table ( + id int8 generated always as identity primary key, + name text not null, + value numeric(10, 2), + created_at timestamptz default now() + ); + insert into lock_test_table (name, value) + values + ('Item 1', 100.50), + ('Item 2', 200.75), + ('Item 3', 300.25); + """ + ) + print("โœ“ Test table created") + + def create_lock_contention(self, duration_seconds: int = 30): + """ + Create lock contention by: + 1. Starting a transaction that locks a row + 2. Starting another transaction that tries to lock the same row (will wait) + 3. 
Keeping both transactions open for the specified duration + """ + print(f"\nCreating lock contention for {duration_seconds} seconds...") + + # Connection 1: Blocker - acquires lock and holds it + self.blocker_conn = psycopg.connect(self.target_db_url) + self.blocker_conn.autocommit = False + blocker_cur = self.blocker_conn.cursor() + blocker_cur.execute("begin") + blocker_cur.execute( + "select * from lock_test_table where id = 1 for update" + ) + blocker_cur.fetchone() + print("โœ“ Blocker transaction started (holding lock on row id=1)") + + # Small delay to ensure blocker has the lock + time.sleep(1) + + # Connection 2: Waiter - tries to acquire same lock (will wait) + waiter_conn = psycopg.connect(self.target_db_url) + waiter_conn.autocommit = False + waiter_cur = waiter_conn.cursor() + waiter_cur.execute("begin") + print("โœ“ Waiter transaction started (waiting for lock on row id=1)") + + # Execute the waiting query in a separate thread so it can block + waiter_error = [] + waiter_done = threading.Event() + + def run_waiter(): + try: + # This will block until blocker releases the lock + waiter_cur.execute( + "select * from lock_test_table where id = 1 for update" + ) + waiter_cur.fetchone() + print(" โœ“ Waiter acquired lock (blocker released)") + except Exception as e: + waiter_error.append(str(e)) + print(f" Waiter error: {e}") + finally: + waiter_done.set() + + waiter_thread = threading.Thread(target=run_waiter, daemon=True) + waiter_thread.start() + + # Give waiter time to start waiting + time.sleep(2) + + # Verify waiter is actually waiting + with self.target_conn.cursor() as check_cur: + check_cur.execute( + """ + select pid, state, wait_event_type, wait_event + from pg_stat_activity + where datname = current_database() + and pid <> pg_backend_pid() + and wait_event_type = 'Lock' + """ + ) + waiting_pids = check_cur.fetchall() + if waiting_pids: + print(f" โœ“ Confirmed {len(waiting_pids)} process(es) waiting for locks") + for pid, state, wait_type, wait_event in waiting_pids: + print(f" PID {pid}: state={state}, wait_event={wait_event}") + else: + print(" โš  No processes found waiting for locks") + + # Keep locks held for the duration + print(f" Holding locks for {duration_seconds} seconds...") + time.sleep(duration_seconds) + + # Cleanup: commit blocker first, then waiter + print(" Releasing blocker lock...") + blocker_cur.execute("commit") + blocker_cur.close() + self.blocker_conn.close() + self.blocker_conn = None + + # Wait for waiter to complete + waiter_done.wait(timeout=5) + try: + waiter_cur.execute("commit") + except Exception: + pass + waiter_cur.close() + waiter_conn.close() + + print("โœ“ Lock contention ended") + + def verify_metric_collected(self) -> List[Dict]: + """ + Verify that lock_waits metric was collected in Prometheus. 
+ + Returns: + List of lock_waits metric samples found + """ + print("\nVerifying metric collection...") + + # Wait for pgwatch to collect metrics + print(f" Waiting {self.collection_wait_seconds} seconds for pgwatch to collect metrics...") + time.sleep(self.collection_wait_seconds) + + # Query Prometheus for lock_waits metrics + # pgwatch exports metrics with prefix pgwatch__ + metrics_to_check = [ + "pgwatch_lock_waits_waiting_ms", + "pgwatch_lock_waits_blocker_tx_ms", + ] + + records = [] + cutoff_time = datetime.now(timezone.utc) - timedelta(minutes=5) + + for metric_name in metrics_to_check: + try: + # Query for recent samples + query = f'{metric_name}{{datname="{self.test_dbname}"}}' + response = requests.get( + f"{self.prometheus_url}/api/v1/query", + params={ + "query": query, + "time": datetime.now(timezone.utc).timestamp(), + }, + timeout=10, + ) + response.raise_for_status() + data = response.json() + + if data.get("status") == "success" and data.get("data", {}).get("result"): + for result in data["data"]["result"]: + metric = result.get("metric", {}) + value = result.get("value", [None, None]) + + # Convert timestamp + timestamp = float(value[0]) if value[0] else None + if timestamp: + metric_time = datetime.fromtimestamp(timestamp, tz=timezone.utc) + if metric_time >= cutoff_time: + records.append( + { + "time": metric_time, + "metric": metric_name, + "labels": metric, + "value": float(value[1]) if value[1] else None, + } + ) + except Exception as e: + print(f" โš  Error querying {metric_name}: {e}") + + print(f" โœ“ Found {len(records)} lock_waits metric samples") + + return records + + def validate_metric_structure(self, records: List[Dict]) -> bool: + """ + Validate that the metric records have the expected structure. + + Args: + records: List of metric samples to validate + + Returns: + True if validation passes, False otherwise + """ + if not records: + print(" โš  No records to validate") + return False + + print("\nValidating metric structure...") + + # Expected labels in Prometheus metrics + expected_labels = [ + "datname", + "waiting_user", + "waiting_appname", + "waiting_table", + "waiting_query_id", + "waiting_mode", + "waiting_locktype", + "waiting_pid", + "blocker_user", + "blocker_appname", + "blocker_table", + "blocker_query_id", + "blocker_mode", + "blocker_locktype", + "blocker_pid", + ] + + all_valid = True + unique_samples = {} + + # Group samples by their label combination + for record in records: + labels = record.get("labels", {}) + # Create a key from relevant labels + key = ( + labels.get("waiting_pid"), + labels.get("blocker_pid"), + labels.get("waiting_table"), + ) + if key not in unique_samples: + unique_samples[key] = record + + print(f" Found {len(unique_samples)} unique lock wait samples") + + for i, (key, record) in enumerate(list(unique_samples.items())[:5]): # Validate first 5 + print(f"\n Sample {i+1}:") + labels = record.get("labels", {}) + metric_name = record.get("metric", "") + value = record.get("value") + + # Check datname matches + if labels.get("datname") != self.test_dbname: + print(f" โš  datname mismatch: {labels.get('datname')} != {self.test_dbname}") + else: + print(f" โœ“ datname matches: {labels.get('datname')}") + + # Check key labels are present + key_labels = ["waiting_pid", "blocker_pid", "waiting_mode", "blocker_mode"] + missing_labels = [label for label in key_labels if not labels.get(label)] + if missing_labels: + print(f" โš  Missing key labels: {missing_labels}") + else: + print(f" โœ“ Key labels present") + + # 
Validate metric value + if value is not None: + try: + float(value) + print(f" โœ“ Metric value is numeric: {value}") + if "waiting_ms" in metric_name or "blocker_tx_ms" in metric_name: + print(f" Value: {value} ms") + except (ValueError, TypeError): + print(f" โœ— Metric value is not numeric: {value}") + all_valid = False + else: + print(f" โš  Metric value is None") + + return all_valid + + def cleanup(self): + """Clean up test resources.""" + print("\nCleaning up...") + + if self.blocker_conn: + try: + self.blocker_conn.close() + except Exception: + pass + + if self.target_conn: + try: + with self.target_conn.cursor() as cur: + cur.execute("drop table if exists lock_test_table cascade") + self.target_conn.close() + except Exception: + pass + + print("โœ“ Cleanup complete") + + def run(self) -> bool: + """ + Run the complete test. + + Returns: + True if test passes, False otherwise + """ + try: + self.setup() + self.create_lock_contention(duration_seconds=30) + records = self.verify_metric_collected() + is_valid = self.validate_metric_structure(records) + + if is_valid and records: + print("\nโœ… Test PASSED: lock_waits metric is working correctly") + return True + else: + print("\nโŒ Test FAILED: lock_waits metric validation failed") + return False + + except Exception as e: + print(f"\nโŒ Test ERROR: {e}") + import traceback + + traceback.print_exc() + return False + finally: + self.cleanup() + + +def main(): + """Main entry point for the test.""" + import argparse + + parser = argparse.ArgumentParser( + description="Test lock_waits metric collection" + ) + parser.add_argument( + "--target-db-url", + default=os.getenv( + "TARGET_DB_URL", "postgresql://postgres:postgres@localhost:55432/target_database" + ), + help="Target database connection URL", + ) + parser.add_argument( + "--prometheus-url", + default=os.getenv( + "PROMETHEUS_URL", + "http://localhost:59090", + ), + help="Prometheus/VictoriaMetrics API URL", + ) + parser.add_argument( + "--test-dbname", + default=os.getenv("TEST_DBNAME", "target_database"), + help="Name of the database being monitored", + ) + parser.add_argument( + "--collection-wait", + type=int, + default=int(os.getenv("COLLECTION_WAIT_SECONDS", "60")), + help="Seconds to wait for pgwatch to collect metrics", + ) + + args = parser.parse_args() + + test = LockWaitsTest( + target_db_url=args.target_db_url, + prometheus_url=args.prometheus_url, + test_dbname=args.test_dbname, + collection_wait_seconds=args.collection_wait, + ) + + success = test.run() + exit(0 if success else 1) + + +if __name__ == "__main__": + main() + diff --git a/tests/reporter/__init__.py b/tests/reporter/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/reporter/conftest.py b/tests/reporter/conftest.py new file mode 100644 index 0000000..63b3255 --- /dev/null +++ b/tests/reporter/conftest.py @@ -0,0 +1,58 @@ +from typing import Callable + +import pytest + + +def pytest_addoption(parser: pytest.Parser) -> None: + """Add a flag for enabling integration tests that require services.""" + parser.addoption( + "--run-integration", + action="store_true", + default=False, + help="Run tests marked as integration/requires_postgres.", + ) + + +def pytest_collection_modifyitems(config: pytest.Config, items: list[pytest.Item]) -> None: + """Skip integration tests unless --run-integration is given.""" + if config.getoption("--run-integration"): + return + + skip_marker = pytest.mark.skip(reason="integration tests require --run-integration") + for item in items: + if "integration" in 
item.keywords or "requires_postgres" in item.keywords: + item.add_marker(skip_marker) + + +@pytest.fixture(name="prom_result") +def fixture_prom_result() -> Callable[[list[dict] | None, str], dict]: + """Build a Prometheus-like payload for the happy-path tests.""" + + def _builder(rows: list[dict] | None = None, status: str = "success") -> dict: + return { + "status": status, + "data": { + "result": rows or [], + }, + } + + return _builder + + +@pytest.fixture(name="series_sample") +def fixture_series_sample() -> Callable[[str, dict | None, list[tuple[float | int, float | int | str]] | None], dict]: + """Create metric entries (metric metadata + values array) for query_range tests.""" + + def _builder( + metric_name: str, + labels: dict | None = None, + values: list[tuple[float | int, float | int | str]] | None = None, + ) -> dict: + labels = labels or {} + values = values or [] + return { + "metric": {"__name__": metric_name, **labels}, + "values": [[ts, str(val)] for ts, val in values], + } + + return _builder diff --git a/tests/reporter/test_formatters.py b/tests/reporter/test_formatters.py new file mode 100644 index 0000000..c5ebf2d --- /dev/null +++ b/tests/reporter/test_formatters.py @@ -0,0 +1,75 @@ +import pytest + +from reporter.postgres_reports import PostgresReportGenerator + + +@pytest.fixture(name="generator") +def fixture_generator() -> PostgresReportGenerator: + return PostgresReportGenerator(prometheus_url="http://test", postgres_sink_url="") + + +@pytest.mark.unit +@pytest.mark.parametrize( + "value,expected", + [ + (0, "0 B"), + (1, "1.00 B"), + (1024, "1.00 KB"), + (10 * 1024, "10.0 KB"), + (1048576, "1.00 MB"), + (5 * 1024 ** 3, "5.00 GB"), + ], +) +def test_format_bytes(generator: PostgresReportGenerator, value: int, expected: str) -> None: + assert generator.format_bytes(value) == expected + + +@pytest.mark.unit +@pytest.mark.parametrize( + "name,value,unit,expected", + [ + ("shared_buffers", "128", "8kB", "1 MB"), + ("work_mem", "512", "", "512 kB"), + ("log_min_duration_statement", "2000", "ms", "2 s"), + ("log_min_duration_statement", "500", "ms", "500 ms"), + ("autovacuum_naptime", "120", "", "2 min"), + ("autovacuum", "on", "", "on"), + ("autovacuum", "OFF", "", "off"), + ], +) +def test_format_setting_value( + generator: PostgresReportGenerator, + name: str, + value: str, + unit: str, + expected: str, +) -> None: + assert generator.format_setting_value(name, value, unit) == expected + + +@pytest.mark.unit +def test_get_cluster_metric_metadata(generator: PostgresReportGenerator) -> None: + assert generator.get_cluster_metric_unit("active_connections") == "connections" + assert generator.get_cluster_metric_description( + "active_connections" + ).startswith("Number of active") + assert generator.get_cluster_metric_unit("unknown") == "" + + +@pytest.mark.unit +def test_get_setting_unit_and_category(generator: PostgresReportGenerator) -> None: + assert generator.get_setting_unit("shared_buffers") == "8kB" + assert generator.get_setting_category("shared_buffers") == "Memory" + assert generator.get_setting_unit("nonexistent") == "" + assert generator.get_setting_category("nonexistent") == "Other" + + +@pytest.mark.unit +def test_format_report_data_structure(generator: PostgresReportGenerator) -> None: + host = "db-1" + payload = generator.format_report_data("A002", {"foo": "bar"}, host) + + assert payload["checkId"] == "A002" + # Newer reporter returns a 'nodes' structure instead of legacy 'hosts'. 
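+    # Shape of the payload built above, for reference (keys come from format_report_data();
+    # the checkTitle is taken from the check_titles mapping and the timestamp is illustrative):
+    #   {
+    #       "checkId": "A002",
+    #       "checkTitle": "Postgres major version",
+    #       "timestamptz": "2024-01-01T00:00:00+00:00",
+    #       "nodes": {"primary": "db-1", "standbys": []},
+    #       "results": {"db-1": {"data": {"foo": "bar"}}},
+    #   }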
+ assert payload["nodes"]["primary"] == host + assert payload["results"][host]["data"] == {"foo": "bar"} diff --git a/tests/reporter/test_generators_unit.py b/tests/reporter/test_generators_unit.py new file mode 100644 index 0000000..0c3f5e1 --- /dev/null +++ b/tests/reporter/test_generators_unit.py @@ -0,0 +1,1078 @@ +import json +import sys +from datetime import datetime, timedelta +from typing import Any, Callable + +import pytest + +from reporter import postgres_reports as postgres_reports_module +from reporter.postgres_reports import PostgresReportGenerator + + +@pytest.fixture(name="generator") +def fixture_generator() -> PostgresReportGenerator: + return PostgresReportGenerator( + prometheus_url="http://prom.test", + postgres_sink_url="", + ) + + +def _success_metric(value: str) -> dict[str, Any]: + return { + "status": "success", + "data": { + "result": [ + { + "value": [datetime.now().timestamp(), value], + } + ] + }, + } + + +def _query_stub_factory(prom_result, mapping: dict[str, Any]) -> Callable[[str], dict[str, Any]]: + """Return a query_instant stub that matches substrings defined in mapping keys. + + Args: + prom_result: Fallback callable that returns a default Prometheus response + mapping: Dict mapping query substrings to responses (either dict or callable) + + Returns: + A callable that takes a query string and returns a Prometheus-like response + """ + + def _fake(query: str) -> dict[str, Any]: + for needle, payload in mapping.items(): + if needle in query: + return payload(query) if callable(payload) else payload + return prom_result() + + return _fake + + +@pytest.mark.unit +def test_query_instant_hits_prometheus( + monkeypatch: pytest.MonkeyPatch, + generator: PostgresReportGenerator, +) -> None: + captured: dict[str, Any] = {} + + class DummyResponse: + status_code = 200 + text = "{}" + + @staticmethod + def json() -> dict[str, Any]: + return {"status": "success", "data": {"result": []}} + + def fake_get( + url: str, + params: dict[str, Any] | None = None, + timeout: int | None = None, + ): + captured["url"] = url + captured["params"] = params + return DummyResponse() + + monkeypatch.setattr(postgres_reports_module.requests, "get", fake_get) + + payload = generator.query_instant("up") + + assert payload["status"] == "success" + assert captured["url"].endswith("/api/v1/query") + assert captured["params"] == {"query": "up"} + + +@pytest.mark.unit +def test_query_range_hits_prometheus( + monkeypatch: pytest.MonkeyPatch, + generator: PostgresReportGenerator, +) -> None: + start = datetime(2024, 1, 1, 0, 0, 0) + end = start + timedelta(minutes=5) + captured: dict[str, Any] = {} + + class DummyResponse: + status_code = 200 + text = "{}" + + @staticmethod + def json() -> dict[str, Any]: + return {"status": "success", "data": {"result": []}} + + def fake_get( + url: str, + params: dict[str, Any] | None = None, + timeout: int | None = None, + ): + captured["url"] = url + captured["params"] = params + return DummyResponse() + + monkeypatch.setattr(postgres_reports_module.requests, "get", fake_get) + + payload = generator.query_range("up", start, end, step="60s") + + assert payload == [] + assert captured["url"].endswith("/api/v1/query_range") + assert captured["params"]["query"] == "up" + assert captured["params"]["start"] == start.timestamp() + + +@pytest.mark.unit +def test_generate_a002_version_report( + monkeypatch: pytest.MonkeyPatch, + generator: PostgresReportGenerator, +) -> None: + values = { + "server_version": "15.3", + "server_version_num": "150003", + 
"max_connections": "200", + "shared_buffers": "1024", + "effective_cache_size": "2048", + } + + def fake_query(query: str) -> dict[str, Any]: + # A002 uses a helper that queries both settings via a single regex selector. + if 'setting_name=~"server_version|server_version_num"' in query: + return { + "status": "success", + "data": { + "result": [ + { + "metric": { + "setting_name": "server_version", + "setting_value": values["server_version"], + } + }, + { + "metric": { + "setting_name": "server_version_num", + "setting_value": values["server_version_num"], + } + }, + ] + }, + } + return {"status": "success", "data": {"result": []}} + + monkeypatch.setattr(generator, "query_instant", fake_query) + + report = generator.generate_a002_version_report("local", "node-1") + version = report["results"]["node-1"]["data"]["version"] + + assert version["version"] == "15.3" + assert version["server_major_ver"] == "15" + assert version["server_minor_ver"] == "3" + + +@pytest.mark.unit +def test_generate_a004_cluster_report( + monkeypatch: pytest.MonkeyPatch, + generator: PostgresReportGenerator, +) -> None: + def fake_query(query: str) -> dict[str, Any]: + if "pgwatch_db_size_size_b" in query and "sum(" not in query: + return { + "status": "success", + "data": { + "result": [ + {"metric": {"datname": "db1"}, "value": [0, "1024"]}, + {"metric": {"datname": "db2"}, "value": [0, "2048"]}, + ] + }, + } + return _success_metric("42") + + monkeypatch.setattr(generator, "query_instant", fake_query) + + report = generator.generate_a004_cluster_report("local", "node-1") + data = report["results"]["node-1"]["data"] + + assert "general_info" in data and "database_sizes" in data + assert data["general_info"]["active_connections"]["value"] == "42" + assert data["database_sizes"] == {"db1": 1024.0, "db2": 2048.0} + + +@pytest.mark.unit +def test_prometheus_to_dict_and_process_pgss(generator: PostgresReportGenerator) -> None: + base_time = datetime(2024, 1, 1, 0, 0, 0) + later_time = base_time + timedelta(seconds=60) + + def make_metric(name: str, value: float, ts: datetime) -> dict[str, Any]: + return { + "metric": { + "__name__": name, + "datname": "db1", + "queryid": "123", + "user": "postgres", + "instance": "inst1", + }, + "values": [[ts.timestamp(), str(value)]], + } + + start_metrics = [ + make_metric("pgwatch_pg_stat_statements_calls", 10, base_time), + make_metric("pgwatch_pg_stat_statements_exec_time_total", 1000, base_time), + make_metric("pgwatch_pg_stat_statements_rows", 200, base_time), + ] + end_metrics = [ + make_metric("pgwatch_pg_stat_statements_calls", 40, later_time), + make_metric("pgwatch_pg_stat_statements_exec_time_total", 4000, later_time), + make_metric("pgwatch_pg_stat_statements_rows", 260, later_time), + ] + + mapping = { + "calls": "calls", + "exec_time_total": "total_time", + "rows": "rows", + } + + rows = generator._process_pgss_data( + start_metrics, + end_metrics, + base_time, + later_time, + mapping, + ) + + assert len(rows) == 1 + row = rows[0] + assert row["calls"] == 30 + assert row["total_time"] == 3000 + assert pytest.approx(row["total_time_per_sec"], 0.01) == 50 + assert row["rows_per_call"] == pytest.approx(2.0) + + +@pytest.mark.unit +def test_prometheus_to_dict_closest_value(generator: PostgresReportGenerator) -> None: + reference_time = datetime(2024, 1, 1, 12, 0, 0) + + prom_data: list[dict[str, Any]] = [ + { + "metric": { + "__name__": "pgwatch_pg_stat_statements_calls", + "datname": "db1", + "queryid": "q1", + "user": "postgres", + "instance": "inst1", + }, + "values": [ 
+ [reference_time.timestamp() - 10, "10"], + [reference_time.timestamp() + 5, "20"], + ], + } + ] + + converted = generator._prometheus_to_dict(prom_data, reference_time) + + key = ("db1", "q1", "postgres", "inst1") + assert key in converted + assert converted[key]["calls"] == 20 + + +@pytest.mark.unit +def test_generate_a003_settings_report(monkeypatch: pytest.MonkeyPatch, generator: PostgresReportGenerator) -> None: + def fake_query(query: str) -> dict[str, Any]: + assert "pgwatch_settings_configured" in query + return { + "status": "success", + "data": { + "result": [ + { + "metric": { + "setting_name": "shared_buffers", + "setting_value": "128", + "category": "Memory", + "unit": "8kB", + "context": "postmaster", + "vartype": "integer", + } + }, + { + "metric": { + "setting_name": "work_mem", + "setting_value": "512", + "category": "Memory", + "unit": "", + "context": "user", + "vartype": "integer", + } + }, + ] + }, + } + + monkeypatch.setattr(generator, "query_instant", fake_query) + + report = generator.generate_a003_settings_report("local", "node-1") + data = report["results"]["node-1"]["data"] + + assert data["shared_buffers"]["pretty_value"] == "1 MB" + assert data["work_mem"]["unit"] == "" + assert data["work_mem"]["category"] == "Memory" + + +@pytest.mark.unit +def test_generate_a007_altered_settings_report(monkeypatch: pytest.MonkeyPatch, generator: PostgresReportGenerator) -> None: + def fake_query(query: str) -> dict[str, Any]: + # Handle version info query from _get_postgres_version_info + if 'setting_name=~"server_version|server_version_num"' in query: + return { + "status": "success", + "data": { + "result": [ + {"metric": {"setting_name": "server_version", "setting_value": "15.0"}}, + {"metric": {"setting_name": "server_version_num", "setting_value": "150000"}}, + ] + }, + } + # Handle altered settings query + assert "pgwatch_settings_is_default" in query + return { + "status": "success", + "data": { + "result": [ + { + "metric": { + "setting_name": "work_mem", + "setting_value": "1024", + "unit": "", + "category": "Memory", + } + }, + { + "metric": { + "setting_name": "autovacuum", + "setting_value": "off", + "unit": "", + "category": "Autovacuum", + } + }, + ] + }, + } + + monkeypatch.setattr(generator, "query_instant", fake_query) + + payload = generator.generate_a007_altered_settings_report("local", "node-1") + data = payload["results"]["node-1"]["data"] + + assert set(data.keys()) == {"work_mem", "autovacuum"} + assert "postgres_version" in payload["results"]["node-1"] # postgres_version is at node level + assert data["work_mem"]["pretty_value"] == "1 MB" + assert data["autovacuum"]["pretty_value"] == "off" + + +@pytest.mark.unit +def test_get_all_databases_merges_sources(monkeypatch: pytest.MonkeyPatch, generator: PostgresReportGenerator) -> None: + def fake_query(query: str) -> dict[str, Any]: + if "wraparound" in query: + return { + "status": "success", + "data": { + "result": [ + {"metric": {"datname": "appdb"}, "value": [0, "1"]}, + {"metric": {"datname": "template0"}, "value": [0, "1"]}, + ] + }, + } + if "unused_indexes" in query: + return { + "status": "success", + "data": { + "result": [ + {"metric": {"dbname": "analytics"}, "value": [0, "1"]}, + {"metric": {"dbname": "appdb"}, "value": [0, "1"]}, + ] + }, + } + if "redundant_indexes" in query: + return { + "status": "success", + "data": { + "result": [ + {"metric": {"dbname": "warehouse"}, "value": [0, "1"]}, + ] + }, + } + if "pg_btree_bloat_bloat_pct" in query: + return { + "status": "success", + "data": 
{ + "result": [ + {"metric": {"datname": "inventory"}, "value": [0, "1"]}, + ] + }, + } + return {"status": "success", "data": {"result": []}} + + monkeypatch.setattr(generator, "query_instant", fake_query) + + databases = generator.get_all_databases("local", "node-1") + + assert databases == ["appdb", "analytics", "warehouse", "inventory"] + + +@pytest.mark.unit +def test_check_pg_stat_kcache_status(monkeypatch: pytest.MonkeyPatch, generator: PostgresReportGenerator, prom_result) -> None: + responses = { + "pgwatch_pg_stat_kcache_exec_total_time": prom_result( + [ + { + "metric": {"queryid": "1", "tag_user": "postgres"}, + "value": [0, "10"], + } + ] + ), + "pgwatch_pg_stat_kcache_exec_user_time": prom_result([{"metric": {}, "value": [0, "4"]}]), + "pgwatch_pg_stat_kcache_exec_system_time": prom_result([{"metric": {}, "value": [0, "6"]}]), + } + monkeypatch.setattr(generator, "query_instant", _query_stub_factory(prom_result, responses)) + + status = generator._check_pg_stat_kcache_status("local", "node-1") + + assert status["extension_available"] is True + assert status["metrics_count"] == 1 + assert status["total_exec_time"] == 10.0 + assert status["total_user_time"] == 4.0 + assert status["sample_queries"][0]["queryid"] == "1" + + +@pytest.mark.unit +def test_check_pg_stat_statements_status(monkeypatch: pytest.MonkeyPatch, generator: PostgresReportGenerator, prom_result) -> None: + response = prom_result( + [ + { + "metric": {"queryid": "1", "tag_user": "postgres", "datname": "db1"}, + "value": [0, "5"], + } + ] + ) + monkeypatch.setattr(generator, "query_instant", lambda query: response) + + status = generator._check_pg_stat_statements_status("local", "node-1") + + assert status["extension_available"] is True + assert status["metrics_count"] == 1 + assert status["total_calls"] == 5.0 + assert status["sample_queries"][0]["database"] == "db1" + + +@pytest.mark.unit +def test_generate_h001_invalid_indexes_report( + monkeypatch: pytest.MonkeyPatch, + generator: PostgresReportGenerator, + prom_result, +) -> None: + monkeypatch.setattr(generator, "get_all_databases", lambda *args, **kwargs: ["maindb"]) + + responses = { + "pgwatch_pg_invalid_indexes": prom_result( + [ + { + "metric": { + "schema_name": "public", + "table_name": "tbl", + "index_name": "idx_invalid", + "relation_name": "public.tbl", + "supports_fk": "1", + }, + "value": [0, "2048"], + } + ] + ) + } + monkeypatch.setattr(generator, "query_instant", _query_stub_factory(prom_result, responses)) + + payload = generator.generate_h001_invalid_indexes_report("local", "node-1") + db_data = payload["results"]["node-1"]["data"]["maindb"] + + assert db_data["total_count"] == 1 + assert db_data["total_size_bytes"] == 2048.0 + entry = db_data["invalid_indexes"][0] + assert entry["index_name"] == "idx_invalid" + assert entry["index_size_pretty"].endswith("KB") + assert entry["supports_fk"] is True + + +@pytest.mark.unit +def test_generate_h002_unused_indexes_report( + monkeypatch: pytest.MonkeyPatch, + generator: PostgresReportGenerator, + prom_result, +) -> None: + monkeypatch.setattr(generator, "get_all_databases", lambda *args, **kwargs: ["app"]) + monkeypatch.setattr(generator, "get_index_definitions_from_sink", lambda db: {"idx_unused": "CREATE INDEX idx_unused ON t(c)"}) + + responses = { + "pgwatch_db_stats_postmaster_uptime_s": prom_result([{"value": [0, "3600"]}]), + "pgwatch_stats_reset_stats_reset_epoch": prom_result([{"value": [0, "1700000000"]}]), + "pgwatch_unused_indexes_index_size_bytes": prom_result( + [ + { + "metric": { + 
"schema_name": "public", + "table_name": "tbl", + "index_name": "idx_unused", + "reason": "never scanned", + "idx_is_btree": "true", + "supports_fk": "0", + }, + "value": [0, "1024"], + } + ] + ), + "pgwatch_unused_indexes_idx_scan": prom_result([{"value": [0, "0"]}]), + } + monkeypatch.setattr(generator, "query_instant", _query_stub_factory(prom_result, responses)) + + payload = generator.generate_h002_unused_indexes_report("local", "node-1") + db_data = payload["results"]["node-1"]["data"]["app"] + + assert db_data["total_count"] == 1 + unused = db_data["unused_indexes"][0] + assert unused["index_definition"].startswith("CREATE INDEX") + assert unused["idx_scan"] == 0 + assert unused["index_size_pretty"].endswith("KB") + stats_reset = db_data["stats_reset"] + assert stats_reset["stats_reset_epoch"] == 1700000000.0 + assert stats_reset["postmaster_startup_epoch"] is not None + + +@pytest.mark.unit +def test_generate_h004_redundant_indexes_report( + monkeypatch: pytest.MonkeyPatch, + generator: PostgresReportGenerator, + prom_result, +) -> None: + monkeypatch.setattr(generator, "get_all_databases", lambda *args, **kwargs: ["app"]) + monkeypatch.setattr(generator, "get_index_definitions_from_sink", lambda db: {"idx_dup": "CREATE INDEX idx_dup ON t(c)"}) + + responses = { + "pgwatch_redundant_indexes_index_size_bytes": prom_result( + [ + { + "metric": { + "schema_name": "public", + "table_name": "tbl", + "index_name": "idx_dup", + "relation_name": "public.tbl", + "access_method": "btree", + "reason": "covers columns", + }, + "value": [0, "4096"], + } + ] + ), + "pgwatch_redundant_indexes_table_size_bytes": prom_result([{"value": [0, "8192"]}]), + "pgwatch_redundant_indexes_index_usage": prom_result([{"value": [0, "2"]}]), + "pgwatch_redundant_indexes_supports_fk": prom_result([{"value": [0, "1"]}]), + } + monkeypatch.setattr(generator, "query_instant", _query_stub_factory(prom_result, responses)) + + payload = generator.generate_h004_redundant_indexes_report("local", "node-1") + db_data = payload["results"]["node-1"]["data"]["app"] + + assert db_data["total_count"] == 1 + redundant = db_data["redundant_indexes"][0] + assert redundant["index_definition"].startswith("CREATE INDEX") + assert redundant["index_usage"] == 2.0 + assert redundant["index_size_pretty"].endswith("KB") + assert redundant["supports_fk"] is True + + +@pytest.mark.unit +def test_generate_d004_pgstat_settings_report( + monkeypatch: pytest.MonkeyPatch, + generator: PostgresReportGenerator, + prom_result, +) -> None: + responses = { + "pgwatch_settings_configured": prom_result( + [ + { + "metric": { + "setting_name": "pg_stat_statements.max", + "setting_value": "1000", + "category": "Stats", + "unit": "", + "context": "postmaster", + "vartype": "integer", + } + } + ] + ) + } + monkeypatch.setattr(generator, "query_instant", _query_stub_factory(prom_result, responses)) + monkeypatch.setattr(generator, "_check_pg_stat_kcache_status", lambda *args, **kwargs: {"extension_available": True}) + monkeypatch.setattr(generator, "_check_pg_stat_statements_status", lambda *args, **kwargs: {"extension_available": False}) + + payload = generator.generate_d004_pgstat_settings_report("local", "node-1") + data = payload["results"]["node-1"]["data"] + + assert "pg_stat_statements.max" in data["settings"] + assert data["pg_stat_kcache_status"]["extension_available"] is True + + +@pytest.mark.unit +def test_generate_f001_autovacuum_settings_report( + monkeypatch: pytest.MonkeyPatch, + generator: PostgresReportGenerator, + prom_result, +) -> 
None: + responses = { + "pgwatch_settings_configured": prom_result( + [ + { + "metric": { + "setting_name": "autovacuum_naptime", + "setting_value": "60", + "category": "Autovacuum", + "unit": "", + "context": "sighup", + "vartype": "integer", + } + } + ] + ) + } + monkeypatch.setattr(generator, "query_instant", _query_stub_factory(prom_result, responses)) + + payload = generator.generate_f001_autovacuum_settings_report("local", "node-1") + data = payload["results"]["node-1"]["data"] + + assert data["autovacuum_naptime"]["setting"] == "60" + assert data["autovacuum_naptime"]["pretty_value"] == "1 min" + + +@pytest.mark.unit +def test_generate_f005_btree_bloat_report( + monkeypatch: pytest.MonkeyPatch, + generator: PostgresReportGenerator, + prom_result, +) -> None: + monkeypatch.setattr(generator, "get_all_databases", lambda *args, **kwargs: ["db1"]) + + responses = { + "pgwatch_pg_btree_bloat_extra_size": prom_result( + [ + { + "metric": {"schemaname": "public", "tblname": "t", "idxname": "idx"}, + "value": [0, "1024"], + } + ] + ), + "pgwatch_pg_btree_bloat_extra_pct": prom_result( + [ + { + "metric": {"schemaname": "public", "tblname": "t", "idxname": "idx"}, + "value": [0, "20"], + } + ] + ), + "pgwatch_pg_btree_bloat_bloat_size": prom_result( + [ + { + "metric": {"schemaname": "public", "tblname": "t", "idxname": "idx"}, + "value": [0, "2048"], + } + ] + ), + "pgwatch_pg_btree_bloat_bloat_pct": prom_result( + [ + { + "metric": {"schemaname": "public", "tblname": "t", "idxname": "idx"}, + "value": [0, "50"], + } + ] + ), + } + monkeypatch.setattr(generator, "query_instant", _query_stub_factory(prom_result, responses)) + + payload = generator.generate_f005_btree_bloat_report("local", "node-1") + db_data = payload["results"]["node-1"]["data"]["db1"] + entry = db_data["bloated_indexes"][0] + + assert entry["extra_size"] == 1024.0 + assert entry["bloat_pct"] == 50.0 + assert entry["bloat_size_pretty"].endswith("KB") + + +@pytest.mark.unit +def test_get_pgss_metrics_data_by_db_invokes_all_metrics(monkeypatch: pytest.MonkeyPatch, generator: PostgresReportGenerator) -> None: + captured: list[str] = [] + + def fake_query_range(query: str, start, end, step: str = "30s") -> list[dict]: + captured.append(query) + return [] + + monkeypatch.setattr(generator, "query_range", fake_query_range) + sentinel = [{"result": "ok"}] + monkeypatch.setattr(generator, "_process_pgss_data", lambda *args, **kwargs: sentinel) + + start = datetime(2024, 1, 1, 0, 0, 0) + end = start + timedelta(hours=1) + result = generator._get_pgss_metrics_data_by_db("local", "node-1", "db1", start, end) + + assert result == sentinel + # Ensure at least one representative metric was queried with filters + assert any("pgwatch_pg_stat_statements_calls" in q for q in captured) + + +@pytest.mark.unit +def test_generate_all_reports_invokes_every_builder(monkeypatch: pytest.MonkeyPatch) -> None: + generator = PostgresReportGenerator() + called: list[str] = [] + + def stub(name: str): + def _(*args, **kwargs): + called.append(name) + return {name: True} + + return _ + + builders = [ + "generate_a002_version_report", + "generate_a003_settings_report", + "generate_a004_cluster_report", + "generate_a007_altered_settings_report", + "generate_d004_pgstat_settings_report", + "generate_f001_autovacuum_settings_report", + "generate_f004_heap_bloat_report", + "generate_f005_btree_bloat_report", + "generate_g001_memory_settings_report", + "generate_h001_invalid_indexes_report", + "generate_h002_unused_indexes_report", + 
"generate_h004_redundant_indexes_report", + "generate_k001_query_calls_report", + "generate_k003_top_queries_report", + ] + + for name in builders: + monkeypatch.setattr(generator, name, stub(name)) + + reports = generator.generate_all_reports("local", "node-1") + + assert set(reports.keys()) == {code.split("_")[1].upper() for code in builders} + assert set(called) == set(builders) + + +@pytest.mark.unit +def test_create_report_uses_api(monkeypatch: pytest.MonkeyPatch) -> None: + generator = PostgresReportGenerator() + payloads: list[dict] = [] + + def fake_make_request(api_url, endpoint, request_data): + payloads.append({"endpoint": endpoint, "data": request_data}) + return {"report_id": 42} + + monkeypatch.setattr(postgres_reports_module, "make_request", fake_make_request) + + report_id = generator.create_report("https://api", "tok", "proj", "123") + + assert report_id == 42 + assert payloads[0]["endpoint"] == "/rpc/checkup_report_create" + assert payloads[0]["data"]["project"] == "proj" + + +@pytest.mark.unit +def test_upload_report_file_sends_contents(tmp_path, monkeypatch: pytest.MonkeyPatch) -> None: + generator = PostgresReportGenerator() + captured: dict = {} + + def fake_make_request(api_url, endpoint, request_data): + captured["endpoint"] = endpoint + captured["data"] = request_data + return {} + + monkeypatch.setattr(postgres_reports_module, "make_request", fake_make_request) + + report_file = tmp_path / "A002_report.json" + report_file.write_text('{"foo": "bar"}', encoding="utf-8") + + generator.upload_report_file("https://api", "tok", 100, str(report_file)) + + assert captured["endpoint"] == "/rpc/checkup_report_file_post" + assert captured["data"]["check_id"] == "A002" + assert captured["data"]["filename"] == report_file.name + + +@pytest.mark.unit +def test_main_runs_specific_check_without_upload(monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]) -> None: + class DummyGenerator: + DEFAULT_EXCLUDED_DATABASES = {'template0', 'template1', 'rdsadmin', 'azure_maintenance', 'cloudsqladmin'} + + def __init__(self, *args, **kwargs): + self.closed = False + + def get_all_clusters(self): + # Match current reporter.main() behavior which always calls + # get_all_clusters() when cluster is not explicitly provided. + return ["local"] + + def test_connection(self) -> bool: + return True + + def generate_a002_version_report(self, cluster, node_name): + return {"checkId": "A002", "results": {node_name: {"data": {"ok": True}}}} + + def close_postgres_sink(self): + self.closed = True + + monkeypatch.setattr(postgres_reports_module, "PostgresReportGenerator", DummyGenerator) + monkeypatch.setattr(sys, "argv", ["postgres_reports.py", "--check-id", "A002", "--output", "-", "--no-upload"]) + + postgres_reports_module.main() + + captured = capsys.readouterr().out + + # main() prints progress banners along with the JSON payload. + # Extract the JSON object from the captured stdout by finding the + # first line that looks like JSON and joining from there. 
+ lines = captured.splitlines() + start_idx = 0 + for i, line in enumerate(lines): + if line.strip().startswith("{"): + start_idx = i + break + json_str = "\n".join(lines[start_idx:]) + + output = json.loads(json_str) + assert output["checkId"] == "A002" + assert "results" in output + + +@pytest.mark.unit +def test_main_exits_when_connection_fails(monkeypatch: pytest.MonkeyPatch) -> None: + class FailingGenerator: + DEFAULT_EXCLUDED_DATABASES = {'template0', 'template1', 'rdsadmin', 'azure_maintenance', 'cloudsqladmin'} + + def __init__(self, *args, **kwargs): + pass + + def test_connection(self) -> bool: + return False + + monkeypatch.setattr(postgres_reports_module, "PostgresReportGenerator", FailingGenerator) + monkeypatch.setattr(sys, "argv", ["postgres_reports.py", "--check-id", "A002"]) + + with pytest.raises(SystemExit): + postgres_reports_module.main() + + +# ============================================================================ +# Negative test cases - Error handling +# ============================================================================ + + +@pytest.mark.unit +def test_query_instant_handles_http_404_error(monkeypatch: pytest.MonkeyPatch, generator: PostgresReportGenerator) -> None: + """Test that query_instant returns empty dict on HTTP 404 error.""" + class MockResponse: + status_code = 404 + text = "Not Found" + + def json(self): + return {"error": "not found"} + + def fake_get(url: str, params: dict[str, Any] | None = None, timeout: int | None = None): + return MockResponse() + + monkeypatch.setattr("requests.get", fake_get) + + result = generator.query_instant("test_query") + + assert result == {} + + +@pytest.mark.unit +def test_query_instant_handles_http_500_error(monkeypatch: pytest.MonkeyPatch, generator: PostgresReportGenerator) -> None: + """Test that query_instant returns empty dict on HTTP 500 error.""" + class MockResponse: + status_code = 500 + text = "Internal Server Error" + + def json(self): + raise ValueError("Invalid JSON") + + def fake_get(url: str, params: dict[str, Any] | None = None, timeout: int | None = None): + return MockResponse() + + monkeypatch.setattr("requests.get", fake_get) + + result = generator.query_instant("test_query") + + assert result == {} + + +@pytest.mark.unit +def test_query_instant_handles_timeout(monkeypatch: pytest.MonkeyPatch, generator: PostgresReportGenerator) -> None: + """Test that query_instant returns empty dict on request timeout.""" + import requests + + def fake_get(url: str, params: dict[str, Any] | None = None, timeout: int | None = None): + raise requests.Timeout("Connection timed out") + + monkeypatch.setattr("requests.get", fake_get) + + result = generator.query_instant("test_query") + + assert result == {} + + +@pytest.mark.unit +def test_query_instant_handles_connection_error(monkeypatch: pytest.MonkeyPatch, generator: PostgresReportGenerator) -> None: + """Test that query_instant returns empty dict on connection error.""" + import requests + + def fake_get(url: str, params: dict[str, Any] | None = None, timeout: int | None = None): + raise requests.ConnectionError("Failed to establish connection") + + monkeypatch.setattr("requests.get", fake_get) + + result = generator.query_instant("test_query") + + assert result == {} + + +@pytest.mark.unit +def test_query_instant_handles_malformed_json(monkeypatch: pytest.MonkeyPatch, generator: PostgresReportGenerator) -> None: + """Test that query_instant returns empty dict when response has invalid JSON.""" + class MockResponse: + status_code = 200 + + def 
json(self): + raise ValueError("Invalid JSON") + + def fake_get(url: str, params: dict[str, Any] | None = None, timeout: int | None = None): + return MockResponse() + + monkeypatch.setattr("requests.get", fake_get) + + result = generator.query_instant("test_query") + + assert result == {} + + +@pytest.mark.unit +def test_query_range_handles_http_error(monkeypatch: pytest.MonkeyPatch, generator: PostgresReportGenerator) -> None: + """Test that query_range returns empty list on HTTP error.""" + class MockResponse: + status_code = 503 + text = "Service Unavailable" + + def json(self): + return {"error": "service unavailable"} + + def fake_get(url: str, params: dict[str, Any] | None = None, timeout: int | None = None): + return MockResponse() + + monkeypatch.setattr("requests.get", fake_get) + + start = datetime.now() + end = start + timedelta(hours=1) + result = generator.query_range("test_query", start, end) + + assert result == [] + + +@pytest.mark.unit +def test_query_range_handles_timeout(monkeypatch: pytest.MonkeyPatch, generator: PostgresReportGenerator) -> None: + """Test that query_range returns empty list on timeout.""" + import requests + + def fake_get(url: str, params: dict[str, Any] | None = None, timeout: int | None = None): + raise requests.Timeout("Connection timed out") + + monkeypatch.setattr("requests.get", fake_get) + + start = datetime.now() + end = start + timedelta(hours=1) + result = generator.query_range("test_query", start, end) + + assert result == [] + + +@pytest.mark.unit +def test_query_range_handles_malformed_response(monkeypatch: pytest.MonkeyPatch, generator: PostgresReportGenerator) -> None: + """Test that query_range handles response with missing expected fields.""" + class MockResponse: + status_code = 200 + + def json(self): + # Missing 'data' or 'result' fields + return {"status": "success"} + + def fake_get(url: str, params: dict[str, Any] | None = None, timeout: int | None = None): + return MockResponse() + + monkeypatch.setattr("requests.get", fake_get) + + start = datetime.now() + end = start + timedelta(hours=1) + result = generator.query_range("test_query", start, end) + + assert result == [] + + +@pytest.mark.unit +def test_query_range_handles_failed_status(monkeypatch: pytest.MonkeyPatch, generator: PostgresReportGenerator) -> None: + """Test that query_range handles Prometheus error status.""" + class MockResponse: + status_code = 200 + + def json(self): + return { + "status": "error", + "errorType": "bad_data", + "error": "invalid query" + } + + def fake_get(url: str, params: dict[str, Any] | None = None, timeout: int | None = None): + return MockResponse() + + monkeypatch.setattr("requests.get", fake_get) + + start = datetime.now() + end = start + timedelta(hours=1) + result = generator.query_range("test_query", start, end) + + assert result == [] + + +@pytest.mark.unit +def test_make_request_raises_on_http_error(monkeypatch: pytest.MonkeyPatch) -> None: + """Test that make_request raises exception on HTTP error.""" + class MockResponse: + status_code = 400 + + def raise_for_status(self): + import requests + raise requests.HTTPError("400 Client Error") + + def json(self): + return {} + + def fake_post(url: str, json: dict[str, Any] | None = None): + return MockResponse() + + monkeypatch.setattr("requests.post", fake_post) + + import requests + with pytest.raises(requests.HTTPError): + postgres_reports_module.make_request("http://api.test", "/endpoint", {"data": "test"}) + + +@pytest.mark.unit +def 
test_make_request_raises_on_connection_error(monkeypatch: pytest.MonkeyPatch) -> None: + """Test that make_request raises exception on connection error.""" + import requests + + def fake_post(url: str, json: dict[str, Any] | None = None): + raise requests.ConnectionError("Connection failed") + + monkeypatch.setattr("requests.post", fake_post) + + with pytest.raises(requests.ConnectionError): + postgres_reports_module.make_request("http://api.test", "/endpoint", {"data": "test"}) diff --git a/tests/reporter/test_postgres_integration.py b/tests/reporter/test_postgres_integration.py new file mode 100644 index 0000000..414d009 --- /dev/null +++ b/tests/reporter/test_postgres_integration.py @@ -0,0 +1,75 @@ +import json +from datetime import datetime, timezone +from typing import Callable, Tuple + +import pytest + +from reporter.postgres_reports import PostgresReportGenerator + +Seeder = Callable[[str, str, str], None] + + +@pytest.fixture(scope="function") +def sink_index_data(postgresql) -> Tuple[str, Seeder]: + conn = postgresql + conn.autocommit = True + cur = conn.cursor() + cur.execute( + """ + create table if not exists public.index_definitions ( + time timestamptz not null, + dbname text not null, + data jsonb not null, + tag_data jsonb + ) + """ + ) + + def seed(dbname: str, index_name: str, index_def: str) -> None: + payload = { + "indexrelname": index_name, + "index_definition": index_def, + "schemaname": "public", + "relname": "tbl", + } + with conn.cursor() as seed_cur: + seed_cur.execute( + ( + "insert into public.index_definitions " + "(time, dbname, data) values (%s, %s, %s::jsonb)" + ), + (datetime.now(timezone.utc), dbname, json.dumps(payload)), + ) + + host = conn.info.host or conn.info.hostaddr or "localhost" + port = conn.info.port + user = conn.info.user + dbname = conn.info.dbname + dsn = f"postgresql://{user}@{host}:{port}/{dbname}" + + yield dsn, seed + + cur.execute("truncate table public.index_definitions") + cur.close() + + +@pytest.mark.integration +@pytest.mark.requires_postgres +def test_get_index_definitions_from_sink(sink_index_data) -> None: + dsn, seed = sink_index_data + seed("db1", "idx_users", "CREATE INDEX idx_users ON users(id)") + seed("db2", "idx_orders", "CREATE INDEX idx_orders ON orders(id)") + + generator = PostgresReportGenerator( + prometheus_url="http://unused", + postgres_sink_url=dsn, + ) + assert generator.connect_postgres_sink() + + definitions = generator.get_index_definitions_from_sink() + + assert definitions["db1.idx_users"] == "CREATE INDEX idx_users ON users(id)" + assert definitions["db2.idx_orders"] == "CREATE INDEX idx_orders ON orders(id)" + + generator.close_postgres_sink() + assert generator.pg_conn is None
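
Note on the new test markers: the reporter tests rely on the custom pytest markers unit, integration, and requires_postgres, while the conftest.py in this diff only adds the --run-integration flag and the skip logic. If those markers are not already registered elsewhere in the repository (for example in a pytest.ini or pyproject.toml that this diff does not touch), a minimal, hypothetical addition to tests/reporter/conftest.py along the following lines would register them and avoid PytestUnknownMarkWarning; the marker names mirror the ones used above, but the descriptions are illustrative only.

import pytest  # already imported in the conftest.py shown above


def pytest_configure(config: pytest.Config) -> None:
    # Register the custom markers used under tests/reporter so pytest
    # recognizes them instead of warning about unknown marks.
    config.addinivalue_line("markers", "unit: fast tests that need no external services")
    config.addinivalue_line("markers", "integration: tests that talk to running services")
    config.addinivalue_line("markers", "requires_postgres: tests that need a PostgreSQL instance")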