本文汇总 PostgreSQL 日常运维中的核心知识点,涵盖安装部署、权限管理、性能调优、备份恢复、高可用、监控排障等全流程,附带大量可直接复用的命令和配置示例。适合 DBA 和后端工程师作为案头速查手册。


一、安装部署

1.1 Ubuntu/Debian 安装

# 添加官方 APT 源
sudo sh -c 'echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list'
wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add -
sudo apt-get update

# 安装 PostgreSQL 16
sudo apt-get install -y postgresql-16 postgresql-client-16

# 验证安装
pg_config --version
sudo systemctl status postgresql

1.2 CentOS/RHEL 安装

# 添加官方 YUM 源
sudo yum install -y https://download.postgresql.org/pub/repos/yum/reporpms/EL-8-x86_64/pgdg-redhat-repo-latest.noarch.rpm
sudo yum module disable -y postgresql  # 禁用系统自带版本
sudo yum install -y postgresql16-server postgresql16

# 初始化数据库
sudo /usr/pgsql-16/bin/postgresql-16-setup initdb
sudo systemctl enable --now postgresql-16

1.3 Docker 快速部署

# 单实例启动
docker run -d \
  --name pg16 \
  -e POSTGRES_PASSWORD=StrongP@ss123 \
  -e POSTGRES_DB=myapp \
  -p 5432:5432 \
  -v pgdata:/var/lib/postgresql/data \
  postgres:16-alpine

# 使用自定义配置启动
docker run -d \
  --name pg16-tuned \
  -e POSTGRES_PASSWORD=StrongP@ss123 \
  -p 5432:5432 \
  -v pgdata:/var/lib/postgresql/data \
  -v ./postgresql.conf:/etc/postgresql/postgresql.conf \
  postgres:16-alpine \
  -c config_file=/etc/postgresql/postgresql.conf

1.4 初始化后的首要操作

# 切换到 postgres 用户
sudo -i -u postgres

# 创建业务数据库和用户
psql -c "CREATE USER appuser WITH PASSWORD 'AppP@ss456';"
psql -c "CREATE DATABASE myappdb OWNER appuser;"
psql -c "GRANT ALL PRIVILEGES ON DATABASE myappdb TO appuser;"

# 修改 pg_hba.conf 允许远程连接
# 编辑 /etc/postgresql/16/main/pg_hba.conf(Ubuntu)
# 或 /var/lib/pgsql/16/data/pg_hba.conf(CentOS)
# 添加:
# host  myappdb  appuser  0.0.0.0/0  scram-sha-256

二、用户与权限管理

2.1 角色与用户

-- 创建带权限的角色
CREATE ROLE readonly NOLOGIN;
GRANT CONNECT ON DATABASE myappdb TO readonly;
GRANT USAGE ON SCHEMA public TO readonly;
GRANT SELECT ON ALL TABLES IN SCHEMA public TO readonly;
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT ON TABLES TO readonly;

-- 创建可登录用户并继承角色
CREATE USER analyst WITH PASSWORD 'AnalystP@ss';
GRANT readonly TO analyst;

-- 创建读写用户
CREATE ROLE readwrite NOLOGIN;
GRANT readonly TO readwrite;
GRANT INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA public TO readwrite;
GRANT USAGE ON ALL SEQUENCES IN SCHEMA public TO readwrite;

CREATE USER developer WITH PASSWORD 'DevP@ss';
GRANT readwrite TO developer;

2.2 行级权限(RLS)

-- 启用行级安全
ALTER TABLE orders ENABLE ROW LEVEL SECURITY;

-- 用户只能看到自己的订单
CREATE POLICY user_orders ON orders
  FOR ALL
  USING (user_id = current_setting('app.current_user_id')::int);

-- 应用层设置上下文
SET app.current_user_id = '12345';
SELECT * FROM orders;  -- 只返回 user_id=12345 的行

2.3 权限审计

-- 查看所有角色
SELECT rolname, rolsuper, rolcreatedb, rolcanlogin, rolconnlimit
FROM pg_roles WHERE rolname NOT LIKE 'pg_%';

-- 查看某用户的权限
\du appuser
\z mytable  -- 查看表级权限

-- 查看 schema 权限
SELECT grantee, privilege_type
FROM information_schema.schema_privileges
WHERE schema_name = 'public';

三、配置调优

3.1 核心参数(postgresql.conf)

# ========== 连接 ==========
max_connections = 200           # 根据业务并发调整,配合连接池使用
superuser_reserved_connections = 3

# ========== 内存 ==========
# shared_buffers: 推荐物理内存的 25%,不超过 8GB(超出部分交给 OS page cache)
shared_buffers = '4GB'

# effective_cache_size: 估算 OS 可用于缓存的内存总量(shared_buffers + OS cache)
effective_cache_size = '12GB'

# work_mem: 每个排序/哈希操作的内存,注意总消耗 = work_mem × 并发数 × 操作数
work_mem = '64MB'

# maintenance_work_mem: VACUUM/CREATE INDEX 等维护操作的内存
maintenance_work_mem = '1GB'

# ========== WAL ==========
wal_level = replica             # 支持流复制和 PITR
max_wal_size = '4GB'
min_wal_size = '1GB'
wal_buffers = '64MB'

# ========== 查询优化 ==========
random_page_cost = 1.1          # SSD 建议 1.1,HDD 保持默认 4.0
effective_io_concurrency = 200  # SSD 建议 200,HDD 保持默认 1
default_statistics_target = 200

# ========== 日志 ==========
logging_collector = on
log_directory = 'pg_log'
log_filename = 'postgresql-%Y-%m-%d.log'
log_rotation_age = '1d'
log_rotation_size = '100MB'
log_min_duration_statement = 1000  # 记录超过 1 秒的慢查询
log_checkpoints = on
log_connections = on
log_disconnections = on
log_lock_waits = on
log_line_prefix = '%t [%p-%l] %q%u@%d '

# ========== 自动清理 ==========
autovacuum = on
autovacuum_max_workers = 4
autovacuum_naptime = '30s'
autovacuum_vacuum_threshold = 50
autovacuum_vacuum_scale_factor = 0.05      # 默认 0.2,高写入场景调低
autovacuum_analyze_threshold = 50
autovacuum_analyze_scale_factor = 0.025

3.2 连接池:PgBouncer

# /etc/pgbouncer/pgbouncer.ini

[databases]
myappdb = host=127.0.0.1 port=5432 dbname=myappdb

[pgbouncer]
listen_addr = 0.0.0.0
listen_port = 6432
auth_type = scram-sha-256
auth_file = /etc/pgbouncer/userlist.txt

pool_mode = transaction        # transaction 模式最常用,每个事务结束后归还连接
default_pool_size = 50
min_pool_size = 10
reserve_pool_size = 5
reserve_pool_timeout = 3
max_client_conn = 1000         # 客户端连接数可以远大于后端连接数
server_idle_timeout = 300
server_lifetime = 3600
client_idle_timeout = 0

log_connections = 1
log_disconnections = 1
stats_period = 60

# 管理接口
admin_users = pgbouncer_admin
stats_users = pgbouncer_stats
# userlist.txt 格式
"appuser" "SCRAM-SHA-256$4096:xxxx..."
"pgbouncer_admin" "admin_pass"

# 启动
sudo systemctl enable --now pgbouncer

# 通过 PgBouncer 连接
psql -h 127.0.0.1 -p 6432 -U appuser myappdb

# 管理命令(通过 psql 连接到 pgbouncer 数据库)
psql -h 127.0.0.1 -p 6432 -U pgbouncer_admin pgbouncer
SHOW POOLS;      -- 查看连接池状态
SHOW STATS;      -- 查看统计信息
SHOW CLIENTS;    -- 查看客户端连接
RELOAD;          -- 重载配置

3.3 参数在线调整

-- 不重启生效的部分参数
ALTER SYSTEM SET work_mem = '128MB';
ALTER SYSTEM SET log_min_duration_statement = 500;
SELECT pg_reload_conf();  -- 重载配置

-- 查看当前值
SHOW work_mem;
SHOW ALL;

-- 查看哪些参数需要重启
SELECT name, setting, context
FROM pg_settings
WHERE context = 'postmaster';  -- 这些参数修改后需要重启

四、索引优化

4.1 索引类型速查

-- B-tree(默认,适用于等值和范围查询)
CREATE INDEX idx_users_email ON users (email);
CREATE INDEX idx_users_name ON users (last_name, first_name);  -- 复合索引

-- 唯一索引
CREATE UNIQUE INDEX idx_users_email_uniq ON users (email);

-- 部分索引(条件索引,减小索引体积)
CREATE INDEX idx_orders_active ON orders (created_at)
WHERE status = 'active';

-- 覆盖索引(INCLUDE,避免回表)
CREATE INDEX idx_orders_covering ON orders (user_id)
INCLUDE (total_amount, status);

-- Hash(仅适用于等值查询,PG 10+ 支持 WAL 记录)
CREATE INDEX idx_sessions_token ON sessions USING hash (token);

-- GiST(几何、全文搜索、范围类型)
CREATE INDEX idx_places_geo ON places USING gist (location);
CREATE INDEX idx_docs_fts ON documents USING gist (to_tsvector('english', content));

-- GIN(全文搜索、JSONB、数组)
CREATE INDEX idx_docs_tsv ON documents USING gin (to_tsvector('english', content));
CREATE INDEX idx_events_meta ON events USING gin (metadata jsonb_path_ops);

-- BRIN(大表顺序数据,极小索引体积)
CREATE INDEX idx_logs_time ON access_logs USING brin (created_at)
WITH (pages_per_range = 128);

-- 并发创建(不阻塞写入)
CREATE INDEX CONCURRENTLY idx_big_table_col ON big_table (col);

4.2 EXPLAIN 深度解读

-- 基础执行计划
EXPLAIN SELECT * FROM orders WHERE user_id = 12345 AND status = 'shipped';

-- 包含实际执行统计(推荐)
EXPLAIN (ANALYZE, BUFFERS, FORMAT TEXT)
SELECT o.*, u.name
FROM orders o
JOIN users u ON o.user_id = u.id
WHERE o.created_at > '2026-01-01'
  AND o.status = 'shipped'
ORDER BY o.created_at DESC
LIMIT 100;

-- JSON 格式(便于工具解析)
EXPLAIN (ANALYZE, BUFFERS, FORMAT JSON) SELECT ...;

关键指标解读:

指标

含义

关注点

Seq Scan

全表扫描

大表出现说明缺索引

Index Scan

索引扫描+回表

正常

Index Only Scan

仅索引扫描

最优,无需回表

Bitmap Index/Heap Scan

位图扫描

选择性较差时出现

Nested Loop

嵌套循环

小表驱动大表时正常

Hash Join

哈希连接

适合等值连接大表

Sort

排序

检查是否有 INDEX SCAN 替代

rows estimate vs actual

估算行数 vs 实际行数

差距大则需要 ANALYZE

Buffers: shared hit/read

缓存命中/物理读

hit 率应 > 99%

4.3 索引维护

-- 查看索引使用情况
SELECT schemaname, relname, indexrelname,
       idx_scan, idx_tup_read, idx_tup_fetch,
       pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
ORDER BY idx_scan ASC;

-- 查找未使用的索引(候选删除)
SELECT schemaname, relname, indexrelname, idx_scan,
       pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
WHERE idx_scan = 0 AND indexrelid NOT IN (
  SELECT conindid FROM pg_constraint WHERE contype IN ('p','u')
)
ORDER BY pg_relation_size(indexrelid) DESC;

-- 索引膨胀检查与重建
-- 先查看索引大小
SELECT indexname, pg_size_pretty(pg_relation_size(indexname::regclass))
FROM pg_indexes WHERE tablename = 'orders';

-- 重建索引(不阻塞读写)
REINDEX INDEX CONCURRENTLY idx_orders_user_id;

-- 查看索引碎片
SELECT schemaname, tablename, indexname,
       pg_size_pretty(pg_relation_size(indexname::regclass)) AS index_size
FROM pg_indexes
WHERE schemaname = 'public'
ORDER BY pg_relation_size(indexname::regclass) DESC;

五、备份恢复

5.1 逻辑备份:pg_dump / pg_dumpall

# 单库备份(自定义格式,支持并行恢复)
pg_dump -h localhost -U postgres -Fc -j 4 -f myappdb.dump myappdb

# 纯 SQL 格式备份
pg_dump -h localhost -U postgres -f myappdb.sql myappdb

# 仅备份指定表
pg_dump -h localhost -U postgres -Fc -t orders -t users -f partial.dump myappdb

# 仅备份 schema(不含数据)
pg_dump -h localhost -U postgres --schema-only -f schema.sql myappdb

# 仅备份数据
pg_dump -h localhost -U postgres --data-only -f data.sql myappdb

# 全实例备份(含角色和表空间)
pg_dumpall -h localhost -U postgres -f full_backup.sql

# 恢复
pg_restore -h localhost -U postgres -d myappdb -j 4 myappdb.dump
psql -h localhost -U postgres -d myappdb -f myappdb.sql

5.2 物理备份:pg_basebackup

# 基础备份
pg_basebackup -h localhost -U replicator -D /backup/base \
  -Fp -Xs -P -R

# 压缩备份(tar 格式)
pg_basebackup -h localhost -U replicator -D /backup/base \
  -Ft -z -P -R

# 参数说明:
# -Fp: plain 格式   -Ft: tar 格式
# -Xs: stream WAL   -Xf: fetch WAL
# -P: 显示进度       -R: 自动生成 standby.signal 和连接信息

5.3 WAL 归档配置

# postgresql.conf
archive_mode = on
archive_command = 'test ! -f /archive/%f && cp %p /archive/%f'
# 或使用更安全的方式:
# archive_command = 'gzip < %p > /archive/%f.gz'
# 或使用 rsync 推送到远程:
# archive_command = 'rsync -a %p remote:/archive/%f'

5.4 时间点恢复(PITR)

# 1. 停止 PostgreSQL
sudo systemctl stop postgresql

# 2. 备份当前数据目录(以防万一)
mv /var/lib/postgresql/16/main /var/lib/postgresql/16/main_broken

# 3. 恢复基础备份
cp -r /backup/base /var/lib/postgresql/16/main

# 4. 配置恢复参数
cat > /var/lib/postgresql/16/main/postgresql.auto.conf << 'EOF'
restore_command = 'cp /archive/%f %p'
recovery_target_time = '2026-06-10 10:30:00+08'
recovery_target_action = 'promote'
EOF

# 5. 创建恢复信号文件
touch /var/lib/postgresql/16/main/recovery.signal

# 6. 启动恢复
sudo systemctl start postgresql

# 恢复完成后,recovery.signal 自动删除,数据库正常接受连接

5.5 pgBackRest(企业级备份方案)

# /etc/pgbackrest/pgbackrest.conf
[global]
repo1-path=/backup/pgbackrest
repo1-retention-full=2
repo1-retention-diff=7
repo1-cipher-type=aes-256-cbc
repo1-cipher-pass=BackupEncryptionKey
process-max=4
compress-type=zst
compress-level=6
log-level-console=info
start-fast=y

[mydb]
pg1-path=/var/lib/postgresql/16/main
pg1-port=5432
# 初始化 stanza
sudo -u postgres pgbackrest --stanza=mydb stanza-create

# 全量备份
sudo -u postgres pgbackrest --stanza=mydb --type=full backup

# 增量备份(每天执行)
sudo -u postgres pgbackrest --stanza=mydb --type=diff backup

# 查看备份信息
sudo -u postgres pgbackrest --stanza=mydb info

# 恢复到指定时间
sudo systemctl stop postgresql
sudo -u postgres pgbackrest --stanza=mydb \
  --target="2026-06-10 10:30:00+08:00" \
  --type=time restore
sudo systemctl start postgresql

# 定时备份(crontab)
# 每天凌晨 2 点增量备份,每周日全量
# 0 2 * * 1-6  pgbackrest --stanza=mydb --type=diff backup
# 0 2 * * 0    pgbackrest --stanza=mydb --type=full backup

六、主从复制

6.1 流复制(Streaming Replication)

主库配置:

# 创建复制用户
psql -c "CREATE USER replicator WITH REPLICATION PASSWORD 'ReplP@ss';"
# postgresql.conf(主库)
wal_level = replica
max_wal_senders = 10
wal_keep_size = '2GB'          # 或使用 replication slot
synchronous_standby_names = '' # 空 = 异步复制;填名称 = 同步复制

# 可选:使用 replication slot 防止 WAL 过早清理
# max_replication_slots = 10
# pg_hba.conf(主库)
host  replication  replicator  10.0.0.0/24  scram-sha-256

从库搭建:

# 1. 清空从库数据目录
sudo systemctl stop postgresql
sudo rm -rf /var/lib/postgresql/16/main/*

# 2. 使用 pg_basebackup 搭建
sudo -u postgres pg_basebackup \
  -h 10.0.0.1 -U replicator -D /var/lib/postgresql/16/main \
  -Fp -Xs -P -R

# -R 会自动生成:
#   standby.signal 文件
#   postgresql.auto.conf 中写入 primary_conninfo

# 3. 启动从库
sudo systemctl start postgresql

监控复制状态:

-- 主库查看
SELECT client_addr, state, sent_lsn, write_lsn, flush_lsn, replay_lsn,
       pg_wal_lsn_diff(sent_lsn, replay_lsn) AS replay_lag_bytes
FROM pg_stat_replication;

-- 从库查看
SELECT pg_is_in_recovery();              -- true = 从库
SELECT pg_last_wal_receive_lsn();        -- 最后接收的 LSN
SELECT pg_last_wal_replay_lsn();         -- 最后重放的 LSN
SELECT pg_last_xact_replay_timestamp();  -- 最后重放事务的时间

-- 从库延迟(秒)
SELECT EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp()))::int AS lag_seconds;

6.2 逻辑复制(Logical Replication)

# postgresql.conf(发布端和订阅端都需要)
wal_level = logical
max_replication_slots = 10
max_wal_senders = 10
-- 发布端
CREATE PUBLICATION my_pub FOR TABLE orders, users;
-- 或发布所有表
CREATE PUBLICATION my_pub FOR ALL TABLES;

-- 查看发布
SELECT * FROM pg_publication;
SELECT * FROM pg_publication_tables;

-- 订阅端
CREATE SUBSCRIPTION my_sub
  CONNECTION 'host=10.0.0.1 dbname=myappdb user=replicator password=ReplP@ss'
  PUBLICATION my_pub;

-- 查看订阅状态
SELECT * FROM pg_subscription;
SELECT * FROM pg_stat_subscription;

-- 管理
ALTER SUBSCRIPTION my_sub DISABLE;
ALTER SUBSCRIPTION my_sub ENABLE;
ALTER SUBSCRIPTION my_sub REFRESH PUBLICATION;  -- 新增表后刷新
DROP SUBSCRIPTION my_sub;

七、性能优化

7.1 VACUUM 策略

-- 查看表膨胀率
SELECT schemaname, relname,
       n_live_tup, n_dead_tup,
       CASE WHEN n_live_tup > 0
            THEN round(100.0 * n_dead_tup / n_live_tup, 2)
            ELSE 0 END AS dead_pct,
       last_vacuum, last_autovacuum, last_analyze
FROM pg_stat_user_tables
WHERE n_dead_tup > 1000
ORDER BY n_dead_tup DESC;

-- 手动 VACUUM(不锁表)
VACUUM VERBOSE orders;

-- VACUUM FULL(会锁表并重写表,回收空间到 OS)
-- ⚠️ 生产环境谨慎使用,建议在低峰期执行
VACUUM FULL VERBOSE orders;

-- 更好的选择:使用 pg_repack(不锁表重建表)
-- pg_repack -t orders myappdb

-- 查看 VACUUM 进度
SELECT * FROM pg_stat_progress_vacuum;

-- 设置表级 autovacuum 参数(大表或高写入表)
ALTER TABLE orders SET (
  autovacuum_vacuum_scale_factor = 0.01,
  autovacuum_analyze_scale_factor = 0.005,
  autovacuum_vacuum_cost_limit = 2000
);

-- 禁用某张表的 autovacuum(极端情况)
ALTER TABLE legacy_logs SET (autovacuum_enabled = off);

7.2 分区表

-- 范围分区(按时间)
CREATE TABLE orders (
    id BIGSERIAL,
    user_id INT NOT NULL,
    amount NUMERIC(12,2),
    status VARCHAR(20),
    created_at TIMESTAMPTZ NOT NULL DEFAULT now()
) PARTITION BY RANGE (created_at);

-- 创建月度分区
CREATE TABLE orders_2026_01 PARTITION OF orders
    FOR VALUES FROM ('2026-01-01') TO ('2026-02-01');
CREATE TABLE orders_2026_02 PARTITION OF orders
    FOR VALUES FROM ('2026-02-01') TO ('2026-03-01');
CREATE TABLE orders_2026_03 PARTITION OF orders
    FOR VALUES FROM ('2026-03-01') TO ('2026-04-01');

-- 默认分区(兜底)
CREATE TABLE orders_default PARTITION OF orders DEFAULT;

-- 列表分区(按状态)
CREATE TABLE events (
    id BIGSERIAL,
    event_type VARCHAR(50) NOT NULL,
    payload JSONB,
    created_at TIMESTAMPTZ DEFAULT now()
) PARTITION BY LIST (event_type);

CREATE TABLE events_user PARTITION OF events
    FOR VALUES IN ('login', 'logout', 'register');
CREATE TABLE events_order PARTITION OF events
    FOR VALUES IN ('created', 'paid', 'shipped', 'completed');

-- 自动创建分区(pg_partman 扩展)
CREATE EXTENSION pg_partman;

SELECT partman.create_parent(
    p_parent_table := 'public.orders',
    p_control := 'created_at',
    p_type := 'range',
    p_interval := '1 month',
    p_premake := 3  -- 提前创建 3 个未来分区
);

-- 定时维护
UPDATE partman.part_config
SET infinite_time_partitions = true,
    retention = '12 months',
    retention_keep_table = false;

-- 查看分区信息
SELECT inhrelid::regclass, pg_get_expr(c.relpartbound, c.oid, true)
FROM pg_inherits i
JOIN pg_class c ON c.oid = i.inhrelid
WHERE i.inhparent = 'orders'::regclass;

7.3 查询性能排查

-- 查看当前活跃查询
SELECT pid, now() - pg_stat_activity.query_start AS duration, query, state, wait_event_type, wait_event
FROM pg_stat_activity
WHERE state != 'idle' AND pid != pg_backend_pid()
ORDER BY duration DESC;

-- 查看等待事件
SELECT wait_event_type, wait_event, count(*)
FROM pg_stat_activity
WHERE state = 'active'
GROUP BY wait_event_type, wait_event
ORDER BY count(*) DESC;

-- 杀掉长时间运行的查询
SELECT pg_terminate_backend(pid);

-- 查看锁等待
SELECT blocked_locks.pid AS blocked_pid,
       blocked_activity.usename AS blocked_user,
       blocking_locks.pid AS blocking_pid,
       blocking_activity.usename AS blocking_user,
       blocked_activity.query AS blocked_query,
       blocking_activity.query AS blocking_query
FROM pg_catalog.pg_locks blocked_locks
JOIN pg_catalog.pg_stat_activity blocked_activity ON blocked_activity.pid = blocked_locks.pid
JOIN pg_catalog.pg_locks blocking_locks
    ON blocking_locks.locktype = blocked_locks.locktype
    AND blocking_locks.relation = blocked_locks.relation
    AND blocking_locks.pid != blocked_locks.pid
JOIN pg_catalog.pg_stat_activity blocking_activity ON blocking_activity.pid = blocking_locks.pid
WHERE NOT blocked_locks.granted;

-- 表大小排序
SELECT schemaname, relname,
       pg_size_pretty(pg_total_relation_size(relid)) AS total_size,
       pg_size_pretty(pg_relation_size(relid)) AS table_size,
       pg_size_pretty(pg_indexes_size(relid)) AS index_size,
       n_live_tup
FROM pg_stat_user_tables
ORDER BY pg_total_relation_size(relid) DESC
LIMIT 20;

-- 数据库大小
SELECT datname, pg_size_pretty(pg_database_size(datname))
FROM pg_database ORDER BY pg_database_size(datname) DESC;

八、扩展管理

8.1 pg_stat_statements(查询统计)

# postgresql.conf
shared_preload_libraries = 'pg_stat_statements'
pg_stat_statements.max = 10000
pg_stat_statements.track = all
-- 安装扩展
CREATE EXTENSION pg_stat_statements;

-- Top 10 最耗时查询
SELECT calls, total_exec_time::numeric(10,2) AS total_ms,
       mean_exec_time::numeric(10,2) AS avg_ms,
       rows, query
FROM pg_stat_statements
ORDER BY total_exec_time DESC LIMIT 10;

-- Top 10 最频繁查询
SELECT calls, total_exec_time::numeric(10,2) AS total_ms,
       mean_exec_time::numeric(10,2) AS avg_ms,
       query
FROM pg_stat_statements
ORDER BY calls DESC LIMIT 10;

-- Top 10 IO 最大查询
SELECT calls,
       shared_blks_hit + shared_blks_read AS total_blks,
       shared_blks_hit,
       shared_blks_read,
       query
FROM pg_stat_statements
ORDER BY shared_blks_read DESC LIMIT 10;

-- 重置统计
SELECT pg_stat_statements_reset();

8.2 PostGIS(地理空间)

-- 安装
CREATE EXTENSION postgis;
CREATE EXTENSION postgis_topology;

-- 创建空间表
CREATE TABLE locations (
    id SERIAL PRIMARY KEY,
    name VARCHAR(200),
    geom GEOMETRY(Point, 4326)
);

-- 插入数据
INSERT INTO locations (name, geom) VALUES
    ('北京', ST_SetSRID(ST_MakePoint(116.4074, 39.9042), 4326)),
    ('上海', ST_SetSRID(ST_MakePoint(121.4737, 31.2304), 4326));

-- 空间索引
CREATE INDEX idx_locations_geom ON locations USING gist (geom);

-- 查找 500km 范围内的点
SELECT name, ST_Distance(
    geom::geography,
    ST_SetSRID(ST_MakePoint(116.4074, 39.9042), 4326)::geography
) / 1000 AS distance_km
FROM locations
WHERE ST_DWithin(
    geom::geography,
    ST_SetSRID(ST_MakePoint(116.4074, 39.9042), 4326)::geography,
    500000  -- 500km
)
ORDER BY distance_km;

8.3 常用扩展速查

-- 查看已安装扩展
SELECT * FROM pg_available_extensions WHERE installed_version IS NOT NULL;

-- 查看可安装扩展
SELECT name, default_version, comment FROM pg_available_extensions ORDER BY name;

-- 常用扩展
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";        -- UUID 生成
CREATE EXTENSION IF NOT EXISTS "pgcrypto";          -- 加密函数
CREATE EXTENSION IF NOT EXISTS "pg_trgm";           -- 模糊搜索加速
CREATE EXTENSION IF NOT EXISTS "btree_gist";        -- GiST 支持标量类型
CREATE EXTENSION IF NOT EXISTS "btree_gin";         -- GIN 支持标量类型
CREATE EXTENSION IF NOT EXISTS "hstore";            -- KV 对类型
CREATE EXTENSION IF NOT EXISTS "pg_stat_statements";-- 查询统计
CREATE EXTENSION IF NOT EXISTS "auto_explain";      -- 自动记录慢查询计划
CREATE EXTENSION IF NOT EXISTS "pg_repack";         -- 在线表/索引重建

-- 模糊搜索加速示例
CREATE EXTENSION pg_trgm;
CREATE INDEX idx_users_name_trgm ON users USING gin (name gin_trgm_ops);
SELECT * FROM users WHERE name % '张三';  -- 相似度搜索

九、监控与告警

9.1 核心监控指标

-- 连接数
SELECT count(*) AS total,
       count(*) FILTER (WHERE state = 'active') AS active,
       count(*) FILTER (WHERE state = 'idle') AS idle,
       count(*) FILTER (WHERE state = 'idle in transaction') AS idle_in_tx,
       current_setting('max_connections')::int AS max_conn
FROM pg_stat_activity;

-- 事务统计
SELECT datname,
       xact_commit, xact_rollback,
       deadlocks, temp_files, temp_bytes,
       blocks_hit, blocks_read,
       CASE WHEN blocks_hit + blocks_read > 0
            THEN round(100.0 * blocks_hit / (blocks_hit + blocks_read), 2)
            ELSE 100 END AS cache_hit_ratio
FROM pg_stat_database WHERE datname NOT LIKE 'template%';

-- 长事务检测
SELECT pid, usename, datname, state,
       now() - xact_start AS xact_duration,
       now() - query_start AS query_duration,
       query
FROM pg_stat_activity
WHERE xact_start IS NOT NULL
ORDER BY xact_start
LIMIT 20;

-- 表膨胀与死行
SELECT relname, n_live_tup, n_dead_tup,
       CASE WHEN n_live_tup > 0
            THEN round(100.0 * n_dead_tup / n_live_tup, 2) ELSE 0 END AS dead_pct,
       last_autovacuum
FROM pg_stat_user_tables
WHERE n_dead_tup > 0
ORDER BY n_dead_tup DESC LIMIT 20;

-- 复制延迟
SELECT client_addr, state,
       pg_wal_lsn_diff(pg_current_wal_lsn(), replay_lsn) AS replay_lag_bytes,
       replay_lag
FROM pg_stat_replication;

-- WAL 生成速率
SELECT pg_current_wal_lsn() AS current_lsn;
-- 等 1 分钟后再查
-- SELECT pg_wal_lsn_diff(pg_current_wal_lsn(), '上次的LSN') AS wal_bytes_per_min;

9.2 Prometheus + Grafana 监控

# 安装 postgres_exporter
wget https://github.com/prometheus-community/postgres_exporter/releases/download/v0.15.0/postgres_exporter-0.15.0.linux-amd64.tar.gz
tar xzf postgres_exporter-0.15.0.linux-amd64.tar.gz
sudo mv postgres_exporter-0.15.0.linux-amd64/postgres_exporter /usr/local/bin/

# 创建监控用户
psql -c "CREATE USER pg_monitor WITH PASSWORD 'MonitorP@ss';"
psql -c "GRANT pg_monitor TO pg_monitor;"

# Systemd 服务
cat > /etc/systemd/system/postgres_exporter.service << 'EOF'
[Unit]
Description=PostgreSQL Exporter
After=network.target

[Service]
Type=simple
User=postgres
Environment="DATA_SOURCE_NAME=postgresql://pg_monitor:MonitorP@ss@localhost:5432/postgres?sslmode=disable"
ExecStart=/usr/local/bin/postgres_exporter \
  --web.listen-address=:9187 \
  --collector.stat_statements
Restart=always

[Install]
WantedBy=multi-user.target
EOF

sudo systemctl daemon-reload
sudo systemctl enable --now postgres_exporter

9.3 关键告警规则

# Prometheus 告警规则示例(pg_alerts.yml)
groups:
  - name: postgresql
    rules:
      - alert: PG_TooManyConnections
        expr: sum(pg_stat_activity_count) / on() pg_settings_max_connections > 0.8
        for: 5m
        labels: { severity: warning }
        annotations:
          summary: "PostgreSQL 连接数超过 80%"

      - alert: PG_ReplicationLag
        expr: pg_stat_replication_lag > 60
        for: 2m
        labels: { severity: critical }
        annotations:
          summary: "复制延迟超过 60 秒"

      - alert: PG_DeadlockDetected
        expr: increase(pg_stat_database_deadlocks[5m]) > 0
        labels: { severity: warning }
        annotations:
          summary: "检测到死锁"

      - alert: PG_CacheHitRateLow
        expr: pg_stat_database_blks_hit / (pg_stat_database_blks_hit + pg_stat_database_blks_read) < 0.95
        for: 10m
        labels: { severity: warning }
        annotations:
          summary: "缓存命中率低于 95%"

      - alert: PG_DeadTupleRatioHigh
        expr: pg_stat_user_tables_n_dead_tup / (pg_stat_user_tables_n_live_tup + 1) > 0.2
        for: 30m
        labels: { severity: warning }
        annotations:
          summary: "表 {{ $labels.relname }} 死行比例超过 20%"

十、常见问题排查

10.1 连接问题

# 无法连接
# 1. 检查服务状态
sudo systemctl status postgresql

# 2. 检查监听地址
sudo ss -tlnp | grep 5432
# 如果只监听 127.0.0.1,修改 postgresql.conf: listen_addresses = '*'

# 3. 检查 pg_hba.conf
# host all all 0.0.0.0/0 scram-sha-256

# 4. 检查防火墙
sudo ufw allow 5432/tcp   # Ubuntu
sudo firewall-cmd --add-port=5432/tcp --permanent && sudo firewall-cmd --reload  # CentOS

# 5. 检查最大连接数
psql -c "SELECT count(*), current_setting('max_connections') FROM pg_stat_activity;"
# 如果满了,考虑 PgBouncer 连接池

10.2 性能问题

-- 慢查询排查流程
-- 1. 找到慢查询
SELECT pid, duration, query FROM (
  SELECT pid, now() - query_start AS duration, query
  FROM pg_stat_activity
  WHERE state = 'active' AND query NOT LIKE '%pg_stat_activity%'
) t ORDER BY duration DESC LIMIT 10;

-- 2. 查看执行计划
EXPLAIN (ANALYZE, BUFFERS) SELECT ...;

-- 3. 检查表统计信息是否过期
SELECT relname, last_analyze, last_autoanalyze,
       n_live_tup, n_dead_tup
FROM pg_stat_user_tables
WHERE relname = 'problem_table';

-- 手动更新统计
ANALYZE problem_table;

-- 4. 检查是否有锁等待
-- (见上面的锁等待查询)

-- 5. 检查 I/O 等待
SELECT * FROM pg_stat_bgwriter;

10.3 磁盘空间问题

# 快速查找大文件
sudo du -sh /var/lib/postgresql/16/main/base/*
sudo du -sh /var/lib/postgresql/16/main/pg_wal/*

# 清理不需要的 WAL(确认已归档后)
# pg_archivecleanup /archive <oldest_needed_wal>

# 清理大表
# 方案 1:VACUUM FULL(锁表)
VACUUM FULL VERBOSE big_table;

# 方案 2:pg_repack(不锁表)
pg_repack -t big_table myappdb

# 方案 3:分批删除
DELETE FROM big_table WHERE created_at < '2025-01-01';
VACUUM big_table;
-- 或者更高效:创建新表、迁移数据、重命名

10.4 数据损坏修复

# 检查数据一致性
amcheck --heapallindexed myappdb  # 需要 amcheck 扩展

# 只读模式启动(临时应急)
# 在 postgresql.conf 添加: default_transaction_read_only = on

# 使用 pg_resetwal 最后手段(⚠️ 可能丢数据)
sudo systemctl stop postgresql
sudo -u postgres pg_resetwal /var/lib/postgresql/16/main
sudo systemctl start postgresql

十一、运维命令速查表

11.1 服务管理

# 启停服务
sudo systemctl start postgresql
sudo systemctl stop postgresql
sudo systemctl restart postgresql
sudo systemctl reload postgresql    # 仅重载配置,不中断连接
sudo systemctl status postgresql

# 重载配置(不重启)
SELECT pg_reload_conf();

# 查看配置文件位置
SHOW config_file;          -- postgresql.conf
SHOW hba_file;             -- pg_hba.conf
SHOW data_directory;       -- 数据目录

11.2 数据库操作

-- 数据库列表
\l

-- 切换数据库
\c myappdb

-- 表列表
\dt+

-- 表结构
\d tablename
\d+ tablename  -- 含更多信息

-- 查看所有 schema
\dn

-- 查看所有视图
\dv

-- 查看所有函数
\df

-- 查看所有索射
\di+

-- 查看所有序列
\ds

-- 执行 SQL 文件
\i /path/to/script.sql

-- 输出到文件
\o /tmp/output.txt
SELECT ...;
\o

-- 设置显示格式
\x  -- 扩展显示(类似 MySQL 的 \G)

11.3 维护操作速查

-- 统计信息更新
ANALYZE tablename;

-- 重建索引
REINDEX INDEX CONCURRENTLY indexname;
REINDEX TABLE CONCURRENTLY tablename;

-- 截断表(不可回滚)
TRUNCATE tablename CASCADE;

-- 修改表结构
ALTER TABLE tablename ADD COLUMN colname TYPE;
ALTER TABLE tablename DROP COLUMN colname;
ALTER TABLE tablename ALTER COLUMN colname TYPE newtype;
ALTER TABLE tablename ALTER COLUMN colname SET DEFAULT value;
ALTER TABLE tablename ADD CONSTRAINT name CHECK (condition);

-- 修改索引
ALTER INDEX indexname SET TABLESPACE new_tablespace;
ALTER INDEX indexname RENAME TO new_name;

-- 序列操作
SELECT nextval('seq_name');
SELECT setval('seq_name', 1000);
SELECT currval('seq_name');

11.4 psql 快捷命令汇总

\?           -- psql 帮助
\h           -- SQL 命令帮助
\q           -- 退出
\c db        -- 切换数据库
\dt          -- 列出表
\di          -- 列出索引
\dv          -- 列出视图
\df          -- 列出函数
\dn          -- 列出 schema
\du          -- 列出用户/角色
\db          -- 列出表空间
\dp          -- 列出权限
\dx          -- 列出扩展
\dt *.*      -- 列出所有表(含系统表)
\encoding    -- 查看/设置编码
\pset format wrapped  -- 自动换行显示
\timing on   -- 开启执行时间显示

附录:快速检查清单

检查项

命令/指标

健康阈值

连接使用率

count(*)/max_connections

< 80%

缓存命中率

blks_hit/(blks_hit+blks_read)

> 99%

事务提交率

xact_commit/(xact_commit+xact_rollback)

> 99.9%

复制延迟

pg_stat_replication.replay_lag

< 10s

死行比例

n_dead_tup/n_live_tup

< 5%

长事务

now()-xact_start

< 5min

idle in transaction

state='idle in transaction'

< 5 个

WAL 目录大小

du -sh pg_wal/

< max_wal_size × 3

锁等待

pg_locks WHERE NOT granted

0

死锁次数

pg_stat_database.deadlocks

0