How to Install and Configure Postgres Exporter on RHEL9
How to Install and Configure Postgres Exporter on RHEL9
Postgres Exporter setup
Create the postgres_exporter directory.
mkdir /opt/postgres_exporter
Enter the created directory.
cd /opt/postgres_exporter
Download Prometheus Postgres Exporter from the following link.
wget https://github.com/prometheus-community/postgres_exporter/releases/download/v0.12.1/postgres_exporter-0.12.1.linux-amd64.tar.gz
Unpack the downloaded package.
tar -xzvf postgres_exporter-0.12.1.linux-amd64.tar.gz
Navigate into the extracted package directory.
cd postgres_exporter-0.12.1.linux-amd64
Move the postgres_exporter package to be able to run as a service.
cp postgres_exporter /usr/local/bin
Return to the created directory.
cd /opt/postgres_exporter
Create postgres_exporter.env file to hold environment variables.
nano postgres_exporter.env
Save and exit after entering the following settings.
PG_EXPORTER_EXTEND_QUERY_PATH="/opt/postgres_exporter/queries.yaml"
DATA_SOURCE_NAME="postgresql://postgres:kemal123@localhost:5432/?sslmode=disable"
/* please enter your server ip adreses with localhost */
Additionally, define the queries.yaml file for extra metrics. Use this file to add additional metrics to your database.
nano queries.yaml
pg_replication:
query: "SELECT CASE WHEN NOT pg_is_in_recovery() THEN 0 ELSE GREATEST (0, EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp()))) END AS lag"
master: true
metrics:
- lag:
usage: "GAUGE"
description: "Replication lag behind master in seconds"
pg_postmaster:
query: "SELECT pg_postmaster_start_time as start_time_seconds from pg_postmaster_start_time()"
master: true
metrics:
- start_time_seconds:
usage: "GAUGE"
description: "Time at which postmaster started"
pg_database:
query: "SELECT sum(pg_database_size(pg_database.datname)) as size FROM pg_database"
master: true
metrics:
- size:
usage: "GAUGE"
description: "DB Size"
pg_stat_user_tables:
query: "SELECT
current_database() datname,
schemaname,
relname,
seq_scan,
seq_tup_read,
idx_scan,
idx_tup_fetch,
n_tup_ins,
n_tup_upd,
n_tup_del,
n_tup_hot_upd,
n_live_tup,
n_dead_tup,
n_mod_since_analyze,
COALESCE(last_vacuum, '1970-01-01Z') as last_vacuum,
COALESCE(last_autovacuum, '1970-01-01Z') as last_autovacuum,
COALESCE(last_analyze, '1970-01-01Z') as last_analyze,
COALESCE(last_autoanalyze, '1970-01-01Z') as last_autoanalyze,
vacuum_count,
autovacuum_count,
analyze_count,
autoanalyze_count
FROM
pg_stat_user_tables"
metrics:
- datname:
usage: "LABEL"
description: "Name of current database"
- schemaname:
usage: "LABEL"
description: "Name of the schema that this table is in"
- relname:
usage: "LABEL"
description: "Name of this table"
- seq_scan:
usage: "COUNTER"
description: "Number of sequential scans initiated on this table"
- seq_tup_read:
usage: "COUNTER"
description: "Number of live rows fetched by sequential scans"
- idx_scan:
usage: "COUNTER"
description: "Number of index scans initiated on this table"
- idx_tup_fetch:
usage: "COUNTER"
description: "Number of live rows fetched by index scans"
- n_tup_ins:
usage: "COUNTER"
description: "Number of rows inserted"
- n_tup_upd:
usage: "COUNTER"
description: "Number of rows updated"
- n_tup_del:
usage: "COUNTER"
description: "Number of rows deleted"
- n_tup_hot_upd:
usage: "COUNTER"
description: "Number of rows HOT updated (i.e., with no separate index update required)"
- n_live_tup:
usage: "GAUGE"
description: "Estimated number of live rows"
- n_dead_tup:
usage: "GAUGE"
description: "Estimated number of dead rows"
- n_mod_since_analyze:
usage: "GAUGE"
description: "Estimated number of rows changed since last analyze"
- last_vacuum:
usage: "GAUGE"
description: "Last time at which this table was manually vacuumed (not counting VACUUM FULL)"
- last_autovacuum:
usage: "GAUGE"
description: "Last time at which this table was vacuumed by the autovacuum daemon"
- last_analyze:
usage: "GAUGE"
description: "Last time at which this table was manually analyzed"
- last_autoanalyze:
usage: "GAUGE"
description: "Last time at which this table was analyzed by the autovacuum daemon"
- vacuum_count:
usage: "COUNTER"
description: "Number of times this table has been manually vacuumed (not counting VACUUM FULL)"
- autovacuum_count:
usage: "COUNTER"
description: "Number of times this table has been vacuumed by the autovacuum daemon"
- analyze_count:
usage: "COUNTER"
description: "Number of times this table has been manually analyzed"
- autoanalyze_count:
usage: "COUNTER"
description: "Number of times this table has been analyzed by the autovacuum daemon"
pg_statio_user_tables:
query: "SELECT current_database() datname, schemaname, relname, heap_blks_read, heap_blks_hit, idx_blks_read, idx_blks_hit, toast_blks_read, toast_blks_hit, tidx_blks_read, tidx_blks_hit FROM pg_statio_user_tables"
metrics:
- datname:
usage: "LABEL"
description: "Name of current database"
- schemaname:
usage: "LABEL"
description: "Name of the schema that this table is in"
- relname:
usage: "LABEL"
description: "Name of this table"
- heap_blks_read:
usage: "COUNTER"
description: "Number of disk blocks read from this table"
- heap_blks_hit:
usage: "COUNTER"
description: "Number of buffer hits in this table"
- idx_blks_read:
usage: "COUNTER"
description: "Number of disk blocks read from all indexes on this table"
- idx_blks_hit:
usage: "COUNTER"
description: "Number of buffer hits in all indexes on this table"
- toast_blks_read:
usage: "COUNTER"
description: "Number of disk blocks read from this table's TOAST table (if any)"
- toast_blks_hit:
usage: "COUNTER"
description: "Number of buffer hits in this table's TOAST table (if any)"
- tidx_blks_read:
usage: "COUNTER"
description: "Number of disk blocks read from this table's TOAST table indexes (if any)"
- tidx_blks_hit:
usage: "COUNTER"
description: "Number of buffer hits in this table's TOAST table indexes (if any)"
pg_stat_statements:
query: "select
t2.rolname,
t3.datname,
queryid,
calls,
(total_plan_time + total_exec_time) / 1000 as total_time_seconds,
(min_plan_time + min_exec_time) / 1000 as min_time_seconds,
(max_plan_time + max_exec_time) / 1000 as max_time_seconds,
(mean_plan_time + mean_exec_time) / 1000 as mean_time_seconds,
(stddev_plan_time + stddev_exec_time) / 1000 as stddev_time_seconds,
rows,
shared_blks_hit,
shared_blks_read,
shared_blks_dirtied,
shared_blks_written,
local_blks_hit,
local_blks_read,
local_blks_dirtied,
local_blks_written,
temp_blks_read,
temp_blks_written,
blk_read_time / 1000 as blk_read_time_seconds,
blk_write_time / 1000 as blk_write_time_seconds
from
pg_stat_statements t1
join pg_roles t2 on
(t1.userid = t2.oid)
join pg_database t3 on
(t1.dbid = t3.oid)
where
t2.rolname != 'rdsadmin'"
master: true
metrics:
- rolname:
usage: "LABEL"
description: "Name of user"
- datname:
usage: "LABEL"
description: "Name of database"
- queryid:
usage: "LABEL"
description: "Query ID"
- calls:
usage: "COUNTER"
description: "Number of times executed"
- total_time_seconds:
usage: "COUNTER"
description: "Total time spent in the statement, in milliseconds"
- min_time_seconds:
usage: "GAUGE"
description: "Minimum time spent in the statement, in milliseconds"
- max_time_seconds:
usage: "GAUGE"
description: "Maximum time spent in the statement, in milliseconds"
- mean_time_seconds:
usage: "GAUGE"
description: "Mean time spent in the statement, in milliseconds"
- stddev_time_seconds:
usage: "GAUGE"
description: "Population standard deviation of time spent in the statement, in milliseconds"
- rows:
usage: "COUNTER"
description: "Total number of rows retrieved or affected by the statement"
- shared_blks_hit:
usage: "COUNTER"
description: "Total number of shared block cache hits by the statement"
- shared_blks_read:
usage: "COUNTER"
description: "Total number of shared blocks read by the statement"
- shared_blks_dirtied:
usage: "COUNTER"
description: "Total number of shared blocks dirtied by the statement"
- shared_blks_written:
usage: "COUNTER"
description: "Total number of shared blocks written by the statement"
- local_blks_hit:
usage: "COUNTER"
description: "Total number of local block cache hits by the statement"
- local_blks_read:
usage: "COUNTER"
description: "Total number of local blocks read by the statement"
- local_blks_dirtied:
usage: "COUNTER"
description: "Total number of local blocks dirtied by the statement"
- local_blks_written:
usage: "COUNTER"
description: "Total number of local blocks written by the statement"
- temp_blks_read:
usage: "COUNTER"
description: "Total number of temp blocks read by the statement"
- temp_blks_written:
usage: "COUNTER"
description: "Total number of temp blocks written by the statement"
- blk_read_time_seconds:
usage: "COUNTER"
description: "Total time the statement spent reading blocks, in milliseconds (if track_io_timing is enabled, otherwise zero)"
- blk_write_time_seconds:
usage: "COUNTER"
description: "Total time the statement spent writing blocks, in milliseconds (if track_io_timing is enabled, otherwise zero)"
pg_process_idle:
query: |
WITH
metrics AS (
SELECT
application_name,
SUM(EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - state_change))::bigint)::float AS process_idle_seconds_sum,
COUNT(*) AS process_idle_seconds_count
FROM pg_stat_activity
WHERE state = 'idle'
GROUP BY application_name
),
buckets AS (
SELECT
application_name,
le,
SUM(
CASE WHEN EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - state_change)) <= le
THEN 1
ELSE 0
END
)::bigint AS bucket
FROM
pg_stat_activity,
UNNEST(ARRAY[1, 2, 5, 15, 30, 60, 90, 120, 300]) AS le
GROUP BY application_name, le
ORDER BY application_name, le
)
SELECT
application_name,
process_idle_seconds_sum as seconds_sum,
process_idle_seconds_count as seconds_count,
ARRAY_AGG(le) AS seconds,
ARRAY_AGG(bucket) AS seconds_bucket
FROM metrics JOIN buckets USING (application_name)
GROUP BY 1, 2, 3
metrics:
- application_name:
usage: "LABEL"
description: "Application Name"
- seconds:
usage: "HISTOGRAM"
description: "Idle time of server processes"
pg_active_lockedsql:
query: |
select case when replace(replace(pg_blocking_pids(pid)::text,'{',''),'}','')='' then 'numsofnopidblock' else 'numsofsomepidblock' end pidblock,
count(1) pidnums from pg_stat_activity
where state not in('idle') and query !='' group by pidblock order by pidblock;
metrics:
- pidblock:
usage: "LABEL"
description: "Possible values:numsofnopidblock--The processes that are not locked; numsofsomepidblock--The processes locked by some "
- pidnums:
usage: "COUNTER"
description: "The number of processes"
pg_active_slowsql:
query: |
select datname,usename,count(1) slowsql_count
from pg_stat_activity where state not in('idle') and query !=''
and extract(epoch from (now() - query_start)) > 60*5 group by datname,usename order by count(1) desc;
metrics:
- datname:
usage: "LABEL"
description: "Name of database"
- usename:
usage: "LABEL"
description: "Name of user"
- slowsql_count:
usage: "COUNTER"
description: "the numbers of slow sqls"
pg_never_used_indexes:
query: |
select pi.schemaname, pi.relname, pi.indexrelname,
pg_table_size(pi.indexrelid) as index_size from pg_indexes pis join
pg_stat_user_indexes pi on pis.schemaname = pi.schemaname
and pis.tablename = pi.relname and pis.indexname = pi.indexrelname
left join pg_constraint pco on pco.conname = pi.indexrelname
and pco.conrelid = pi.relid where pco.contype is distinct from 'p'
and pco.contype is distinct from 'u' and (idx_scan,idx_tup_read,idx_tup_fetch) = (0,0,0)
and pis.indexdef !~ ' UNIQUE INDEX ' and pi.relname !~ 'backup$'
order by pg_table_size(indexrelid) desc;
metrics:
- schemaname:
usage: "LABEL"
description: "Schema of table"
- relname:
usage: "LABEL"
description: "Name of table"
- indexrelname:
usage: "LABEL"
description: "Name of index"
- index_size:
usage: "GAUGE"
description: "Size of index"
pg_tablelocktops:
query: |
select db.datname,relname tbname,mode locktype,count(1) locknums
from pg_database db join pg_locks lk on db.oid=lk.database
join pg_class cl on lk.relation=cl.oid
join pg_stat_activity act on lk.pid=act.pid
where db.datname not in ('template0','template1') and fastpath='t'
and cl.oid not in (select oid from pg_class where relname in ('pg_class','pg_locks'))
and act.pid <>pg_backend_pid() and cl.reltablespace in (select oid from pg_tablespace)
group by db.datname,relname,mode order by count(1) desc limit 10;
metrics:
- datname:
usage: "LABEL"
description: "database of table"
- tbname:
usage: "LABEL"
description: "Name of table"
- locktype:
usage: "LABEL"
description: "type of lock"
- locknums:
usage: "COUNTER"
description: "the numbers of this lock"
Create a service file for Postgres Exporter.
nano /etc/systemd/system/postgres_exporter.service
Save and exit after entering the following settings.
[Unit]
Description=Prometheus exporter for Postgresql
Wants=network-online.target
After=network-online.target
[Service]
User=postgres
Group=postgres
WorkingDirectory=/opt/postgres_exporter
EnvironmentFile=/opt/postgres_exporter/postgres_exporter.env
ExecStart=/usr/local/bin/postgres_exporter --web.listen-address=:9187 --web.telemetry-path=/metrics
Restart=always
[Install]
WantedBy=multi-user.target
Load the changes made in the service file.
systemctl daemon-reload
Automatically start on OS reboot.
systemctl enable postgres_exporter
Start the service.
systemctl start postgres_exporter
Check the service status.
systemctl status postgres_exporter
The URL http://10.5.56.68:9187/metrics is where you can access the metrics exposed by Postgres Exporter. This URL provides a list of various metrics related to database performance and more. If you access this URL, it will display these metrics in a text format, allowing you to monitor your system's health and performance.
Prometheus Config Update and Control
nano /etc/prometheus/prometheus.yml
nano /etc/prometheus/prometheus.yml
# my global config
global:
scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
# scrape_timeout is set to the global default (10s).
# Alertmanager configuration
alerting:
alertmanagers:
- static_configs:
- targets:
# - alertmanager:9093
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
# - "first_rules.yml"
# - "second_rules.yml"
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
- job_name: "prometheus"
# metrics_path defaults to '/metrics'
# scheme defaults to 'http'.
static_configs:
- targets: ["localhost:9090"]
- job_name: "postgres_exporter"
static_configs:
- targets: ["10.**.**.68:9187"]
# You want to multiple service please add below configuration instead of above
- job_name: "postgres_exporter"
static_configs:
- targets: ["10.**.**.**3:9187","10.**.**.**4:9187","10.**.**.**5:9187"]
systemctl restart prometheus.service
systemctl status prometheus.service
You can monitor your database using grafana

For more detailed and technical articles like this, keep following our blog on Medium. If you have any questions or need further assistance, feel free to reach out in the comments below and directly.
← PostgreSQL Blog