MVCCの実装によるテーブルとインデックスの「膨張」のトピックは、PostgreSQLユーザーと管理者にとって悩みの種です。
私はかつて「DBA:VACUUMが失敗したとき-テーブルを手動でクリアする」という記事で取り上げました。トランザクションが時間内に実行されなかった場合や、同時トランザクションVACUUM
。
しかし、速度への影響に加えて、占領地への影響という事実もあります。パスが成功した後、単一の「ライブ」レコードを持つテーブルが、autovacuum
高価なSSDでギガバイトのスペースを占有し続けると、おそらく非常に驚かれることでしょう。
今日は、ファイル内のデータストレージの構造について少し調査し、pg_catalog
PostgreSQLデータベースを記述するスキーマを掘り下げて、疑わしいほど大量のスペースを明らかに占有しているテーブルを特定する方法を理解します。
, PostgreSQL 12, - MVCC.
CREATE TABLE bloat AS SELECT 0 i;
CREATE INDEX ON bloat(i);
-- "" pg_temp,
CREATE OR REPLACE PROCEDURE pg_temp.upd() AS $$
UPDATE bloat SET i = i + 1;
$$ LANGUAGE sql; -- , plainSQL-,
DO $$
DECLARE
i integer;
ts timestamp;
BEGIN
FOR i IN 1 .. 1 << 14 LOOP
ts := clock_timestamp();
CALL pg_temp.upd();
RAISE NOTICE '% : %', i, clock_timestamp() - ts;
END LOOP;
END;
$$ LANGUAGE plpgsql;
, UPDATE
. ?
TABLE bloat;
i
-----
16384
?
SELECT pg_relation_size('bloat');
-- 598016
... ! , , HOT-update. VACUUM
- !
VACUUM bloat;
-- ... !
SELECT pg_relation_size('bloat');
-- 598016
, , PostgreSQL, :
- (
pg_class.relfilenode
)
, 1GB
() 8KB
"" VACUUM
(VACUUM FULL
) - , , , "" .
"" , "" - VACUUM
.
pg_catalog
pg_catalog - , - , , , , , ... , , Joel Jacobson. , :
pg_namespace - (schema)
pg_class - - , , matview, ...
pg_depend - , -
-
-
"" ( ):
(
pg_depend
) (pg_namespace
)public
(pg_catalog
information_schema
)
(
pg_class.relpages
) (pg_class.reltuples
)
" " (
pg_statistic
) (pg_attribute
) , NULL- (pg_statistic.stanullfrac
) (pg_statistic.stawidth
) (pg_attribute.attalign
)
-
PostgreSQL, 8KB. , , block_size:
SELECT current_setting('block_size')::integer;
-- 8192
, , 27 , :
" " NULL'
NULL- . NULL- , HeapTupleHeaderData
23 "" .
8 NULL, 1 , 24 . 9, ... 2 , 25, 64-bit - 32 .
"" NULL .
, NULL , pg_statistic.stanullfrac
NULL- .
, , NULL, . -, , , sum/avg/min/max/count
.
! "", "SQL HowTo: 1000 ":
!
, , :
CREATE TABLE nobloat AS
SELECT 0 i;
CREATE INDEX ON nobloat(i);
-- objects-in-scheme
WITH dep AS (
-- https://postgrespro.ru/docs/postgresql/12/catalog-pg-depend
SELECT
objid
FROM
pg_depend
WHERE
(
refclassid
, refobjid
, classid
) = (
'pg_namespace'::regclass
, (
-- https://postgrespro.ru/docs/postgresql/12/catalog-pg-namespace
SELECT
oid
FROM
pg_namespace
WHERE
nspname = 'public' -- schema
LIMIT 1
)
, 'pg_class'::regclass
)
)
-- objects
, cl AS (
-- https://postgrespro.ru/docs/postgresql/12/catalog-pg-class
SELECT
oid
, relname
, relpages
, reltuples
FROM
pg_class cl
WHERE
oid = ANY(ARRAY(TABLE dep)::oid[]) AND
relkind IN ('r', 'm', 'p') -- relation | matview | partition
)
SELECT
T.*
, cl.*
, CASE
WHEN ratio >= 1 THEN (ratio - 1) * def.PAGESIZE
END::bigint repack_effect_size
FROM
(
-- https://postgrespro.ru/docs/postgresql/12/storage-page-layout
SELECT
current_setting('block_size')::integer PAGESIZE
, CASE
WHEN version() ~ 'mingw32' OR version() ~ '64-bit' THEN 8
ELSE 4
END MAXALIGN
) def
, cl
, LATERAL (
WITH cols AS (
SELECT
*
, (sz + szq - 1) / szq * szq sza -- aligned size
FROM
(
-- https://postgrespro.ru/docs/postgresql/12/catalog-pg-attribute
-- https://postgrespro.ru/docs/postgresql/12/catalog-pg-statistic
SELECT
attname
, attnum
, coalesce(st.stanullfrac, 0) nfr
, CASE
WHEN attlen = -1 THEN
CASE
WHEN attstorage IN ('p', 'm') OR stawidth < 2048 THEN -- toast_tuple_target
stawidth
ELSE 8 -- len + toast_id
END
ELSE attlen
END sz
, CASE attalign
WHEN 'c' THEN 1
WHEN 's' THEN 2
WHEN 'i' THEN 4
WHEN 'd' THEN 8
END szq
FROM
pg_attribute at
LEFT JOIN
pg_statistic st
ON (st.starelid, st.staattnum) = (at.attrelid, at.attnum)
WHERE
at.attrelid = cl.oid AND
NOT at.attisdropped AND -- without dropped columns
at.attnum > 0 -- without system columns (tableoid, ctid, ...)
ORDER BY
attnum
) T
)
SELECT
CASE
WHEN reltuples = 0 AND relpages = 0 THEN 1
WHEN reltuples = 0 AND relpages > 0 THEN NULL
ELSE
relpages /
ceil( -- need pages
reltuples /
(
(PAGESIZE - 24) / -- PageHeaderData
ceil(szt::double precision / reltuples)::bigint -- avg tuple size
) -- tuples-per-page
)
END ratio
FROM
(
SELECT
reltuples * 4 + -- ItemIdData
(
(reltuples - fnnt) * hdr_tuple_w_nulls + -- hdr, tuples w/nulls
fnnt * hdr_tuple_wo_nulls -- hdr, tuples wo/nulls
) +
sztc szt -- size of tuples
FROM
(
SELECT
sztc
, fnnt
-- aligned headers
, ceil(hdr_cols_sys::double precision / MAXALIGN)::integer * MAXALIGN hdr_tuple_wo_nulls
, ceil((hdr_cols_sys + hdr_cols_null)::double precision / MAXALIGN)::integer * MAXALIGN hdr_tuple_w_nulls
FROM
(
SELECT
-- https://doxygen.postgresql.org/htup__details_8h_source.html#l00121
-- 4 : t_xmin
-- 4 : t_xmax
-- 4 : t_cid | t_xvac
-- 6 : t_ctid
-- 2 : t_infomask2
-- 2 : t_infomask
-- 1 : t_hoff
23 hdr_cols_sys
, ceil(count(*)::double precision / 8)::integer hdr_cols_null
, sum(ceil(sza * reltuples * (1 - nfr))) sztc -- size of tuples cols
, trunc(exp(sum(ln(CASE WHEN nfr < 1 THEN 1 - nfr ELSE 1 END))) * reltuples) fnnt -- full-not-null-tuples
FROM
cols
) T
) T
) T
) T;
?
ratio | oid | relname | relpages | reltuples | repack_effect_size
-------------------------------------------------------------------
73 | 41333 | bloat | 73 | 15 | 589824
1 | 41337 | nobloat | 1 | 1 | 0
, PostgreSQL , bloat
15 , 1, - ANALYZE
. , ratio
1.
- , "" :