プロローグ
, , , . , . , , . , - . : , , / in/out, - -, . PostgreSQL . .
( boolean)
. boolean. . -, , «», — «». -, boolean , , , . , XOR . -, . , , , .
, , "other", , , , , , . , . null
boolean
. null
« », , "sex" , . "other" — , , «». sex
boolean
, .
Enum — PostgreSQL
PostgreSQL , , enum
. :
CREATE TYPE sex AS ENUM ('', '', '');
:
select id from table where sex='';
, , , : "Cherchez la femme".
63 ( UTF-8, ). 4 . , , — . , . pg_enum
, OID. , . . , syntax error
, , .
, , enum
. , ( ), (, ALTER TYPE
). : 4 , , . , Open Source : « , ».
Char — PostgreSQL
, PostgreSQL , "char"
. , char( )
. "char"
1 , .. 4 , enum
. UTF-8 , , — . , , - . , m
, f
, x
. : , , , . . , domain
.
CREATE DOMAIN sex_char AS "char" CHECK (VALUE in ('m','f','x'));
CREATE FUNCTION sex(txt varchar, OUT ch sex_char) LANGUAGE plpgsql IMMUTABLE STRICT PARALLEL SAFE AS
$sex$
BEgin
ch:= case txt
when '' then 'm'::sex_char
when '' then 'f'::sex_char
when '' then 'x'::sex_char
else null
end;
if ch is null then
raise invalid_parameter_value;
end if;
END
$sex$;
CREATE FUNCTION sex(ch sex_char, OUT txt varchar) LANGUAGE plpgsql IMMUTABLE STRICT PARALLEL SAFE AS
$sex$
BEgin
txt:= case ch
when 'm'::sex_char then ''
when 'f'::sex_char then ''
when 'x'::sex_char then ''
else null
end;
if txt is null then
raise invalid_parameter_value;
end if;
END
$sex$;
, , . :
=> select sex(ch=>'f');
sex
---------
(1 row)
=> select sex(txt=>'');
sex
-----
f
(1 row)
( ) , , , . , , . , , :
=> select sex(sex(txt=>''));
sex
---------
(1 row)
:
select id from table where sex='f';
select id from table where sex=sex(txt=>'');
: 1 , .
.
create table sex_t (
sex_t_id smallint primary key,
sex varchar not null unique
);
. :
select id from table join sex_t using (sex_t_id) where sex='';
, enum
. , enum
: , ; , - , .. : 2 4 (.. , enum
).
, . . , « », smallint
serial
. sequence
0 ( ), , ( ): , . 10.
, , . , , , .
«-», , , jsonb
. , , jsonb
, jsonb
, .
, , , , , .
, , 75 % , 24 % 1 % . 23 , , 8 . , . , 1 . , , .
( , ), «» , . - , , . , , PostgreSQL , , .
10 000 000 , ( ). , index only scan. , count(*)
count(id)
, .. , , .
, : MSI, 8 , 16 (hugepages 2 14 ), 0 swap. , , . CentOS 8, PostgreSQL 13 shared_buffers ( PostgreSQL) 14 .
100 , 100 , 10 000 . , .
postgresql.conf
postgresql.conf.
# WAL
wal_level = minimal
max_wal_senders = 0
# , "" .
random_page_cost = 1
#
max_parallel_workers_per_gather=0
# PostgreSQL
shared_buffers = 14GB
prewarm.sql
pg_prewarm
.
select pg_prewarm('sex1');
select pg_prewarm('sex1_btree');
select pg_prewarm('sex2');
select pg_prewarm('sex2_btree');
select pg_prewarm('sex3');
select pg_prewarm('sex3_btree');
select pg_prewarm('sex4');
select pg_prewarm('sex4_btree');
select pg_prewarm('sex5');
select pg_prewarm('sex5_btree');
select pg_prewarm('sex5h');
select pg_prewarm('sex5h_hash');
select pg_prewarm('sex6');
select pg_prewarm('sex6_gin');
select pg_prewarm('sex6h');
select pg_prewarm('sex6h_gin_hash');
test.sql
. (pg_prewarm
). , count(id)
, index only scan.
select count(id) from sex1 where sex='';
select count(id) from sex1 where sex='';
select count(id) from sex1 where sex='';
select count(id) from sex2 where sex_char=sex(txt=>'');
select count(id) from sex2 where sex_char=sex(txt=>'');
select count(id) from sex2 where sex_char=sex(txt=>'');
select count(id) from sex3 join sex_t using (sex_t_id) where sex='';
select count(id) from sex3 join sex_t using (sex_t_id) where sex='';
select count(id) from sex3 join sex_t using (sex_t_id) where sex='';
select count(id) from sex3 where sex_t_id=(select t.sex_t_id from sex_t t where sex='');
select count(id) from sex3 where sex_t_id=(select t.sex_t_id from sex_t t where sex='');
select count(id) from sex3 where sex_t_id=(select t.sex_t_id from sex_t t where sex='');
select count(id) from sex4 join sex_t4 using (sex_t4_id) where sex='';
select count(id) from sex4 join sex_t4 using (sex_t4_id) where sex='';
select count(id) from sex4 join sex_t4 using (sex_t4_id) where sex='';
select count(id) from sex4 where sex_t4_id=(select t.sex_t4_id from sex_t4 t where sex='');
select count(id) from sex4 where sex_t4_id=(select t.sex_t4_id from sex_t4 t where sex='');
select count(id) from sex4 where sex_t4_id=(select t.sex_t4_id from sex_t4 t where sex='');
select count(id) from sex5 where sex='';
select count(id) from sex5 where sex='';
select count(id) from sex5 where sex='';
select count(id) from sex5h where sex='';
select count(id) from sex5h where sex='';
select count(id) from sex5h where sex='';
select count(id) from sex6 where jdoc@>'{"sex":""}';
select count(id) from sex6 where jdoc@>'{"sex":""}';
select count(id) from sex6 where jdoc@>'{"sex":""}';
select count(id) from sex6h where jdoc@>'{"sex":""}';
select count(id) from sex6h where jdoc@>'{"sex":""}';
select count(id) from sex6h where jdoc@>'{"sex":""}';
init.sql
:
-- ,
\set table_size 10000000
-- view
create or replace view disk as SELECT n.nspname AS schema,
c.relname,
pg_size_pretty(pg_relation_size(c.oid::regclass)) AS size,
pg_relation_size(c.oid::regclass)/1024 AS size_KiB
FROM pg_class c
LEFT JOIN pg_namespace n ON n.oid = c.relnamespace
ORDER BY (pg_relation_size(c.oid::regclass)) DESC
LIMIT 20;
begin;
-- sex1 enum
CREATE TYPE sex_enum AS ENUM ('', '', '');
create table sex1 (id float, sex sex_enum not null);
-- sex2 "char"
CREATE DOMAIN sex_char AS "char" CHECK (VALUE in ('m','f','x'));
CREATE FUNCTION sex(txt varchar, OUT ch sex_char) LANGUAGE plpgsql IMMUTABLE STRICT PARALLEL SAFE AS
$sex$
BEgin
ch:= case txt
when '' then 'm'::sex_char
when '' then 'f'::sex_char
when '' then 'x'::sex_char
else null
end;
if ch is null then
raise invalid_parameter_value;
end if;
END
$sex$;
CREATE FUNCTION sex(ch sex_char, OUT txt varchar) LANGUAGE plpgsql IMMUTABLE STRICT PARALLEL SAFE AS
$sex$
BEgin
txt:= case ch
when 'm'::sex_char then ''
when 'f'::sex_char then ''
when 'x'::sex_char then ''
else null
end;
if txt is null then
raise invalid_parameter_value;
end if;
END
$sex$;
create table sex2 (id float, sex_char "char" not null);
-- sex3 c smallint
create table sex_t (
sex_t_id smallint primary key,
sex varchar not null unique
);
insert into sex_t (sex_t_id,sex) values (1,''),(0,''),(-1,'');
create table sex3 (id float, sex_t_id smallint not null references sex_t);
-- sex4 serial, , ,
create table sex_t4 (
sex_t4_id serial primary key,
sex varchar not null unique
);
insert into sex_t4 (sex_t4_id,sex) values (1,''),(0,''),(-1,'');
create table sex4 (id float, sex_t4_id integer not null references sex_t4);
--
create table sex_t5 (
sex varchar primary key
);
insert into sex_t5 (sex) values (''),(''),('');
-- btree
create table sex5 (id float, sex varchar not null references sex_t5);
-- hash
create table sex5h (id float, sex varchar not null references sex_t5);
-- jsonb
-- gin
create table sex6 (id float, jdoc jsonb not null);
-- gin
create table sex6h (id float, jdoc jsonb not null);
--
insert into sex1 (id,sex) select random, case when random<0.75 then ''::sex_enum when random<0.99 then ''::sex_enum else ''::sex_enum end from (select random() as random, generate_series(1,:table_size)) as subselect;
insert into sex5 (id,sex) select id,sex::varchar from sex1;
insert into sex2 (id,sex_char) select id,sex(sex) from sex5;
insert into sex3 (id,sex_t_id) select id,sex_t_id from sex5 join sex_t using (sex);
insert into sex4 (id,sex_t4_id) select id,sex_t4_id from sex5 join sex_t4 using (sex);
insert into sex5h (id,sex) select id,sex from sex5;
insert into sex6 (id,jdoc) select id,('{"sex": "'||sex||'"}')::jsonb from sex5;
insert into sex6h (id,jdoc) select id,jdoc from sex6;
--
create index sex1_btree on sex1(sex);
create index sex2_btree on sex2(sex_char);
create index sex3_btree on sex3(sex_t_id);
create index sex4_btree on sex4(sex_t4_id);
create index sex5_btree on sex5(sex);
-- hash
create index sex5h_hash on sex5h using hash(sex);
create index sex6_gin on sex6 using gin(jdoc);
-- , , hash
create index sex6h_gin_hash on sex6h using gin(jdoc jsonb_path_ops);
commit;
set role postgres;
-- ( PostgreSQL)
create extension if not exists pg_prewarm;
--
create extension if not exists pg_buffercache;
create or replace view cache as SELECT n.nspname AS schema,
c.relname,
pg_size_pretty(count(*) * 8192) AS buffered,
count(*) * 8 AS buffered_KiB,
round(100.0 * count(*)::numeric / ((( SELECT pg_settings.setting
FROM pg_settings
WHERE pg_settings.name = 'shared_buffers'::text))::integer)::numeric, 1) AS buffer_percent,
round(100.0 * count(*)::numeric * 8192::numeric / pg_table_size(c.oid::regclass)::numeric, 1) AS percent_of_relation
FROM pg_class c
JOIN pg_buffercache b ON b.relfilenode = c.relfilenode
JOIN pg_database d ON b.reldatabase = d.oid AND d.datname = current_database()
LEFT JOIN pg_namespace n ON n.oid = c.relnamespace
GROUP BY c.oid, n.nspname, c.relname
ORDER BY buffered_kib DESC
LIMIT 20;
-- vacuum
vacuum freeze analyze;
test
:
#!/bin/sh
set -o errexit -o noclobber -o nounset -o pipefail
#set -o errexit -o noclobber -o nounset -o pipefail -o xtrace
# for pgbench
PATH="$PATH:/usr/pgsql-13/bin"
# config
# database connection parameters
readonly PGDATABASE='sex'
readonly PGPORT=5432
export PGDATABASE PGPORT
# output data file
readonly data_csv='data.csv'
# init data files
readonly header='sex:,male,female,other'
if [ ! -s "$data_csv" ]
then
echo "$header" >|"$data_csv"
fi
# prewarm to the cache
psql --quiet -f prewarm.sql >/dev/null
# more prewarm
pgbench --no-vacuum --transaction 100 --file test.sql >/dev/null
for i in $(seq 1 100)
do
echo -n "$i "
date --iso-8601=seconds
pgbench --no-vacuum --transaction 100 --report-latencies --file 'test.sql' | \
awk "
/from sex1 where sex='';\$/ {printf \"enum,%s,\", \$1 >>\"$data_csv\";}
/from sex1 where sex='';\$/ {printf \"%s,\", \$1 >>\"$data_csv\";}
/from sex1 where sex='';\$/ {printf \"%s\\n\", \$1 >>\"$data_csv\";}
/from sex2 where sex_char=sex\(txt=>''\);\$/ {printf \"\\\"char\\\",%s,\", \$1 >>\"$data_csv\";}
/from sex2 where sex_char=sex\(txt=>''\);\$/ {printf \"%s,\", \$1 >>\"$data_csv\";}
/from sex2 where sex_char=sex\(txt=>''\);\$/ {printf \"%s\\n\", \$1 >>\"$data_csv\";}
/from sex3 join sex_t using \(sex_t_id\) where sex='';\$/ {printf \"smallint(join),%s,\", \$1 >>\"$data_csv\";}
/from sex3 join sex_t using \(sex_t_id\) where sex='';\$/ {printf \"%s,\", \$1 >>\"$data_csv\";}
/from sex3 join sex_t using \(sex_t_id\) where sex='';\$/ {printf \"%s\\n\", \$1 >>\"$data_csv\";}
/from sex3 where sex_t_id=\(select t.sex_t_id from sex_t t where sex=''\);\$/ {printf \"smallint(subsel),%s,\", \$1 >>\"$data_csv\";}
/from sex3 where sex_t_id=\(select t.sex_t_id from sex_t t where sex=''\);\$/ {printf \"%s,\", \$1 >>\"$data_csv\";}
/from sex3 where sex_t_id=\(select t.sex_t_id from sex_t t where sex=''\);\$/ {printf \"%s\\n\", \$1 >>\"$data_csv\";}
/from sex4 join sex_t4 using \(sex_t4_id\) where sex='';\$/ {printf \"integer(join),%s,\", \$1 >>\"$data_csv\";}
/from sex4 join sex_t4 using \(sex_t4_id\) where sex='';\$/ {printf \"%s,\", \$1 >>\"$data_csv\";}
/from sex4 join sex_t4 using \(sex_t4_id\) where sex='';\$/ {printf \"%s\\n\", \$1 >>\"$data_csv\";}
/from sex4 where sex_t4_id=\(select t.sex_t4_id from sex_t4 t where sex=''\);\$/ {printf \"integer(subsel),%s,\", \$1 >>\"$data_csv\";}
/from sex4 where sex_t4_id=\(select t.sex_t4_id from sex_t4 t where sex=''\);\$/ {printf \"%s,\", \$1 >>\"$data_csv\";}
/from sex4 where sex_t4_id=\(select t.sex_t4_id from sex_t4 t where sex=''\);\$/ {printf \"%s\\n\", \$1 >>\"$data_csv\";}
/from sex5 where sex='';\$/ {printf \"varchar(btree),%s,\", \$1 >>\"$data_csv\";}
/from sex5 where sex='';\$/ {printf \"%s,\", \$1 >>\"$data_csv\";}
/from sex5 where sex='';\$/ {printf \"%s\\n\", \$1 >>\"$data_csv\";}
/from sex5h where sex='';\$/ {printf \"varchar(hash),%s,\", \$1 >>\"$data_csv\";}
/from sex5h where sex='';\$/ {printf \"%s,\", \$1 >>\"$data_csv\";}
/from sex5h where sex='';\$/ {printf \"%s\\n\", \$1 >>\"$data_csv\";}
/from sex6 where jdoc@>'{\"sex\":\"\"}';\$/ {printf \"jsonb(gin),%s,\", \$1 >>\"$data_csv\";}
/from sex6 where jdoc@>'{\"sex\":\"\"}';\$/ {printf \"%s,\", \$1 >>\"$data_csv\";}
/from sex6 where jdoc@>'{\"sex\":\"\"}';\$/ {printf \"%s\\n\", \$1 >>\"$data_csv\";}
/from sex6h where jdoc@>'{\"sex\":\"\"}';\$/ {printf \"jsonb(gin+hash),%s,\", \$1 >>\"$data_csv\";}
/from sex6h where jdoc@>'{\"sex\":\"\"}';\$/ {printf \"%s,\", \$1 >>\"$data_csv\";}
/from sex6h where jdoc@>'{\"sex\":\"\"}';\$/ {printf \"%s\\n\", \$1 >>\"$data_csv\";}
"
done
echo 'Done'
=> \dt+
List of relations
Schema | Name | Type | Owner | Persistence | Size | Description
--------+--------+-------+-------+-------------+--------+-------------
public | sex1 | table | olleg | permanent | 422 MB |
public | sex2 | table | olleg | permanent | 422 MB |
public | sex3 | table | olleg | permanent | 422 MB |
public | sex4 | table | olleg | permanent | 422 MB |
public | sex5 | table | olleg | permanent | 498 MB |
public | sex5h | table | olleg | permanent | 498 MB |
public | sex6 | table | olleg | permanent | 651 MB |
public | sex6h | table | olleg | permanent | 651 MB |
public | sex_t | table | olleg | permanent | 48 kB |
public | sex_t4 | table | olleg | permanent | 48 kB |
public | sex_t5 | table | olleg | permanent | 48 kB |
(11 rows)
=> \di+
List of relations
Schema | Name | Type | Owner | Table | Persistence | Size | Description
--------+----------------+-------+-------+--------+-------------+--------+-------------
public | sex1_btree | index | olleg | sex1 | permanent | 66 MB |
public | sex2_btree | index | olleg | sex2 | permanent | 66 MB |
public | sex3_btree | index | olleg | sex3 | permanent | 66 MB |
public | sex4_btree | index | olleg | sex4 | permanent | 66 MB |
public | sex5_btree | index | olleg | sex5 | permanent | 67 MB |
public | sex5h_hash | index | olleg | sex5h | permanent | 448 MB |
public | sex6_gin | index | olleg | sex6 | permanent | 21 MB |
public | sex6h_gin_hash | index | olleg | sex6h | permanent | 10 MB |
public | sex_t4_pkey | index | olleg | sex_t4 | permanent | 16 kB |
public | sex_t4_sex_key | index | olleg | sex_t4 | permanent | 16 kB |
public | sex_t5_pkey | index | olleg | sex_t5 | permanent | 16 kB |
public | sex_t_pkey | index | olleg | sex_t | permanent | 16 kB |
public | sex_t_sex_key | index | olleg | sex_t | permanent | 16 kB |
(13 rows)
, 1 2 4 , . , - PostgreSQL «». , , . , , ( ) .
json , , . , , , - , json, , , .
hash-, , . hash- ( ) . , PostgreSQL hash- hash , . PostgreSQL, .
, gin ( btree). , , . - , gin- , , .
75% , , . 24% , . 1% .
M$ Exel « » (, , ). , « » , , () , .
json . :
enum "char"
- , ,
"char"
. , , PostgreSQL «»."char"
,enum
. - , 75% , 24% 1% — .
- ,
enum
integer
() . , - : , 75% ,integer
, 75%enum
; 24% 1%enum
,select
join
,select
.
:
=> explain (costs false) select count(id) from sex1 where sex='';
QUERY PLAN
-------------------------------------------------
Aggregate
-> Index Scan using sex1_btree on sex1
Index Cond: (sex = ''::sex_enum)
(3 rows)
=> explain (costs false) select count(id) from sex2 where sex_char=sex(txt=>'');
QUERY PLAN
----------------------------------------------
Aggregate
-> Index Scan using sex2_btree on sex2
Index Cond: (sex_char = 'f'::"char")
(3 rows)
smallint integer
-
smallint
integer
(serial
) . , , PostgreSQL - . -
enum
"char"
75% , . , , , . 1% 24% . -
join
(Nested Loop) - , . , , . .. : Nested Loop. , Nested Loop .
75% , , . . 1% 24% .
=> explain (costs false) select count(id) from sex3 join sex_t using (sex_t_id) where sex='';
QUERY PLAN
-------------------------------------------------------
Aggregate
-> Nested Loop
-> Seq Scan on sex_t
Filter: ((sex)::text = ''::text)
-> Index Scan using sex3_btree on sex3
Index Cond: (sex_t_id = sex_t.sex_t_id)
(6 rows)
=> explain (costs false) select count(id) from sex3 where sex_t_id=(select t.sex_t_id from sex_t t where sex='');
QUERY PLAN
---------------------------------------------------
Aggregate
InitPlan 1 (returns $0)
-> Seq Scan on sex_t t
Filter: ((sex)::text = ''::text)
-> Index Scan using sex3_btree on sex3
Index Cond: (sex_t_id = $0)
(6 rows)
varchar
- , , : 75% , 1% 24% — .
- btree- ,
join
, . . - Hash- , btree ( ). , : , hash- . , tuple ID «» hash-.
1
,2
3
, .. . , - hash- PostgreSQL, - hash-.
btree hash-.
=> explain (costs false) select count(id) from sex5 where sex='';
QUERY PLAN
-----------------------------------------------------
Aggregate
-> Index Scan using sex5_btree on sex5
Index Cond: ((sex)::text = ''::text)
(3 rows)
=> explain (costs false) select count(id) from sex5h where sex='';
QUERY PLAN
-----------------------------------------------------
Aggregate
-> Index Scan using sex5h_hash on sex5h
Index Cond: ((sex)::text = ''::text)
(3 rows)
json
- 75% . , , 75%. json? PostgreSQL . 1% 24% PostgreSQL .
- (
jsonb_path_ops
) ( 1% — , ), gin json. - , , json — .
=> explain (costs false) select count(id) from sex6 where jdoc@>'{"sex":""}';
QUERY PLAN
-------------------------------------------------------
Aggregate
-> Seq Scan on sex6
Filter: (jdoc @> '{"sex": ""}'::jsonb)
(3 rows)
=> explain (costs false) select count(id) from sex6 where jdoc@>'{"sex":""}';
QUERY PLAN
-----------------------------------------------------------------
Aggregate
-> Bitmap Heap Scan on sex6
Recheck Cond: (jdoc @> '{"sex": ""}'::jsonb)
-> Bitmap Index Scan on sex6_gin
Index Cond: (jdoc @> '{"sex": ""}'::jsonb)
(5 rows)
, , enum
— , , . , , , 4 enum
, , 1 "char"
2 smallint
, .