PostgreSQL在存储一行数据时,会对各个字段进行对齐。在设计表结构时,需要合理安排字段顺序,减少磁盘空间占用。
postgres=# select typname, typalign, typlen from pg_type where typname in ('int4', 'int8', 'bool', 'float4', 'float8', 'bytea', 'text', 'timestamptz', 'serial', 'json', 'int8[]');
   typname   | typalign | typlen
-------------+----------+--------
 bool        | c        |      1
 bytea       | i        |     -1
 int8        | d        |      8
 int4        | i        |      4
 text        | i        |     -1
 json        | i        |     -1
 float4      | i        |      4
 float8      | d        |      8
 timestamptz | d        |      8
(9 rows)postgres=#
typealign表示对齐所需字节数,c -> 1, i -> 4, d -> 8。
https://www.postgresql.org/docs/current/storage-page-layout.html
1. 示例
每行数据,RowHeader 24字节
postgres=# create table t1 (c char, d float8);
CREATE TABLE
postgres=# insert into t1 values ('a', 1.1);
INSERT 0 1                                                        
postgres=# select pg_column_size(t1.*),  pg_column_size(c),  pg_column_size(d) from t1;
 pg_column_size | pg_column_size | pg_column_size 
----------------+----------------+----------------
             40 |              2 |              8
(1 row)
postgres=#
由于字段d类型是float8,需要8字节对齐,故一行数据大小为 24 + 2 + (6) + 8 = 40,其中,6是对齐所浪费的空间。
如果我们把 d放在前面,则所占空间会减少为24 + 8 + 2 = 34,对齐不浪费空间。
postgres=# create table t2 (d float8, c char);
CREATE TABLE
postgres=# insert into t2 values (1.1, 'a');
INSERT 0 1
postgres=# select pg_column_size(t2.*),  pg_column_size(c),  pg_column_size(d) from t2;
 pg_column_size | pg_column_size | pg_column_size 
----------------+----------------+----------------
             34 |              2 |              8
(1 row)
postgres=#
需要注意的是,bytea,text、text[],json这些复杂数据类型的大小及对齐要求:
1.1 bytea
postgres=# create table t_bytea (b bool, ba bytea);
CREATE TABLE
postgres=# insert into t_bytea values(false, '\xFF');
INSERT 0 1
postgres=# select pg_column_size(t_bytea.*),  pg_column_size(b),  pg_column_size(ba) from t_bytea;
 pg_column_size | pg_column_size | pg_column_size 
----------------+----------------+----------------
             27 |              1 |              2
(1 row)
postgres=#
24 + 1 + 2 = 27, bytea 按 1 个 字节对齐,且只需要额外的 1 个字节作为 bytea 的 header(1 + 1 = 2)
1.2 text
postgres=# create table t_text (b bool, t text);
CREATE TABLE                    
postgres=# insert into t_text values(false, 'a');
INSERT 0 1
postgres=# select pg_column_size(t_text.*),  pg_column_size(b),  pg_column_size(t) from t_text;
 pg_column_size | pg_column_size | pg_column_size 
----------------+----------------+----------------
             27 |              1 |              2
(1 row)
postgres=#
text 与 bytea 一样。
1.3 text[]
postgres=# create table t_text_arr (b bool, arr text[]);
CREATE TABLE
postgres=# insert into t_text_arr values(false, ARRAY['a']);
INSERT 0 1
postgres=# select pg_column_size(t_text_arr.*),  pg_column_size(b),  pg_column_size(arr) from t_text_arr;
 pg_column_size | pg_column_size | pg_column_size 
----------------+----------------+----------------
             54 |              1 |             29
(1 row)
postgres=#
text[] 就比较离谱了,按 1 个字节对齐,但是 text[] 需要额外的 20多个字节来存储 header。
1.4 int8[]
int8[] 与 text[] 一样
postgres=# create table t_int8_arr (b bool, arr int8[]);
CREATE TABLE
postgres=# insert into t_int8_arr values(false, ARRAY[1]);
INSERT 0 1
postgres=# select pg_column_size(t_int8_arr.*),  pg_column_size(b),  pg_column_size(arr) from t_int8_arr;
 pg_column_size | pg_column_size | pg_column_size 
----------------+----------------+----------------
             54 |              1 |             29
(1 row)
postgres=# select pg_column_size(t_int8_arr.*),  pg_column_size(b),  pg_column_size(arr), arr from t_int8_arr;
 pg_column_size | pg_column_size | pg_column_size |   arr   
----------------+----------------+----------------+---------
             54 |              1 |             29 | {1}
             38 |              1 |             13 | {}
             62 |              1 |             37 | {2,3}
             70 |              1 |             45 | {2,3,4}
(4 rows)
postgres=#
1.5 json
json 也是按 1 个字节对齐。但是不像 array ,header 比较小。
postgres=# create table t_json (b bool, j json);
CREATE TABLE
postgres=# insert into t_json values(false, '{}'::json);
INSERT 0 1
postgres=# insert into t_json values(false, '[1]'::json);
INSERT 0 1
postgres=# insert into t_json values(false, '["1"]'::json);
INSERT 0 1
postgres=# select pg_column_size(t_json.*),  pg_column_size(b),  pg_column_size(j), j from t_json;
 pg_column_size | pg_column_size | pg_column_size |   j   
----------------+----------------+----------------+-------
             28 |              1 |              3 | {}
             29 |              1 |              4 | [1]
             31 |              1 |              6 | ["1"]
(3 rows)
postgres=#
2. 优化原则
把typalign大的字段放前面,比如ctime、mtime等,可变长字段放后面,bool类型字段见缝插针。
例如,fs_key大小为25,typalign = 1,可以在其前面或后面适当地插入 bool 类型的字段。
postgres=# select pg_column_size(key), key from tbl limit 3;
 pg_column_size |          fs_key          
----------------+--------------------------
             25 | jhMkIJXyb_TVApicSMmLOg==
             25 | g_3zGPlD75JSCWSVNdK3jQ==
             25 | aofTLLC26vOU9l5sV0XjqA==