首页 » PostgreSQL/GaussDB » 如何最短停机时间在openGauss(和PostgreSQL)更改列类型

如何最短停机时间在openGauss(和PostgreSQL)更改列类型

在数据库日常运维中难免会随着Table数据量增加,原来的数据类型需要修改,在Oracle如是加长兼容的列类型可以直接修改元数据,而其它提示列上有数据不允许修改,在MySQL和PG,openGauss中修改列如从int 到bigint时会导致表数据reload, 在之前“alter table ” modify column in Oracle、MySQL、PostGreSQL(数据库比较系列十三) 笔记有记录这一现象,而如果表已经很大,该操作可能从空间与业务连续的影响时间上都比较大,前不久看到一种方法觉得不错,简单记录一下。

openGauss 3.1

anbob=# \d key
      Table "public.key"
 Column |  Type   | Modifiers
--------+---------+-----------
 id     | integer |

anbob=# select relfilenode from pg_class where relname='key';
 relfilenode
-------------
       16392
(1 row)

anbob=# alter table key alter column id TYPE bigint;
ALTER TABLE
anbob=# select relfilenode from pg_class where relname='key';
 relfilenode
-------------
       24589
(1 row)

note:
表对象已重构。

现在通过增加新列,使用trigger更新后期新数据到新列,分批更新历史数据(小事务为了减少行锁),rename列,drop 旧列;

-- Demo fixture: bigserial surrogate key plus the int column we will widen
-- to bigint online without rewriting the table.
create table demo ( k bigserial primary key, a int);
-- Seed 10,000 rows (k comes from the sequence, a = 1..10000).
insert into demo(a) select generate_series(1,10000);
\d demo

anbob=# \d demo
                         Table "public.demo"
 Column |  Type   |                    Modifiers
--------+---------+--------------------------------------------------
 k      | bigint  | not null default nextval('demo_k_seq'::regclass)
 a      | integer |
Indexes:
    "demo_pkey" PRIMARY KEY, btree (k) TABLESPACE pg_default


增加新列

-- Step 1: add the replacement column. Adding a nullable column with no
-- default is a metadata-only change: no table rewrite, only a brief lock.
alter table demo add column a_new bigint;

-- Trigger function: mirror every inserted/updated value of a into a_new so
-- the new column stays current while historical rows are backfilled.
create or replace function a_new() returns trigger as $$
 begin new.a_new := new.a; return new; end; $$ language plpgsql;

-- postgresql
-- Fires before every INSERT, and before UPDATE only when column a changes.
-- PostgreSQL 11+ spells this EXECUTE FUNCTION.
create trigger a_new_trigger
before insert or update of a on demo for each row
 execute function a_new();

-- openGauss
-- Same trigger; openGauss (like PostgreSQL before v11) requires the
-- EXECUTE PROCEDURE keyword instead of EXECUTE FUNCTION.
create trigger a_new_trigger
before insert or update of a on demo for each row
 execute procedure  a_new();

anbob=# \d demo
                         Table "public.demo"
 Column |  Type   |                    Modifiers
--------+---------+--------------------------------------------------
 k      | bigint  | not null default nextval('demo_k_seq'::regclass)
 a      | integer |
 a_new  | bigint  |
Indexes:
    "demo_pkey" PRIMARY KEY, btree (k) TABLESPACE pg_default
Triggers:
    a_new_trigger BEFORE INSERT OR UPDATE OF a ON demo FOR EACH ROW EXECUTE PROCEDURE a_new()


Note:
postgresql和opengauss的trigger关键字有区别。

新数据

update demo set a=2 where k=1;
insert into demo(k,a) values(0,0);

anbob=# select * from demo order by k limit 3;
 k | a | a_new
---+---+-------
 0 | 0 |     0
 1 | 2 |     2
 2 | 2 |
(3 rows)

更新历史数据

通常会一次更新如下
-- Naive one-shot backfill: a single huge transaction that row-locks every
-- pending row at once — shown here only as the approach to avoid.
update demo set a_new=a where a_new is null;

但是这样会锁定行记录,影响业务的DML(update/delete),甚至可能会超时,最好最小化更新, 这里创建个索引
-- Partial index covering only rows not yet backfilled; it shrinks as the
-- backfill proceeds and lets each batch locate pending rows cheaply.
create index demo_a_new on demo(k) where a_new is null;

anbob=# explain(costs off,analyze, buffers)

anbob=# insert into demo(a) select generate_series(1,100000);
INSERT 0 100000

#每次更新1000行, 验证执行计划高效
anbob=# explain(analyze,buffers)update demo set a_new=a where k in ( select k from demo where a_new is null limit 1000 );

anbob=# analyze demo;

Note:
相同的数据在PG13可以很好的使用索引,但是在openGauss 3.1中执行计划很糟糕,2次回表全是Seq Scan on demo。 包括我增加了更多的数据改变选择率, 收集了统计信息后执行计划还是不够完美。

openGauss中的执行计划

anbob=# select count(*) from demo;
 count
--------
 110001
(1 row)

anbob=# select count(*) from demo where a_new is null;
 count
-------
  8799
(1 row)


anbob=# explain update demo set a_new=a where k in ( select k from demo where a_new is null limit 100 );
                                             QUERY PLAN
----------------------------------------------------------------------------------------------------
 Update on demo  (cost=4.99..758.24 rows=100 width=26)
   ->  Nested Loop  (cost=4.99..758.24 rows=100 width=26)
         ->  HashAggregate  (cost=4.99..5.99 rows=100 width=8)
               Group By Key: public.demo.k
               ->  Limit  (cost=0.00..3.74 rows=100 width=8)
                     ->  Index Scan using demo_a_new on demo  (cost=0.00..312.56 rows=8367 width=8)
         ->  Index Scan using demo_pkey on demo  (cost=0.00..7.51 rows=1 width=18)
               Index Cond: (k = public.demo.k)
(8 rows)



anbob=# explain(analyze,buffers)update demo set a_new=a where k in ( select k from demo where a_new is null limit 1000 );
                                                                   QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------
 Update on demo  (cost=59.25..2224.14 rows=1000 width=26) (actual time=1.956..73.338 rows=1000 loops=1)
   (Buffers: shared hit=7786 read=17 dirtied=50)
   ->  Hash Semi Join  (cost=59.25..2224.14 rows=1000 width=26) (actual time=1.906..59.229 rows=1000 loops=1)
         Hash Cond: (public.demo.k = public.demo.k)
         (Buffers: shared hit=756 read=5 dirtied=25)
         ->  Seq Scan on demo  (cost=0.00..1854.30 rows=113130 width=18) (actual time=0.007..30.869 rows=110001 loops=1)
               (Buffers: shared hit=723 dirtied=7)
         ->  Hash  (cost=46.75..46.75 rows=1000 width=8) (actual time=1.658..1.658 rows=1000 loops=1)
                Buckets: 32768  Batches: 1  Memory Usage: 40kB
               (Buffers: shared hit=33 read=5 dirtied=18)
               ->  Limit  (cost=0.00..36.75 rows=1000 width=8) (actual time=0.850..1.349 rows=1000 loops=1)
                     ->  Index Scan using demo_a_new on demo  (cost=0.00..316.22 rows=8605 width=8) (actual time=0.848..1.241 rows=1000 loops=1)
                           (Buffers: shared hit=33 read=5 dirtied=18)
 Total runtime: 73.572 ms
(14 rows)



anbob=# explain(analyze,buffers)update /*+indexscan(demo demo_pkey)*/demo set a_new=a where k in ( select k from demo where a_new is null limit 1000 );
                                                                   QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------
 Update on demo  (cost=48.95..4077.45 rows=1000 width=26) (actual time=1.613..20.927 rows=1000 loops=1)
   (Buffers: shared hit=10070 read=12 dirtied=38)
   ->  Nested Loop  (cost=48.95..4077.45 rows=1000 width=26) (actual time=1.570..7.414 rows=1000 loops=1)
         ->  HashAggregate  (cost=48.95..58.95 rows=1000 width=8) (actual time=1.525..1.949 rows=1000 loops=1)
               Group By Key: public.demo.k
               (Buffers: shared hit=36 read=2 dirtied=15)
               ->  Limit  (cost=0.00..36.45 rows=1000 width=8) (actual time=0.648..1.197 rows=1000 loops=1)
                     ->  Index Scan using demo_a_new on demo  (cost=0.00..318.82 rows=8748 width=8) (actual time=0.647..1.054 rows=1000 loops=1)
                           (Buffers: shared hit=36 read=2 dirtied=15)
         ->  Index Scan using demo_pkey on demo  (cost=0.00..4.01 rows=1 width=18) (actual time=3.542..4.084 rows=1000 loops=1000)
               Index Cond: (k = public.demo.k)
               (Buffers: shared hit=3009)
 Total runtime: 21.202 ms
(13 rows)

note:
这里增加了sql hint才可以indexscan,使用索引,执行更少读取

循环更新

# postgreSQL
-- One batch per execution: update up to 1000 pending rows and report the
-- batch size. 1/count(*) deliberately raises division-by-zero once nothing
-- is left, which aborts the \watch loop instead of spinning forever.
-- NOTE(review): the /*+indexscan*/ hint is a no-op in vanilla PostgreSQL
-- unless the pg_hint_plan extension is installed — confirm before relying on it.
with updated as (
update /*+indexscan(demo demo_pkey)*/ demo set a_new=a where k in ( select k from demo where a_new is null limit 1000 )
returning 1 ) select count(*) as updated ,1/count(*) as fail_when_zero from updated
;
-- Re-run the statement above every second until it errors out (all rows done).
\watch 1


# openGauss
openGauss 没有\watch 自动刷新,需要编写复杂的PLSQL 

anbob=#  CREATE OR REPLACE PROCEDURE P_loop()
AS
  -- Backfill demo.a_new from demo.a in batches of 1000 rows so each UPDATE
  -- remains a short transaction and row locks are held only briefly.
  DECLARE
    i int :=0;  -- rows still pending backfill
    r int:=0;   -- number of 1000-row batches to run
begin
    	select count(*) into i from demo where a_new is null;
		-- FIX: i/1000 is integer division under PostgreSQL-compatible
		-- semantics (e.g. 8799/1000 = 8), so ceil() was applied to an
		-- already-truncated value and up to 999 rows could be left
		-- unconverted. Dividing by the numeric literal 1000.0 makes
		-- ceil() round up as intended (8799/1000.0 -> 9 batches).
		select ceil(i/1000.0) into r;
		while (r>0) 
		  LOOP 
		   -- Hinted to drive the self-join via the PK index; pending rows
		   -- are found through the partial index demo_a_new (a_new IS NULL).
		   update /*+indexscan(demo demo_pkey)*/ demo set a_new=a where k in ( select k from demo where a_new is null limit 1000 );
		    r:=r-1;
		  end loop;
end;
/

anbob=# call P_loop();
 label_loop
------------

(1 row)

anbob=# select k from demo where a_new is null;
 k
---
(0 rows)

验证数据

anbob=# select * from demo where k<=3;
 k | a | a_new
---+---+-------
 0 | 0 |     0
 1 | 2 |     2
 2 | 2 |     2
 3 | 3 |     3

切换到新列

alter table demo rename column a to a_old;
alter table demo rename column a_new to a;

anbob=# alter table demo drop column   a_old;
ERROR:  cannot drop table demo column a_old because other objects depend on it
DETAIL:  trigger a_new_trigger on table demo depends on table demo column a_old
HINT:  Use DROP ... CASCADE to drop the dependent objects too.

anbob=# alter table demo drop column   a_old cascade;
NOTICE:  drop cascades to trigger a_new_trigger on table demo
ALTER TABLE

anbob=# \d demo
                        Table "public.demo"
 Column |  Type  |                    Modifiers
--------+--------+--------------------------------------------------
 k      | bigint | not null default nextval('demo_k_seq'::regclass)
 a      | bigint |
Indexes:
    "demo_pkey" PRIMARY KEY, btree (k) TABLESPACE pg_default
    "demo_a_new" btree (k) TABLESPACE pg_default WHERE a IS NULL

anbob=# select * from demo where k<=3;
 k | a
---+---
 0 | 0
 1 | 2
 2 | 2
 3 | 3
(4 rows)

Note:
在pg中可以drop function a_new cascade; 但在og中不支持。

到这里已完成,只有几个DDL很快完成,其他都是Online操作, 避免了在数据库中做长事务。

打赏

对不起,这篇文章暂时关闭评论。