Highgo oracle兼容模式的国产库copy force_null

最近在一个从oracle到基于postgresql的国产数据库项目上,遇到使用copy加载到数据库中的表字段是空值数据无法过滤,因为在oracle中的\0x00、NULL、”(空值) 与postgresql中不同,所以在像瀚高、kingbase等PG系的数据库,oracle兼容模式下对于‘’空值的处理要格外注意,下面简单的记录。

环境 highgoDB v9

构造测试数据

[hg@anbob-com ~]$ psql anbob system -p 1521
psql (14.10)
IvorySQL-pro-3.2.0
Type "help" for help.

anbob=# select name,setting from pg_settings where name in ('ivorysql.database_mode','ivorysql.compatible_mode');
           name           | setting
--------------------------+---------
 ivorysql.compatible_mode | oracle
 ivorysql.database_mode   | oracle
(2 rows)

anbob=# CREATE TABLE  test_cp (
  ACCOUNT VARCHAR2(100 byte) NULL COLLATE "DEFAULT",
  SN VARCHAR2(100 byte) NULL COLLATE "DEFAULT"
  );
CREATE TABLE
anbob=# \d test_cp
                  Table "public.test_cp"
 Column  |     Type      | Collation | Nullable | Default
---------+---------------+-----------+----------+---------
 account | varchar2(100) |           |          |
 sn      | varchar2(100) |           |          |

[hg@anbob-com ~]$ cat a.txt
|041621004592677
a|0123456789
[hg@anbob-com ~]$ psql anbob system -p 1521 -c "\copy test_cp (account,sn) from a.txt with (format text, delimiter '|', NULL '')"
ERROR:  syntax error at or near "''"
LINE 1: ...t,sn ) FROM STDIN with (format text, delimiter '|', NULL '')
                                                                    ^
[hg@anbob-com ~]$ psql anbob system -p 1521 -c "\copy test_cp (account,sn) from a.txt with (format text, delimiter '|', NULL ' ')"
COPY 2



$ psql anbob system -p 1521
psql (14.10)
anbob=# select * from test_cp;
 account |       sn
---------+-----------------
         | 041621004592677
 a       | 0123456789
(2 rows)

oracle 兼容模式过滤数据

anbob=# select * from test_cp where account=' '; --空格
 account | sn
---------+----
(0 rows)
anbob=# select * from test_cp where account='';
 account | sn
---------+----
(0 rows)

anbob=# select * from test_cp where account is null ;
 account | sn
---------+----
(0 rows)

anbob=# select * from test_cp where account is not null ;
 account |       sn
---------+-----------------
         | 041621004592677
 a       | 0123456789
(2 rows)

anbob=# select account,length(account),ascii(account),xmin,xmax ,ctid from test_cp where account<>'a';
 account | length | ascii |  xmin   | xmax | ctid
---------+--------+-------+---------+------+-------
         |      0 |     0 | 2523958 |    0 | (0,1)
(1 row)

注意: 第一条account字段无值的记录使用=’ ‘, =”和is null 都无法过滤(oracle中不使用=”)。使用ascii码显示 是\x00。

CHR 0 和空值, NULL

在oracle中可以在数据库中insert chr0, 但是在postgresql中是不允许的。

# ORACLE


SQL> create table test1(v varchar(20));
Table created.

SQL> insert into test1 values (chr(0));
1 row created.

SQL> select dump(v) from test1;
DUMP(V)
--------------------------------------------------------------------------------
Typ=1 Len=1: 0


SQL> insert into test1 values('a');
1 row created.

SQL> insert into test1 values(null);

1 row created.

SQL> insert into test1 values ('');

 row created.

SQL>
  1* select rownum,t.* ,dump(v) dv from test1 t

    ROWNUM V                                        DV
---------- ---------------------------------------- --------------------------------------------------
         1 a                                        Typ=1 Len=1: 97
         2                                          NULL
         3                                          NULL

Note: chr 0 是可以写入到oracle数据库中的,在oracle中” 空值和null是一样的。

PostgreSQL

anbob=# select chr(0);
ERROR:  null character not permitted

highgo=# insert into test1 values (1);
INSERT 0 1
highgo=# insert into test1 values (null);
INSERT 0 1
highgo=# insert into test1 values ('');
INSERT 0 1
highgo=# select * from test1;
 v
---
 1


(3 rows)

highgo=# select v,ascii(v) from test1;
 v | ascii
---+-------
 1 |    49
   |
   |     0
(3 rows)

highgo=# select * from test1 where v='';
 v
---

(1 row)

Note: 无论具体的字符集是什么, PostgreSQL 都不允许将chr(0)(ASCII NUL)的字符插入到 CHAR、VARCHAR 或 TEXT 列中。当您将允许存储零代码字符的数据库(例如 Oracle)迁移到 PostgreSQL 时,需要考虑替换。 同时postgresql中的空字符可以使用=” 筛选。

Highgo copy ”的问题

下面使用pageinspect 查看块上的数据。

anbob=# CREATE EXTENSION pageinspect;
CREATE EXTENSION

anbob=# SELECT * FROM page_header(get_raw_page('test_cp', 0));
    lsn     | checksum | flags | lower | upper | special | pagesize | version | prune_xid
------------+----------+-------+-------+-------+---------+----------+---------+-----------
 1/12DDD6D8 |        0 |     4 |    32 |  8104 |    8192 |     8192 |       4 |         0
(1 row)

anbob=# SELECT * FROM heap_page_items(get_raw_page('test_cp',0));
 lp | lp_off | lp_flags | lp_len | t_xmin  | t_xmax | t_field3 | t_ctid | t_infomask2 | t_infomask | t_hoff | t_bits | t_oid |                t_data
----+--------+----------+--------+---------+--------+----------+--------+-------------+------------+--------+--------+-------+--------------------------------------
  1 |   8144 |        1 |     41 | 2523958 |      0 |        0 | (0,1)  |           2 |       2306 |     24 |        |       | \x0321303431363231303034353932363737
  2 |   8104 |        1 |     37 | 2523958 |      0 |        0 | (0,2)  |           2 |       2306 |     24 |        |       | \x05611730313233343536373839
(2 rows)

anbob=# select account,length(account),ascii(account),xmin,xmax ,ctid from test_cp;
 account | length | ascii |  xmin   | xmax | ctid
---------+--------+-------+---------+------+-------
         |      0 |     0 | 2523958 |    0 | (0,1)
 a       |      1 |    97 | 2523958 |    0 | (0,2)
(2 rows)

anbob=# SELECT lp, t_xmin, t_xmax, t_infomask, t_ctid, encode(t_data, 'hex') as data
anbob-#     FROM heap_page_items(get_raw_page('test_cp', 0));
 lp | t_xmin  | t_xmax | t_infomask | t_ctid |                data
----+---------+--------+------------+--------+------------------------------------
  1 | 2523958 |      0 |       2306 | (0,1)  | 0321303431363231303034353932363737
  2 | 2523958 |      0 |       2306 | (0,2)  | 05611730313233343536373839
(2 rows)

PostgreSQL 兼容模式

[hg@anbob-com ~]$ psql anbob  -p 5866
psql (14.10)
IvorySQL-pro-3.2.0
Type "help" for help.

anbob=# select name,setting from pg_settings where name in ('ivorysql.database_mode','ivorysql.compatible_mode');
           name           | setting
--------------------------+---------
 ivorysql.compatible_mode | pg
 ivorysql.database_mode   | oracle


anbob=# CREATE TABLE  test_cp2 (
anbob(# ACCOUNT VARCHAR(100) NULL ,
anbob(# SN VARCHAR(100 ) NULL
anbob(# );
CREATE TABLE

[hg@anbob-com ~]$ psql anbob  -p 5866 -c "\copy test_cp2 (account,sn) from a.txt with (format text, delimiter '|', NULL '')"
COPY 2

anbob=# select * from test_cp2;
 account |       sn
---------+-----------------
         | 041621004592677
 a       | 0123456789

anbob=# select * from test_cp2 where account='';
 account | sn
---------+----
(0 rows)

anbob=# select * from test_cp2 where account is null;
 account |       sn
---------+-----------------
         | 041621004592677
(1 row)

anbob=# select account,length(account),ascii(account),xmin,xmax ,ctid from test_cp2;
 account | length | ascii |  xmin   | xmax | ctid
---------+--------+-------+---------+------+-------
         |        |       | 2523963 |    0 | (0,1)
 a       |      1 |    97 | 2523963 |    0 | (0,2)
(2 rows)

[hg@dev-zcloud-node3 ~]$ psql anbob  -p 5866 -c "\copy test_cp2 (account,sn) from a.txt with (format text, delimiter '|', NULL ' ')"
COPY 2

anbob=# select account,length(account),ascii(account),xmin,xmax ,ctid from test_cp2;
 account | length | ascii |  xmin   | xmax | ctid
---------+--------+-------+---------+------+-------
         |        |       | 2523963 |    0 | (0,1)
 a       |      1 |    97 | 2523963 |    0 | (0,2)
         |      0 |     0 | 2523964 |    0 | (0,3)
 a       |      1 |    97 | 2523964 |    0 | (0,4)
(4 rows)

anbob=# select account,length(account),ascii(account),xmin,xmax ,ctid from test_cp2 where account='';
 account | length | ascii |  xmin   | xmax | ctid
---------+--------+-------+---------+------+-------
         |      0 |     0 | 2523964 |    0 | (0,3)
(1 row)

anbob=# SELECT * FROM page_header(get_raw_page('test_cp', 0));
    lsn     | checksum | flags | lower | upper | special | pagesize | version | prune_xid
------------+----------+-------+-------+-------+---------+----------+---------+-----------
 1/12E539E0 |        0 |     0 |    48 |  7936 |    8192 |     8192 |       4 |         0
(1 row)


anbob=# SELECT * FROM heap_page_items(get_raw_page('test_cp2',0));
 lp | lp_off | lp_flags | lp_len | t_xmin  | t_xmax | t_field3 | t_ctid | t_infomask2 | t_infomask | t_hoff |  t_bits  | t_oid |                t_data
----+--------+----------+--------+---------+--------+----------+--------+-------------+------------+--------+----------+-------+--------------------------------------
  1 |   8152 |        1 |     40 | 2523963 |      0 |        0 | (0,1)  |           2 |       2307 |     24 | 01000000 |       | \x21303431363231303034353932363737
  2 |   8112 |        1 |     37 | 2523963 |      0 |        0 | (0,2)  |           2 |       2306 |     24 |          |       | \x05611730313233343536373839
  3 |   8064 |        1 |     41 | 2523964 |      0 |        0 | (0,3)  |           2 |       2306 |     24 |          |       | \x0321303431363231303034353932363737
  4 |   8024 |        1 |     37 | 2523964 |      0 |        0 | (0,4)  |           2 |       2306 |     24 |          |       | \x05611730313233343536373839
(4 rows)

anbob=# SELECT * FROM heap_page_items(get_raw_page('test_cp',0));
 lp | lp_off | lp_flags | lp_len | t_xmin  | t_xmax | t_field3 | t_ctid | t_infomask2 | t_infomask | t_hoff |  t_bits  | t_oid |                t_data
----+--------+----------+--------+---------+--------+----------+--------+-------------+------------+--------+----------+-------+--------------------------------------
  1 |   8144 |        1 |     41 | 2523958 |      0 |        0 | (0,1)  |           2 |       2306 |     24 |          |       | \x0321303431363231303034353932363737
  2 |   8104 |        1 |     37 | 2523958 |      0 |        0 | (0,2)  |           2 |       2306 |     24 |          |       | \x05611730313233343536373839
  3 |   8056 |        1 |     41 | 2523960 |      0 |        0 | (0,3)  |           2 |       2306 |     24 |          |       | \x0321303431363231303034353932363737
  4 |   8016 |        1 |     37 | 2523960 |      0 |        0 | (0,4)  |           2 |       2306 |     24 |          |       | \x05611730313233343536373839

Note: Postgresql兼容copy进去的数据,支持null ”,意思如果是源文件是”字符时 自动换为null, 而我们第二个命令null ‘ ‘ ,没有匹配到列是空格的,所以使用了默认的‘’, 在库里使用ascii函数对比,NULL显示是空,而” 显示是0.

使用 pageinspect查看oracle前面无法过滤的数据,是copy 补充了‘’, 但是oracle兼容模式又不支持=” ,所以才导致了数据无法查询。

anbob=# select name,setting from pg_settings where name in ('ivorysql.database_mode','ivorysql.compatible_mode');
           name           | setting
--------------------------+---------
 ivorysql.compatible_mode | pg
 ivorysql.database_mode   | oracle

anbob=# select * from test_cp where account='';
 account |       sn
---------+-----------------
         | 041621004592677

Note: 刚才在oracle mode copy导入的表,在pg mode下使用=‘’可以过滤。

解决方法:

使用FORCE_NULL, 如果字段是”时直接替换为NULL, 还有对应的Force_not_null。只适合csv mode,但text同样可以以csv mode使用. 该方法适用于highgo,也适用于kingbase.

[hg@anbob-com ~]$ psql anbob system -p 1521 -c "\copy test_cp (account,sn) from a.txt with (format text, delimiter '|')"
COPY 2
[hg@anbob-com ~]$ psql anbob system -p 1521 -c "\copy test_cp (account,sn) from a.txt with (format text, delimiter '|', FORCE_NULL(ACCOUNT))"
ERROR: COPY force null available only in CSV mode

[hg@anbob-com ~]$ psql anbob system -p 1521 -c "\copy test_cp (account,sn) from a.txt with (format csv, delimiter '|', FORCE_NULL(ACCOUNT))"
COPY 2

anbob=# select account,length(account),ascii(account),xmin,xmax ,ctid,sn from test_cp ;
account | length | ascii | xmin | xmax | ctid | sn
---------+--------+-------+---------+------+--------+-----------------
| 0 | 0 | 2523974 | 0 | (0,9) | 041621004592677
a | 1 | 97 | 2523974 | 0 | (0,10) | 0123456789
| | | 2523975 | 0 | (0,11) | 041621004592677
a | 1 | 97 | 2523975 | 0 | (0,12) | 0123456789
(4 rows)

anbob=# select account,length(account),ascii(account),xmin,xmax ,ctid,sn from test_cp where account is null;
account | length | ascii | xmin | xmax | ctid | sn
---------+--------+-------+---------+------+--------+-----------------
| | | 2523975 | 0 | (0,11) | 041621004592677
(1 row)

anbob=# \pset null null
Null display is "null".
anbob=# select account,length(account),ascii(account),xmin,xmax ,ctid,sn from test_cp ;
account | length | ascii | xmin | xmax | ctid | sn
---------+--------+-------+---------+------+--------+-----------------
| 0 | 0 | 2523974 | 0 | (0,9) | 041621004592677
a | 1 | 97 | 2523974 | 0 | (0,10) | 0123456789
null | null | null | 2523975 | 0 | (0,11) | 041621004592677
a | 1 | 97 | 2523975 | 0 | (0,12) | 0123456789
(4 rows)


— over —

Leave a Comment