AntDB分布式关系型数据库_数据分片

把数据拆分到多个节点上,在多个节点上进行存储和处理，这种技术叫做数据分区也称为数据分片。

AntDB提供四种数据分片方式，分别是：Hash分片、Mod取模分片、Random随机分片及复制表。用户可以在建表语句CREATE TABLE中使用 DISTRIBUTE BY 关键字来指定分片方式。除复制表外，其他三种数据分片方式都是基于键的分片。比如Hash分片的原理是使用新写入数据的某个键值 —— 比如客户 ID、客户端 IP、ZIP 码等等 —— 通过哈希函数决定保存的数据节点（也就是分片位置）。

Hash分片

使用Hash的sql具体操作

--HASH分片
create table T36CE5 (id int,name varchar(20)) distribute by hash(id);

insert into T36CE5 select i,'test' from generate_series(0, 5999) i;

select count(1) from T36CE5;

execute direct on (dn0) 'select sum(id) from T36CE5;';
execute direct on (dn7) 'select sum(id) from T36CE5;';
execute direct on (dn14) 'select sum(id) from T36CE5;';
drop table T36CE5;


--issue 389

create table T36CE5 (id int ,age int );

alter  table T36CE5  distribute by hash(age);
alter  table T36CE5  distribute by hash(id);
alter  table T36CE5  distribute by hash(age);
alter  table T36CE5  distribute by hash(id);
alter  table T36CE5  distribute by hash(age);
alter  table T36CE5  distribute by hash(id);
alter  table T36CE5  distribute by hash(age);
alter  table T36CE5  distribute by hash(id);
alter  table T36CE5  distribute by hash(age);
alter  table T36CE5  distribute by hash(id);
alter  table T36CE5  distribute by hash(age);
alter  table T36CE5  distribute by hash(id);

drop table T36CE5

Mod取模分片

使用Mod取模分片的sql具体操作

--modulo分片
create table T253DD (id int,num1 int,num2 int) distribute by modulo(id);
insert into T253DD select i % 25, i % 20 , i % 30from generate_series(0, 2999) i;

select count(1) from T253DD;

execute direct on (dn0) 'select sum(num1) from T253DD;';
execute direct on (dn7) 'select sum(num1) from T253DD;';
execute direct on (dn14) 'select sum(num1) from T253DD;';
drop table T253DD;

--ISSUE 460
--不支持对非数值类型的字段进行modulo
create table bmsql_config (
  cfg_name    varchar(30) primary key,
  cfg_value   varchar(50)
) distribute by modulo(cfg_name) ;

Random随机分片

使用Random随机分片的sql具体操作

\c cidb regress_super_user
--random  
create table T5E620 (id int,num1 int,num2 int) distribute by random ;

insert into T5E620 select 100 from  generate_series(1, 400000);
select count(1) from T5E620;

-- execute direct on (VAR_DN0_NAME) 'select * from T5E620;';
execute direct on (dn0) 'select count(1)>0 from T5E620;';
-- execute direct on (VAR_DN1_NAME) 'select * from T5E620;';
execute direct on (dn1) 'select count(1)>0 from T5E620;';
execute direct on (dn2) 'select count(1)>0 from T5E620;';
drop table T5E620;

复制表

复制表的sql具体操作

\c cidb regress_super_user
--复制表
create table TCFF1F (id serial primary key,name varchar(20)) distribute by replication;

insert into TCFF1F (name) values ('xxxxxxxxxxxxxxxxxxxx');
insert into TCFF1F (name)  select name from TCFF1F;
insert into TCFF1F (name)  select name from TCFF1F;
insert into TCFF1F (name)  select name from TCFF1F;

select * from TCFF1F;

execute direct on (dn0) 'select * from TCFF1F;';
execute direct on (dn1) 'select * from TCFF1F;';
execute direct on (dn2) 'select * from TCFF1F;';
drop table TCFF1F;