数据分片
更新时间:2024-11-17 12:01:00
把数据拆分到多个节点上,在多个节点上进行存储和处理,这种技术叫做数据分区也称为数据分片。
AntDB提供四种数据分片方式,分别是:Hash分片、Mod取模分片、Random随机分片及复制表。用户可以在建表语句CREATE TABLE中使用 DISTRIBUTE BY 关键字来指定分片方式。除复制表外,其他三种数据分片方式都是基于键的分片。比如Hash分片的原理是使用新写入数据的某个键值 —— 比如客户 ID、客户端 IP、ZIP 码等等 —— 通过哈希函数决定保存的数据节点(也就是分片位置)。
Hash分片
使用Hash的sql具体操作
--HASH分片
create table T36CE5 (id int,name varchar(20)) distribute by hash(id);
insert into T36CE5 select i,'test' from generate_series(0, 5999) i;
select count(1) from T36CE5;
execute direct on (dn0) 'select sum(id) from T36CE5;';
execute direct on (dn7) 'select sum(id) from T36CE5;';
execute direct on (dn14) 'select sum(id) from T36CE5;';
drop table T36CE5;
--issue 389
create table T36CE5 (id int ,age int );
alter table T36CE5 distribute by hash(age);
alter table T36CE5 distribute by hash(id);
alter table T36CE5 distribute by hash(age);
alter table T36CE5 distribute by hash(id);
alter table T36CE5 distribute by hash(age);
alter table T36CE5 distribute by hash(id);
alter table T36CE5 distribute by hash(age);
alter table T36CE5 distribute by hash(id);
alter table T36CE5 distribute by hash(age);
alter table T36CE5 distribute by hash(id);
alter table T36CE5 distribute by hash(age);
alter table T36CE5 distribute by hash(id);
drop table T36CE5
Mod取模分片
使用Mod取模分片的sql具体操作
--modulo分片
create table T253DD (id int,num1 int,num2 int) distribute by modulo(id);
insert into T253DD select i % 25, i % 20 , i % 30from generate_series(0, 2999) i;
select count(1) from T253DD;
execute direct on (dn0) 'select sum(num1) from T253DD;';
execute direct on (dn7) 'select sum(num1) from T253DD;';
execute direct on (dn14) 'select sum(num1) from T253DD;';
drop table T253DD;
--ISSUE 460
--不支持对非数值类型的字段进行modulo
create table bmsql_config (
cfg_name varchar(30) primary key,
cfg_value varchar(50)
) distribute by modulo(cfg_name) ;
Random随机分片
使用Random随机分片的sql具体操作
\c cidb regress_super_user
--random
create table T5E620 (id int,num1 int,num2 int) distribute by random ;
insert into T5E620 select 100 from generate_series(1, 400000);
select count(1) from T5E620;
-- execute direct on (VAR_DN0_NAME) 'select * from T5E620;';
execute direct on (dn0) 'select count(1)>0 from T5E620;';
-- execute direct on (VAR_DN1_NAME) 'select * from T5E620;';
execute direct on (dn1) 'select count(1)>0 from T5E620;';
execute direct on (dn2) 'select count(1)>0 from T5E620;';
drop table T5E620;
复制表
复制表的sql具体操作
\c cidb regress_super_user
--复制表
create table TCFF1F (id serial primary key,name varchar(20)) distribute by replication;
insert into TCFF1F (name) values ('xxxxxxxxxxxxxxxxxxxx');
insert into TCFF1F (name) select name from TCFF1F;
insert into TCFF1F (name) select name from TCFF1F;
insert into TCFF1F (name) select name from TCFF1F;
select * from TCFF1F;
execute direct on (dn0) 'select * from TCFF1F;';
execute direct on (dn1) 'select * from TCFF1F;';
execute direct on (dn2) 'select * from TCFF1F;';
drop table TCFF1F;
问题反馈