好得很程序员自学网

<tfoot draggable='sEl'></tfoot>

clickhouse 批量插入数据及ClickHouse常用命令详解

一.安装使用

ClickHouse是Yandex提供的一个开源的列式存储数据库管理系统,多用于联机分析(OLAP)场景,可提供海量数据的存储和分析,同时利用其数据压缩和向量化引擎的特性,能提供快速的数据搜索。

Ⅰ).安装

?

1

2

3

4

5

6

sudo yum install yum-utils

sudo rpm --import https://repo.yandex.ru/clickhouse/CLICKHOUSE-KEY.GPG

sudo yum-config-manager --add-repo https://repo.yandex.ru/clickhouse/rpm/stable/x86_64

sudo yum install clickhouse-server clickhouse-client

sudo /etc/init.d/clickhouse-server start

clickhouse-client

Ⅱ).配置

a).clickhouse-server

?

1

2

3

4

5

6

CLICKHOUSE_USER=username

CLICKHOUSE_LOGDIR=${CLICKHOUSE_HOME}/log/clickhouse-server

CLICKHOUSE_LOGDIR_USER=username

CLICKHOUSE_DATADIR_OLD=${CLICKHOUSE_HOME}/data/old

CLICKHOUSE_DATADIR=${CLICKHOUSE_HOME}/data

b).config.xml

?

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

... ...

   <!-- 配置日志参数 -->
   <logger>
     <level>info</level>
     <log>${CLICKHOUSE_HOME}/log/clickhouse-server/clickhouse-server.log</log>
     <errorlog>${CLICKHOUSE_HOME}/log/clickhouse-server/clickhouse-server-error.log</errorlog>
     <size>100M</size>
     <count>5</count>
   </logger>

   <!-- 配置数据保存路径 -->
   <path>${CLICKHOUSE_HOME}</path>
   <tmp_path>${CLICKHOUSE_HOME}/tmp</tmp_path>
   <user_files_path>${CLICKHOUSE_HOME}/user_files</user_files_path>

   <!-- 配置监听 -->
   <listen_host>::</listen_host>

   <!-- 配置时区 -->
   <timezone>Asia/Shanghai</timezone>

... ...

Ⅲ).启停服务

?

1

2

3

4

#### a).启动服务

sudo service clickhouse-server start

#### b).停止服务

sudo service clickhouse-server stop

Ⅳ).客户端访问

?

1

clickhouse-client

二.常用命令

Ⅰ).创建表

?

1

2

3

4

5

6

7

8

9

10

11

CREATE TABLE IF NOT EXISTS database.table_name ON CLUSTER cluster_shardNum_replicasNum (
    id UInt64,
    name String,
    time UInt64,
    age UInt8,
    flag UInt8
)
ENGINE = MergeTree
PARTITION BY toDate(time / 1000)
ORDER BY (id, name)
SETTINGS index_granularity = 8192

Ⅱ).创建物化视图

?

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

CREATE MATERIALIZED VIEW database.view_name ON CLUSTER cluster_shardNum_replicasNum
ENGINE = AggregatingMergeTree
PARTITION BY toYYYYMMDD(time)
ORDER BY (id, name)
AS SELECT
    toStartOfHour(toDateTime(time / 1000)) AS time,
    id,
    name,
    sumState(if(flag = 1, 1, 0)) AS successCount,
    sumState(if(flag = 0, 1, 0)) AS faildCount,
    sumState(if(age < 10, 1, 0)) AS rang1Age,
    sumState(if((age > 10) AND (age < 20), 2, 0)) AS rang2Age,
    sumState(if(age > 20, 3, 0)) AS rang3Age,
    maxState(age) AS maxAge,
    minState(age) AS minAge
FROM datasource.table_name
GROUP BY time, id, name

Ⅲ).插入数据

a).普通数据插入

?

1

INSERT INTO database.table_name (id, name, age, flag) VALUES (1, 'test', 15, 0)

b).Json数据插入

?

1

INSERT INTO database.table_name FORMAT JSONEachRow {"id": "1", "name": "test", "age": "11", "flag": "1"}

Ⅳ).查询数据

a).表数据查询

?

1

SELECT * FROM database.table_name WHERE id = 1

b).物化视图查询

?

1

2

3

4

SELECT id, name, sumMerge(successCount), sumMerge(faildCount), sumMerge(rang1Age), sumMerge(rang2Age), maxMerge(maxAge), minMerge(minAge)
FROM database.view_name
WHERE id = 1
GROUP BY id, name

Ⅴ).创建NESTED表

?

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

CREATE TABLE IF NOT EXISTS database.table_name (
    id UInt64,
    name String,
    time UInt64,
    age UInt8,
    flag UInt8,
    nested_table_name Nested (
        sequence UInt32,
        id UInt64,
        name String,
        time UInt64,
        age UInt8,
        flag UInt8,
        socketAddr String,
        socketRemotePort UInt32,
        socketLocalPort UInt32,
        eventTime UInt64,
        exceptionClassName String,
        hashCode Int32,
        nextSpanId UInt64
    )
)
ENGINE = MergeTree
PARTITION BY toDate(time / 1000)
ORDER BY (id, name, time)
SETTINGS index_granularity = 8192

Ⅵ).NESTED表数据查询

?

1

SELECT table1.*, table1.id FROM nest.table_name AS table1 ARRAY JOIN nested_table_name AS table2

Ⅶ).配置字典项

?

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

<dictionaries>

   <dictionary>

     <name>url</name> 

     <source>

       <clickhouse>

         <host>hostname</host> 

         <port> 9000 </port> 

         <user> default </user> 

         <password/> 

         <db>dict</db> 

         <table>url_dict</table>

       </clickhouse>

     </source> 

     <lifetime>

       <min> 30 </min> 

       <max> 36 </max>

     </lifetime> 

     <layout>

       <hashed/>

     </layout> 

     <structure>

       <id>

         <name>id</name>

       </id> 

       <attribute>

         <name>hash_code</name> 

         <type>String</type> 

         <null_value/>

       </attribute> 

       <attribute>

         <name>url</name> 

         <type>String</type> 

         <null_value/>

       </attribute>

     </structure>

   </dictionary> 

   <dictionary>

     <name>url_hash</name> 

     <source>

       <clickhouse>

         <host>hostname</host> 

         <port> 9000 </port> 

         <user> default </user> 

         <password/> 

         <db>dict</db> 

         <table>url_hash</table>

       </clickhouse>

     </source> 

     <lifetime>

       <min> 30 </min> 

       <max> 36 </max>

     </lifetime> 

     <layout>

       <complex_key_hashed/>

     </layout> 

     <structure>

       <key>

         <attribute>

           <name>hash_code</name> 

           <type>String</type>

         </attribute>

       </key> 

       <attribute>

         <name>url</name> 

         <type>String</type> 

         <null_value/>

       </attribute>

     </structure>

   </dictionary>

</dictionaries>

Ⅷ).字典查询

?

1

2

3

4

5

SELECT
    id,
    dictGet('name', 'name', toUInt64(name)) AS name,
    dictGetString('url', 'url', tuple(url)) AS url
FROM table_name

Ⅸ).导入数据

?

1

clickhouse-client --query="INSERT INTO database.table_name FORMAT CSVWithNames" < /path/import_filename.csv

Ⅹ).导出数据

?

1

clickhouse-client --query="SELECT * FROM database.table_name FORMAT CSV" | sed 's/"//g' > /path/export_filename.csv

Ⅺ).查看partition状态

?

1

SELECT table, name, partition, active FROM system.parts WHERE database = 'database_name'

Ⅻ).清理partition

?

1

ALTER TABLE database.table_name ON CLUSTER cluster_shardNum_replicasNum DETACH PARTITION 'partition_id'

XIII).查看列的压缩率

?

1

2

3

4

5

6

7

8

9

10

11

SELECT
    database,
    table,
    name,
    formatReadableSize(sum(data_compressed_bytes) AS c) AS comp,
    formatReadableSize(sum(data_uncompressed_bytes) AS r) AS raw,
    c / r AS comp_ratio
FROM system.columns
WHERE database = 'database_name'
    AND table = 'table_name'
GROUP BY database, table, name

XIV).查看物化视图的磁盘占用

?

1

clickhouse-client --query="SELECT partition, count(*) AS partition_num, formatReadableSize(sum(bytes)) AS disk_size FROM system.parts WHERE database='database_name'" --external --file=***.sql --name=parts --structure='table String, name String, partition UInt64, engine String' -h hostname

到此这篇关于clickhouse 批量插入数据及ClickHouse常用命令的文章就介绍到这了,更多相关clickhouse 批量插入内容请搜索以前的文章或继续浏览下面的相关文章希望大家以后多多支持!

原文链接:https://blog.csdn.net/weixin_30444625/article/details/112520469

查看更多关于clickhouse 批量插入数据及ClickHouse常用命令详解的详细内容...

  阅读:40次