Work notes.

# Abnormal server power-off corrupts files and ClickHouse fails to start -- Detaching Broken Part, DB::Exception: Suspiciously many (12) broken parts

**Cause**

ClickHouse is an open-source columnar database. After an abnormal power-off, writes that were in flight can leave the data on disk inconsistent with its metadata. When the service restarts, ClickHouse reloads the data of every MergeTree table, and some of it may be corrupted. The configuration parameter max_suspicious_broken_parts governs what happens next: if the number of broken parts in a single partition exceeds its value, ClickHouse refuses to automatically repair or delete the broken parts and exits with an error at startup.

Edit /etc/clickhouse-server/config.xml, raise max_suspicious_broken_parts (here to 1000), and restart ClickHouse:

```
<merge_tree>
    <max_suspicious_broken_parts>1000</max_suspicious_broken_parts>
</merge_tree>
```
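The config.xml change applies to the whole server. A narrower option, once the server is back up: max_suspicious_broken_parts is a MergeTree-level setting, so it can also be raised for a single table with ALTER TABLE ... MODIFY SETTING. A minimal sketch, not from the original notes, reusing the easyviews.bpm_base_local table from the examples below:

```
-- Raise the broken-parts threshold for one table only (persisted in table metadata).
-- This requires a running server, so when startup itself fails,
-- the config.xml route above is the fix.
ALTER TABLE easyviews.bpm_base_local MODIFY SETTING max_suspicious_broken_parts = 1000;
```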
## SQL

```
-- View unfinished (failed) mutations; \G prints the result vertically in clickhouse-client
select * from system.mutations where is_done = 0\G

-- Kill a failed mutation
KILL MUTATION WHERE database = 'easyviews' AND table = 'bpm_base_local';

-- What secondIndex corresponds to a given time of day
select dateDiff('second', toStartOfDay(toDateTime('2023-10-20 09:40:00')), toDateTime('2023-10-20 09:40:00')) secondIndex;

-- Check whether a function number exists in the database
SELECT bb.componentId, bb.componentName, count(*)
from easyviews.bpm_base bb
group by bb.componentId, bb.componentName
HAVING date = today() and bb.dimension7 in ('270998', '213998');

-- Maximum transaction count in a single time granule
SELECT datetime, SUM(allTransCount) as num
FROM easyviews.bpm_indicator
WHERE `date` = today() and `level` = 3
GROUP by datetime
order by num desc
limit 1;

-- Maximum transaction count of a single event at a single granularity; set levelUuid = event id
SELECT datetime, SUM(allTransCount) as num
FROM easyviews.bpm_indicator
WHERE `date` = today() and levelUuid = 267 AND `level` = 4
GROUP by datetime
order by num desc
limit 1;

-- 1. Group by function number and count that function number's requests per second;
--    then sort descending and take the top function number
WITH max_counts AS (
    SELECT dimension7, MAX(count) AS max_count
    FROM (
        SELECT dimension7, secondIndex, COUNT(*) AS count
        FROM easyviews.bpm_base
        WHERE `date` = today()
          and `datetime` < '2024-03-14 09:35:00'
          and `datetime` > '2024-03-14 09:26:00'
        GROUP BY dimension7, secondIndex
    )
    GROUP BY dimension7
)
SELECT b.dimension7, b.secondIndex, a.max_count
FROM max_counts a
JOIN (
    SELECT dimension7, secondIndex, COUNT(*) AS count
    FROM easyviews.bpm_base
    WHERE `date` = today()
      and `datetime` < '2024-03-14 09:35:00'
      and `datetime` > '2024-03-14 09:26:00'
    GROUP BY dimension7, secondIndex
) b ON a.dimension7 = b.dimension7 AND a.max_count = b.count
ORDER BY a.max_count DESC;
```

```
# To run a pile of SQL statements, or SQL too long to paste, write them to a file and execute in batch:
clickhouse-client --user <username> --password <password> -d <database> --multiquery < /root/temp.sql

# View a SQL statement's execution plan (server-side trace logs):
clickhouse-client -h localhost --send_logs_level=trace <<< "SQL statement" > /dev/null

# Export to a CSV file:
clickhouse-client --query="select * from default.t_city" > city.csv
# or
# echo 'select * from default.t_city' | curl localhost:8123?database=default -udefault:password -d @- > table_name.sql

# Import a CSV file:
cat city.csv | clickhouse-client --query "insert into city FORMAT CSV"

# For smaller data volumes, export a CSV with column names, then import it:
clickhouse> select * from default.t_city INTO OUTFILE '/data/t_city.csv' FORMAT CSVWithNames;
```

[gitee ck other](https://gitee.com/zhouguanyulovejiadanyang/learnnodel/tree/master)

Ports 31123 and 31124 are NodePorts; 38123 and 39000 are hostPorts.

```
-- Slowest queries
SELECT query, query_duration_ms, event_time
FROM system.query_log AS ql
WHERE (event_time >= '2023-04-20 16:00:00') AND (event_time < '2023-04-20 16:15:00')
ORDER BY query_duration_ms DESC
LIMIT 10;

-- Slowest query patterns with their frequency
SELECT normalizeQuery(query) AS q,
       count(),
       any(query),
       avg(read_rows) / 10000000 AS read_rows_kw,
       avg(query_duration_ms),
       max(query_duration_ms)
FROM system.query_log
WHERE (event_date = '2023-10-25')
  AND (event_time > '2023-10-25 14:00:00')
  AND (event_time < '2023-10-25 18:30:00')
  AND (type = 2) -- 2 = QueryFinish
GROUP BY q
HAVING count() > 10
ORDER BY avg(query_duration_ms) DESC
LIMIT 10;

-- Currently running queries
select count() from system.processes;
select query from system.processes;

-- Total size of each database
SELECT d.name AS database,
       sum(p.bytes) AS total_bytes,
       formatReadableSize(sum(p.bytes)) AS total_size
FROM system.parts AS p
JOIN system.databases AS d ON p.database = d.name
GROUP BY d.name;

-- Row count, raw size, compressed size, and compression ratio of each table
SELECT table AS table_name,
       sum(rows) AS row_num,
       formatReadableSize(sum(data_uncompressed_bytes)) AS org_size,
       formatReadableSize(sum(data_compressed_bytes)) AS compress_size,
       round((sum(data_compressed_bytes) / sum(data_uncompressed_bytes)) * 100, 0) AS compress_ratio
FROM system.parts
WHERE database = 'easyviews'
GROUP BY table
order by sum(rows) desc;

-- How much disk space one month of new data added
SELECT sum(data_uncompressed_bytes) / 1024 / 1024 / 1024 AS uncompressed_gb,
       sum(data_compressed_bytes) / 1024 / 1024 / 1024 AS compressed_gb
FROM system.parts
WHERE database = 'easyviews'
  AND active = 1
  AND (modification_time >= '2024-04-01 00:00:00' AND modification_time <= '2024-04-30 23:59:59');

-- Size of every table in GB (run on every node)
select table, sum(bytes_on_disk) / 1000 / 1000 / 1000 as GB
from system.parts
group by table
order by GB desc;

-- Size and row count of every partition across all tables (run on every node)
select `partition`, sum(rows) as row, formatReadableSize(sum(bytes_on_disk)) as used_disk
from system.parts
group by `partition`
order by partition;

-- Size and row count of every partition of a single table (run on every node)
select `partition`, sum(rows) as row, formatReadableSize(sum(bytes_on_disk)) as used_disk
from system.parts
where `table` = 'bpm_base_local'
group by `partition`
order by partition;

-- Empty a table
truncate table easyviews.packet_base_local on cluster '{cluster}';
```
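Truncating removes all of a table's data; when only old data must be reclaimed, a single partition can be dropped instead. A minimal sketch, not from the original notes, assuming date-valued partitions as the partition listings above suggest; the partition value '2023-06-01' is a hypothetical example:

```
-- Drop one partition on all replicas (assumes the table is partitioned by date)
ALTER TABLE easyviews.bpm_base_local ON CLUSTER '{cluster}' DROP PARTITION '2023-06-01';
```

Relatedly, for the system.processes queries above: once a runaway query's query_id is known, it can be terminated with KILL QUERY. A minimal sketch; the query_id value is a placeholder:

```
-- Find the longest-running query, then kill it by id
SELECT query_id, elapsed, query FROM system.processes ORDER BY elapsed DESC LIMIT 5;
KILL QUERY WHERE query_id = '<query_id>';
```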
```
-- Manually compute the five-day average
select `datetime`, `date`, levelUuid, sumMerge(allTransCount) as sm
from easyviews.mv_bpm_indicator_y mbiy
where `level` = 4
  and levelUuid = 6042
  and `date` in ('2023-06-26', '2023-06-27', '2023-06-25', '2023-06-20', '2023-06-21')
  and timeIndex = 15*60*60/5
GROUP by levelUuid, `datetime`, centerId, linkId, datasetId, `date`, `level`
ORDER by `date`;
-- 4878 + 4497 + 330 + 4636 + 5164 = 19505; 19505 / 5 = 3901

-- Query the five-day average of event 6042
SELECT yDataAvgValueBy5day
FROM easyviews.daily_statistics
where `date` = today() and timeIndex = 15*60*60/5 and levelUuid = 6042;

-- Query historical peaks
SELECT timeIndex, yDataHistoryMaxValue, yDataHistoryMaxValueDate, insertTime
FROM easyviews.daily_statistics
WHERE date = today()
ORDER BY insertTime desc
LIMIT 100;

-- Query request and response messages
SELECT a.`time`,
       a.`uuid` as rid,
       max(case when a.direction = 0 then a.`data` else null end) as request,
       max(case when a.direction = 1 then a.`data` else null end) as response
from easyviews.bpm_original a
where a.applicationId = 6061
  and a.`date` BETWEEN '2023-07-14' and '2023-07-18'
group by a.`time`, a.`uuid`;
```

```
-- Compute success rate
SELECT sumMerge(allTransCount) AS allTransCount,
       round(if(isNaN(sumMerge(allStart2ends) / sumMerge(responseTransCount)), null, sumMerge(allStart2ends) / sumMerge(responseTransCount)), 2) AS responseTime,
       round(if(isNaN(sumMerge(responseTransCount) / allTransCount), null, sumMerge(responseTransCount) / allTransCount) * 100, 2) AS responseRate,
       round(if(isNaN(sumMerge(successTransCount) / sumMerge(responseTransCount)), null, sumMerge(successTransCount) / sumMerge(responseTransCount)) * 100, 2) AS successRate
FROM mv_bpm_indicator_hour
WHERE date = '2023-04-12'
  AND time >= toUnixTimestamp('2023-04-12 00:00:00', 'Asia/Shanghai') * 1000
  AND time <= toUnixTimestamp('2023-04-13 00:00:00', 'Asia/Shanghai') * 1000
  AND levelUuid = 5939;

-- Transaction counts of several components, grouped by function number
SELECT bb.dimension7, count(*) as ct
from easyviews.bpm_base bb
group by bb.dimension7
HAVING
    -- date >= '2024-01-04' and date <= '2024-01-10'
    date in ('2024-01-04', '2024-01-05')
    and bb.componentId in (209, 210, 211)
order by ct desc;
```

```
-- time         exists to preserve full data precision (nanoseconds)
-- datetime     data timestamp: the request time
-- probeTime    probe output timestamp: when the reassembled data was written to kafka
-- decodeTime   decode output timestamp
-- pairingTime  pairing output timestamp
-- responseTime response time
-- insertTime   time the row was inserted into ClickHouse
-- secondIndex  second of the day; exists to make second-level filtering convenient

-- Filter out rows whose response time or pairing time is 0
-- (each column takes the fractional part from its own value)
SELECT concat(toString(toDateTime(intDiv(time, 1000000000), 'Asia/Shanghai')), concat('.', toString(time % 1000000000))) as request_time,
       concat(toString(toDateTime(intDiv(responseTime, 1000000000), 'Asia/Shanghai')), concat('.', toString(responseTime % 1000000000))) as respon_Time,
       concat(toString(toDateTime(intDiv(probeTime, 1000000000), 'Asia/Shanghai')), concat('.', toString(probeTime % 1000000000))) as pro_Time,
       concat(toString(toDateTime(intDiv(decodeTime, 1000000000), 'Asia/Shanghai')), concat('.', toString(decodeTime % 1000000000))) as dec_Time,
       concat(toString(toDateTime(intDiv(pairingTime, 1000000000), 'Asia/Shanghai')), concat('.', toString(pairingTime % 1000000000))) as pair_Time,
       insertTime
from easyviews.bpm_base
where date = today()
  and datetime > '2023-09-11 13:38:00'
  and responseTime not in (0)
  and pairingTime not in (0)
limit 10;

-- Per-stage timestamps and probe delay, slowest first
select datetime,
       insertTime,
       toDateTime(probeTime / 1000000000, 'Asia/Shanghai') as probeT,
       toDateTime(decodeTime / 1000000000, 'Asia/Shanghai') as decodeT,
       toDateTime(pairingTime / 1000000000, 'Asia/Shanghai') as pairT,
       toDateTime(consumerTime / 1000000000, 'Asia/Shanghai') as conInsertT,
       toDateTime(JSONExtract(consumerInfo, 'consumerPullTime', 'UInt64') / 1000000000, 'Asia/Shanghai') as conPullT,
       toDateTime(JSONExtract(consumerInfo, 'eventPushTime', 'UInt64') / 1000000000, 'Asia/Shanghai') as eventT,
       (probeT - datetime) as delay,
       componentId,
       componentName
from easyviews.bpm_base
where date = today()
  and pairingTime != 0
  and `datetime` >= '2023-11-09 17:00:00'
  and `datetime` <= '2023-11-09 18:40:00'
order by delay desc
limit 10;
```
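The concat/intDiv pattern above stitches a nanosecond-precision string together by hand. On servers that have fromUnixTimestamp64Nano, the same columns can be rendered as DateTime64(9) directly; a minimal sketch, not from the original notes, assuming time and probeTime hold nanosecond epochs as documented above (toInt64 added because the function expects Int64):

```
-- Render nanosecond epoch columns as DateTime64(9) in one call
SELECT fromUnixTimestamp64Nano(toInt64(time), 'Asia/Shanghai') AS request_time,
       fromUnixTimestamp64Nano(toInt64(probeTime), 'Asia/Shanghai') AS pro_Time
FROM easyviews.bpm_base
WHERE date = today()
LIMIT 10;
```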