欢迎您访问 最编程 本站为您分享编程语言代码,编程技术文章!
您现在的位置是: 首页

ClickHouse窗口函数详解全面指南

最编程 2024-07-23 08:07:14
...

clickhouse支持开窗函数后更香了

文章推荐

clickhouse数据模型之留存分析

clickhouse数据模型之智能路径分析

clickhouse数据模型之有序漏斗分析

clickhouse数据模型之session分析

详解开窗函数用法

-- 当前版本21.4开窗函数功能还处于试验阶段,所有需要打开如下设置
set allow_experimental_window_functions = 1;
  • aggregation
-- 先看一个简单的例子 通过count开窗进行逐层计数,其中intDiv(number, 3)表示除3向下取正
select number, count() over (partition by intDiv(number, 3) order by number rows unbounded preceding) from numbers(10)

-- 聚合函数开窗,取每一个分组中的最大值,类似的还有min、avg
select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) from numbers(10) 

-- 窗口中不使用partition by关键字的话,窗口将是全部数据
select number, avg(number) over (order by number rows unbounded preceding) from numbers(10)

-- 提取窗口中间数
select number, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) from numbers(10)

-- 可以通过设置别名对窗口结果进行二次处理
select q * 10, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) q from numbers(10)

-- 多窗口支持
select number,intDiv(number, 3)p3, intDiv( number, 5) p5, max(number) over (partition by p3 order by number desc rows unbounded preceding),median(number) over (partition by p5 order by number rows unbounded preceding) as m from numbers(11) order by number 

  • groupArray
-- 通过groupArray聚合分组内某字段数
select number, groupArray(number) over (partition by intDiv( number,3)) from numbers(10)

-- 窗口结果支持去重
select distinct sum(0) over (rows unbounded preceding) from numbers(2)

-- 支持 WINDOW 关键字 
select sum(number) over w1, sum(number) over w2
from numbers(10)
window
    w1 as (rows unbounded preceding),
    w2 as (partition by intDiv(number, 3) rows unbounded preceding)

  • row_number
-- 支持排名函数rank、dense_rank、row_number 
select number, p, o,
    count(*) over w,
    rank() over w,
    dense_rank() over w,
    row_number() over w
from (select number, intDiv(number, 5) p, mod(number, 3) o
    from numbers(31) order by o, number) t
window w as (partition by p order by o)
order by p, o, number 
 
  • lead
-- 支持lag、lead,获取创建内向上或向下指定位置的数据;参数lagInFrame(数据列,自上向下数第几个位置,默认值)
select number, p, pp,
    lagInFrame(number) over w as lag1,
    lagInFrame(number, 2) over w as lag2,
    lagInFrame(number,  number - pp, number * 11) over w as lag,
    leadInFrame(number, 1,  number) over w as lead
from (select number, intDiv(number, 5) p, p * 5 pp from numbers(16))
window w as (partition by p order by number
    rows between unbounded preceding and unbounded following)
order by number

  • first_value
-- first_value:取窗口内第一个值;last_value:取窗口内最后一个值
select
    number,
    fIrSt_VaLue(number) over w,
    lAsT_vAlUe(number) over w
from numbers(10)
window w as (order by number range between 1 preceding and 1 following)
order by number