Chapter 04

窗口函数与 CTE

PostgreSQL 最强大的分析工具:窗口函数、公共表表达式(CTE)与递归查询

4.1 窗口函数概念

窗口函数(Window Function)在保留原始行的同时,对一组相关行执行计算——这组行称为"窗口(Window)"。与 GROUP BY 不同,窗口函数不折叠行,每行仍然保留并获得一个计算结果

🪟

窗口函数的语法结构
函数名() OVER (PARTITION BY 分组列 ORDER BY 排序列 ROWS/RANGE BETWEEN ... AND ...)

- PARTITION BY:将数据划分为独立的窗口(类似 GROUP BY,但不合并行)
- ORDER BY:决定窗口内的行顺序(影响排名、LAG/LEAD 等)
- 帧子句(ROWS/RANGE BETWEEN):决定计算哪些行(默认从开始到当前行)

SQL-- 准备示例数据:销售数据
CREATE TABLE sales (
  id          SERIAL PRIMARY KEY,
  salesperson TEXT NOT NULL,
  region      TEXT NOT NULL,
  amount      NUMERIC(12,2) NOT NULL,
  sale_date   DATE NOT NULL
);

INSERT INTO sales (salesperson, region, amount, sale_date) VALUES
('Alice', '北区', 8500,  '2024-01-05'),
('Bob',   '北区', 12000, '2024-01-10'),
('Alice', '北区', 9200,  '2024-01-15'),
('Carol', '南区', 15000, '2024-01-08'),
('Dave',  '南区', 7800,  '2024-01-12'),
('Carol', '南区', 11500, '2024-01-20'),
('Bob',   '北区', 9800,  '2024-02-03'),
('Dave',  '南区', 13200, '2024-02-07');

4.2 排名函数

SQL-- ROW_NUMBER:唯一行号(无并列)
-- RANK:并列时跳过后续名次(1,1,3)
-- DENSE_RANK:并列时不跳过(1,1,2)
SELECT
  salesperson,
  region,
  amount,
  ROW_NUMBER() OVER (PARTITION BY region ORDER BY amount DESC) AS row_num,
  RANK()       OVER (PARTITION BY region ORDER BY amount DESC) AS rank,
  DENSE_RANK() OVER (PARTITION BY region ORDER BY amount DESC) AS dense_rank,
  PERCENT_RANK() OVER (PARTITION BY region ORDER BY amount) AS pct_rank
FROM sales
ORDER BY region, amount DESC;

-- 结果示例(北区):
-- salesperson | region | amount | row_num | rank | dense_rank
-- ------------+--------+--------+---------+------+-----------
-- Bob         | 北区   | 12000  |    1    |   1  |     1
-- Bob         | 北区   |  9800  |    2    |   2  |     2
-- Alice       | 北区   |  9200  |    3    |   3  |     3
-- Alice       | 北区   |  8500  |    4    |   4  |     4

-- 实战:取每个区域 TOP 3 销售
SELECT * FROM (
  SELECT
    salesperson, region, amount,
    RANK() OVER (PARTITION BY region ORDER BY amount DESC) AS rnk
  FROM sales
) ranked
WHERE rnk <= 3;

4.3 LAG 与 LEAD:访问相邻行

SQL-- LAG:获取当前行前 N 行的值
-- LEAD:获取当前行后 N 行的值
SELECT
  salesperson,
  sale_date,
  amount,
  LAG(amount, 1) OVER (
    PARTITION BY salesperson ORDER BY sale_date
  ) AS prev_amount,
  amount - LAG(amount, 1) OVER (
    PARTITION BY salesperson ORDER BY sale_date
  ) AS change,
  LEAD(amount, 1, 0) OVER (  -- 第3个参数是 NULL 时的默认值
    PARTITION BY salesperson ORDER BY sale_date
  ) AS next_amount
FROM sales
ORDER BY salesperson, sale_date;

4.4 聚合窗口函数:滑动计算

SQL-- 累计求和(Running Total)
SELECT
  sale_date,
  salesperson,
  amount,
  SUM(amount) OVER (
    PARTITION BY salesperson
    ORDER BY sale_date
    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
  ) AS running_total
FROM sales
ORDER BY salesperson, sale_date;

-- 3日移动平均(滑动窗口)
SELECT
  sale_date,
  amount,
  AVG(amount) OVER (
    ORDER BY sale_date
    ROWS BETWEEN 2 PRECEDING AND CURRENT ROW  -- 当前行及前2行
  ) AS moving_avg_3d
FROM sales
ORDER BY sale_date;

-- 占比:每笔销售占该区域总销售额的比例
SELECT
  salesperson,
  region,
  amount,
  ROUND(
    amount * 100.0 / SUM(amount) OVER (PARTITION BY region),
    2
  ) AS pct_of_region
FROM sales
ORDER BY region, pct_of_region DESC;

-- FIRST_VALUE / LAST_VALUE:窗口内的第一个/最后一个值
SELECT
  salesperson,
  region,
  amount,
  FIRST_VALUE(amount) OVER (PARTITION BY region ORDER BY amount DESC) AS region_max,
  NTH_VALUE(amount, 2) OVER (PARTITION BY region ORDER BY amount DESC
    ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
  ) AS region_second
FROM sales;

4.5 CTE(公共表表达式)

CTE 用 WITH 子句将复杂查询分解为可读的命名块,相当于"临时视图"。

SQL-- 基础 CTE:分步计算用户购买力
WITH
user_totals AS (
  SELECT user_id, SUM(total) AS lifetime_value
  FROM orders
  WHERE status = 'delivered'
  GROUP BY user_id
),
user_stats AS (
  SELECT
    AVG(lifetime_value) AS avg_ltv,
    PERCENTILE_CONT(0.9) WITHIN GROUP (ORDER BY lifetime_value) AS p90_ltv
  FROM user_totals
)
SELECT
  u.username,
  ut.lifetime_value,
  CASE
    WHEN ut.lifetime_value > us.p90_ltv THEN 'VIP'
    WHEN ut.lifetime_value > us.avg_ltv THEN '高价值'
    ELSE '普通'
  END AS tier
FROM user_totals ut
JOIN users u ON u.id = ut.user_id
CROSS JOIN user_stats us
ORDER BY ut.lifetime_value DESC;

4.6 递归 CTE

递归 CTE 允许查询引用自身,非常适合树形结构(组织架构、商品分类层级、评论回复链)。

SQL-- 准备员工组织架构表
CREATE TABLE employees (
  id          INTEGER PRIMARY KEY,
  name        TEXT NOT NULL,
  manager_id  INTEGER REFERENCES employees(id),  -- 自引用外键
  department  TEXT
);

INSERT INTO employees VALUES
(1, 'CEO 张总',    NULL, '管理层'),
(2, 'CTO 李总',   1,    '技术'),
(3, '产品总监',   1,    '产品'),
(4, '后端架构师', 2,    '技术'),
(5, '前端负责人', 2,    '技术'),
(6, '数据库工程师',4,   '技术'),
(7, '后端开发A',  4,    '技术'),
(8, '前端开发B',  5,    '技术'),
(9, 'PM 王经理',  3,    '产品');

-- 递归 CTE:查询某人的所有下属(含各级)
WITH RECURSIVE org_tree AS (
  -- 锚点查询(初始行,非递归部分)
  SELECT
    id, name, manager_id, department,
    0 AS depth,
    ARRAY[id] AS path
  FROM employees
  WHERE id = 2   -- 从 CTO 开始

  UNION ALL

  -- 递归部分:每次找下一级员工
  SELECT
    e.id, e.name, e.manager_id, e.department,
    ot.depth + 1,
    ot.path || e.id
  FROM employees e
  JOIN org_tree ot ON e.manager_id = ot.id
  WHERE ot.depth < 10      -- 防止无限循环,限制最大深度
)
SELECT
  REPEAT('  ', depth) || name AS org_chart,
  depth,
  department,
  path::TEXT AS id_path
FROM org_tree
ORDER BY path;

-- 输出结果:
-- org_chart          | depth | department
-- -------------------+-------+-----------
-- CTO 李总           |   0   | 技术
--   后端架构师       |   1   | 技术
--     数据库工程师   |   2   | 技术
--     后端开发A      |   2   | 技术
--   前端负责人       |   1   | 技术
--     前端开发B      |   2   | 技术

4.7 物化 CTE 与 LATERAL 连接

SQL-- MATERIALIZED:强制 CTE 物化(计算一次并缓存结果)
-- 默认 PG 12+ 会根据优化器决定是否物化
WITH expensive_calc AS MATERIALIZED (
  SELECT user_id, SUM(total) AS ltv
  FROM orders
  GROUP BY user_id
)
SELECT * FROM expensive_calc WHERE ltv > 1000;

-- NOT MATERIALIZED:强制内联(让优化器更灵活地推送条件)
WITH cte AS NOT MATERIALIZED (
  SELECT * FROM orders WHERE status = 'delivered'
)
SELECT * FROM cte WHERE user_id = 42;

-- LATERAL JOIN:关联子查询,右侧子查询可引用左侧表的列
-- 场景:为每个用户获取最近 3 笔订单
SELECT
  u.username,
  recent.id AS order_id,
  recent.total,
  recent.created_at
FROM users u
CROSS JOIN LATERAL (
  SELECT id, total, created_at
  FROM orders o
  WHERE o.user_id = u.id    -- 引用外部 u.id
  ORDER BY created_at DESC
  LIMIT 3
) recent
ORDER BY u.username, recent.created_at DESC;
💡

LATERAL vs 普通子查询普通子查询(from 子句)不能引用同一 FROM 中其他表的列;LATERAL 子查询可以。LATERAL 等价于"对每一行执行一次子查询",类似编程语言中的 flatMap。

📌

本章小结窗口函数(OVER)是 PostgreSQL 分析查询的核心:ROW_NUMBER/RANK/DENSE_RANK 用于排名,LAG/LEAD 访问相邻行,SUM/AVG OVER 计算累计或滑动聚合。CTE 用 WITH 子句分解复杂查询,提高可读性。递归 CTE(WITH RECURSIVE)是查询树形/图形数据的标准方式。LATERAL 解决了"对每行执行关联子查询"的需求。