Chapter 03

SQL 基础与进阶查询

从 DDL 约束设计到 PostgreSQL 特有的 INSERT ON CONFLICT、DISTINCT ON,掌握完整查询技能

3.1 建表:DDL 与约束

DDL(Data Definition Language)定义数据库结构。PostgreSQL 的 DDL 可以在事务中执行并回滚,这是与 MySQL 的重要区别。

SQL-- 完整建表示例:电商系统
CREATE TABLE categories (
  id    INTEGER GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
  name  TEXT NOT NULL UNIQUE,
  slug  TEXT NOT NULL UNIQUE
      CHECK (slug ~ '^[a-z0-9-]+$')   -- slug 只允许小写字母、数字和连字符
);

CREATE TABLE products (
  id           BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
  sku          TEXT NOT NULL UNIQUE,
  name         TEXT NOT NULL,
  description  TEXT,
  price        NUMERIC(12, 2) NOT NULL
               CHECK (price >= 0),
  stock        INTEGER NOT NULL DEFAULT 0
               CHECK (stock >= 0),
  category_id  INTEGER NOT NULL REFERENCES categories(id)
               ON DELETE RESTRICT,        -- 有商品时不允许删除分类
  is_active    BOOLEAN NOT NULL DEFAULT TRUE,
  metadata     JSONB NOT NULL DEFAULT '{}',
  created_at   TIMESTAMPTZ NOT NULL DEFAULT NOW(),
  updated_at   TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

CREATE TABLE users (
  id          BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
  email       TEXT NOT NULL UNIQUE,
  username    TEXT NOT NULL UNIQUE,
  password_hash TEXT NOT NULL,
  created_at  TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

CREATE TABLE orders (
  id          BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
  user_id     BIGINT NOT NULL REFERENCES users(id),
  status      TEXT NOT NULL DEFAULT 'pending'
              CHECK (status IN ('pending','confirmed','shipped','delivered','cancelled')),
  total       NUMERIC(12, 2) NOT NULL DEFAULT 0,
  created_at  TIMESTAMPTZ NOT NULL DEFAULT NOW(),
  updated_at  TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

CREATE TABLE order_items (
  id          BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
  order_id    BIGINT NOT NULL REFERENCES orders(id) ON DELETE CASCADE,
  product_id  BIGINT NOT NULL REFERENCES products(id),
  quantity    INTEGER NOT NULL CHECK (quantity > 0),
  unit_price  NUMERIC(12, 2) NOT NULL,  -- 记录下单时的价格快照
  UNIQUE (order_id, product_id)          -- 同一订单中同一商品只能有一行
);

ALTER TABLE 常用操作

SQL-- 添加列
ALTER TABLE products ADD COLUMN weight_kg NUMERIC(8,3);

-- 设置列的默认值
ALTER TABLE products ALTER COLUMN weight_kg SET DEFAULT 0;

-- 修改列类型(需要 USING 子句转换旧数据)
ALTER TABLE products ALTER COLUMN stock TYPE BIGINT USING stock::BIGINT;

-- 添加约束
ALTER TABLE products ADD CONSTRAINT products_name_len
  CHECK (char_length(name) <= 200);

-- 删除约束
ALTER TABLE products DROP CONSTRAINT products_name_len;

-- 在 PostgreSQL 中 DDL 可以回滚!
BEGIN;
ALTER TABLE products DROP COLUMN description;
-- 发现误操作,回滚
ROLLBACK;  -- description 列还在!MySQL 无法做到这一点

3.2 INSERT、UPSERT 与批量操作

SQL-- 基础插入
INSERT INTO categories (name, slug) VALUES ('电子产品', 'electronics');

-- 批量插入(比多次单条插入快 10x 以上)
INSERT INTO products (sku, name, price, stock, category_id) VALUES
  ('MAC-001', 'MacBook Pro 14"', 14999.00, 50,  1),
  ('IPH-001', 'iPhone 16 Pro',   9999.00,  200, 1),
  ('ARP-001', 'AirPods Pro 2',   1899.00,  500, 1);

-- RETURNING:返回插入的行(非常有用!)
INSERT INTO users (email, username, password_hash)
VALUES ('alice@example.com', 'alice', 'hashed_pw')
RETURNING id, created_at;

-- UPSERT:ON CONFLICT DO UPDATE(PostgreSQL 特有)
-- 场景:同步商品库存,存在则更新,不存在则插入
INSERT INTO products (sku, name, price, stock, category_id)
VALUES ('MAC-001', 'MacBook Pro 14"', 13999.00, 45, 1)
ON CONFLICT (sku) DO UPDATE SET
  price      = EXCLUDED.price,
  stock      = EXCLUDED.stock,
  updated_at = NOW()
RETURNING id, sku, price, stock;
-- EXCLUDED 代表"被冲突排除的那行",即尝试插入的新数据

-- ON CONFLICT DO NOTHING:冲突时静默忽略
INSERT INTO categories (name, slug)
VALUES ('电子产品', 'electronics')
ON CONFLICT DO NOTHING;

3.3 UPDATE 与 DELETE

SQL-- 基础更新
UPDATE products SET price = price * 0.9, updated_at = NOW()
WHERE category_id = 1 AND is_active = TRUE;

-- UPDATE ... FROM(PostgreSQL 扩展:连表更新)
-- 场景:用新价格表批量更新商品价格
UPDATE products p
SET
  price = np.new_price,
  updated_at = NOW()
FROM (
  VALUES
    ('MAC-001', 15999.00),
    ('IPH-001', 10999.00)
) AS np(sku, new_price)
WHERE p.sku = np.sku
RETURNING p.id, p.sku, p.price;

-- DELETE ... RETURNING
DELETE FROM order_items
WHERE order_id = 101
RETURNING *;

-- TRUNCATE:清空表(比 DELETE 快,无法 WHERE 过滤)
TRUNCATE TABLE order_items RESTART IDENTITY CASCADE;

3.4 查询基础:SELECT、WHERE、ORDER BY、LIMIT

SQL-- 基础查询
SELECT id, name, price FROM products
WHERE is_active = TRUE
  AND price BETWEEN 1000 AND 20000
  AND name ILIKE '%pro%'   -- ILIKE 是不区分大小写的 LIKE
ORDER BY price DESC, name ASC
LIMIT 10 OFFSET 20;           -- 第3页(每页10条)

-- NULL 处理:NULL 参与任何比较都返回 NULL(不是 TRUE/FALSE)
SELECT * FROM products WHERE description IS NULL;
SELECT * FROM products WHERE description IS NOT NULL;

-- COALESCE:返回第一个非 NULL 值
SELECT name, COALESCE(description, '暂无描述') AS desc_text FROM products;

-- CASE 表达式
SELECT name, price,
  CASE
    WHEN price < 500   THEN '低价'
    WHEN price < 5000  THEN '中价'
    ELSE '高价'
  END AS price_tier
FROM products;

3.5 多表 JOIN

SQL-- INNER JOIN:只返回两表都有匹配的行
SELECT
  o.id AS order_id,
  u.username,
  u.email,
  o.status,
  o.total,
  o.created_at
FROM orders o
INNER JOIN users u ON o.user_id = u.id
WHERE o.status = 'delivered'
ORDER BY o.created_at DESC
LIMIT 20;

-- LEFT JOIN:返回左表所有行,右表无匹配时为 NULL
-- 场景:找出没有下过订单的用户
SELECT u.id, u.username, COUNT(o.id) AS order_count
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
GROUP BY u.id, u.username
HAVING COUNT(o.id) = 0;

-- 三表 JOIN:订单明细完整信息
SELECT
  o.id AS order_id,
  u.username,
  p.name AS product_name,
  oi.quantity,
  oi.unit_price,
  (oi.quantity * oi.unit_price) AS line_total
FROM order_items oi
JOIN orders   o ON oi.order_id   = o.id
JOIN users    u ON o.user_id     = u.id
JOIN products p ON oi.product_id = p.id
WHERE o.id = 1001;

3.6 聚合函数与 GROUP BY

SQL-- 常用聚合函数
SELECT
  COUNT(*) AS total_orders,
  COUNT(DISTINCT user_id) AS unique_customers,
  SUM(total) AS total_revenue,
  AVG(total) AS avg_order_value,
  MIN(total) AS min_order,
  MAX(total) AS max_order,
  PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY total) AS median_order
FROM orders
WHERE status = 'delivered';

-- GROUP BY + HAVING:按月统计营收,只看超过 10 万的月份
SELECT
  DATE_TRUNC('month', created_at) AS month,
  COUNT(*) AS orders,
  SUM(total) AS revenue
FROM orders
WHERE status = 'delivered'
GROUP BY DATE_TRUNC('month', created_at)
HAVING SUM(total) > 100000
ORDER BY month DESC;

3.7 子查询与 EXISTS

SQL-- IN 子查询:找出购买过某商品的用户
SELECT * FROM users
WHERE id IN (
  SELECT DISTINCT o.user_id
  FROM orders o
  JOIN order_items oi ON o.id = oi.order_id
  WHERE oi.product_id = 1
);

-- EXISTS(相关子查询,通常比 IN 更高效)
SELECT * FROM users u
WHERE EXISTS (
  SELECT 1 FROM orders o
  WHERE o.user_id = u.id AND o.total > 10000  -- 引用外部的 u.id
);

-- 标量子查询:在 SELECT 列表中嵌套
SELECT
  u.username,
  (
    SELECT SUM(total)
    FROM orders o
    WHERE o.user_id = u.id AND o.status = 'delivered'
  ) AS lifetime_value
FROM users u
ORDER BY lifetime_value DESC NULLS LAST
LIMIT 10;

3.8 DISTINCT ON — PostgreSQL 特有利器

DISTINCT ON 是 PostgreSQL 特有的扩展语法,可以按某些列去重,保留每组中的第一行(由 ORDER BY 决定保留哪行)。

💡

DISTINCT ON 的典型场景"取每个用户最近一次登录记录"、"取每个商品最新的价格变更"、"取每个分类中价格最低的商品"。在 MySQL 中需要用 ROW_NUMBER() 窗口函数才能实现。

SQL-- 取每个用户最近一笔订单
SELECT DISTINCT ON (user_id)
  user_id,
  id AS order_id,
  total,
  status,
  created_at
FROM orders
ORDER BY user_id, created_at DESC;
-- 规则:DISTINCT ON 的列必须出现在 ORDER BY 最前面
-- 语义:先按 user_id 分组,每组内按 created_at DESC 排序,取第一行

-- 取每个分类中价格最低的商品
SELECT DISTINCT ON (category_id)
  category_id,
  id AS product_id,
  name,
  price
FROM products
WHERE is_active = TRUE
ORDER BY category_id, price ASC;
📌

本章小结PostgreSQL 的 DDL 可以在事务中回滚,这是重大优势。INSERT ... ON CONFLICT DO UPDATE(UPSERT)是同步数据的利器,EXCLUDED 代指被冲突的新行。UPDATE ... FROM 支持连表更新。DISTINCT ON 是 PG 特有语法,优雅解决"每组取最新/最小/最大一行"的需求。