Chapter 10

测试、CrossGuard 策略与最佳实践

为基础设施代码建立完整的质量保障体系

单元测试:mock 资源

为什么需要单元测试?

Pulumi 的独特优势在于可以对 IaC 代码进行真正的单元测试——无需创建真实云资源,通过 mock 模拟资源输出,验证程序逻辑是否正确。这比 Terraform 生态中常用的 Terratest(第三方 Go 测试库,需要真实部署)快数百倍。

Python 单元测试(unittest + pulumi.runtime.set_mocks,可用 pytest 运行)

# test_infrastructure.py
import unittest
import pulumi

class MyMocks(pulumi.runtime.Mocks):
    """Mock resource provider for unit tests.

    Supplies canned values for resource registrations (``new_resource``) and
    provider function calls (``call``) so the Pulumi program can be evaluated
    without creating real cloud resources.
    """

    def new_resource(self, args: pulumi.runtime.MockResourceArgs):
        """Return ``[id, state]`` for a resource being registered.

        The state defaults to a copy of the resource's inputs. EC2 instances
        and S3 buckets additionally receive the computed fields listed below.
        """
        # Copy so the inputs mapping handed in by the runtime is never aliased.
        outputs = dict(args.inputs)
        # MockResourceArgs exposes the resource type token as ``typ``.
        if args.typ == "aws:ec2/instance:Instance":
            outputs.update(
                {
                    "publicIp": "1.2.3.4",
                    "privateIp": "10.0.1.5",
                    "id": "i-mock123",
                }
            )
        elif args.typ == "aws:s3/bucketV2:BucketV2":
            outputs.update(
                {
                    "id": args.name + "-bucket-mock",
                    "arn": f"arn:aws:s3:::{args.name}-bucket-mock",
                }
            )
        return [args.name + "_id", outputs]

    def call(self, args: pulumi.runtime.MockCallArgs):
        """Return the mocked result of a provider function (data source) call.

        Only the availability-zone lookup is mocked; every other token yields
        an empty result.
        """
        if args.token == "aws:index/getAvailabilityZones:getAvailabilityZones":
            return {"names": ["us-east-1a", "us-east-1b"]}
        return {}


# Install the mocks at module level, before the Pulumi program is imported
pulumi.runtime.set_mocks(MyMocks())

# The Pulumi program can now be imported and tested
import infra  # your Pulumi program


class TestInfrastructure(unittest.TestCase):
    """Unit tests for tagging and security properties of the ``infra`` program.

    Each test returns an Output whose ``apply`` callback performs the
    assertions once the (mocked) resource properties have resolved.
    """

    @pulumi.runtime.test
    def test_s3_bucket_has_tags(self):
        """The S3 bucket carries the Environment and ManagedBy tags."""
        def check_bucket_tags(args):
            # A bucket without tags resolves to None; treat it as empty so the
            # test fails with the assertion message instead of a TypeError.
            tags = args[0] or {}
            self.assertIn("Environment", tags, "Bucket must have Environment tag")
            self.assertIn("ManagedBy", tags, "Bucket must have ManagedBy tag")

        return pulumi.Output.all(infra.bucket.tags).apply(check_bucket_tags)

    @pulumi.runtime.test
    def test_ec2_not_publicly_exposed(self):
        """The EC2 instance does not request a public IP address."""
        def check_not_public(args):
            associate_public_ip = args[0]
            self.assertFalse(
                associate_public_ip,
                "EC2 instance should not have a public IP"
            )

        return pulumi.Output.all(
            infra.web_server.associate_public_ip_address
        ).apply(check_not_public)

    @pulumi.runtime.test
    def test_rds_encryption_enabled(self):
        """The RDS instance has storage encryption enabled."""
        def check_encrypted(args):
            encrypted = args[0]
            self.assertTrue(encrypted, "RDS storage must be encrypted")

        return pulumi.Output.all(infra.db.storage_encrypted).apply(check_encrypted)

    @pulumi.runtime.test
    def test_security_group_no_unrestricted_ssh(self):
        """The security group does not open SSH (port 22) to 0.0.0.0/0."""
        def check_no_open_ssh(args):
            # A security group without ingress rules resolves to None.
            ingress_rules = args[0] or []
            for rule in ingress_rules:
                # Nested properties are exposed under snake_case keys in the
                # Python SDK; looking up the camelCase names never matches and
                # would let this check pass without inspecting anything.
                if rule.get("from_port") == 22:
                    cidr_blocks = rule.get("cidr_blocks") or []
                    self.assertNotIn(
                        "0.0.0.0/0", cidr_blocks,
                        "SSH port 22 should not be open to 0.0.0.0/0"
                    )

        return pulumi.Output.all(infra.web_sg.ingress).apply(check_no_open_ssh)


# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
# Run the unit tests
python -m pytest test_infrastructure.py -v

# Example output:
# test_s3_bucket_has_tags PASSED
# test_ec2_not_publicly_exposed PASSED
# test_rds_encryption_enabled PASSED
# test_security_group_no_unrestricted_ssh PASSED

集成测试:真实部署 + 验证 + 销毁

# integration_test.py — 使用 Automation API 进行集成测试
import pytest
import requests
from pulumi import automation as auto

@pytest.fixture(scope="session")
def deployed_stack():
    """Deploy the ``integration-test`` stack and yield its outputs.

    The stack is created (or selected) from the Pulumi program in the current
    directory, configured with ``environment=test`` and deployed once per test
    session.

    Yields:
        The stack outputs returned by ``stack.outputs()``.

    Teardown runs in a ``finally`` block so that a deployment that fails
    partway through is still destroyed instead of leaving resources behind.
    """
    stack = auto.create_or_select_stack(
        stack_name="integration-test",
        work_dir=".",
    )
    try:
        stack.set_config("environment", auto.ConfigValue("test"))

        # Deploy, streaming engine output to stdout.
        stack.up(on_output=print)

        yield stack.outputs()  # tests run here
    finally:
        # Destroy first; the stack is only removed if the destroy succeeded.
        stack.destroy(on_output=print)
        stack.workspace.remove_stack("integration-test")


def test_web_server_responds(deployed_stack):
    """The deployed web URL answers a GET request with HTTP 200."""
    web_url = deployed_stack["web_url"].value
    status = requests.get(web_url, timeout=30).status_code
    assert status == 200


def test_health_endpoint(deployed_stack):
    """The /health endpoint returns HTTP 200 and a JSON body with status "ok"."""
    base_url = deployed_stack["web_url"].value
    health = requests.get(f"{base_url}/health", timeout=30)
    assert health.status_code == 200
    payload = health.json()
    assert payload["status"] == "ok"

CrossGuard:Policy as Code 合规检查

什么是 CrossGuard?

CrossGuard 是 Pulumi 的 Policy as Code 框架,允许你用 TypeScript/Python 编写合规策略,在 pulumi up 时自动检查基础设施是否符合安全标准。违反策略的部署会被阻止。

# Create a Policy Pack project
mkdir my-policies && cd my-policies
pulumi policy new aws-typescript  # use the TypeScript template
// index.ts — CrossGuard 合规策略
import { PolicyPack, validateResourceOfType } from "@pulumi/policy";
import * as aws from "@pulumi/aws";

new PolicyPack("aws-security-policies", {
    policies: [
        {
            // Policy 1: S3 buckets must not use a public canned ACL.
            name: "s3-no-public-read",
            description: "S3 Buckets must not allow public access",
            enforcementLevel: "mandatory",  // mandatory = a violation blocks the deployment
            validateResource: validateResourceOfType(aws.s3.BucketV2, (_bucket, args, report) => {
                // Only the bucket's own `acl` input is inspected. ACLs or
                // public-access settings managed through separate resources
                // are not covered by this policy.
                const acl = args.props["acl"];
                if (acl === "public-read" || acl === "public-read-write") {
                    // args.name is the resource's logical Pulumi name.
                    report(`S3 bucket '${args.name}' has public ACL '${acl}'. Must be private.`);
                }
            }),
        },

        {
            // Policy 2: every EC2 instance must carry the required tags.
            name: "ec2-required-tags",
            description: "EC2 instances must have required tags",
            enforcementLevel: "mandatory",
            validateResource: validateResourceOfType(aws.ec2.Instance, (_instance, args, report) => {
                const requiredTags = ["Environment", "Team", "CostCenter"];
                const tags = args.props["tags"] || {};
                for (const tag of requiredTags) {
                    // A missing tag and an empty tag value are both violations.
                    if (!tags[tag]) {
                        report(`EC2 '${args.name}' missing required tag '${tag}'`);
                    }
                }
            }),
        },

        {
            // Policy 3: RDS instances must enable storage encryption.
            name: "rds-storage-encrypted",
            description: "RDS instances must have storage encryption enabled",
            enforcementLevel: "mandatory",
            validateResource: validateResourceOfType(aws.rds.Instance, (_db, args, report) => {
                if (!args.props["storageEncrypted"]) {
                    // Use the logical resource name rather than a resource
                    // property, so the message identifies the Pulumi resource.
                    report(`RDS '${args.name}' must have storage encryption enabled`);
                }
            }),
        },

        {
            // Policy 4: restrict EC2 instance types (guards against oversized machines).
            name: "ec2-allowed-instance-types",
            description: "Only approved EC2 instance types are allowed",
            enforcementLevel: "advisory",  // advisory = warn but do not block
            validateResource: validateResourceOfType(aws.ec2.Instance, (_instance, args, report) => {
                const allowedTypes = ["t3.micro", "t3.small", "t3.medium", "c5.large"];
                const instanceType = args.props["instanceType"];
                if (!allowedTypes.includes(instanceType)) {
                    report(`EC2 instance type '${instanceType}' is not in approved list: ${allowedTypes.join(", ")}`);
                }
            }),
        },
    ],
});
# Run the Policy Pack locally against a single update
pulumi up --policy-pack ./my-policies

# Publish the Policy Pack to Pulumi Cloud (makes it available to the whole organization)
cd my-policies
pulumi policy publish

# Enforce the published pack for the organization.
# The pack is addressed as <org-name>/<policy-pack-name>; set ORG_NAME to your
# Pulumi organization first. Without --policy-group the pack is enabled for the
# organization's default policy group.
pulumi policy enable "${ORG_NAME}/aws-security-policies" latest

从 Terraform 迁移到 Pulumi

pulumi convert 自动转换

# Install the Pulumi CLI (this formula installs Pulumi itself; `pulumi convert` is a CLI subcommand)
brew install pulumi/tap/pulumi

# Convert the Terraform HCL in the current directory to Python
pulumi convert --from terraform --language python --out ./pulumi-infra

# Convert to TypeScript
pulumi convert --from terraform --language typescript --out ./pulumi-ts

# Convert a specific directory
pulumi convert --from terraform --language python \
  --out ./pulumi-output \
  ./terraform-project
pulumi convert 的局限性

自动转换只能处理基础 HCL 语法(resource、variable、output),不能完全转换 Terraform 的 count/for_each 逻辑、自定义函数和复杂表达式。转换后的代码通常需要人工审查和调整,但可以减少 70-80% 的手工工作量。

手动迁移策略

01

先用 pulumi import 接管现有资源

不要销毁再重建!用 pulumi import 将现有 Terraform 管理的资源纳入 Pulumi 管理,零停机迁移。

02

从叶子资源开始

从没有依赖的资源(S3、IAM Role)开始迁移,再逐步迁移有依赖的资源(EC2 引用 VPC、SG)。

03

双轨运行过渡期

Terraform 和 Pulumi 可以短期共存——它们管理各自的资源,通过输出值共享必要信息。

04

利用 pulumi convert 生成初始代码

pulumi convert --from terraform 生成初始 Python/TypeScript 代码,再配合 pulumi import 建立 State 映射。

项目结构最佳实践

单体 Stack vs 微 Stack

Mono-Stack(不推荐大型项目)

my-infra/
├── network.py
├── compute.py
├── database.py
├── kubernetes.py
└── __main__.py   ← 所有资源在一个 Stack

每次 up 都要检查所有资源

Micro-Stack(推荐)

infra/
├── network/       ← Stack 1:VPC 网络
├── platform/      ← Stack 2:RDS/EKS
├── apps/
│   ├── api/       ← Stack 3:API 服务
│   └── worker/    ← Stack 4:Worker 服务
└── monitoring/    ← Stack 5:监控告警

命名与标签规范

# 推荐的命名和标签规范
import pulumi

# Values shared by the naming and tagging helpers.
config = pulumi.Config()
env      = config.require("environment")      # required config key
team     = config.get("team") or "platform"   # falls back to "platform" when unset or empty
project  = pulumi.get_project()
stack    = pulumi.get_stack()   # name of the current stack

# 统一的标签函数
def common_tags(name: str, extra: "dict | None" = None) -> dict:
    """Build the standard tag set for a resource.

    Args:
        name: Value for the ``Name`` tag.
        extra: Optional additional tags. They are merged last, so on a key
            collision they override the standard tags.

    Returns:
        A new dict containing the standard tags plus ``extra``.
    """
    # ``None`` replaces the former ``{}`` default so no mutable object is
    # shared between calls.
    return {
        "Name": name,
        "Environment": env,
        "Team": team,
        "Project": project,
        "ManagedBy": "Pulumi",
        **(extra or {}),
    }

# 统一的资源名称函数(避免在多 Stack 中冲突)
def resource_name(base: str) -> str:
    """Return ``base`` suffixed with the environment, e.g. "vpc-prod" or "db-staging"."""
    return "{}-{}".format(base, env)

# 使用
import pulumi_aws as aws
# Example: the logical name gets the environment suffix, while the Name tag
# is passed the base name.
vpc = aws.ec2.Vpc(
    resource_name("main-vpc"),
    cidr_block="10.0.0.0/16",
    tags=common_tags("main-vpc"),
)

成本优化建议

Dev 环境缩减
开发环境不需要 Multi-AZ、NAT Gateway 可以共享(不是每个 AZ 一个)、RDS 用 t3.micro 而不是 r5.large,可节省 70-90% 成本。
Spot 实例
对于可中断的工作负载(批处理、Worker),在 Launch Template 中配置 Spot 实例,成本比按需降低 60-90%。
定时启停 Dev 环境
用 Automation API 编写脚本,工作时间(9am-6pm)自动启动 dev 环境,下班后自动停止(Pulumi destroy),周末不计费。
资源标签+成本追踪
确保所有资源有 Team 和 Project 标签,在 AWS Cost Explorer 按标签过滤,了解每个团队/项目的云成本。

学习路线图

Pulumi 学习路径

阶段1:入门(完成本课程)
─────────────────────────────────────────────────
✓ Pulumi CLI 安装 + pulumi up
✓ Output<T> + apply() 理解
✓ Stack 多环境隔离
✓ TypeScript 或 Python 实战

阶段2:进阶(团队应用)
─────────────────────────────────────────────────
→ ComponentResource 封装内部组件库
→ GitHub Actions CI/CD + PR Preview
→ CrossGuard 合规策略
→ Stack Reference 分层架构

阶段3:专家(平台工程)
─────────────────────────────────────────────────
→ Automation API 构建内部开发者平台
→ 发布内部 npm/PyPI 组件库
→ 多账号多区域架构
→ Pulumi AI(自然语言生成 IaC)

本章小结与课程总结

第10章核心要点
完成课程!继续深入