This commit is contained in:
2026-02-28 10:38:33 +08:00
parent fa37deb209
commit a5ba7e045f
13 changed files with 133 additions and 112 deletions
+34 -34
View File
@@ -10,7 +10,7 @@
|------|------|------|
| **数据生产者** | 采集电商平台数据并推送到 MQ | 各平台采集服务 |
| **消息队列** | 数据路由、缓冲、隔离 | RabbitMQ |
| **数据消费者** | 消费 MQ 消息并入库 | Dataflow App |
| **数据消费者** | 消费 MQ 消息并入库 | Datahub App |
| **存储层** | 持久化存储和统计分析 | PostgreSQL + TimescaleDB |
---
@@ -27,7 +27,7 @@ graph TB
end
subgraph "RabbitMQ 消息队列"
subgraph "VHost: dataflow-app"
subgraph "VHost: datahub-app"
subgraph "业务 Exchanges"
E1[douyin.exchange]
E2[tmall.exchange]
@@ -65,7 +65,7 @@ graph TB
end
end
subgraph "Dataflow 消费者"
subgraph "Datahub 消费者"
C1[OrderConsumer]
C2[ProductConsumer]
C3[RefundConsumer]
@@ -187,7 +187,7 @@ graph LR
end
subgraph "消费者"
CO[OrderConsumer<br/>user_dataflow_consumer]
CO[OrderConsumer<br/>user_datahub_consumer]
end
PD -->|write only| ED
@@ -387,7 +387,7 @@ message_1 后到达 (data_version: 1736840000):
```json
{
"message_id": "order#dataflow#100#20#200#order#DY123",
"message_id": "order#datahub#100#20#200#order#DY123",
"metadata": {
"data_version": 1736840100 // 必需:平台数据的最后更新时间戳
},
@@ -484,7 +484,7 @@ BEGIN;
-- 1. 记录消息已处理(幂等性)
INSERT INTO processed_messages (message_id, platform_id, data_type)
VALUES ('order#dataflow#100#20#200#order#DY123', 20, 'order')
VALUES ('order#datahub#100#20#200#order#DY123', 20, 'order')
ON CONFLICT (message_id) DO NOTHING
RETURNING message_id;
@@ -525,7 +525,7 @@ sequenceDiagram
Note over P: 1. 数据采集阶段
P->>P: 从 DouYin API 获取订单数据
P->>P: 生成 message_id<br/>order#dataflow#100#20#200#order#DY123<br/>提取 data_version
P->>P: 生成 message_id<br/>order#datahub#100#20#200#order#DY123<br/>提取 data_version
Note over P,E: 2. 消息发布阶段
P->>E: 发布消息<br/>routing_key: order<br/>持久化消息
@@ -673,7 +673,7 @@ sequenceDiagram
| 操作 | 权限 | 说明 |
|------|------|------|
| 连接到 VHost | `dataflow-app` | 只能访问此 VHost |
| 连接到 VHost | `datahub-app` | 只能访问此 VHost |
| 发布消息 | `{platform}.exchange` | 只能写入自己平台的 Exchange |
| 订阅错误 | `{platform}.errors.exchange` | 可选,接收本平台错误通知 |
@@ -685,19 +685,19 @@ sequenceDiagram
格式: {entity_type}#{app_id}#{company_id}#{platform_id}#{store_id}#{entity_type}#{platform_unique_id}
示例:
- order#dataflow#100#20#200#order#DY123456789
- product#dataflow#100#2#201#product#TM-PROD789
- refund#dataflow#100#20#200#refund#DY-REF456
- inventory#dataflow#100#2#201#inventory#TM-SKU123
- order#datahub#100#20#200#order#DY123456789
- product#datahub#100#2#201#product#TM-PROD789
- refund#datahub#100#20#200#refund#DY-REF456
- inventory#datahub#100#2#201#inventory#TM-SKU123
```
**生成逻辑**
```
# 伪代码
message_id = f"{data_type}#dataflow#{company_id}#{platform_id}#{store_id}#{data_type}#{platform_order_id}"
message_id = f"{data_type}#datahub#{company_id}#{platform_id}#{store_id}#{data_type}#{platform_unique_id}"
# 实际示例(DouYin 订单)
message_id = f"order#dataflow#100#20#200#order#{order_data['order_id']}"
message_id = f"order#datahub#100#20#200#order#{order_data['order_id']}"
```
##### 步骤 2: 提取 data_version
@@ -724,7 +724,7 @@ if not data_version:
```json
{
"message_id": "order#dataflow#100#20#200#order#DY123456",
"message_id": "order#datahub#100#20#200#order#DY123456",
"timestamp": "2025-01-14T10:30:00Z",
"platform": "douyin",
"data_type": "order",
@@ -753,7 +753,7 @@ if not data_version:
**连接参数**
- Host: `<rabbitmq-host>`
- Port: `5672`
- VHost: `dataflow-app`
- VHost: `datahub-app`
- Username: `user_{platform}`
- Password: `<provided_password>`
@@ -794,7 +794,7 @@ if not data_version:
---
### 2. Dataflow 应用(数据消费者)
### 2. Datahub 应用(数据消费者)
#### 职责
@@ -847,7 +847,7 @@ OrderConsumer (单实例)
| 操作 | 权限 | 说明 |
|------|------|------|
| 连接到 VHost | `dataflow-app` | 访问业务 VHost |
| 连接到 VHost | `datahub-app` | 访问业务 VHost |
| 读取队列 | `orders.queue`, `products.queue`, `refunds.queue` | 消费业务消息 |
| 写入错误 Exchange | `*.errors.exchange` | 发送错误消息到对应平台的错误 Exchange |
| 管理队列 | `orders.queue`, `products.queue`, `refunds.queue` | 配置消费者参数 |
@@ -892,7 +892,7 @@ BEGIN;
-- 步骤 1: 尝试插入 processed_messages(幂等性检查)
INSERT INTO processed_messages (message_id, platform_id, data_type)
VALUES ('order#dataflow#100#20#200#order#DY123', 20, 'order')
VALUES ('order#datahub#100#20#200#order#DY123', 20, 'order')
ON CONFLICT (message_id) DO NOTHING
RETURNING message_id;
@@ -994,7 +994,7 @@ COMMIT;
```bash
# 查看所有队列状态
rabbitmqctl list_queues -p dataflow-app name messages_ready messages_unacknowledged
rabbitmqctl list_queues -p datahub-app name messages_ready messages_unacknowledged
# 告警阈值
# - messages_ready > 10000: 严重堆积
@@ -1005,7 +1005,7 @@ rabbitmqctl list_queues -p dataflow-app name messages_ready messages_unacknowled
```bash
# 查看所有消费者连接
rabbitmqctl list_consumers -p dataflow-app
rabbitmqctl list_consumers -p datahub-app
# 检查项:
# - 消费者数量是否正常
@@ -1034,7 +1034,7 @@ ORDER BY error_count DESC;
```
排查步骤:
1. 检查消费者是否在线
rabbitmqctl list_consumers -p dataflow-app
rabbitmqctl list_consumers -p datahub-app
2. 查看消费速率
管理界面 → Queues → 选择队列 → 查看 Deliver/Get rate
@@ -1104,7 +1104,7 @@ ORDER BY error_count DESC;
```bash
# 启用 lazy queue (大量消息堆积时)
rabbitmqctl set_policy lazy-queue "^(orders|products|refunds)\.queue$" \
'{"queue-mode":"lazy"}' --vhost dataflow-app
'{"queue-mode":"lazy"}' --vhost datahub-app
# 效果:
# - 消息存储在磁盘,减少内存占用
@@ -1133,12 +1133,12 @@ rabbitmqctl set_policy lazy-queue "^(orders|products|refunds)\.queue$" \
```bash
# 业务 Exchange
rabbitmqadmin -u admin -p <password> declare exchange \
name="aliexpress.exchange" vhost=dataflow-app \
name="aliexpress.exchange" vhost=datahub-app \
type=topic durable=true
# 错误 Exchange
rabbitmqadmin -u admin -p <password> declare exchange \
name="aliexpress.errors.exchange" vhost=dataflow-app \
name="aliexpress.errors.exchange" vhost=datahub-app \
type=topic durable=true
```
@@ -1148,24 +1148,24 @@ rabbitmqadmin -u admin -p <password> declare exchange \
# 绑定到业务队列(使用 {data_type}.{platform} 格式)
rabbitmqadmin -u admin -p <password> declare binding \
source="aliexpress.exchange" destination="orders.queue" \
vhost=dataflow-app routing_key="order.aliexpress"
vhost=datahub-app routing_key="order.aliexpress"
rabbitmqadmin -u admin -p <password> declare binding \
source="aliexpress.exchange" destination="products.queue" \
vhost=dataflow-app routing_key="product.aliexpress"
vhost=datahub-app routing_key="product.aliexpress"
rabbitmqadmin -u admin -p <password> declare binding \
source="aliexpress.exchange" destination="refunds.queue" \
vhost=dataflow-app routing_key="refund.aliexpress"
vhost=datahub-app routing_key="refund.aliexpress"
rabbitmqadmin -u admin -p <password> declare binding \
source="aliexpress.exchange" destination="inventory.queue" \
vhost=dataflow-app routing_key="inventory.aliexpress"
vhost=datahub-app routing_key="inventory.aliexpress"
# 绑定到错误队列
rabbitmqadmin -u admin -p <password> declare binding \
source="aliexpress.errors.exchange" destination="errors.queue" \
vhost=dataflow-app routing_key="#"
vhost=datahub-app routing_key="#"
```
**步骤 3: 创建用户**
@@ -1177,7 +1177,7 @@ rabbitmqadmin -u admin -p <password> declare user \
# 配置权限
rabbitmqadmin -u admin -p <password> declare permission \
vhost=dataflow-app user="user_aliexpress" \
vhost=datahub-app user="user_aliexpress" \
configure="" write="^aliexpress\.(exchange|errors\.exchange)$" \
read="^aliexpress\.errors\..*$"
```
@@ -1188,7 +1188,7 @@ rabbitmqadmin -u admin -p <password> declare permission \
连接信息:
- Host: <rabbitmq-host>
- Port: 5672
- VHost: dataflow-app
- VHost: datahub-app
- Username: user_aliexpress
- Password: <strong_password>
@@ -1422,7 +1422,7 @@ graph TB
```json
{
"message_id": "order#dataflow#100#20#200#order#DY202501140001",
"message_id": "order#datahub#100#20#200#order#DY202501140001",
"timestamp": "2025-01-14T10:30:00Z",
"platform": "douyin",
"data_type": "order",
@@ -1459,7 +1459,7 @@ graph TB
```json
{
"message_id": "product#dataflow#100#2#201#product#TM-PROD-789",
"message_id": "product#datahub#100#2#201#product#TM-PROD-789",
"timestamp": "2025-01-14T11:00:00Z",
"platform": "tmall",
"data_type": "product",