update
This commit is contained in:
+34
-34
@@ -10,7 +10,7 @@
|
||||
|------|------|------|
|
||||
| **数据生产者** | 采集电商平台数据并推送到 MQ | 各平台采集服务 |
|
||||
| **消息队列** | 数据路由、缓冲、隔离 | RabbitMQ |
|
||||
| **数据消费者** | 消费 MQ 消息并入库 | Dataflow App |
|
||||
| **数据消费者** | 消费 MQ 消息并入库 | Datahub App |
|
||||
| **存储层** | 持久化存储和统计分析 | PostgreSQL + TimescaleDB |
|
||||
|
||||
---
|
||||
@@ -27,7 +27,7 @@ graph TB
|
||||
end
|
||||
|
||||
subgraph "RabbitMQ 消息队列"
|
||||
subgraph "VHost: dataflow-app"
|
||||
subgraph "VHost: datahub-app"
|
||||
subgraph "业务 Exchanges"
|
||||
E1[douyin.exchange]
|
||||
E2[tmall.exchange]
|
||||
@@ -65,7 +65,7 @@ graph TB
|
||||
end
|
||||
end
|
||||
|
||||
subgraph "Dataflow 消费者"
|
||||
subgraph "Datahub 消费者"
|
||||
C1[OrderConsumer]
|
||||
C2[ProductConsumer]
|
||||
C3[RefundConsumer]
|
||||
@@ -187,7 +187,7 @@ graph LR
|
||||
end
|
||||
|
||||
subgraph "消费者"
|
||||
CO[OrderConsumer<br/>user_dataflow_consumer]
|
||||
CO[OrderConsumer<br/>user_datahub_consumer]
|
||||
end
|
||||
|
||||
PD -->|write only| ED
|
||||
@@ -387,7 +387,7 @@ message_1 后到达 (data_version: 1736840000):
|
||||
|
||||
```json
|
||||
{
|
||||
"message_id": "order#dataflow#100#20#200#order#DY123",
|
||||
"message_id": "order#datahub#100#20#200#order#DY123",
|
||||
"metadata": {
|
||||
"data_version": 1736840100 // 必需:平台数据的最后更新时间戳
|
||||
},
|
||||
@@ -484,7 +484,7 @@ BEGIN;
|
||||
|
||||
-- 1. 记录消息已处理(幂等性)
|
||||
INSERT INTO processed_messages (message_id, platform_id, data_type)
|
||||
VALUES ('order#dataflow#100#20#200#order#DY123', 20, 'order')
|
||||
VALUES ('order#datahub#100#20#200#order#DY123', 20, 'order')
|
||||
ON CONFLICT (message_id) DO NOTHING
|
||||
RETURNING message_id;
|
||||
|
||||
@@ -525,7 +525,7 @@ sequenceDiagram
|
||||
|
||||
Note over P: 1. 数据采集阶段
|
||||
P->>P: 从 DouYin API 获取订单数据
|
||||
P->>P: 生成 message_id<br/>order#dataflow#100#20#200#order#DY123<br/>提取 data_version
|
||||
P->>P: 生成 message_id<br/>order#datahub#100#20#200#order#DY123<br/>提取 data_version
|
||||
|
||||
Note over P,E: 2. 消息发布阶段
|
||||
P->>E: 发布消息<br/>routing_key: order.douyin<br/>持久化消息
|
||||
@@ -673,7 +673,7 @@ sequenceDiagram
|
||||
|
||||
| 操作 | 权限 | 说明 |
|
||||
|------|------|------|
|
||||
| 连接到 VHost | `dataflow-app` | 只能访问此 VHost |
|
||||
| 连接到 VHost | `datahub-app` | 只能访问此 VHost |
|
||||
| 发布消息 | `{platform}.exchange` | 只能写入自己平台的 Exchange |
|
||||
| 订阅错误 | `{platform}.errors.exchange` | 可选,接收本平台错误通知 |
|
||||
|
||||
@@ -685,19 +685,19 @@ sequenceDiagram
|
||||
格式: {entity_type}#{app_id}#{company_id}#{platform_id}#{store_id}#{entity_type}#{platform_unique_id}
|
||||
|
||||
示例:
|
||||
- order#dataflow#100#20#200#order#DY123456789
|
||||
- product#dataflow#100#2#201#product#TM-PROD789
|
||||
- refund#dataflow#100#20#200#refund#DY-REF456
|
||||
- inventory#dataflow#100#2#201#inventory#TM-SKU123
|
||||
- order#datahub#100#20#200#order#DY123456789
|
||||
- product#datahub#100#2#201#product#TM-PROD789
|
||||
- refund#datahub#100#20#200#refund#DY-REF456
|
||||
- inventory#datahub#100#2#201#inventory#TM-SKU123
|
||||
```
|
||||
|
||||
**生成逻辑**:
|
||||
```
|
||||
# 伪代码
|
||||
message_id = f"{data_type}#dataflow#{company_id}#{platform_id}#{store_id}#{data_type}#{platform_order_id}"
|
||||
message_id = f"{data_type}#datahub#{company_id}#{platform_id}#{store_id}#{data_type}#{platform_unique_id}"
|
||||
|
||||
# 实际示例(DouYin 订单)
|
||||
message_id = f"order#dataflow#100#20#200#order#{order_data['order_id']}"
|
||||
message_id = f"order#datahub#100#20#200#order#{order_data['order_id']}"
|
||||
```
|
||||
|
||||
##### 步骤 2: 提取 data_version
|
||||
@@ -724,7 +724,7 @@ if not data_version:
|
||||
|
||||
```json
|
||||
{
|
||||
"message_id": "order#dataflow#100#20#200#order#DY123456",
|
||||
"message_id": "order#datahub#100#20#200#order#DY123456",
|
||||
"timestamp": "2025-01-14T10:30:00Z",
|
||||
"platform": "douyin",
|
||||
"data_type": "order",
|
||||
@@ -753,7 +753,7 @@ if not data_version:
|
||||
**连接参数**:
|
||||
- Host: `<rabbitmq-host>`
|
||||
- Port: `5672`
|
||||
- VHost: `dataflow-app`
|
||||
- VHost: `datahub-app`
|
||||
- Username: `user_{platform}`
|
||||
- Password: `<provided_password>`
|
||||
|
||||
@@ -794,7 +794,7 @@ if not data_version:
|
||||
|
||||
---
|
||||
|
||||
### 2. Dataflow 应用(数据消费者)
|
||||
### 2. Datahub 应用(数据消费者)
|
||||
|
||||
#### 职责
|
||||
|
||||
@@ -847,7 +847,7 @@ OrderConsumer (单实例)
|
||||
|
||||
| 操作 | 权限 | 说明 |
|
||||
|------|------|------|
|
||||
| 连接到 VHost | `dataflow-app` | 访问业务 VHost |
|
||||
| 连接到 VHost | `datahub-app` | 访问业务 VHost |
|
||||
| 读取队列 | `orders.queue`, `products.queue`, `refunds.queue` | 消费业务消息 |
|
||||
| 写入错误 Exchange | `*.errors.exchange` | 发送错误消息到对应平台的错误 Exchange |
|
||||
| 管理队列 | `orders.queue`, `products.queue`, `refunds.queue` | 配置消费者参数 |
|
||||
@@ -892,7 +892,7 @@ BEGIN;
|
||||
|
||||
-- 步骤 1: 尝试插入 processed_messages(幂等性检查)
|
||||
INSERT INTO processed_messages (message_id, platform_id, data_type)
|
||||
VALUES ('order#dataflow#100#20#200#order#DY123', 20, 'order')
|
||||
VALUES ('order#datahub#100#20#200#order#DY123', 20, 'order')
|
||||
ON CONFLICT (message_id) DO NOTHING
|
||||
RETURNING message_id;
|
||||
|
||||
@@ -994,7 +994,7 @@ COMMIT;
|
||||
|
||||
```bash
|
||||
# 查看所有队列状态
|
||||
rabbitmqctl list_queues -p dataflow-app name messages_ready messages_unacknowledged
|
||||
rabbitmqctl list_queues -p datahub-app name messages_ready messages_unacknowledged
|
||||
|
||||
# 告警阈值
|
||||
# - messages_ready > 10000: 严重堆积
|
||||
@@ -1005,7 +1005,7 @@ rabbitmqctl list_queues -p dataflow-app name messages_ready messages_unacknowled
|
||||
|
||||
```bash
|
||||
# 查看所有消费者连接
|
||||
rabbitmqctl list_consumers -p dataflow-app
|
||||
rabbitmqctl list_consumers -p datahub-app
|
||||
|
||||
# 检查项:
|
||||
# - 消费者数量是否正常
|
||||
@@ -1034,7 +1034,7 @@ ORDER BY error_count DESC;
|
||||
```
|
||||
排查步骤:
|
||||
1. 检查消费者是否在线
|
||||
rabbitmqctl list_consumers -p dataflow-app
|
||||
rabbitmqctl list_consumers -p datahub-app
|
||||
|
||||
2. 查看消费速率
|
||||
管理界面 → Queues → 选择队列 → 查看 Deliver/Get rate
|
||||
@@ -1104,7 +1104,7 @@ ORDER BY error_count DESC;
|
||||
```bash
|
||||
# 启用 lazy queue (大量消息堆积时)
|
||||
rabbitmqctl set_policy lazy-queue "^(orders|products|refunds)\.queue$" \
|
||||
'{"queue-mode":"lazy"}' --vhost dataflow-app
|
||||
'{"queue-mode":"lazy"}' --vhost datahub-app
|
||||
|
||||
# 效果:
|
||||
# - 消息存储在磁盘,减少内存占用
|
||||
@@ -1133,12 +1133,12 @@ rabbitmqctl set_policy lazy-queue "^(orders|products|refunds)\.queue$" \
|
||||
```bash
|
||||
# 业务 Exchange
|
||||
rabbitmqadmin -u admin -p <password> declare exchange \
|
||||
name="aliexpress.exchange" vhost=dataflow-app \
|
||||
name="aliexpress.exchange" vhost=datahub-app \
|
||||
type=topic durable=true
|
||||
|
||||
# 错误 Exchange
|
||||
rabbitmqadmin -u admin -p <password> declare exchange \
|
||||
name="aliexpress.errors.exchange" vhost=dataflow-app \
|
||||
name="aliexpress.errors.exchange" vhost=datahub-app \
|
||||
type=topic durable=true
|
||||
```
|
||||
|
||||
@@ -1148,24 +1148,24 @@ rabbitmqadmin -u admin -p <password> declare exchange \
|
||||
# 绑定到业务队列(使用 {data_type}.{platform} 格式)
|
||||
rabbitmqadmin -u admin -p <password> declare binding \
|
||||
source="aliexpress.exchange" destination="orders.queue" \
|
||||
vhost=dataflow-app routing_key="order.aliexpress"
|
||||
vhost=datahub-app routing_key="order.aliexpress"
|
||||
|
||||
rabbitmqadmin -u admin -p <password> declare binding \
|
||||
source="aliexpress.exchange" destination="products.queue" \
|
||||
vhost=dataflow-app routing_key="product.aliexpress"
|
||||
vhost=datahub-app routing_key="product.aliexpress"
|
||||
|
||||
rabbitmqadmin -u admin -p <password> declare binding \
|
||||
source="aliexpress.exchange" destination="refunds.queue" \
|
||||
vhost=dataflow-app routing_key="refund.aliexpress"
|
||||
vhost=datahub-app routing_key="refund.aliexpress"
|
||||
|
||||
rabbitmqadmin -u admin -p <password> declare binding \
|
||||
source="aliexpress.exchange" destination="inventory.queue" \
|
||||
vhost=dataflow-app routing_key="inventory.aliexpress"
|
||||
vhost=datahub-app routing_key="inventory.aliexpress"
|
||||
|
||||
# 绑定到错误队列
|
||||
rabbitmqadmin -u admin -p <password> declare binding \
|
||||
source="aliexpress.errors.exchange" destination="errors.queue" \
|
||||
vhost=dataflow-app routing_key="#"
|
||||
vhost=datahub-app routing_key="#"
|
||||
```
|
||||
|
||||
**步骤 3: 创建用户**
|
||||
@@ -1177,7 +1177,7 @@ rabbitmqadmin -u admin -p <password> declare user \
|
||||
|
||||
# 配置权限
|
||||
rabbitmqadmin -u admin -p <password> declare permission \
|
||||
vhost=dataflow-app user="user_aliexpress" \
|
||||
vhost=datahub-app user="user_aliexpress" \
|
||||
configure="" write="^aliexpress\.(exchange|errors\.exchange)$" \
|
||||
read="^aliexpress\.errors\..*$"
|
||||
```
|
||||
@@ -1188,7 +1188,7 @@ rabbitmqadmin -u admin -p <password> declare permission \
|
||||
连接信息:
|
||||
- Host: <rabbitmq-host>
|
||||
- Port: 5672
|
||||
- VHost: dataflow-app
|
||||
- VHost: datahub-app
|
||||
- Username: user_aliexpress
|
||||
- Password: <strong_password>
|
||||
|
||||
@@ -1422,7 +1422,7 @@ graph TB
|
||||
|
||||
```json
|
||||
{
|
||||
"message_id": "order#dataflow#100#20#200#order#DY202501140001",
|
||||
"message_id": "order#datahub#100#20#200#order#DY202501140001",
|
||||
"timestamp": "2025-01-14T10:30:00Z",
|
||||
"platform": "douyin",
|
||||
"data_type": "order",
|
||||
@@ -1459,7 +1459,7 @@ graph TB
|
||||
|
||||
```json
|
||||
{
|
||||
"message_id": "product#dataflow#100#2#201#product#TM-PROD-789",
|
||||
"message_id": "product#datahub#100#2#201#product#TM-PROD-789",
|
||||
"timestamp": "2025-01-14T11:00:00Z",
|
||||
"platform": "tmall",
|
||||
"data_type": "product",
|
||||
|
||||
Reference in New Issue
Block a user