OceanBase批量插入数据时,响应时间抖动得非常厉害

【 使用环境 】测试环境
【 OB or 其他组件 】 OceanBase_CE 4.2.1
我们在 测试环境 进行如下测试操作:

  1. 从 A 表查询数据,经过处理后,批量插入 B 表。
  2. 每批次处理的数据为 20000 条。通过原生 JDBC 进行批量插入( 20000条是在同一个事务中,每3500条调用一次 executeBatch() )。
  3. 开了7个线程并行执行上述流程。
  4. A 表是传统单表,有40亿行数据,查询耗时 0.12秒 左右。
  5. B 表是分区表,每次插入的耗时,从 1.x 秒 ~ 70.x 秒不等(如下图所示)。

image

我们通过查询 oceanbase.v$OB_SQL_AUDIT 得到的慢查询如下所示( 为精简内容,只提供2条 ,批量插入的 SQL 太长已省略)

[
  {
    "SVR_IP": "172.31.46.6",
    "SVR_PORT": 2882,
    "REQUEST_ID": 424837637,
    "SQL_EXEC_ID": 505109686,
    "TRACE_ID": "YB42AC1F2E06-00061BAFD2ED7786-0-0",
    "SID": 3221749360,
    "CLIENT_IP": "172.31.45.237",
    "CLIENT_PORT": 59728,
    "TENANT_ID": 1002,
    "TENANT_NAME": "test",
    "EFFECTIVE_TENANT_ID": 1002,
    "USER_ID": 200001,
    "USER_NAME": "root",
    "USER_GROUP": 0,
    "USER_CLIENT_IP": "172.31.46.49",
    "DB_ID": 503602,
    "DB_NAME": "test",
    "SQL_ID": "E12E769FAB3D9A8F06824C9B383377C8",
    "QUERY_SQL": "",
    "PLAN_ID": 48743,
    "AFFECTED_ROWS": 3500,
    "RETURN_ROWS": 0,
    "PARTITION_CNT": 128,
    "RET_CODE": 0,
    "QC_ID": 0,
    "DFO_ID": 0,
    "SQC_ID": 0,
    "WORKER_ID": 0,
    "EVENT": "das wait remote response",
    "P1TEXT": "",
    "P1": 140505562599568,
    "P2TEXT": "",
    "P2": 0,
    "P3TEXT": "",
    "P3": 0,
    "LEVEL": 0,
    "WAIT_CLASS_ID": 107,
    "WAIT_CLASS#": 7,
    "WAIT_CLASS": "NETWORK",
    "STATE": "WAITED KNOWN TIME",
    "WAIT_TIME_MICRO": 51136536,
    "TOTAL_WAIT_TIME_MICRO": 51136536,
    "TOTAL_WAITS": 1,
    "RPC_COUNT": 42,
    "PLAN_TYPE": 1,
    "IS_INNER_SQL": 0,
    "IS_EXECUTOR_RPC": 0,
    "IS_HIT_PLAN": 1,
    "REQUEST_TIME": 1720771898387968,
    "ELAPSED_TIME": 51284594,
    "NET_TIME": 0,
    "NET_WAIT_TIME": 2,
    "QUEUE_TIME": 14,
    "DECODE_TIME": 1,
    "GET_PLAN_TIME": 11985,
    "EXECUTE_TIME": 51272308,
    "APPLICATION_WAIT_TIME": 0,
    "CONCURRENCY_WAIT_TIME": 0,
    "USER_IO_WAIT_TIME": 0,
    "SCHEDULE_TIME": 0,
    "ROW_CACHE_HIT": 0,
    "BLOOM_FILTER_CACHE_HIT": 0,
    "BLOCK_CACHE_HIT": 9408,
    "DISK_READS": 0,
    "RETRY_CNT": 0,
    "TABLE_SCAN": 0,
    "CONSISTENCY_LEVEL": 3,
    "MEMSTORE_READ_ROW_COUNT": 0,
    "SSSTORE_READ_ROW_COUNT": 0,
    "DATA_BLOCK_READ_CNT": 4702,
    "DATA_BLOCK_CACHE_HIT": 4702,
    "INDEX_BLOCK_READ_CNT": 9412,
    "INDEX_BLOCK_CACHE_HIT": 4706,
    "BLOCKSCAN_BLOCK_CNT": 0,
    "BLOCKSCAN_ROW_CNT": 0,
    "PUSHDOWN_STORAGE_FILTER_ROW_CNT": 0,
    "REQUEST_MEMORY_USED": 31426529,
    "EXPECTED_WORKER_COUNT": 0,
    "USED_WORKER_COUNT": 0,
    "SCHED_INFO": null,
    "FUSE_ROW_CACHE_HIT": 0,
    "PS_CLIENT_STMT_ID": -1,
    "PS_INNER_STMT_ID": -1,
    "TX_ID": 506214539,
    "SNAPSHOT_VERSION": 0,
    "REQUEST_TYPE": 2,
    "IS_BATCHED_MULTI_STMT": 0,
    "OB_TRACE_INFO": null,
    "PLAN_HASH": 3290318591324336132,
    "LOCK_FOR_READ_TIME": 0,
    "PARAMS_VALUE": "",
    "RULE_NAME": "",
    "PARTITION_HIT": 1,
    "TX_INTERNAL_ROUTING": 1,
    "TX_STATE_VERSION": 417,
    "FLT_TRACE_ID": "",
    "NETWORK_WAIT_TIME": 51136536
  },
  {
    "SVR_IP": "172.31.46.6",
    "SVR_PORT": 2882,
    "REQUEST_ID": 424837464,
    "SQL_EXEC_ID": 505109566,
    "TRACE_ID": "YB42AC1F2E06-00061BAFD7F9056B-0-0",
    "SID": 3221749256,
    "CLIENT_IP": "172.31.46.6",
    "CLIENT_PORT": 54212,
    "TENANT_ID": 1002,
    "TENANT_NAME": "test",
    "EFFECTIVE_TENANT_ID": 1002,
    "USER_ID": 200001,
    "USER_NAME": "root",
    "USER_GROUP": 0,
    "USER_CLIENT_IP": "172.31.46.49",
    "DB_ID": 503602,
    "DB_NAME": "test",
    "SQL_ID": "E12E769FAB3D9A8F06824C9B383377C8",
    "QUERY_SQL": "*",
    "PLAN_ID": 48743,
    "AFFECTED_ROWS": 3500,
    "RETURN_ROWS": 0,
    "PARTITION_CNT": 128,
    "RET_CODE": 0,
    "QC_ID": 0,
    "DFO_ID": 0,
    "SQC_ID": 0,
    "WORKER_ID": 0,
    "EVENT": "das wait remote response",
    "P1TEXT": "",
    "P1": 140467300063936,
    "P2TEXT": "",
    "P2": 0,
    "P3TEXT": "",
    "P3": 0,
    "LEVEL": 0,
    "WAIT_CLASS_ID": 107,
    "WAIT_CLASS#": 7,
    "WAIT_CLASS": "NETWORK",
    "STATE": "WAITED KNOWN TIME",
    "WAIT_TIME_MICRO": 44044948,
    "TOTAL_WAIT_TIME_MICRO": 44044948,
    "TOTAL_WAITS": 1,
    "RPC_COUNT": 36,
    "PLAN_TYPE": 1,
    "IS_INNER_SQL": 0,
    "IS_EXECUTOR_RPC": 0,
    "IS_HIT_PLAN": 1,
    "REQUEST_TIME": 1720771896652022,
    "ELAPSED_TIME": 44160137,
    "NET_TIME": 0,
    "NET_WAIT_TIME": 2,
    "QUEUE_TIME": 14,
    "DECODE_TIME": 1,
    "GET_PLAN_TIME": 9008,
    "EXECUTE_TIME": 44150790,
    "APPLICATION_WAIT_TIME": 0,
    "CONCURRENCY_WAIT_TIME": 0,
    "USER_IO_WAIT_TIME": 0,
    "SCHEDULE_TIME": 0,
    "ROW_CACHE_HIT": 0,
    "BLOOM_FILTER_CACHE_HIT": 0,
    "BLOCK_CACHE_HIT": 9684,
    "DISK_READS": 0,
    "RETRY_CNT": 0,
    "TABLE_SCAN": 0,
    "CONSISTENCY_LEVEL": 3,
    "MEMSTORE_READ_ROW_COUNT": 0,
    "SSSTORE_READ_ROW_COUNT": 0,
    "DATA_BLOCK_READ_CNT": 4558,
    "DATA_BLOCK_CACHE_HIT": 4558,
    "INDEX_BLOCK_READ_CNT": 10252,
    "INDEX_BLOCK_CACHE_HIT": 5126,
    "BLOCKSCAN_BLOCK_CNT": 0,
    "BLOCKSCAN_ROW_CNT": 0,
    "PUSHDOWN_STORAGE_FILTER_ROW_CNT": 0,
    "REQUEST_MEMORY_USED": 30858687,
    "EXPECTED_WORKER_COUNT": 0,
    "USED_WORKER_COUNT": 0,
    "SCHED_INFO": null,
    "FUSE_ROW_CACHE_HIT": 0,
    "PS_CLIENT_STMT_ID": -1,
    "PS_INNER_STMT_ID": -1,
    "TX_ID": 506214534,
    "SNAPSHOT_VERSION": 0,
    "REQUEST_TYPE": 2,
    "IS_BATCHED_MULTI_STMT": 0,
    "OB_TRACE_INFO": null,
    "PLAN_HASH": 3290318591324336132,
    "LOCK_FOR_READ_TIME": 0,
    "PARAMS_VALUE": "",
    "RULE_NAME": "",
    "PARTITION_HIT": 1,
    "TX_INTERNAL_ROUTING": 1,
    "TX_STATE_VERSION": 228,
    "FLT_TRACE_ID": "",
    "NETWORK_WAIT_TIME": 44044948
  }
]

B 表的建表分区情况大致如下:

-- Approximate schema of table B, as posted by the author.
-- NOTE(review): `days` and `uid` are VIRTUAL generated columns derived solely from
-- `id`, so both partition keys are fully determined by the primary-key value —
-- partition routing for an insert depends only on `id`.
CREATE TABLE `table_b` (
    `id` bigint unsigned NOT NULL,
    `merchant_id` int unsigned NOT NULL,
    `user_id` int unsigned NOT NULL,
    `local_time` datetime NOT NULL,
    -- (id & 65504) >> 5 extracts bits 5..15 of `id`; presumably an encoded day
    -- number (partition names suggest one calendar date per LIST partition) — TODO confirm.
    `days` SMALLINT UNSIGNED GENERATED ALWAYS AS ( (id & 65504) >> 5 ) VIRTUAL,
    -- id & 31 extracts the low 5 bits of `id` (range 0..31), matching the
    -- 32 HASH subpartitions below.
    `uid` TINYINT UNSIGNED GENERATED ALWAYS AS ( id & 31 ) VIRTUAL,
    PRIMARY KEY (`id`) USING BTREE
) DEFAULT CHARSET=utf8mb4
    -- Two-level partitioning: LIST on the day bucket, then HASH into 32 subpartitions.
    -- Each LIST partition enumerates 16 `days` values, giving 16 * 32 = 512 leaf
    -- subpartitions per named partition as written here.
    PARTITION BY LIST (days)
        SUBPARTITION BY HASH (uid) SUBPARTITIONS 32
    (
        PARTITION P20240611_0 VALUES IN ( 1632, 1633, 1634, 1635, 1636, 1637, 1638, 1639, 1640, 1641, 1642, 1643, 1644, 1645, 1646, 1647 ),
        PARTITION P20240612_0 VALUES IN ( 1648, 1649, 1650, 1651, 1652, 1653, 1654, 1655, 1656, 1657, 1658, 1659, 1660, 1661, 1662, 1663 )
);

同一批次插入的数据一般都是同一天的(最多在凌晨跨天),因此一次插入最多分散在32个分区中。

请问各位大神,这种抖动得如此厉害的情况该如何避免呢 ?

2 个赞

建议先使用obdiag 获取一下跑批时候的ASH报告
https://www.oceanbase.com/docs/common-obdiag-cn-1000000000791136#10-title-obdiag%20gather%20ash

3 个赞

你插入分区表就会产生这种情况,因为你的insert有可能会产生分布式事务。如果你插入的数据分区正好都集中在同一台observer上,可能会变得很快;如果你insert的一批数据落到B表多个分区上,同时这几个分区又分布在不同observer上,此时就产生了分布式事务,自然就会变慢。唯一的办法就是在insert时提前计算出分区位置,尽量保证同一批insert数据集中在一个分区中。

2 个赞

我们目前是3个Zone,每个Zone只有一个副本(也只有一台服务器),每个副本都是全功能副本。
而且写入走的都是主节点,也就是主节点本身应该包含所有分区数据,不应该存在这种情况才对。

2 个赞

目前发现在此过程后,OCP 有告警:OBServer ioawait 过高,现在正在观察到底是什么原因所致。
【抖动出现二十几分钟后,OCP 才出现上述告警,两者不知道是否存在关联关系。】

OCP的告警貌似有问题,变量占位符并没有对正确替换,有官方人员能帮忙反馈一下吗?

告警详情:集群:test,主机:172.31.45.237,告警:OceanBase server 磁盘ioawait高。OceanBase磁盘:ob_data,device nvme2n1,io await ${io_await_value_zh_cn} 超过 ${io_await_alarm_threshold} 毫秒,io util ${io_util_value_zh_cn} 超过 ${io_util_alarm_threshold} %,io qusize ${io_qusize_value} 超过 ${io_qusize_alarm_threshold}。

2 个赞

分区表每个分区都有一个主副本,多个分区的主副本是分布在不同zone上的。

2 个赞
  1. 发一个不慢的SQL的sql_audit对比下
  2. 发一个内存的监控图,尤其是memstore的
  3. 还有租户的规格,primary zone的配置
2 个赞

您好,感谢大佬的热心回复。请问有什么办法可以查询到每个分区的Zone分布吗 ?

1 个赞

可以用这个系统表CDB_OB_TABLE_LOCATIONS
https://www.oceanbase.com/docs/common-oceanbase-database-cn-1000000000820159

1 个赞
[
  {
    "SVR_IP": "172.31.41.43",
    "SVR_PORT": 2882,
    "REQUEST_ID": 35097820,
    "SQL_EXEC_ID": 78305510,
    "TRACE_ID": "YB42AC1F292B-00061C2CA47FD9C3-0-0",
    "SID": 3222335814,
    "CLIENT_IP": "172.31.46.6",
    "CLIENT_PORT": 54740,
    "TENANT_ID": 1002,
    "TENANT_NAME": "test",
    "EFFECTIVE_TENANT_ID": 1002,
    "USER_ID": 200001,
    "USER_NAME": "root",
    "USER_GROUP": 0,
    "USER_CLIENT_IP": "172.31.46.49",
    "DB_ID": 503602,
    "DB_NAME": "test",
    "SQL_ID": "E12E769FAB3D9A8F06824C9B383377C8",
    "QUERY_SQL": "*",
    "PLAN_ID": 38604,
    "AFFECTED_ROWS": 3500,
    "RETURN_ROWS": 0,
    "PARTITION_CNT": 128,
    "RET_CODE": 0,
    "QC_ID": 0,
    "DFO_ID": 0,
    "SQC_ID": 0,
    "WORKER_ID": 0,
    "EVENT": "latch: trans ctx latch wait",
    "P1TEXT": "address",
    "P1": 140515940604600,
    "P2TEXT": "number",
    "P2": 1073767318,
    "P3TEXT": "tries",
    "P3": 1,
    "LEVEL": 0,
    "WAIT_CLASS_ID": 104,
    "WAIT_CLASS#": 4,
    "WAIT_CLASS": "CONCURRENCY",
    "STATE": "WAITED SHORT TIME",
    "WAIT_TIME_MICRO": 241,
    "TOTAL_WAIT_TIME_MICRO": 440,
    "TOTAL_WAITS": 2,
    "RPC_COUNT": 6,
    "PLAN_TYPE": 1,
    "IS_INNER_SQL": 0,
    "IS_EXECUTOR_RPC": 0,
    "IS_HIT_PLAN": 1,
    "REQUEST_TIME": 1720778847159651,
    "ELAPSED_TIME": 123011,
    "NET_TIME": 0,
    "NET_WAIT_TIME": 3,
    "QUEUE_TIME": 18,
    "DECODE_TIME": 0,
    "GET_PLAN_TIME": 9212,
    "EXECUTE_TIME": 113518,
    "APPLICATION_WAIT_TIME": 0,
    "CONCURRENCY_WAIT_TIME": 880,
    "USER_IO_WAIT_TIME": 0,
    "SCHEDULE_TIME": 0,
    "ROW_CACHE_HIT": 0,
    "BLOOM_FILTER_CACHE_HIT": 0,
    "BLOCK_CACHE_HIT": 6079,
    "DISK_READS": 0,
    "RETRY_CNT": 0,
    "TABLE_SCAN": 0,
    "CONSISTENCY_LEVEL": 3,
    "MEMSTORE_READ_ROW_COUNT": 0,
    "SSSTORE_READ_ROW_COUNT": 0,
    "DATA_BLOCK_READ_CNT": 2383,
    "DATA_BLOCK_CACHE_HIT": 2383,
    "INDEX_BLOCK_READ_CNT": 7392,
    "INDEX_BLOCK_CACHE_HIT": 3696,
    "BLOCKSCAN_BLOCK_CNT": 0,
    "BLOCKSCAN_ROW_CNT": 0,
    "PUSHDOWN_STORAGE_FILTER_ROW_CNT": 0,
    "REQUEST_MEMORY_USED": 26113670,
    "EXPECTED_WORKER_COUNT": 0,
    "USED_WORKER_COUNT": 0,
    "SCHED_INFO": null,
    "FUSE_ROW_CACHE_HIT": 0,
    "PS_CLIENT_STMT_ID": -1,
    "PS_INNER_STMT_ID": -1,
    "TX_ID": 506205921,
    "SNAPSHOT_VERSION": 0,
    "REQUEST_TYPE": 2,
    "IS_BATCHED_MULTI_STMT": 0,
    "OB_TRACE_INFO": null,
    "PLAN_HASH": 3290318591324336132,
    "LOCK_FOR_READ_TIME": 0,
    "PARAMS_VALUE": "",
    "RULE_NAME": "",
    "PARTITION_HIT": 1,
    "TX_INTERNAL_ROUTING": 1,
    "TX_STATE_VERSION": 367,
    "FLT_TRACE_ID": "",
    "NETWORK_WAIT_TIME": 0
  }
]
  1. 宿主机是 16vCPU(超卖比例 150%) 128GB

1 个赞

好的,谢谢。这个需要 sys 租户才能执行吧 ?

此外,如果我一个分区表有 2048 个分区,如果一次查询需要扫描所有分区,就会产生 2048 次 IO 吗 ?

1 个赞

你好,请问有办法限制单个表的所有分区都固定在一个Zone上吗 ?
我们现在是单台机器固定负责集中写入,其他机器只需要查询,而且都是优先通过 从副本 进行查询。

1 个赞

要看你的SQL是否带上了分区键,没有带上的话就是全表扫描所有分区,带了分区键就能走分区裁剪

2 个赞

因为你是1-1-1的架构,最简单的就是切换租户的primary_zone,这样所有表的leader都集中在一台上
如果只想这张表的leader集中、其他表打散,就创建一个none模式的表组,将这张表加进去,也能将分区leader集中。

1 个赞