想请问下,这个时间段ob集群在做什么操作
[2024-09-01 00:01:00.991010] TRACE [TRACE] after_process (obmp_base.cpp:154) [19407][T1_L0_G0][T1][YB420A09CECD-00061DA833EDA6B4-0-0] [lt=150] [err query](TRACE=begin_ts=1725120060990976 2024-08-31 16:01:00.990976|[process_begin] u=0 in_queue_time:36, receive_ts:1725120060990938, enqueue_ts:1725120060990946|[start_sql] u=2 addr:{ip:“127.0.0.1”, port:4666}|[query_begin] u=1 trace_id:YB420A09CECD-00061DA833EDA6B4-0-0|[before_processor_run] u=9 |[session] u=8 sid:3221929130, tenant_id:1|[process_end] u=8 run_ts:1725120060990989|[process_ret] u=1 process_ret:-4009|total_timeu=29)
[2024-09-01 00:01:03.439666] TRACE [TRACE] print_trace_log_if_necessary_ (ob_trans_ctx.cpp:139) [23394][T1002_ApplySrv2][T1002][YB420A09CECD-00061DA91CDEC5E7-0-0] [lt=53] [trans sampling] (TRACE=begin_ts=1725120063428165 2024-08-31 16:01:03.428165|[init] u=0 addr:0x7fc832c36dd0, id:1002, trans_id:{txid:159413633}, ref:0, opid:2730637|[start_trans] u=3 ret:0, left_time:86400000000, ref:2, opid:2730638|[start_access] u=3 ret:0, trace_id:“YB420A09CECD-00061DA91CDEC60F-0-0”, opid:2, data_seq:1725120063427169, pending:1, ref:2, tid:1217|[end_access] u=316 opid:2, pending:0, ref:2, tid:1217|[start_access] u=14 ret:0, trace_id:“YB420A09CECD-00061DA91CDEC60F-0-0”, opid:2, data_seq:1725120063427170, pending:1, ref:2, tid:1217|[end_access] u=117 opid:2, pending:0, ref:2, tid:1217|[set_stc] u=49 stc:1725120063428660, opid:2730643|[commit] u=54 ret:0, tid:19641, ref:3, opid:2730643|[after_submit_log] u=2506 ret:0, log_no:0, base_ts:{scn_val:1725120063431663196}, t:{scn_val:1725120063431663196}, lsn:{lsn:87411113369}|[get_gts_callback] u=2 ret:0, srr:1725120063428673, gts:{scn_val:1725120063431663196}, ref:3, opid:2730644|[log_sync_succ_cb] u=8404 ret:0, log_type:0x1, t:{scn_val:1725120063431663196}, offset:{lsn:87411113369}, ref:2|[log_sync_succ_cb] u=1 ret:0, log_type:0x40, t:{scn_val:1725120063431663196}, offset:{lsn:87411113369}, ref:2|[response_scheduler] u=26 ret:0, tag1:false, tag2:false, status:0, commit_version:{scn_val:1725120063431663196}|total_timeu=11495)
[2024-09-01 00:01:03.439859] TRACE [TRACE] print_trace_log_if_necessary_ (ob_trans_ctx.cpp:139) [23394][T1002_ApplySrv2][T1002][YB420A09CECD-00061DA91CDEC60F-0-0] [lt=111] [trans sampling] (TRACE=begin_ts=1725120063428165 2024-08-31 16:01:03.428165|[init] u=0 addr:0x7fc832c36dd0, id:1002, trans_id:{txid:159413633}, ref:0, opid:2730637|[start_trans] u=3 ret:0, left_time:86400000000, ref:2, opid:2730638|[start_access] u=3 ret:0, trace_id:“YB420A09CECD-00061DA91CDEC60F-0-0”, opid:2, data_seq:1725120063427169, pending:1, ref:2, tid:1217|[end_access] u=316 opid:2, pending:0, ref:2, tid:1217|[start_access] u=14 ret:0, trace_id:“YB420A09CECD-00061DA91CDEC60F-0-0”, opid:2, data_seq:1725120063427170, pending:1, ref:2, tid:1217|[end_access] u=117 opid:2, pending:0, ref:2, tid:1217|[set_stc] u=49 stc:1725120063428660, opid:2730643|[commit] u=54 ret:0, tid:19641, ref:3, opid:2730643|[after_submit_log] u=2506 ret:0, log_no:0, base_ts:{scn_val:1725120063431663196}, t:{scn_val:1725120063431663196}, lsn:{lsn:87411113369}|[get_gts_callback] u=2 ret:0, srr:1725120063428673, gts:{scn_val:1725120063431663196}, ref:3, opid:2730644|[log_sync_succ_cb] u=8404 ret:0, log_type:0x1, t:{scn_val:1725120063431663196}, offset:{lsn:87411113369}, ref:2|[log_sync_succ_cb] u=1 ret:0, log_type:0x40, t:{scn_val:1725120063431663196}, offset:{lsn:87411113369}, ref:2|[response_scheduler] u=26 ret:0, tag1:false, tag2:false, status:0, commit_version:{scn_val:1725120063431663196}|[exiting] u=119 ref:2, arg1:3221930421, opid:2730646|[log_sync_succ_cb] u=1 ret:0, log_type:0x100, t:{scn_val:1725120063431663196}, offset:{lsn:87411113369}, ref:1|[end_trans_cb] u=75 ret:0, arg1:0, arg2:{scn_val:1725120063431663196}, async:false, opid:2730647|[destroy] u=3 opid:2730647|total_timeu=11693)
[2024-09-01 00:01:03.624272] TRACE [TRACE] dump_tenant_info_ (ob_tenant_info_loader.cpp:216) [21333][T1002_TenantInf][T1002][Y0-0000000000000000-0-0] [lt=58] [dump tenant_info_loader](TRACE=begin_ts=1725120063624266 2024-08-31 16:01:03.624266|[ob_tenant_info_loader] u=0 tenant_id:1002, is_sys_ls_leader:false, broadcast_cost_time:1, broadcast_times:0, broadcast_per_sec:0, rpc_update_times:1, rpc_update_per_sec:0, last_rpc_update_time_us:1725120061856481, sql_update_cost_time:7451, sql_update_times:0, tenant_info_cache:{tenant_info:{tenant_id:1002, switchover_epoch:0, sync_scn:{scn_val:1725120060429369919}, replayable_scn:{scn_val:1725120060429369919}, standby_scn:{scn_val:1725120060429369919}, recovery_until_scn:{scn_val:4611686018427387903}, tenant_role:{value:1}, switchover_status:{value:1}}, last_sql_update_time:1725120063616826, ora_rowscn:1725120063109685214}, is_inited:true|total_timeu=0)
[2024-09-01 00:01:04.448370] TRACE [TRACE] print_trace_log_if_necessary_ (ob_trans_ctx.cpp:139) [23399][T1002_ReplaySrv][T1002][Y0-0000000000000000-0-0] [lt=35] [trans sampling] (TRACE=begin_ts=1725120064448191 2024-08-31 16:01:04.448191|[init] u=0 addr:0x7fc832c3d950, id:1001, trans_id:{txid:160488065}, ref:0, opid:1316329|[replay_redo] u=134 ret:0, used:4, need_replay:true, offset:110011244496, t:{scn_val:1725120064408002261}, ref:2|[replay_commit_info] u=14 ret:0, used:4, offset:110011244496, t:{scn_val:1725120064408002261}, ref:2, opid:1316337|[push_max_commit_version] u=12 trans_version:{scn_val:1725120064408002261}, ref:2, opid:1316340|total_timeu=160)
[2024-09-01 00:01:04.448449] TRACE [TRACE] print_trace_log_if_necessary_ (ob_trans_ctx.cpp:139) [23399][T1002_ReplaySrv][T1002][Y0-0000000000000000-0-0] [lt=71] [trans sampling] (TRACE=begin_ts=1725120064448191 2024-08-31 16:01:04.448191|[init] u=0 addr:0x7fc832c3d950, id:1001, trans_id:{txid:160488065}, ref:0, opid:1316329|[replay_redo] u=134 ret:0, used:4, need_replay:true, offset:110011244496, t:{scn_val:1725120064408002261}, ref:2|[replay_commit_info] u=14 ret:0, used:4, offset:110011244496, t:{scn_val:1725120064408002261}, ref:2, opid:1316337|[push_max_commit_version] u=12 trans_version:{scn_val:1725120064408002261}, ref:2, opid:1316340|[exiting] u=91 ref:2, arg1:0, opid:1316340|[replay_commit] u=2 ret:0, used:97, offset:110011244496, t:{scn_val:1725120064408002261}, ref:1, opid:1316340|[destroy] u=4 opid:1316343|total_timeu=257)
[2024-09-01 00:01:04.625393] TRACE [TRACE] dump_tenant_info_ (ob_tenant_info_loader.cpp:216) [21333][T1002_TenantInf][T1002][Y0-0000000000000000-0-0] [lt=79] [dump tenant_info_loader](TRACE=begin_ts=1725120064625386 2024-08-31 16:01:04.625386|[ob_tenant_info_loader] u=0 tenant_id:1002, is_sys_ls_leader:false, broadcast_cost_time:1, broadcast_times:0, broadcast_per_sec:0, rpc_update_times:0, rpc_update_per_sec:0, last_rpc_update_time_us:1725120061856481, sql_update_cost_time:8407, sql_update_times:0, tenant_info_cache:{tenant_info:{tenant_id:1002, switchover_epoch:0, sync_scn:{scn_val:1725120062957037651}, replayable_scn:{scn_val:1725120062957037651}, standby_scn:{scn_val:1725120062957037651}, recovery_until_scn:{scn_val:4611686018427387903}, tenant_role:{value:1}, switchover_status:{value:1}}, last_sql_update_time:1725120064616985, ora_rowscn:1725120064352958495}, is_inited:true|total_timeu=0)
[2024-09-01 00:01:05.386313] TRACE [TRACE] print_trace_log_if_necessary_ (ob_trans_ctx.cpp:139) [23396][T1002_ApplySrv4][T1002][YB420A09CECD-00061DA91CDEC66E-0-0] [lt=51] [trans sampling] (TRACE=begin_ts=1725120065372363 2024-08-31 16:01:05.372363|[init] u=0 addr:0x7fc8323d4390, id:1002, trans_id:{txid:159413761}, ref:0, opid:32206|[start_trans] u=3 ret:0, left_time:86400000000, ref:2, opid:32207|[start_access] u=4 ret:0, trace_id:“YB420A09CECD-00061DA916AD40EF-0-0”, opid:2, data_seq:1725120065367156, pending:1, ref:2, tid:1231|[end_access] u=321 opid:2, pending:0, ref:2, tid:1231|[start_access] u=10 ret:0, trace_id:“YB420A09CECD-00061DA916AD40EF-0-0”, opid:2, data_seq:1725120065367157, pending:1, ref:2, tid:1231|[end_access] u=92 opid:2, pending:0, ref:2, tid:1231|[set_stc] u=46 stc:1725120065372834, opid:32212|[commit] u=51 ret:0, tid:21308, ref:3, opid:32212|[after_submit_log] u=1833 ret:0, log_no:0, base_ts:{scn_val:1725120065374951069}, t:{scn_val:1725120065374951069}, lsn:{lsn:87411305069}|[get_gts_callback] u=3 ret:0, srr:1725120065372843, gts:{scn_val:1725120065374951069}, ref:3, opid:32213|[log_sync_succ_cb] u=11548 ret:0, log_type:0x1, t:{scn_val:1725120065374951069}, offset:{lsn:87411305069}, ref:2|[log_sync_succ_cb] u=1 ret:0, log_type:0x40, t:{scn_val:1725120065374951069}, offset:{lsn:87411305069}, ref:2|[response_scheduler] u=33 ret:0, tag1:false, tag2:false, status:0, commit_version:{scn_val:1725120065374951069}|total_timeu=13945)
[2024-09-01 00:01:05.386513] TRACE [TRACE] print_trace_log_if_necessary_ (ob_trans_ctx.cpp:139) [23396][T1002_ApplySrv4][T1002][YB420A09CECD-00061DA916AD40EF-0-0] [lt=96] [trans sampling] (TRACE=begin_ts=1725120065372363 2024-08-31 16:01:05.372363|[init] u=0 addr:0x7fc8323d4390, id:1002, trans_id:{txid:159413761}, ref:0, opid:32206|[start_trans] u=3 ret:0, left_time:86400000000, ref:2, opid:32207|[start_access] u=4 ret:0, trace_id:“YB420A09CECD-00061DA916AD40EF-0-0”, opid:2, data_seq:1725120065367156, pending:1, ref:2, tid:1231|[end_access] u=321 opid:2, pending:0, ref:2, tid:1231|[start_access] u=10 ret:0, trace_id:“YB420A09CECD-00061DA916AD40EF-0-0”, opid:2, data_seq:1725120065367157, pending:1, ref:2, tid:1231|[end_access] u=92 opid:2, pending:0, ref:2, tid:1231|[set_stc] u=46 stc:1725120065372834, opid:32212|[commit] u=51 ret:0, tid:21308, ref:3, opid:32212|[after_submit_log] u=1833 ret:0, log_no:0, base_ts:{scn_val:1725120065374951069}, t:{scn_val:1725120065374951069}, lsn:{lsn:87411305069}|[get_gts_callback] u=3 ret:0, srr:1725120065372843, gts:{scn_val:1725120065374951069}, ref:3, opid:32213|[log_sync_succ_cb] u=11548 ret:0, log_type:0x1, t:{scn_val:1725120065374951069}, offset:{lsn:87411305069}, ref:2|[log_sync_succ_cb] u=1 ret:0, log_type:0x40, t:{scn_val:1725120065374951069}, offset:{lsn:87411305069}, ref:2|[response_scheduler] u=33 ret:0, tag1:false, tag2:false, status:0, commit_version:{scn_val:1725120065374951069}|[exiting] u=103 ref:2, arg1:3221930421, opid:32215|[log_sync_succ_cb] u=1 ret:0, log_type:0x100, t:{scn_val:1725120065374951069}, offset:{lsn:87411305069}, ref:1|[end_trans_cb] u=97 ret:0, arg1:0, arg2:{scn_val:1725120065374951069}, async:false, opid:32216|[destroy] u=3 opid:32216|total_timeu=14149)
[2024-09-01 00:01:07.632164] TRACE [TRACE] dump_tenant_info_ (ob_tenant_info_loader.cpp:216) [21333][T1002_TenantInf][T1002][Y0-0000000000000000-0-0] [lt=98] [dump tenant_info_loader](TRACE=begin_ts=1725120067632159 2024-08-31 16:01:07.632159|[ob_tenant_info_loader] u=0 tenant_id:1002, is_sys_ls_leader:false, broadcast_cost_time:1, broadcast_times:0, broadcast_per_sec:0, rpc_update_times:1, rpc_update_per_sec:0, last_rpc_update_time_us:1725120065857750, sql_update_cost_time:13101, sql_update_times:0, tenant_info_cache:{tenant_info:{tenant_id:1002, switchover_epoch:0, sync_scn:{scn_val:1725120065580701976}, replayable_scn:{scn_val:1725120065580701976}, standby_scn:{scn_val:1725120065556658178}, recovery_until_scn:{scn_val:4611686018427387903}, tenant_role:{value:1}, switchover_status:{value:1}}, last_sql_update_time:1725120067619065, ora_rowscn:1725120067033116781}, is_inited:true|total_timeu=0)
[2024-09-01 00:01:11.152341] ERROR detect_palf_hang_failure_ (ob_failure_detector.cpp:349) [19454][T1001_Occam][T1001][Y0-0000000000000000-0-0] [lt=36][errcode=-4392] disk is hung(msg=“clog disk may be hung, add failure event”, clog_disk_hang_event={type:PROCESS HANG, module:LOG, info:clog disk hang event, level:FATAL}, clog_disk_last_working_time=1725120066080961, hung time=5071284)
[2024-09-01 00:01:11.212791] ERROR detect_palf_hang_failure_ (ob_failure_detector.cpp:349) [21062][T1002_Occam][T1002][Y0-0000000000000000-0-0] [lt=28][errcode=-4392] disk is hung(msg=“clog disk may be hung, add failure event”, clog_disk_hang_event={type:PROCESS HANG, module:LOG, info:clog disk hang event, level:FATAL}, clog_disk_last_working_time=1725120066118350, hung time=5094359)
[2024-09-01 00:01:11.232875] ERROR detect_palf_hang_failure_ (ob_failure_detector.cpp:349) [18680][T1_Occam][T1][Y0-0000000000000000-0-0] [lt=27][errcode=-4392] disk is hung(msg=“clog disk may be hung, add failure event”, clog_disk_hang_event={type:PROCESS HANG, module:LOG, info:clog disk hang event, level:FATAL}, clog_disk_last_working_time=1725120066089394, hung time=5143395)
[2024-09-01 00:01:14.638602] TRACE [TRACE] dump_tenant_info_ (ob_tenant_info_loader.cpp:216) [21333][T1002_TenantInf][T1002][Y0-0000000000000000-0-0] [lt=66] [dump tenant_info_loader](TRACE=begin_ts=1725120074638596 2024-08-31 16:01:14.638596|[ob_tenant_info_loader] u=0 tenant_id:1002, is_sys_ls_leader:false, broadcast_cost_time:0, broadcast_times:0, broadcast_per_sec:0, rpc_update_times:0, rpc_update_per_sec:0, last_rpc_update_time_us:1725120065857750, sql_update_cost_time:18879, sql_update_times:0, tenant_info_cache:{tenant_info:{tenant_id:1002, switchover_epoch:0, sync_scn:{scn_val:1725120072861506760}, replayable_scn:{scn_val:1725120072861506760}, standby_scn:{scn_val:1725120072706229391}, recovery_until_scn:{scn_val:4611686018427387903}, tenant_role:{value:1}, switchover_status:{value:1}}, last_sql_update_time:1725120074619730, ora_rowscn:1725120074316520107}, is_inited:true|total_timeu=0)
[2024-09-01 00:01:15.287936] TRACE [TRACE] after_process (obmp_base.cpp:154) [19340][T1_L0_G0][T1][YB420A09CECD-00061DA9017ED523-0-0] [lt=69] [err query](TRACE=begin_ts=1725120075287901 2024-08-31 16:01:15.287901|[process_begin] u=0 in_queue_time:71, receive_ts:1725120075287827, enqueue_ts:1725120075287845|[start_sql] u=1 addr:{ip:“127.0.0.1”, port:4716}|[query_begin] u=2 trace_id:YB420A09CECD-00061DA9017ED523-0-0|[before_processor_run] u=9 |[session] u=7 sid:3221931233, tenant_id:1|[process_end] u=8 run_ts:1725120075287914|[process_ret] u=2 process_ret:-4009|total_timeu=29)
[2024-09-01 00:01:15.305183] TRACE [TRACE] after_process (obmp_base.cpp:154) [19343][T1_L0_G0][T1][YB420A09CECD-00061DA8F4FF2441-0-0] [lt=55] [err query](TRACE=begin_ts=1725120075305158 2024-08-31 16:01:15.305158|[process_begin] u=0 in_queue_time:47, receive_ts:1725120075305109, enqueue_ts:1725120075305116|[start_sql] u=1 addr:{ip:“127.0.0.1”, port:4720}|[query_begin] u=1 trace_id:YB420A09CECD-00061DA8F4FF2441-0-0|[before_processor_run] u=6 |[session] u=6 sid:3221929424, tenant_id:1|[process_end] u=5 run_ts:1725120075305167|[process_ret] u=1 process_ret:-4009|total_timeu=20)
[2024-09-01 00:01:15.319597] TRACE [TRACE] after_process (obmp_base.cpp:154) [19345][T1_L0_G0][T1][YB420A09CECD-00061DA8EFFF4802-0-0] [lt=69] [err query](TRACE=begin_ts=1725120075319575 2024-08-31 16:01:15.319575|[process_begin] u=0 in_queue_time:35, receive_ts:1725120075319538, enqueue_ts:1725120075319543|[start_sql] u=1 addr:{ip:“127.0.0.1”, port:4718}|[query_begin] u=0 trace_id:YB420A09CECD-00061DA8EFFF4802-0-0|[before_processor_run] u=5 |[session] u=5 sid:3221929049, tenant_id:1|[process_end] u=6 run_ts:1725120075319581|[process_ret] u=1 process_ret:-4009|total_timeu=18)
[2024-09-01 00:01:15.727045] TRACE [TRACE] after_process (obmp_base.cpp:154) [19343][T1_L0_G0][T1][YB420A09CECD-00061DA8F4FF2443-0-0] [lt=60] [err query](TRACE=begin_ts=1725120075727016 2024-08-31 16:01:15.727016|[process_begin] u=0 in_queue_time:60, receive_ts:1725120075726953, enqueue_ts:1725120075726963|[start_sql] u=1 addr:{ip:“127.0.0.1”, port:4734}|[query_begin] u=2 trace_id:YB420A09CECD-00061DA8F4FF2443-0-0|[before_processor_run] u=8 |[session] u=5 sid:3221931141, tenant_id:1|[process_end] u=7 run_ts:1725120075727027|[process_ret] u=2 process_ret:-4009|total_timeu=25)
[2024-09-01 00:01:15.751132] TRACE [TRACE] after_process (obmp_base.cpp:154) [19340][T1_L0_G0][T1][YB420A09CECD-00061DA9017ED528-0-0] [lt=92] [err query](TRACE=begin_ts=1725120075751097 2024-08-31 16:01:15.751097|[process_begin] u=0 in_queue_time:59, receive_ts:1725120075751017, enqueue_ts:1725120075751026|[start_sql] u=1 addr:{ip:“127.0.0.1”, port:4736}|[query_begin] u=2 trace_id:YB420A09CECD-00061DA9017ED528-0-0|[before_processor_run] u=7 |[session] u=7 sid:3221928066, tenant_id:1|[process_end] u=11 run_ts:1725120075751108|[process_ret] u=2 process_ret:-4009|total_timeu=30)
[2024-09-01 00:01:15.794983] TRACE [TRACE] after_process (obmp_base.cpp:154) [19396][T1_L0_G0][T1][YB420A09CECD-00061DA8368DA795-0-0] [lt=128] [err query](TRACE=begin_ts=1725120075794945 2024-08-31 16:01:15.794945|[process_begin] u=0 in_queue_time:83, receive_ts:1725120075794859, enqueue_ts:1725120075794867|[start_sql] u=1 addr:{ip:“127.0.0.1”, port:4740}|[query_begin] u=2 trace_id:YB420A09CECD-00061DA8368DA795-0-0|[before_processor_run] u=11 |[session] u=8 sid:3221910480, tenant_id:1|[process_end] u=9 run_ts:1725120075794959|[process_ret] u=2 process_ret:-4009|total_timeu=33)
查看下/var/log/message 是不是存在硬件报错了。
clog对硬件的要求比较高
这个报错是clog盘hung住了,大部分是因为硬件原因io能力不足ocp告警应该会自动回复
1 个赞
1、卡住很可能是 clog 所在盘使用率到达阈值,或者就是 clog 盘太慢了,不要data盘和clog盘共用。
2、你也可以用命令 查看结果发一下
iostat -d -x -k 1 10 #查看设备使用率(%util)、响应时间(await) iostat -c 1 10 #查看cpu状态
3、把哪个时间段observer.log日志 整个打包发一下
1 个赞
这个问题有进展吗?可以参考楼上发的检查下,另外发下OB版本