【 使用环境 】生产环境 or 测试环境
【 OB or 其他组件 】
【 使用版本 】
【问题描述】[2024-10-12 10:32:28.122992] INFO [STORAGE.TRANS] handle_timeout (ob_trans_part_ctx.cpp:718) [66894][T1_TransTimeWhe][T1][Y0-0000000000000000-0-0] [lt=22] handle timeout(ret=0, *this={this:0x146f4ea2ba50, ref:4, trans_id:{txid:3412091}, tenant_id:1, is_exiting:false, trans_expired_time:1728786745537916, cluster_version:17180066560, trans_need_wait_wrap:{receive_gts_ts_:[mts=0], need_wait_interval_us:0}, stc:[mts=1728700345539603], ctx_create_time:1728700345538997}{self_ls_id:{id:1}, session_id:3221493617, associated_session_id:3221493617, part_trans_action:3, pending_write:0, 2pc_role:“UNKNOWN”, ctx_tx_data_:{ctx_mgr_:0x146f3a604030, tx_data_guard_:{tx_data:{tx_id:{txid:3412091}, ref_cnt:1, state:“RUNNING”, commit_version:{val:1728700345538997000, v:0}, start_scn:{val:1728700345538997000, v:0}, end_scn:{val:18446744073709551615, v:3}, op_guard:{tx_data_op_:null}}}, read_only_:false}, role_state_:0, create_ctx_scn_:{val:18446744073709551615, v:3}, ctx_source:1, epoch_:1006938489258677, replay_completeness_:1, upstream_state:“INIT”, collected:[], rec_log_ts:{val:1728700345538997000, v:0}, prev_rec_log_ts:{val:18446744073709551615, v:3}, lastest_snapshot:{val:18446744073709551615, v:3}, last_request_ts:1728700345538997, block_frozen_memtable:null, max_2pc_commit_scn:{val:18446744073709551615, v:3}, mt_ctx_:{ObIMvccCtx={alloc_type=0 ctx_descriptor=0 min_table_version=1728546361475664 max_table_version=1728546361475664 trans_version={val:1728700345538997000, v:0} commit_version={val:0, v:0} lock_wait_start_ts=0 replay_compact_version={val:0, v:0}} end_code=0 tx_status=0 is_readonly=false ref=0 trans_id={txid:3412091} ls_id=1 row_callback[alloc:1, free:0, unsubmit:0] redo[fill:2,sync_succ:0, sync_fail:0] main_list_len=2 pending_log_size=0 callback_list:{cnt=1 stat:[tx_end=0, rollback_to=0, fast_commit=0, remove_memtable=0, ext_info_log=0] detail:[(log_epoch,length,logged,synced,appended,removed,unlog_removed,branch_removed)|0:(-1,2,2,0,2,0,0,0)|]}} -{busy_cbs_cnt:1, oldest_busy_cb:{ObTxBaseLogCb:{base_ts:{val:1728700345538997000, v:0}, log_ts:{val:1728700345538997000, v:0}, lsn:{lsn:595024034}, submit_ts:1728700345539680}, this:0x146f4ea2f400, is_inited_:true, trans_id:{txid:3412091}, ls_id:{id:1}, ctx:0x146f4ea2ba50, tx_data_guard:{tx_data:NULL}, is_callbacked_:false, is_dynamic_:false, mds_range_:{range_array_.count():0, range_array_:[]}, cb_arg_array_:[{log_type:0x1, arg:null}, {log_type:0x40, arg:null}, {log_type:0x100, arg:null}], first_part_scn_:{val:18446744073709551615, v:3}, extra_cb_:null, need_free_extra_cb_:false, callbacks_.count():1}, final_log_cb:{ObTxBaseLogCb:{base_ts:{val:1728700345538997000, v:0}, log_ts:{val:1728700345538997000, v:0}, lsn:{lsn:595024034}, submit_ts:1728700345539680}, this:0x146f4ea2f400, is_inited_:true, trans_id:{txid:3412091}, ls_id:{id:1}, ctx:0x146f4ea2ba50, tx_data_guard:{tx_data:NULL}, is_callbacked_:false, is_dynamic_:false, mds_range_:{range_array_.count():0, range_array_:[]}, cb_arg_array_:[{log_type:0x1, arg:null}, {log_type:0x40, arg:null}, {log_type:0x100, arg:null}], first_part_scn_:{val:18446744073709551615, v:3}, extra_cb_:null, need_free_extra_cb_:false, callbacks_.count():1}, -{info_log_submitted:1 state_log_submitting:1 state_log_submitted:1 }-, -{downstream_state:“INIT”, upstream:{id:-1}, participants:[{id:1}], redo_log_no:0, scheduler:“127.0.0.1:2882”, prepare_version:{val:18446744073709551615, v:3}, trans_type:0, next_log_entry_no:1, max_applied_log_ts:{val:18446744073709551615, v:3}, max_appling_log_ts:{val:18446744073709551615, v:3}, max_applying_part_log_no:9223372036854775807, max_submitted_seq_no:{branch:0, seq:3}, checksum:[0], checksum_scn:[{val:0, v:0}], need_checksum:true, data_complete:false, is_dup_tx:false, exec_epoch:0, commit_parts:[{ls_id:{id:1}, exec_epoch:1006938489258677, transfer_epoch:-1}], }- }-}, tx_expired=false, commit_expired=false, delay=400000)
[2024-10-12 10:32:28.123052] INFO [STORAGE.TRANS] handle_timeout (ob_trans_part_ctx.cpp:721) [66894][T1_TransTimeWhe][T1][Y0-0000000000000000-0-0] [lt=60] trx is waiting log_cb(busy_cbs_.get_size()=1, busy_cbs_.get_first()={ObTxBaseLogCb:{base_ts:{val:1728700345538997000, v:0}, log_ts:{val:1728700345538997000, v:0}, lsn:{lsn:595024034}, submit_ts:1728700345539680}, this:0x146f4ea2f400, is_inited_:true, trans_id:{txid:3412091}, ls_id:{id:1}, ctx:0x146f4ea2ba50, tx_data_guard:{tx_data:NULL}, is_callbacked_:false, is_dynamic_:false, mds_range_:{range_array_.count():0, range_array_:[]}, cb_arg_array_:[{log_type:0x1, arg:null}, {log_type:0x40, arg:null}, {log_type:0x100, arg:null}], first_part_scn_:{val:18446744073709551615, v:3}, extra_cb_:null, need_free_extra_cb_:false, callbacks_.count():1}, busy_cbs_.get_last()={ObTxBaseLogCb:{base_ts:{val:1728700345538997000, v:0}, log_ts:{val:1728700345538997000, v:0}, lsn:{lsn:595024034}, submit_ts:1728700345539680}, this:0x146f4ea2f400, is_inited_:true, trans_id:{txid:3412091}, ls_id:{id:1}, ctx:0x146f4ea2ba50, tx_data_guard:{tx_data:NULL}, is_callbacked_:false, is_dynamic_:false, mds_range_:{range_array_.count():0, range_array_:[]}, cb_arg_array_:[{log_type:0x1, arg:null}, {log_type:0x40, arg:null}, {log_type:0x100, arg:null}], first_part_scn_:{val:18446744073709551615, v:3}, extra_cb_:null, need_free_extra_cb_:false, callbacks_.count():1})
[2024-10-12 10:32:28.123804] WDIAG [STORAGE.TRANS] run1 (ob_standby_timestamp_service.cpp:147) [67237][T1002_STSWorker][T1002][Y0-0000000000000000-0-0] [lt=50][errcode=-4076] query and update last id fail(ret=-4076, ret=“OB_NEED_WAIT”)
[2024-10-12 10:32:28.128047] WDIAG [STORAGE.TRANS] inner_lock_for_read (ob_tx_data_functor.cpp:342) [67453][TimezoneMgr][T1][YB427F000001-0006242E2C27A38F-0-0] [lt=11][errcode=-6004] lock_for_read need retry(ret=-6004, tx_data={tx_id:{txid:3412093}, ref_cnt:1, state:“RUNNING”, commit_version:{val:1728700346078392000, v:0}, start_scn:{val:1728700346078392000, v:0}, end_scn:{val:18446744073709551615, v:3}, op_guard:{tx_data_op_:null}}, lock_for_read_arg_={mvcc_acc_ctx_:{type:1, abs_lock_timeout_ts:1728700378027928, tx_lock_timeout_us:-1, snapshot:{version:{val:1728700347795321000, v:0}, tx_id:{txid:0}, scn:0}, tx_table_guards:{tx_table_guard:{tx_table:0x146f3a5e4e50, epoch:1, mini_cache_:[0:{}, 1:{}, 2:{}, 3:{}]}, src_tx_table_guard:{tx_table:null, epoch:-1, mini_cache_:[0:{}, 1:{}, 2:{}, 3:{}]}, src_ls_handle:{ls_map_:null, ls_:null, mod_:-1}}, tx_id:{txid:0}, tx_desc:NULL, tx_ctx:null, mem_ctx:null, tx_scn:0, write_flag:{is_table_api:0, is_table_lock:0, is_mds:0, is_dml_batch_opt:0, is_insert_up:0, is_write_only_index:0, is_check_row_locked:0, is_lob_aux:0, is_skip_flush_redo:0, is_update_uk:0}, handle_start_time:-1, lock_wait_start_ts:0}, data_trans_id_:{txid:3412093}, data_sql_sequence_:{branch:0, seq:3}, read_latest_:false, read_uncommitted_:false, scn_:{val:1728700346078392000, v:0}}, tx_cc_ctx={state:10, prepare_version:{val:1728700346078392000, v:0}})
[2024-10-12 10:32:28.142203] INFO [COORDINATOR] refresh (ob_leader_coordinator.cpp:146) [67124][T1002_Occam][T1002][Y0-0000000000000000-0-0] [lt=34] refresh __all_ls_election_reference_info success(ret=0, ret=“OB_SUCCESS”, *new_all_ls_election_reference_info=[{1,0,False,{False,},False,False,True}, {1001,0,False,{False,},False,False,True}])
这些日志都是干啥的
【复现路径】
【附件及日志】
【备注】基于 LLM 和开源文档 RAG 的论坛小助手已开放测试,在发帖时输入 [@论坛小助手] 即可召唤小助手,欢迎试用!
从您的问题来看,您关注的是 OceanBase 数据库日志打印的速度以及如何理解这些日志的具体含义。这可能涉及到日志打印速率过高导致的性能影响或日志内容的理解问题。不过,根据提供的信息,这个问题更适合通过日志采集来进一步分析,因为日志的内容可以帮助我们理解其背后的原因。
日志采集场景: 集群基础信息
日志采集命令: obdiag gather scene run --scene=observer.base
推荐使用 obdiag 工具进行日志采集,这将帮助我们了解您的集群配置以及日志打印的相关设置,从而更好地解释日志打印速度和内容。请您执行上述命令并将采集到的日志压缩包提供给我们。
为了更准确地定位问题,请您提供以下信息:
- 当前集群配置中关于日志打印速率的设置 (
syslog_io_bandwidth_limit
) 是多少? - 是否观察到了任何特定的日志信息或警告,例如
REACH SYSLOG RATE LIMIT
? - 您是否调整过与日志相关的任何配置项,例如
enable_async_syslog
? - 当前遇到的日志打印速度问题是否伴随着其他系统性能指标的变化,如 CPU 使用率、磁盘 I/O 等?
- 您是否注意到日志中出现了特定类型的错误信息或警告信息?如果有,请提供相关信息。
具体信息可参考以下文档:
附上敏捷诊断工具 obdiag 使用帮助链接
可以使用obdiag查看下集群做个巡检
目前看有普通info日志,任务执行错误日志(可忽略)