subtask_8.log
2025-04-09 12:01:05.203 INFO 37937 --- [manual-subtask-executor14,b6fe713bf3fcc2eb,07caf8caa6b11133] com.oceanbase.ocp.common.ssh.SshUtils : SSH executeCommand begin: sudo bash -c ''"'"'/home/admin/ocp_agent'"'"'/bin/ocp_agentctl start' on 172.16.120.9
2025-04-09 12:01:15.294 INFO 37937 --- [manual-subtask-executor14,b6fe713bf3fcc2eb,07caf8caa6b11133] com.oceanbase.ocp.common.ssh.SshUtils : SSH executeCommand end: sudo bash -c ''"'"'/home/admin/ocp_agent'"'"'/bin/ocp_agentctl start' on 172.16.120.9, result: SshResult(host=172.16.120.9, username=dell, command=sudo bash -c ''"'"'/home/admin/ocp_agent'"'"'/bin/ocp_agentctl start', out=, err={"successful":false,"message":null,"error":"Module=agent, kind=DEADLINE_EXCEEDED, code=wait_for_ready_timeout; "}, extOut=null, exitStatus=255)
2025-04-09 12:01:15.297 INFO 37937 --- [manual-subtask-executor14,b6fe713bf3fcc2eb,07caf8caa6b11133] c.o.o.e.internal.template.SshTemplate : SSH execute end: sudo bash -c ''"'"'/home/admin/ocp_agent'"'"'/bin/ocp_agentctl start' on 172.16.120.9,result:SshResult(host=172.16.120.9, username=dell, command=sudo bash -c ''"'"'/home/admin/ocp_agent'"'"'/bin/ocp_agentctl start', out=, err={"successful":false,"message":null,"error":"Module=agent, kind=DEADLINE_EXCEEDED, code=wait_for_ready_timeout; "}, extOut=null, exitStatus=255)
2025-04-09 12:01:15.300 INFO 37937 --- [manual-subtask-executor14,b6fe713bf3fcc2eb,07caf8caa6b11133] c.o.ocp.executor.executor.SshExecutor : execute ssh command failed, command:sudo bash -c ''"'"'/home/admin/ocp_agent'"'"'/bin/ocp_agentctl start', return code:255, output:, err:{"successful":false,"message":null,"error":"Module=agent, kind=DEADLINE_EXCEEDED, code=wait_for_ready_timeout; "}
2025-04-09 12:01:15.302 ERROR 37937 --- [manual-subtask-executor14,b6fe713bf3fcc2eb,07caf8caa6b11133] c.o.ocp.executor.executor.SshExecutor : failed to execute ssh command, errMsg:[SshClient]: failed to execute ssh command: sudo bash -c ''"'"'/home/admin/ocp_agent'"'"'/bin/ocp_agentctl start', exit code: 255, cause:null
2025-04-09 12:01:15.305 ERROR 37937 --- [manual-subtask-executor14,b6fe713bf3fcc2eb,07caf8caa6b11133] c.o.o.c.t.e.c.w.subtask.SubtaskExecutor : [SshClient]: failed to execute ssh command: sudo bash -c ''"'"'/home/admin/ocp_agent'"'"'/bin/ocp_agentctl start', exit code: 255
com.oceanbase.ocp.executor.exception.SshCommandFailedException: [SshClient]: failed to execute ssh command: sudo bash -c ''"'"'/home/admin/ocp_agent'"'"'/bin/ocp_agentctl start', exit code: 255
at com.oceanbase.ocp.executor.executor.SshExecutor.execute(SshExecutor.java:429)
at com.oceanbase.ocp.executor.executor.SshExecutor.startOcpAgent(SshExecutor.java:294)
at com.oceanbase.ocp.service.compute.AgentInstallationTaskService.startOcpAgent(AgentInstallationTaskService.java:243)
at com.oceanbase.ocp.service.compute.AgentInstallationTaskService$$FastClassBySpringCGLIB$$f7a6037f.invoke(<generated>)
at org.springframework.cglib.proxy.MethodProxy.invoke(MethodProxy.java:218)
at org.springframework.aop.framework.CglibAopProxy.invokeMethod(CglibAopProxy.java:386)
at org.springframework.aop.framework.CglibAopProxy.access$000(CglibAopProxy.java:85)
at org.springframework.aop.framework.CglibAopProxy$DynamicAdvisedInterceptor.intercept(CglibAopProxy.java:703)
at com.oceanbase.ocp.service.compute.AgentInstallationTaskService$$EnhancerBySpringCGLIB$$b8de8a27.startOcpAgent(<generated>)
at com.oceanbase.ocp.service.task.business.host.InstallOcpAgentTask.run(InstallOcpAgentTask.java:68)
at com.oceanbase.ocp.core.task.runtime.Subtask.retry(Subtask.java:49)
at com.oceanbase.ocp.core.task.engine.runner.JavaSubtaskRunner.retry(JavaSubtaskRunner.java:76)
at com.oceanbase.ocp.core.task.engine.runner.JavaSubtaskRunner.doRun(JavaSubtaskRunner.java:35)
at com.oceanbase.ocp.core.task.engine.runner.JavaSubtaskRunner.run(JavaSubtaskRunner.java:26)
at com.oceanbase.ocp.core.task.engine.runner.RunnerFactory.doRun(RunnerFactory.java:76)
at com.oceanbase.ocp.core.task.engine.coordinator.worker.subtask.SubtaskExecutor.doRun(SubtaskExecutor.java:207)
at com.oceanbase.ocp.core.task.engine.coordinator.worker.subtask.SubtaskExecutor.redirectConsoleOutput(SubtaskExecutor.java:201)
at com.oceanbase.ocp.core.task.engine.coordinator.worker.subtask.SubtaskExecutor.lambda$submit$2(SubtaskExecutor.java:137)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:750)
Set state for subtask: 8, operation:RETRY, state: FAILED
agentctl.log
2025-04-09T12:01:02.50308+08:00 INFO [190874,] caller=agent/admin.go:355:AgentStatus: check agentd status got: {State:running Ready:false Version: Pid:190891 RootDir:/home/admin/ocp_agent Socket:/home/admin/ocp_agent/run/ocp_agentd.190891.sock Services:map[ocp_mgragent:{Status:{State:stopped Version: Pid:190983 StartAt:1744171254720013916 Extra:map[]} Socket:/home/admin/ocp_agent/run/ocp_mgragent.190983.sock EndAt:1744171255831545281} ocp_monagent:{Status:{State:running Version:4.3.5-SNAPSHOT Pid:190909 StartAt:1744171252507988109 Extra:map[port:62889]} Socket:/home/admin/ocp_agent/run/ocp_monagent.190909.sock EndAt:-6795364578871345152}] Dangling:[]}
2025-04-09T12:01:02.5031+08:00 ERROR [190874,] caller=agent/admin.go:245:startAgent: wait for agent ready timeout
2025-04-09T12:01:02.50311+08:00 INFO [190874,] caller=agent/admin.go:637:progressEnd: startAgent end
2025-04-09T12:01:02.50312+08:00 WARN [190874,] caller=agent/admin.go:639:progressEnd: updateProgress: missing storedStatus fields: task_token=
2025-04-09T12:01:02.50313+08:00 ERROR [190874,] caller=agent/admin.go:199:StartAgent: start agent failed: Module=agent, kind=DEADLINE_EXCEEDED, code=wait_for_ready_timeout;
2025-04-09T12:01:02.50316+08:00 INFO [190874,] caller=agent/admin.go:178:Unlock: process 190874 release admin lock
2025-04-09T12:01:02.50322+08:00 INFO [190874,] caller=agentctl/main.go:264:func2: agentctl error fields: response="{\"successful\":false,\"message\":null,\"error\":\"Module=agent, kind=DEADLINE_EXCEEDED, code=wait_for_ready_timeout; \"}"
……
2025-04-09T16:01:53.79879+08:00 INFO [199078,] caller=agent/admin.go:355:AgentStatus: check agentd status got: {State:running Ready:false Version: Pid:199094 RootDir:/home/admin/ocp_agent Socket:/home/admin/ocp_agent/run/ocp_agentd.199094.sock Services:map[ocp_mgragent:{Status:{State:stopped Version: Pid:199191 StartAt:1744185705948125207 Extra:map[]} Socket:/home/admin/ocp_agent/run/ocp_mgragent.199191.sock EndAt:1744185707035412576} ocp_monagent:{Status:{State:running Version:4.3.5-SNAPSHOT Pid:199112 StartAt:1744185703802815393 Extra:map[port:62889]} Socket:/home/admin/ocp_agent/run/ocp_monagent.199112.sock EndAt:-6795364578871345152}] Dangling:[]}
2025-04-09T16:01:53.79881+08:00 ERROR [199078,] caller=agent/admin.go:245:startAgent: wait for agent ready timeout
2025-04-09T16:01:53.79882+08:00 INFO [199078,] caller=agent/admin.go:637:progressEnd: startAgent end
2025-04-09T16:01:53.79883+08:00 WARN [199078,] caller=agent/admin.go:639:progressEnd: updateProgress: missing storedStatus fields:, task_token=
2025-04-09T16:01:53.79885+08:00 ERROR [199078,] caller=agent/admin.go:199:StartAgent: start agent failed: Module=agent, kind=DEADLINE_EXCEEDED, code=wait_for_ready_timeout;
2025-04-09T16:01:53.79889+08:00 INFO [199078,] caller=agent/admin.go:178:Unlock: process 199078 release admin lock
2025-04-09T16:01:53.79894+08:00 INFO [199078,] caller=agentctl/main.go:264:func2: agentctl error fields:, response="{\"successful\":false,\"message\":null,\"error\":\"Module=agent, kind=DEADLINE_EXCEEDED, code=wait_for_ready_timeout; \"}"
mgragent.log
2025-04-09T12:00:54.77203+08:00 INFO [190983,0a236ac6096ea51a] caller=config/config_version.go:107:checkConfigVersionBackups: config stay count:30, config version count:1, no need to rotate.
2025-04-09T12:00:55.8073+08:00 INFO [190983,] caller=shell/exec.go:124:execute: execute shell command start, command=Command{user=root, program=sh, outputType=std, cmd=lscpu | grep "On-line CPU(s) list:", timeout=10s} fields:, duration="29.388µs"
2025-04-09T12:00:55.80748+08:00 INFO [190983,] caller=shell/exec.go:128:execute: c.user=root, currentUser=root
2025-04-09T12:00:55.82809+08:00 INFO [190983,] caller=shell/exec.go:163:execute: execute shell command failed, command=Command{user=root, program=sh, outputType=std, cmd=lscpu | grep "On-line CPU(s) list:", timeout=10s}, exitCode=1, error=: exit status 1 fields:, duration=20.812433ms
2025-04-09T12:00:55.82812+08:00 ERROR [190983,] caller=system/system.go:358:GetHostInfo: failed to execute command to get online CPU list: failed to execute command: Command{user=root, program=sh, outputType=std, cmd=lscpu | grep "On-line CPU(s) list:", timeout=10s}, exitCode: 1, output: , error: : exit status 1
2025-04-09T16:01:43.80507+08:00 WARN [199111,] caller=config/yaml.go:99:validateNode: configs may not be replaced: [ocp.agent.http.ip ocp.agent.manager.http.port]
……
2025-04-09T16:01:45.99354+08:00 INFO [199191,e6c4282b786a8765] caller=config/config_version.go:107:checkConfigVersionBackups: config stay count:30, config version count:2, no need to rotate.
2025-04-09T16:01:47.01252+08:00 INFO [199191,] caller=shell/exec.go:124:execute: execute shell command start, command=Command{user=root, program=sh, outputType=std, cmd=lscpu | grep "On-line CPU(s) list:", timeout=10s} fields: duration="22.188µs"
2025-04-09T16:01:47.01269+08:00 INFO [199191,] caller=shell/exec.go:128:execute: c.user=root, currentUser=root
2025-04-09T16:01:47.03201+08:00 INFO [199191,] caller=shell/exec.go:163:execute: execute shell command failed, command=Command{user=root, program=sh, outputType=std, cmd=lscpu | grep "On-line CPU(s) list:", timeout=10s}, exitCode=1, error=: exit status 1 fields: duration=19.511732ms
2025-04-09T16:01:47.03204+08:00 ERROR [199191,] caller=system/system.go:358:GetHostInfo: failed to execute command to get online CPU list: failed to execute command: Command{user=root, program=sh, outputType=std, cmd=lscpu | grep "On-line CPU(s) list:", timeout=10s}, exitCode: 1, output: , error: : exit status 1
麻烦提供一下 172.16.120.9 的操作系统及版本信息。
另外,请在 root 用户下执行 5 次下面的命令,并提供每次的输出:
time lscpu | grep "On-line CPU(s) list:"