Oceanbase归因分析
通过现场测试发现Oceanbase 连接租户数据库报错Timeout,除了ob_query_timeout和ob_trx_timeout外,还有可能是服务分配的运行内存不足导致程序内部获取JDBC连接报错连接超时。
通过筛查磁盘空间富余。
show parameters where name in ('memory_limit','memory_limit_percentage','system_memory','log_disk_size','log_disk_percentage','datafile_size','datafile_disk_percentage');
- 步骤三:查询集群server级资源分配情况,查询可知data_file_free和memory_free不足
select zone,concat(SVR_IP,’:’,SVR_PORT) observer,
cpu_capacity_max cpu_total,cpu_assigned_max cpu_assigned,
cpu_capacity-cpu_assigned_max as cpu_free,
round(memory_limit/1024/1024/1024,2) as memory_total,
round((memory_limit-mem_capacity)/1024/1024/1024,2) as system_memory,
round(mem_assigned/1024/1024/1024,2) as mem_assigned,
round((mem_capacity-mem_assigned)/1024/1024/1024,2) as memory_free,
round(log_disk_capacity/1024/1024/1024,2) as log_disk_capacity,
round(log_disk_assigned/1024/1024/1024,2) as log_disk_assigned,
round((log_disk_capacity-log_disk_assigned)/1024/1024/1024,2) as log_disk_free,
round((data_disk_capacity/1024/1024/1024),2) as data_disk,
round((data_disk_in_use/1024/1024/1024),2) as data_disk_used,
round((data_disk_capacity-data_disk_in_use)/1024/1024/1024,2) as data_disk_free
from oceanbase.gv$ob_servers;
select a.zone,a.svr_ip,b.tenant_name,b.tenant_type, a.max_cpu, a.min_cpu,
round(a.memory_size/1024/1024/1024,2) memory_size_gb,
round(a.log_disk_size/1024/1024/1024,2) log_disk_size,
round(a.log_disk_in_use/1024/1024/1024,2) log_disk_in_use,
round(a.data_disk_in_use/1024/1024/1024,2) data_disk_in_use
from oceanbase.gv$ob_units a join oceanbase.dba_ob_tenants b on a.tenant_id=b.tenant_id order by b.tenant_name;
// 重新分配datafile_size
alter system set datafile_size = '200G';
// 重新分配system_memory
alter system set system_memory = '128G';
// 重启
obd cluster restart xxxx
- 其余尝试操作:此类操作能解决其余timeout问题,但不能有效解决因内存分配不足产生的timeout问题。
alter system set datafile_disk_percentage = 95;
alter system set internal_sql_execute_timeout = "2m";
alter system set ob_query_timeout = '5m';