res_list = []
i=0
def halfSearch (func, left, right, x):
global i
i = i+1
if right > left:
# print(i,left,right)
val = func(left, right)
if val < x:
res_list.append([left, right])
else:
mid = int(left + (right - left)/2)
if func(left, mid) <= x:
res_list.append([left, mid])
else:
halfSearch(func, left, mid, x)
if func(mid, right) <= x:
res_list.append([mid, right])
else:
halfSearch(func, mid+1, right, x)
else:
# 不存在
return - # 基本判断1
背景
账户表只有账户有索引,但不连续,并且跨度极大 要用这个索引,但一次不能查询太多数据,会对线上造成压力 现用二分查找确定一个批次的区间边界
场景数据模拟
import random
from ai.box.db import DbConnect
class Sql():
create_acc="""
CREATE TABLE if not exists acc(
id bigint(11) NOT NULL AUTO_INCREMENT primary key COMMENT '主键',
c_acc bigint(19),
c_time datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '提交时间',
index(c_acc));
"""
class MysqlInit(DbConnect):
def __init__(self) -> None:
super().__init__(db_file="/opt/tpf/aiwks/code/aisty/ai/box/db.db")
def create_acc(self):
"""创建记录表
"""
with self.mysql() as connection:
cursor = connection.cursor()
cursor.execute(Sql.create_acc)
connection.commit()
cursor.close()
def gen_data(self):
acc = 1
for i in range(100000):
add = random.randint(10,100000000)
acc = i + add
sql = "insert into acc(c_acc)values({})".format(acc)
with self.mysql() as connection:
cursor = connection.cursor()
cursor.execute(sql)
connection.commit()
cursor.close()
mi = MysqlInit()
mi.create_acc()
mi.gen_data()
import pandas as pd
from ai.box.db import DbConnect
class Sql():
acc_count = """select count(c_acc)
from db1.acc
where c_acc>{} and c_acc<={}"""
acc_check = "select count(c_acc) from db1.acc where c_acc>1 and c_acc<=100000000"
res_list = []
i=0
def halfSearch (func, left, right, x):
global i
# 基本判断
i = i+1
if right > left:
# print(i,left,right)
val = func(left, right)
if val < x:
res_list.append([left, right, val])
else:
mid = int(left + (right - left)/2)
val1 = func(left, mid)
if val1 <= x:
res_list.append([left, mid, val1])
else:
halfSearch(func, left, mid, x)
val2 = func(mid, right)
if val2 <= x:
res_list.append([mid, right, val2])
else:
halfSearch(func, mid+1, right, x)
else:
# 不存在
return -1
class MysqlInit(DbConnect):
"""mysql创库后初始化操作
"""
def __init__(self) -> None:
super().__init__(db_file="/opt/tpf/aiwks/code/aisty/ai/box/db.db")
def db_count(self,begin,end):
sql = Sql.acc_count.format(begin,end)
# print(sql)
with self.mysql() as connection:
cursor = connection.cursor()
cursor.execute(sql)
col = [c[0] for c in cursor.description]
res = cursor.fetchall()
data = pd.DataFrame(res,columns=col)
cursor.close()
return data.iloc[0][0]
def count(self, sql):
with self.mysql() as connection:
cursor = connection.cursor()
cursor.execute(sql)
col = [c[0] for c in cursor.description]
res = cursor.fetchall()
data = pd.DataFrame(res,columns=col)
cursor.close()
return data.iloc[0][0]
mi = MysqlInit()
left=1
right=100000000
halfSearch (mi.db_count, left=left, right=right, x=5000)
res_list
[[1, 25000000, 3299],
[25000000, 50000000, 3239],
[50000001, 75000000, 3444],
[75000000, 100000000, 3276]]
data=pd.DataFrame(res_list,columns=["A","B","C"])
print("二分计算累计:",data.C.sum())
print("数据库实际行数:",mi.count(Sql.acc_check))
二分计算累计: 13258
数据库实际行数: 13258
res_list
[[1, 25000000, 3299],
[25000000, 50000000, 3239],
[50000001, 75000000, 3444],
[75000000, 100000000, 3276]]
有的下一个的left等于上一个的right
有的则是上一个right+1
如果是+1,则会遗漏一个数据
res_list = []
i=0
def halfSearch (func, left, right, x):
global i
# 基本判断
i = i+1
if right > left:
# print(i,left,right)
val = func(left, right)
if val < x:
res_list.append([left, right, val])
else:
mid = int(left + (right - left)/2)
val1 = func(left, mid)
if val1 <= x:
res_list.append([left, mid, val1])
else:
halfSearch(func, left, mid, x)
val2 = func(mid, right)
if val2 <= x:
res_list.append([mid, right, val2])
else:
halfSearch(func, mid, right, x)
else:
# 不存在
return -1
左开右闭的区间
left=1 right=100000000 halfSearch (mi.db_count, left=left, right=right, x=1000) res_list [[1, 6250000, 796], [6250000, 12500000, 811], [12500000, 18750000, 846], [18750000, 25000000, 846], [25000000, 31250000, 776], [31250000, 37500000, 810], [37500000, 43750000, 799], [43750000, 50000000, 854], [50000000, 56250000, 879], [56250000, 62500000, 848], [62500000, 68750000, 902], [68750000, 75000000, 815], [75000000, 81250000, 785], [81250000, 87500000, 889], [87500000, 93750000, 817], [93750000, 100000000, 785]] 每个区间都是左开右闭,这样,所有区间就无缝连接起来了 也看具体业务需求,不要求这个,就无所谓