处理百万量级数据时,单线程太慢了。笔者第一次使用多进程(multiprocessing),记录一下模板。
# Multiprocessing task template: split a task list across worker processes.
import multiprocessing as mp


def xxx_task_xxx_fn(task_list, samples, xxx):
    """Worker body: process ``task_list`` and write results into ``samples``.

    Placeholder -- fill in the per-task logic.  ``samples`` is a Manager
    proxy (dict here; could be a list etc. depending on the task's data
    format) shared across processes; ``xxx`` stands for any extra
    task-specific arguments.
    """
    pass


def xxx_task_xxx(task_list, worker_num, xxx):
    """Split ``task_list`` evenly across ``worker_num`` processes and run them.

    Each worker gets a contiguous slice of ``task_list``; the last worker
    picks up the remainder.  Returns a plain ``dict`` copy of the shared
    results the workers wrote (empty while the worker fn is a placeholder).
    """
    # Ceiling division: every item is covered even when the split is uneven.
    workload = (len(task_list) + worker_num - 1) // worker_num
    # Manager as a context manager so its server process is shut down cleanly.
    with mp.Manager() as manager:
        samples = manager.dict()  # or manager.list() etc., per the task's data format
        plist = []
        for k in range(worker_num):
            # BUG FIX: the original bounded the slice with len(filenames),
            # an undefined name -- the bound must come from task_list itself.
            start = k * workload
            end = min((k + 1) * workload, len(task_list))
            # BUG FIX: the original mp.Process(...) call was missing its
            # closing parenthesis (SyntaxError).
            p = mp.Process(target=xxx_task_xxx_fn,
                           args=(task_list[start:end], samples, xxx))
            p.start()
            plist.append(p)
        for p in plist:
            p.join()
        # Copy out before the manager (and its proxies) shut down.
        return dict(samples)


if __name__ == "__main__":
    # 'spawn' is needed when the workers use the GPU -- fork'd children
    # inherit a CUDA context they cannot safely reuse.  BUG FIX: guarded
    # under __main__ so that children re-importing this module do not call
    # set_start_method() again (a second call raises RuntimeError).
    mp.set_start_method('spawn')