Python & pandas | 使用多进程处理 DataFrame:根据某一列并行计算新列

from multiprocessing import cpu_count
from joblib import Parallel, delayed
import pandas as pd

# Worker count handed to joblib.Parallel (as n_jobs) inside apply_parallel.
cores = cpu_count()


def proc(x=None):
	"""Compute the new-column value for one entry of the source column.

	This is a template placeholder: replace the body with the real
	per-value computation. It is invoked once per element by ``func``
	through ``Series.apply``, so it must accept that element as its
	argument.

	Args:
		x: A single value taken from the column being processed.
			Defaults to ``None`` so the original zero-argument call
			form keeps working.

	Returns:
		The value to store in the new column for ``x``. The placeholder
		implementation returns ``x`` unchanged.
	"""
	# TODO: replace this identity mapping with the real computation.
	results = x
	return results
	
def apply_parallel(df_grouped, func):
	"""Run ``func`` on every group in parallel and concatenate the outputs.

	Args:
		df_grouped: Iterable yielding ``(name, group)`` pairs, such as a
			pandas groupby object. Only the group part is used.
		func: Callable invoked once per group; its return values are
			handed to ``pd.concat``.

	Returns:
		The result of ``pd.concat`` over the per-group outputs.
	"""
	# The group key is not needed by the workers, so it is discarded.
	tasks = (delayed(func)(group) for _, group in df_grouped)
	worker_pool = Parallel(n_jobs=cores)
	per_group_outputs = worker_pool(tasks)
	return pd.concat(per_group_outputs)

def func(df):
	"""Add a ``"new"`` column derived from the ``"old"`` column.

	Each element of ``df["old"]`` is passed to ``proc`` and the results
	are stored in ``df["new"]``. The frame is modified in place and the
	same object is returned.

	Args:
		df: DataFrame containing an ``"old"`` column.

	Returns:
		``df`` itself, now carrying the ``"new"`` column.
	"""
	source_values = df["old"]
	df["new"] = source_values.apply(proc)
	return df

# Split the frame into one group per distinct value of the "old" column.
# NOTE(review): `df` is not defined in the visible snippet; it must be
# created (e.g. loaded from a file) before this point.
df_grouped = df.groupby("old")
# Run `func` on each group in parallel and concatenate the per-group results.
df_new = apply_parallel(df_grouped, func)

猜你喜欢

转载自blog.csdn.net/weixin_43236007/article/details/107537523