"""Call a large-language-model API concurrently.

Fanning the requests out with the built-in ``concurrent.futures`` thread
pool makes the calls more than twice as fast as a sequential loop.
"""
import concurrent.futures

import pandas as pd
import requests
import tqdm as tqdm  # original module import, kept; shadowed by the line below
from tqdm import tqdm  # bug fix: the code calls tqdm(...) as a function, not a module
def process_row(row, index, prompt, timeout=60):
    """Send one row's text to the chat-completions endpoint and attach the result.

    Args:
        row: One DataFrame row (Series or mapping); the column '列名' holds the
            user text to send.  Mutated in place: '推理结果' receives the model
            output and 'processed' is set True on success.
        index: Row index, used only in the error message.
        prompt: System prompt sent with every request.
        timeout: Request timeout in seconds (default 60) so a hung connection
            cannot stall a worker thread forever.

    Returns:
        The same row object, updated on success and untouched on failure so
        the resume logic will retry it on the next run.
    """
    url = 'http://0.0.0.0:1111/v1/chat/completions'
    data = {
        "model": "your/model/path/where/you/start/in/vllm",
        "messages": [
            {"role": "system", "content": prompt},
            {"role": "user", "content": row['列名']},
        ],
        "temperature": 0,
        "n": 1,
        "max_tokens": 2048,
        "stop": ["<|im_end|>", "<|endoftext|>"],
    }
    try:
        response = requests.post(url, json=data, timeout=timeout)
        if response.status_code == 200:
            result = response.json()['choices'][0]['message']['content']
            row['推理结果'] = result
            # Mark done only on success: the original flagged the row as
            # processed even on an HTTP error, so failed rows were silently
            # skipped on resume instead of being retried.
            row['processed'] = True
        else:
            print('调用失败')
            print(response.status_code)
    except Exception as e:
        # Broad catch on purpose: one bad row must not kill the thread pool.
        print(f'Error processing index {index}: {e}')
    return row
def extract(thread_nums, prompt='', checkpoint_every=100):
    """Run the whole dataset through the model with a thread pool, resumably.

    Args:
        thread_nums: Number of worker threads; size it to what the inference
            endpoint can sustain.
        prompt: System prompt forwarded to every request.  (The original
            referenced an undefined name ``your_prompt``; this parameter
            fixes that NameError and defaults to an empty prompt.)
        checkpoint_every: Write the partial results to ``save_path`` every
            this many completed rows, so an interruption loses at most that
            much work.  Set 0/None to save only at the end.
    """
    data_path = '/your/data/path'
    save_path = '/need/save/data/path'
    # Resume support: if a partial output file exists, continue from it.
    try:
        df = pd.read_csv(save_path)
        print('继续抽取')
    except FileNotFoundError:
        df = pd.read_csv(data_path)
        # Bookkeeping columns so an interrupted run can pick up where it left off.
        df['processed'] = False
        df['推理结果'] = None
    # Only rows not yet processed (elementwise pandas comparison, not `is False`).
    to_process = df[df['processed'] == False]
    done = 0
    with concurrent.futures.ThreadPoolExecutor(max_workers=thread_nums) as executor:
        futures = [
            executor.submit(process_row, row, index, prompt)
            for index, row in to_process.iterrows()
        ]
        # tqdm over as_completed shows progress as results arrive.
        for future in tqdm(concurrent.futures.as_completed(futures),
                           total=len(futures), desc='Processing'):
            result = future.result()
            # result.name is the Series' original DataFrame index.
            df.at[result.name, '推理结果'] = result['推理结果']
            df.at[result.name, 'processed'] = result['processed']
            done += 1
            # Periodic checkpoint — the original saved only once at the very
            # end, which defeated the resume mechanism on a crash.
            if checkpoint_every and done % checkpoint_every == 0:
                df.to_csv(save_path, index=False)
    df.to_csv(save_path, index=False)
    print("推理完成,文件已保存。")
if __name__ == '__main__':
    # Pick the worker count to match the capacity of your API server.
    worker_count = 4
    extract(worker_count)