并发调用大模型API接口

并发调用大模型API接口

通过并发调用大模型API接口,整体处理速度可以比逐条串行调用快一倍以上。

主要使用了Python标准库内置的concurrent.futures模块(ThreadPoolExecutor线程池)。

import tqdm as tqdm
import requests
import pandas as pd
import concurrent.futures

def process_row(row, index, prompt, timeout=600):
    """Send one row to the chat-completions endpoint and record the reply.

    Args:
        row: One table row (indexable by column name, e.g. the Series
            yielded by ``DataFrame.iterrows``). Its ``'列名'`` value is sent
            as the user message.
        index: Label of the row; used only in the error message.
        prompt: System prompt sent ahead of the user message.
        timeout: Timeout in seconds passed to ``requests.post`` so a stalled
            connection cannot block a worker thread forever.

    Returns:
        The same ``row`` object. After an HTTP 200 reply ``row['推理结果']``
        holds the generated text and ``row['processed']`` is True. On any
        failure (non-200 status, network error, malformed reply) the row is
        returned unmodified so that a later resumed run retries it.
    """
    url = 'http://0.0.0.0:1111/v1/chat/completions'
    payload = {
        "model": "your/model/path/where/you/start/in/vllm",
        "messages": [
            {"role": "system", "content": prompt},
            {"role": "user", "content": row['列名']},
        ],
        "temperature": 0,
        "n": 1,
        "max_tokens": 2048,
        "stop": ["<|im_end|>", "<|endoftext|>"],
    }
    try:
        response = requests.post(url, json=payload, timeout=timeout)
        if response.status_code == 200:
            result = response.json()['choices'][0]['message']['content']
            row['推理结果'] = result
            # Flag the row only once a result has actually been stored;
            # flagging failed calls would make the checkpoint skip them.
            row['processed'] = True
        else:
            print('调用失败')
            print(response.status_code)
    except Exception as e:
        # Per-row boundary: one failed request must not abort the whole
        # batch, so report it and hand the row back unprocessed.
        print(f'Error processing index {index}: {e}')
    return row


def extract(thread_nums):
    """Run ``process_row`` over every unprocessed row using a thread pool.

    Loads the checkpoint at ``save_path`` if it exists, otherwise the raw
    input at ``data_path`` (adding the ``processed`` and ``推理结果`` columns).
    Each row whose ``processed`` flag is false is submitted to a
    ``ThreadPoolExecutor``; after every completed row the whole table is
    written back to ``save_path`` so an interrupted run can resume.

    Args:
        thread_nums: Maximum number of worker threads.
    """
    # The module-level ``import tqdm as tqdm`` binds the *module*, which is
    # not callable; bind the progress-bar class locally instead.
    from tqdm import tqdm as progress_bar

    data_path = '/your/data/path'
    save_path = '/need/save/data/path'
    # Resume from the checkpoint when one exists.
    try:
        df = pd.read_csv(save_path)
        print('继续抽取')
    except FileNotFoundError:
        df = pd.read_csv(data_path)
        # Bookkeeping columns that make resuming after an interruption possible.
        df['processed'] = False
        df['推理结果'] = None
    # A checkpoint that holds no results yet is parsed as an all-NaN float
    # column; force object dtype so string results can be stored in it.
    df['推理结果'] = df['推理结果'].astype(object)
    # Only rows that have not been processed yet.
    to_process = df[~df['processed'].astype(bool)]
    with concurrent.futures.ThreadPoolExecutor(max_workers=thread_nums) as executor:
        # NOTE: ``your_prompt`` is not defined in this function; it must exist
        # as a module-level name holding the system prompt.
        futures = [
            executor.submit(process_row, row, index, your_prompt)
            for index, row in to_process.iterrows()
        ]
        # Wrap as_completed so the bar advances as each request finishes.
        completed = concurrent.futures.as_completed(futures)
        for future in progress_bar(completed, total=len(futures), desc='Processing'):
            result = future.result()
            # ``result.name`` is the row's index label in ``df``.
            df.at[result.name, '推理结果'] = result['推理结果']
            df.at[result.name, 'processed'] = result['processed']
            # Checkpoint after every row so progress survives an interruption.
            df.to_csv(save_path, index=False)
    print("推理完成,文件已保存。")

if __name__ == '__main__':
    # Choose the thread count according to how much concurrent load the
    # API endpoint can handle.
    extract(4)

猜你喜欢

转载自blog.csdn.net/weixin_48435461/article/details/141254527