LLama-3.2-Vision Gradio 流式推理 Web Demo
LLama-3.2-11B/90B-Vision-Instruct
使用Gradio+流式输出+LLama3.2-Vision构建模型推理web demo
模型下载
- Huggingface:
  - https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct
  - https://huggingface.co/meta-llama/Llama-3.2-90B-Vision-Instruct
- ModelScope:
  - https://modelscope.cn/models/LLM-Research/Llama-3.2-11B-Vision-Instruct
  - https://modelscope.cn/models/LLM-Research/Llama-3.2-90B-Vision-Instruct
环境
torch
pip install torch==2.2.2 torchvision==0.17.2 torchaudio==2.2.2 --index-url https://download.pytorch.org/whl/cu121
gradio
pip install gradio==5.0.1
transformers
pip install transformers==4.45.0
accelerate
pip install accelerate==0.34.2
代码
import torch
import gradio as gr
from transformers import MllamaForConditionalGeneration, AutoProcessor, TextIteratorStreamer
from threading import Thread
# Placeholder path; point it at the directory of the downloaded checkpoint.
model_id = "/your/path/Llama-3_2-90B-Vision-Instruct/"

# Load the weights in bfloat16 using the "balanced" device map; "auto" is the
# alternative the original author left disabled.
load_options = {"torch_dtype": torch.bfloat16, "device_map": "balanced"}
model = MllamaForConditionalGeneration.from_pretrained(model_id, **load_options)

# The processor is loaded from the same checkpoint directory as the model.
processor = AutoProcessor.from_pretrained(model_id)
def generate(image, user_text):
    """Stream the model's answer to ``user_text`` about ``image``.

    Builds a single-turn chat prompt with one image slot and the user's text,
    runs ``model.generate`` on a background thread, and yields the accumulated
    response every time the streamer produces a new chunk.

    Args:
        image: The uploaded picture, or ``None`` when nothing was uploaded.
        user_text: The prompt typed by the user.

    Yields:
        The full text generated so far (not only the newest chunk).

    Raises:
        gr.Error: If no image was supplied.
    """
    if image is None:
        # The prompt always contains an image slot, so inputs cannot be built
        # without a picture; fail with a readable message instead.
        raise gr.Error("Please upload an image.")

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": user_text},
            ],
        }
    ]
    print("user_input: ", user_text)

    input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
    # The chat template already emits the begin-of-text token, so special
    # tokens are disabled here to avoid prepending a second one (this matches
    # the official model card example).
    inputs = processor(
        image,
        input_text,
        add_special_tokens=False,
        return_tensors="pt",
    ).to(model.device)

    # Streamer that hands decoded text chunks to this generator as they arrive.
    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)

    # Run generation on a worker thread so the loop below can consume the
    # streamer while tokens are still being produced.
    generation_kwargs = dict(**inputs, streamer=streamer, max_new_tokens=512)
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    generated_text = ''
    for new_text in streamer:
        generated_text += new_text
        yield generated_text
    # The streamer is exhausted, so generation is finishing; wait for the
    # worker thread to exit before logging.
    thread.join()

    print(f"output: {generated_text} \n")
# Build the Gradio interface: an image upload and a prompt box feed
# `generate`, and a single textbox displays its output.
image_input = gr.Image(type="pil", label="Upload Image")
prompt_input = gr.Textbox(label="Input Text", placeholder="Write your prompt here...")
output_box = gr.Textbox(label="Generated text")

demo = gr.Interface(
    fn=generate,
    inputs=[image_input, prompt_input],
    outputs=output_box,
    title="LLama3.2 Vision",
    description="Upload an image and input text.",
)

# Start the Gradio server on the specified host and port.
demo.launch(server_name="0.0.0.0", server_port=1234)