vLLM inference with a fine-tuned qwen2_vl_7b produces abnormal results
Following the "VLLM Inference Acceleration and Deployment" section of the documentation, I used vLLM to run inference with a fine-tuned qwen2-vl-7B model. The results deviate significantly from those obtained with the qwen2-vl-7B best-practice inference code, and the generated content is nonsensical.
Taking the color Lena image as an example:
· Fine-tuned model, best-practice inference result: "The image shows a singer wearing a gown studded with diamonds. The singer wears a straw hat decorated with feathers. The background is in pink, orange, red, and dark red tones, setting off the singer and her gown."
· Fine-tuned model, vLLM inference result: "The image shows a race-cart driver in formal dress, as well as a race-cart driver wearing a formal hat and a veil."
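One place the two paths can plausibly diverge for qwen2-vl is image preprocessing. If swift's qwen2-vl template honors the MAX_PIXELS environment variable (an assumption here, following the qwen2-vl best-practice docs), pinning it identically in both scripts below would rule that out:
import os
# Hedged sketch: pin the image-resolution budget identically for both backends;
# MAX_PIXELS is assumed to be read by swift's qwen2-vl template, and the value
# below is illustrative only.
os.environ['MAX_PIXELS'] = '1003520'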
image_file = 'path/to/lena.png'
text = "请你描述这张图片"  # "Please describe this image"

####### Inference code following the best practice #######
import os

# CUDA_VISIBLE_DEVICES must be set before torch initializes CUDA.
os.environ['CUDA_VISIBLE_DEVICES'] = '1,2,3,4'

import torch
from swift.llm import (
    get_model_tokenizer, get_template, inference, ModelType,
    get_default_template_type,
)
from swift.utils import seed_everything

model_type = ModelType.qwen2_vl_7b_instruct
template_type = get_default_template_type(model_type)
print(f'template_type: {template_type}')

# Load the fine-tuned checkpoint in bfloat16, sharded across the visible GPUs.
model, tokenizer = get_model_tokenizer(
    model_type, torch.bfloat16,
    model_id_or_path="/path/to/model",
    model_kwargs={'device_map': 'auto'},
)
model.generation_config.max_new_tokens = 256
model.generation_config.temperature = 0.05
template = get_template(template_type, tokenizer)
seed_everything(42)
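The snippet as posted stops after seed_everything; the call that produced the quoted output presumably looks like the following sketch, based on the swift 2.x multimodal best-practice examples (the `images` keyword is an assumption on my part):
# Hedged sketch of the HF-backend inference call; `inference` with an
# `images` list follows the swift 2.x multimodal examples.
response, history = inference(model, template, text, images=[image_file])
print(response)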
####### vLLM inference code #######
import os

# CUDA_VISIBLE_DEVICES must be set before torch initializes CUDA.
os.environ['CUDA_VISIBLE_DEVICES'] = '1,2,3,4'

import torch
from swift.llm import (
    ModelType, get_vllm_engine, get_default_template_type,
    get_template, inference_vllm,
)
from swift.utils import seed_everything

model_type = ModelType.qwen2_vl_7b_instruct
model_id_or_path = "/path/to/model"

# Build the vLLM engine from the same fine-tuned checkpoint.
llm_engine = get_vllm_engine(model_type, torch.bfloat16, model_id_or_path=model_id_or_path)
template_type = get_default_template_type(model_type)
template = get_template(template_type, llm_engine.hf_tokenizer)
llm_engine.generation_config.temperature = 0.05
llm_engine.generation_config.max_new_tokens = 256
seed_everything(42)

# `images` expects a list of image paths/URLs rather than a bare string.
request_list = [{'query': text, 'images': [image_file]}]
resp_list = inference_vllm(llm_engine, template, request_list)
response = resp_list[0]['response']
print(response)
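With temperature at 0.05, sampling noise alone is unlikely to explain the divergence, but a deterministic re-run makes the comparison clean. A minimal sketch reusing the engine, template, and request above (assuming temperature 0 maps to greedy decoding, as in vLLM's SamplingParams):
# Hedged sketch: pin greedy decoding on the vLLM side before re-running,
# so any remaining difference from the HF backend is not sampling noise.
llm_engine.generation_config.temperature = 0
resp_list = inference_vllm(llm_engine, template, request_list)
print(resp_list[0]['response'])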
Hardware and system info:
CUDA 11.8 / Linux 5.4.119-19-0009.11 / GPU A800-SXM4-80GB / torch 2.5.1; transformers 4.46.1; tokenizers 0.20.1; swift 2.6.0.dev0; vllm 0.6.3.post2