[OpenMMLab AI Combat Camp Phase II Notes] Day 11: Code Tutorial for MMagic, a Fun AIGC Toolkit

1. Install and configure MMagic

1.1 Install PyTorch

# Install PyTorch
!pip3 install torch==1.10.1+cu113 torchvision==0.11.2+cu113 torchaudio==0.10.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
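
These wheels are built for CUDA 11.3, so it is worth confirming that the machine's driver supports it first. A quick optional check (nvidia-smi reports the driver version and the highest CUDA version it supports):

# Check the GPU driver and its supported CUDA version
!nvidia-smi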

1.2 Install MMCV and MMEngine

!pip3 install openmim
!mim install 'mmcv>=2.0.0'
!mim install 'mmengine'

1.3 Install MMagic

Method 1: Install with mim

!mim install 'mmagic'

Method 2: Install from source

!rm -rf mmagic # Remove any existing mmagic folder
!git clone https://github.com/open-mmlab/mmagic.git # Download the mmagic source code
import os
os.chdir('mmagic')
!pip3 install -e .

1.4 Check that the installation is successful

# Check PyTorch
import torch, torchvision
print('PyTorch version:', torch.__version__)
print('CUDA available:', torch.cuda.is_available())
# Check mmcv
import mmcv
from mmcv.ops import get_compiling_cuda_version, get_compiler_version
print('MMCV version:', mmcv.__version__)
print('CUDA version:', get_compiling_cuda_version())
print('Compiler version:', get_compiler_version())
# Check mmagic
import mmagic
print('MMagic version:', mmagic.__version__)
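
If CUDA is available, you can also confirm which GPU PyTorch sees. A small optional check on top of the script above:

# Print the name of the first visible GPU, if any
if torch.cuda.is_available():
    print('GPU:', torch.cuda.get_device_name(0))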

1.5 Install other tool packages

!pip install opencv-python pillow matplotlib seaborn tqdm -i https://pypi.tuna.tsinghua.edu.cn/simple
!pip install clip transformers gradio 'httpx[socks]' diffusers==0.14.0 -i https://pypi.tuna.tsinghua.edu.cn/simple
!mim install 'mmdet>=3.0.0'

2. Application: Colorizing black-and-white photos

2.1 Enter the MMagic home directory

import os
os.chdir('mmagic')

2.2 Download sample images

!wget https://zihao-openmmlab.obs.cn-east-3.myhuaweicloud.com/20230613-MMagic/data/test_colorization.jpg -O test_colorization.jpg

2.3 Run prediction

!python demo/mmagic_inference_demo.py \
        --model-name inst_colorization \
        --img test_colorization.jpg \
        --result-out-dir out_colorization.png
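
The same colorization can also be run through the Python API used in the later sections. A sketch, assuming inst_colorization is exposed through MMagicInferencer with the same arguments as the demo script:

from mmagic.apis import MMagicInferencer

# Build the colorization inferencer and run it on the sample image
colorizer = MMagicInferencer(model_name='inst_colorization')
colorizer.infer(img='test_colorization.jpg', result_out_dir='out_colorization.png')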

3. Application: Text-to-Image with Stable Diffusion

3.1 Import toolkit

from mmagic.apis import MMagicInferencer

3.2 Load model

sd_inferencer = MMagicInferencer(model_name='stable_diffusion')

3.3 Specify Prompt text

# Two example prompts; the second assignment overwrites the first,
# so only the Persian-cat prompt is used below
text_prompts = 'A panda is having dinner at KFC'
text_prompts = 'A Persian cat walking in the streets of New York'

3.4 Run prediction

sd_inferencer.infer(text=text_prompts, result_out_dir='output/sd_res.png')
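
To generate an image for each prompt instead of only the last one, you can loop over the prompts and give every result its own output file. A small sketch reusing the same infer call:

# Generate one image per prompt and save each under a distinct filename
prompts = [
    'A panda is having dinner at KFC',
    'A Persian cat walking in the streets of New York',
]
for i, p in enumerate(prompts):
    sd_inferencer.infer(text=p, result_out_dir=f'output/sd_res_{i}.png')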

4. Application: Text-to-Image with DreamBooth

4.1 Enter the MMagic home directory

import os
os.chdir('mmagic')

4.2 Train DreamBooth on the dataset

!bash tools/dist_train.sh configs/dreambooth/dreambooth-lora.py 1
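
dist_train.sh is the distributed launcher; the trailing 1 is the number of GPUs. On a single-GPU machine you can equivalently use the plain training entry point (a sketch assuming the standard OpenMMLab tools/train.py layout):

# Single-GPU alternative to dist_train.sh
!python tools/train.py configs/dreambooth/dreambooth-lora.py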

4.3 Use the trained model to make predictions

import torch
from mmengine import Config

from mmagic.registry import MODELS
from mmagic.utils import register_all_modules

register_all_modules()

# Build the DreamBooth-LoRA model from its config
# (paths below are relative to the mmagic home directory entered in 4.1)
cfg = Config.fromfile('configs/dreambooth/dreambooth-lora.py')
dreambooth_lora = MODELS.build(cfg.model)

# Load the checkpoint produced by the training step above
state = torch.load('work_dirs/dreambooth-lora/iter_1000.pth')['state_dict']

def convert_state_dict(state):
    """Rename checkpoint keys so they match the built model's state dict."""
    state_dict_new = {}
    for k, v in state.items():
        # Strip the '.module' prefix left by the distributed training wrapper
        if '.module' in k:
            k_new = k.replace('.module', '')
        else:
            k_new = k
        # Rename the VAE attention keys (to_q/to_k/to_v/to_out) to the
        # names the model expects (query/key/value/proj_attn)
        if 'vae' in k:
            if 'to_q' in k:
                k_new = k.replace('to_q', 'query')
            elif 'to_k' in k:
                k_new = k.replace('to_k', 'key')
            elif 'to_v' in k:
                k_new = k.replace('to_v', 'value')
            elif 'to_out' in k:
                k_new = k.replace('to_out.0', 'proj_attn')
        state_dict_new[k_new] = v
    return state_dict_new

dreambooth_lora.load_state_dict(convert_state_dict(state))
dreambooth_lora = dreambooth_lora.cuda()

# Generate images of the personalized subject; in a notebook the PIL image
# returned as the last expression of a cell is displayed inline
samples = dreambooth_lora.infer('side view of sks dog', guidance_scale=5)
samples['samples'][0]
samples = dreambooth_lora.infer('ear close-up of sks dog', guidance_scale=5)
samples['samples'][0]
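
Outside a notebook the bare samples['samples'][0] expressions display nothing, so it is handy to write the generated images to disk. A short sketch, assuming infer returns PIL images under the 'samples' key (the ControlNet sections below call .save() on them the same way):

# Save each generated image under a descriptive filename
for name, p in [('side', 'side view of sks dog'), ('ear', 'ear close-up of sks dog')]:
    result = dreambooth_lora.infer(p, guidance_scale=5)
    result['samples'][0].save(f'sks_dog_{name}.png')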

5. Application: Image-to-Image with ControlNet (Canny)

5.1 Enter the MMagic home directory

import os
os.chdir('mmagic')

5.2 Import toolkit

import cv2
import numpy as np
import mmcv
from mmengine import Config
from PIL import Image

from mmagic.registry import MODELS
from mmagic.utils import register_all_modules

register_all_modules()

5.3 Load ControlNet model

cfg = Config.fromfile('configs/controlnet/controlnet-canny.py')
controlnet = MODELS.build(cfg.model).cuda()

5.4 Input Canny edge map

control_url = 'https://user-images.githubusercontent.com/28132635/230288866-99603172-04cb-47b3-8adb-d1aa532d1d2c.jpg'
control_img = mmcv.imread(control_url)
# Extract Canny edges (low/high thresholds 100 and 200), then replicate
# the single edge channel three times to form an RGB control image
control = cv2.Canny(control_img, 100, 200)
control = control[:, :, None]
control = np.concatenate([control] * 3, axis=2)
control = Image.fromarray(control)
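
Before generation it can help to inspect the edge map. An optional step (the filename is arbitrary):

# Save the Canny control image for visual inspection
control.save('control_canny.png')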

5.5 Prompt

prompt = 'Room with blue walls and a yellow ceiling.'

5.6 Run prediction

output_dict = controlnet.infer(prompt, control=control)
samples = output_dict['samples']
for idx, sample in enumerate(samples):
    sample.save(f'sample_{idx}.png')
controls = output_dict['controls']
for idx, control in enumerate(controls):
    control.save(f'control_{idx}.png')

6. Application: Image-to-Image with ControlNet (Pose)

6.1 Enter the MMagic home directory

import os
os.chdir('mmagic')

6.2 Import toolkit

import mmcv
from mmengine import Config
from PIL import Image

from mmagic.registry import MODELS
from mmagic.utils import register_all_modules

register_all_modules()

6.3 Load ControlNet model

cfg = Config.fromfile('configs/controlnet/controlnet-pose.py')
# convert ControlNet's weight from SD-v1.5 to Counterfeit-v2.5
cfg.model.unet.from_pretrained = 'gsdf/Counterfeit-V2.5'
cfg.model.vae.from_pretrained = 'gsdf/Counterfeit-V2.5'
cfg.model.init_cfg['type'] = 'convert_from_unet'
controlnet = MODELS.build(cfg.model).cuda()
# call init_weights manually to convert weight
controlnet.init_weights()

6.4 Prompt

prompt = 'masterpiece, best quality, sky, black hair, skirt, sailor collar, looking at viewer, short hair, building, bangs, neckerchief, long sleeves, cloudy sky, power lines, shirt, cityscape, pleated skirt, scenery, blunt bangs, city, night, black sailor collar, closed mouth'

6.5 Input pose map

control_url = 'https://user-images.githubusercontent.com/28132635/230380893-2eae68af-d610-4f7f-aa68-c2f22c2abf7e.png'
control_img = mmcv.imread(control_url)
control = Image.fromarray(control_img)
control.save('control.png')

6.6 Run prediction

output_dict = controlnet.infer(prompt, control=control, width=512, height=512, guidance_scale=7.5)
samples = output_dict['samples']
for idx, sample in enumerate(samples):
    sample.save(f'sample_{idx}.png')
controls = output_dict['controls']
for idx, control in enumerate(controls):
    control.save(f'control_{idx}.png')

7. Application: ControlNet Animation

7.1 Method 1: Launch the Gradio demo from the command line

!python demo/gradio_controlnet_animation.py

7.2 Method 2: MMagic API

# Import the toolkit
from mmagic.apis import MMagicInferencer

# Create an MMagicInferencer instance for ControlNet animation
editor = MMagicInferencer(model_name='controlnet_animation')
# Specify the prompt and the negative prompt
prompt = 'a girl, black hair, T-shirt, smoking, best quality, extremely detailed'
negative_prompt = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'
# Input video (a sample clip is available at the URL below)
# https://user-images.githubusercontent.com/12782558/227418400-80ad9123-7f8e-4c1a-8e19-0892ebad2a4f.mp4
video = '../run_forrest_frames_rename_resized.mp4'
save_path = '../output_video.mp4'
# Run prediction
editor.infer(video=video, prompt=prompt, image_width=512, image_height=512, negative_prompt=negative_prompt, save_path=save_path)
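
If the input video is not already on disk, you can fetch the sample clip linked in the comment above before running the cell (the output path matches the video variable):

# Download the sample video to the expected location
!wget https://user-images.githubusercontent.com/12782558/227418400-80ad9123-7f8e-4c1a-8e19-0892ebad2a4f.mp4 -O ../run_forrest_frames_rename_resized.mp4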

Source: https://blog.csdn.net/qq_41776136/article/details/131254457