AIMET API 文档(9)
1.2.8 配置定义
类 aimet_common.defs.CostMetric
用于衡量模型/层成本的指标枚举
-
mac= 1
MAC:针对计算要求进行成本建模 -
memory= 2
内存:针对空间需求进行成本建模
类 aimet_common.defs.CompressionScheme
AIMET支持的压缩方案枚举
-
channel_pruning= 3
通道修剪 -
spatial_svd= 2
空间奇异值分解 -
weight_svd= 1
权重奇异值分解
类 aimet_torch.defs.ModuleCompRatioPair(模块,comp_ratio )
一对 torch.nn.module 和一个压缩比
变量
-
module – torch.nn.module 类型的模块
-
comp_ratio – 压缩比。压缩率是压缩模型的成本与原始模型的成本之比。
1.2.9 代码示例
所需的导入(import)
import os
from decimal import Decimal

import torch

# Compression-related imports
from aimet_common.defs import CostMetric, CompressionScheme, GreedySelectionParameters, RankSelectScheme
from aimet_torch.defs import WeightSvdParameters, SpatialSvdParameters, ChannelPruningParameters, \
    ModuleCompRatioPair
from aimet_torch.compress import ModelCompressor
# Example helper module shipped with AIMET; provides the MNIST data loader and
# training routine used by the channel-pruning and fine-tuning examples below.
from aimet_torch.examples import mnist_torch_model
评估函数
def evaluate_model(model: torch.nn.Module, eval_iterations: int, use_cuda: bool = False) -> float:
    """Placeholder for the user-defined model-evaluation callback.

    AIMET requires exactly this signature; if your evaluation routine differs,
    wrap it in a small adapter. Honoring ``eval_iterations`` is not strictly
    required, but evaluating a full validation epoch on every call makes the
    compression run noticeably slower.

    :param model: Model to evaluate.
    :param eval_iterations: Number of iterations to use for evaluation;
        ``None`` means the entire epoch.
    :param use_cuda: If True, evaluate using GPU acceleration.
    :return: A single float (accuracy) representing the model's performance.
    """
    # Dummy fixed score — a real implementation would run inference here.
    return 0.5
在自动模式下使用空间 SVD 进行压缩,并以 multiplicity = 8 进行秩舍入
def spatial_svd_auto_mode():
    """Compress a trained MNIST model with Spatial SVD in auto mode.

    Greedy ratio selection targets an overall compression ratio of 0.8,
    trying 10 candidate ratios per layer; ``model.conv1`` is excluded from
    compression, and ranks are rounded to a multiplicity of 8.
    """
    # Load the trained MNIST model.
    model = torch.load(os.path.join('../', 'data', 'mnist_trained_on_GPU.pth'))

    # Greedy search over 10 candidate compression ratios per layer,
    # aiming for an overall ratio of 0.8.
    greedy = GreedySelectionParameters(target_comp_ratio=Decimal(0.8),
                                       num_comp_ratio_candidates=10)
    auto_mode = SpatialSvdParameters.AutoModeParams(greedy,
                                                    modules_to_ignore=[model.conv1])
    svd_params = SpatialSvdParameters(mode=SpatialSvdParameters.Mode.auto,
                                      params=auto_mode, multiplicity=8)

    # A single call performs the entire compression.
    compressed_model, stats = ModelCompressor.compress_model(
        model,
        eval_callback=evaluate_model,
        eval_iterations=1000,
        input_shape=(1, 1, 28, 28),
        compress_scheme=CompressionScheme.spatial_svd,
        cost_metric=CostMetric.mac,
        parameters=svd_params)

    print(compressed_model)
    print(stats)  # Stats object can be pretty-printed easily
在手动模式下使用 Spatial SVD 进行压缩
def spatial_svd_manual_mode():
    """Compress a trained MNIST model with Spatial SVD in manual mode.

    Per-layer compression ratios are pinned explicitly: 0.5 for ``conv1``
    and 0.4 for ``conv2``.
    """
    # Load the trained MNIST model.
    model = torch.load(os.path.join('../', 'data', 'mnist_trained_on_GPU.pth'))

    # Fix the compression ratio of each targeted layer by hand.
    layer_ratios = [ModuleCompRatioPair(model.conv1, 0.5),
                    ModuleCompRatioPair(model.conv2, 0.4)]
    svd_params = SpatialSvdParameters(
        mode=SpatialSvdParameters.Mode.manual,
        params=SpatialSvdParameters.ManualModeParams(layer_ratios))

    # A single call performs the entire compression.
    compressed_model, stats = ModelCompressor.compress_model(
        model,
        eval_callback=evaluate_model,
        eval_iterations=1000,
        input_shape=(1, 1, 28, 28),
        compress_scheme=CompressionScheme.spatial_svd,
        cost_metric=CostMetric.mac,
        parameters=svd_params)

    print(compressed_model)
    print(stats)  # Stats object can be pretty-printed easily
在自动模式下使用权重 SVD 进行压缩
def weight_svd_auto_mode():
    """Compress a trained MNIST model with Weight SVD in auto mode.

    Greedy rank selection targets an overall compression ratio of 0.8 with
    10 candidate ratios per layer; ``model.conv1`` is left uncompressed.
    """
    # Load the trained MNIST model.
    model = torch.load(os.path.join('../', 'data', 'mnist_trained_on_GPU.pth'))

    # Greedy search over 10 candidate compression ratios per layer,
    # aiming for an overall ratio of 0.8.
    greedy = GreedySelectionParameters(target_comp_ratio=Decimal(0.8),
                                       num_comp_ratio_candidates=10)
    auto_mode = WeightSvdParameters.AutoModeParams(
        rank_select_scheme=RankSelectScheme.greedy,
        select_params=greedy,
        modules_to_ignore=[model.conv1])
    svd_params = WeightSvdParameters(mode=WeightSvdParameters.Mode.auto,
                                     params=auto_mode)

    # A single call performs the entire compression.
    compressed_model, stats = ModelCompressor.compress_model(
        model,
        eval_callback=evaluate_model,
        eval_iterations=1000,
        input_shape=(1, 1, 28, 28),
        compress_scheme=CompressionScheme.weight_svd,
        cost_metric=CostMetric.mac,
        parameters=svd_params)

    print(compressed_model)
    print(stats)  # Stats object can be pretty-printed easily
在手动模式下使用权重 SVD 进行压缩,并以 multiplicity = 8 进行秩舍入
def weight_svd_manual_mode():
    """Compress a trained MNIST model with Weight SVD in manual mode.

    Per-layer compression ratios are pinned explicitly (0.5 for ``conv1``,
    0.4 for ``conv2``) and ranks are rounded to a multiplicity of 8.
    """
    # Load the trained MNIST model.
    model = torch.load(os.path.join('../', 'data', 'mnist_trained_on_GPU.pth'))

    # Fix the compression ratio of each targeted layer by hand.
    layer_ratios = [ModuleCompRatioPair(model.conv1, 0.5),
                    ModuleCompRatioPair(model.conv2, 0.4)]
    svd_params = WeightSvdParameters(
        mode=WeightSvdParameters.Mode.manual,
        params=WeightSvdParameters.ManualModeParams(layer_ratios),
        multiplicity=8)

    # A single call performs the entire compression.
    compressed_model, stats = ModelCompressor.compress_model(
        model,
        eval_callback=evaluate_model,
        eval_iterations=1000,
        input_shape=(1, 1, 28, 28),
        compress_scheme=CompressionScheme.weight_svd,
        cost_metric=CostMetric.mac,
        parameters=svd_params)

    print(compressed_model)
    print(stats)  # Stats object can be pretty-printed easily
在自动模式下使用通道修剪进行压缩
def channel_pruning_auto_mode():
    """Compress a trained MNIST model with Channel Pruning in auto mode.

    Greedy ratio selection targets an overall compression ratio of 0.8 with
    10 candidate ratios per layer; ``model.conv1`` is excluded. Weight
    reconstruction uses 500 samples drawn from the MNIST training loader.
    """
    # Load the trained MNIST model.
    model = torch.load(os.path.join('../', 'data', 'mnist_trained_on_GPU.pth'))

    # Greedy search over 10 candidate compression ratios per layer,
    # aiming for an overall ratio of 0.8.
    greedy = GreedySelectionParameters(target_comp_ratio=Decimal(0.8),
                                       num_comp_ratio_candidates=10)
    auto_mode = ChannelPruningParameters.AutoModeParams(
        greedy, modules_to_ignore=[model.conv1])

    # Channel pruning needs training data to reconstruct pruned-layer weights.
    # NOTE(review): ``mnist_torch_model`` is the AIMET examples helper module
    # and must be importable for this data loader to work.
    data_loader = mnist_torch_model.DataLoaderMnist(cuda=True, seed=1, shuffle=True)
    cp_params = ChannelPruningParameters(data_loader=data_loader.train_loader,
                                         num_reconstruction_samples=500,
                                         allow_custom_downsample_ops=True,
                                         mode=ChannelPruningParameters.Mode.auto,
                                         params=auto_mode)

    # A single call performs the entire compression.
    compressed_model, stats = ModelCompressor.compress_model(
        model,
        eval_callback=evaluate_model,
        eval_iterations=1000,
        input_shape=(1, 1, 28, 28),
        compress_scheme=CompressionScheme.channel_pruning,
        cost_metric=CostMetric.mac,
        parameters=cp_params)

    print(compressed_model)
    print(stats)  # Stats object can be pretty-printed easily
在手动模式下使用通道修剪进行压缩
def channel_pruning_manual_mode():
    """Compress a trained MNIST model with Channel Pruning in manual mode.

    Only ``model.conv2`` is pruned, at a fixed compression ratio of 0.4;
    weight reconstruction uses 500 samples from the MNIST training loader.
    """
    # Load the trained MNIST model.
    model = torch.load(os.path.join('../', 'data', 'mnist_trained_on_GPU.pth'))

    # Fix the compression ratio of the targeted layer by hand.
    manual_mode = ChannelPruningParameters.ManualModeParams(
        [ModuleCompRatioPair(model.conv2, 0.4)])

    # Channel pruning needs training data to reconstruct pruned-layer weights.
    # NOTE(review): ``mnist_torch_model`` is the AIMET examples helper module
    # and must be importable for this data loader to work.
    data_loader = mnist_torch_model.DataLoaderMnist(cuda=True, seed=1, shuffle=True)
    cp_params = ChannelPruningParameters(data_loader=data_loader.train_loader,
                                         num_reconstruction_samples=500,
                                         allow_custom_downsample_ops=True,
                                         mode=ChannelPruningParameters.Mode.manual,
                                         params=manual_mode)

    # A single call performs the entire compression.
    compressed_model, stats = ModelCompressor.compress_model(
        model,
        eval_callback=evaluate_model,
        eval_iterations=1000,
        input_shape=(1, 1, 28, 28),
        compress_scheme=CompressionScheme.channel_pruning,
        cost_metric=CostMetric.mac,
        parameters=cp_params)

    print(compressed_model)
    print(stats)  # Stats object can be pretty-printed easily
训练对象示例
class Trainer:
    """Example trainer class used for layer-wise fine-tuning during compression."""

    def __init__(self):
        # Layers that have been through fine-tuning so far, in order.
        self._layer_db = []

    def train_model(self, model, layer, train_flag=True):
        """Fine-tune the model for one epoch and record the fine-tuned layer.

        :param model: Model to be trained.
        :param layer: Layer which has to be fine-tuned.
        :param train_flag: Default: True. If True the model gets trained.
        :return: None
        """
        if train_flag:
            # One epoch of MNIST training on GPU with batch size 50.
            mnist_torch_model.train(model, epochs=1, use_cuda=True,
                                    batch_size=50, batch_callback=None)
        self._layer_db.append(layer)
在自动模式下使用 Spatial SVD 进行压缩并进行逐层微调
def spatial_svd_auto_mode_with_layerwise_finetuning():
    """Compress a trained MNIST model with Spatial SVD in auto mode, fine-tuning
    each layer as it is compressed via a ``Trainer`` instance.

    Greedy ratio selection targets an overall compression ratio of 0.8 with
    10 candidate ratios per layer; ``model.conv1`` is excluded.
    """
    # Load the trained MNIST model.
    model = torch.load(os.path.join('../', 'data', 'mnist_trained_on_GPU.pth'))

    # Greedy search over 10 candidate compression ratios per layer,
    # aiming for an overall ratio of 0.8.
    greedy = GreedySelectionParameters(target_comp_ratio=Decimal(0.8),
                                       num_comp_ratio_candidates=10)
    auto_mode = SpatialSvdParameters.AutoModeParams(greedy,
                                                    modules_to_ignore=[model.conv1])
    svd_params = SpatialSvdParameters(mode=SpatialSvdParameters.Mode.auto,
                                      params=auto_mode)

    # Passing a trainer enables layer-wise fine-tuning during compression.
    compressed_model, stats = ModelCompressor.compress_model(
        model,
        eval_callback=evaluate_model,
        eval_iterations=1000,
        input_shape=(1, 1, 28, 28),
        compress_scheme=CompressionScheme.spatial_svd,
        cost_metric=CostMetric.mac,
        parameters=svd_params, trainer=Trainer())

    print(compressed_model)
    print(stats)  # Stats object can be pretty-printed easily