import subprocess

import torch
import torch.onnx


class ToyModel(torch.nn.Module):
    def __init__(self):
        super(ToyModel, self).__init__()
        self.linear1 = torch.nn.Linear(1, 1)
        self.linear2 = torch.nn.Linear(1, 1)

    def forward(self, x):
        x = self.linear1(x)
        x = self.linear2(x)
        return x


# For a custom model you still need to manually author your converter; as far
# as I can tell there isn't a nice out-of-the-box option.
def test_convert_to_onnx():
    model = ToyModel()
    # Create a dummy input tensor to trace the model with
    dummy_input = torch.randn(1, 1)
    model_path = "linear.onnx"

    # Set the model to inference mode
    model.eval()

    # Export the model
    torch.onnx.export(
        model,  # model being run
        dummy_input,  # model input (or a tuple for multiple inputs)
        model_path,  # where to save the model
        export_params=True,  # store the trained parameter weights inside the model file
        opset_version=10,  # the ONNX version to export the model to
        do_constant_folding=True,  # whether to execute constant folding for optimization
        input_names=["modelInput"],  # the model's input names
        output_names=["modelOutput"],  # the model's output names
        dynamic_axes={
            "modelInput": {0: "batch_size"},  # variable length axes
            "modelOutput": {0: "batch_size"},
        },
    )
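

# A hedged sanity check, not part of the original tests: it assumes the optional
# onnx and onnxruntime packages are installed and simply verifies that the file
# exported above is a well-formed graph that ONNX Runtime can execute.
def test_exported_model_runs_in_onnxruntime():
    import numpy as np
    import onnx
    import onnxruntime as ort

    onnx_model = onnx.load("linear.onnx")
    onnx.checker.check_model(onnx_model)

    session = ort.InferenceSession("linear.onnx", providers=["CPUExecutionProvider"])
    # batch_size was declared as a dynamic axis above, so a batch of 3 should be accepted
    batch = np.random.randn(3, 1).astype(np.float32)
    outputs = session.run(None, {"modelInput": batch})
    assert outputs[0].shape == (3, 1)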


def test_model_packaging_and_start():
    subprocess.run("mkdir model_store", shell=True)
    subprocess.run(
        "torch-model-archiver -f --model-name onnx --version 1.0 --serialized-file linear.onnx --export-path model_store --handler onnx_handler.py",
        shell=True,
        check=True,
    )
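

# The archiver call above points at a separate onnx_handler.py that is not part
# of this file. Purely as an illustration (an assumption, not the real handler),
# a minimal TorchServe custom handler for the exported model could look roughly
# like the sketch below; it needs onnxruntime and numpy and is guarded so this
# test module still imports without the serving dependencies installed.
try:
    import os

    import numpy as np
    import onnxruntime as ort
    from ts.torch_handler.base_handler import BaseHandler

    class ONNXHandlerSketch(BaseHandler):
        """Illustrative only; the packaged archive uses the separate onnx_handler.py."""

        def initialize(self, context):
            # Locate the .onnx file that torch-model-archiver packed into the .mar
            model_dir = context.system_properties.get("model_dir")
            serialized_file = context.manifest["model"]["serializedFile"]
            self.session = ort.InferenceSession(
                os.path.join(model_dir, serialized_file),
                providers=["CPUExecutionProvider"],
            )
            self.initialized = True

        def preprocess(self, data):
            # The request body arrives as bytes, e.g. b"1" from the curl call below
            body = data[0].get("data") or data[0].get("body")
            return np.array([[float(body)]], dtype=np.float32)

        def inference(self, data):
            return self.session.run(None, {"modelInput": data})

        def postprocess(self, data):
            # TorchServe expects one response entry per request in the batch
            return [data[0].tolist()]

except ImportError:
    pass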


def test_model_start():
    subprocess.run(
        "torchserve --start --ncs --model-store model_store --models onnx.mar",
        shell=True,
        check=True,
    )
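

# torchserve --start returns before the model finishes loading, so the inference
# call below can race the worker startup. A hedged helper sketch (an addition,
# using only the standard library and TorchServe's management API on port 8081)
# that test_inference could call before issuing the request:
def wait_for_model_ready(timeout_seconds=30):
    import json
    import time
    import urllib.request

    deadline = time.time() + timeout_seconds
    while time.time() < deadline:
        try:
            with urllib.request.urlopen("http://127.0.0.1:8081/models/onnx") as resp:
                details = json.load(resp)
                # The describe-model response lists one entry per worker
                if details and details[0].get("workers"):
                    return
        except Exception:
            pass  # management endpoint not up yet; keep polling
        time.sleep(1)
    raise TimeoutError("onnx model did not become ready in time")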


def test_inference():
    subprocess.run(
        "curl -X POST http://127.0.0.1:8080/predictions/onnx --data-binary '1'",
        shell=True,
    )


def test_stop():
    subprocess.run("torchserve --stop", shell=True, check=True)