MNN does not use GPU Mali-G31 for inference on Android #3093

Open
trungnb34 opened this issue Nov 20, 2024 · 4 comments
Labels
question Further information is requested

Comments

@trungnb34

Hi everyone,

I am deploying a YOLOv11 model on Android (NDK) using MNN. I followed this link and use OpenCL for inference.

Here is my code:

int thread = 4;
int precision = 0;
int forwardType = MNN_FORWARD_OPENCL;
MNN::ScheduleConfig sConfig;
sConfig.type = static_cast<MNNForwardType>(forwardType);
sConfig.numThread = thread;
BackendConfig bConfig;
bConfig.precision = static_cast<BackendConfig::PrecisionMode>(precision);
sConfig.backendConfig = &bConfig;
std::shared_ptr<Executor::RuntimeManager> rtmgr = std::shared_ptr<Executor::RuntimeManager>(Executor::RuntimeManager::createRuntimeManager(sConfig));
if(rtmgr == nullptr) {
    MNN_ERROR("Empty RuntimeManager\n");
    return 0;
}
rtmgr->setCache("/data/data/com.vsafe.testmnn/files/.cachefile");

std::shared_ptr<Module> net(Module::load(std::vector<std::string>{}, std::vector<std::string>{}, "/data/data/com.vsafe.testmnn/files/yolo11n.mnn", rtmgr));
cv::Mat img = cv::imread("/data/data/com.vsafe.testmnn/files/img.jpg");
LOGE("original_image : %d x %d", img.cols, img.rows);
cv::Mat resizedImage;
cv::resize(img, resizedImage, cv::Size(640, 640));
for (int i = 0; i < 1000; i++) {

    float scale = (resizedImage.rows > resizedImage.cols ? resizedImage.rows : resizedImage.cols) / 640.0;

    resizedImage.convertTo(resizedImage, CV_32F, 1.0 / 255.0);

    auto inputTensor = MNN::Express::_Input({1, 3, 640, 640}, MNN::Express::NCHW,
                                            halide_type_of<float>());
    auto inputPtr = inputTensor->writeMap<float>();
    for (int h = 0; h < 640; ++h) {
        for (int w = 0; w < 640; ++w) {
            inputPtr[0 * 640 * 640 + h * 640 + w] = resizedImage.at<cv::Vec3f>(h, w)[2];
            inputPtr[1 * 640 * 640 + h * 640 + w] = resizedImage.at<cv::Vec3f>(h, w)[1];
            inputPtr[2 * 640 * 640 + h * 640 + w] = resizedImage.at<cv::Vec3f>(h, w)[0];
        }
    }

    auto input = MNN::Express::_Convert(inputTensor, MNN::Express::NC4HW4);

    uint64_t startTime = get_current_time();
    auto outputs = net->onForward({input});
    uint64_t end = get_current_time();
    LOGE("Time %llu", (unsigned long long) (end - startTime));
    auto output = _Convert(outputs[0], NCHW);
    output = _Squeeze(output);
    auto cx = _Gather(output, _Scalar<int>(0));
    auto cy = _Gather(output, _Scalar<int>(1));
    auto w = _Gather(output, _Scalar<int>(2));
    auto h = _Gather(output, _Scalar<int>(3));
    std::vector<int> startvals{4, 0};
    auto start = _Const(static_cast<void *>(startvals.data()), {2}, NCHW,
                        halide_type_of<int>());
    std::vector<int> sizevals{-1, -1};
    auto size = _Const(static_cast<void *>(sizevals.data()), {2}, NCHW, halide_type_of<int>());
    auto probs = _Slice(output, start, size);
    // [cx, cy, w, h] -> [y0, x0, y1, x1]
    auto x0 = cx - w * _Const(0.5);
    auto y0 = cy - h * _Const(0.5);
    auto x1 = cx + w * _Const(0.5);
    auto y1 = cy + h * _Const(0.5);
    auto boxes = _Stack({x0, y0, x1, y1}, 1);
    auto scores = _ReduceMax(probs, {0});
    auto ids = _ArgMax(probs, 0);
    auto result_ids = _Nms(boxes, scores, 100, 0.45, 0.25);
    auto result_ptr = result_ids->readMap<int>();
    auto box_ptr = boxes->readMap<float>();
    auto ids_ptr = ids->readMap<int>();
    auto score_ptr = scores->readMap<float>();
    for (int i = 0; i < 100; i++) {
        auto idx = result_ptr[i];
        if (idx < 0) break;
        auto x0 = int(box_ptr[idx * 4 + 0] * scale);
        auto y0 = int(box_ptr[idx * 4 + 1] * scale);
        auto x1 = int(box_ptr[idx * 4 + 2] * scale);
        auto y1 = int(box_ptr[idx * 4 + 3] * scale);
        auto class_idx = ids_ptr[idx];
        auto score = score_ptr[idx];
        LOGE("### box: {%d, %d, %d, %d}, class_idx: %d, score: %f\n", x0, y0, x1, y1, class_idx, score);
    }

    std::this_thread::sleep_for(std::chrono::milliseconds(200));
}
if (imwrite("/data/data/com.vsafe.testmnn/files/res.jpg", img)) {
    LOGE("result image write to `res.jpg`.\n");
}
rtmgr->updateCache();

Although I set MNN_FORWARD_OPENCL, I see that inference uses a lot of CPU and the inference time is the same as with the CPU backend. I don't know why.

My device has a Mali-G31 GPU.

Thanks, everyone!

@jxt1234
Collaborator

jxt1234 commented Nov 20, 2024

Use RuntimeManager's getInfo interface to find out which backend is actually used.
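For example, something along these lines should log the resolved backend. This is only a sketch, assuming your MNN version exposes RuntimeManager::getInfo with Interpreter::SessionInfoCode::BACKENDS (check include/MNN/expr/Executor.hpp of the release you built):

// Ask the runtime which forward type it actually resolved to.
// BACKENDS fills an int array; index 0 is the primary backend.
int32_t backendTypes[2] = {MNN_FORWARD_CPU, MNN_FORWARD_CPU};
if (rtmgr->getInfo(MNN::Interpreter::SessionInfoCode::BACKENDS, backendTypes)) {
    if (backendTypes[0] == MNN_FORWARD_OPENCL) {
        LOGE("running on the OpenCL backend");
    } else {
        LOGE("fell back to backend type %d (CPU is %d)", backendTypes[0], MNN_FORWARD_CPU);
    }
}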

@jxt1234 jxt1234 added the question Further information is requested label Nov 20, 2024
@jxt1234
Collaborator

jxt1234 commented Nov 20, 2024

Besides that, did you compile MNN with -DMNN_OPENCL=true and call System.load("libMNN_OpenCL") in the app's Java code?
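With the default separated build, the OpenCL backend lives in its own shared library and is only registered once that library has been loaded into the process; otherwise MNN silently falls back to the CPU backend even though sConfig.type requests MNN_FORWARD_OPENCL. A minimal native-side check, assuming the plugin is named libMNN_CL.so (adjust to whatever your build actually produces):

#include <dlfcn.h>

// Try to load the OpenCL plugin explicitly before creating the RuntimeManager.
// If this fails, the OpenCL backend was never registered in this process.
void* clHandle = dlopen("libMNN_CL.so", RTLD_NOW);
if (clHandle == nullptr) {
    LOGE("could not load libMNN_CL.so: %s", dlerror());
}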

@trungnb34
Author

Hi @jxt1234,
I followed your suggestion, but the result has not changed. This is the command I used to build MNN:

#!/bin/bash
cmake ../../../ \
-DCMAKE_TOOLCHAIN_FILE=/Users/pancake/Library/Android/sdk/ndk/26.1.10909125/build/cmake/android.toolchain.cmake \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=./install_32 \
-DANDROID_ABI="armeabi-v7a" \
-DANDROID_STL=c++_static \
-DANDROID_NATIVE_API_LEVEL=android-14  \
-DANDROID_TOOLCHAIN=clang \
-DMNN_USE_LOGCAT=false \
-DMNN_USE_SSE=OFF \
-DMNN_BUILD_FOR_ANDROID=ON \
-DMNN_OPENCL=ON \
-DMNN_OPENMP=ON \
-DMNN_DEBUG=ON \
-DMNN_BUILD_TEST=OFF \
-DMNN_BUILD_FOR_ANDROID_COMMAND=true \
-DNATIVE_LIBRARY_OUTPUT=. -DNATIVE_INCLUDE_OUTPUT=. $1 $2 $3 $4 $5 $6 $7

This is the CMakeLists.txt file

cmake_minimum_required(VERSION 3.22.1)

project("testmnn")

set(OpenCV_DIR ${CMAKE_SOURCE_DIR}/OpenCV-android-sdk/sdk/native/jni)
find_package(OpenCV REQUIRED)

include_directories(${OpenCV_INCLUDE_DIRS})

include_directories(${CMAKE_SOURCE_DIR}/install_32/include)

add_library(${CMAKE_PROJECT_NAME} SHARED
        native-lib.cpp)
target_link_libraries(${CMAKE_PROJECT_NAME}
        android
        ${OpenCV_LIBS}
        ${CMAKE_SOURCE_DIR}/install_32/lib/libMNN.so
        ${CMAKE_SOURCE_DIR}/install_32/lib/libMNN_CL.so
        ${CMAKE_SOURCE_DIR}/install_32/lib/libMNN_Express.so
        log)

[screenshot: Kotlin code with the System.load call]

If I add System.load("libMNN_OpenCL") to the Kotlin code as in the screenshot above, the app crashes.

This is the OpenCL version reported on my device:

2024-11-21 19:09:10.582 21068-21068 OpenCLVersion           com.vsafe.testmnn                    I  OpenCL Version: 1.4 Android META-EGL
2024-11-21 19:09:10.582 21068-21068 OpenCLVersion           com.vsafe.testmnn                    D  Detected OpenCL version: 1.4 Android META-EGL
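Note: that string looks like an EGL version rather than an OpenCL driver version. A minimal sketch for querying the GPU's actual OpenCL version, assuming the OpenCL headers and the vendor's libOpenCL.so are available to build against:

#include <CL/cl.h>

// Query the first OpenCL platform and print its reported version string.
cl_platform_id platform;
cl_uint numPlatforms = 0;
if (clGetPlatformIDs(1, &platform, &numPlatforms) == CL_SUCCESS && numPlatforms > 0) {
    char version[256] = {};
    clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof(version), version, nullptr);
    LOGE("OpenCL platform version: %s", version);
} else {
    LOGE("no OpenCL platform found");
}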

This is the GPU info on my device:

GLES: ARM, Mali-G31, OpenGL ES 3.2 v1.r20p0-01rel0.9f6475d0baffb05079096179e1278b1d

Please help me check why MNN does not use the GPU for inference.
Thanks a lot.

@jxt1234
Collaborator

jxt1234 commented Nov 22, 2024

You can add -DMNN_SEP_BUILD=OFF when building MNN. Then it will build only a single libMNN.so, with the OpenCL backend already compiled in.
