Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 29 additions & 5 deletions backends/qualcomm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -239,11 +239,28 @@ target_link_libraries(
shared_buffer
qnn_dlc_manager
)
target_link_libraries(
qnn_executorch_backend
PRIVATE qnn_executorch_header qnn_schema qnn_manager executorch_core
extension_tensor qnn_backend_options
)
# When targeting the Hexagon DSP, point the linker at the Hexagon toolchain
# runtime libraries and link libc/libc++ explicitly; other targets use the
# default toolchain runtimes.
if(${CMAKE_SYSTEM_PROCESSOR} MATCHES Hexagon)
  # NOTE(review): directory-scoped link_directories() leaks to every target
  # declared after this point; consider target_link_directories() instead.
  link_directories(
    $ENV{HEXAGON_TOOLS_ROOT}/Tools/target/hexagon/lib/$ENV{HEXAGON_ARCH}/G0/pic
  )
  target_link_libraries(
    qnn_executorch_backend
    PRIVATE qnn_executorch_header
            qnn_schema
            qnn_manager
            executorch_core
            extension_tensor
            qnn_backend_options
            # presumably the Hexagon toolchain does not link its C/C++
            # runtimes implicitly -- TODO confirm
            c
            c++
  )
else()
  target_link_libraries(
    qnn_executorch_backend
    PRIVATE qnn_executorch_header qnn_schema qnn_manager executorch_core
            extension_tensor qnn_backend_options
  )
endif()
set_target_properties(
qnn_executorch_backend PROPERTIES LINK_FLAGS "-Wl,-rpath='$ORIGIN'"
)
Expand Down Expand Up @@ -278,6 +295,13 @@ install(
RUNTIME DESTINATION ${CMAKE_INSTALL_LIBDIR}/executorch/backends/qualcomm
)

# The fastrpc subproject needs the Hexagon SDK (qaic IDL compiler, FastRPC
# headers), so it is only added when the SDK environment variable is set.
# $ENV{} is read at configure time; set up the SDK before invoking CMake.
if(DEFINED ENV{HEXAGON_SDK_ROOT})
  add_subdirectory(
    ${QNN_EXECUTORCH_ROOT_DIR}/fastrpc
    ${CMAKE_CURRENT_BINARY_DIR}/qnn_executorch/fastrpc
  )
endif()

# QNN pybind
if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "x86_64")
add_subdirectory(
Expand Down
17 changes: 11 additions & 6 deletions backends/qualcomm/_passes/build_quant_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,8 @@
# LICENSE file in the root directory of this source tree.
import torch
from executorch.backends.qualcomm.utils.constants import QCOM_QUANTIZED_IO
from executorch.exir.delegate import executorch_call_delegate

from executorch.exir.pass_base import ExportPass, ProxyValue
from executorch.exir.pass_base import ExportPass
from executorch.exir.tensor import TensorSpec
from torch.utils import _pytree as pytree

Expand Down Expand Up @@ -39,11 +38,17 @@ def call_getitem(self, value, key: int, meta):
return super().call_getitem(value, key, meta)

def call_delegate(self, lowered_module, args, kwargs, meta):
    # Build the output spec for a delegated (lowered) submodule so the
    # annotated quantized IO dtypes are reflected in the exported program.
    #
    # NOTE(review): this span appears to interleave pre- and post-change
    # lines from a diff. `pytree.tree_map` below receives BOTH the
    # delegate-call result and `tuple(tensors)`; presumably only one of
    # the two trees was intended in the final version -- verify against
    # the merged file.
    args_data, _ = pytree.tree_map_only(
        ProxyValue, lambda x: x.data, (args, kwargs)
    )
    # Locate the single "output" node of the lowered module's graph.
    output_node = [
        node
        for node in lowered_module.original_module.graph.nodes
        if node.target == "output"
    ][0]
    # Cast each output's example value to its annotated quantized IO dtype
    # (stored under QCOM_QUANTIZED_IO in node metadata).
    tensors = [
        node.meta["val"].to(node.meta[QCOM_QUANTIZED_IO])
        for node in output_node.args[0]
    ]
    # Derive a TensorSpec (via self._make_spec) for every output leaf.
    meta["spec"] = pytree.tree_map(
        self._make_spec,
        executorch_call_delegate(lowered_module, *args_data),
        tuple(tensors),
    )
    return super().call_delegate(lowered_module, args, kwargs, meta)
10 changes: 10 additions & 0 deletions backends/qualcomm/aot/wrappers/TensorWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,17 @@ TensorWrapper::TensorWrapper(
Error TensorWrapper::FillDataBuffer(const void* data, bool copy_data) {
if (data != nullptr) {
QNN_TENSOR_VER_PTR(tensor_)->memType = QNN_TENSORMEMTYPE_RAW;
#ifdef __hexagon__
// alignment is required
auto align_size = [](size_t alignment, size_t sz) {
return (sz + (alignment - 1)) & ~(alignment - 1);
};
const size_t alignment = 64;
QNN_TENSOR_VER_PTR(tensor_)->clientBuf.dataSize =
align_size(alignment, bytes_);
#else
QNN_TENSOR_VER_PTR(tensor_)->clientBuf.dataSize = bytes_;
#endif
if (copy_data) {
owned_data_ = std::make_unique<char[]>(bytes_);
const char* src_data = static_cast<const char*>(data);
Expand Down
78 changes: 78 additions & 0 deletions backends/qualcomm/fastrpc/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# Copyright 2025 Arm Limited and/or its affiliates.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# C++17, and position-independent code everywhere since both the stub and
# skel objects end up in shared libraries.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

# All qaic-generated files land in the build tree, never the source tree.
set(_qnn_fastrpc__dir ${CMAKE_BINARY_DIR}/backends/qualcomm/fastrpc)
# FastRPC interface definition; the single input to the qaic IDL compiler.
set(_qnn_fastrpc__srcs ${CMAKE_CURRENT_LIST_DIR}/qnn_executorch.idl)
# qaic emits one header plus host-side (stub) and DSP-side (skel) sources.
set(_qnn_fastrpc__outputs
    ${_qnn_fastrpc__dir}/qnn_executorch.h
    ${_qnn_fastrpc__dir}/qnn_executorch_stub.c
    ${_qnn_fastrpc__dir}/qnn_executorch_skel.c
)

# Run the Hexagon SDK `qaic` IDL compiler to generate the FastRPC header,
# host-side stub and DSP-side skel sources from qnn_executorch.idl.
# $ENV{HEXAGON_SDK_ROOT} is read at configure time, so the SDK environment
# must be set up before invoking CMake.
if(DEFINED ENV{HEXAGON_SDK_ROOT})
  add_custom_command(
    OUTPUT ${_qnn_fastrpc__outputs}
    # Portable directory creation instead of `mkdir -p`.
    COMMAND ${CMAKE_COMMAND} -E make_directory ${_qnn_fastrpc__dir}
    COMMAND
      $ENV{HEXAGON_SDK_ROOT}/ipc/fastrpc/qaic/bin/qaic -I
      $ENV{HEXAGON_SDK_ROOT}/incs -I $ENV{HEXAGON_SDK_ROOT}/incs/stddef -o
      ${_qnn_fastrpc__dir} ${_qnn_fastrpc__srcs}
    WORKING_DIRECTORY ${EXECUTORCH_SOURCE_DIR}
    # List the .idl input so codegen re-runs when the interface changes
    # (it was missing before, leaving stale stub/skel sources).
    DEPENDS ${_qnn_fastrpc__srcs} qnn_executorch_backend
    COMMENT "Codegen for fastrpc files"
    VERBATIM
  )
  # Named target other targets can depend on to force codegen to run.
  add_custom_target(fastrpc_codegen DEPENDS ${_qnn_fastrpc__outputs})
endif()

# DSP side: build the skel shared library that dispatches incoming FastRPC
# calls (qnn_executorch_skel.c, generated) into the ExecuTorch runtime
# (qnn_executorch_impl.cpp).
if(${CMAKE_SYSTEM_PROCESSOR} MATCHES Hexagon)
  add_library(
    qnn_executorch_skel SHARED
    ${_qnn_fastrpc__dir}/qnn_executorch.h
    ${_qnn_fastrpc__dir}/qnn_executorch_skel.c qnn_executorch_impl.cpp
  )
  # The generated header lives in the build tree.
  target_include_directories(qnn_executorch_skel PRIVATE ${_qnn_fastrpc__dir})
  target_link_libraries(
    qnn_executorch_skel PRIVATE extension_data_loader qnn_executorch_backend
    # presumably the Hexagon toolchain needs the C/C++ runtimes linked
    # explicitly -- TODO confirm
    c++ c
  )
  # Make sure IDL codegen has produced the skel sources before compiling.
  add_dependencies(qnn_executorch_skel fastrpc_codegen)
endif()

# Host (CPU) side: stub shared library that marshals calls to the DSP, plus
# a minimal example runner. Use target-scoped include/link directories
# instead of directory-scoped include_directories()/link_directories() so
# the SDK paths do not leak to unrelated targets.
if(${CMAKE_SYSTEM_PROCESSOR} MATCHES aarch64)
  add_library(
    qnn_executorch_stub SHARED ${_qnn_fastrpc__dir}/qnn_executorch.h
                               ${_qnn_fastrpc__dir}/qnn_executorch_stub.c
  )
  # PUBLIC so consumers (the runner below) inherit the SDK and generated
  # header include paths and the adsprpc library search path.
  target_include_directories(
    qnn_executorch_stub
    PUBLIC $ENV{HEXAGON_SDK_ROOT}/incs $ENV{HEXAGON_SDK_ROOT}/incs/stddef
           ${_qnn_fastrpc__dir}
  )
  target_link_directories(
    qnn_executorch_stub
    PUBLIC $ENV{HEXAGON_SDK_ROOT}/ipc/fastrpc/remote/ship/android_aarch64
  )
  # TODO: support cdsp if necessary
  target_link_libraries(qnn_executorch_stub PRIVATE adsprpc)
  # Ensure IDL codegen has produced the stub sources before compiling.
  add_dependencies(qnn_executorch_stub fastrpc_codegen)

  # build minimum example app
  add_executable(qnn_executor_runner qnn_executor_runner.cpp)
  # adsprpc is linked once here (the previous duplicate PRIVATE link of
  # adsprpc was redundant and has been removed).
  target_link_libraries(
    qnn_executor_runner PRIVATE executorch_core gflags qnn_executorch_stub
                                adsprpc
  )
endif()
148 changes: 148 additions & 0 deletions backends/qualcomm/fastrpc/qnn_executor_runner.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
/*
* Copyright (c) Qualcomm Innovation Center, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <chrono>
#include <cstdint> // uint8_t (was relied on transitively)
#include <fstream>
#include <memory>
#include <numeric>
#include <string> // std::string, std::to_string (was relied on transitively)
#include <vector> // std::vector (was relied on transitively)

#include <executorch/runtime/platform/assert.h>
#include <gflags/gflags.h>

// FastRPC interface generated from qnn_executorch.idl by qaic.
#include "qnn_executorch.h"

// Path to the serialized .pte program to load on the DSP.
DEFINE_string(
    model_path,
    "model.pte",
    "Model serialized in flatbuffer format.");
// Directory where raw output tensors are written.
DEFINE_string(
    output_folder_path,
    ".",
    "Executorch inference data output path.");
// Text file: one inference per line, space-separated raw input files.
DEFINE_string(input_list_path, "input_list.txt", "Model input list path.");

int main(int argc, char** argv) {
gflags::ParseCommandLineFlags(&argc, &argv, true);
if (argc != 1) {
std::string msg = "extra commandline args:";
for (int i = 1 /* skip argv[0] (program name) */; i < argc; i++) {
msg += std::string(" ") + argv[i];
}
ET_LOG(Error, "%s", msg.c_str());
return 1;
}

// fastrpc related
// adsp
const int adsp_domain_id = 0;
// signed PD
const int enable_unsigned_pd = 0;
// domain uri
std::string domain_uri(qnn_executorch_URI);
domain_uri += "&_dom=adsp";
// init session
struct remote_rpc_control_unsigned_module data;
data.domain = adsp_domain_id;
data.enable = enable_unsigned_pd;
int err = AEE_SUCCESS;
ET_CHECK_MSG(
AEE_SUCCESS ==
(err = remote_session_control(
DSPRPC_CONTROL_UNSIGNED_MODULE, (void*)&data, sizeof(data))),
"remote_session_control failed: 0x%x",
err);
// start session
remote_handle64 handle = -1;
ET_CHECK_MSG(
AEE_SUCCESS == (err = qnn_executorch_open(domain_uri.data(), &handle)),
"qnn_executorch_open failed: 0x%x",
err);
// load model
const char* model_path = FLAGS_model_path.c_str();
qnn_executorch_load(handle, model_path);

// prepare io
std::vector<std::vector<uint8_t>> input_data, output_data;
std::vector<tensor> input_tensor, output_tensor;
for (int i = 0;; ++i) {
int nbytes = 0;
qnn_executorch_get_input_size(handle, model_path, i, &nbytes);
if (nbytes == -1) {
break;
}
input_data.emplace_back(std::vector<uint8_t>(nbytes));
input_tensor.emplace_back(
tensor({input_data.back().data(), (int)input_data.back().size()}));
}
for (int i = 0;; ++i) {
int nbytes = 0;
qnn_executorch_get_output_size(handle, model_path, i, &nbytes);
if (nbytes == -1) {
break;
}
output_data.emplace_back(std::vector<uint8_t>(nbytes));
output_tensor.emplace_back(
tensor({output_data.back().data(), (int)output_data.back().size()}));
}

// prepare input data
std::ifstream input_list(FLAGS_input_list_path);
// TODO: should check IO info via fastrpc first
if (input_list.is_open()) {
auto split = [](std::string s, std::string delimiter) {
size_t pos_start = 0, pos_end, delim_len = delimiter.length();
std::string token;
std::vector<std::string> res;

while ((pos_end = s.find(delimiter, pos_start)) != std::string::npos) {
token = s.substr(pos_start, pos_end - pos_start);
pos_start = pos_end + delim_len;
res.push_back(token);
}
res.push_back(s.substr(pos_start));
return res;
};

std::string file_path;
int inference_index = 0;
while (std::getline(input_list, file_path)) {
auto input_files = split(file_path, " ");
if (input_files.size() == 0) {
break;
}
size_t num_inputs = input_files.size();
for (int i = 0; i < num_inputs; ++i) {
std::ifstream fin(input_files[i], std::ios::binary);
fin.seekg(0, fin.end);
size_t file_size = fin.tellg();
fin.seekg(0, fin.beg);
fin.read((char*)input_data[i].data(), file_size);
fin.close();
}
qnn_executorch_set_input(
handle, model_path, input_tensor.data(), input_tensor.size());
qnn_executorch_execute(handle, model_path);
qnn_executorch_get_output(
handle, model_path, output_tensor.data(), output_tensor.size());
for (size_t i = 0; i < output_tensor.size(); i++) {
auto output_file_name = FLAGS_output_folder_path + "/output_" +
std::to_string(inference_index) + "_" + std::to_string(i) + ".raw";
std::ofstream fout(output_file_name.c_str(), std::ios::binary);
fout.write(
(const char*)output_tensor[i].data, output_tensor[i].dataLen);
fout.close();
}
}
}

// unload model
qnn_executorch_unload(handle, model_path);
// tear down
qnn_executorch_close(handle);
return 0;
}
27 changes: 27 additions & 0 deletions backends/qualcomm/fastrpc/qnn_executorch.idl
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*
* Copyright (c) Qualcomm Innovation Center, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include "AEEStdDef.idl"
#include "remote.idl"

/// Enabling stub-skel mismatch check feature in the auto-gen files.
/// Please refer to the IDL documentation for more details on the feature.
/// It is fully supported only on Kailua and later targets.
const string IDL_VERSION = "0.0.0";

/// Raw byte buffer used to marshal one tensor's data over FastRPC.
typedef sequence<uint8> tensor;

/// DSP-side ExecuTorch service. Every method keys off the .pte path, so a
/// single session can presumably host multiple loaded programs -- confirm
/// against the skel implementation.
interface qnn_executorch : remote_handle64 {
  /// Load the serialized program at pte_path on the DSP.
  long load(in string pte_path);
  /// Byte size of input tensor `index`; the host runner treats -1 in
  /// nbytes as "no tensor at this index" (see qnn_executor_runner.cpp).
  long get_input_size(in string pte_path, in long index, rout long nbytes);
  /// Copy host-provided input tensor data into the loaded program.
  long set_input(in string pte_path, in sequence<tensor> tensors);
  /// Run one inference of the loaded program.
  long execute(in string pte_path);
  /// Byte size of output tensor `index`; -1 signals no tensor at index,
  /// mirroring get_input_size.
  long get_output_size(in string pte_path, in long index, rout long nbytes);
  /// Copy output tensor data back into the host-provided buffers.
  long get_output(in string pte_path, rout sequence<tensor> tensors);
  /// Release the loaded program.
  long unload(in string pte_path);
};
Loading
Loading