Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Ano python bindings #66

Draft
wants to merge 8 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion applications/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

# Make sure to order by alphabetical list
add_holohub_application(adv_networking_bench DEPENDS
OPERATORS basic_network)
OPERATORS advanced_network)

add_holohub_application(basic_networking_ping DEPENDS
OPERATORS basic_network)
Expand Down
1 change: 1 addition & 0 deletions applications/adv_networking_bench/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@
# limitations under the License.

add_subdirectory(cpp)
add_subdirectory(python)
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ advanced_network:
version: 1
master_core: 5 # Master CPU core
rx:
- if_name: 0005:03:00.1 # PCIe BFD of NIC
- if_name: 3d:00.0 # PCIe BDF of NIC
flow_isolation: true
queues:
- name: "Default"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ extensions:
advanced_network:
cfg:
version: 1
master_core: 5 # Master CPU core
master_core: 4 # Master CPU core
tx:
- if_name: 03:00.0 # PCIe BFD of NIC
- if_name: 3d:00.0 # PCIe BDF of NIC
queues:
- name: "ADC Samples"
id: 0
Expand All @@ -31,12 +31,46 @@ advanced_network:
num_concurrent_batches: 5 # Number of batches that can be used at any time
batch_size: 10240 # Number of packets in a batch
fill_type: "udp" # Highest layer that network operator should populate
eth_dst_addr: "48:B0:2D:D9:30:A2" # Destination MAC to populate
ip_src_addr: "192.168.1.2" # Source IP send from
ip_dst_addr: "192.168.10.1" # Destination IP to send to
eth_dst_addr: "48:b0:2d:63:80:1c" # Destination MAC to populate
ip_src_addr: "192.168.2.6" # Source IP to send from
ip_dst_addr: "192.168.1.7" # Destination IP to send to
udp_dst_port: 4096 # UDP destination port
udp_src_port: 4096 # UDP source port
cpu_cores: "7" # CPU cores for transmitting
cpu_cores: "5" # CPU cores for transmitting
rx:
- if_name: 88:00.0 # PCIe BDF of NIC
flow_isolation: true
queues:
- name: "Default"
id: 0
gpu_direct: false
cpu_cores: "7"
max_packet_size: 9000 # Maximum payload size
num_concurrent_batches: 32767 # Number of batches that can be used at any time
batch_size: 1 # Number of packets in a batch
- name: "ADC Samples"
id: 1
gpu_device: 0
gpu_direct: true
split_boundary: 42
cpu_cores: "6"
max_packet_size: 9000 # Maximum payload size
num_concurrent_batches: 20 # Number of batches that can be used at any time
batch_size: 1000 # Number of packets in a batch
flows:
- name: "ADC Samples"
action:
type: queue
id: 1
match:
udp_src: 4096
udp_dst: 4096

bench_rx:
split_boundary: true
batch_size: 10000
max_packet_size: 7680


bench_tx:
batch_size: 10000
Expand Down
19 changes: 12 additions & 7 deletions applications/adv_networking_bench/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
cmake_minimum_required(VERSION 3.20)
project(adv_networking_bench CXX)

find_package(holoscan 0.5 REQUIRED CONFIG
find_package(holoscan 0.6 REQUIRED CONFIG
PATHS "/opt/nvidia/holoscan" "/workspace/holoscan-sdk/install")

find_package(PkgConfig REQUIRED)
Expand All @@ -28,19 +28,24 @@ add_executable(adv_networking_bench
target_link_libraries(adv_networking_bench
PRIVATE
holoscan::core
holoscan::advanced_network
holoscan::advanced_network_rx
holoscan::advanced_network_tx
)

# Copy config file
add_custom_target(adv_networking_bench_rx_yaml
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/adv_networking_bench_rx.yaml" ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/adv_networking_bench_rx.yaml"
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/../adv_networking_bench_rx.yaml" ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/../adv_networking_bench_rx.yaml"
)
add_custom_target(adv_networking_bench_tx_yaml
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/adv_networking_bench_tx.yaml" ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/adv_networking_bench_tx.yaml"
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/../adv_networking_bench_tx.yaml" ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/../adv_networking_bench_tx.yaml"
)
add_custom_target(adv_networking_bench_tx_rx_yaml
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/../adv_networking_bench_tx_rx.yaml" ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/../adv_networking_bench_tx_rx.yaml"
)

add_dependencies(adv_networking_bench adv_networking_bench_rx_yaml adv_networking_bench_tx_yaml)
add_dependencies(adv_networking_bench adv_networking_bench_rx_yaml adv_networking_bench_tx_yaml adv_networking_bench_tx_rx_yaml)


24 changes: 15 additions & 9 deletions applications/adv_networking_bench/cpp/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
#include <linux/udp.h>
#include <arpa/inet.h>
#include <assert.h>

#include <sys/time.h>

namespace holoscan::ops {

Expand Down Expand Up @@ -76,18 +76,24 @@ class AdvNetworkingBenchTxOp : public Operator {
* expect the transmit operator to operate much faster than the receiver since it's not having to do any work
* to construct packets, and just copying from a buffer into memory.
*/
while (!adv_net_tx_burst_available(batch_size_.get())) {}

auto msg = CreateSharedBurstParams();
adv_net_set_hdr(msg, port_id, queue_id, batch_size_);

if ((ret = adv_net_get_tx_pkt_burst(msg)) != AdvNetStatus::SUCCESS) {
//timeval t1, t2;
//gettimeofday(&t1, NULL);
while (!adv_net_tx_burst_available(batch_size_.get(), port_id)) {}
// gettimeofday(&t2, NULL);
// timeval t3;
// timersub(&t2, &t1, &t3);
// printf("%ld\n", t3.tv_usec);

auto msg = adv_net_create_burst_params();
adv_net_set_hdr(msg, port_id, queue_id, batch_size_.get());

if ((ret = adv_net_get_tx_pkt_burst(msg, port_id)) != AdvNetStatus::SUCCESS) {
HOLOSCAN_LOG_ERROR("Error returned from adv_net_get_tx_pkt_burst: {}", static_cast<int>(ret));
return;
}

void *pkt;
for (int num_pkt = 0; num_pkt < msg->hdr.num_pkts; num_pkt++) {
for (int num_pkt = 0; num_pkt < adv_net_get_num_pkts(msg); num_pkt++) {
if ((ret = adv_net_set_cpu_udp_payload( msg,
num_pkt,
static_cast<char*>(full_batch_data_h_) +
Expand Down Expand Up @@ -152,7 +158,7 @@ class AdvNetworkingBenchRxOp : public Operator {
ttl_pkts_recv_ += adv_net_get_num_pkts(burst);

// If packets are coming in from our non-GPUDirect queue, free them and move on
if (burst->hdr.q_id == 0) {
if (adv_net_get_q_id(burst) == 0) {
adv_net_free_cpu_pkts_and_burst(burst);
HOLOSCAN_LOG_INFO("Freeing CPU packets on queue 0");
return;
Expand Down
20 changes: 20 additions & 0 deletions applications/adv_networking_bench/python/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Placeholder target: it does nothing, but is created when this folder is
# reached so that dependencies on the adequate operators can be declared.
# add_custom_target(advanced_networking_bench_python ALL
# DEPENDS advanced_network_python
# )
176 changes: 176 additions & 0 deletions applications/adv_networking_bench/python/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
# SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import sys
import cupy

from holoscan.conditions import CountCondition
from holoscan.core import Application, Operator, OperatorSpec

from holohub.advanced_network_tx import AdvNetworkOpTx
from holohub.advanced_network_common import (
adv_net_tx_burst_available,
adv_net_create_burst_params,
adv_net_set_hdr,
adv_net_get_tx_pkt_burst,
adv_net_get_num_pkts,
adv_net_set_cpu_udp_payload,
AdvNetStatus
)

# import holohub.advanced_network_common.holohub as hh
# print(dir(hh))
logger = logging.getLogger("AdvancedNetworkingBench")
logging.basicConfig(level=logging.INFO)

# class AdvNetworkingBenchTxOp : public Operator {
# public:
# HOLOSCAN_OPERATOR_FORWARD_ARGS(AdvNetworkingBenchTxOp)

# AdvNetworkingBenchTxOp() = default;

# void initialize() override {
# HOLOSCAN_LOG_INFO("AdvNetworkingBenchTxOp::initialize()");
# holoscan::Operator::initialize();

# size_t buf_size = batch_size_.get() * payload_size_.get();
# cudaMallocHost(&full_batch_data_h_, buf_size);

# // Fill in with increasing bytes
# uint8_t *cptr = static_cast<uint8_t*>(full_batch_data_h_);
# uint8_t cur = 0;
# for (int b = 0; b < buf_size; b++) {
# cptr[b] = cur++;
# }

# HOLOSCAN_LOG_INFO("AdvNetworkingBenchTxOp::initialize() complete");
# }

# void setup(OperatorSpec& spec) override {
# spec.output<std::shared_ptr<AdvNetBurstParams>>("burst_out");

# spec.param<uint32_t>(batch_size_, "batch_size", "Batch size",
# "Batch size for each processing epoch", 1000);
# spec.param<uint16_t>(payload_size_, "payload_size", "Payload size",
# "Payload size to send. Does not include <= L4 headers", 1400);
# }

# void compute(InputContext&, OutputContext& op_output, ExecutionContext&) override {
# AdvNetStatus ret;

# /**
# * Spin waiting until a buffer is free. This can be stalled by sending faster than the NIC can handle it. We
# * expect the transmit operator to operate much faster than the receiver since it's not having to do any work
# * to construct packets, and just copying from a buffer into memory.
# */
# while (!adv_net_tx_burst_available(batch_size_.get())) {}

# auto msg = CreateSharedBurstParams();
# adv_net_set_hdr(msg, port_id, queue_id, batch_size_);

# if ((ret = adv_net_get_tx_pkt_burst(msg)) != AdvNetStatus::SUCCESS) {
# HOLOSCAN_LOG_ERROR("Error returned from adv_net_get_tx_pkt_burst: {}", static_cast<int>(ret));
# return;
# }

# void *pkt;
# for (int num_pkt = 0; num_pkt < msg->hdr.num_pkts; num_pkt++) {
# if ((ret = adv_net_set_cpu_udp_payload( msg,
# num_pkt,
# static_cast<char*>(full_batch_data_h_) +
# num_pkt * payload_size_.get(),
# payload_size_.get())) != AdvNetStatus::SUCCESS) {
# HOLOSCAN_LOG_ERROR("Failed to create packet {}", num_pkt);
# }
# }

# op_output.emit(msg, "burst_out");
# };

# private:
# void *full_batch_data_h_;
# static constexpr uint16_t port_id = 0;
# static constexpr uint16_t queue_id = 0;
# Parameter<uint32_t> batch_size_;
# Parameter<uint16_t> payload_size_;
# };


class AdvancedNetworkingBenchTxOp(Operator):
    """Benchmark transmit operator.

    Allocates one pinned host buffer at construction; on every compute() it
    spins until the NIC can accept a full burst, builds a burst of
    ``batch_size`` UDP packets with ``payload_size`` bytes of payload each
    (copied from that buffer), and emits the burst on port "msg_out".

    Python port of the C++ AdvNetworkingBenchTxOp in cpp/main.cpp.
    """

    def __init__(self, fragment, *args, batch_size, payload_size, **kwargs):
        # batch_size: packets per burst; payload_size: payload bytes per
        # packet (does not include <= L4 headers, per the C++ counterpart).
        self.batch_size = batch_size
        self.payload_size = payload_size
        self.buf_size = self.batch_size * self.payload_size
        # Pinned (page-locked) host memory so payload copies into NIC
        # buffers do not pay a pageable-memory penalty.
        self.buf = cupy.cuda.alloc_pinned_memory(self.buf_size)
        super().__init__(fragment, *args, **kwargs)

    def initialize(self):
        Operator.initialize(self)

    def setup(self, spec: OperatorSpec):
        spec.output("msg_out")

    def compute(self, op_input, op_output, context):
        # Busy-wait until a burst's worth of TX buffers is free. TX is
        # expected to run much faster than the NIC can drain, so spinning
        # here is the intended back-pressure mechanism (matches the C++ app).
        # NOTE(review): the updated C++ code passes a port id to
        # adv_net_tx_burst_available/adv_net_get_tx_pkt_burst; confirm the
        # Python binding signatures match.
        while not adv_net_tx_burst_available(self.batch_size):
            continue

        msg = adv_net_create_burst_params()
        adv_net_set_hdr(msg, 0, 0, self.batch_size)  # port 0, queue 0

        ret = adv_net_get_tx_pkt_burst(msg)
        if ret != AdvNetStatus.SUCCESS:
            logger.error(f"Error returned from adv_net_get_tx_pkt_burst: {ret}")
            return

        # Fill each packet's payload from the staged pinned buffer.
        for num_pkt in range(adv_net_get_num_pkts(msg)):
            ret = adv_net_set_cpu_udp_payload(
                msg,
                num_pkt,
                self.buf.ptr + num_pkt * self.payload_size,
                self.payload_size,
            )
            if ret != AdvNetStatus.SUCCESS:
                logger.error(f"Error returned from adv_net_set_cpu_udp_payload: {ret} != {AdvNetStatus.SUCCESS}")
                return

        op_output.emit(msg, "msg_out")

# Now define a simple application using the operators defined above
# Number of bursts the TX operator emits before its CountCondition stops it.
NUM_MSGS = 10


class App(Application):
    """Benchmark application.

    Wires AdvancedNetworkingBenchTxOp into AdvNetworkOpTx when the YAML
    config declares an advanced_network tx section; otherwise logs and
    builds nothing. The RX path is not yet implemented for the Python
    bindings.
    """

    def compose(self):
        # Only build the TX pipeline when the config declares one.
        adv_net_cfg = self.kwargs("advanced_network")
        if "cfg" in adv_net_cfg and "tx" in adv_net_cfg["cfg"]:
            # CountCondition stops the benchmark after NUM_MSGS bursts.
            tx = AdvancedNetworkingBenchTxOp(
                self, CountCondition(self, NUM_MSGS), name="tx", **self.kwargs("bench_tx")
            )
            adv_net_tx = AdvNetworkOpTx(self, name="adv_net_tx")
            self.add_flow(tx, adv_net_tx, {("msg_out", "burst_in")})
        else:
            logger.info("No TX config found")

        # RX path pending Python bindings for the RX operators:
        # if len(self.kwargs("network_rx")) > 0:
        #     basic_net_rx = BasicNetworkOpRx(self, name="basic_net_rx", **self.kwargs("network_rx"))
        #     rx = BasicNetworkPingRxOp(self, name="rx")
        #     self.add_flow(basic_net_rx, rx, {("burst_out", "msg_in")})
        # else:
        #     logger.info("No RX config found")


if __name__ == "__main__":
    # Require the config path up front instead of crashing with an
    # IndexError on sys.argv[1] when launched without arguments.
    if len(sys.argv) < 2:
        sys.exit(f"Usage: {sys.argv[0]} <config.yaml>")
    config_path = sys.argv[1]
    app = App()
    app.config(config_path)
    app.run()
Loading