Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Ano python bindings #66

Draft
wants to merge 8 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion applications/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

# Make sure to order by alphabetical list
add_holohub_application(adv_networking_bench DEPENDS
OPERATORS basic_network)
OPERATORS advanced_network)

add_holohub_application(basic_networking_ping DEPENDS
OPERATORS basic_network)
Expand Down
1 change: 1 addition & 0 deletions applications/adv_networking_bench/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@
# limitations under the License.

add_subdirectory(cpp)
add_subdirectory(python)
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ advanced_network:
version: 1
master_core: 5 # Master CPU core
rx:
- if_name: 0005:03:00.1 # PCIe BFD of NIC
- if_name: 3d:00.0 # PCIe BDF of NIC
flow_isolation: true
queues:
- name: "Default"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ extensions:
advanced_network:
cfg:
version: 1
master_core: 5 # Master CPU core
master_core: 4 # Master CPU core
tx:
- if_name: 03:00.0 # PCIe BFD of NIC
- if_name: 3d:00.0 # PCIe BDF of NIC
queues:
- name: "ADC Samples"
id: 0
Expand All @@ -31,12 +31,46 @@ advanced_network:
num_concurrent_batches: 5 # Number of batches that can be used at any time
batch_size: 10240 # Number of packets in a batch
fill_type: "udp" # Highest layer that network operator should populate
eth_dst_addr: "48:B0:2D:D9:30:A2" # Destination MAC to populate
ip_src_addr: "192.168.1.2" # Source IP send from
ip_dst_addr: "192.168.10.1" # Destination IP to send to
eth_dst_addr: "48:b0:2d:63:80:1c" # Destination MAC to populate
ip_src_addr: "192.168.2.6" # Source IP to send from
ip_dst_addr: "192.168.1.7" # Destination IP to send to
udp_dst_port: 4096 # UDP destination port
udp_src_port: 4096 # UDP source port
cpu_cores: "7" # CPU cores for transmitting
cpu_cores: "5" # CPU cores for transmitting
rx:
- if_name: 88:00.0 # PCIe BDF of NIC
flow_isolation: true
queues:
- name: "Default"
id: 0
gpu_direct: false
cpu_cores: "7"
max_packet_size: 9000 # Maximum payload size
num_concurrent_batches: 32767 # Number of batches that can be used at any time
batch_size: 1 # Number of packets in a batch
- name: "ADC Samples"
id: 1
gpu_device: 0
gpu_direct: true
split_boundary: 42
cpu_cores: "6"
max_packet_size: 9000 # Maximum payload size
num_concurrent_batches: 20 # Number of batches that can be used at any time
batch_size: 1000 # Number of packets in a batch
flows:
- name: "ADC Samples"
action:
type: queue
id: 1
match:
udp_src: 4096
udp_dst: 4096

bench_rx:
split_boundary: true
batch_size: 10000
max_packet_size: 7680


bench_tx:
batch_size: 10000
Expand Down
19 changes: 12 additions & 7 deletions applications/adv_networking_bench/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
cmake_minimum_required(VERSION 3.20)
project(adv_networking_bench CXX)

find_package(holoscan 0.5 REQUIRED CONFIG
find_package(holoscan 0.6 REQUIRED CONFIG
PATHS "/opt/nvidia/holoscan" "/workspace/holoscan-sdk/install")

find_package(PkgConfig REQUIRED)
Expand All @@ -28,19 +28,24 @@ add_executable(adv_networking_bench
target_link_libraries(adv_networking_bench
PRIVATE
holoscan::core
holoscan::advanced_network
holoscan::advanced_network_rx
holoscan::advanced_network_tx
)

# Copy config file
add_custom_target(adv_networking_bench_rx_yaml
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/adv_networking_bench_rx.yaml" ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/adv_networking_bench_rx.yaml"
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/../adv_networking_bench_rx.yaml" ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/../adv_networking_bench_rx.yaml"
)
add_custom_target(adv_networking_bench_tx_yaml
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/adv_networking_bench_tx.yaml" ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/adv_networking_bench_tx.yaml"
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/../adv_networking_bench_tx.yaml" ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/../adv_networking_bench_tx.yaml"
)
add_custom_target(adv_networking_bench_tx_rx_yaml
COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/../adv_networking_bench_tx_rx.yaml" ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/../adv_networking_bench_tx_rx.yaml"
)

add_dependencies(adv_networking_bench adv_networking_bench_rx_yaml adv_networking_bench_tx_yaml)
add_dependencies(adv_networking_bench adv_networking_bench_rx_yaml adv_networking_bench_tx_yaml adv_networking_bench_tx_rx_yaml)


24 changes: 15 additions & 9 deletions applications/adv_networking_bench/cpp/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
#include <linux/udp.h>
#include <arpa/inet.h>
#include <assert.h>

#include <sys/time.h>

namespace holoscan::ops {

Expand Down Expand Up @@ -76,18 +76,24 @@ class AdvNetworkingBenchTxOp : public Operator {
* expect the transmit operator to operate much faster than the receiver since it's not having to do any work
* to construct packets, and just copying from a buffer into memory.
*/
while (!adv_net_tx_burst_available(batch_size_.get())) {}

auto msg = CreateSharedBurstParams();
adv_net_set_hdr(msg, port_id, queue_id, batch_size_);

if ((ret = adv_net_get_tx_pkt_burst(msg)) != AdvNetStatus::SUCCESS) {
//timeval t1, t2;
//gettimeofday(&t1, NULL);
while (!adv_net_tx_burst_available(batch_size_.get(), port_id)) {}
// gettimeofday(&t2, NULL);
// timeval t3;
// timersub(&t2, &t1, &t3);
// printf("%ld\n", t3.tv_usec);

auto msg = adv_net_create_burst_params();
adv_net_set_hdr(msg, port_id, queue_id, batch_size_.get());

if ((ret = adv_net_get_tx_pkt_burst(msg, port_id)) != AdvNetStatus::SUCCESS) {
HOLOSCAN_LOG_ERROR("Error returned from adv_net_get_tx_pkt_burst: {}", static_cast<int>(ret));
return;
}

void *pkt;
for (int num_pkt = 0; num_pkt < msg->hdr.num_pkts; num_pkt++) {
for (int num_pkt = 0; num_pkt < adv_net_get_num_pkts(msg); num_pkt++) {
if ((ret = adv_net_set_cpu_udp_payload( msg,
num_pkt,
static_cast<char*>(full_batch_data_h_) +
Expand Down Expand Up @@ -152,7 +158,7 @@ class AdvNetworkingBenchRxOp : public Operator {
ttl_pkts_recv_ += adv_net_get_num_pkts(burst);

// If packets are coming in from our non-GPUDirect queue, free them and move on
if (burst->hdr.q_id == 0) {
if (adv_net_get_q_id(burst) == 0) {
adv_net_free_cpu_pkts_and_burst(burst);
HOLOSCAN_LOG_INFO("Freeing CPU packets on queue 0");
return;
Expand Down
20 changes: 20 additions & 0 deletions applications/adv_networking_bench/python/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Placeholder target: it does nothing, but is created when this folder is
# reached so that dependencies on the adequate operators can be declared.
# add_custom_target(advanced_networking_bench_python ALL
# DEPENDS advanced_network_python
# )
176 changes: 176 additions & 0 deletions applications/adv_networking_bench/python/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
# SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import sys
import cupy

from holoscan.conditions import CountCondition
from holoscan.core import Application, Operator, OperatorSpec

from holohub.advanced_network_tx import AdvNetworkOpTx
from holohub.advanced_network_common import (
adv_net_tx_burst_available,
adv_net_create_burst_params,
adv_net_set_hdr,
adv_net_get_tx_pkt_burst,
adv_net_get_num_pkts,
adv_net_set_cpu_udp_payload,
AdvNetStatus
)

# import holohub.advanced_network_common.holohub as hh
# print(dir(hh))
logger = logging.getLogger("AdvancedNetworkingBench")
logging.basicConfig(level=logging.INFO)

# class AdvNetworkingBenchTxOp : public Operator {
# public:
# HOLOSCAN_OPERATOR_FORWARD_ARGS(AdvNetworkingBenchTxOp)

# AdvNetworkingBenchTxOp() = default;

# void initialize() override {
# HOLOSCAN_LOG_INFO("AdvNetworkingBenchTxOp::initialize()");
# holoscan::Operator::initialize();

# size_t buf_size = batch_size_.get() * payload_size_.get();
# cudaMallocHost(&full_batch_data_h_, buf_size);

# // Fill in with increasing bytes
# uint8_t *cptr = static_cast<uint8_t*>(full_batch_data_h_);
# uint8_t cur = 0;
# for (int b = 0; b < buf_size; b++) {
# cptr[b] = cur++;
# }

# HOLOSCAN_LOG_INFO("AdvNetworkingBenchTxOp::initialize() complete");
# }

# void setup(OperatorSpec& spec) override {
# spec.output<std::shared_ptr<AdvNetBurstParams>>("burst_out");

# spec.param<uint32_t>(batch_size_, "batch_size", "Batch size",
# "Batch size for each processing epoch", 1000);
# spec.param<uint16_t>(payload_size_, "payload_size", "Payload size",
# "Payload size to send. Does not include <= L4 headers", 1400);
# }

# void compute(InputContext&, OutputContext& op_output, ExecutionContext&) override {
# AdvNetStatus ret;

# /**
# * Spin waiting until a buffer is free. This can be stalled by sending faster than the NIC can handle it. We
# * expect the transmit operator to operate much faster than the receiver since it's not having to do any work
# * to construct packets, and just copying from a buffer into memory.
# */
# while (!adv_net_tx_burst_available(batch_size_.get())) {}

# auto msg = CreateSharedBurstParams();
# adv_net_set_hdr(msg, port_id, queue_id, batch_size_);

# if ((ret = adv_net_get_tx_pkt_burst(msg)) != AdvNetStatus::SUCCESS) {
# HOLOSCAN_LOG_ERROR("Error returned from adv_net_get_tx_pkt_burst: {}", static_cast<int>(ret));
# return;
# }

# void *pkt;
# for (int num_pkt = 0; num_pkt < msg->hdr.num_pkts; num_pkt++) {
# if ((ret = adv_net_set_cpu_udp_payload( msg,
# num_pkt,
# static_cast<char*>(full_batch_data_h_) +
# num_pkt * payload_size_.get(),
# payload_size_.get())) != AdvNetStatus::SUCCESS) {
# HOLOSCAN_LOG_ERROR("Failed to create packet {}", num_pkt);
# }
# }

# op_output.emit(msg, "burst_out");
# };

# private:
# void *full_batch_data_h_;
# static constexpr uint16_t port_id = 0;
# static constexpr uint16_t queue_id = 0;
# Parameter<uint32_t> batch_size_;
# Parameter<uint16_t> payload_size_;
# };


class AdvancedNetworkingBenchTxOp(Operator):
    """Benchmark transmit operator.

    Allocates one pinned host buffer at construction; on every compute() it
    spins until the NIC can accept a full burst, builds a burst of
    ``batch_size`` UDP packets with ``payload_size`` bytes of payload each
    (copied from that buffer), and emits the burst on port "msg_out".

    Python port of the C++ AdvNetworkingBenchTxOp in cpp/main.cpp.
    """

    def __init__(self, fragment, *args, batch_size, payload_size, **kwargs):
        # batch_size: packets per burst; payload_size: payload bytes per
        # packet (does not include <= L4 headers, per the C++ counterpart).
        self.batch_size = batch_size
        self.payload_size = payload_size
        self.buf_size = self.batch_size * self.payload_size
        # Pinned (page-locked) host memory so payload copies into NIC
        # buffers do not pay a pageable-memory penalty.
        self.buf = cupy.cuda.alloc_pinned_memory(self.buf_size)
        super().__init__(fragment, *args, **kwargs)

    def initialize(self):
        Operator.initialize(self)

    def setup(self, spec: OperatorSpec):
        spec.output("msg_out")

    def compute(self, op_input, op_output, context):
        # Busy-wait until a burst's worth of TX buffers is free. TX is
        # expected to run much faster than the NIC can drain, so spinning
        # here is the intended back-pressure mechanism (matches the C++ app).
        # NOTE(review): the updated C++ code passes a port id to
        # adv_net_tx_burst_available/adv_net_get_tx_pkt_burst; confirm the
        # Python binding signatures match.
        while not adv_net_tx_burst_available(self.batch_size):
            continue

        msg = adv_net_create_burst_params()
        adv_net_set_hdr(msg, 0, 0, self.batch_size)  # port 0, queue 0

        ret = adv_net_get_tx_pkt_burst(msg)
        if ret != AdvNetStatus.SUCCESS:
            logger.error(f"Error returned from adv_net_get_tx_pkt_burst: {ret}")
            return

        # Fill each packet's payload from the staged pinned buffer.
        for num_pkt in range(adv_net_get_num_pkts(msg)):
            ret = adv_net_set_cpu_udp_payload(
                msg,
                num_pkt,
                self.buf.ptr + num_pkt * self.payload_size,
                self.payload_size,
            )
            if ret != AdvNetStatus.SUCCESS:
                logger.error(f"Error returned from adv_net_set_cpu_udp_payload: {ret} != {AdvNetStatus.SUCCESS}")
                return

        op_output.emit(msg, "msg_out")

# Now define a simple application using the operators defined above
# Number of bursts the TX operator emits before its CountCondition stops it.
NUM_MSGS = 10


class App(Application):
    """Benchmark application.

    Wires AdvancedNetworkingBenchTxOp into AdvNetworkOpTx when the YAML
    config declares an advanced_network tx section; otherwise logs and
    builds nothing. The RX path is not yet implemented for the Python
    bindings.
    """

    def compose(self):
        # Only build the TX pipeline when the config declares one.
        adv_net_cfg = self.kwargs("advanced_network")
        if "cfg" in adv_net_cfg and "tx" in adv_net_cfg["cfg"]:
            # CountCondition stops the benchmark after NUM_MSGS bursts.
            tx = AdvancedNetworkingBenchTxOp(
                self, CountCondition(self, NUM_MSGS), name="tx", **self.kwargs("bench_tx")
            )
            adv_net_tx = AdvNetworkOpTx(self, name="adv_net_tx")
            self.add_flow(tx, adv_net_tx, {("msg_out", "burst_in")})
        else:
            logger.info("No TX config found")

        # RX path pending Python bindings for the RX operators:
        # if len(self.kwargs("network_rx")) > 0:
        #     basic_net_rx = BasicNetworkOpRx(self, name="basic_net_rx", **self.kwargs("network_rx"))
        #     rx = BasicNetworkPingRxOp(self, name="rx")
        #     self.add_flow(basic_net_rx, rx, {("burst_out", "msg_in")})
        # else:
        #     logger.info("No RX config found")


if __name__ == "__main__":
    # Require the config path up front instead of crashing with an
    # IndexError on sys.argv[1] when launched without arguments.
    if len(sys.argv) < 2:
        sys.exit(f"Usage: {sys.argv[0]} <config.yaml>")
    config_path = sys.argv[1]
    app = App()
    app.config(config_path)
    app.run()
Loading