From d354588782dc46046031508f4df59331329f4783 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Sun, 14 Apr 2024 16:53:07 -0700 Subject: [PATCH 01/16] Added new APIs and enhanced the required APIs in "module_base.py and chassis_base.py" to support SmartSwitch --- sonic_platform_base/chassis_base.py | 51 ++++++++++++++++++- sonic_platform_base/module_base.py | 76 +++++++++++++++++++++++++++-- 2 files changed, 122 insertions(+), 5 deletions(-) diff --git a/sonic_platform_base/chassis_base.py b/sonic_platform_base/chassis_base.py index 6cd4403c6..4bf02feaf 100644 --- a/sonic_platform_base/chassis_base.py +++ b/sonic_platform_base/chassis_base.py @@ -151,6 +151,9 @@ def is_modular_chassis(self): A bool value, should return False by default or for fixed-platforms. Should return True for supervisor-cards, line-cards etc running as part of modular-chassis. + For SmartSwitch platforms this should return True even if they are + fixed-platforms, as they are treated like a modular chassis as the + DPU cards are treated like line-cards of a modular-chassis. """ return False @@ -218,6 +221,7 @@ def get_component(self, index): def get_num_modules(self): """ Retrieves the number of modules available on this chassis + On a SmarSwitch chassis this includes the number of DPUs. Returns: An integer, the number of modules available on this chassis @@ -226,7 +230,8 @@ def get_num_modules(self): def get_all_modules(self): """ - Retrieves all modules available on this chassis + Retrieves all modules available on this chassis. On a SmartSwitch + chassis this includes the DPUs. 
Returns: A list of objects derived from ModuleBase representing all @@ -237,6 +242,8 @@ def get_all_modules(self): def get_module(self, index): """ Retrieves module represented by (0-based) index + On a SmartSwitch index:0 will fetch switch, index:1 will fetch + DPU0 and so on Args: index: An integer, the index (0-based) of the module to @@ -263,12 +270,54 @@ def get_module_index(self, module_name): Args: module_name: A string, prefixed by SUPERVISOR, LINE-CARD or FABRIC-CARD Ex. SUPERVISOR0, LINE-CARD1, FABRIC-CARD5 + SmartSwitch Example: SWITCH, DPU1, DPU2 ... DPUX Returns: An integer, the index of the ModuleBase object in the module_list """ raise NotImplementedError + ############################################## + # SmartSwitch methods + ############################################## + + def get_dpu_id(self, name): + """ + Retrieves the DPU ID for the given dpu-module name. + Returns None for non-smartswitch chassis. + + Returns: + An integer, indicating the DPU ID Ex: name:DPU0 return value 1, + name:DPU1 return value 2, name:DPUX return value X+1 + """ + raise NotImplementedError + + def is_smartswitch(self): + """ + Retrieves whether the sonic instance is part of smartswitch + + Returns: + Returns:True for SmartSwitch and False for other platforms + """ + raise NotImplementedError + + def get_module_dpu_data_port(self, index): + """ + Retrieves the DPU data port NPU-DPU association represented for + the DPU index. Platforms that need to overwrite the platform.json + file will use this API. 
This is valid only on the Switch and not on DPUs + + Args: + index: An integer, the index of the module to retrieve + + Returns: + A string giving the NPU-DPU port association: + Ex: For index: 1 will return the dup0 port association which is + "Ethernet-BP0: Ethernet0" where the string left of ":" (Ethernet-BP0) + is the NPU port and the string right of ":" (Ethernet0) is the DPU port + """ + raise NotImplementedError + ############################################## # Fan methods ############################################## diff --git a/sonic_platform_base/module_base.py b/sonic_platform_base/module_base.py index ccd507b44..3314425fd 100644 --- a/sonic_platform_base/module_base.py +++ b/sonic_platform_base/module_base.py @@ -21,6 +21,8 @@ class ModuleBase(device_base.DeviceBase): MODULE_TYPE_SUPERVISOR = "SUPERVISOR" MODULE_TYPE_LINE = "LINE-CARD" MODULE_TYPE_FABRIC = "FABRIC-CARD" + MODULE_TYPE_DPU = "DPU" + MODULE_TYPE_SWITCH = "SWITCH" # Possible card status for modular chassis # Module state is Empty if no module is inserted in the slot @@ -104,15 +106,18 @@ def get_system_eeprom_info(self): def get_name(self): """ Retrieves the name of the module prefixed by SUPERVISOR, LINE-CARD, - FABRIC-CARD + FABRIC-CARD, SWITCH, DPU0, DPUX Returns: A string, the module name prefixed by one of MODULE_TYPE_SUPERVISOR, - MODULE_TYPE_LINE or MODULE_TYPE_FABRIC and followed by a 0-based index + MODULE_TYPE_LINE or MODULE_TYPE_FABRIC or MODULE_TYPE_DPU or + MODULE_TYPE_SWITCH and followed by a 0-based index. Ex. A Chassis having 1 supervisor, 4 line-cards and 6 fabric-cards can provide names SUPERVISOR0, LINE-CARD0 to LINE-CARD3, - FABRIC-CARD0 to FABRIC-CARD5 + FABRIC-CARD0 to FABRIC-CARD5. 
+ A SmartSwitch having 4 DPUs and 1 Switch can provide names DPU0 to + DPU3 and SWITCH """ raise NotImplementedError @@ -141,6 +146,7 @@ def get_type(self): Returns: A string, the module-type from one of the predefined types: MODULE_TYPE_SUPERVISOR, MODULE_TYPE_LINE or MODULE_TYPE_FABRIC + or MODULE_TYPE_DPU or MODULE_TYPE_SWITCH """ raise NotImplementedError @@ -152,6 +158,10 @@ def get_oper_status(self): A string, the operational status of the module from one of the predefined status values: MODULE_STATUS_EMPTY, MODULE_STATUS_OFFLINE, MODULE_STATUS_FAULT, MODULE_STATUS_PRESENT or MODULE_STATUS_ONLINE + The SmartSwitch platforms will have these additional status + MODULE_STATUS_MIDPLANE_OFFLINE, MODULE_STATUS_MIDPLANE_ONLINE, + MODULE_STATUS_CONTROLPLANE_OFFLINE, MODULE_STATUS_CONTROLPLANE_ONLINE, + MODULE_STATUS_DATAPLANE_OFFLINE, MODULE_STATUS_DATAPLANE_ONLINE """ raise NotImplementedError @@ -175,7 +185,7 @@ def set_admin_state(self, up): The down state will power down the module and the status should show MODULE_STATUS_OFFLINE. The up state will take the module to MODULE_STATUS_FAULT or - MODULE_STAUS_ONLINE states. + MODULE_STATUS_ONLINE states. Args: up: A boolean, True to set the admin-state to UP. False to set the @@ -196,6 +206,62 @@ def get_maximum_consumed_power(self): """ raise NotImplementedError + ############################################## + # SmartSwitch methods + ############################################## + + def get_reboot_cause(self): + """ + Retrieves the DPU ID. Returns None for non-smartswitch chassis. + + Returns: + An integer, indicating the DPU ID. DPU0 returns 1, DPUX returns X+1 + Returns '0' on switch module + """ + raise NotImplementedError + + def get_reboot_cause(self): + """ + Retrieves the cause of the previous reboot of the DPU module + + Returns: + A tuple (string, string) where the first element is a string + containing the cause of the previous reboot. This string must + be one of the predefined strings in this class. 
If the first + string is "REBOOT_CAUSE_HARDWARE_OTHER", the second string can be + used to pass a description of the reboot cause. + + Some more causes are appended to the existing list to handle other + modules such as DPUs. + Ex: REBOOT_CAUSE_POWER_LOSS, REBOOT_CAUSE_HOST_RESET_DPU, + REBOOT_CAUSE_HOST_POWERCYCLED_DPU, REBOOT_CAUSE_SW_THERMAL, + REBOOT_CAUSE_DPU_SELF_REBOOT + """ + raise NotImplementedError + + def get_state_info(self): + """ + Retrieves the dpu state object having the detailed dpu state progression. + Fetched from ChassisStateDB. + + Returns: + An object instance of the DPU_STATE (see DB schema) + Returns None on switch module + """ + raise NotImplementedError + + def get_health_info(self): + """ + Retrieves the dpu health object having the detailed dpu health. + Fetched from the DPUs. + + Returns: + An object instance of the dpu health. Should consist of two lists + "summary and monitorlist" See system_health.py for usage + Returns None on switch module + """ + raise NotImplementedError + ############################################## # Component methods ############################################## @@ -541,6 +607,8 @@ def get_midplane_ip(self): line-card and return the midplane IP-address of the line-card. When called from the line-card, the module will represent the Supervisor and return its midplane IP-address. + When called from the DPU, returns the midplane IP-address of the dpu-card. + When called from the Switch returns the midplane IP-address of Switch. 
Returns: A string, the IP-address of the module reachable over the midplane From 5389771ecc014e3e274588fe63ed99036c32b00b Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Sun, 28 Apr 2024 20:33:12 -0700 Subject: [PATCH 02/16] The default chassis base class is_smartswitch() returns False now --- sonic_platform_base/chassis_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sonic_platform_base/chassis_base.py b/sonic_platform_base/chassis_base.py index 4bf02feaf..143c894f4 100644 --- a/sonic_platform_base/chassis_base.py +++ b/sonic_platform_base/chassis_base.py @@ -299,7 +299,7 @@ def is_smartswitch(self): Returns: Returns:True for SmartSwitch and False for other platforms """ - raise NotImplementedError + return False def get_module_dpu_data_port(self, index): """ From 5f664ef2a8df07a25c6845adf829d82e0d9add48 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Fri, 31 May 2024 07:47:24 -0700 Subject: [PATCH 03/16] Addressing review comments --- sonic_platform_base/module_base.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/sonic_platform_base/module_base.py b/sonic_platform_base/module_base.py index 3314425fd..2ad330611 100644 --- a/sonic_platform_base/module_base.py +++ b/sonic_platform_base/module_base.py @@ -17,6 +17,20 @@ class ModuleBase(device_base.DeviceBase): # Device type definition. Note, this is a constant. 
DEVICE_TYPE = "module" + # Possible reboot causes + REBOOT_CAUSE_POWER_LOSS = "Power Loss" + REBOOT_CAUSE_THERMAL_OVERLOAD_CPU = "Thermal Overload: CPU" + REBOOT_CAUSE_THERMAL_OVERLOAD_ASIC = "Thermal Overload: ASIC" + REBOOT_CAUSE_THERMAL_OVERLOAD_OTHER = "Thermal Overload: Other" + REBOOT_CAUSE_INSUFFICIENT_FAN_SPEED = "Insufficient Fan Speed" + REBOOT_CAUSE_WATCHDOG = "Watchdog" + REBOOT_CAUSE_HARDWARE_OTHER = "Hardware - Other" + REBOOT_CAUSE_HARDWARE_BIOS = "BIOS" + REBOOT_CAUSE_HARDWARE_CPU = "CPU" + REBOOT_CAUSE_HARDWARE_BUTTON = "Push button" + REBOOT_CAUSE_HARDWARE_RESET_FROM_ASIC = "Reset from ASIC" + REBOOT_CAUSE_NON_HARDWARE = "Non-Hardware" + # Possible card types for modular chassis MODULE_TYPE_SUPERVISOR = "SUPERVISOR" MODULE_TYPE_LINE = "LINE-CARD" @@ -210,7 +224,7 @@ def get_maximum_consumed_power(self): # SmartSwitch methods ############################################## - def get_reboot_cause(self): + def get_dpu_id(self): """ Retrieves the DPU ID. Returns None for non-smartswitch chassis. @@ -231,11 +245,6 @@ def get_reboot_cause(self): string is "REBOOT_CAUSE_HARDWARE_OTHER", the second string can be used to pass a description of the reboot cause. - Some more causes are appended to the existing list to handle other - modules such as DPUs. - Ex: REBOOT_CAUSE_POWER_LOSS, REBOOT_CAUSE_HOST_RESET_DPU, - REBOOT_CAUSE_HOST_POWERCYCLED_DPU, REBOOT_CAUSE_SW_THERMAL, - REBOOT_CAUSE_DPU_SELF_REBOOT """ raise NotImplementedError @@ -256,9 +265,7 @@ def get_health_info(self): Fetched from the DPUs. Returns: - An object instance of the dpu health. Should consist of two lists - "summary and monitorlist" See system_health.py for usage - Returns None on switch module + An object instance of the dpu health. 
""" raise NotImplementedError From 33ca01a6b6a5ee4a101e6d189f4249728171b338 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Thu, 6 Jun 2024 16:20:30 -0700 Subject: [PATCH 04/16] Added test cases for the chassis/module APIs for smartswitch --- tests/chassis_base_test.py | 6 ++++++ tests/module_base_test.py | 20 ++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/tests/chassis_base_test.py b/tests/chassis_base_test.py index e550ce5f8..1cd94f11c 100644 --- a/tests/chassis_base_test.py +++ b/tests/chassis_base_test.py @@ -22,6 +22,8 @@ def test_chassis_base(self): not_implemented_methods = [ [chassis.get_uid_led], [chassis.set_uid_led, "COLOR"], + [chassis.get_dpu_id, "DPU0"], + [chassis.get_module_dpu_data_port, 0], ] for method in not_implemented_methods: @@ -35,6 +37,10 @@ def test_chassis_base(self): assert exception_raised + def test_smartswitch(self): + chassis = ChassisBase() + assert(chassis.is_smartswitch() == False) + def test_sensors(self): chassis = ChassisBase() assert(chassis.get_num_voltage_sensors() == 0) diff --git a/tests/module_base_test.py b/tests/module_base_test.py index 20d0ef05a..10312fccf 100644 --- a/tests/module_base_test.py +++ b/tests/module_base_test.py @@ -2,6 +2,26 @@ class TestModuleBase: + def test_module_base(self): + module = ModuleBase() + not_implemented_methods = [ + [module.get_dpu_id], + [module.get_reboot_cause], + [module.get_state_info], + [module.get_health_info], + ] + + for method in not_implemented_methods: + exception_raised = False + try: + func = method[0] + args = method[1:] + func(*args) + except NotImplementedError: + exception_raised = True + + assert exception_raised + def test_sensors(self): module = ModuleBase() assert(module.get_num_voltage_sensors() == 0) From d38f264b1bfa1a9342690d3a7da1a1aeb2b5ba4d Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Wed, 12 Jun 2024 11:19:28 -0700 Subject: [PATCH 05/16] Addressing review comments: 1. 
removed module type "SWITCH" --- sonic_platform_base/chassis_base.py | 8 ++++---- sonic_platform_base/module_base.py | 16 +++++++--------- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/sonic_platform_base/chassis_base.py b/sonic_platform_base/chassis_base.py index 143c894f4..0b9f056c9 100644 --- a/sonic_platform_base/chassis_base.py +++ b/sonic_platform_base/chassis_base.py @@ -242,7 +242,7 @@ def get_all_modules(self): def get_module(self, index): """ Retrieves module represented by (0-based) index - On a SmartSwitch index:0 will fetch switch, index:1 will fetch + On a SmartSwitch index:0 is not used, index:1 will fetch DPU0 and so on Args: @@ -250,7 +250,7 @@ def get_module(self, index): retrieve Returns: - An object dervied from ModuleBase representing the specified + An object derived from ModuleBase representing the specified module """ module = None @@ -270,7 +270,7 @@ def get_module_index(self, module_name): Args: module_name: A string, prefixed by SUPERVISOR, LINE-CARD or FABRIC-CARD Ex. SUPERVISOR0, LINE-CARD1, FABRIC-CARD5 - SmartSwitch Example: SWITCH, DPU1, DPU2 ... DPUX + SmartSwitch Example: DPU0, DPU1, DPU2 ... 
DPUX Returns: An integer, the index of the ModuleBase object in the module_list @@ -313,7 +313,7 @@ def get_module_dpu_data_port(self, index): Returns: A string giving the NPU-DPU port association: Ex: For index: 1 will return the dup0 port association which is - "Ethernet-BP0: Ethernet0" where the string left of ":" (Ethernet-BP0) + "Ethernet192: Ethernet0" where the string left of ":" (Ethernet192) is the NPU port and the string right of ":" (Ethernet0) is the DPU port """ raise NotImplementedError diff --git a/sonic_platform_base/module_base.py b/sonic_platform_base/module_base.py index 2ad330611..f647d2dd8 100644 --- a/sonic_platform_base/module_base.py +++ b/sonic_platform_base/module_base.py @@ -36,7 +36,6 @@ class ModuleBase(device_base.DeviceBase): MODULE_TYPE_LINE = "LINE-CARD" MODULE_TYPE_FABRIC = "FABRIC-CARD" MODULE_TYPE_DPU = "DPU" - MODULE_TYPE_SWITCH = "SWITCH" # Possible card status for modular chassis # Module state is Empty if no module is inserted in the slot @@ -120,18 +119,17 @@ def get_system_eeprom_info(self): def get_name(self): """ Retrieves the name of the module prefixed by SUPERVISOR, LINE-CARD, - FABRIC-CARD, SWITCH, DPU0, DPUX + FABRIC-CARD, DPU0, DPUX Returns: A string, the module name prefixed by one of MODULE_TYPE_SUPERVISOR, - MODULE_TYPE_LINE or MODULE_TYPE_FABRIC or MODULE_TYPE_DPU or - MODULE_TYPE_SWITCH and followed by a 0-based index. + MODULE_TYPE_LINE or MODULE_TYPE_FABRIC or MODULE_TYPE_DPU and followed + by a 0-based index. Ex. A Chassis having 1 supervisor, 4 line-cards and 6 fabric-cards can provide names SUPERVISOR0, LINE-CARD0 to LINE-CARD3, FABRIC-CARD0 to FABRIC-CARD5. 
+ A SmartSwitch having 4 DPUs names DPU0 to DPU3 """ raise NotImplementedError @@ -160,7 +158,7 @@ def get_type(self): Returns: A string, the module-type from one of the predefined types: MODULE_TYPE_SUPERVISOR, MODULE_TYPE_LINE or MODULE_TYPE_FABRIC - or MODULE_TYPE_DPU or MODULE_TYPE_SWITCH + or MODULE_TYPE_DPU """ raise NotImplementedError @@ -175,7 +173,8 @@ def get_oper_status(self): The SmartSwitch platforms will have these additional status MODULE_STATUS_MIDPLANE_OFFLINE, MODULE_STATUS_MIDPLANE_ONLINE, MODULE_STATUS_CONTROLPLANE_OFFLINE, MODULE_STATUS_CONTROLPLANE_ONLINE, - MODULE_STATUS_DATAPLANE_OFFLINE, MODULE_STATUS_DATAPLANE_ONLINE + MODULE_STATUS_CONTROLPLANE_PARTIAL_ONLINE, MODULE_STATUS_DATAPLANE_OFFLINE, + MODULE_STATUS_DATAPLANE_ONLINE, MODULE_STATUS_DATAPLANE_PARTIAL_ONLINE """ raise NotImplementedError @@ -230,7 +229,6 @@ def get_dpu_id(self): Returns: An integer, indicating the DPU ID. DPU0 returns 1, DPUX returns X+1 - Returns '0' on switch module """ raise NotImplementedError From ae9f72332718c0f447bc780ff5511e7d62f667de Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Tue, 9 Jul 2024 13:46:11 -0700 Subject: [PATCH 06/16] Addressed minor review comments --- sonic_platform_base/chassis_base.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/sonic_platform_base/chassis_base.py b/sonic_platform_base/chassis_base.py index 0b9f056c9..60b623a8f 100644 --- a/sonic_platform_base/chassis_base.py +++ b/sonic_platform_base/chassis_base.py @@ -221,21 +221,23 @@ def get_component(self, index): def get_num_modules(self): """ Retrieves the number of modules available on this chassis - On a SmarSwitch chassis this includes the number of DPUs. + On a SmartSwitch chassis this will be the number of DPUs. Returns: - An integer, the number of modules available on this chassis + An integer, the number of modules available on this chassis. 
+ On a SmartSwitch this will be the number of DPUs """ return len(self._module_list) def get_all_modules(self): """ Retrieves all modules available on this chassis. On a SmartSwitch - chassis this includes the DPUs. + chassis this will return all the DPUs. Returns: A list of objects derived from ModuleBase representing all - modules available on this chassis + modules available on this chassis. On a SmartSwitch this + will be a list of DPU objects. """ return self._module_list @@ -312,7 +314,7 @@ def get_module_dpu_data_port(self, index): Returns: A string giving the NPU-DPU port association: - Ex: For index: 1 will return the dup0 port association which is + Ex: For index: 1 will return the dpu0 port association which is "Ethernet192: Ethernet0" where the string left of ":" (Ethernet192) is the NPU port and the string right of ":" (Ethernet0) is the DPU port """ From f6c994901e1d021d6d2e803c8bc0f5e8dfab43f5 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Sun, 28 Jul 2024 17:08:15 -0700 Subject: [PATCH 07/16] Addressed review comments --- sonic_platform_base/module_base.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/sonic_platform_base/module_base.py b/sonic_platform_base/module_base.py index f647d2dd8..8195c6b73 100644 --- a/sonic_platform_base/module_base.py +++ b/sonic_platform_base/module_base.py @@ -170,11 +170,6 @@ def get_oper_status(self): A string, the operational status of the module from one of the predefined status values: MODULE_STATUS_EMPTY, MODULE_STATUS_OFFLINE, MODULE_STATUS_FAULT, MODULE_STATUS_PRESENT or MODULE_STATUS_ONLINE - The SmartSwitch platforms will have these additional status - MODULE_STATUS_MIDPLANE_OFFLINE, MODULE_STATUS_MIDPLANE_ONLINE, - MODULE_STATUS_CONTROLPLANE_OFFLINE, MODULE_STATUS_CONTROLPLANE_ONLINE, - MODULE_STATUS_CONTROLPLANE_PARTIAL_ONLINE, MODULE_STATUS_DATAPLANE_OFFLINE, - MODULE_STATUS_DATAPLANE_ONLINE, MODULE_STATUS_DATAPLANE_PARTIAL_ONLINE """ raise NotImplementedError 
@@ -254,6 +249,19 @@ def get_state_info(self): Returns: An object instance of the DPU_STATE (see DB schema) Returns None on switch module + + Sample Output: { + 'dpu_control_plane_reason': 'All containers are up and running, host-ethlink-status: Uplink1/1 is UP', + 'dpu_control_plane_state': 'UP', + 'dpu_control_plane_time': '20240626 21:13:25', + 'dpu_data_plane_reason': 'DPU container named polaris is running, pdsagent running : OK, pciemgrd running : OK', + 'dpu_data_plane_state': 'UP', + 'dpu_data_plane_time': '20240626 21:10:07', + 'dpu_midplane_link_reason': 'INTERNAL-MGMT : admin state - UP, oper_state - UP, status - OK, HOST-MGMT : admin state - UP, oper_state - UP, status - OK', + 'dpu_midplane_link_state': 'UP', + 'dpu_midplane_link_time': '20240626 21:13:25', + 'id': '0' + } """ raise NotImplementedError From 3622881a9d183ed3801f2f2c2a8d7ae40b545b34 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Tue, 30 Jul 2024 15:23:15 -0700 Subject: [PATCH 08/16] Redefining the return value of get_module_dpu_data_port from string to a dictionary --- sonic_platform_base/chassis_base.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/sonic_platform_base/chassis_base.py b/sonic_platform_base/chassis_base.py index 60b623a8f..61fbebd46 100644 --- a/sonic_platform_base/chassis_base.py +++ b/sonic_platform_base/chassis_base.py @@ -313,10 +313,13 @@ def get_module_dpu_data_port(self, index): index: An integer, the index of the module to retrieve Returns: - A string giving the NPU-DPU port association: - Ex: For index: 1 will return the dpu0 port association which is - "Ethernet192: Ethernet0" where the string left of ":" (Ethernet192) - is the NPU port and the string right of ":" (Ethernet0) is the DPU port + A dictionary giving the NPU-DPU port association: + Ex: When queried for DPU0 it will return + { + "interface": {"Ethernet224": "Ethernet0"} + } + where "Ethernet192: is the NPU port and the string + right of ":" (Ethernet0) is the 
DPU port """ raise NotImplementedError From eeae8e69bdf1051799900f3bb73c7a59f96e4ed8 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Sun, 4 Aug 2024 09:41:05 -0700 Subject: [PATCH 09/16] dpu_id now ranges from 0 --- sonic_platform_base/chassis_base.py | 4 ++-- sonic_platform_base/module_base.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sonic_platform_base/chassis_base.py b/sonic_platform_base/chassis_base.py index 61fbebd46..3183e835a 100644 --- a/sonic_platform_base/chassis_base.py +++ b/sonic_platform_base/chassis_base.py @@ -289,8 +289,8 @@ def get_dpu_id(self, name): Returns None for non-smartswitch chassis. Returns: - An integer, indicating the DPU ID Ex: name:DPU0 return value 1, - name:DPU1 return value 2, name:DPUX return value X+1 + An integer, indicating the DPU ID Ex: name:DPU0 return value 0, + name:DPU1 return value 1, name:DPUX return value X """ raise NotImplementedError diff --git a/sonic_platform_base/module_base.py b/sonic_platform_base/module_base.py index 8195c6b73..073321de1 100644 --- a/sonic_platform_base/module_base.py +++ b/sonic_platform_base/module_base.py @@ -223,7 +223,7 @@ def get_dpu_id(self): Retrieves the DPU ID. Returns None for non-smartswitch chassis. Returns: - An integer, indicating the DPU ID. DPU0 returns 1, DPUX returns X+1 + An integer, indicating the DPU ID. 
DPU0 returns 0, DPUX returns X """ raise NotImplementedError From d6501f76b9723e01b0d8aadb7f6a8ea50a5475be Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Wed, 7 Aug 2024 17:12:03 -0700 Subject: [PATCH 10/16] Removed get_health_info and updated the output format of get_state_info --- sonic_platform_base/module_base.py | 38 +++++++++++++----------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/sonic_platform_base/module_base.py b/sonic_platform_base/module_base.py index 073321de1..eb8d5cfbd 100644 --- a/sonic_platform_base/module_base.py +++ b/sonic_platform_base/module_base.py @@ -250,28 +250,24 @@ def get_state_info(self): An object instance of the DPU_STATE (see DB schema) Returns None on switch module - Sample Output: { - 'dpu_control_plane_reason': 'All containers are up and running, host-ethlink-status: Uplink1/1 is UP', - 'dpu_control_plane_state': 'UP', - 'dpu_control_plane_time': '20240626 21:13:25', - 'dpu_data_plane_reason': 'DPU container named polaris is running, pdsagent running : OK, pciemgrd running : OK', - 'dpu_data_plane_state': 'UP', - 'dpu_data_plane_time': '20240626 21:10:07', - 'dpu_midplane_link_reason': 'INTERNAL-MGMT : admin state - UP, oper_state - UP, status - OK, HOST-MGMT : admin state - UP, oper_state - UP, status - OK', - 'dpu_midplane_link_state': 'UP', - 'dpu_midplane_link_time': '20240626 21:13:25', - 'id': '0' + Sample Output: + { + 'dpu_control_plane': { + 'state': 'UP', + 'time': '20240626 21:13:25', + 'reason': 'All containers are up and running, host-ethlink-status: Uplink1/1 is UP' + }, + 'dpu_data_plane': { + 'state': 'UP', + 'time': '20240626 21:13:25', + 'reason': 'DPU container named polaris is running, pciemgrd running : OK' + }, + 'dpu_midplane_link': { + 'state': 'UP', + 'time': '20240626 21:13:25', + 'reason': 'INTERNAL-MGMT : admin state - UP, oper_state - UP, status - OK' } - """ - raise NotImplementedError - - def get_health_info(self): - """ - Retrieves the dpu health object having 
the detailed dpu health. - Fetched from the DPUs. - - Returns: - An object instance of the dpu health. + } """ raise NotImplementedError From 1633978c2591b7fe63b938d1daa704785d4c7688 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Thu, 8 Aug 2024 08:20:10 -0700 Subject: [PATCH 11/16] Removed get_health_info from module_base_test.py --- tests/module_base_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/module_base_test.py b/tests/module_base_test.py index 10312fccf..b4b9519e3 100644 --- a/tests/module_base_test.py +++ b/tests/module_base_test.py @@ -8,7 +8,6 @@ def test_module_base(self): [module.get_dpu_id], [module.get_reboot_cause], [module.get_state_info], - [module.get_health_info], ] for method in not_implemented_methods: From 6aa67bbde5b977c8d9a028964a5fbcc058825419 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Thu, 8 Aug 2024 18:27:47 -0700 Subject: [PATCH 12/16] Addressed some review comments --- sonic_platform_base/chassis_base.py | 7 +++---- sonic_platform_base/module_base.py | 8 +++++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/sonic_platform_base/chassis_base.py b/sonic_platform_base/chassis_base.py index 3183e835a..53b2686f5 100644 --- a/sonic_platform_base/chassis_base.py +++ b/sonic_platform_base/chassis_base.py @@ -244,8 +244,7 @@ def get_all_modules(self): def get_module(self, index): """ Retrieves module represented by (0-based) index - On a SmartSwitch index:0 is not used, index:1 will fetch - DPU0 and so on + On a SmartSwitch index:0 will fetch DPU0 and so on Args: index: An integer, the index (0-based) of the module to @@ -306,7 +305,7 @@ def is_smartswitch(self): def get_module_dpu_data_port(self, index): """ Retrieves the DPU data port NPU-DPU association represented for - the DPU index. Platforms that need to overwrite the platform.json + the DPU index. Platforms that need to overwrite the hwsku.json file will use this API. 
This is valid only on the Switch and not on DPUs Args: @@ -318,7 +317,7 @@ def get_module_dpu_data_port(self, index): { "interface": {"Ethernet224": "Ethernet0"} } - where "Ethernet192: is the NPU port and the string + where "Ethernet224: is the NPU port and the string right of ":" (Ethernet0) is the DPU port """ raise NotImplementedError diff --git a/sonic_platform_base/module_base.py b/sonic_platform_base/module_base.py index eb8d5cfbd..e5b6ce290 100644 --- a/sonic_platform_base/module_base.py +++ b/sonic_platform_base/module_base.py @@ -224,6 +224,7 @@ def get_dpu_id(self): Returns: An integer, indicating the DPU ID. DPU0 returns 0, DPUX returns X + DPU ID can be greater than or equal to 0. """ raise NotImplementedError @@ -302,7 +303,7 @@ def get_component(self, index): index: An integer, the index (0-based) of the component to retrieve Returns: - An object dervied from ComponentBase representing the specified component + An object derived from ComponentBase representing the specified component """ component = None @@ -616,8 +617,9 @@ def get_midplane_ip(self): line-card and return the midplane IP-address of the line-card. When called from the line-card, the module will represent the Supervisor and return its midplane IP-address. - When called from the DPU, returns the midplane IP-address of the dpu-card. - When called from the Switch returns the midplane IP-address of Switch. + + When called from the SmartSwitch returns the midplane IP-address of + the DPU module. 
Returns: A string, the IP-address of the module reachable over the midplane From 1eb90dad213543c383ab0208d795192a2cc25e4b Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Sat, 10 Aug 2024 07:53:40 -0700 Subject: [PATCH 13/16] Addressed a review comment --- sonic_platform_base/chassis_base.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sonic_platform_base/chassis_base.py b/sonic_platform_base/chassis_base.py index 53b2686f5..7e84fa7d7 100644 --- a/sonic_platform_base/chassis_base.py +++ b/sonic_platform_base/chassis_base.py @@ -151,9 +151,7 @@ def is_modular_chassis(self): A bool value, should return False by default or for fixed-platforms. Should return True for supervisor-cards, line-cards etc running as part of modular-chassis. - For SmartSwitch platforms this should return True even if they are - fixed-platforms, as they are treated like a modular chassis as the - DPU cards are treated like line-cards of a modular-chassis. + For SmartSwitch this should return False. """ return False From 678ab44588cc73c8ff5b3c29ae3c0f6cec5e9c54 Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 19 Aug 2024 18:40:44 -0700 Subject: [PATCH 14/16] Removed the API for npu-dpu data port association as it can be fetched from the platform.json file --- sonic_platform_base/chassis_base.py | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/sonic_platform_base/chassis_base.py b/sonic_platform_base/chassis_base.py index 7e84fa7d7..05a633596 100644 --- a/sonic_platform_base/chassis_base.py +++ b/sonic_platform_base/chassis_base.py @@ -298,27 +298,7 @@ def is_smartswitch(self): Returns: Returns:True for SmartSwitch and False for other platforms """ - return False - - def get_module_dpu_data_port(self, index): - """ - Retrieves the DPU data port NPU-DPU association represented for - the DPU index. Platforms that need to overwrite the hwsku.json - file will use this API. 
This is valid only on the Switch and not on DPUs - - Args: - index: An integer, the index of the module to retrieve - - Returns: - A dictionary giving the NPU-DPU port association: - Ex: When queried for DPU0 it will return - { - "interface": {"Ethernet224": "Ethernet0"} - } - where "Ethernet224: is the NPU port and the string - right of ":" (Ethernet0) is the DPU port - """ - raise NotImplementedError + return False ############################################## # Fan methods From 28847d7cc78bce090be1d12678aed37fd6f611fe Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Mon, 19 Aug 2024 20:43:29 -0700 Subject: [PATCH 15/16] Removed the duplicate definitions in module.py --- sonic_platform_base/module_base.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/sonic_platform_base/module_base.py b/sonic_platform_base/module_base.py index e5b6ce290..0067f116f 100644 --- a/sonic_platform_base/module_base.py +++ b/sonic_platform_base/module_base.py @@ -17,20 +17,6 @@ class ModuleBase(device_base.DeviceBase): # Device type definition. Note, this is a constant. 
DEVICE_TYPE = "module" - # Possible reboot causes - REBOOT_CAUSE_POWER_LOSS = "Power Loss" - REBOOT_CAUSE_THERMAL_OVERLOAD_CPU = "Thermal Overload: CPU" - REBOOT_CAUSE_THERMAL_OVERLOAD_ASIC = "Thermal Overload: ASIC" - REBOOT_CAUSE_THERMAL_OVERLOAD_OTHER = "Thermal Overload: Other" - REBOOT_CAUSE_INSUFFICIENT_FAN_SPEED = "Insufficient Fan Speed" - REBOOT_CAUSE_WATCHDOG = "Watchdog" - REBOOT_CAUSE_HARDWARE_OTHER = "Hardware - Other" - REBOOT_CAUSE_HARDWARE_BIOS = "BIOS" - REBOOT_CAUSE_HARDWARE_CPU = "CPU" - REBOOT_CAUSE_HARDWARE_BUTTON = "Push button" - REBOOT_CAUSE_HARDWARE_RESET_FROM_ASIC = "Reset from ASIC" - REBOOT_CAUSE_NON_HARDWARE = "Non-Hardware" - # Possible card types for modular chassis MODULE_TYPE_SUPERVISOR = "SUPERVISOR" MODULE_TYPE_LINE = "LINE-CARD" From df23899fac0a70206e9dcede9f43d669b88a040a Mon Sep 17 00:00:00 2001 From: Ramesh Raghupathy Date: Tue, 20 Aug 2024 06:08:05 -0700 Subject: [PATCH 16/16] removed "get_module_dpu_data_port" from the test as well --- tests/chassis_base_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/chassis_base_test.py b/tests/chassis_base_test.py index 1cd94f11c..c7dbc512e 100644 --- a/tests/chassis_base_test.py +++ b/tests/chassis_base_test.py @@ -23,7 +23,6 @@ def test_chassis_base(self): [chassis.get_uid_led], [chassis.set_uid_led, "COLOR"], [chassis.get_dpu_id, "DPU0"], - [chassis.get_module_dpu_data_port, 0], ] for method in not_implemented_methods: