Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Add max_acc_splits #1017

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions fx2ait/fx2ait/ait_splitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,12 +115,14 @@ def __init__(
min_acc_module_size=DEFAULT_MIN_ACC_MODULE_SIZE,
allow_int_inputs=False,
debug_operator_range=None,
max_acc_splits=-1,
):
super().__init__()
self.min_acc_module_size = min_acc_module_size
self.exclude_support_node_name: set = set()
self.allow_int_inputs: bool = allow_int_inputs
self.debug_operator_range = debug_operator_range
self.max_acc_splits = max_acc_splits


class SelectedOperatorSupport(ops.OperatorSupportBase):
Expand Down
1 change: 1 addition & 0 deletions fx2ait/fx2ait/lower/lower.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ def default_split_function(
settings = AITSplitterSettings(
min_acc_module_size=lower_settings.min_acc_module_size,
allow_int_inputs=lower_settings.allow_int_inputs,
max_acc_splits=lower_settings.max_acc_splits,
)
splitter = AITSplitter(model, inputs, settings=settings)
splitter.node_support_preview()
Expand Down
3 changes: 3 additions & 0 deletions fx2ait/fx2ait/lower/lower_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ class LowerSettings:

max_batch_size: int = 2048
min_acc_module_size: int = 10
# Maximum number of splits for the lowered module
# (e.g. if the lowered module is split into _run_on_gpu_0 (unlowered submodule) and _run_on_acc_1 (lowered submodule), it has 2 splits)
max_acc_splits: int = -1
workdir: str = ""
name: str = ""
dll_name: str = "ait_engine.so"
Expand Down
59 changes: 59 additions & 0 deletions fx2ait/fx2ait/test/test_ait_splitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import unittest

import torch
from aitemplate.testing import detect_target
from fx2ait.acc_tracer import acc_ops, acc_tracer
from fx2ait.ait_splitter import ( # @manual=//aitemplate/AITemplate/fx2ait/fx2ait:fx2ait
AITSplitter,
Expand Down Expand Up @@ -250,3 +253,59 @@ def is_node_supported(self, submodules, node: torch.fx.Node) -> bool:
dict(split_results_relu_allowed.split_module.named_children()).keys(),
{"_run_on_acc_0"},
)

@unittest.skipIf(detect_target().in_ci_env(), "Skip run in CI")
def test_fail_if_exceed_max_acc_split_limit(self):
    """Verify max_acc_splits enforcement.

    With every op supported, the whole graph lowers into a single acc
    submodule and satisfies max_acc_splits=1.  Excluding one op from
    support fragments the graph into three submodules, which exceeds
    the limit and must raise ValueError.
    """

    class TestModule(torch.nn.Module):
        def forward(self, a):
            out = torch.sin(a)
            out = torch.relu(out)
            out = torch.cos(out)
            out = torch.sigmoid(out)
            return torch.tanh(out)

    # Declare every op in the module as supported.
    supported_ops = op_support.OperatorSupport(
        {
            f"acc_ops.{name}": None
            for name in ("sin", "cos", "relu", "sigmoid", "tanh")
        }
    )

    settings = AITSplitterSettings(min_acc_module_size=0, max_acc_splits=1)

    # Case 1: nothing excluded -> a single acc submodule, limit satisfied.
    traced = acc_tracer.trace(TestModule(), [torch.randn(2, 3)])
    splitter = AITSplitter(
        traced,
        (torch.randn(2, 3),),
        supported_ops,
        settings,
    )
    result = splitter.generate_split_results()
    children = dict(result.split_module.named_children())
    self.assertEqual(len(children), 1)
    self.assertIn("_run_on_acc_0", children)

    # Case 2: exclude "relu" from support.  The graph now splits into
    # three parts (_run_on_acc_0, _run_on_gpu_1, _run_on_acc_2), which
    # exceeds max_acc_splits=1, so the split must fail.
    traced = acc_tracer.trace(TestModule(), [torch.randn(2, 3)])
    for node in traced.graph.nodes:
        if node.target == acc_ops.relu:
            settings.exclude_support_node_name.add(node.name)
    splitter = AITSplitter(
        traced,
        (torch.randn(2, 3),),
        supported_ops,
        settings,
    )
    with self.assertRaisesRegex(
        ValueError,
        "Cannot fulfill max_acc_splits limit. This may cause split fragmentation and result in performance issues.",
    ):
        splitter.generate_split_results()
Loading