Refactor the codegen directory, pt 12 (pytorch#2483)
Summary:
Pull Request resolved: pytorch#2483

- Migrate backward templates in `codegen/` over to `codegen/training/backward`

Reviewed By: spcyppt

Differential Revision: D55825078

fbshipit-source-id: 5fdbce85b91cba2bb9b661154bbd52aa1d35a1fd
q10 authored and facebook-github-bot committed Apr 6, 2024
1 parent 3bb1583 commit 3bebb6d
Showing 16 changed files with 27 additions and 19 deletions.
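For reference, the shape of the change is sketched below: the generator scripts keep the same CodeTemplate.load(...).write(...) calls, and only the template paths gain the training/backward/ prefix. This is a minimal illustrative sketch, not code from the commit; the import location of CodeTemplate is an assumption and may differ in the actual genscript package.

    # Hypothetical sketch of the template-path migration (not part of the diff).
    # Assumption: CodeTemplate is the loader used by the genscript modules in
    # this commit; its real module path under fbgemm_gpu/codegen/genscript may differ.
    from codegen.genscript.common import CodeTemplate  # hypothetical import path

    # Old reference (pre-commit): bare filename under codegen/
    #   CodeTemplate.load("embedding_backward_split_grad_template.cu")
    # New reference (this commit): the template lives under codegen/training/backward/
    CodeTemplate.load(
        "training/backward/embedding_backward_split_grad_template.cu"
    ).write(
        "gen_embedding_backward_split_grad_embedding_ops.cu",
        is_index_select=False,
    )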
4 changes: 2 additions & 2 deletions fbgemm_gpu/CMakeLists.txt
@@ -558,7 +558,7 @@ set_source_files_properties(${fbgemm_sources}
set(fbgemm_gpu_sources_static_cpu
codegen/training/forward/embedding_forward_split_cpu.cpp
codegen/inference/embedding_forward_quantized_host_cpu.cpp
- codegen/embedding_backward_dense_host_cpu.cpp
+ codegen/training/backward/embedding_backward_dense_host_cpu.cpp
codegen/embedding_bounds_check_host_cpu.cpp
src/merge_pooled_embedding_ops/merge_pooled_embedding_ops_cpu.cpp
src/permute_pooled_embedding_ops/permute_pooled_embedding_function.cpp
@@ -584,7 +584,7 @@ set(fbgemm_gpu_sources_static_cpu
if(NOT FBGEMM_CPU_ONLY)
list(APPEND fbgemm_gpu_sources_static_cpu
codegen/inference/embedding_forward_quantized_host.cpp
- codegen/embedding_backward_dense_host.cpp
+ codegen/training/backward/embedding_backward_dense_host.cpp
codegen/embedding_bounds_check_host.cpp
src/memory_utils/memory_utils.cpp
src/memory_utils/memory_utils_ops.cpp
26 changes: 15 additions & 11 deletions fbgemm_gpu/codegen/genscript/generate_backward_split.py
@@ -68,19 +68,19 @@ def generate_backward_split_gpu(**kwargs: Any) -> None:
# Generate the backward split kernels
for template_filepath, filename_format in [
(
"embedding_backward_split_template.cu",
"training/backward/embedding_backward_split_template.cu",
"gen_embedding_backward_{}_split_{}_cuda.cu",
),
(
"embedding_backward_split_meta_template.cpp",
"training/backward/embedding_backward_split_meta_template.cpp",
"gen_embedding_backward_{}_split_{}_meta.cpp",
),
(
"embedding_backward_split_kernel_cta_template.cu",
"training/backward/embedding_backward_split_kernel_cta_template.cu",
"gen_embedding_backward_{}_split_{}_kernel_cta.cu",
),
(
"embedding_backward_split_kernel_warp_template.cu",
"training/backward/embedding_backward_split_kernel_warp_template.cu",
"gen_embedding_backward_{}_split_{}_kernel_warp.cu",
),
]:
@@ -105,7 +105,7 @@ def generate_backward_split_gpu(**kwargs: Any) -> None:
# Generate CUDA autograd, PT2 unified autograd, and PT2 backward wrapper
for template_filepath, filename in [
(
"embedding_backward_split_host_template.cpp",
"training/backward/embedding_backward_split_host_template.cpp",
f"gen_embedding_backward_split_{optimizer}.cpp",
),
(
@@ -143,16 +143,16 @@ def generate_backward_split_cpu(**kwargs: Any) -> None:
# Generate the backward splits
if kwargs.get("has_cpu_support"):
CodeTemplate.load(
"embedding_backward_split_cpu_approx_template.cpp"
"training/backward/embedding_backward_split_cpu_approx_template.cpp"
if "approx" in optimizer
else "embedding_backward_split_cpu_template.cpp"
else "training/backward/embedding_backward_split_cpu_template.cpp"
).write(f"gen_embedding_backward_{optimizer}_split_cpu.cpp", **kwargs)

# Generate the backward splits (non-dense)
if not kwargs.get("dense"):
for template_filepath, filename in [
(
"embedding_backward_split_host_cpu_template.cpp",
"training/backward/embedding_backward_split_host_cpu_template.cpp",
f"gen_embedding_backward_split_{optimizer}_cpu.cpp",
),
(
@@ -179,7 +179,9 @@ def generate_backward_split(**kwargs: Any) -> None:
def generate_backward_device() -> None:
# Generate backward device kernels based on weighted (True/False), VBE
# (True/False), no bag (True/False)
template_filepath = "embedding_backward_split_device_kernel_template.cuh"
template_filepath = (
"training/backward/embedding_backward_split_device_kernel_template.cuh"
)

BackwardSplitGenerator.render_backward_templates(
template_filepath,
@@ -202,14 +204,16 @@ def generate_backward_device() -> None:
@staticmethod
def generate_backward_grad() -> None:
# Generate the common grad functions
CodeTemplate.load("embedding_backward_split_grad_template.cu").write(
CodeTemplate.load(
"training/backward/embedding_backward_split_grad_template.cu"
).write(
"gen_embedding_backward_split_grad_embedding_ops.cu", is_index_select=False
)

@staticmethod
def generate_backward_indices() -> None:
template = CodeTemplate.load(
"embedding_backward_split_indice_weights_template.cu"
"training/backward/embedding_backward_split_indice_weights_template.cu"
)
for dense in [True, False]:
template.write(
16 changes: 10 additions & 6 deletions fbgemm_gpu/codegen/genscript/generate_index_select.py
@@ -41,19 +41,19 @@ def generate() -> None:
"gen_batch_index_select_dim0_forward_kernel_small.cu",
),
(
"embedding_backward_split_template.cu",
"training/backward/embedding_backward_split_template.cu",
"gen_batch_index_select_dim0_backward_codegen_cuda.cu",
),
(
"embedding_backward_split_kernel_cta_template.cu",
"training/backward/embedding_backward_split_kernel_cta_template.cu",
"gen_batch_index_select_dim0_backward_kernel_cta.cu",
),
(
"embedding_backward_split_kernel_warp_template.cu",
"training/backward/embedding_backward_split_kernel_warp_template.cu",
"gen_batch_index_select_dim0_backward_kernel_warp.cu",
),
(
"embedding_backward_split_device_kernel_template.cuh",
"training/backward/embedding_backward_split_device_kernel_template.cuh",
"gen_embedding_backward_batch_index_select_split_device_kernel.cuh",
),
]:
@@ -69,13 +69,17 @@ def generate() -> None:
args=optargs.cuda,
)

CodeTemplate.load("embedding_backward_split_grad_template.cu").write(
CodeTemplate.load(
"training/backward/embedding_backward_split_grad_template.cu"
).write(
"gen_embedding_backward_split_grad_index_select.cu",
is_index_select=True,
)

# Generate common backward device kernels (generate only once)
CodeTemplate.load("embedding_backward_split_device_kernel_template.cuh").write(
CodeTemplate.load(
"training/backward/embedding_backward_split_device_kernel_template.cuh"
).write(
"gen_embedding_backward_common_split_device_kernel.cuh",
gen_once=True,
)