diff --git a/fbgemm_gpu/CMakeLists.txt b/fbgemm_gpu/CMakeLists.txt index 57dae007f..f2e02f827 100644 --- a/fbgemm_gpu/CMakeLists.txt +++ b/fbgemm_gpu/CMakeLists.txt @@ -558,7 +558,7 @@ set_source_files_properties(${fbgemm_sources} set(fbgemm_gpu_sources_static_cpu codegen/training/forward/embedding_forward_split_cpu.cpp codegen/inference/embedding_forward_quantized_host_cpu.cpp - codegen/embedding_backward_dense_host_cpu.cpp + codegen/training/backward/embedding_backward_dense_host_cpu.cpp codegen/embedding_bounds_check_host_cpu.cpp src/merge_pooled_embedding_ops/merge_pooled_embedding_ops_cpu.cpp src/permute_pooled_embedding_ops/permute_pooled_embedding_function.cpp @@ -584,7 +584,7 @@ set(fbgemm_gpu_sources_static_cpu if(NOT FBGEMM_CPU_ONLY) list(APPEND fbgemm_gpu_sources_static_cpu codegen/inference/embedding_forward_quantized_host.cpp - codegen/embedding_backward_dense_host.cpp + codegen/training/backward/embedding_backward_dense_host.cpp codegen/embedding_bounds_check_host.cpp src/memory_utils/memory_utils.cpp src/memory_utils/memory_utils_ops.cpp diff --git a/fbgemm_gpu/codegen/genscript/generate_backward_split.py b/fbgemm_gpu/codegen/genscript/generate_backward_split.py index 4db540f4c..42f5a79c0 100644 --- a/fbgemm_gpu/codegen/genscript/generate_backward_split.py +++ b/fbgemm_gpu/codegen/genscript/generate_backward_split.py @@ -68,19 +68,19 @@ def generate_backward_split_gpu(**kwargs: Any) -> None: # Generate the backward split kernels for template_filepath, filename_format in [ ( - "embedding_backward_split_template.cu", + "training/backward/embedding_backward_split_template.cu", "gen_embedding_backward_{}_split_{}_cuda.cu", ), ( - "embedding_backward_split_meta_template.cpp", + "training/backward/embedding_backward_split_meta_template.cpp", "gen_embedding_backward_{}_split_{}_meta.cpp", ), ( - "embedding_backward_split_kernel_cta_template.cu", + "training/backward/embedding_backward_split_kernel_cta_template.cu", "gen_embedding_backward_{}_split_{}_kernel_cta.cu", ), ( - "embedding_backward_split_kernel_warp_template.cu", + "training/backward/embedding_backward_split_kernel_warp_template.cu", "gen_embedding_backward_{}_split_{}_kernel_warp.cu", ), ]: @@ -105,7 +105,7 @@ def generate_backward_split_gpu(**kwargs: Any) -> None: # Generate CUDA autograd, PT2 unified autograd, and PT2 backward wrapper for template_filepath, filename in [ ( - "embedding_backward_split_host_template.cpp", + "training/backward/embedding_backward_split_host_template.cpp", f"gen_embedding_backward_split_{optimizer}.cpp", ), ( @@ -143,16 +143,16 @@ def generate_backward_split_cpu(**kwargs: Any) -> None: # Generate the backward splits if kwargs.get("has_cpu_support"): CodeTemplate.load( - "embedding_backward_split_cpu_approx_template.cpp" + "training/backward/embedding_backward_split_cpu_approx_template.cpp" if "approx" in optimizer - else "embedding_backward_split_cpu_template.cpp" + else "training/backward/embedding_backward_split_cpu_template.cpp" ).write(f"gen_embedding_backward_{optimizer}_split_cpu.cpp", **kwargs) # Generate the backward splits (non-dense) if not kwargs.get("dense"): for template_filepath, filename in [ ( - "embedding_backward_split_host_cpu_template.cpp", + "training/backward/embedding_backward_split_host_cpu_template.cpp", f"gen_embedding_backward_split_{optimizer}_cpu.cpp", ), ( @@ -179,7 +179,9 @@ def generate_backward_split(**kwargs: Any) -> None: def generate_backward_device() -> None: # Generate backward device kernels based on weighted (True/False), VBE # (True/False), no bag (True/False) - template_filepath = "embedding_backward_split_device_kernel_template.cuh" + template_filepath = ( + "training/backward/embedding_backward_split_device_kernel_template.cuh" + ) BackwardSplitGenerator.render_backward_templates( template_filepath, @@ -202,14 +204,16 @@ def generate_backward_device() -> None: @staticmethod def generate_backward_grad() -> None: # Generate the common grad functions - CodeTemplate.load("embedding_backward_split_grad_template.cu").write( + CodeTemplate.load( + "training/backward/embedding_backward_split_grad_template.cu" + ).write( "gen_embedding_backward_split_grad_embedding_ops.cu", is_index_select=False ) @staticmethod def generate_backward_indices() -> None: template = CodeTemplate.load( - "embedding_backward_split_indice_weights_template.cu" + "training/backward/embedding_backward_split_indice_weights_template.cu" ) for dense in [True, False]: template.write( diff --git a/fbgemm_gpu/codegen/genscript/generate_index_select.py b/fbgemm_gpu/codegen/genscript/generate_index_select.py index eea3f30c5..d070fb3fa 100644 --- a/fbgemm_gpu/codegen/genscript/generate_index_select.py +++ b/fbgemm_gpu/codegen/genscript/generate_index_select.py @@ -41,19 +41,19 @@ def generate() -> None: "gen_batch_index_select_dim0_forward_kernel_small.cu", ), ( - "embedding_backward_split_template.cu", + "training/backward/embedding_backward_split_template.cu", "gen_batch_index_select_dim0_backward_codegen_cuda.cu", ), ( - "embedding_backward_split_kernel_cta_template.cu", + "training/backward/embedding_backward_split_kernel_cta_template.cu", "gen_batch_index_select_dim0_backward_kernel_cta.cu", ), ( - "embedding_backward_split_kernel_warp_template.cu", + "training/backward/embedding_backward_split_kernel_warp_template.cu", "gen_batch_index_select_dim0_backward_kernel_warp.cu", ), ( - "embedding_backward_split_device_kernel_template.cuh", + "training/backward/embedding_backward_split_device_kernel_template.cuh", "gen_embedding_backward_batch_index_select_split_device_kernel.cuh", ), ]: @@ -69,13 +69,17 @@ def generate() -> None: args=optargs.cuda, ) - CodeTemplate.load("embedding_backward_split_grad_template.cu").write( + CodeTemplate.load( + "training/backward/embedding_backward_split_grad_template.cu" + ).write( "gen_embedding_backward_split_grad_index_select.cu", is_index_select=True, ) # Generate common backward device kernels (generate only once) - CodeTemplate.load("embedding_backward_split_device_kernel_template.cuh").write( + CodeTemplate.load( + "training/backward/embedding_backward_split_device_kernel_template.cuh" + ).write( "gen_embedding_backward_common_split_device_kernel.cuh", gen_once=True, ) diff --git a/fbgemm_gpu/codegen/embedding_backward_dense_host.cpp b/fbgemm_gpu/codegen/training/backward/embedding_backward_dense_host.cpp similarity index 100% rename from fbgemm_gpu/codegen/embedding_backward_dense_host.cpp rename to fbgemm_gpu/codegen/training/backward/embedding_backward_dense_host.cpp diff --git a/fbgemm_gpu/codegen/embedding_backward_dense_host_cpu.cpp b/fbgemm_gpu/codegen/training/backward/embedding_backward_dense_host_cpu.cpp similarity index 100% rename from fbgemm_gpu/codegen/embedding_backward_dense_host_cpu.cpp rename to fbgemm_gpu/codegen/training/backward/embedding_backward_dense_host_cpu.cpp diff --git a/fbgemm_gpu/codegen/embedding_backward_split_cpu_approx_template.cpp b/fbgemm_gpu/codegen/training/backward/embedding_backward_split_cpu_approx_template.cpp similarity index 100% rename from fbgemm_gpu/codegen/embedding_backward_split_cpu_approx_template.cpp rename to fbgemm_gpu/codegen/training/backward/embedding_backward_split_cpu_approx_template.cpp diff --git a/fbgemm_gpu/codegen/embedding_backward_split_cpu_template.cpp b/fbgemm_gpu/codegen/training/backward/embedding_backward_split_cpu_template.cpp similarity index 100% rename from fbgemm_gpu/codegen/embedding_backward_split_cpu_template.cpp rename to fbgemm_gpu/codegen/training/backward/embedding_backward_split_cpu_template.cpp diff --git a/fbgemm_gpu/codegen/embedding_backward_split_device_kernel_template.cuh b/fbgemm_gpu/codegen/training/backward/embedding_backward_split_device_kernel_template.cuh similarity index 100% rename from fbgemm_gpu/codegen/embedding_backward_split_device_kernel_template.cuh rename to fbgemm_gpu/codegen/training/backward/embedding_backward_split_device_kernel_template.cuh diff --git a/fbgemm_gpu/codegen/embedding_backward_split_grad_template.cu b/fbgemm_gpu/codegen/training/backward/embedding_backward_split_grad_template.cu similarity index 100% rename from fbgemm_gpu/codegen/embedding_backward_split_grad_template.cu rename to fbgemm_gpu/codegen/training/backward/embedding_backward_split_grad_template.cu diff --git a/fbgemm_gpu/codegen/embedding_backward_split_host_cpu_template.cpp b/fbgemm_gpu/codegen/training/backward/embedding_backward_split_host_cpu_template.cpp similarity index 100% rename from fbgemm_gpu/codegen/embedding_backward_split_host_cpu_template.cpp rename to fbgemm_gpu/codegen/training/backward/embedding_backward_split_host_cpu_template.cpp diff --git a/fbgemm_gpu/codegen/embedding_backward_split_host_template.cpp b/fbgemm_gpu/codegen/training/backward/embedding_backward_split_host_template.cpp similarity index 100% rename from fbgemm_gpu/codegen/embedding_backward_split_host_template.cpp rename to fbgemm_gpu/codegen/training/backward/embedding_backward_split_host_template.cpp diff --git a/fbgemm_gpu/codegen/embedding_backward_split_indice_weights_template.cu b/fbgemm_gpu/codegen/training/backward/embedding_backward_split_indice_weights_template.cu similarity index 100% rename from fbgemm_gpu/codegen/embedding_backward_split_indice_weights_template.cu rename to fbgemm_gpu/codegen/training/backward/embedding_backward_split_indice_weights_template.cu diff --git a/fbgemm_gpu/codegen/embedding_backward_split_kernel_cta_template.cu b/fbgemm_gpu/codegen/training/backward/embedding_backward_split_kernel_cta_template.cu similarity index 100% rename from fbgemm_gpu/codegen/embedding_backward_split_kernel_cta_template.cu rename to fbgemm_gpu/codegen/training/backward/embedding_backward_split_kernel_cta_template.cu diff --git a/fbgemm_gpu/codegen/embedding_backward_split_kernel_warp_template.cu b/fbgemm_gpu/codegen/training/backward/embedding_backward_split_kernel_warp_template.cu similarity index 100% rename from fbgemm_gpu/codegen/embedding_backward_split_kernel_warp_template.cu rename to fbgemm_gpu/codegen/training/backward/embedding_backward_split_kernel_warp_template.cu diff --git a/fbgemm_gpu/codegen/embedding_backward_split_meta_template.cpp b/fbgemm_gpu/codegen/training/backward/embedding_backward_split_meta_template.cpp similarity index 100% rename from fbgemm_gpu/codegen/embedding_backward_split_meta_template.cpp rename to fbgemm_gpu/codegen/training/backward/embedding_backward_split_meta_template.cpp diff --git a/fbgemm_gpu/codegen/embedding_backward_split_template.cu b/fbgemm_gpu/codegen/training/backward/embedding_backward_split_template.cu similarity index 100% rename from fbgemm_gpu/codegen/embedding_backward_split_template.cu rename to fbgemm_gpu/codegen/training/backward/embedding_backward_split_template.cu