diff --git a/apps/language_models/langchain/gen.py b/apps/language_models/langchain/gen.py
index c23c4b3236..488578360e 100644
--- a/apps/language_models/langchain/gen.py
+++ b/apps/language_models/langchain/gen.py
@@ -109,51 +109,48 @@ def get_config(
     return_model=False,
     raise_exception=False,
 ):
-    from accelerate import init_empty_weights
+    from transformers import AutoConfig
 
-    with init_empty_weights():
-        from transformers import AutoConfig
-
-        try:
-            config = AutoConfig.from_pretrained(
-                base_model,
-                use_auth_token=use_auth_token,
-                trust_remote_code=trust_remote_code,
-                offload_folder=offload_folder,
-            )
-        except OSError as e:
-            if raise_exception:
-                raise
-            if "not a local folder and is not a valid model identifier listed on" in str(
-                e
-            ) or "404 Client Error" in str(
-                e
-            ):
-                # e.g. llama, gpjt, etc.
-                # e.g. HF TGI but not model on HF or private etc.
-                # HF TGI server only should really require prompt_type, not HF model state
-                return None, None
-            else:
-                raise
-        if triton_attn and "mpt-" in base_model.lower():
-            config.attn_config["attn_impl"] = "triton"
-        if long_sequence:
-            if "mpt-7b-storywriter" in base_model.lower():
-                config.update({"max_seq_len": 83968})
-            if "mosaicml/mpt-7b-chat" in base_model.lower():
-                config.update({"max_seq_len": 4096})
-            if "mpt-30b" in base_model.lower():
-                config.update({"max_seq_len": 2 * 8192})
-        if return_model and issubclass(
-            config.__class__, tuple(AutoModel._model_mapping.keys())
+    try:
+        config = AutoConfig.from_pretrained(
+            base_model,
+            use_auth_token=use_auth_token,
+            trust_remote_code=trust_remote_code,
+            offload_folder=offload_folder,
+        )
+    except OSError as e:
+        if raise_exception:
+            raise
+        if "not a local folder and is not a valid model identifier listed on" in str(
+            e
+        ) or "404 Client Error" in str(
+            e
         ):
-            model = AutoModel.from_config(
-                config,
-                trust_remote_code=trust_remote_code,
-            )
+            # e.g. llama, gpjt, etc.
+            # e.g. HF TGI but not model on HF or private etc.
+            # HF TGI server only should really require prompt_type, not HF model state
+            return None, None
         else:
-            # can't infer
-            model = None
+            raise
+    if triton_attn and "mpt-" in base_model.lower():
+        config.attn_config["attn_impl"] = "triton"
+    if long_sequence:
+        if "mpt-7b-storywriter" in base_model.lower():
+            config.update({"max_seq_len": 83968})
+        if "mosaicml/mpt-7b-chat" in base_model.lower():
+            config.update({"max_seq_len": 4096})
+        if "mpt-30b" in base_model.lower():
+            config.update({"max_seq_len": 2 * 8192})
+    if return_model and issubclass(
+        config.__class__, tuple(AutoModel._model_mapping.keys())
+    ):
+        model = AutoModel.from_config(
+            config,
+            trust_remote_code=trust_remote_code,
+        )
+    else:
+        # can't infer
+        model = None
 
     if "falcon" in base_model.lower():
         config.use_cache = False
@@ -177,22 +174,6 @@ def get_non_lora_model(
     """
     device_map = None
 
-    if model is not None:
-        # NOTE: Can specify max_memory={0: max_mem, 1: max_mem}, to shard model
-        # NOTE: Some models require avoiding sharding some layers,
-        # then would pass no_split_module_classes and give list of those layers.
-        from accelerate import infer_auto_device_map
-
-        device_map = infer_auto_device_map(
-            model,
-            dtype=torch.float16 if load_half else torch.float32,
-        )
-        if hasattr(model, "model"):
-            device_map_model = infer_auto_device_map(
-                model.model,
-                dtype=torch.float16 if load_half else torch.float32,
-            )
-            device_map.update(device_map_model)
 
     n_gpus = torch.cuda.device_count() if torch.cuda.is_available else 0
 
diff --git a/apps/language_models/langchain/langchain_requirements.txt b/apps/language_models/langchain/langchain_requirements.txt
index 78bd6e7562..fe1ddcff45 100644
--- a/apps/language_models/langchain/langchain_requirements.txt
+++ b/apps/language_models/langchain/langchain_requirements.txt
@@ -16,7 +16,7 @@ pandas==2.0.2
 matplotlib==3.7.1
 loralib==0.1.1
 bitsandbytes==0.39.0
-accelerate==0.20.3
+# accelerate==0.20.3
 peft==0.4.0
 # 4.31.0+ breaks load_in_8bit=True (https://github.com/huggingface/transformers/issues/25026)
 transformers==4.30.2
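For reference, a minimal sketch of the config-loading path the patch lands on, with no accelerate import; the wrapper name load_config_sketch and its reduced argument list are illustrative only and not part of the patch:

    from transformers import AutoConfig, AutoModel

    def load_config_sketch(base_model, trust_remote_code=False, return_model=False):
        # AutoConfig.from_pretrained only fetches the model config, so no
        # init_empty_weights() context is needed at this point.
        config = AutoConfig.from_pretrained(
            base_model, trust_remote_code=trust_remote_code
        )
        model = None
        if return_model and issubclass(
            config.__class__, tuple(AutoModel._model_mapping.keys())
        ):
            # Builds a randomly initialized model skeleton from the config alone
            # (no weight download), mirroring the patched get_config().
            model = AutoModel.from_config(config, trust_remote_code=trust_remote_code)
        return config, model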