diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 00000000..7fe70d7f
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
+*.json filter=lfs diff=lfs merge=lfs -text
diff --git a/ultravox/assets/hf/Meta-Llama-3-8B-Instruct/special_tokens_map.json b/ultravox/assets/hf/Meta-Llama-3-8B-Instruct/special_tokens_map.json
new file mode 100644
index 00000000..aa6b9fcf
--- /dev/null
+++ b/ultravox/assets/hf/Meta-Llama-3-8B-Instruct/special_tokens_map.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f38c73729248f6c127296386e3cdde96e254636cc58b4169d3fd32328d9a8ec
+size 296
diff --git a/ultravox/assets/hf/Meta-Llama-3-8B-Instruct/tokenizer.json b/ultravox/assets/hf/Meta-Llama-3-8B-Instruct/tokenizer.json
new file mode 100644
index 00000000..9a62752e
--- /dev/null
+++ b/ultravox/assets/hf/Meta-Llama-3-8B-Instruct/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e134af98b985517b4f068e3755ae90d4e9cd2d45d328325dc503f1c6b2d06cc7
+size 9085698
diff --git a/ultravox/assets/hf/Meta-Llama-3-8B-Instruct/tokenizer_config.json b/ultravox/assets/hf/Meta-Llama-3-8B-Instruct/tokenizer_config.json
new file mode 100644
index 00000000..a251eecd
--- /dev/null
+++ b/ultravox/assets/hf/Meta-Llama-3-8B-Instruct/tokenizer_config.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da0e3a7cce6e4d787e85eb1c24d548420e0d7fe2c7a214e192795c46e40d75bb
+size 50977
diff --git a/ultravox/assets/hf/wav2vec2-base-960h/preprocessor_config.json b/ultravox/assets/hf/wav2vec2-base-960h/preprocessor_config.json
new file mode 100644
index 00000000..a81343fa
--- /dev/null
+++ b/ultravox/assets/hf/wav2vec2-base-960h/preprocessor_config.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:617bd0950f8cc9ac4062e8c73a7be60305ca5790a243df55fa6f44fb671b55b1
+size 257
diff --git a/ultravox/assets/hf/wav2vec2-base-960h/special_tokens_map.json b/ultravox/assets/hf/wav2vec2-base-960h/special_tokens_map.json
new file mode 100644
index 00000000..0805c80d
--- /dev/null
+++ b/ultravox/assets/hf/wav2vec2-base-960h/special_tokens_map.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9046da57c270c8e74d0f38832b4adce269c9d914ef21d2a0925e7772152dd793
+size 96
diff --git a/ultravox/assets/hf/wav2vec2-base-960h/tokenizer_config.json b/ultravox/assets/hf/wav2vec2-base-960h/tokenizer_config.json
new file mode 100644
index 00000000..0bf31239
--- /dev/null
+++ b/ultravox/assets/hf/wav2vec2-base-960h/tokenizer_config.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d7fbc59c63ec955c3d12862fad36d1b919fa6a94e54911297ad649db7822dce1
+size 1147
diff --git a/ultravox/assets/hf/wav2vec2-base-960h/vocab.json b/ultravox/assets/hf/wav2vec2-base-960h/vocab.json
new file mode 100644
index 00000000..e25f0805
--- /dev/null
+++ b/ultravox/assets/hf/wav2vec2-base-960h/vocab.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4178db26b3c7570f6a47f14ac6a1c7b32950b8c2800fb097287e53776934f1c5
+size 358
diff --git a/ultravox/inference/infer_test.py b/ultravox/inference/infer_test.py
index afc5b061..ec165dad 100644
--- a/ultravox/inference/infer_test.py
+++ b/ultravox/inference/infer_test.py
@@ -1,5 +1,3 @@
-import logging
-import os
 from unittest import mock
 
 import numpy as np
@@ -12,23 +10,21 @@
 from ultravox.inference import infer
 from ultravox.model import ultravox_processing
 
-os.environ["TOKENIZERS_PARALLELISM"] = "false"
-
 
+# These tokenizer/processor files are checked into the repo (under ./assets/hf) so CI
+# is faster and tests also work for external contributors (Llama 3 is a gated model).
 @pytest.fixture(scope="module")
 def tokenizer():
-    logging.info("Loading tokenizer")
-    yield transformers.AutoTokenizer.from_pretrained(
-        "meta-llama/Meta-Llama-3-8B-Instruct"
+    return transformers.AutoTokenizer.from_pretrained(
+        "./assets/hf/Meta-Llama-3-8B-Instruct", local_files_only=True
     )
-    logging.info("Tearing down tokenizer")
 
 
 @pytest.fixture(scope="module")
 def audio_processor():
-    logging.info("Loading audio processor")
-    yield transformers.AutoProcessor.from_pretrained("facebook/wav2vec2-base-960h")
-    logging.info("Tearing down audio processor")
+    return transformers.AutoProcessor.from_pretrained(
+        "./assets/hf/wav2vec2-base-960h", local_files_only=True
+    )
 
 
 class FakeInference(infer.LocalInference):
@@ -50,9 +46,6 @@ def __init__(
         self.model.device = "cpu"
         self.model.generate = mock.MagicMock(return_value=[range(25)])
 
-    def __del__(self):
-        logging.info("Tearing down inference")
-
 
 EXPECTED_TOKEN_IDS_START = [128000, 128006, 882, 128007]
 EXPECTED_TOKEN_IDS_END = [128009, 128006, 78191, 128007, 271]