Skip to content

Commit

Permalink
Local tokenizer and processor for more consistent CI (#16)
Browse files Browse the repository at this point in the history
* saving local versions of tokenizer and processor for faster CI

* cr

* try again

* and again

* debug

* verbose tests

* Move files

* Revert "verbose tests"

This reverts commit bf63a43.

* clean up

* Reapply "verbose tests"

This reverts commit aa09382.

* more logging again

* blarg

* remove logs

---------

Co-authored-by: juberti <[email protected]>
  • Loading branch information
farzadab and juberti authored Jun 10, 2024
1 parent f2daad1 commit 8a3bc75
Show file tree
Hide file tree
Showing 9 changed files with 29 additions and 14 deletions.
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.json filter=lfs diff=lfs merge=lfs -text
Git LFS file not shown
3 changes: 3 additions & 0 deletions ultravox/assets/hf/Meta-Llama-3-8B-Instruct/tokenizer.json
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
3 changes: 3 additions & 0 deletions ultravox/assets/hf/wav2vec2-base-960h/special_tokens_map.json
Git LFS file not shown
3 changes: 3 additions & 0 deletions ultravox/assets/hf/wav2vec2-base-960h/tokenizer_config.json
Git LFS file not shown
3 changes: 3 additions & 0 deletions ultravox/assets/hf/wav2vec2-base-960h/vocab.json
Git LFS file not shown
21 changes: 7 additions & 14 deletions ultravox/inference/infer_test.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import logging
import os
from unittest import mock

import numpy as np
Expand All @@ -12,23 +10,21 @@
from ultravox.inference import infer
from ultravox.model import ultravox_processing

os.environ["TOKENIZERS_PARALLELISM"] = "false"


# We cache these files in our repo to make CI faster and also
# work properly for external contributions (since Llama 3 is gated).
@pytest.fixture(scope="module")
def tokenizer():
    """Return the Llama 3 tokenizer loaded from the repo-local cached copy.

    Module-scoped so the tokenizer is loaded once per test module.
    `local_files_only=True` forbids any network/Hub access, so CI never
    hits the gated `meta-llama/Meta-Llama-3-8B-Instruct` repo.
    """
    # NOTE(review): the relative path assumes tests run from the package
    # root (where ./assets/hf/ resolves) — confirm against the CI config.
    return transformers.AutoTokenizer.from_pretrained(
        "./assets/hf/Meta-Llama-3-8B-Instruct", local_files_only=True
    )


@pytest.fixture(scope="module")
def audio_processor():
    """Return the wav2vec2 audio processor from the repo-local cached copy.

    Module-scoped so the processor is loaded once per test module.
    `local_files_only=True` forbids any network/Hub access, keeping CI
    fast and deterministic.
    """
    # NOTE(review): path is relative to the test working directory, same
    # assumption as the tokenizer fixture — confirm against the CI config.
    return transformers.AutoProcessor.from_pretrained(
        "./assets/hf/wav2vec2-base-960h", local_files_only=True
    )


class FakeInference(infer.LocalInference):
Expand All @@ -50,9 +46,6 @@ def __init__(
self.model.device = "cpu"
self.model.generate = mock.MagicMock(return_value=[range(25)])

def __del__(self):
logging.info("Tearing down inference")


EXPECTED_TOKEN_IDS_START = [128000, 128006, 882, 128007]
EXPECTED_TOKEN_IDS_END = [128009, 128006, 78191, 128007, 271]
Expand Down

0 comments on commit 8a3bc75

Please sign in to comment.