diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..7fe70d7f --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*.json filter=lfs diff=lfs merge=lfs -text diff --git a/ultravox/assets/hf/Meta-Llama-3-8B-Instruct/special_tokens_map.json b/ultravox/assets/hf/Meta-Llama-3-8B-Instruct/special_tokens_map.json new file mode 100644 index 00000000..aa6b9fcf --- /dev/null +++ b/ultravox/assets/hf/Meta-Llama-3-8B-Instruct/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f38c73729248f6c127296386e3cdde96e254636cc58b4169d3fd32328d9a8ec +size 296 diff --git a/ultravox/assets/hf/Meta-Llama-3-8B-Instruct/tokenizer.json b/ultravox/assets/hf/Meta-Llama-3-8B-Instruct/tokenizer.json new file mode 100644 index 00000000..9a62752e --- /dev/null +++ b/ultravox/assets/hf/Meta-Llama-3-8B-Instruct/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e134af98b985517b4f068e3755ae90d4e9cd2d45d328325dc503f1c6b2d06cc7 +size 9085698 diff --git a/ultravox/assets/hf/Meta-Llama-3-8B-Instruct/tokenizer_config.json b/ultravox/assets/hf/Meta-Llama-3-8B-Instruct/tokenizer_config.json new file mode 100644 index 00000000..a251eecd --- /dev/null +++ b/ultravox/assets/hf/Meta-Llama-3-8B-Instruct/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da0e3a7cce6e4d787e85eb1c24d548420e0d7fe2c7a214e192795c46e40d75bb +size 50977 diff --git a/ultravox/assets/hf/wav2vec2-base-960h/preprocessor_config.json b/ultravox/assets/hf/wav2vec2-base-960h/preprocessor_config.json new file mode 100644 index 00000000..a81343fa --- /dev/null +++ b/ultravox/assets/hf/wav2vec2-base-960h/preprocessor_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:617bd0950f8cc9ac4062e8c73a7be60305ca5790a243df55fa6f44fb671b55b1 +size 257 diff --git a/ultravox/assets/hf/wav2vec2-base-960h/special_tokens_map.json b/ultravox/assets/hf/wav2vec2-base-960h/special_tokens_map.json new file mode 100644 index 00000000..0805c80d --- /dev/null +++ b/ultravox/assets/hf/wav2vec2-base-960h/special_tokens_map.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9046da57c270c8e74d0f38832b4adce269c9d914ef21d2a0925e7772152dd793 +size 96 diff --git a/ultravox/assets/hf/wav2vec2-base-960h/tokenizer_config.json b/ultravox/assets/hf/wav2vec2-base-960h/tokenizer_config.json new file mode 100644 index 00000000..0bf31239 --- /dev/null +++ b/ultravox/assets/hf/wav2vec2-base-960h/tokenizer_config.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7fbc59c63ec955c3d12862fad36d1b919fa6a94e54911297ad649db7822dce1 +size 1147 diff --git a/ultravox/assets/hf/wav2vec2-base-960h/vocab.json b/ultravox/assets/hf/wav2vec2-base-960h/vocab.json new file mode 100644 index 00000000..e25f0805 --- /dev/null +++ b/ultravox/assets/hf/wav2vec2-base-960h/vocab.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4178db26b3c7570f6a47f14ac6a1c7b32950b8c2800fb097287e53776934f1c5 +size 358 diff --git a/ultravox/inference/infer_test.py b/ultravox/inference/infer_test.py index afc5b061..ec165dad 100644 --- a/ultravox/inference/infer_test.py +++ b/ultravox/inference/infer_test.py @@ -1,5 +1,3 @@ -import logging -import os from unittest import mock import numpy as np @@ -12,23 +10,21 @@ from ultravox.inference import infer from ultravox.model import ultravox_processing -os.environ["TOKENIZERS_PARALLELISM"] = "false" - +# We cache these files in our repo to make CI faster and also +# work properly for external contributions (since Llama 3 is gated). @pytest.fixture(scope="module") def tokenizer(): - logging.info("Loading tokenizer") - yield transformers.AutoTokenizer.from_pretrained( - "meta-llama/Meta-Llama-3-8B-Instruct" + return transformers.AutoTokenizer.from_pretrained( + "./assets/hf/Meta-Llama-3-8B-Instruct", local_files_only=True ) - logging.info("Tearing down tokenizer") @pytest.fixture(scope="module") def audio_processor(): - logging.info("Loading audio processor") - yield transformers.AutoProcessor.from_pretrained("facebook/wav2vec2-base-960h") - logging.info("Tearing down audio processor") + return transformers.AutoProcessor.from_pretrained( + "./assets/hf/wav2vec2-base-960h", local_files_only=True + ) class FakeInference(infer.LocalInference): @@ -50,9 +46,6 @@ def __init__( self.model.device = "cpu" self.model.generate = mock.MagicMock(return_value=[range(25)]) - def __del__(self): - logging.info("Tearing down inference") - EXPECTED_TOKEN_IDS_START = [128000, 128006, 882, 128007] EXPECTED_TOKEN_IDS_END = [128009, 128006, 78191, 128007, 271]