diff --git a/README.md b/README.md index 8e75385..160ff3e 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,15 @@ -# `SqueakyCleanText` +
-[![PyPI](https://img.shields.io/pypi/v/squeakycleantext.svg)](https://pypi.org/project/squeakycleantext/) [![PyPI - Downloads](https://img.shields.io/pypi/dm/squeakycleantext)](https://pypistats.org/packages/squeakycleantext) +# SqueakyCleanText + +[![PyPI](https://img.shields.io/pypi/v/squeakycleantext.svg)](https://pypi.org/project/squeakycleantext/) +[![PyPI - Downloads](https://img.shields.io/pypi/dm/squeakycleantext)](https://pypistats.org/packages/squeakycleantext) +[![Python package](https://github.com/rhnfzl/SqueakyCleanText/actions/workflows/python-package.yml/badge.svg)](https://github.com/rhnfzl/SqueakyCleanText/actions/workflows/python-package.yml) +[![Python Versions](https://img.shields.io/badge/Python-3.10%20|%203.11%20|%203.12-blue)](https://pypi.org/project/squeakycleantext/) +[![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) + +A comprehensive text cleaning and preprocessing pipeline for machine learning and NLP tasks. +
In the world of machine learning and natural language processing, clean and well-structured text data is crucial for building effective downstream models and managing token limits in language models. diff --git a/tests/test_sct.py b/tests/test_sct.py index e9d0fce..e6f0628 100644 --- a/tests/test_sct.py +++ b/tests/test_sct.py @@ -65,12 +65,13 @@ def setUpClass(cls): try: with timeout(1200): # 20 minute timeout config.CHECK_NER_PROCESS = False + # Initialize all the processing classes cls.ProcessContacts = contact.ProcessContacts() cls.ProcessDateTime = datetime.ProcessDateTime() cls.ProcessSpecialSymbols = special.ProcessSpecialSymbols() cls.NormaliseText = normtext.NormaliseText() cls.ProcessStopwords = stopwords.ProcessStopwords() - cls.fake = Faker() + cls.fake = Faker() # Initialize Faker # Override default models with smaller model for testing test_models = ["dslim/bert-base-NER"] * 5 # Same small model for all languages @@ -99,8 +100,15 @@ def setUpClass(cls): raise def setUp(self): + """Set up test fixtures before each test method.""" config.CHECK_NER_PROCESS = True - # Use the class-level NER instance instead of creating a new one + # Copy class-level attributes to instance level + self.ProcessContacts = self.__class__.ProcessContacts + self.ProcessDateTime = self.__class__.ProcessDateTime + self.ProcessSpecialSymbols = self.__class__.ProcessSpecialSymbols + self.NormaliseText = self.__class__.NormaliseText + self.ProcessStopwords = self.__class__.ProcessStopwords + self.fake = self.__class__.fake self.ner = self.__class__.ner @settings(deadline=None)