From 19509ed688355728d0a17e4615aa9d90fd759508 Mon Sep 17 00:00:00 2001
From: PhorstenkampFuzzy <106159868+PhorstenkampFuzzy@users.noreply.github.com>
Date: Tue, 17 Dec 2024 10:28:27 +0100
Subject: [PATCH] Fix for issue #3815

Removed the redundant patch, as it is already merged in https://github.com/pdfminer/pdfminer.six/pull/885
---
 unstructured/partition/pdf.py    |  6 ------
 unstructured/patches/__init__.py |  0
 unstructured/patches/pdfminer.py | 24 ------------------------
 3 files changed, 30 deletions(-)
 delete mode 100644 unstructured/patches/__init__.py
 delete mode 100644 unstructured/patches/pdfminer.py

diff --git a/unstructured/partition/pdf.py b/unstructured/partition/pdf.py
index f87812d40b..a648af40aa 100644
--- a/unstructured/partition/pdf.py
+++ b/unstructured/partition/pdf.py
@@ -11,7 +11,6 @@
 
 import numpy as np
 import wrapt
-from pdfminer import psparser
 from pdfminer.layout import LTContainer, LTImage, LTItem, LTTextBox
 from pdfminer.utils import open_filename
 from pi_heif import register_heif_opener
@@ -96,16 +95,11 @@
     PartitionStrategy,
 )
 from unstructured.partition.utils.sorting import coord_has_valid_points, sort_page_elements
-from unstructured.patches.pdfminer import parse_keyword
 from unstructured.utils import first, requires_dependencies
 
 if TYPE_CHECKING:
     pass
 
-# NOTE(alan): Patching this to fix a bug in pdfminer.six. Submitted this PR into pdfminer.six to fix
-# the bug: https://github.com/pdfminer/pdfminer.six/pull/885
-psparser.PSBaseParser._parse_keyword = parse_keyword  # type: ignore
-
 RE_MULTISPACE_INCLUDING_NEWLINES = re.compile(pattern=r"\s+", flags=re.DOTALL)
 
 
diff --git a/unstructured/patches/__init__.py b/unstructured/patches/__init__.py
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/unstructured/patches/pdfminer.py b/unstructured/patches/pdfminer.py
deleted file mode 100644
index 20b938d1ce..0000000000
--- a/unstructured/patches/pdfminer.py
+++ /dev/null
@@ -1,24 +0,0 @@
-from typing import Union
-
-from pdfminer.psparser import END_KEYWORD, KWD, PSBaseParser, PSKeyword
-
-
-def parse_keyword(self: PSBaseParser, s: bytes, i: int) -> int:
-    """Patch for pdfminer method _parse_keyword of PSBaseParser. Changes are identical to the PR
-    https://github.com/pdfminer/pdfminer.six/pull/885."""
-    m = END_KEYWORD.search(s, i)
-    if not m:
-        j = len(s)
-        self._curtoken += s[i:]
-    else:
-        j = m.start(0)
-        self._curtoken += s[i:j]
-    if self._curtoken == b"true":
-        token: Union[bool, PSKeyword] = True
-    elif self._curtoken == b"false":
-        token = False
-    else:
-        token = KWD(self._curtoken)
-    self._add_token(token)
-    self._parse1 = self._parse_main
-    return j