-
Notifications
You must be signed in to change notification settings - Fork 20
/
ocr.py
41 lines (32 loc) · 848 Bytes
/
ocr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
from PIL import Image
import pytesseract
import cv2
import os
import time
def preprocess(image, args="thresh"):
    """Convert an image to grayscale, optionally clean it up, and save it.

    Args:
        image: Image array as produced by ``cv2.imread``.
        args: Clean-up step applied after the grayscale conversion.
            ``"thresh"`` applies a binary Otsu threshold, ``"blur"`` applies
            a median blur with kernel size 3; any other value leaves the
            grayscale image unchanged.

    Returns:
        Name of the JPEG file written to the current working directory.
        The name is ``"<pid>.jpg"``, so repeated calls from the same
        process overwrite the same file.

    Raises:
        ValueError: If ``image`` is ``None`` (what ``cv2.imread`` returns
            when it cannot read a file).
        OSError: If the processed image could not be written to disk.
    """
    if image is None:
        raise ValueError(
            "preprocess() received None instead of an image; "
            "check that the input file exists and is readable"
        )

    try:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        if args == "thresh":
            gray = cv2.threshold(
                gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU
            )[1]
        elif args == "blur":
            gray = cv2.medianBlur(gray, 3)
    except cv2.error:
        # Best effort: if OpenCV rejects the input (presumably because it is
        # not a BGR image), fall back to writing the image untouched.
        gray = image

    filename = "{}.jpg".format(os.getpid())
    # cv2.imwrite reports failure through its return value, not an exception.
    if not cv2.imwrite(filename, gray):
        raise OSError("could not write preprocessed image to {}".format(filename))
    return filename
def ocr(filename):
    """Run Tesseract OCR on an image file, print the text, and delete the file.

    Args:
        filename: Name of the image file. A relative name is resolved
            against the current working directory.

    Returns:
        The text recognised by ``pytesseract.image_to_string``.

    The image file is removed even when opening it or running OCR fails, so
    temporary files do not accumulate.
    """
    # os.path.join keeps this portable instead of hard-coding "\\".
    path = os.path.join(os.getcwd(), filename)
    try:
        # Close the image before deleting it; an open handle blocks removal
        # on Windows.
        with Image.open(path) as im:
            text = pytesseract.image_to_string(im)
    finally:
        try:
            os.remove(path)
        except FileNotFoundError:
            # Nothing to clean up; do not mask the original error.
            pass
    print(text)
    return text
def main(
    image_path=r"C:\Users\hp\Desktop\My Files\Practice\Python\Document Scanner\pic2.jpg",
):
    """Read an image from disk, preprocess it, and print its OCR text.

    Args:
        image_path: Path of the image to scan. Defaults to the sample
            picture path this script was written against.

    Raises:
        OSError: If the image cannot be read from ``image_path``.
    """
    im = cv2.imread(image_path)
    # cv2.imread signals failure by returning None rather than raising.
    if im is None:
        raise OSError("could not read image: {}".format(image_path))
    ocr(preprocess(im))
# Run the OCR demo only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()