-
Notifications
You must be signed in to change notification settings - Fork 8
/
Text Layout Zone Registration.vb
108 lines (102 loc) · 5.16 KB
/
Text Layout Zone Registration.vb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
'#Language "WWB-COM"
Option Explicit
' https://github.com/KofaxTransformation/KTScripts/blob/master/Text%20Layout%20Classification%20and%20Registration.md
'This script uses all unique words on a page to register OCR and OMR zones to subpixel accuracy.
'It calculates the horizontal and vertical shifts required for each zone as well as the page scaling.
'Horizontal and vertical calculations are worked out independently.
'The scaling can be any size. It will even work if a page is shrunk 50% (eg A4 printed as A5), or increased 5 times.
'!!! IMPORTANT !!!!
' Add on Menu/Edit/References...
' "Microsoft Scripting Runtime" for Dictionary to find and match unique words
' "Kofax Cascade Advanced Zone Locator" for retrieving the Zone Definitions
'Create One Locator
' SL_CalculatePageShift (with subfields Scale, Shift, Confidence, Words, DPI.)
' AZL (on the Registration Tab set Registration to "None")
Private Sub Document_BeforeLocate(ByVal pXDoc As CASCADELib.CscXDocument, ByVal LocatorName As String)
   'Event handler that only acts when the locator about to run is named "AZL".
   'It reads the alternatives produced by the "SL_CalculatePageShift" locator and hands them,
   'together with the zone definitions of the AZL locator of the document's extraction class,
   'to Zones_Shift, which rewrites the zones of the document's first representation.
   '  pXDoc       - the document being processed
   '  LocatorName - name of the locator that is about to be executed
   If LocatorName <> "AZL" Then Exit Sub

   Dim PageShifts As CscXDocFieldAlternatives
   Dim ZoneLocator As CscAdvZoneLocator

   'Per-page shift/scale results calculated earlier by the script locator
   Set PageShifts = pXDoc.Locators.ItemByName("SL_CalculatePageShift").Alternatives
   'Zone definitions come from the project (class level), not from the document
   Set ZoneLocator = Project.ClassByName(pXDoc.ExtractionClass).Locators.ItemByName(LocatorName).LocatorMethod

   Zones_Shift(ZoneLocator.Zones, PageShifts, pXDoc.Representations(0))
End Sub
Private Sub SL_CalculatePageShift_LocateAlternatives(ByVal pXDoc As CASCADELib.CscXDocument, ByVal pLocator As CASCADELib.CscXDocField)
   'Script locator that fills pLocator.Alternatives with the page-registration results which
   'Document_BeforeLocate later reads (by locator name "SL_CalculatePageShift") to move the AZL zones.
   'Your document MUST be classified before calling this locator, because both the sample document
   'and the AZL zone definitions are looked up through pXDoc.ExtractionClass.
   '  pXDoc    - the document being processed
   '  pLocator - this locator's field; its Alternatives collection receives the results
   'Every page contributes two alternatives, because Coordinate_Shift reads alternative page*2 for X
   'and page*2+1 for Y. Pages_Compare is defined outside this file and is presumed to add exactly
   'two alternatives for the page it compares - TODO confirm.
   Dim P As Long, Z As Long, ZonePage As Long
   Dim Page As CscXDocPage
   Dim AZLSampleDoc As CscXDocument, AZLSampleDocFileName As String
   Dim AZLDef As CscAdvZoneLocator, ZonesExist() As Boolean

   'A document without pages has nothing to register (and ReDim below would fail on an upper bound of -1)
   If pXDoc.Pages.Count = 0 Then Exit Sub

   'Load the first sample document of the class: <project folder>\Samples\<class path>\Sample0.xdc
   AZLSampleDocFileName = Left(Project.FileName, InStrRev(Project.FileName, "\")) & "Samples\" & Class_GetClassPath(pXDoc.ExtractionClass) & "\Sample0.xdc"
   Set AZLSampleDoc = New CscXDocument
   AZLSampleDoc.Load(AZLSampleDocFileName)

   'Find which pages have zones on them.
   'The zone definitions live in the locator named "AZL" (the same name Document_BeforeLocate tests for).
   'This Sub has no LocatorName parameter, so the name has to be given explicitly.
   Set AZLDef = Project.ClassByName(pXDoc.ExtractionClass).Locators.ItemByName("AZL").LocatorMethod
   ReDim ZonesExist(pXDoc.Pages.Count - 1)
   For Z = 0 To AZLDef.Zones.Count - 1
      ZonePage = AZLDef.Zones(Z).PageNr
      'Ignore zones defined on pages that this document does not have, instead of failing with "subscript out of range"
      If ZonePage >= 0 And ZonePage <= UBound(ZonesExist) Then
         ZonesExist(ZonePage) = True
      End If
   Next

   For P = 0 To pXDoc.Pages.Count - 1
      If P < AZLSampleDoc.Pages.Count AndAlso ZonesExist(P) Then
         'Compare this page with the same page of the sample document
         Set Page = pXDoc.Pages(P)
         Pages_Compare(AZLSampleDoc.Pages(P), Page, pLocator.Alternatives, Page.XRes, Page.YRes)
      Else
         'No zones on this page (or no matching sample page): add two placeholder alternatives so that
         'the page*2 / page*2+1 indexing of later pages stays aligned. The slightly decreasing
         'confidence is derived from the current number of alternatives.
         pLocator.Alternatives.Add.Confidence = 1.0 - (pLocator.Alternatives.Count - 1) * 0.000001
         pLocator.Alternatives.Add.Confidence = 1.0 - (pLocator.Alternatives.Count - 1) * 0.000001
      End If
   Next
End Sub
Public Sub Zones_Shift(AZLZones As CscAdvZoneLocZones, Shifts As CscXDocFieldAlternatives, Rep As CscXDocRepresentation)
   'Replaces all zones of a representation with shifted copies of the locator's zone definitions.
   '  AZLZones - zone definitions of the Advanced Zone Locator
   '  Shifts   - alternatives holding the per-page shift and scale values (passed on to Zone_Shift)
   '  Rep      - representation whose Zones collection is emptied and refilled
   Dim ZoneIndex As Long
   Dim ShiftedZone As CscXDocZone

   'Throw away whatever zones the representation currently holds
   Do While Rep.Zones.Count > 0
      Rep.Zones.Remove(0)
   Loop

   'Append one shifted zone per zone definition, in definition order
   For ZoneIndex = 0 To AZLZones.Count - 1
      Set ShiftedZone = Zone_Shift(AZLZones(ZoneIndex), Shifts, Rep)
      Rep.Zones.Append(ShiftedZone)
   Next
End Sub
Public Function Zone_Shift(AZLZone As CscAdvZoneLocZone, Shifts As CscXDocFieldAlternatives, Rep As CscXDocRepresentation) As CscXDocZone
   'Builds a new CscXDocZone with the page number and name of AZLZone, whose rectangle is the
   'definition's rectangle after three of its corners have been passed through Coordinate_Shift.
   '  AZLZone - zone definition supplying PageNr, Name, Left, Top, Width and Height
   '  Shifts  - alternatives holding the per-page shift and scale values
   '  Rep     - not used by this function
   'Returns the newly created zone.
   Dim Result As CscXDocZone
   Dim CornerX As Double, CornerY As Double
   Dim RightEdge As Long, BottomEdge As Long

   Set Result = New CscXDocZone
   Result.PageNr = AZLZone.PageNr
   Result.Name = AZLZone.Name

   'Top left corner gives the new Left and Top
   CornerX = AZLZone.Left
   CornerY = AZLZone.Top
   Coordinate_Shift(CornerX, CornerY, Shifts, AZLZone.PageNr)
   Result.Left = CornerX
   Result.Top = CornerY

   'Top right corner gives the new right edge (only the X result is used)
   CornerX = AZLZone.Left + AZLZone.Width
   CornerY = AZLZone.Top
   Coordinate_Shift(CornerX, CornerY, Shifts, AZLZone.PageNr)
   RightEdge = CornerX

   'Bottom left corner gives the new bottom edge (only the Y result is used)
   CornerX = AZLZone.Left
   CornerY = AZLZone.Top + AZLZone.Height
   Coordinate_Shift(CornerX, CornerY, Shifts, AZLZone.PageNr)
   BottomEdge = CornerY

   'Width and height are measured from the values actually stored in the zone
   Result.Width = RightEdge - Result.Left
   Result.Height = BottomEdge - Result.Top
   Return Result
End Function
Public Sub Coordinate_Shift(ByRef X As Double, ByRef Y As Double, Shifts As CscXDocFieldAlternatives, page As Integer)
   'Transforms a coordinate in place: each axis is first multiplied by DPI/25.4 and then put
   'through the linear function  result = Scale * value + Shift.
   '  X, Y   - coordinate to transform; overwritten with the result
   '           (the division by 25.4 suggests the input is in millimetres and the output in pixels - TODO confirm)
   '  Shifts - alternatives holding two entries per page: index page*2 is used for X, page*2+1 for Y.
   '           The values are read from the Confidence of the subfields "Scale", "Shift" and "DPI".
   '  page   - zero-based page index used to pick the two alternatives
   Dim DpiX As Long, DpiY As Long
   Dim SlopeX As Double, OffsetX As Double
   Dim SlopeY As Double, OffsetY As Double
   Dim ScaledX As Double, ScaledY As Double

   'Horizontal parameters
   With Shifts(page * 2)
      SlopeX = .SubFields.ItemByName("Scale").Confidence
      OffsetX = .SubFields.ItemByName("Shift").Confidence
      DpiX = .SubFields.ItemByName("DPI").Confidence
   End With

   'Vertical parameters
   With Shifts(page * 2 + 1)
      SlopeY = .SubFields.ItemByName("Scale").Confidence
      OffsetY = .SubFields.ItemByName("Shift").Confidence
      DpiY = .SubFields.ItemByName("DPI").Confidence
   End With

   'Resolution conversion
   ScaledX = X / 25.4 * DpiX
   ScaledY = Y / 25.4 * DpiY

   'Apply slope (m) and intercept (b) of the linear regression
   X = SlopeX * ScaledX + OffsetX
   Y = SlopeY * ScaledY + OffsetY
End Sub