' Script-level variable holding the directory that receives the exported reference files.
' It is assigned in DPM_OnCreate and read in SaveTifAndAddx.
Dim g_DirToHoldFiles
' Initialisation hook: sets the script-level output directory for reference files.
'
' Parameters:
'   DCRoot - passed in by the caller; not used inside this function.
' NOTE(review): SaveTifAndAddx reads g_dcRoot, which is not assigned here - confirm
' that it is provided elsewhere.
Function DPM_OnCreate(DCRoot)
    ' Directory that SaveTifAndAddx writes the .tif/.addx/.txt files into.
    g_DirToHoldFiles = "E:\temp\referenz"
End Function
' Per-document hook: forwards the document to SaveTifAndAddx, which exports the
' image and writes the companion files.
'
' Parameters:
'   DCDocument - the document object handed to SaveTifAndAddx.
Function DPM_OnProcessDocument(DCDocument)
    ' The result of SaveTifAndAddx is not used.
    Call SaveTifAndAddx(DCDocument)
End Function
' Exports the document image as a TIF file and writes two companion files next to it:
' an .addx file assembled from the per-page "PlainTextClassify" properties and a .txt
' file with the concatenated per-page "PlainText" properties. All three files share
' one random base name inside g_DirToHoldFiles.
'
' Parameters:
'   DCDocument - document object; must provide ExportImageFileW, GetPageCount and GetPage.
' Returns:
'   Nothing (the function's return value is never assigned).
' Depends on names defined outside this function: g_DirToHoldFiles, g_dcRoot, dcLog, InfoLog.
' NOTE(review): g_dcRoot is not assigned anywhere in the visible script - confirm that
' it is provided elsewhere.
Function SaveTifAndAddx(DCDocument)
    Dim fso, pendingDirs, probeDir, dirToCreate
    Dim randname, tifFile, addxFile, ocrFile
    Dim addxDoc, ocrText, pageNum, page, ocrPage, addxPage, PageNumStr
    Dim outFile

    ' Open the file system object
    Set fso = CreateObject("Scripting.FileSystemObject")

    ' Make sure the target directory exists. CreateFolder only creates the last path
    ' segment, so walk up to the first existing ancestor and collect every missing
    ' level (outermost first), then create them in that order.
    pendingDirs = ""
    probeDir = g_DirToHoldFiles
    Do While Len(probeDir) > 0
        If fso.FolderExists(probeDir) Then Exit Do
        pendingDirs = probeDir & vbLf & pendingDirs
        probeDir = fso.GetParentFolderName(probeDir)
    Loop
    For Each dirToCreate In Split(pendingDirs, vbLf)
        If Len(dirToCreate) > 0 Then fso.CreateFolder dirToCreate
    Next

    ' File names: common random base name, three extensions
    randname = g_dcRoot.GetRandomString(0)
    tifFile = g_DirToHoldFiles & "\dvbes_referenz_" & randname & ".tif"
    addxFile = g_DirToHoldFiles & "\dvbes_referenz_" & randname & ".addx"
    ocrFile = g_DirToHoldFiles & "\dvbes_referenz_" & randname & ".txt"

    ' Export the TIF
    DCDocument.ExportImageFileW tifFile, 0
    dcLog InfoLog, "reference created: " & tifFile

    ' Build the ADDX content and collect the plain OCR text page by page
    addxDoc = "<ocr-document-result>" & vbCrLf
    ocrText = ""
    For pageNum = 0 To DCDocument.GetPageCount() - 1
        Set page = DCDocument.GetPage(pageNum)

        ocrPage = page.Prop.GetStrW("PlainText")
        ocrText = ocrText & ocrPage & vbNewLine

        ' Prefer the "_ORIG" property; fall back to the plain one when it is empty
        addxPage = page.Prop.GetStr("PlainTextClassify_ORIG")
        If addxPage = "" Then
            addxPage = page.Prop.GetStr("PlainTextClassify")
        End If

        ' The first 12 characters of the page text go on their own line, followed by
        ' a <page> element carrying the zero-padded 7-digit page index.
        addxDoc = addxDoc & Left(addxPage, 12) & vbCrLf
        PageNumStr = Right("0000000" & pageNum, 7)
        addxDoc = addxDoc & "<page name=" & Chr(34) & PageNumStr & Chr(34) & " />"

        ' Everything after the first 12 characters follows the <page> element
        If Len(addxPage) > 12 Then
            addxDoc = addxDoc & Mid(addxPage, 13) & vbCrLf
        End If
    Next
    addxDoc = addxDoc & "</ocr-document-result>"

    ' Write the ADDX file (2 = ForWriting, create if missing, default text format)
    Set outFile = fso.OpenTextFile(addxFile, 2, True)
    outFile.Write addxDoc
    outFile.Close

    ' Write the OCR text file (format -1 = Unicode)
    Set outFile = fso.OpenTextFile(ocrFile, 2, True, -1)
    outFile.Write ocrText
    outFile.Close

    ' Release object references
    Set outFile = Nothing
    Set page = Nothing
    Set fso = Nothing
End Function
' Script-level variable holding the directory that receives the exported reference files.
' NOTE(review): the same name is already declared earlier in this file and everything
' from here on repeats the earlier definitions - confirm whether this second copy is intended.
Dim g_DirToHoldFiles
' Initialisation hook: sets the script-level output directory for reference files.
'
' Parameters:
'   DCRoot - passed in by the caller; not used inside this function.
' NOTE(review): SaveTifAndAddx reads g_dcRoot, which is not assigned here - confirm
' that it is provided elsewhere.
Function DPM_OnCreate(DCRoot)
    ' Directory that SaveTifAndAddx writes the .tif/.addx/.txt files into.
    g_DirToHoldFiles = "E:\temp\referenz"
End Function
' Per-document hook: forwards the document to SaveTifAndAddx, which exports the
' image and writes the companion files.
'
' Parameters:
'   DCDocument - the document object handed to SaveTifAndAddx.
Function DPM_OnProcessDocument(DCDocument)
    ' The result of SaveTifAndAddx is not used.
    Call SaveTifAndAddx(DCDocument)
End Function
' Exports the document image as a TIF file and writes two companion files next to it:
' an .addx file assembled from the per-page "PlainTextClassify" properties and a .txt
' file with the concatenated per-page "PlainText" properties. All three files share
' one random base name inside g_DirToHoldFiles.
'
' Parameters:
'   DCDocument - document object; must provide ExportImageFileW, GetPageCount and GetPage.
' Returns:
'   Nothing (the function's return value is never assigned).
' Depends on names defined outside this function: g_DirToHoldFiles, g_dcRoot, dcLog, InfoLog.
' NOTE(review): g_dcRoot is not assigned anywhere in the visible script - confirm that
' it is provided elsewhere.
Function SaveTifAndAddx(DCDocument)
    Dim fso, pendingDirs, probeDir, dirToCreate
    Dim randname, tifFile, addxFile, ocrFile
    Dim addxDoc, ocrText, pageNum, page, ocrPage, addxPage, PageNumStr
    Dim outFile

    ' Open the file system object
    Set fso = CreateObject("Scripting.FileSystemObject")

    ' Make sure the target directory exists. CreateFolder only creates the last path
    ' segment, so walk up to the first existing ancestor and collect every missing
    ' level (outermost first), then create them in that order.
    pendingDirs = ""
    probeDir = g_DirToHoldFiles
    Do While Len(probeDir) > 0
        If fso.FolderExists(probeDir) Then Exit Do
        pendingDirs = probeDir & vbLf & pendingDirs
        probeDir = fso.GetParentFolderName(probeDir)
    Loop
    For Each dirToCreate In Split(pendingDirs, vbLf)
        If Len(dirToCreate) > 0 Then fso.CreateFolder dirToCreate
    Next

    ' File names: common random base name, three extensions
    randname = g_dcRoot.GetRandomString(0)
    tifFile = g_DirToHoldFiles & "\dvbes_referenz_" & randname & ".tif"
    addxFile = g_DirToHoldFiles & "\dvbes_referenz_" & randname & ".addx"
    ocrFile = g_DirToHoldFiles & "\dvbes_referenz_" & randname & ".txt"

    ' Export the TIF
    DCDocument.ExportImageFileW tifFile, 0
    dcLog InfoLog, "reference created: " & tifFile

    ' Build the ADDX content and collect the plain OCR text page by page
    addxDoc = "<ocr-document-result>" & vbCrLf
    ocrText = ""
    For pageNum = 0 To DCDocument.GetPageCount() - 1
        Set page = DCDocument.GetPage(pageNum)

        ocrPage = page.Prop.GetStrW("PlainText")
        ocrText = ocrText & ocrPage & vbNewLine

        ' Prefer the "_ORIG" property; fall back to the plain one when it is empty
        addxPage = page.Prop.GetStr("PlainTextClassify_ORIG")
        If addxPage = "" Then
            addxPage = page.Prop.GetStr("PlainTextClassify")
        End If

        ' The first 12 characters of the page text go on their own line, followed by
        ' a <page> element carrying the zero-padded 7-digit page index.
        addxDoc = addxDoc & Left(addxPage, 12) & vbCrLf
        PageNumStr = Right("0000000" & pageNum, 7)
        addxDoc = addxDoc & "<page name=" & Chr(34) & PageNumStr & Chr(34) & " />"

        ' Everything after the first 12 characters follows the <page> element
        If Len(addxPage) > 12 Then
            addxDoc = addxDoc & Mid(addxPage, 13) & vbCrLf
        End If
    Next
    addxDoc = addxDoc & "</ocr-document-result>"

    ' Write the ADDX file (2 = ForWriting, create if missing, default text format)
    Set outFile = fso.OpenTextFile(addxFile, 2, True)
    outFile.Write addxDoc
    outFile.Close

    ' Write the OCR text file (format -1 = Unicode)
    Set outFile = fso.OpenTextFile(ocrFile, 2, True, -1)
    outFile.Write ocrText
    outFile.Close

    ' Release object references
    Set outFile = Nothing
    Set page = Nothing
    Set fso = Nothing
End Function