From 311fe20e9fa41dbcabf0fde068826cafdb00bdaf Mon Sep 17 00:00:00 2001 From: xulihang Date: Sun, 18 Nov 2018 13:51:45 +0800 Subject: [PATCH] fix tesseract pagenum and output name problem --- BasicCAT/BasicCAT.b4j.meta | 2 +- BasicCAT/PDF2TXT.bas | 2 +- BasicCAT/pdfbox.bas | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/BasicCAT/BasicCAT.b4j.meta b/BasicCAT/BasicCAT.b4j.meta index cf712f7..6f410a1 100644 --- a/BasicCAT/BasicCAT.b4j.meta +++ b/BasicCAT/BasicCAT.b4j.meta @@ -161,4 +161,4 @@ ModuleClosedNodes7= ModuleClosedNodes8= ModuleClosedNodes9= SelectedBuild=0 -VisibleModules=1,2,3,4,5,6,7,8,9,10 +VisibleModules=1,2,3,4,5,6,7,8,9,10,35,27,12,13 diff --git a/BasicCAT/PDF2TXT.bas b/BasicCAT/PDF2TXT.bas index 0d07dbc..dba661a 100644 --- a/BasicCAT/PDF2TXT.bas +++ b/BasicCAT/PDF2TXT.bas @@ -217,7 +217,7 @@ Sub scanWithPagenum(files As List,langsParam As String,affix As String,offset As End If Dim args As List args.Initialize - args.AddAll(Array As String(pdfFilename&"-"&i&".jpg",i,"-l",langsParam)) + args.AddAll(Array As String(i&".jpg",i,"-l",langsParam)) Dim sh1 As Shell diff --git a/BasicCAT/pdfbox.bas b/BasicCAT/pdfbox.bas index 326ea4e..edbbdc7 100644 --- a/BasicCAT/pdfbox.bas +++ b/BasicCAT/pdfbox.bas @@ -57,7 +57,7 @@ Sub getImage(dir As String,filename As String) As ResumableSub pageNum=doc.RunMethod("getNumberOfPages",Null) Dim PDFRenderer As JavaObject PDFRenderer.InitializeNewInstance("org.apache.pdfbox.rendering.PDFRenderer",Array(doc)) - For i=0 To pageNum + For i=0 To pageNum-1 Log(i) Sleep(0) Dim bi As JavaObject @@ -65,12 +65,12 @@ Sub getImage(dir As String,filename As String) As ResumableSub dpi=150 bi=PDFRenderer.RunMethodJO("renderImageWithDPI",Array(i,dpi)) Dim out As OutputStream - out=File.OpenOutput(dir,filename&"-"&i&".jpg",False) + out=File.OpenOutput(dir,i&".jpg",False) Dim imageIO As JavaObject imageIO.InitializeStatic("javax.imageio.ImageIO") imageIO.RunMethod("write",Array(bi,"jpg",out)) out.Close - files.Add(File.Combine(dir,filename&"-"&i&".jpg")) + files.Add(File.Combine(dir,i&".jpg")) Next Return files End Sub