Skip to content

Commit

Permalink
fix tesseract pagenum and output name problem
Browse files Browse the repository at this point in the history
  • Loading branch information
xulihang committed Nov 18, 2018
1 parent f433690 commit 311fe20
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 5 deletions.
2 changes: 1 addition & 1 deletion BasicCAT/BasicCAT.b4j.meta
Original file line number Diff line number Diff line change
Expand Up @@ -161,4 +161,4 @@ ModuleClosedNodes7=
ModuleClosedNodes8=
ModuleClosedNodes9=
SelectedBuild=0
-VisibleModules=1,2,3,4,5,6,7,8,9,10
+VisibleModules=1,2,3,4,5,6,7,8,9,10,35,27,12,13
2 changes: 1 addition & 1 deletion BasicCAT/PDF2TXT.bas
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ Sub scanWithPagenum(files As List,langsParam As String,affix As String,offset As
End If
Dim args As List
args.Initialize
-args.AddAll(Array As String(pdfFilename&"-"&i&".jpg",i,"-l",langsParam))
+args.AddAll(Array As String(i&".jpg",i,"-l",langsParam))


Dim sh1 As Shell
Expand Down
6 changes: 3 additions & 3 deletions BasicCAT/pdfbox.bas
Original file line number Diff line number Diff line change
Expand Up @@ -57,20 +57,20 @@ Sub getImage(dir As String,filename As String) As ResumableSub
pageNum=doc.RunMethod("getNumberOfPages",Null)
Dim PDFRenderer As JavaObject
PDFRenderer.InitializeNewInstance("org.apache.pdfbox.rendering.PDFRenderer",Array(doc))
-For i=0 To pageNum
+For i=0 To pageNum-1
Log(i)
Sleep(0)
Dim bi As JavaObject
Dim dpi As Float
dpi=150
bi=PDFRenderer.RunMethodJO("renderImageWithDPI",Array(i,dpi))
Dim out As OutputStream
-out=File.OpenOutput(dir,filename&"-"&i&".jpg",False)
+out=File.OpenOutput(dir,i&".jpg",False)
Dim imageIO As JavaObject
imageIO.InitializeStatic("javax.imageio.ImageIO")
imageIO.RunMethod("write",Array(bi,"jpg",out))
out.Close
-files.Add(File.Combine(dir,filename&"-"&i&".jpg"))
+files.Add(File.Combine(dir,i&".jpg"))
Next
Return files
End Sub
Expand Down

0 comments on commit 311fe20

Please sign in to comment.