diff --git a/OCR/.NET/Adding-Line-Breaks-Using-OCR/.NET/Adding-Line-Breaks-Using-OCR.sln b/OCR/.NET/Adding-Line-Breaks-Using-OCR/.NET/Adding-Line-Breaks-Using-OCR.sln new file mode 100644 index 00000000..40ed81e3 --- /dev/null +++ b/OCR/.NET/Adding-Line-Breaks-Using-OCR/.NET/Adding-Line-Breaks-Using-OCR.sln @@ -0,0 +1,22 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.12.35707.178 d17.12 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Adding-Line-Breaks-Using-OCR", "Adding-Line-Breaks-Using-OCR\Adding-Line-Breaks-Using-OCR.csproj", "{9B5BD78C-C908-4195-B981-EFB2EF032039}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {9B5BD78C-C908-4195-B981-EFB2EF032039}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {9B5BD78C-C908-4195-B981-EFB2EF032039}.Debug|Any CPU.Build.0 = Debug|Any CPU + {9B5BD78C-C908-4195-B981-EFB2EF032039}.Release|Any CPU.ActiveCfg = Release|Any CPU + {9B5BD78C-C908-4195-B981-EFB2EF032039}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/OCR/.NET/Adding-Line-Breaks-Using-OCR/.NET/Adding-Line-Breaks-Using-OCR/Adding-Line-Breaks-Using-OCR.csproj b/OCR/.NET/Adding-Line-Breaks-Using-OCR/.NET/Adding-Line-Breaks-Using-OCR/Adding-Line-Breaks-Using-OCR.csproj new file mode 100644 index 00000000..1d3e43e7 --- /dev/null +++ b/OCR/.NET/Adding-Line-Breaks-Using-OCR/.NET/Adding-Line-Breaks-Using-OCR/Adding-Line-Breaks-Using-OCR.csproj @@ -0,0 +1,15 @@ + + + + Exe + net8.0 + Adding_Line_Breaks_Using_OCR + enable + enable + + + + + + + diff --git a/OCR/.NET/Adding-Line-Breaks-Using-OCR/.NET/Adding-Line-Breaks-Using-OCR/Data/Input.pdf 
b/OCR/.NET/Adding-Line-Breaks-Using-OCR/.NET/Adding-Line-Breaks-Using-OCR/Data/Input.pdf
new file mode 100644
index 00000000..5c2bf8c5
Binary files /dev/null and b/OCR/.NET/Adding-Line-Breaks-Using-OCR/.NET/Adding-Line-Breaks-Using-OCR/Data/Input.pdf differ
diff --git a/OCR/.NET/Adding-Line-Breaks-Using-OCR/.NET/Adding-Line-Breaks-Using-OCR/Output/gitkeep.txt b/OCR/.NET/Adding-Line-Breaks-Using-OCR/.NET/Adding-Line-Breaks-Using-OCR/Output/gitkeep.txt
new file mode 100644
index 00000000..e69de29b
diff --git a/OCR/.NET/Adding-Line-Breaks-Using-OCR/.NET/Adding-Line-Breaks-Using-OCR/Program.cs b/OCR/.NET/Adding-Line-Breaks-Using-OCR/.NET/Adding-Line-Breaks-Using-OCR/Program.cs
new file mode 100644
index 00000000..62391f1a
--- /dev/null
+++ b/OCR/.NET/Adding-Line-Breaks-Using-OCR/.NET/Adding-Line-Breaks-Using-OCR/Program.cs
@@ -0,0 +1,57 @@
+using Syncfusion.OCRProcessor;
+using Syncfusion.Pdf.Parsing;
+
+// Runs OCR on Data/Input.pdf, saves the processed PDF to Output/Output.pdf and
+// writes the recognized text, one OCR line per text line, to Output/Output.txt.
+string inputPdfPath = Path.GetFullPath(@"Data/Input.pdf");
+string outputDirectory = Path.GetFullPath(@"Output");
+string outputPdfPath = Path.Combine(outputDirectory, "Output.pdf");
+string outputTextPath = Path.Combine(outputDirectory, "Output.txt");
+
+// Create the output folder up front so saving does not fail when it is missing.
+Directory.CreateDirectory(outputDirectory);
+
+// Initialize the OCR processor
+using (OCRProcessor processor = new OCRProcessor())
+// Open the input read-only; the document is saved to a separate output stream.
+using (FileStream inputStream = new FileStream(inputPdfPath, FileMode.Open, FileAccess.Read))
+{
+    // Load the existing PDF document
+    PdfLoadedDocument pdfLoadedDocument = new PdfLoadedDocument(inputStream);
+    string ocrText;
+    try
+    {
+        // Set OCR language to process
+        processor.Settings.Language = Languages.English;
+
+        // Use the sparse-text-with-OSD page segmentation mode
+        processor.Settings.PageSegment = PageSegMode.SparseTextOsd;
+
+        // Process OCR by providing the PDF document
+        processor.PerformOCR(pdfLoadedDocument, processor.TessDataPath, out OCRLayoutResult layoutResult);
+
+        // Gather the recognized lines of every page (not only the first one)
+        // and separate them with line breaks.
+        List<string> recognizedLines = new List<string>();
+        foreach (var page in layoutResult.Pages)
+        {
+            recognizedLines.AddRange(page.Lines.Select(line => line.Text));
+        }
+        ocrText = string.Join("\n", recognizedLines);
+
+        // Create file stream.
+        using (FileStream outputFileStream = new FileStream(outputPdfPath, FileMode.Create, FileAccess.ReadWrite))
+        {
+            // Save the PDF document to file stream.
+            pdfLoadedDocument.Save(outputFileStream);
+        }
+    }
+    finally
+    {
+        // Close the document even when OCR or saving throws.
+        pdfLoadedDocument.Close(true);
+    }
+
+    // Write the recognized text next to the PDF output.
+    File.WriteAllText(outputTextPath, ocrText);
+}