Loading SimpleOCRLibrary/SimpleOCRLibrary/OCRService.cs +8 −8 Original line number Diff line number Diff line using SimpleOCR.Library.Core; using System; using System.IO; using System.Reflection; using Tesseract; namespace SimpleOCRLibrary.Core Loading @@ -9,12 +9,12 @@ namespace SimpleOCRLibrary.Core { public string GetOCRContent(byte[] fileContent) { string dataPath = @"E:\Data\Projects\Common\SimpleOCR\SimpleOCRLibrary\Other\Resources\OCRData"; using var engine = new TesseractEngine(dataPath, "deu", EngineMode.Default); using var ms = new MemoryStream(fileContent); using var img = Pix.LoadFromMemory(ms.ToArray()); using var page = engine.Process(img); var plainResult= page.GetText(); string dataPath = Path.Combine(System.IO.Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location), "OCRData", "tessdata"); using TesseractEngine engine = new TesseractEngine(dataPath, "deu", EngineMode.Default); using MemoryStream ms = new MemoryStream(fileContent); using Pix img = Pix.LoadFromMemory(ms.ToArray()); using Page page = engine.Process(img); string plainResult = page.GetText(); string result = Utilities.NormalizeString(plainResult); return result; } Loading SimpleOCRLibrary/SimpleOCRLibrary/SimpleOCRLibrary.csproj +5 −0 Original line number Diff line number Diff line Loading @@ -85,4 +85,9 @@ <PackageReference Include="TagLibSharp" Version="2.3.0" /> <PackageReference Include="Tesseract" Version="5.2.0" /> </ItemGroup> <ItemGroup> <None Update="..\Other\Resources\OCRData\**"> <CopyToOutputDirectory>Always</CopyToOutputDirectory> </None> </ItemGroup> </Project> SimpleOCRLibrary/SimpleOCRLibrary/Utilities.cs +1 −7 Original line number Diff line number Diff line using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading.Tasks; namespace SimpleOCR.Library.Core namespace SimpleOCR.Library.Core { internal static class Utilities { Loading SimpleOCRLibrary/SimpleOCRLibraryTests/SimpleOCRLibraryTests.csproj +4 −3 Original line number Diff line number Diff line Loading @@ -65,6 +65,7 @@ <PrivateAssets>all</PrivateAssets> <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets> </PackageReference> <PackageReference Include="GRYLibrary" Version="0.12.148" /> <PackageReference Include="HtmlAgilityPack" Version="1.12.1" /> <PackageReference Include="Microsoft.CodeCoverage" Version="17.14.1" /> <PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" Version="9.0.5" /> Loading @@ -85,8 +86,8 @@ <PackageReference Include="System.Text.RegularExpressions" Version="4.3.1" /> </ItemGroup> <ItemGroup> <None Update="TestData\**"> <CopyToOutputDirectory>Always</CopyToOutputDirectory> </None> <EmbeddedResource Include="TestData\**"> <CopyToOutputDirectory>Never</CopyToOutputDirectory> </EmbeddedResource> </ItemGroup> </Project> No newline at end of file SimpleOCRLibrary/SimpleOCRLibraryTests/Testcases/OCRTests.cs +3 −4 Original line number Diff line number Diff line using Microsoft.VisualStudio.TestTools.UnitTesting; using SimpleOCR.Library.Tests.Utilities; using SimpleOCRLibrary.Core; using System.IO; namespace SimpleOCR.Library.Tests.Testcases { Loading @@ -12,14 +12,13 @@ namespace SimpleOCR.Library.Tests.Testcases { // arrange IOCRService ocrService = new OCRService(); string testFile = @"E:\Data\Projects\Common\SimpleOCR\SimpleOCRLibrary\SimpleOCRLibraryTests\TestData\Test.png";//TODO byte[] testFileContent = File.ReadAllBytes(testFile); byte[] testFileContent = TestUtilities.LoadResourceFileContent("Test.png"); string expectedContent = SimpleOCR.Library.Core.Utilities.NormalizeString(@"Test1 Test2"); // act var actualContent = ocrService.GetOCRContent(testFileContent); string actualContent = ocrService.GetOCRContent(testFileContent); // assert Assert.AreEqual(expectedContent, actualContent); Loading Loading
SimpleOCRLibrary/SimpleOCRLibrary/OCRService.cs +8 −8 Original line number Diff line number Diff line using SimpleOCR.Library.Core; using System; using System.IO; using System.Reflection; using Tesseract; namespace SimpleOCRLibrary.Core Loading @@ -9,12 +9,12 @@ namespace SimpleOCRLibrary.Core { public string GetOCRContent(byte[] fileContent) { string dataPath = @"E:\Data\Projects\Common\SimpleOCR\SimpleOCRLibrary\Other\Resources\OCRData"; using var engine = new TesseractEngine(dataPath, "deu", EngineMode.Default); using var ms = new MemoryStream(fileContent); using var img = Pix.LoadFromMemory(ms.ToArray()); using var page = engine.Process(img); var plainResult= page.GetText(); string dataPath = Path.Combine(System.IO.Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location), "OCRData", "tessdata"); using TesseractEngine engine = new TesseractEngine(dataPath, "deu", EngineMode.Default); using MemoryStream ms = new MemoryStream(fileContent); using Pix img = Pix.LoadFromMemory(ms.ToArray()); using Page page = engine.Process(img); string plainResult = page.GetText(); string result = Utilities.NormalizeString(plainResult); return result; } Loading
SimpleOCRLibrary/SimpleOCRLibrary/SimpleOCRLibrary.csproj +5 −0 Original line number Diff line number Diff line Loading @@ -85,4 +85,9 @@ <PackageReference Include="TagLibSharp" Version="2.3.0" /> <PackageReference Include="Tesseract" Version="5.2.0" /> </ItemGroup> <ItemGroup> <None Update="..\Other\Resources\OCRData\**"> <CopyToOutputDirectory>Always</CopyToOutputDirectory> </None> </ItemGroup> </Project>
SimpleOCRLibrary/SimpleOCRLibrary/Utilities.cs +1 −7 Original line number Diff line number Diff line using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Threading.Tasks; namespace SimpleOCR.Library.Core namespace SimpleOCR.Library.Core { internal static class Utilities { Loading
SimpleOCRLibrary/SimpleOCRLibraryTests/SimpleOCRLibraryTests.csproj +4 −3 Original line number Diff line number Diff line Loading @@ -65,6 +65,7 @@ <PrivateAssets>all</PrivateAssets> <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets> </PackageReference> <PackageReference Include="GRYLibrary" Version="0.12.148" /> <PackageReference Include="HtmlAgilityPack" Version="1.12.1" /> <PackageReference Include="Microsoft.CodeCoverage" Version="17.14.1" /> <PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" Version="9.0.5" /> Loading @@ -85,8 +86,8 @@ <PackageReference Include="System.Text.RegularExpressions" Version="4.3.1" /> </ItemGroup> <ItemGroup> <None Update="TestData\**"> <CopyToOutputDirectory>Always</CopyToOutputDirectory> </None> <EmbeddedResource Include="TestData\**"> <CopyToOutputDirectory>Never</CopyToOutputDirectory> </EmbeddedResource> </ItemGroup> </Project> No newline at end of file
SimpleOCRLibrary/SimpleOCRLibraryTests/Testcases/OCRTests.cs +3 −4 Original line number Diff line number Diff line using Microsoft.VisualStudio.TestTools.UnitTesting; using SimpleOCR.Library.Tests.Utilities; using SimpleOCRLibrary.Core; using System.IO; namespace SimpleOCR.Library.Tests.Testcases { Loading @@ -12,14 +12,13 @@ namespace SimpleOCR.Library.Tests.Testcases { // arrange IOCRService ocrService = new OCRService(); string testFile = @"E:\Data\Projects\Common\SimpleOCR\SimpleOCRLibrary\SimpleOCRLibraryTests\TestData\Test.png";//TODO byte[] testFileContent = File.ReadAllBytes(testFile); byte[] testFileContent = TestUtilities.LoadResourceFileContent("Test.png"); string expectedContent = SimpleOCR.Library.Core.Utilities.NormalizeString(@"Test1 Test2"); // act var actualContent = ocrService.GetOCRContent(testFileContent); string actualContent = ocrService.GetOCRContent(testFileContent); // assert Assert.AreEqual(expectedContent, actualContent); Loading