Loading SimpleOCRLibrary/SimpleOCRLibrary/IOCRService.cs +1 −0 Original line number Diff line number Diff line Loading @@ -7,5 +7,6 @@ namespace SimpleOCR.Library.Core public string GetOCRContent(byte[] fileContent, ISet<string> languages, string fileType); /// <returns>Returns the file converted to fileType=jpg.</returns> public byte[] ToPicture(byte[] fileContent, string fileType); public ISet<string> GetSupportedLanguages(); } } SimpleOCRLibrary/SimpleOCRLibrary/OCRService.cs +20 −4 Original line number Diff line number Diff line using SimpleOCR.Library.Core; using System; using System; using System.Collections.Generic; using System.IO; using System.Linq; using System.Reflection; using System.Text.RegularExpressions; using Tesseract; namespace SimpleOCR.Library.Core { public class OCRService : IOCRService { private readonly string _DataPath = Path.Combine(System.IO.Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location), "tessdata"); private readonly IList<string> _PictureFileTypesSupportedByTessData = new List<string> { "png", "jpg", "jpeg" }; public OCRService() { Loading @@ -23,8 +24,7 @@ namespace SimpleOCR.Library.Core fileContent = ToPicture(fileContent, fileType); fileType = "jpg"; } string dataPath = Path.Combine(System.IO.Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location), "tessdata"); using TesseractEngine engine = new TesseractEngine(dataPath, this.FormatLanguages(languages), EngineMode.Default); using TesseractEngine engine = new TesseractEngine(_DataPath, this.FormatLanguages(languages), EngineMode.Default); using MemoryStream ms = new MemoryStream(fileContent); using Pix img = Pix.LoadFromMemory(ms.ToArray()); using Page page = engine.Process(img); Loading @@ -50,5 +50,21 @@ namespace SimpleOCR.Library.Core { throw new NotImplementedException(); } public ISet<string> GetSupportedLanguages() { var result = new HashSet<string>(); string pattern = @"([a-z][a-z][a-z])\.traineddata"; foreach (var file in Directory.GetFiles(_DataPath)) { string filename = Path.GetFileName(file); Match match = Regex.Match(filename, pattern); if (match.Success) { result.Add(match.Groups[1].Value); } } return result; } } } SimpleOCRLibrary/SimpleOCRLibraryTests/Testcases/OCRTests.cs→SimpleOCRLibrary/SimpleOCRLibraryTests/Testcases/OCRServiceTests.cs +14 −2 Original line number Diff line number Diff line Loading @@ -6,10 +6,10 @@ using System.Collections.Generic; namespace SimpleOCR.Library.Tests.Testcases { [TestClass] public class OCRTests public class OCRServiceTests { [TestMethod] public void SimpleOCRTests() public void SimpleOCRTest() { // arrange IOCRService ocrService = new OCRService(); Loading @@ -26,5 +26,17 @@ Test2"); // assert Assert.AreEqual(expectedContent, actualContent); } [TestMethod] public void GetSupportedLanguagesTest() { // arrange IOCRService ocrService = new OCRService(); // act ISet<string> supportedLanguages = ocrService.GetSupportedLanguages(); // assert Assert.IsTrue(100 < supportedLanguages.Count); } } } SimpleOCRLibrary/SimpleOCRLibraryTests/Utilities/TestUtilities.cs +1 −3 Original line number Diff line number Diff line using System.Linq; namespace SimpleOCR.Library.Tests.Utilities namespace SimpleOCR.Library.Tests.Utilities { internal static class TestUtilities { Loading Loading
SimpleOCRLibrary/SimpleOCRLibrary/IOCRService.cs +1 −0 Original line number Diff line number Diff line Loading @@ -7,5 +7,6 @@ namespace SimpleOCR.Library.Core public string GetOCRContent(byte[] fileContent, ISet<string> languages, string fileType); /// <returns>Returns the file converted to fileType=jpg.</returns> public byte[] ToPicture(byte[] fileContent, string fileType); public ISet<string> GetSupportedLanguages(); } }
SimpleOCRLibrary/SimpleOCRLibrary/OCRService.cs +20 −4 Original line number Diff line number Diff line using SimpleOCR.Library.Core; using System; using System; using System.Collections.Generic; using System.IO; using System.Linq; using System.Reflection; using System.Text.RegularExpressions; using Tesseract; namespace SimpleOCR.Library.Core { public class OCRService : IOCRService { private readonly string _DataPath = Path.Combine(System.IO.Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location), "tessdata"); private readonly IList<string> _PictureFileTypesSupportedByTessData = new List<string> { "png", "jpg", "jpeg" }; public OCRService() { Loading @@ -23,8 +24,7 @@ namespace SimpleOCR.Library.Core fileContent = ToPicture(fileContent, fileType); fileType = "jpg"; } string dataPath = Path.Combine(System.IO.Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location), "tessdata"); using TesseractEngine engine = new TesseractEngine(dataPath, this.FormatLanguages(languages), EngineMode.Default); using TesseractEngine engine = new TesseractEngine(_DataPath, this.FormatLanguages(languages), EngineMode.Default); using MemoryStream ms = new MemoryStream(fileContent); using Pix img = Pix.LoadFromMemory(ms.ToArray()); using Page page = engine.Process(img); Loading @@ -50,5 +50,21 @@ namespace SimpleOCR.Library.Core { throw new NotImplementedException(); } public ISet<string> GetSupportedLanguages() { var result = new HashSet<string>(); string pattern = @"([a-z][a-z][a-z])\.traineddata"; foreach (var file in Directory.GetFiles(_DataPath)) { string filename = Path.GetFileName(file); Match match = Regex.Match(filename, pattern); if (match.Success) { result.Add(match.Groups[1].Value); } } return result; } } }
SimpleOCRLibrary/SimpleOCRLibraryTests/Testcases/OCRTests.cs→SimpleOCRLibrary/SimpleOCRLibraryTests/Testcases/OCRServiceTests.cs +14 −2 Original line number Diff line number Diff line Loading @@ -6,10 +6,10 @@ using System.Collections.Generic; namespace SimpleOCR.Library.Tests.Testcases { [TestClass] public class OCRTests public class OCRServiceTests { [TestMethod] public void SimpleOCRTests() public void SimpleOCRTest() { // arrange IOCRService ocrService = new OCRService(); Loading @@ -26,5 +26,17 @@ Test2"); // assert Assert.AreEqual(expectedContent, actualContent); } [TestMethod] public void GetSupportedLanguagesTest() { // arrange IOCRService ocrService = new OCRService(); // act ISet<string> supportedLanguages = ocrService.GetSupportedLanguages(); // assert Assert.IsTrue(100 < supportedLanguages.Count); } } }
SimpleOCRLibrary/SimpleOCRLibraryTests/Utilities/TestUtilities.cs +1 −3 Original line number Diff line number Diff line using System.Linq; namespace SimpleOCR.Library.Tests.Utilities namespace SimpleOCR.Library.Tests.Utilities { internal static class TestUtilities { Loading