Unverified Commit 232bda01 authored by Marius Göcke's avatar Marius Göcke
Browse files

added GetSupportedLanguages;+semver:minor

parent ee67ca8d
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -7,5 +7,6 @@ namespace SimpleOCR.Library.Core
        public string GetOCRContent(byte[] fileContent, ISet<string> languages, string fileType);
        /// <returns>Returns the file converted to fileType=jpg.</returns>
        public byte[] ToPicture(byte[] fileContent, string fileType);
        /// <summary>Lists the languages supported by this OCR service.</summary>
        /// <returns>Returns the set of supported language codes.</returns>
        public ISet<string> GetSupportedLanguages();
    }
}
+20 −4
Original line number Diff line number Diff line
using SimpleOCR.Library.Core;
using System;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Text.RegularExpressions;
using Tesseract;

namespace SimpleOCR.Library.Core
{
    public class OCRService : IOCRService
    {
        private readonly string _DataPath = Path.Combine(System.IO.Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location), "tessdata");
        private readonly IList<string> _PictureFileTypesSupportedByTessData = new List<string> { "png", "jpg", "jpeg" };
        public OCRService()
        {
@@ -23,8 +24,7 @@ namespace SimpleOCR.Library.Core
                fileContent = ToPicture(fileContent, fileType);
                fileType = "jpg";
            }
            string dataPath = Path.Combine(System.IO.Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location), "tessdata");
            using TesseractEngine engine = new TesseractEngine(dataPath, this.FormatLanguages(languages), EngineMode.Default);
            using TesseractEngine engine = new TesseractEngine(_DataPath, this.FormatLanguages(languages), EngineMode.Default);
            using MemoryStream ms = new MemoryStream(fileContent);
            using Pix img = Pix.LoadFromMemory(ms.ToArray());
            using Page page = engine.Process(img);
@@ -50,5 +50,21 @@ namespace SimpleOCR.Library.Core
        {
            throw new NotImplementedException();
        }

        /// <summary>
        /// Lists the languages for which a trained-data file is present in the tessdata directory.
        /// </summary>
        /// <returns>
        /// The names of all files directly inside the tessdata directory whose extension is
        /// ".traineddata", with that extension removed (e.g. "eng", "chi_sim").
        /// </returns>
        public ISet<string> GetSupportedLanguages()
        {
            const string trainedDataExtension = ".traineddata";
            var result = new HashSet<string>();
            foreach (string file in Directory.EnumerateFiles(_DataPath))
            {
                // Only exact "<name>.traineddata" files count; this skips leftovers such as
                // "eng.traineddata.bak".
                if (!string.Equals(Path.GetExtension(file), trainedDataExtension, StringComparison.Ordinal))
                {
                    continue;
                }

                // Take the whole name in front of the extension. Matching just three trailing
                // letters would report "chi_sim.traineddata" as "sim", which is not a language
                // name that can be handed back to the engine.
                string language = Path.GetFileNameWithoutExtension(file);
                if (language.Length > 0)
                {
                    result.Add(language);
                }
            }
            return result;
        }
    }
}
+14 −2
Original line number Diff line number Diff line
@@ -6,10 +6,10 @@ using System.Collections.Generic;
namespace SimpleOCR.Library.Tests.Testcases
{
    [TestClass]
    public class OCRTests
    public class OCRServiceTests
    {
        [TestMethod]
        public void SimpleOCRTests()
        public void SimpleOCRTest()
        {
            // arrange
            IOCRService ocrService = new OCRService();
@@ -26,5 +26,17 @@ Test2");
            // assert
            Assert.AreEqual(expectedContent, actualContent);
        }
        /// <summary>
        /// Checks that the service reports more than 100 supported languages.
        /// </summary>
        [TestMethod]
        public void GetSupportedLanguagesTest()
        {
            // arrange
            IOCRService service = new OCRService();

            // act
            int languageCount = service.GetSupportedLanguages().Count;

            // assert
            Assert.IsTrue(languageCount > 100);
        }
    }
}
+1 −3
Original line number Diff line number Diff line
using System.Linq;

namespace SimpleOCR.Library.Tests.Utilities
namespace SimpleOCR.Library.Tests.Utilities
{
    internal static class TestUtilities
    {