Unverified Commit 31f6b546 authored by Marius Göcke's avatar Marius Göcke
Browse files

fix

parent a1d1a80f
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -10,7 +10,7 @@
		"tasks": [
			{
				"label": "Base: Build all codeunits",
				"command": "scbuildcodeunits",
				"command": "scbuildcodeunits {{.CLI_ARGS}}",
				"type": "shell",
				"options": {
					"cwd": "${workspaceFolder}"
+0 −2
Original line number Diff line number Diff line
import os
from ScriptCollection.GeneralUtilities import GeneralUtilities
from ScriptCollection.TFCPS.DotNet.TFCPS_CodeUnitSpecific_DotNet import TFCPS_CodeUnitSpecific_DotNet_Functions,TFCPS_CodeUnitSpecific_DotNet_CLI
from ScriptCollection.TFCPS.DotNet.CertificateGeneratorInformationNoGenerate import CertificateGeneratorInformationNoGenerate
 
+12 −50
Original line number Diff line number Diff line
@@ -6,6 +6,7 @@ using SimpleOCR.Library.Core.VIsitors;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using Tesseract;

@@ -22,7 +23,7 @@ namespace SimpleOCR.Library.Core
        /// <remarks><see cref="OCRService"/> takes care of the download itself. The only requirement is that git is available as command.</remarks>
        public OCRService(string dataFolder, IGRYLog log)
        {
            // NOTE(review): the two assignments to _DataFolder below look like the before/after lines of a
            // diff rendered without +/- markers. As written, the second assignment overwrites the first.
            this._DataFolder = dataFolder;
            // Store the data folder with every backslash replaced by a forward slash.
            this._DataFolder = dataFolder.Replace('\\', '/');
            this._Log = log;
        }
        public string GetOCRContent(byte[] fileContent, ISet<string> languages)
@@ -44,7 +45,7 @@ namespace SimpleOCR.Library.Core
                    throw new BadRequestException($"Language '{language}' is not supported.");
                }
            }
            string dataPath = this.GetTessDataPath();
            string dataPath = _DataFolder;
            using TesseractEngine engine = new TesseractEngine(dataPath, languagesConcatenated, EngineMode.Default);
            using MemoryStream ms = new MemoryStream(fileContent);
            using Pix img = Pix.LoadFromMemory(ms.ToArray());
@@ -63,7 +64,8 @@ namespace SimpleOCR.Library.Core
        {
            var result = new HashSet<string>();
            string pattern = @"([a-z][a-z][a-z])\.traineddata";
            var files = Directory.GetFiles(this.GetTessDataPath());
            string tessdataFolder = _DataFolder;
            var files = Directory.GetFiles(tessdataFolder);
            foreach (var file in files)
            {
                string filename = Path.GetFileName(file);
@@ -80,49 +82,13 @@ namespace SimpleOCR.Library.Core
        {
            try
            {
                GRYLibrary.Core.Misc.Utilities.AssertCondition(!string.IsNullOrEmpty(_DataFolder),"No OCR-data-folder set.");
                this._Log.Log($"OCRFolder: {this._DataFolder}");
                string tessdataFolder = _DataFolder;
                GRYLibrary.Core.Misc.Utilities.AssertCondition(!string.IsNullOrEmpty(tessdataFolder), "No OCR-data-folder set.");
                this._Log.Log($"OCRFolder: {tessdataFolder}");
                if (!this.IsInitialized)
                {
                    this._Log.Log("Initialize OCR-data");
                    GRYLibrary.Core.Misc.Utilities.EnsureDirectoryExists(this._DataFolder);
                    string tessdataFolder = this.GetTessDataPath();

                    if (!Directory.Exists(tessdataFolder))
                    {
                        string repoOwner = "tesseract-ocr";
                        string repoName = "tessdata_best";
                        this._Log.Log("Download OCR-data...");
                        GRYLibrary.Core.Misc.Utilities.EnsureDirectoryExists(tessdataFolder);
                        using (var e = new ExternalProgramExecutor(new ExternalProgramExecutorConfiguration()
                        {
                            Program = "git",
                            Argument = $"clone --recurse-submodules https://github.com/{repoOwner}/{repoName} {tessdataFolder}",

                        }))
                        {
                            e.LogObject = this._Log;
                            e.Run();
                        }

                        GRYLibrary.Core.Misc.Utilities.ForEachFileAndDirectoryTransitively(tessdataFolder, (string path, object _) =>
                        {
                            path = path.Replace("\\", "/");
                            if (path.EndsWith("/.git"))
                            {
                                GRYLibrary.Core.Misc.Utilities.EnsureDirectoryDoesNotExist(path);
                            }
                        }, (string path, object _) =>
                        {
                            path = path.Replace("\\", "/");
                            if (path.EndsWith("/.git"))
                            {
                                GRYLibrary.Core.Misc.Utilities.EnsureFileDoesNotExist(path);
                            }
                        }, false);
                        GRYLibrary.Core.Misc.Utilities.EnsureDirectoryDoesNotExist(Path.Combine(tessdataFolder, ".git"));
                    }
                    this._Log.Log("Finished initialization of OCR-data...");
                    var traineddataFiles = Directory.GetFiles(_DataFolder).Where(file => file.EndsWith(".traineddata"));
                    GRYLibrary.Core.Misc.Utilities.AssertCondition(1 < traineddataFiles.Count(), "Expected multiple *.traineddata-files.");
                    this.IsInitialized = true;
                }
            }
@@ -132,14 +98,10 @@ namespace SimpleOCR.Library.Core
            }
        }

        /// <summary>
        /// Builds the path of the "tessdata_best" subfolder inside the configured data folder.
        /// </summary>
        /// <returns>The combined path with every backslash replaced by a forward slash.</returns>
        private string GetTessDataPath()
        {
            return Path.Combine(this._DataFolder, "tessdata_best").Replace('\\', '/');
        }

        /// <summary>
        /// Discards the current OCR-data state and runs the initialization again:
        /// calls EnsureDirectoryDoesNotExist for the data location, resets <see cref="IsInitialized"/>
        /// to false and then calls <see cref="Initialize"/>.
        /// </summary>
        public void ReInitialize()
        {
            // NOTE(review): the two EnsureDirectoryDoesNotExist calls below look like the before/after lines
            // of a diff rendered without +/- markers (tessdata subfolder vs. whole data folder) - confirm
            // which one is current.
            GRYLibrary.Core.Misc.Utilities.EnsureDirectoryDoesNotExist(this.GetTessDataPath());
            GRYLibrary.Core.Misc.Utilities.EnsureDirectoryDoesNotExist(_DataFolder);
            // Clear the flag first so that Initialize() does not skip its "not yet initialized" branch.
            this.IsInitialized = false;
            this.Initialize();
        }
+1 −1
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@ tasks:
    silent: true
    dir: "."
    cmds:
      - "scbuildcodeunits"
      - "scbuildcodeunits {{.CLI_ARGS}}"
    aliases:
      - basebuildallcodeunits
      - bb