Unverified Commit 52461409 authored by Marius Göcke's avatar Marius Göcke
Browse files

wip

parent dede09fa
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -12,7 +12,7 @@ def build():
        "image_debian":tf.tfcps_Tools_General.oci_image_manager.get_registry_address_for_image_with_default_tag(tf.get_repository_folder(),"Debian"),
    })
    tf.tfcps_Tools_General.merge_sbom_file_from_dependent_codeunit_into_this(tf.get_codeunit_folder(),tf.get_codeunit_name(),"SimpleOCRService",tf.use_cache())

    # TODO: add LibreOffice etc. to the SBOM.

if __name__ == "__main__":
    build()
+2 −1
Original line number Diff line number Diff line
@@ -16,7 +16,8 @@ RUN mkdir /Workspace && \
    mkdir /Workspace/Other/Certificates && \
    mkdir /Workspace/Other/EntryPoint && \
    apt-get update && \
    apt-get install -y curl nginx git libreoffice-core libreoffice-writer libreoffice-calc wget nano libgomp1 tesseract-ocr  libpng-dev libjpeg-dev libtiff-dev libwebp-dev ghostscript 
    apt-get install -y curl nginx git libreoffice libreoffice-writer libreoffice-calc libreoffice-impress fonts-dejavu fonts-liberation nano libgomp1 tesseract-ocr  libpng-dev libjpeg-dev libtiff-dev libwebp-dev ghostscript 
# TODO: install a specific version of LibreOffice etc. that is defined in the dependencies folder and can be updated by a script.

WORKDIR /Workspace/Other/EntryPoint

+1 −0
Original line number Diff line number Diff line
@@ -4,6 +4,7 @@ from ScriptCollection.TFCPS.DotNet.TFCPS_CodeUnitSpecific_DotNet import TFCPS_Co
def update_dependencies():
    """Entry point of this script.

    Passes this script's path to ``TFCPS_CodeUnitSpecific_DotNet_CLI.parse``
    and calls ``update_dependencies()`` on the object it returns.
    """
    codeunit_functions: TFCPS_CodeUnitSpecific_DotNet_Functions = TFCPS_CodeUnitSpecific_DotNet_CLI.parse(__file__)
    codeunit_functions.update_dependencies()
    # TODO: call the default sc function to update the dependencies in requirements.txt
    #       ("sc" presumably refers to ScriptCollection; confirm).


if __name__ == "__main__":
+2 −0
Original line number Diff line number Diff line
@@ -25,6 +25,8 @@ namespace SimpleOCR.Library.Core.Misc
                ["application/vnd.openxmlformats-officedocument.spreadsheetml.template"] = "xltx",
                ["application/vnd.ms-excel.template.macroEnabled.12"] = "xltm",
                ["application/vnd.ms-excel.sheet.binary.macroEnabled.12"] = "xlsb",
                ["application/msword"] = "doc",
                ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"] = "docx",
            };
        }

+5 −3
Original line number Diff line number Diff line
@@ -12,14 +12,16 @@ namespace SimpleOCR.Library.Core.Misc.Visitors
        private string _MimeType;
        private ISet<string> _LanguagesAsISO639_3Names;
        private readonly IOCRService _OCRService;
        private readonly bool _EnforceVerbose;
        private readonly TesseractCallBase _TesseractCallBase;
        public GetOCRContentVisitor(byte[] fileContent, string mimeType, ISet<string> languagesAsISO639_3Names, OCRService oCRService)
        public GetOCRContentVisitor(byte[] fileContent, string mimeType, ISet<string> languagesAsISO639_3Names, OCRService oCRService,bool enforceVerbose)
        {
            this._FileContent = fileContent;
            this._MimeType = mimeType;
            this._LanguagesAsISO639_3Names = languagesAsISO639_3Names;
            this._OCRService = oCRService;
            GRYLibrary.Core.OperatingSystem.OperatingSystem os = GRYLibrary.Core.OperatingSystem.OperatingSystem.GetCurrentOperatingSystem();
            _EnforceVerbose = enforceVerbose;
            if(os is GRYLibrary.Core.OperatingSystem.ConcreteOperatingSystems.Windows)
            {
                this._TesseractCallBase = new TesseractByLibrary();
@@ -66,9 +68,9 @@ namespace SimpleOCR.Library.Core.Misc.Visitors
        private string GetTextFromPictures(FileType fileType)
        {
            string result = string.Empty;
            foreach (byte[]? pictureContent in fileType.Accept(new ToPicturesVisitor(this._FileContent, this._MimeType)))
            foreach (byte[]? pictureContent in fileType.Accept(new ToPicturesVisitor(this._FileContent, this._MimeType,_EnforceVerbose)))
            {
                var visitor = this.GetCallTesseractVisitor(pictureContent);
                ITesseractCallBaseVisitor<string> visitor = this.GetCallTesseractVisitor(pictureContent);
                string text = this._TesseractCallBase.Accept(visitor);
                if (result != string.Empty)
                {
Loading