diff --git a/.github/workflows/PDF-OCR.yml b/.github/workflows/PDF-OCR.yml
new file mode 100644
index 0000000..90b07a7
--- /dev/null
+++ b/.github/workflows/PDF-OCR.yml
@@ -0,0 +1,51 @@
+# Nightly job that renders Stylus Labs Write documents (*.svgz) to PDF and
+# then adds an OCR text layer to the generated PDFs.
+
+name: CI
+
+# Controls when the workflow will run
+on:
+  schedule:
+    # Every day at 23:00 (GitHub evaluates cron schedules in UTC)
+    - cron: '0 23 * * *'
+
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+
+# A workflow run is made up of one or more jobs that can run sequentially or in parallel
+jobs:
+  # This workflow contains a single job called "build"
+  build:
+    # The type of runner that the job will run on
+    runs-on: ubuntu-latest
+
+    # Steps represent a sequence of tasks that will be executed as part of the job
+    steps:
+      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
+      - uses: actions/checkout@v3
+
+      # Installs ocrmypdf via apt and caches the packages between runs
+      - uses: awalsh128/cache-apt-pkgs-action@latest
+        with:
+          packages: ocrmypdf
+          # Quoted so YAML keeps the string "1.0" instead of a float
+          version: '1.0'
+
+      - name: Installing Write
+        # Unpacked into $HOME/Write because PdfScript/pdfcreator.sh runs
+        # ~/Write/Write; -O names the archive since the URL has no extension.
+        run: |
+          wget -O "$RUNNER_TEMP/write.tar.gz" https://www.styluslabs.com/download/write-tgz
+          mkdir -p "$HOME/Write"
+          tar -xvzf "$RUNNER_TEMP/write.tar.gz" -C "$HOME/Write" --strip-components=1
+
+      - name: Creating PDFs
+        run: |
+          chmod u+x PdfScript/pdfcreator.sh
+          ./PdfScript/pdfcreator.sh
+
+      - name: Creating OCR layer
+        # The pattern is quoted so find, not the shell, expands it; each PDF is
+        # passed as both input and output, so it is overwritten in place.
+        run: |
+          find . -name '*-DavideTonelli.pdf' -exec ocrmypdf --language eng --output-type pdf --verbose 1 {} {} \;
diff --git a/PdfScript/pdfcreator.sh b/PdfScript/pdfcreator.sh
new file mode 100755
index 0000000..21fa3de
--- /dev/null
+++ b/PdfScript/pdfcreator.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+# Converts every Write document (*.svgz) found under a "*-DT" directory
+# into a PDF saved next to its source file.
+set -euo pipefail
+
+# NUL-delimited find output keeps paths containing whitespace intact.
+readarray -d '' dir_array < <(find . -type d -name "*-DT" -print0)
+
+# Append each directory's documents; assigning with readarray directly
+# would keep only the files of the last directory visited.
+array=()
+for i in "${dir_array[@]}"
+do
+    readarray -d '' found < <(find "$i" -name "*.svgz" -print0)
+    array+=("${found[@]}")
+done
+
+for i in "${array[@]}"
+do
+    out="${i%.svgz}.pdf"
+    ~/Write/Write --exit --out "$out" "$i"
+done
diff --git a/Year_1/First_semester/MCM/MCM-DT/test.pdf b/Year_1/First_semester/MCM/MCM-DT/test.pdf
new file mode 100644
index 0000000..fa4c8f4
Binary files /dev/null and b/Year_1/First_semester/MCM/MCM-DT/test.pdf differ