From 7011f08e08f2489a967bf586ee7289869d19be02 Mon Sep 17 00:00:00 2001 From: Thubtenrigzin Date: Fri, 18 May 2018 23:11:22 +0800 Subject: [PATCH] - add the preprocess argument "l2" for the double layout book - check if file or directory exist before the deleting or moving actions - stability improvements - The script ./namsel-ocr doesn't delete the directory ./data/out after the ocr completion --- 1book | 50 ++++++++++++++++++++++++++++++++++++++++++++------ 1pecha | 29 +++++++++++++++++++++++------ README.md | 18 ++++++++++++------ book | 19 ++++++++++++------- namsel-ocr | 18 +++++++++++------- pecha | 19 ++++++++++++------- preprocess | 40 ++++++++++++++++++++++++++++++++++++---- 7 files changed, 150 insertions(+), 43 deletions(-) diff --git a/1book b/1book index c24b8d5..9282483 100644 --- a/1book +++ b/1book @@ -1,11 +1,49 @@ -#! /bin/bash +#! /bin/bash -x -if [ -z "$1" ] +if [ $# -eq 2 ] +then + if [[ $1 =~ ^-?[0-9]+$ ]] && [ "$2" == "l2" ] + then + python namsel.py preprocess --threshold=$1 --layout double ./data + elif [ "$1" == "l2" ] && [[ $2 =~ ^-?[0-9]+$ ]] + then + python namsel.py preprocess --threshold=$2 --layout double ./data + elif [ "$1" == "l2" ] && [ "$2" == "l2" ] + then + python namsel.py preprocess --layout double ./data + elif [[ $1 =~ ^-?[0-9]+$ ]] && [[ $2 =~ ^-?[0-9]+$ ]] then - python namsel.py preprocess ./data - else python namsel.py preprocess --threshold=$1 ./data + elif [ "$1" == "l2" ] + then + python namsel.py preprocess --layout double ./data + elif [[ $1 =~ ^-?[0-9]+$ ]] + then + python namsel.py preprocess --threshold=$1 ./data + elif [ "$2" == "l2" ] + then + python namsel.py preprocess --layout double ./data + elif [[ $2 =~ ^-?[0-9]+$ ]] + then + python namsel.py preprocess --threshold=$2 ./data + else + python namsel.py preprocess ./data + fi +elif [ "$1" == "l2" ] +then + python namsel.py preprocess --layout double ./data +elif [[ $1 =~ ^-?[0-9]+$ ]] +then + python namsel.py preprocess --threshold=$1 ./data +else + python namsel.py 
preprocess ./data fi python namsel.py recognize-volume --format text --page_type book --line_break_method line_cut --clear_hr True ./data/out -mv ocr_output.txt ./data -rm -R ./data/out \ No newline at end of file +if [ -f ./ocr_output.txt ] +then + mv ocr_output.txt ./data +fi +if [ -d ./data/out ] +then + rm -R ./data/out +fi \ No newline at end of file diff --git a/1pecha b/1pecha index f1f8de6..e5e1caf 100644 --- a/1pecha +++ b/1pecha @@ -1,11 +1,28 @@ -#! /bin/bash +#! /bin/bash -x -if [ -z "$1" ] +if [ $# -eq 2 ] +then + if [[ $1 =~ ^-?[0-9]+$ ]] then - python namsel.py preprocess ./data - else python namsel.py preprocess --threshold=$1 ./data + elif [[ $2 =~ ^-?[0-9]+$ ]] + then + python namsel.py preprocess --threshold=$2 ./data + else + python namsel.py preprocess ./data + fi +elif [[ $1 =~ ^-?[0-9]+$ ]] +then + python namsel.py preprocess --threshold=$1 ./data +else + python namsel.py preprocess ./data fi python namsel.py recognize-volume --format text --page_type pecha --line_break_method line_cluster ./data/out -mv ocr_output.txt ./data -rm -R ./data/out \ No newline at end of file +if [ -f ./ocr_output.txt ] +then + mv ocr_output.txt ./data +fi +if [ -d ./data/out ] +then + rm -R ./data/out +fi \ No newline at end of file diff --git a/README.md b/README.md index a24368d..8cb3e54 100644 --- a/README.md +++ b/README.md @@ -12,9 +12,9 @@ docker run -itd --name namsel -v ~/data:/home/namsel-ocr/data thubtenrigzin/dock #### Preprocessing Scantaillor will prepare all the images stored in your local directory *~/data*. -It is possible to add optionaly the threshold value. +Optionally, pass a threshold value and/or "l2" (double-page layout); the two arguments may be given in either order. ``` -docker exec namsel ./preprocess [threshold value] +docker exec namsel ./preprocess [threshold value] [l2] ``` #### Recognition ##### Pecha format @@ -36,11 +36,11 @@ docker exec namsel ./namsel-ocr [parameter1 parameter2 etc...] 
#### Automatising the recognition with the preprocess included An all in one button for the book and pecha recognition. -The threshold preprocess value can be optionaly add as a parameter. +The preprocess threshold value can optionally be added as a parameter, and the double-page layout can be selected by adding "l2". For the book recognition: ``` -docker exec namsel ./1book [threshold value] +docker exec namsel ./1book [threshold value] [l2] ``` And for the Pecha recognition: @@ -54,7 +54,13 @@ Please refer to [namsel-ocr](https://github.com/thubtenrigzin/namsel-ocr) reposi All the Docker source will take place on [docker-namsel-ocr](https://github.com/thubtenrigzin/docker-namsel-ocr) repository on Github. ### Realease notes: -#### v2.1.0 or latest +#### v2.2.0 or latest +- add the preprocess argument "l2" for double-page layout books +- check that a file or directory exists before moving or deleting it +- stability improvements +- the ./namsel-ocr script no longer deletes the ./data/out directory after the OCR completes + +#### v2.1.0 - delete the directory "out" after the recognition completion - test if the "out" directory exists and uses the non-scantailored scan image if the preprocess has not been launched before the recognition - use the tag "latest" for the basic image @@ -65,4 +71,4 @@ All the Docker source will take place on [docker-namsel-ocr](https://github.com/ - correcting an issue in book script file letting the book recognition work properly #### v1.0.0 -First release of the project +First release of the project \ No newline at end of file diff --git a/book b/book index 21e62f6..d77e3a3 100644 --- a/book +++ b/book @@ -1,12 +1,17 @@ -#! /bin/bash +#! 
/bin/bash -x -if [ -d ./data/out ]; then - dir="./data/out" +if [ -d ./data/out ] +then + dir="./data/out" else - dir="./data" + dir="./data" fi python namsel.py recognize-volume --format text --page_type book --line_break_method line_cut --clear_hr True $dir -mv ocr_output.txt ./data -if [ -d ./data/out ]; then - rm -R ./data/out +if [ -f ./ocr_output.txt ] +then + mv ocr_output.txt ./data +fi +if [ -d ./data/out ] +then + rm -R ./data/out fi \ No newline at end of file diff --git a/namsel-ocr b/namsel-ocr index a4e884e..c8a23b3 100644 --- a/namsel-ocr +++ b/namsel-ocr @@ -1,12 +1,16 @@ -#! /bin/bash +#! /bin/bash -x -if [ -d ./data/out ]; then - dir="./data/out" +if [ "$1" == "preprocess" ] +then + dir="./data" +elif [ -d ./data/out ] +then + dir="./data/out" else - dir="./data" + dir="./data" fi python namsel.py $* $dir -mv ocr_output.txt ./data -if [ -d ./data/out ]; then - rm -R ./data/out +if [ "$1" != "preprocess" ] && [ -f ./ocr_output.txt ] +then + mv ocr_output.txt ./data fi \ No newline at end of file diff --git a/pecha b/pecha index 9b85c0c..c7e2a5b 100644 --- a/pecha +++ b/pecha @@ -1,12 +1,17 @@ -#! /bin/bash +#! /bin/bash -x -if [ -d ./data/out ]; then - dir="./data/out" +if [ -d ./data/out ] +then + dir="./data/out" else - dir="./data" + dir="./data" fi python namsel.py recognize-volume --format text --page_type pecha --line_break_method line_cluster $dir -mv ocr_output.txt ./data -if [ -d ./data/out ]; then - rm -R ./data/out +if [ -f ./ocr_output.txt ] +then + mv ocr_output.txt ./data +fi +if [ -d ./data/out ] +then + rm -R ./data/out fi \ No newline at end of file diff --git a/preprocess b/preprocess index b81f8f1..86fb839 100644 --- a/preprocess +++ b/preprocess @@ -1,8 +1,40 @@ -#! /bin/bash +#! 
/bin/bash -x -if [ -z "$1" ] +if [ $# -eq 2 ] +then + if [[ $1 =~ ^-?[0-9]+$ ]] && [ "$2" == "l2" ] + then + python namsel.py preprocess --threshold=$1 --layout double ./data + elif [ "$1" == "l2" ] && [[ $2 =~ ^-?[0-9]+$ ]] + then + python namsel.py preprocess --threshold=$2 --layout double ./data + elif [ "$1" == "l2" ] && [ "$2" == "l2" ] + then + python namsel.py preprocess --layout double ./data + elif [[ $1 =~ ^-?[0-9]+$ ]] && [[ $2 =~ ^-?[0-9]+$ ]] + then + python namsel.py preprocess --threshold=$1 ./data + elif [ "$1" == "l2" ] + then + python namsel.py preprocess --layout double ./data + elif [[ $1 =~ ^-?[0-9]+$ ]] then - python namsel.py preprocess ./data - else python namsel.py preprocess --threshold=$1 ./data + elif [ "$2" == "l2" ] + then + python namsel.py preprocess --layout double ./data + elif [[ $2 =~ ^-?[0-9]+$ ]] + then + python namsel.py preprocess --threshold=$2 ./data + else + python namsel.py preprocess ./data + fi +elif [ "$1" == "l2" ] +then + python namsel.py preprocess --layout double ./data +elif [[ $1 =~ ^-?[0-9]+$ ]] +then + python namsel.py preprocess --threshold=$1 ./data +else + python namsel.py preprocess ./data fi \ No newline at end of file