- add the preprocess argument "l2" for the double layout book

- check that a file or directory exists before moving or deleting it - stability improvements - the script ./namsel-ocr no longer deletes the directory ./data/out after the OCR completes
thubtenrigzin · May 18, 2018 · 7011f08 · 7011f08
1 parent fcfa4f0
commit 7011f08
Show file tree

Hide file tree

Showing 7 changed files with 150 additions and 43 deletions.
diff --git a/1book b/1book
@@ -1,11 +1,54 @@
-#! /bin/bash
+#! /bin/bash -x
 
-if [ -z "$1" ]
+# Usage: ./1book [threshold value] [l2]
+# An integer argument is passed on as --threshold; "l2" selects --layout double.
+# With exactly two arguments both are inspected, in either order; otherwise
+# only $1 is. $1/$2 are quoted inside `[` so that an empty or missing argument
+# compares as "" instead of failing with "unary operator expected".
+if [ $# -eq 2 ]
+then
+	if [[ $1 =~ ^-?[0-9]+$ ]] && [ "$2" == "l2" ]
+	then
+		python namsel.py preprocess --threshold=$1 --layout double ./data
+	elif [ "$1" == "l2" ] && [[ $2 =~ ^-?[0-9]+$ ]]
+	then
+		python namsel.py preprocess --threshold=$2 --layout double ./data
+	elif [ "$1" == "l2" ] && [ "$2" == "l2" ]
+	then
+		python namsel.py preprocess --layout double ./data
+	elif [[ $1 =~ ^-?[0-9]+$ ]] && [[ $2 =~ ^-?[0-9]+$ ]]
 	then
-		python namsel.py preprocess ./data
-	else
 		python namsel.py preprocess --threshold=$1 ./data
+	elif [ "$1" == "l2" ]
+	then
+		python namsel.py preprocess --layout double ./data
+	elif [[ $1 =~ ^-?[0-9]+$ ]]
+	then
+		python namsel.py preprocess --threshold=$1 ./data
+	elif [ "$2" == "l2" ]
+	then
+		python namsel.py preprocess --layout double ./data
+	elif [[ $2 =~ ^-?[0-9]+$ ]]
+	then
+		python namsel.py preprocess --threshold=$2 ./data
+	else
+		python namsel.py preprocess ./data
+	fi
+elif [ "$1" == "l2" ]
+then
+	python namsel.py preprocess --layout double ./data
+elif [[ $1 =~ ^-?[0-9]+$ ]]
+then
+	python namsel.py preprocess --threshold=$1 ./data
+else
+	python namsel.py preprocess ./data
 fi
 python namsel.py recognize-volume --format text --page_type book --line_break_method line_cut --clear_hr True ./data/out
-mv ocr_output.txt ./data
-rm -R ./data/out
+if [ -f ./ocr_output.txt ]  # skip the move when no ocr_output.txt exists
+then
+	mv ocr_output.txt ./data
+fi
+if [ -d ./data/out ]  # skip the cleanup when ./data/out does not exist
+then
+	rm -R ./data/out
+fi
diff --git a/1pecha b/1pecha
@@ -1,11 +1,28 @@
-#! /bin/bash
+#! /bin/bash -x
 
-if [ -z "$1" ]
+if [ $# -eq 2 ]  # two arguments: use whichever one is an integer, $1 first
+then
+	if [[ $1 =~ ^-?[0-9]+$ ]]
 	then
-		python namsel.py preprocess ./data
-	else
 		python namsel.py preprocess --threshold=$1 ./data
+	elif [[ $2 =~ ^-?[0-9]+$ ]]
+	then
+		python namsel.py preprocess --threshold=$2 ./data
+	else
+		python namsel.py preprocess ./data  # neither argument is an integer: no --threshold
+	fi
+elif [[ $1 =~ ^-?[0-9]+$ ]]  # any other argument count: only $1 is inspected
+then
+	python namsel.py preprocess --threshold=$1 ./data
+else
+	python namsel.py preprocess ./data
 fi
 python namsel.py recognize-volume --format text --page_type pecha --line_break_method line_cluster ./data/out
-mv ocr_output.txt ./data
-rm -R ./data/out
+if [ -f ./ocr_output.txt ]  # skip the move when no ocr_output.txt exists
+then
+	mv ocr_output.txt ./data
+fi
+if [ -d ./data/out ]  # skip the cleanup when ./data/out does not exist
+then
+	rm -R ./data/out
+fi
diff --git a/README.md b/README.md
@@ -12,9 +12,9 @@ docker run -itd --name namsel -v ~/data:/home/namsel-ocr/data thubtenrigzin/dock
 #### Preprocessing
 Scantaillor will prepare all the images stored in your local directory *~/data*.
 
-It is possible to add optionaly the threshold value.
+Optionally, a threshold value can be given, and the double page layout can be selected by adding "l2".
 ```
-docker exec namsel ./preprocess [threshold value]
+docker exec namsel ./preprocess [threshold value] [l2]
 ```
 #### Recognition
 ##### Pecha format
@@ -36,11 +36,11 @@ docker exec namsel ./namsel-ocr [parameter1 parameter2 etc...]
 #### Automatising the recognition with the preprocess included
 An all in one button for the book and pecha recognition.
 
-The threshold preprocess value can be optionaly add as a parameter.
+The preprocess threshold value can optionally be added as a parameter, and the double page layout can be selected by adding "l2".
 
 For the book recognition:
 ```
-docker exec namsel ./1book [threshold value]
+docker exec namsel ./1book [threshold value] [l2]
 ```
 
 And for the Pecha recognition:
@@ -54,7 +54,13 @@ Please refer to [namsel-ocr](https://github.com/thubtenrigzin/namsel-ocr) reposi
 All the Docker source will take place on [docker-namsel-ocr](https://github.com/thubtenrigzin/docker-namsel-ocr) repository on Github.
 
 ### Realease notes:
-#### v2.1.0 or latest
+#### v2.2.0 or latest
+- add the preprocess argument "l2" for the double layout book
+- check that a file or directory exists before moving or deleting it
+- stability improvements
+- The script ./namsel-ocr doesn't delete the directory ./data/out after the ocr completion
+
+#### v2.1.0
 - delete the directory "out" after the recognition completion
 - test if the "out" directory exists and uses the non-scantailored scan image if the preprocess has not been launched before the recognition
 - use the tag "latest" for the basic image
@@ -65,4 +71,4 @@ All the Docker source will take place on [docker-namsel-ocr](https://github.com/
 - correcting an issue in book script file letting the book recognition work properly
 
 #### v1.0.0
-First release of the project
+First release of the project
diff --git a/book b/book
@@ -1,12 +1,17 @@
-#! /bin/bash
+#! /bin/bash -x
 
-if [ -d ./data/out ]; then
-   dir="./data/out"
+if [ -d ./data/out ]  # use ./data/out when it exists, otherwise fall back to ./data
+then
+	dir="./data/out"
 else
-   dir="./data"
+	dir="./data"
 fi
 python namsel.py recognize-volume --format text --page_type book --line_break_method line_cut --clear_hr True $dir
-mv ocr_output.txt ./data
-if [ -d ./data/out ]; then
-   rm -R ./data/out
+if [ -f ./ocr_output.txt ]  # skip the move when no ocr_output.txt exists
+then
+	mv ocr_output.txt ./data
+fi
+if [ -d ./data/out ]  # remove ./data/out once recognition has run
+then
+	rm -R ./data/out
 fi
diff --git a/namsel-ocr b/namsel-ocr
@@ -1,12 +1,20 @@
-#! /bin/bash
+#! /bin/bash -x
 
-if [ -d ./data/out ]; then
-   dir="./data/out"
+# Choose the directory handed to namsel.py: the "preprocess" sub-command always
+# gets ./data; any other sub-command gets ./data/out when that directory exists.
+if [ "$1" == "preprocess" ]
+then
+	dir="./data"
+elif [ -d ./data/out ]
+then
+	dir="./data/out"
 else
-   dir="./data"
+	dir="./data"
 fi
-python namsel.py $* $dir
+python namsel.py "$@" "$dir"
-mv ocr_output.txt ./data
-if [ -d ./data/out ]; then
-   rm -R ./data/out
+# "$1" is quoted so that a missing argument compares as "" instead of making
+# `[` fail with "unary operator expected", which would skip the move below.
+if [ "$1" != "preprocess" ] && [ -f ./ocr_output.txt ]
+then
+	mv ocr_output.txt ./data
 fi
diff --git a/pecha b/pecha
@@ -1,12 +1,17 @@
-#! /bin/bash
+#! /bin/bash -x
 
-if [ -d ./data/out ]; then
-   dir="./data/out"
+if [ -d ./data/out ]  # use ./data/out when it exists, otherwise fall back to ./data
+then
+	dir="./data/out"
 else
-   dir="./data"
+	dir="./data"
 fi
 python namsel.py recognize-volume --format text --page_type pecha --line_break_method line_cluster $dir
-mv ocr_output.txt ./data
-if [ -d ./data/out ]; then
-   rm -R ./data/out
+if [ -f ./ocr_output.txt ]  # skip the move when no ocr_output.txt exists
+then
+	mv ocr_output.txt ./data
+fi
+if [ -d ./data/out ]  # remove ./data/out once recognition has run
+then
+	rm -R ./data/out
 fi
diff --git a/preprocess b/preprocess
@@ -1,8 +1,45 @@
-#! /bin/bash
+#! /bin/bash -x
 
-if [ -z "$1" ]
+# Usage: ./preprocess [threshold value] [l2]
+# An integer argument is passed on as --threshold; "l2" selects --layout double.
+# With exactly two arguments both are inspected, in either order; otherwise
+# only $1 is. $1/$2 are quoted inside `[` so that an empty or missing argument
+# compares as "" instead of failing with "unary operator expected".
+if [ $# -eq 2 ]
+then
+	if [[ $1 =~ ^-?[0-9]+$ ]] && [ "$2" == "l2" ]
+	then
+		python namsel.py preprocess --threshold=$1 --layout double ./data
+	elif [ "$1" == "l2" ] && [[ $2 =~ ^-?[0-9]+$ ]]
+	then
+		python namsel.py preprocess --threshold=$2 --layout double ./data
+	elif [ "$1" == "l2" ] && [ "$2" == "l2" ]
+	then
+		python namsel.py preprocess --layout double ./data
+	elif [[ $1 =~ ^-?[0-9]+$ ]] && [[ $2 =~ ^-?[0-9]+$ ]]
+	then
+		python namsel.py preprocess --threshold=$1 ./data
+	elif [ "$1" == "l2" ]
+	then
+		python namsel.py preprocess --layout double ./data
+	elif [[ $1 =~ ^-?[0-9]+$ ]]
 	then
-		python namsel.py preprocess ./data
-	else
 		python namsel.py preprocess --threshold=$1 ./data
+	elif [ "$2" == "l2" ]
+	then
+		python namsel.py preprocess --layout double ./data
+	elif [[ $2 =~ ^-?[0-9]+$ ]]
+	then
+		python namsel.py preprocess --threshold=$2 ./data
+	else
+		python namsel.py preprocess ./data
+	fi
+elif [ "$1" == "l2" ]
+then
+	python namsel.py preprocess --layout double ./data
+elif [[ $1 =~ ^-?[0-9]+$ ]]
+then
+	python namsel.py preprocess --threshold=$1 ./data
+else
+	python namsel.py preprocess ./data
 fi