From 4faf676675260c9e42c447e5538567fdde561fa6 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Sat, 19 Feb 2022 04:44:20 +0100 Subject: [PATCH 01/11] pass on PYTHON, and install via PPA in deps-ubuntu if it contains minor --- Dockerfile | 4 ++++ Makefile | 10 +++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 8dd603a2..2e37007f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -68,6 +68,10 @@ ENV OCRD_MODULES="${OCRD_MODULES}" # (defaults to no extra options) ARG PIP_OPTIONS="" +# allow passing build-time parameter for Python version +# (defaults to python3 in all modules) +ARG PYTHON=python3 + # build in parallel to speed up (but risk running into clashes # when not all dependencies have been correctly explicated): ARG PARALLEL="" diff --git a/Makefile b/Makefile index 1cf05127..707511f9 100644 --- a/Makefile +++ b/Makefile @@ -785,13 +785,20 @@ clean-tesseract: # install git and parallel first (which is required for the module updates) deps-ubuntu: apt-get -y install git parallel +ifneq ($(suffix $(PYTHON)),) +# install specific Python version in system via PPA + apt-get install -y software-properties-common + add-apt-repository -y ppa:deadsnakes/ppa + apt-get update + apt-get install -y --no-install-recommends $(notdir $(PYTHON))-dev $(notdir $(PYTHON))-venv +endif $(MAKE) deps-ubuntu-modules chown -R --reference=$(CURDIR) .git $(OCRD_MODULES) # prevent the sem commands during above module updates from imposing sudo perms on HOME: chown -R --reference=$(HOME) $(HOME)/.parallel deps-ubuntu-modules: - set -e; for dir in $^; do $(MAKE) -C $$dir deps-ubuntu; done + set -e; for dir in $^; do $(MAKE) -C $$dir deps-ubuntu PYTHON=$(PYTHON) PIP=$(PIP); done apt-get -y install $(CUSTOM_DEPS) .PHONY: deps-ubuntu deps-ubuntu-modules @@ -836,6 +843,7 @@ docker%: Dockerfile $(DOCKER_MODULES) --build-arg OCRD_MODULES="$(DOCKER_MODULES)" \ --build-arg PIP_OPTIONS="$(PIP_OPTIONS)" \ --build-arg PARALLEL="$(DOCKER_PARALLEL)" \ + --build-arg PYTHON="$(PYTHON)" \ -t $(DOCKER_TAG):$(or $(*:-%=%),latest) . From 1a8deee1e2d29ef3d244505ac441e04abb3f4344 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Sat, 19 Feb 2022 04:45:17 +0100 Subject: [PATCH 02/11] fixup TF1 via nvidia-pyindex --- Makefile | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/Makefile b/Makefile index 707511f9..32a644d4 100644 --- a/Makefile +++ b/Makefile @@ -46,6 +46,8 @@ PKG_CONFIG_PATH := $(VIRTUAL_ENV)/lib/pkgconfig:$(PKG_CONFIG_PATH) endif export PKG_CONFIG_PATH +SHELL = /bin/bash + OCRD_EXECUTABLES = $(BIN)/ocrd # add more CLIs below CUSTOM_DEPS = unzip wget python3-venv parallel git less # add more packages for deps-ubuntu below (or modules as preqrequisites) @@ -236,6 +238,7 @@ ifeq (0,$(MAKELEVEL)) cor-asv-ann-check: $(MAKE) check OCRD_MODULES=cor-asv-ann VIRTUAL_ENV=$(SUB_VENV)/headless-tf1 else + $(pip_install_tf1nvidia) $(pip_install) endif endif @@ -267,6 +270,7 @@ ifeq (0,$(MAKELEVEL)) cor-asv-fst-check: $(MAKE) check OCRD_MODULES=cor-asv-fst VIRTUAL_ENV=$(SUB_VENV)/headless-tf1 else + $(pip_install_tf1nvidia) . $(ACTIVATE_VENV) && $(MAKE) -C $< deps $(pip_install) endif @@ -283,6 +287,7 @@ ifeq (0,$(MAKELEVEL)) ocrd_keraslm-check: $(MAKE) check OCRD_MODULES=ocrd_keraslm VIRTUAL_ENV=$(SUB_VENV)/headless-tf1 else + $(pip_install_tf1nvidia) $(pip_install) endif endif @@ -353,6 +358,7 @@ ifeq (0,$(MAKELEVEL)) ocrd_segment-check: $(MAKE) check OCRD_MODULES=ocrd_segment VIRTUAL_ENV=$(SUB_VENV)/headless-tf1 else + $(pip_install_tf1nvidia) $(pip_install) endif endif @@ -519,6 +525,7 @@ ifeq (0,$(MAKELEVEL)) sbb_binarization-check: $(MAKE) check OCRD_MODULES=sbb_binarization VIRTUAL_ENV=$(SUB_VENV)/headless-tf1 else + $(pip_install_tf1nvidia) $(pip_install) endif endif @@ -537,6 +544,7 @@ ifeq (0,$(MAKELEVEL)) sbb_textline_detector-check: $(MAKE) check OCRD_MODULES=sbb_textline_detector VIRTUAL_ENV=$(SUB_VENV)/headless-tf1 else + $(pip_install_tf1nvidia) $(pip_install) endif endif @@ -555,6 +563,7 @@ ifeq (0,$(MAKELEVEL)) eynollah-check: $(MAKE) check OCRD_MODULES=eynollah VIRTUAL_ENV=$(SUB_VENV)/headless-tf1 else + $(pip_install_tf1nvidia) $(pip_install) endif endif @@ -593,6 +602,25 @@ define pip_install . $(ACTIVATE_VENV) && cd $< && $(PIP) install --no-deps --force-reinstall $(PIP_OPTIONS) . endef +# Workaround for missing prebuilt versions of TF<2 for Python>3.6 +# Nvidia has them, but under a different name, so let's rewrite that: +define pip_install_tf1nvidia = +. $(ACTIVATE_VENV) && if ! $(PIP) show -q tensorflow-gpu; then \ + $(PIP) install nvidia-pyindex && \ + pushd $$(mktemp -d) && \ + $(PIP) download --no-deps nvidia-tensorflow && \ + for name in nvidia_tensorflow-*.whl; do name=$${name%.whl}; done && \ + $(PYTHON) -m wheel unpack $$name.whl && \ + for name in nvidia_tensorflow-*/; do name=$${name%/}; done && \ + newname=$${name/nvidia_tensorflow/tensorflow_gpu} &&\ + sed -i s/nvidia_tensorflow/tensorflow_gpu/g $$name/$$name.dist-info/METADATA && \ + sed -i s/nvidia_tensorflow/tensorflow_gpu/g $$name/$$name.dist-info/RECORD && \ + sed -i s/nvidia_tensorflow/tensorflow_gpu/g $$name/tensorflow_core/tools/pip_package/setup.py && \ + pushd $$name && for path in $$name*; do mv $$path $${path/$$name/$$newname}; done && popd && \ + $(PYTHON) -m wheel pack $$name && \ + $(PIP) install $$newname*.whl && popd && rm -fr $$OLDPWD; fi +endef + # pattern for recursive make: # $(executables...): module... # ifeq (0,$(MAKELEVEL)) From c4fe86f10fe3ceae41ceca7cec09401846dd8167 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Sat, 19 Feb 2022 04:45:43 +0100 Subject: [PATCH 03/11] update ocrd_anybaseocr, promote TF2 --- Makefile | 6 +++--- ocrd_anybaseocr | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 32a644d4..8985add0 100644 --- a/Makefile +++ b/Makefile @@ -478,10 +478,10 @@ OCRD_ANYBASEOCR += $(BIN)/ocrd-anybaseocr-textline OCRD_ANYBASEOCR += $(BIN)/ocrd-anybaseocr-layout-analysis $(call multirule,$(OCRD_ANYBASEOCR)): ocrd_anybaseocr ifeq (0,$(MAKELEVEL)) - $(MAKE) -B -o $< $(notdir $(OCRD_ANYBASEOCR)) VIRTUAL_ENV=$(SUB_VENV)/headless-tf21 - $(call delegate_venv,$(OCRD_ANYBASEOCR),$(SUB_VENV)/headless-tf21) + $(MAKE) -B -o $< $(notdir $(OCRD_ANYBASEOCR)) VIRTUAL_ENV=$(SUB_VENV)/headless-tf2 + $(call delegate_venv,$(OCRD_ANYBASEOCR),$(SUB_VENV)/headless-tf2) ocrd_anybaseocr-check: - $(MAKE) check OCRD_MODULES=ocrd_anybaseocr VIRTUAL_ENV=$(SUB_VENV)/headless-tf21 + $(MAKE) check OCRD_MODULES=ocrd_anybaseocr VIRTUAL_ENV=$(SUB_VENV)/headless-tf2 else cd $< ; $(MAKE) patch-pix2pixhd $(pip_install) diff --git a/ocrd_anybaseocr b/ocrd_anybaseocr index e930c931..45743971 160000 --- a/ocrd_anybaseocr +++ b/ocrd_anybaseocr @@ -1 +1 @@ -Subproject commit e930c931d71e574045f6fe2d397f517e7ff1584c +Subproject commit 457439715d76a13cd4e2c84d4772395ee3690b63 From 88dbcf60821f360ee1565880522e277893db81d6 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Mon, 21 Feb 2022 01:25:22 +0100 Subject: [PATCH 04/11] update ocrd_anybaseocr once again --- ocrd_anybaseocr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocrd_anybaseocr b/ocrd_anybaseocr index 45743971..01aea45d 160000 --- a/ocrd_anybaseocr +++ b/ocrd_anybaseocr @@ -1 +1 @@ -Subproject commit 457439715d76a13cd4e2c84d4772395ee3690b63 +Subproject commit 01aea45d409cf271fef3dcc04dbe810258774dd2 From ccfdc83fcd3318d19bb150eb03e2d777646c13ec Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Fri, 4 Mar 2022 17:47:49 +0100 Subject: [PATCH 05/11] circleci: try without parallelization --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index efced8f9..5c364c6a 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -6,7 +6,7 @@ jobs: steps: - checkout - setup_remote_docker # https://circleci.com/docs/2.0/building-docker-images/ - - run: make docker-maximum-cuda DOCKER_PARALLEL=-j3 + - run: make docker-maximum-cuda - run: name: persist image command: docker image save ocrd/all:maximum-cuda > ocrd-all-maximum.tar From c376a6bd79a092efe87a1286965f52f3a0b2ea3a Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Mon, 7 Mar 2022 12:03:37 +0100 Subject: [PATCH 06/11] nvidia-tensorflow recipe: only for Python 3.8 --- Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 4c6f5df1..736b1dd6 100644 --- a/Makefile +++ b/Makefile @@ -603,10 +603,11 @@ define pip_install . $(ACTIVATE_VENV) && cd $< && $(SEMPIP) $(PIP) install $(PIP_OPTIONS_E) . && touch -c $@ endef -# Workaround for missing prebuilt versions of TF<2 for Python>3.6 +# Workaround for missing prebuilt versions of TF<2 for Python==3.8 +# todo: find another solution for 3.9, 3.10 etc # Nvidia has them, but under a different name, so let's rewrite that: define pip_install_tf1nvidia = -. $(ACTIVATE_VENV) && if ! $(PIP) show -q tensorflow-gpu; then \ +. $(ACTIVATE_VENV) && if ! $(PYTHON) -c "import sys; sys.exit(sys.version_info.major==3 and sys.version_info.minor==8)" && ! $(PIP) show -q tensorflow-gpu; then \ $(PIP) install nvidia-pyindex && \ pushd $$(mktemp -d) && \ $(PIP) download --no-deps nvidia-tensorflow && \ From dd0027b9ce7b17d99e13a2563a193ad3436fe461 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Wed, 16 Mar 2022 12:47:18 +0100 Subject: [PATCH 07/11] headless-tf1: extend fixup with workaround for numpy (imageio/scikit-image/tensorflow) conflict --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 736b1dd6..27bfaebe 100644 --- a/Makefile +++ b/Makefile @@ -620,7 +620,8 @@ define pip_install_tf1nvidia = sed -i s/nvidia_tensorflow/tensorflow_gpu/g $$name/tensorflow_core/tools/pip_package/setup.py && \ pushd $$name && for path in $$name*; do mv $$path $${path/$$name/$$newname}; done && popd && \ $(PYTHON) -m wheel pack $$name && \ - $(PIP) install $$newname*.whl && popd && rm -fr $$OLDPWD; fi + $(PIP) install $$newname*.whl && popd && rm -fr $$OLDPWD; fi && \ + $(PIP) install imageio==2.4.1 # preempt conflict over numpy between scikit-image and tensorflow endef # pattern for recursive make: From 033b5dd0f85a7d335e2405d7c5a7d2a0755c25ac Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Wed, 16 Mar 2022 17:40:54 +0100 Subject: [PATCH 08/11] CircleCI: try zipping artifact (to gain speed) --- .circleci/config.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 5c364c6a..80ab9352 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -9,11 +9,13 @@ jobs: - run: make docker-maximum-cuda - run: name: persist image - command: docker image save ocrd/all:maximum-cuda > ocrd-all-maximum.tar + command: | + sudo apt install pigz + docker image save ocrd/all:maximum-cuda | pigz --fast > ocrd-all-maximum.tar.gz no_output_timeout: 30m # can be downloaded from CircleCI.com and imported via "docker image load" - store_artifacts: - path: ocrd-all-maximum.tar + path: ocrd-all-maximum.tar.gz destination: artifacts deploy: docker: From 6ca49465ea5425f38f7227737aa889bc9e579aa1 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Fri, 18 Mar 2022 12:05:37 +0100 Subject: [PATCH 09/11] headless-tf1: extend fixup with workaround for numpy (tifffile/tensorflow) conflict --- Makefile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 27bfaebe..604d4168 100644 --- a/Makefile +++ b/Makefile @@ -621,8 +621,12 @@ define pip_install_tf1nvidia = pushd $$name && for path in $$name*; do mv $$path $${path/$$name/$$newname}; done && popd && \ $(PYTHON) -m wheel pack $$name && \ $(PIP) install $$newname*.whl && popd && rm -fr $$OLDPWD; fi && \ - $(PIP) install imageio==2.4.1 # preempt conflict over numpy between scikit-image and tensorflow + $(PIP) install imageio==2.4.1 && \ + $(PIP) install tifffile==2021.11.2 endef +# last recipe 2 lines: +# - preempt conflict over numpy between scikit-image and tensorflow +# - preempt conflict over numpy between tifffile and tensorflow # pattern for recursive make: # $(executables...): module... From 0faf59e5d6b65b4d41ea01157fb47cf6fc002ba8 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Fri, 18 Mar 2022 12:21:08 +0100 Subject: [PATCH 10/11] headless-tf1: improve numpy fixup --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 604d4168..0a1c3422 100644 --- a/Makefile +++ b/Makefile @@ -622,7 +622,7 @@ define pip_install_tf1nvidia = $(PYTHON) -m wheel pack $$name && \ $(PIP) install $$newname*.whl && popd && rm -fr $$OLDPWD; fi && \ $(PIP) install imageio==2.4.1 && \ - $(PIP) install tifffile==2021.11.2 + $(PIP) install "tifffile<2022" endef # last recipe 2 lines: # - preempt conflict over numpy between scikit-image and tensorflow From 4de0f5c0ece58bfa33f2043ab8e3f13b5b9384e7 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Fri, 18 Mar 2022 13:43:19 +0100 Subject: [PATCH 11/11] CircleCI: disable Docker image upload for PR build --- .circleci/config.yml | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 80ab9352..a5a42885 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -7,16 +7,20 @@ jobs: - checkout - setup_remote_docker # https://circleci.com/docs/2.0/building-docker-images/ - run: make docker-maximum-cuda - - run: - name: persist image - command: | - sudo apt install pigz - docker image save ocrd/all:maximum-cuda | pigz --fast > ocrd-all-maximum.tar.gz - no_output_timeout: 30m - # can be downloaded from CircleCI.com and imported via "docker image load" - - store_artifacts: - path: ocrd-all-maximum.tar.gz - destination: artifacts + - when: + # takes too long for 1h1m CircleCI timeout overall + condition: false + steps: + - run: + name: persist image + command: | + sudo apt install pigz + docker image save ocrd/all:maximum-cuda | pigz --fast > ocrd-all-maximum.tar.gz + no_output_timeout: 30m + # can be downloaded from CircleCI.com and imported via "docker image load" + - store_artifacts: + path: ocrd-all-maximum.tar.gz + destination: artifacts deploy: docker: - image: circleci/buildpack-deps:stretch