Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

make Python 3.8 work #289

Merged
merged 12 commits into from
Mar 18, 2022
24 changes: 15 additions & 9 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,21 @@ jobs:
steps:
- checkout
- setup_remote_docker # https://circleci.com/docs/2.0/building-docker-images/
- run: make docker-maximum-cuda DOCKER_PARALLEL=-j3
- run:
name: persist image
command: docker image save ocrd/all:maximum-cuda > ocrd-all-maximum.tar
no_output_timeout: 30m
# can be downloaded from CircleCI.com and imported via "docker image load"
- store_artifacts:
path: ocrd-all-maximum.tar
destination: artifacts
- run: make docker-maximum-cuda
- when:
# takes too long for 1h1m CircleCI timeout overall
condition: false
steps:
- run:
name: persist image
command: |
sudo apt install pigz
docker image save ocrd/all:maximum-cuda | pigz --fast > ocrd-all-maximum.tar.gz
no_output_timeout: 30m
# can be downloaded from CircleCI.com and imported via "docker image load"
- store_artifacts:
path: ocrd-all-maximum.tar.gz
destination: artifacts
deploy:
docker:
- image: circleci/buildpack-deps:stretch
Expand Down
4 changes: 4 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ ENV OCRD_MODULES="${OCRD_MODULES}"
# (defaults to no extra options)
ARG PIP_OPTIONS=""

# allow passing build-time parameter for Python version
# (defaults to python3 in all modules)
ARG PYTHON=python3

# build in parallel to speed up (but risk running into clashes
# when not all dependencies have been correctly explicated):
ARG PARALLEL=""
Expand Down
50 changes: 46 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ PKG_CONFIG_PATH := $(VIRTUAL_ENV)/lib/pkgconfig:$(PKG_CONFIG_PATH)
endif
export PKG_CONFIG_PATH

SHELL = /bin/bash

OCRD_EXECUTABLES = $(BIN)/ocrd # add more CLIs below
CUSTOM_DEPS = unzip wget python3-venv parallel git less # add more packages for deps-ubuntu below (or modules as preqrequisites)

Expand Down Expand Up @@ -238,6 +240,7 @@ ifeq (0,$(MAKELEVEL))
cor-asv-ann-check:
$(MAKE) check OCRD_MODULES=cor-asv-ann VIRTUAL_ENV=$(SUB_VENV)/headless-tf1
else
$(pip_install_tf1nvidia)
$(pip_install)
endif
endif
Expand Down Expand Up @@ -269,6 +272,7 @@ ifeq (0,$(MAKELEVEL))
cor-asv-fst-check:
$(MAKE) check OCRD_MODULES=cor-asv-fst VIRTUAL_ENV=$(SUB_VENV)/headless-tf1
else
$(pip_install_tf1nvidia)
. $(ACTIVATE_VENV) && $(MAKE) -C $< deps
$(pip_install)
endif
Expand All @@ -285,6 +289,7 @@ ifeq (0,$(MAKELEVEL))
ocrd_keraslm-check:
$(MAKE) check OCRD_MODULES=ocrd_keraslm VIRTUAL_ENV=$(SUB_VENV)/headless-tf1
else
$(pip_install_tf1nvidia)
$(pip_install)
endif
endif
Expand Down Expand Up @@ -355,6 +360,7 @@ ifeq (0,$(MAKELEVEL))
ocrd_segment-check:
$(MAKE) check OCRD_MODULES=ocrd_segment VIRTUAL_ENV=$(SUB_VENV)/headless-tf1
else
$(pip_install_tf1nvidia)
$(pip_install)
endif
endif
Expand Down Expand Up @@ -474,10 +480,10 @@ OCRD_ANYBASEOCR += $(BIN)/ocrd-anybaseocr-textline
OCRD_ANYBASEOCR += $(BIN)/ocrd-anybaseocr-layout-analysis
$(call multirule,$(OCRD_ANYBASEOCR)): ocrd_anybaseocr
ifeq (0,$(MAKELEVEL))
$(MAKE) -B -o $< $(notdir $(OCRD_ANYBASEOCR)) VIRTUAL_ENV=$(SUB_VENV)/headless-tf21
$(call delegate_venv,$(OCRD_ANYBASEOCR),$(SUB_VENV)/headless-tf21)
$(MAKE) -B -o $< $(notdir $(OCRD_ANYBASEOCR)) VIRTUAL_ENV=$(SUB_VENV)/headless-tf2
$(call delegate_venv,$(OCRD_ANYBASEOCR),$(SUB_VENV)/headless-tf2)
ocrd_anybaseocr-check:
$(MAKE) check OCRD_MODULES=ocrd_anybaseocr VIRTUAL_ENV=$(SUB_VENV)/headless-tf21
$(MAKE) check OCRD_MODULES=ocrd_anybaseocr VIRTUAL_ENV=$(SUB_VENV)/headless-tf2
else
cd $< ; $(MAKE) patch-pix2pixhd
$(pip_install)
Expand Down Expand Up @@ -521,6 +527,7 @@ ifeq (0,$(MAKELEVEL))
sbb_binarization-check:
$(MAKE) check OCRD_MODULES=sbb_binarization VIRTUAL_ENV=$(SUB_VENV)/headless-tf1
else
$(pip_install_tf1nvidia)
$(pip_install)
endif
endif
Expand All @@ -539,6 +546,7 @@ ifeq (0,$(MAKELEVEL))
sbb_textline_detector-check:
$(MAKE) check OCRD_MODULES=sbb_textline_detector VIRTUAL_ENV=$(SUB_VENV)/headless-tf1
else
$(pip_install_tf1nvidia)
$(pip_install)
endif
endif
Expand All @@ -557,6 +565,7 @@ ifeq (0,$(MAKELEVEL))
eynollah-check:
$(MAKE) check OCRD_MODULES=eynollah VIRTUAL_ENV=$(SUB_VENV)/headless-tf1
else
$(pip_install_tf1nvidia)
$(pip_install)
endif
endif
Expand Down Expand Up @@ -594,6 +603,31 @@ define pip_install
. $(ACTIVATE_VENV) && cd $< && $(SEMPIP) $(PIP) install $(PIP_OPTIONS_E) . && touch -c $@
endef

# Workaround for missing prebuilt versions of TF<2 for Python==3.8
# todo: find another solution for 3.9, 3.10 etc
# Nvidia has them, but under a different name, so let's rewrite that:
define pip_install_tf1nvidia =
. $(ACTIVATE_VENV) && if ! $(PYTHON) -c "import sys; sys.exit(sys.version_info.major==3 and sys.version_info.minor==8)" && ! $(PIP) show -q tensorflow-gpu; then \
$(PIP) install nvidia-pyindex && \
pushd $$(mktemp -d) && \
$(PIP) download --no-deps nvidia-tensorflow && \
for name in nvidia_tensorflow-*.whl; do name=$${name%.whl}; done && \
$(PYTHON) -m wheel unpack $$name.whl && \
for name in nvidia_tensorflow-*/; do name=$${name%/}; done && \
newname=$${name/nvidia_tensorflow/tensorflow_gpu} &&\
sed -i s/nvidia_tensorflow/tensorflow_gpu/g $$name/$$name.dist-info/METADATA && \
sed -i s/nvidia_tensorflow/tensorflow_gpu/g $$name/$$name.dist-info/RECORD && \
sed -i s/nvidia_tensorflow/tensorflow_gpu/g $$name/tensorflow_core/tools/pip_package/setup.py && \
pushd $$name && for path in $$name*; do mv $$path $${path/$$name/$$newname}; done && popd && \
bertsky marked this conversation as resolved.
Show resolved Hide resolved
$(PYTHON) -m wheel pack $$name && \
$(PIP) install $$newname*.whl && popd && rm -fr $$OLDPWD; fi && \
$(PIP) install imageio==2.4.1 && \
$(PIP) install "tifffile<2022"
endef
# last recipe 2 lines:
# - preempt conflict over numpy between scikit-image and tensorflow
# - preempt conflict over numpy between tifffile and tensorflow

# pattern for recursive make:
# $(executables...): module...
# ifeq (0,$(MAKELEVEL))
Expand Down Expand Up @@ -786,13 +820,20 @@ clean-tesseract:
# install git and parallel first (which is required for the module updates)
deps-ubuntu:
apt-get -y install git parallel
ifneq ($(suffix $(PYTHON)),)
# install specific Python version in system via PPA
apt-get install -y software-properties-common
add-apt-repository -y ppa:deadsnakes/ppa
apt-get update
apt-get install -y --no-install-recommends $(notdir $(PYTHON))-dev $(notdir $(PYTHON))-venv
endif
Comment on lines +823 to +829
Copy link
Collaborator

@stweil stweil Mar 30, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why was this block added? I don't think that it is a good idea to install a Python package from PPA just because the user added PYTHON=python3.7 to the build options. It can either overwrite an existing installation or install an unneeded 2nd Python installation from a less secure source (which is even added to the apt sources).

I noticed these instructions while building a Docker image with GitLab actions which already provides any desired Python version.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because our current reference platform, i.e. the one targeted by deps-ubuntu, is still Ubuntu 18.04. And that uses 3.6 by default for python3, and needs PPAs for more recent versions. (Same with our ocrd_tesserocr recipe.)

I agree it's not a good pattern, but we need some kind of dual version support during the grace period between Ubuntu 18 and 20 (Python 3.6 and 3.8). Think of it as something we can drop as soon as the shift to 20 is complete. (And after that we should find better mechanisms to encapsulate system dependencies.)

I think the natural way to avoid interference with tailored environments like CI is to not set PYTHON there, but instead make it that environment's default version.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

18.04 does not need PPA for Python 3.7: https://packages.ubuntu.com/search?keywords=python3.7. And I don't think that the installation of Python should be part of the Makefile rules. Let it fail if someone says make all PYTHON=python3.7 without providing that version.

$(MAKE) deps-ubuntu-modules
chown -R --reference=$(CURDIR) .git $(OCRD_MODULES)
# prevent the sem commands during above module updates from imposing sudo perms on HOME:
chown -R --reference=$(HOME) $(HOME)/.parallel

deps-ubuntu-modules:
set -e; for dir in $^; do $(MAKE) -C $$dir deps-ubuntu; done
set -e; for dir in $^; do $(MAKE) -C $$dir deps-ubuntu PYTHON=$(PYTHON) PIP=$(PIP); done
apt-get -y install $(CUSTOM_DEPS)

.PHONY: deps-ubuntu deps-ubuntu-modules
Expand Down Expand Up @@ -837,6 +878,7 @@ docker%: Dockerfile $(DOCKER_MODULES)
--build-arg OCRD_MODULES="$(DOCKER_MODULES)" \
--build-arg PIP_OPTIONS="$(PIP_OPTIONS)" \
--build-arg PARALLEL="$(DOCKER_PARALLEL)" \
--build-arg PYTHON="$(PYTHON)" \
-t $(DOCKER_TAG):$(or $(*:-%=%),latest) .


Expand Down