From 0add39db5047e8118bc8958449e1b0daf1a9e3bd Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Wed, 6 Sep 2023 20:50:48 -0400 Subject: [PATCH] Build protoc from source, rather than using a binary protoc. Migrate to using the Bazel rules_proto and rules_python packages to build protocol buffers, rather than rolling our own protobuf generation code. Fixes https://github.com/tensorflow/lingvo/issues/327 --- WORKSPACE | 41 ++++++++++++- lingvo/BUILD | 33 ++++++++++- lingvo/lingvo.bzl | 79 +++++++++++--------------- lingvo/repo.bzl | 16 ------ lingvo/tools/generate_tf_dot_protos.sh | 4 -- 5 files changed, 103 insertions(+), 70 deletions(-) diff --git a/WORKSPACE b/WORKSPACE index 870c7ab0c..e50186399 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -1,7 +1,7 @@ """Workspace file for lingvo.""" load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") -load("//lingvo:repo.bzl", "cc_tf_configure", "icu", "lingvo_protoc_deps", "lingvo_testonly_deps") +load("//lingvo:repo.bzl", "cc_tf_configure", "icu", "lingvo_testonly_deps") http_archive( name = "org_tensorflow", @@ -14,10 +14,45 @@ load("@org_tensorflow//tensorflow/tools/toolchains/remote_config:configs.bzl", " initialize_rbe_configs() +http_archive( + name = "rules_python", + sha256 = "5868e73107a8e85d8f323806e60cad7283f34b32163ea6ff1020cf27abef6036", + strip_prefix = "rules_python-0.25.0", + url = "https://github.com/bazelbuild/rules_python/releases/download/0.25.0/rules_python-0.25.0.tar.gz", +) + +load("@rules_python//python:repositories.bzl", "py_repositories") + +py_repositories() + +http_archive( + name = "com_google_protobuf", + sha256 = "75be42bd736f4df6d702a0e4e4d30de9ee40eac024c4b845d17ae4cc831fe4ae", + strip_prefix = "protobuf-21.7", + urls = [ + "https://mirror.bazel.build/github.com/protocolbuffers/protobuf/archive/v21.7.tar.gz", + "https://github.com/protocolbuffers/protobuf/archive/v21.7.tar.gz", + ], +) +load("@com_google_protobuf//:protobuf_deps.bzl", "protobuf_deps") + +protobuf_deps() + +http_archive( + name = "rules_proto", + sha256 = "dc3fb206a2cb3441b485eb1e423165b231235a1ea9b031b4433cf7bc1fa460dd", + strip_prefix = "rules_proto-5.3.0-21.7", + urls = [ + "https://github.com/bazelbuild/rules_proto/archive/refs/tags/5.3.0-21.7.tar.gz", + ], +) +load("@rules_proto//proto:repositories.bzl", "rules_proto_dependencies", "rules_proto_toolchains") +rules_proto_dependencies() +rules_proto_toolchains() + + cc_tf_configure() lingvo_testonly_deps() -lingvo_protoc_deps() - icu() diff --git a/lingvo/BUILD b/lingvo/BUILD index f0cd49f65..83d56d6ae 100644 --- a/lingvo/BUILD +++ b/lingvo/BUILD @@ -5,7 +5,8 @@ load( # Placeholder: load py_library # Placeholder: load py_test -load("//lingvo:lingvo.bzl", "pytype_library", "pytype_strict_library") +load("//lingvo:lingvo.bzl", "pytype_library", "pytype_strict_library", + "WELL_KNOWN_PROTO_LIBS") package(default_visibility = ["//visibility:public"]) @@ -327,10 +328,38 @@ lingvo_py_binary( ], ) +TF_PROTOS = [ + "tensorflow/core/framework/tensor.proto", + "tensorflow/core/framework/versions.proto", + "tensorflow/core/framework/op_def.proto", + "tensorflow/core/framework/resource_handle.proto", + "tensorflow/core/framework/function.proto", + "tensorflow/core/framework/graph_debug_info.proto", + "tensorflow/core/framework/node_def.proto", + "tensorflow/core/framework/graph.proto", + "tensorflow/core/framework/attr_value.proto", + "tensorflow/core/framework/variable.proto", + "tensorflow/core/framework/full_type.proto", + "tensorflow/core/framework/tensor_shape.proto", + "tensorflow/core/framework/types.proto", + "tensorflow/core/protobuf/trackable_object_graph.proto", + "tensorflow/core/protobuf/saver.proto", + "tensorflow/core/protobuf/struct.proto", + "tensorflow/core/protobuf/saved_object_graph.proto", + "tensorflow/core/protobuf/meta_graph.proto", +] + +proto_library( + name = "tf_protos", + srcs = TF_PROTOS, + strip_import_prefix = "/lingvo", + deps = WELL_KNOWN_PROTO_LIBS, +) + genrule( name = "tf_dot_protos", srcs = [], - outs = ["tf_protos.tar"], + outs = TF_PROTOS, cmd = "$(location //lingvo/tools:" + "generate_tf_dot_protos) $(location " + diff --git a/lingvo/lingvo.bzl b/lingvo/lingvo.bzl index 251e25f88..2f502c294 100644 --- a/lingvo/lingvo.bzl +++ b/lingvo/lingvo.bzl @@ -1,5 +1,7 @@ """Implements custom rules for Lingvo.""" +load("@rules_python//python:proto.bzl", "py_proto_library") + def tf_copts(): # "-Wno-sign-compare", "-mavx" removed for compat with aarch64 return ["-std=c++17"] + select({ @@ -107,65 +109,52 @@ def lingvo_cuda_py_test(name, tags = [], deps = [], **kwargs): **kwargs ) -def _proto_gen_cc_src(name, basename): - native.genrule( - name = name, - srcs = [basename + ".proto"], - outs = [basename + ".pb.cc", basename + ".pb.h"], - tools = [ - "@protobuf_protoc//:protoc_bin", - "//lingvo:tf_dot_protos", - ], - # TODO(drpng): only unpack if tf_proto dependency is requested. - cmd = """ - mkdir -p $(@D)/tf_proto.$$$$; - tar -C $(@D)/tf_proto.$$$$ -xf $(location //lingvo:tf_dot_protos); - $(location @protobuf_protoc//:protoc_bin) --proto_path=$(@D)/tf_proto.$$$$ --proto_path=. --cpp_out=$(GENDIR) $(<); - rm -rf $(@D)/tf_proto.$$$$ - """, - ) - -def _proto_gen_py_src(name, basename): - native.genrule( - name = name, - srcs = [basename + ".proto"], - outs = [basename + "_pb2.py"], - tools = [ - "@protobuf_protoc//:protoc_bin", - "//lingvo:tf_dot_protos", - ], - # TODO(drpng): only unpack if tf_proto dependency is requested. - cmd = """ - mkdir -p $(@D)/tf_proto.$$$$; - tar -C $(@D)/tf_proto.$$$$ -xf $(location //lingvo:tf_dot_protos); - $(location @protobuf_protoc//:protoc_bin) --proto_path=$(@D)/tf_proto.$$$$ --proto_path=. --python_out=$(GENDIR) $(<); - rm -rf $(@D)/tf_proto.$$$$ - """, - ) +WELL_KNOWN_PROTO_LIBS = [ + "@com_google_protobuf//:any_proto", + "@com_google_protobuf//:api_proto", + "@com_google_protobuf//:compiler_plugin_proto", + "@com_google_protobuf//:descriptor_proto", + "@com_google_protobuf//:duration_proto", + "@com_google_protobuf//:empty_proto", + "@com_google_protobuf//:field_mask_proto", + "@com_google_protobuf//:source_context_proto", + "@com_google_protobuf//:struct_proto", + "@com_google_protobuf//:timestamp_proto", + "@com_google_protobuf//:type_proto", + "@com_google_protobuf//:wrappers_proto", +] def lingvo_proto_cc(name, src, deps = []): # TODO(drpng): only works with proto with no deps within lingvo. _unused = [deps] basename = src.replace(".proto", "") - _proto_gen_cc_src(name + "_gencc", basename) - lingvo_cc_library( + native.proto_library( name = name, - srcs = [basename + ".pb.cc"], - hdrs = [basename + ".pb.h"], + srcs = [src], + deps = [ + "//lingvo:tf_protos", + ] + WELL_KNOWN_PROTO_LIBS, ) - lingvo_cc_library( - name = "%s_cc" % name, - deps = [":%s" % name], + native.cc_proto_library( + name = name + "_cc", + deps = [":" + name] ) + def lingvo_proto_py(name, src, deps = []): # TODO(drpng): only works with proto with no deps within lingvo. _unused = [deps] basename = src.replace(".proto", "") - _proto_gen_py_src(name + "_genpy", basename) - native.py_library( + native.proto_library( + name = name + "_pyproto", + srcs = [src], + deps = [ + "//lingvo:tf_protos", + ] + WELL_KNOWN_PROTO_LIBS, + ) + py_proto_library( name = name, - srcs = [basename + "_pb2.py"], + deps = [name + "_pyproto"], ) # Placeholders to use until bazel supports pytype_{,strict_}{library,test,binary}. diff --git a/lingvo/repo.bzl b/lingvo/repo.bzl index ce65822d2..672be1dc8 100644 --- a/lingvo/repo.bzl +++ b/lingvo/repo.bzl @@ -221,22 +221,6 @@ cc_library( strip_prefix = "googletest-release-1.10.0", ) -def lingvo_protoc_deps(): - http_archive( - name = "protobuf_protoc", - build_file_content = """ -filegroup( - name = "protoc_bin", - srcs = ["bin/protoc"], - visibility = ["//visibility:public"], -) -""", - urls = [ - "https://github.com/protocolbuffers/protobuf/releases/download/v21.9/protoc-21.9-linux-x86_64.zip", - ], - sha256 = "3cd951aff8ce713b94cde55e12378f505f2b89d47bf080508cf77e3934f680b6", - ) - def icu(): third_party_http_archive( name = "icu", diff --git a/lingvo/tools/generate_tf_dot_protos.sh b/lingvo/tools/generate_tf_dot_protos.sh index 333154b28..ecf8925ba 100755 --- a/lingvo/tools/generate_tf_dot_protos.sh +++ b/lingvo/tools/generate_tf_dot_protos.sh @@ -31,7 +31,3 @@ mkdir -p ${dest}/tensorflow/core/framework mkdir -p ${dest}/tensorflow/core/protobuf ${binary} ${dest} - -# genrule requires statically determined outputs, so we package all -# into a single file. -tar -C ${dest} -cf ${dest}/tf_protos.tar tensorflow/core/{framework,protobuf}