diff --git a/CMakeLists.txt b/CMakeLists.txt index ee9c00ab..f61eacfd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -38,6 +38,7 @@ find_package(nlohmann_json REQUIRED) find_package(vincentlaucsb-csv-parser REQUIRED) find_package(uchardet REQUIRED) find_package(utf8cpp REQUIRED) +find_package(httplib REQUIRED) configure_file("src/odr/internal/project_info.cpp.in" "src/odr/internal/project_info.cpp") @@ -66,6 +67,7 @@ set(ODR_SOURCE_FILES "src/odr/global_params.cpp" "src/odr/html.cpp" "src/odr/html_service.cpp" + "src/odr/http_server.cpp" "src/odr/open_document_reader.cpp" "src/odr/quantity.cpp" "src/odr/style.cpp" @@ -194,6 +196,7 @@ target_link_libraries(odr vincentlaucsb-csv-parser::vincentlaucsb-csv-parser uchardet::uchardet utf8::cpp + httplib::httplib ) if (WITH_PDF2HTMLEX) diff --git a/cli/CMakeLists.txt b/cli/CMakeLists.txt index 8aed679d..06da9456 100644 --- a/cli/CMakeLists.txt +++ b/cli/CMakeLists.txt @@ -24,3 +24,14 @@ target_include_directories(back_translate PRIVATE ../src ) + +add_executable(server src/server.cpp) +target_link_libraries(server + PRIVATE + odr + httplib::httplib +) +target_include_directories(server + PRIVATE + ../src +) diff --git a/cli/src/server.cpp b/cli/src/server.cpp new file mode 100644 index 00000000..8c0f32fe --- /dev/null +++ b/cli/src/server.cpp @@ -0,0 +1,54 @@ +#include +#include + +#include +#include + +using namespace odr; + +int main(int argc, char **argv) { + std::string input{argv[1]}; + + std::optional password; + if (argc >= 3) { + password = argv[2]; + } + + DecodePreference decode_preference; + decode_preference.engine_priority = { + DecoderEngine::poppler, DecoderEngine::wvware, DecoderEngine::odr}; + + DecodedFile decoded_file{input, decode_preference}; + + if (decoded_file.is_document_file()) { + DocumentFile document_file = decoded_file.document_file(); + if (document_file.password_encrypted() && !password) { + std::cerr << "document encrypted but no password given" << std::endl; + return 2; + } + if (document_file.password_encrypted() && + !document_file.decrypt(*password)) { + std::cerr << "wrong password" << std::endl; + return 1; + } + } + + HttpServer::Config config; + HttpServer server(config); + + { + std::string id = server.host_file(File(input)); + std::cout << "hosted file with id: " << id << std::endl; + std::cout << "http://localhost:8080/" << id << std::endl; + } + + { + std::string id = server.host_file(decoded_file); + std::cout << "hosted decoded file with id: " << id << std::endl; + std::cout << "http://localhost:8080/" << id << std::endl; + } + + server.listen("localhost", 8080); + + return 0; +} diff --git a/cli/src/translate.cpp b/cli/src/translate.cpp index dc027844..3df964a4 100644 --- a/cli/src/translate.cpp +++ b/cli/src/translate.cpp @@ -1,4 +1,3 @@ -#include #include #include diff --git a/conanfile.py b/conanfile.py index 2950f050..26509ed5 100644 --- a/conanfile.py +++ b/conanfile.py @@ -47,6 +47,7 @@ def requirements(self): self.requires("pdf2htmlex/0.18.8.rc1-odr-pr1") if self.options.get_safe("with_wvWare", False): self.requires("wvware/1.2.9-odr") + self.requires("cpp-httplib/0.16.3") def build_requirements(self): self.test_requires("gtest/1.14.0") diff --git a/src/odr/global_params.cpp b/src/odr/global_params.cpp index f4ac7bab..347e4296 100644 --- a/src/odr/global_params.cpp +++ b/src/odr/global_params.cpp @@ -9,16 +9,28 @@ namespace odr { GlobalParams &GlobalParams::instance() { - static GlobalParams instance; - return instance; + struct HolderAndInitializer { + GlobalParams params; + HolderAndInitializer() { +#ifdef ODR_WITH_PDF2HTMLEX + globalParams = std::make_unique<::GlobalParams>( + params.m_poppler_data_path.empty() + ? nullptr + : params.m_poppler_data_path.c_str()); +#endif + } + }; + static HolderAndInitializer instance; + + return instance.params; } const std::string &GlobalParams::odr_core_data_path() { return instance().m_odr_core_data_path; } -const std::string &GlobalParams::fontforge_data_path() { - return instance().m_fontforge_data_path; +const std::string &GlobalParams::fontconfig_data_path() { + return instance().m_fontconfig_data_path; } const std::string &GlobalParams::poppler_data_path() { @@ -33,8 +45,8 @@ void GlobalParams::set_odr_core_data_path(const std::string &path) { instance().m_odr_core_data_path = path; } -void GlobalParams::set_fontforge_data_path(const std::string &path) { - instance().m_fontforge_data_path = path; +void GlobalParams::set_fontconfig_data_path(const std::string &path) { + instance().m_fontconfig_data_path = path; } void GlobalParams::set_poppler_data_path(const std::string &path) { @@ -50,11 +62,10 @@ void GlobalParams::set_pdf2htmlex_data_path(const std::string &path) { instance().m_pdf2htmlex_data_path = path; } -GlobalParams::GlobalParams() { - set_odr_core_data_path(""); // TODO - set_fontforge_data_path(internal::project_info::fontconfig_data_path()); - set_poppler_data_path(internal::project_info::poppler_data_path()); - set_pdf2htmlex_data_path(internal::project_info::pdf2htmlex_data_path()); -} +GlobalParams::GlobalParams() + : m_odr_core_data_path{}, // TODO + m_fontconfig_data_path{internal::project_info::fontconfig_data_path()}, + m_poppler_data_path{internal::project_info::poppler_data_path()}, + m_pdf2htmlex_data_path{internal::project_info::pdf2htmlex_data_path()} {} } // namespace odr diff --git a/src/odr/global_params.hpp b/src/odr/global_params.hpp index 665dc7bc..9888be17 100644 --- a/src/odr/global_params.hpp +++ b/src/odr/global_params.hpp @@ -7,12 +7,12 @@ namespace odr { class GlobalParams { public: static const std::string &odr_core_data_path(); - static const std::string &fontforge_data_path(); + static const std::string &fontconfig_data_path(); static const std::string &poppler_data_path(); static const std::string &pdf2htmlex_data_path(); static void set_odr_core_data_path(const std::string &path); - static void set_fontforge_data_path(const std::string &path); + static void set_fontconfig_data_path(const std::string &path); static void set_poppler_data_path(const std::string &path); static void set_pdf2htmlex_data_path(const std::string &path); @@ -22,7 +22,7 @@ class GlobalParams { GlobalParams(); std::string m_odr_core_data_path; - std::string m_fontforge_data_path; + std::string m_fontconfig_data_path; std::string m_poppler_data_path; std::string m_pdf2htmlex_data_path; }; diff --git a/src/odr/html.cpp b/src/odr/html.cpp index c619d33a..069867e3 100644 --- a/src/odr/html.cpp +++ b/src/odr/html.cpp @@ -21,7 +21,6 @@ #include using namespace odr::internal; -namespace fs = std::filesystem; namespace odr { @@ -89,13 +88,13 @@ Html html::translate(const DecodedFile &decoded_file, Html html::translate(const TextFile &text_file, const std::string &output_path, const HtmlConfig &config) { - fs::create_directories(output_path); + std::filesystem::create_directories(output_path); return internal::html::translate_text_file(text_file, output_path, config); } Html html::translate(const ImageFile &image_file, const std::string &output_path, const HtmlConfig &config) { - fs::create_directories(output_path); + std::filesystem::create_directories(output_path); return internal::html::translate_image_file(image_file, output_path, config); } @@ -112,7 +111,7 @@ Html html::translate(const DocumentFile &document_file, if (auto wv_document_file = std::dynamic_pointer_cast( document_file_impl)) { - fs::create_directories(output_path); + std::filesystem::create_directories(output_path); return internal::html::translate_wvware_oldms_file(*wv_document_file, output_path, config); } @@ -128,7 +127,7 @@ Html html::translate(const PdfFile &pdf_file, const std::string &output_path, #ifdef ODR_WITH_PDF2HTMLEX if (auto poppler_pdf_file = std::dynamic_pointer_cast(pdf_file_impl)) { - fs::create_directories(output_path); + std::filesystem::create_directories(output_path); return internal::html::translate_poppler_pdf_file(*poppler_pdf_file, output_path, config); } @@ -139,14 +138,14 @@ Html html::translate(const PdfFile &pdf_file, const std::string &output_path, Html html::translate(const Archive &archive, const std::string &output_path, const HtmlConfig &config) { - fs::create_directories(output_path); + std::filesystem::create_directories(output_path); return internal::html::translate_filesystem( FileType::unknown, archive.filesystem(), output_path, config); } Html html::translate(const Document &document, const std::string &output_path, const HtmlConfig &config) { - fs::create_directories(output_path); + std::filesystem::create_directories(output_path); return internal::html::translate_document(document, output_path, config); } diff --git a/src/odr/html.hpp b/src/odr/html.hpp index 2e8088b4..ebe5e4a0 100644 --- a/src/odr/html.hpp +++ b/src/odr/html.hpp @@ -39,7 +39,8 @@ struct HtmlConfig { std::string page_output_file_name{"page{index}.html"}; // embedding - bool embed_resources{true}; + bool embed_images{true}; + bool embed_shipped_resources{true}; // resources std::string external_resource_path; diff --git a/src/odr/html_service.cpp b/src/odr/html_service.cpp index 42b7619d..3f955875 100644 --- a/src/odr/html_service.cpp +++ b/src/odr/html_service.cpp @@ -7,24 +7,21 @@ namespace odr { -HtmlService::HtmlService(std::shared_ptr impl) - : m_impl{std::move(impl)} {} +HtmlDocumentService::HtmlDocumentService() = default; -const HtmlConfig &HtmlService::config() const { return m_impl->config(); } +HtmlDocumentService::HtmlDocumentService( + std::shared_ptr impl) + : m_impl{std::move(impl)} {} -const HtmlResourceLocator &HtmlService::resource_locator() const { - return m_impl->resource_locator(); +const HtmlConfig &HtmlDocumentService::config() const { + return m_impl->config(); } -std::vector HtmlService::fragments() const { - std::vector result; - for (const auto &fragment : m_impl->fragments()) { - result.emplace_back(fragment); - } - return result; +const HtmlResourceLocator &HtmlDocumentService::resource_locator() const { + return m_impl->resource_locator(); } -HtmlResources HtmlService::write_document(std::ostream &os) const { +HtmlResources HtmlDocumentService::write_document(std::ostream &os) const { internal::html::HtmlWriter out(os, config()); auto internal_resources = m_impl->write_document(out); @@ -36,35 +33,33 @@ HtmlResources HtmlService::write_document(std::ostream &os) const { return resources; } -HtmlFragment::HtmlFragment( - std::shared_ptr impl) - : m_impl{std::move(impl)} {} +const std::shared_ptr & +HtmlDocumentService::impl() const { + return m_impl; +} -std::string HtmlFragment::name() const { return m_impl->name(); } +HtmlFragmentService::HtmlFragmentService( + std::shared_ptr impl) + : m_impl{std::move(impl)} {} -const HtmlConfig &HtmlFragment::config() const { return m_impl->config(); } +const HtmlConfig &HtmlFragmentService::config() const { + return m_impl->config(); +} -const HtmlResourceLocator &HtmlFragment::resource_locator() const { +const HtmlResourceLocator &HtmlFragmentService::resource_locator() const { return m_impl->resource_locator(); } -void HtmlFragment::write_fragment(std::ostream &os, - HtmlResources &resources) const { +void HtmlFragmentService::write_fragment(std::ostream &os, + HtmlResources &resources) const { internal::html::HtmlWriter out(os, config()); m_impl->write_fragment(out, resources); } -HtmlResources HtmlFragment::write_document(std::ostream &os) const { - internal::html::HtmlWriter out(os, config()); - - auto internal_resources = m_impl->write_document(out); - - HtmlResources resources; - for (const auto &[resource, location] : internal_resources) { - resources.emplace_back(HtmlResource(resource), location); - } - return resources; +const std::shared_ptr & +HtmlFragmentService::impl() const { + return m_impl; } HtmlResource::HtmlResource() = default; @@ -75,6 +70,10 @@ HtmlResource::HtmlResource( HtmlResourceType HtmlResource::type() const { return m_impl->type(); } +const std::string &HtmlResource::mime_type() const { + return m_impl->mime_type(); +} + const std::string &HtmlResource::name() const { return m_impl->name(); } const std::string &HtmlResource::path() const { return m_impl->path(); } @@ -85,6 +84,8 @@ bool HtmlResource::is_shipped() const { return m_impl->is_shipped(); } bool HtmlResource::is_relocatable() const { return m_impl->is_relocatable(); } +bool HtmlResource::is_external() const { return m_impl->is_external(); } + void HtmlResource::write_resource(std::ostream &os) const { m_impl->write_resource(os); } diff --git a/src/odr/html_service.hpp b/src/odr/html_service.hpp index 57f2da3c..0491fde7 100644 --- a/src/odr/html_service.hpp +++ b/src/odr/html_service.hpp @@ -8,8 +8,8 @@ #include namespace odr::internal::abstract { -class HtmlService; -class HtmlFragment; +class HtmlDocumentService; +class HtmlFragmentService; class HtmlResource; } // namespace odr::internal::abstract @@ -18,7 +18,6 @@ enum class FileType; class File; struct HtmlConfig; -class HtmlFragment; class HtmlResource; enum class HtmlResourceType { @@ -35,36 +34,39 @@ using HtmlResourceLocator = using HtmlResources = std::vector>; -class HtmlService final { +class HtmlDocumentService final { public: - explicit HtmlService(std::shared_ptr impl); + HtmlDocumentService(); + explicit HtmlDocumentService( + std::shared_ptr impl); [[nodiscard]] const HtmlConfig &config() const; [[nodiscard]] const HtmlResourceLocator &resource_locator() const; - [[nodiscard]] std::vector fragments() const; - HtmlResources write_document(std::ostream &os) const; + [[nodiscard]] const std::shared_ptr & + impl() const; + private: - std::shared_ptr m_impl; + std::shared_ptr m_impl; }; -class HtmlFragment final { +class HtmlFragmentService final { public: - explicit HtmlFragment(std::shared_ptr impl); - - [[nodiscard]] std::string name() const; + explicit HtmlFragmentService( + std::shared_ptr impl); [[nodiscard]] const HtmlConfig &config() const; [[nodiscard]] const HtmlResourceLocator &resource_locator() const; void write_fragment(std::ostream &os, HtmlResources &resources) const; - HtmlResources write_document(std::ostream &os) const; + [[nodiscard]] const std::shared_ptr & + impl() const; private: - std::shared_ptr m_impl; + std::shared_ptr m_impl; }; class HtmlResource final { @@ -73,11 +75,13 @@ class HtmlResource final { explicit HtmlResource(std::shared_ptr impl); [[nodiscard]] HtmlResourceType type() const; + [[nodiscard]] const std::string &mime_type() const; [[nodiscard]] const std::string &name() const; [[nodiscard]] const std::string &path() const; [[nodiscard]] const File &file() const; [[nodiscard]] bool is_shipped() const; [[nodiscard]] bool is_relocatable() const; + [[nodiscard]] bool is_external() const; void write_resource(std::ostream &os) const; diff --git a/src/odr/http_server.cpp b/src/odr/http_server.cpp new file mode 100644 index 00000000..b8e53d6f --- /dev/null +++ b/src/odr/http_server.cpp @@ -0,0 +1,194 @@ +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +namespace odr { + +class HttpServer::Impl { +public: + explicit Impl(const HttpServer::Config &config) : m_config{config} {} + + [[nodiscard]] std::string host_file(File file) { + std::string id = get_id(); + m_server.Get( + "/" + id, + [this, file = std::move(file)](const httplib::Request & /*req*/, + httplib::Response &res) -> void { + struct State { + State(std::unique_ptr stream_, + std::size_t buffer_size) + : stream{std::move(stream_)}, buffer(buffer_size, 0) {} + + std::unique_ptr stream; + std::vector buffer; + }; + + auto state = + std::make_shared(file.stream(), m_config.buffer_size); + httplib::ContentProvider content_provider = + [state = std::move(state)](std::size_t offset, std::size_t length, + httplib::DataSink &sink) -> bool { + std::istream &stream = *state->stream; + + stream.seekg(static_cast(offset), std::ios::beg); + stream.read(state->buffer.data(), + static_cast(length)); + sink.write(state->buffer.data(), + static_cast(stream.gcount())); + + return !stream.eof(); + }; + + res.set_content_provider(file.size(), "application/octet-stream", + content_provider); + }); + return id; + } + + [[nodiscard]] std::string host_file(DecodedFile file) { + std::string id = get_id(); + + // TODO + HtmlConfig config; + config.embed_images = false; + config.embed_shipped_resources = false; + std::string output_path = "/tmp/" + id; + + std::filesystem::create_directories(output_path); + + HtmlDocumentService html_service; + + if (file.is_document_file()) { + DocumentFile document_file = file.document_file(); + if (document_file.password_encrypted()) { + throw std::runtime_error("Document is encrypted"); + } + auto document = document_file.document(); + html_service = internal::html::create_document_service( + document, output_path, config); +#ifdef ODR_WITH_PDF2HTMLEX + } else if (file.is_pdf_file()) { + PdfFile pdf_file = file.pdf_file(); + if (pdf_file.password_encrypted()) { + throw std::runtime_error("Document is encrypted"); + } + html_service = internal::html::create_poppler_pdf_service( + dynamic_cast(*pdf_file.impl()), + output_path, config); +#endif + } else { + throw std::runtime_error("Unsupported file type"); + } + + std::ofstream null; + HtmlResources resources = html_service.write_document(null); + + m_server.Get("/" + id, [id](const httplib::Request & /*req*/, + httplib::Response &res) { + res.set_redirect("/" + id + "/document.html"); + }); + + m_server.Get("/" + id + "/document.html", + [=](const httplib::Request & /*req*/, httplib::Response &res) { + httplib::ContentProviderWithoutLength content_provider = + [html_service](std::size_t offset, + httplib::DataSink &sink) -> bool { + if (offset != 0) { + throw std::runtime_error( + "Invalid offset: " + std::to_string(offset) + + ". Must be 0."); + } + html_service.write_document(sink.os); + return false; + }; + res.set_content_provider("text/html", content_provider); + }); + + for (const auto &[resource, location] : resources) { + if (!location.has_value() || resource.is_external()) { + continue; + } + + m_server.Get( + "/" + id + "/" + location.value(), + [=](const httplib::Request & /*req*/, httplib::Response &res) { + httplib::ContentProviderWithoutLength content_provider = + [resource](std::size_t offset, + httplib::DataSink &sink) -> bool { + if (offset != 0) { + throw std::runtime_error( + "Invalid offset: " + std::to_string(offset) + + ". Must be 0."); + } + resource.write_resource(sink.os); + return false; + }; + res.set_content_provider(resource.mime_type(), content_provider); + }); + } + + return id; + } + + [[nodiscard]] std::string host_filesystem(Filesystem filesystem) { + std::string id = get_id(); + m_server.Get("/" + id, + [filesystem = std::move(filesystem)]( + const httplib::Request & /*req*/, httplib::Response &res) { + res.set_content("Hello World!", "text/plain"); + }); + return id; + } + + void listen(const std::string &host, std::uint32_t port) { + m_server.listen(host, static_cast(port)); + } + +private: + [[nodiscard]] static std::string get_id() { + std::mt19937 rng(std::random_device{}()); + std::uniform_int_distribution dist(0, 9); + + std::string id(10, '0'); + for (char &c : id) { + c = static_cast('0' + dist(rng)); + } + + return id; + } + + HttpServer::Config m_config; + + httplib::Server m_server; +}; + +HttpServer::HttpServer(const Config &config) + : m_impl{std::make_unique(config)} {} + +std::string HttpServer::host_file(File file) { + return m_impl->host_file(std::move(file)); +} + +std::string HttpServer::host_file(DecodedFile file) { + return m_impl->host_file(std::move(file)); +} + +std::string HttpServer::host_filesystem(Filesystem filesystem) { + return m_impl->host_filesystem(std::move(filesystem)); +} + +void HttpServer::listen(const std::string &host, std::uint32_t port) { + m_impl->listen(host, port); +} + +} // namespace odr diff --git a/src/odr/http_server.hpp b/src/odr/http_server.hpp new file mode 100644 index 00000000..86cef9ed --- /dev/null +++ b/src/odr/http_server.hpp @@ -0,0 +1,32 @@ +#pragma once + +#include +#include +#include + +namespace odr { +class File; +class DecodedFile; +class Filesystem; + +class HttpServer { +public: + struct Config { + std::size_t buffer_size{4096}; + }; + + explicit HttpServer(const Config &config); + + std::string host_file(File file); + std::string host_file(DecodedFile file); + std::string host_filesystem(Filesystem filesystem); + + void listen(const std::string &host, std::uint32_t port); + +private: + class Impl; + + std::shared_ptr m_impl; +}; + +} // namespace odr diff --git a/src/odr/internal/abstract/html_service.hpp b/src/odr/internal/abstract/html_service.hpp index bc6d6ee1..4610ee77 100644 --- a/src/odr/internal/abstract/html_service.hpp +++ b/src/odr/internal/abstract/html_service.hpp @@ -5,41 +5,35 @@ #include #include +namespace odr { +class File; +} + namespace odr::internal::html { class HtmlWriter; } namespace odr::internal::abstract { -class File; -class HtmlFragment; -class HtmlResource; -class HtmlService { +class HtmlDocumentService { public: - virtual ~HtmlService() = default; + virtual ~HtmlDocumentService() = default; [[nodiscard]] virtual const HtmlConfig &config() const = 0; [[nodiscard]] virtual const HtmlResourceLocator &resource_locator() const = 0; - [[nodiscard]] virtual const std::vector> & - fragments() const = 0; - virtual HtmlResources write_document(html::HtmlWriter &out) const = 0; }; -class HtmlFragment { +class HtmlFragmentService { public: - virtual ~HtmlFragment() = default; - - [[nodiscard]] virtual std::string name() const = 0; + virtual ~HtmlFragmentService() = default; [[nodiscard]] virtual const HtmlConfig &config() const = 0; [[nodiscard]] virtual const HtmlResourceLocator &resource_locator() const = 0; virtual void write_fragment(html::HtmlWriter &out, HtmlResources &resources) const = 0; - - virtual HtmlResources write_document(html::HtmlWriter &out) const = 0; }; class HtmlResource { @@ -47,11 +41,13 @@ class HtmlResource { virtual ~HtmlResource() = default; [[nodiscard]] virtual HtmlResourceType type() const = 0; + [[nodiscard]] virtual const std::string &mime_type() const = 0; [[nodiscard]] virtual const std::string &name() const = 0; [[nodiscard]] virtual const std::string &path() const = 0; [[nodiscard]] virtual const odr::File &file() const = 0; [[nodiscard]] virtual bool is_shipped() const = 0; [[nodiscard]] virtual bool is_relocatable() const = 0; + [[nodiscard]] virtual bool is_external() const = 0; virtual void write_resource(std::ostream &os) const = 0; }; diff --git a/src/odr/internal/html/common.cpp b/src/odr/internal/html/common.cpp index afbeb80b..fff4e1c6 100644 --- a/src/odr/internal/html/common.cpp +++ b/src/odr/internal/html/common.cpp @@ -62,9 +62,11 @@ std::string html::file_to_url(const abstract::File &file, HtmlResourceLocator html::local_resource_locator(const std::string &output_path, const HtmlConfig &config) { return [&](const odr::HtmlResource &resource) -> HtmlResourceLocation { - // TODO remove `!is_shipped_resource` check after supporting external - // resources - if (config.embed_resources || !resource.is_shipped()) { + if (!resource.is_relocatable()) { + return resource.path(); + } + if ((config.embed_shipped_resources && resource.is_shipped()) || + (config.embed_images && resource.type() == HtmlResourceType::image)) { return std::nullopt; } diff --git a/src/odr/internal/html/document.cpp b/src/odr/internal/html/document.cpp index b0b3f491..e72e03ed 100644 --- a/src/odr/internal/html/document.cpp +++ b/src/odr/internal/html/document.cpp @@ -39,8 +39,9 @@ void front(const Document &document, const WritingState &state) { } auto odr_css_file = Resources::open(common::Path("odr.css")); - odr::HtmlResource odr_css_resource = html::HtmlResource::create( - HtmlResourceType::css, "odr.css", "odr.css", odr_css_file, true, true); + odr::HtmlResource odr_css_resource = + html::HtmlResource::create(HtmlResourceType::css, "text/css", "odr.css", + "odr.css", odr_css_file, true, true, false); HtmlResourceLocation odr_css_location = state.resource_locator()(odr_css_resource); state.resources().emplace_back(std::move(odr_css_resource), odr_css_location); @@ -56,8 +57,8 @@ void front(const Document &document, const WritingState &state) { auto odr_spreadsheet_css_file = Resources::open(common::Path("odr_spreadsheet.css")); odr::HtmlResource odr_spreadsheet_css_resource = html::HtmlResource::create( - HtmlResourceType::css, "odr_spreadsheet.css", "odr_spreadsheet.css", - odr_spreadsheet_css_file, true, true); + HtmlResourceType::css, "text/css", "odr_spreadsheet.css", + "odr_spreadsheet.css", odr_spreadsheet_css_file, true, true, false); HtmlResourceLocation odr_spreadsheet_css_location = state.resource_locator()(odr_spreadsheet_css_resource); state.resources().emplace_back(std::move(odr_spreadsheet_css_resource), @@ -93,7 +94,8 @@ void back(const Document &document, const WritingState &state) { auto odr_js_file = Resources::open(common::Path("odr.js")); odr::HtmlResource odr_js_resource = html::HtmlResource::create( - HtmlResourceType::js, "odr.js", "odr.js", odr_js_file, true, true); + HtmlResourceType::js, "text/javascript", "odr.js", "odr.js", odr_js_file, + true, true, false); HtmlResourceLocation odr_js_location = state.resource_locator()(odr_js_resource); state.resources().emplace_back(std::move(odr_js_resource), odr_js_location); @@ -137,56 +139,78 @@ std::string get_output_path(const Document &document, std::uint32_t index, } } -class HtmlServiceImpl : public HtmlService { +class HtmlFragmentBase : public HtmlFragmentService { public: - HtmlServiceImpl( - Document document, - std::vector> fragments, - HtmlConfig config, HtmlResourceLocator resource_locator) - : HtmlService(std::move(config), std::move(resource_locator), - std::move(fragments)), - m_document{std::move(document)} {} + HtmlFragmentBase(std::string name, Document document, HtmlConfig config, + HtmlResourceLocator resource_locator) + : HtmlFragmentService(std::move(config), std::move(resource_locator)), + m_name{std::move(name)}, m_document{std::move(document)} {} - HtmlResources write_document(HtmlWriter &out) const override { - HtmlResources resources; + const std::string &name() const { return m_name; } - WritingState state(out, config(), resource_locator(), resources); + Document document() const { return m_document; } - front(m_document, state); - for (const auto &fragment : fragments()) { - fragment->write_fragment(out, resources); - } - back(m_document, state); - - return resources; - } - -private: +protected: + std::string m_name; Document m_document; }; -class HtmlFragmentBase : public HtmlFragment { +class HtmlDocumentServiceImpl final + : public internal::abstract::HtmlDocumentService { public: - HtmlFragmentBase(std::string name, Document document, HtmlConfig config, - HtmlResourceLocator resource_locator) - : HtmlFragment(std::move(name), std::move(config), - std::move(resource_locator)), - m_document{std::move(document)} {} + explicit HtmlDocumentServiceImpl(std::shared_ptr fragment) + : m_fragment{std::move(fragment)} {} + + [[nodiscard]] const std::shared_ptr &fragment() const { + return m_fragment; + } + + [[nodiscard]] const HtmlConfig &config() const final { + return m_fragment->config(); + } + [[nodiscard]] const HtmlResourceLocator &resource_locator() const final { + return m_fragment->resource_locator(); + } HtmlResources write_document(HtmlWriter &out) const final { HtmlResources resources; WritingState state(out, config(), resource_locator(), resources); - front(m_document, state); - write_fragment(out, resources); - back(m_document, state); + front(m_fragment->document(), state); + m_fragment->write_fragment(out, resources); + back(m_fragment->document(), state); return resources; } -protected: - Document m_document; +private: + std::shared_ptr m_fragment; +}; + +class HtmlFragmentCollection : public HtmlFragmentBase { +public: + HtmlFragmentCollection( + Document document, + std::vector> fragments, + HtmlConfig config, HtmlResourceLocator resource_locator) + : HtmlFragmentBase("document", std::move(document), std::move(config), + std::move(resource_locator)), + m_fragments{std::move(fragments)} {} + + [[nodiscard]] const std::vector> & + fragments() const { + return m_fragments; + } + + void write_fragment(HtmlWriter &out, HtmlResources &resources) const final { + for (const auto &fragment : m_fragments) { + fragment->write_fragment(out, resources); + } + } + +private: + std::vector> m_fragments; }; class TextHtmlFragment final : public HtmlFragmentBase { @@ -285,13 +309,14 @@ class PageHtmlFragment final : public HtmlFragmentBase { namespace odr::internal { -odr::HtmlService html::create_document_service(const Document &document, - const std::string &output_path, - const HtmlConfig &config) { +odr::HtmlDocumentService +html::create_document_service(const Document &document, + const std::string &output_path, + const HtmlConfig &config) { HtmlResourceLocator resource_locator = local_resource_locator(output_path, config); - std::vector> fragments; + std::vector> fragments; if (document.document_type() == DocumentType::text) { fragments.push_back( @@ -315,20 +340,27 @@ odr::HtmlService html::create_document_service(const Document &document, throw UnknownDocumentType(); } - return odr::HtmlService(std::make_unique( - document, fragments, config, resource_locator)); + return odr::HtmlDocumentService(std::make_unique( + std::make_unique(document, fragments, config, + resource_locator))); } Html html::translate_document(const odr::Document &document, const std::string &output_path, const odr::HtmlConfig &config) { - odr::HtmlService service = + odr::HtmlDocumentService document_service = create_document_service(document, output_path, config); + auto document_service_impl = + std::dynamic_pointer_cast( + document_service.impl()); + auto fragment_service_impl = + std::dynamic_pointer_cast( + document_service_impl->fragment()); std::vector pages; std::uint32_t i = 0; - for (const auto &fragment : service.fragments()) { + for (const auto &fragment : fragment_service_impl->fragments()) { std::string filled_path = get_output_path(document, i, output_path, config); std::ofstream ostream(filled_path, std::ios::out); if (!ostream.is_open()) { @@ -336,9 +368,9 @@ Html html::translate_document(const odr::Document &document, } html::HtmlWriter out(ostream, config.format_html, config.html_indent); - fragment.write_document(out.out()); + HtmlDocumentServiceImpl(fragment).write_document(out); - pages.emplace_back(fragment.name(), filled_path); + pages.emplace_back(fragment->name(), filled_path); ++i; } diff --git a/src/odr/internal/html/document.hpp b/src/odr/internal/html/document.hpp index f9468205..a0630443 100644 --- a/src/odr/internal/html/document.hpp +++ b/src/odr/internal/html/document.hpp @@ -6,14 +6,14 @@ namespace odr { class Document; struct HtmlConfig; class Html; -class HtmlService; +class HtmlDocumentService; } // namespace odr namespace odr::internal::html { -odr::HtmlService create_document_service(const Document &document, - const std::string &output_path, - const HtmlConfig &config); +odr::HtmlDocumentService create_document_service(const Document &document, + const std::string &output_path, + const HtmlConfig &config); Html translate_document(const Document &document, const std::string &output_path, diff --git a/src/odr/internal/html/document_element.cpp b/src/odr/internal/html/document_element.cpp index c42c56c4..84c64cfe 100644 --- a/src/odr/internal/html/document_element.cpp +++ b/src/odr/internal/html/document_element.cpp @@ -390,12 +390,14 @@ void html::translate_image(Element element, const WritingState &state) { odr::HtmlResource resource; HtmlResourceLocation resource_location; if (image.is_internal()) { - resource = HtmlResource::create(HtmlResourceType::image, "image", "image", - image.file().value(), false, true); + resource = HtmlResource::create(HtmlResourceType::image, "image/jpg", + image.href(), image.href(), + image.file().value(), false, true, false); resource_location = state.resource_locator()(resource); } else { - resource = HtmlResource::create(HtmlResourceType::image, "image", "image", - odr::File(), false, false); + resource = + HtmlResource::create(HtmlResourceType::image, "image/jpg", "image", + "image", odr::File(), false, false, true); resource_location = image.href(); } state.resources().emplace_back(std::move(resource), resource_location); diff --git a/src/odr/internal/html/html_service.cpp b/src/odr/internal/html/html_service.cpp index 86152623..fcc9b0dc 100644 --- a/src/odr/internal/html/html_service.cpp +++ b/src/odr/internal/html/html_service.cpp @@ -3,55 +3,55 @@ #include #include +#include namespace odr::internal::html { -HtmlService::HtmlService( - HtmlConfig config, HtmlResourceLocator resource_locator, - std::vector> fragments) - : m_config{config}, m_resource_locator{resource_locator}, - m_fragments{std::move(fragments)} {} +HtmlDocumentService::HtmlDocumentService(HtmlConfig config, + HtmlResourceLocator resource_locator) + : m_config{std::move(config)}, + m_resource_locator{std::move(resource_locator)} {} -const HtmlConfig &HtmlService::config() const { return m_config; } +const HtmlConfig &HtmlDocumentService::config() const { return m_config; } -const HtmlResourceLocator &HtmlService::resource_locator() const { +const HtmlResourceLocator &HtmlDocumentService::resource_locator() const { return m_resource_locator; } -const std::vector> & -HtmlService::fragments() const { - return m_fragments; -} - -HtmlFragment::HtmlFragment(std::string name, HtmlConfig config, - HtmlResourceLocator resource_locator) - : m_name{name}, m_config{config}, m_resource_locator{resource_locator} {} - -std::string HtmlFragment::name() const { return m_name; } +HtmlFragmentService::HtmlFragmentService(HtmlConfig config, + HtmlResourceLocator resource_locator) + : m_config{std::move(config)}, + m_resource_locator{std::move(resource_locator)} {} -const HtmlConfig &HtmlFragment::config() const { return m_config; } +const HtmlConfig &HtmlFragmentService::config() const { return m_config; } -const HtmlResourceLocator &HtmlFragment::resource_locator() const { +const HtmlResourceLocator &HtmlFragmentService::resource_locator() const { return m_resource_locator; } odr::HtmlResource HtmlResource::create(HtmlResourceType type, - const std::string &name, - const std::string &path, - const odr::File &file, bool is_shipped, - bool is_relocatable) { + std::string mime_type, std::string name, + std::string path, odr::File file, + bool is_shipped, bool is_relocatable, + bool is_external) { return odr::HtmlResource(std::make_shared( - type, name, path, file, is_shipped, is_relocatable)); + type, std::move(mime_type), std::move(name), std::move(path), + std::move(file), is_shipped, is_relocatable, is_external)); } -HtmlResource::HtmlResource(HtmlResourceType type, const std::string &name, - const std::string &path, const odr::File &file, - bool is_shipped, bool is_relocatable) - : m_type{type}, m_name{name}, m_path{path}, m_file{file}, - m_is_shipped{is_shipped}, m_is_relocatable{is_relocatable} {} +HtmlResource::HtmlResource(HtmlResourceType type, std::string mime_type, + std::string name, std::string path, odr::File file, + bool is_shipped, bool is_relocatable, + bool is_external) + : m_type{type}, m_mime_type{std::move(mime_type)}, m_name{std::move(name)}, + m_path{std::move(path)}, m_file{std::move(file)}, + m_is_shipped{is_shipped}, m_is_relocatable{is_relocatable}, + m_is_external{is_external} {} HtmlResourceType HtmlResource::type() const { return m_type; } +const std::string &HtmlResource::mime_type() const { return m_mime_type; } + const std::string &HtmlResource::name() const { return m_name; } const std::string &HtmlResource::path() const { return m_path; } @@ -62,6 +62,8 @@ bool HtmlResource::is_shipped() const { return m_is_shipped; } bool HtmlResource::is_relocatable() const { return m_is_relocatable; } +bool HtmlResource::is_external() const { return m_is_external; } + void HtmlResource::write_resource(std::ostream &os) const { util::stream::pipe(*m_file.stream(), os); } diff --git a/src/odr/internal/html/html_service.hpp b/src/odr/internal/html/html_service.hpp index 804c37e5..2f27be90 100644 --- a/src/odr/internal/html/html_service.hpp +++ b/src/odr/internal/html/html_service.hpp @@ -7,67 +7,61 @@ namespace odr::internal::html { -class HtmlService : public abstract::HtmlService { +class HtmlDocumentService : public abstract::HtmlDocumentService { public: - HtmlService(HtmlConfig config, HtmlResourceLocator resource_locator, - std::vector> fragments); + HtmlDocumentService(HtmlConfig config, HtmlResourceLocator resource_locator); [[nodiscard]] const HtmlConfig &config() const override; [[nodiscard]] const HtmlResourceLocator &resource_locator() const override; - [[nodiscard]] const std::vector> & - fragments() const override; - private: HtmlConfig m_config; HtmlResourceLocator m_resource_locator; - std::vector> m_fragments; }; -class HtmlFragment : public abstract::HtmlFragment { +class HtmlFragmentService : public abstract::HtmlFragmentService { public: - HtmlFragment(std::string name, HtmlConfig config, - HtmlResourceLocator resource_locator); - - [[nodiscard]] std::string name() const override; + HtmlFragmentService(HtmlConfig config, HtmlResourceLocator resource_locator); [[nodiscard]] const HtmlConfig &config() const override; [[nodiscard]] const HtmlResourceLocator &resource_locator() const override; private: - std::string m_name; HtmlConfig m_config; HtmlResourceLocator m_resource_locator; }; class HtmlResource : public abstract::HtmlResource { public: - static odr::HtmlResource create(HtmlResourceType type, - const std::string &name, - const std::string &path, - const odr::File &file, bool is_shipped, - bool is_relocatable); + static odr::HtmlResource create(HtmlResourceType type, std::string mime_type, + std::string name, std::string path, + odr::File file, bool is_shipped, + bool is_relocatable, bool is_external); - HtmlResource(HtmlResourceType type, const std::string &name, - const std::string &path, const odr::File &file, bool is_shipped, - bool is_relocatable); + HtmlResource(HtmlResourceType type, std::string mime_type, std::string name, + std::string path, odr::File file, bool is_shipped, + bool is_relocatable, bool is_external); [[nodiscard]] HtmlResourceType type() const override; + [[nodiscard]] const std::string &mime_type() const override; [[nodiscard]] const std::string &name() const override; [[nodiscard]] const std::string &path() const override; [[nodiscard]] const odr::File &file() const override; [[nodiscard]] bool is_shipped() const override; [[nodiscard]] bool is_relocatable() const override; + [[nodiscard]] bool is_external() const override; void write_resource(std::ostream &os) const override; private: HtmlResourceType m_type; + std::string m_mime_type; std::string m_name; std::string m_path; odr::File m_file; - bool m_is_shipped; - bool m_is_relocatable; + bool m_is_shipped{}; + bool m_is_relocatable{}; + bool m_is_external{}; }; } // namespace odr::internal::html diff --git a/src/odr/internal/html/pdf2htmlex_wrapper.cpp b/src/odr/internal/html/pdf2htmlex_wrapper.cpp index b1443227..0d94fdd3 100644 --- a/src/odr/internal/html/pdf2htmlex_wrapper.cpp +++ b/src/odr/internal/html/pdf2htmlex_wrapper.cpp @@ -22,6 +22,8 @@ namespace { pdf2htmlEX::Param create_params(PDFDoc &pdf_doc, const HtmlConfig &config, const std::string &output_path) { + (void)config; + pdf2htmlEX::Param param; // pages @@ -111,7 +113,7 @@ class BackgroundImageResource : public HtmlResource { static std::string file_name(std::size_t page_number, const std::string &format) { std::stringstream stream; - stream << "bg" << page_number; + stream << "bg"; stream << std::hex << page_number; stream << "." << format; return stream.str(); @@ -120,14 +122,17 @@ class BackgroundImageResource : public HtmlResource { BackgroundImageResource( PopplerPdfFile pdf_file, std::string output_path, std::shared_ptr html_renderer, - std::shared_ptr html_renderer_mutex, int page_number, + std::shared_ptr html_renderer_mutex, + std::shared_ptr html_renderer_param, int page_number, const std::string &format) - : HtmlResource(HtmlResourceType::image, file_name(page_number, format), + : HtmlResource(HtmlResourceType::image, "image/jpg", + file_name(page_number, format), output_path + "/" + file_name(page_number, format), - odr::File(), false, false), + odr::File(), false, false, false), m_pdf_file{std::move(pdf_file)}, m_output_path{std::move(output_path)}, m_html_renderer{std::move(html_renderer)}, m_html_renderer_mutex{std::move(html_renderer_mutex)}, + m_html_renderer_param{std::move(html_renderer_param)}, m_page_number{page_number} {} void write_resource(std::ostream &os) const override { @@ -152,41 +157,41 @@ class BackgroundImageResource : public HtmlResource { std::string m_output_path; std::shared_ptr m_html_renderer; std::shared_ptr m_html_renderer_mutex; + std::shared_ptr m_html_renderer_param; int m_page_number; mutable std::mutex m_mutex; }; -class HtmlServiceImpl : public HtmlService { +class HtmlDocumentServiceImpl final : public HtmlDocumentService { public: - HtmlServiceImpl(PopplerPdfFile pdf_file, std::string output_path, - std::shared_ptr html_renderer, - std::shared_ptr html_renderer_mutex, - std::shared_ptr html_renderer_param, - HtmlConfig config, HtmlResourceLocator resource_locator) - : HtmlService(std::move(config), std::move(resource_locator), {}), + HtmlDocumentServiceImpl( + PopplerPdfFile pdf_file, std::string output_path, + std::shared_ptr html_renderer, + std::shared_ptr html_renderer_mutex, + std::shared_ptr html_renderer_param, HtmlConfig config, + HtmlResourceLocator resource_locator) + : HtmlDocumentService(std::move(config), std::move(resource_locator)), m_pdf_file{std::move(pdf_file)}, m_output_path{std::move(output_path)}, m_html_renderer{std::move(html_renderer)}, m_html_renderer_mutex{std::move(html_renderer_mutex)}, m_html_renderer_param{std::move(html_renderer_param)} { for (int i = 1; i <= m_pdf_file.pdf_doc().getNumPages(); ++i) { auto resource = std::make_shared( - m_pdf_file, m_output_path, m_html_renderer, m_html_renderer_mutex, i, - m_html_renderer_param->bg_format); + m_pdf_file, m_output_path, m_html_renderer, m_html_renderer_mutex, + m_html_renderer_param, i, m_html_renderer_param->bg_format); std::string file_name = BackgroundImageResource::file_name( i, m_html_renderer_param->bg_format); m_resources.emplace_back(std::move(resource), std::move(file_name)); } } - HtmlResources write_document(HtmlWriter &out) const override { - HtmlResources resources; - + HtmlResources write_document(HtmlWriter &out) const final { { std::ifstream in(m_output_path + "/document.html"); util::stream::pipe(in, out.out()); } - return resources; + return m_resources; } private: @@ -203,7 +208,7 @@ class HtmlServiceImpl : public HtmlService { namespace odr::internal { -odr::HtmlService +odr::HtmlDocumentService html::create_poppler_pdf_service(const PopplerPdfFile &pdf_file, const std::string &output_path, const HtmlConfig &config) { @@ -223,7 +228,7 @@ html::create_poppler_pdf_service(const PopplerPdfFile &pdf_file, // TODO not sure what the `progPath` is used for. it cannot be `nullptr` // TODO potentially just a cache dir? auto html_renderer = std::make_shared( - odr::GlobalParams::fontforge_data_path().c_str(), *html_renderer_param); + odr::GlobalParams::fontconfig_data_path().c_str(), *html_renderer_param); html_renderer->process(&pdf_doc); HtmlResourceLocator resource_locator = @@ -233,7 +238,7 @@ html::create_poppler_pdf_service(const PopplerPdfFile &pdf_file, // TODO check if this can be achieved in pdf2htmlEX auto html_renderer_mutex = std::make_shared(); - return odr::HtmlService(std::make_shared( + return odr::HtmlDocumentService(std::make_shared( pdf_file, output_path, std::move(html_renderer), std::move(html_renderer_mutex), std::move(html_renderer_param), config, resource_locator)); @@ -254,7 +259,7 @@ Html html::translate_poppler_pdf_file(const PopplerPdfFile &pdf_file, // TODO not sure what the `progPath` is used for. it cannot be `nullptr` // TODO potentially just a cache dir? - pdf2htmlEX::HTMLRenderer(odr::GlobalParams::fontforge_data_path().c_str(), + pdf2htmlEX::HTMLRenderer(odr::GlobalParams::fontconfig_data_path().c_str(), param) .process(&pdf_doc); diff --git a/src/odr/internal/html/pdf2htmlex_wrapper.hpp b/src/odr/internal/html/pdf2htmlex_wrapper.hpp index 30bd6c4a..4fdd4d93 100644 --- a/src/odr/internal/html/pdf2htmlex_wrapper.hpp +++ b/src/odr/internal/html/pdf2htmlex_wrapper.hpp @@ -6,7 +6,7 @@ namespace odr { struct HtmlConfig; class Html; -class HtmlService; +class HtmlDocumentService; } // namespace odr namespace odr::internal { @@ -15,9 +15,10 @@ class PopplerPdfFile; namespace odr::internal::html { -odr::HtmlService create_poppler_pdf_service(const PopplerPdfFile &pdf_file, - const std::string &output_path, - const HtmlConfig &config); +odr::HtmlDocumentService +create_poppler_pdf_service(const PopplerPdfFile &pdf_file, + const std::string &output_path, + const HtmlConfig &config); Html translate_poppler_pdf_file(const PopplerPdfFile &pdf_file, const std::string &output_path, diff --git a/test/src/html_output_test.cpp b/test/src/html_output_test.cpp index 61315d37..06284345 100644 --- a/test/src/html_output_test.cpp +++ b/test/src/html_output_test.cpp @@ -129,7 +129,8 @@ TEST_P(HtmlOutputTests, html_meta) { OpenDocumentReader::copy_resources(resource_path); HtmlConfig config; - config.embed_resources = false; + config.embed_images = true; + config.embed_shipped_resources = false; config.external_resource_path = resource_path; config.relative_resource_paths = true; config.editable = true;