From f683779ea174371c36cf771f928115471670c271 Mon Sep 17 00:00:00 2001 From: Andreas Stefl Date: Sat, 28 Sep 2024 15:23:09 +0200 Subject: [PATCH] wvware progress --- src/odr/file.cpp | 4 ++ src/odr/file.hpp | 4 +- src/odr/html.cpp | 46 +++++++++++++----- src/odr/html.hpp | 27 +++++++++-- .../oldms_wvware/wvware_oldms_file.cpp | 47 ++++++++++++++----- .../oldms_wvware/wvware_oldms_file.hpp | 7 ++- src/odr/internal/open_strategy.cpp | 34 ++++++++------ src/odr/open_document_reader.cpp | 8 ++-- test/src/html_output_test.cpp | 6 ++- 9 files changed, 131 insertions(+), 52 deletions(-) diff --git a/src/odr/file.cpp b/src/odr/file.cpp index 7a189f18..75b9f944 100644 --- a/src/odr/file.cpp +++ b/src/odr/file.cpp @@ -244,6 +244,10 @@ DocumentMeta DocumentFile::document_meta() const { Document DocumentFile::document() const { return Document(m_impl->document()); } +std::shared_ptr DocumentFile::impl() const { + return m_impl; +} + PdfFile::PdfFile(std::shared_ptr impl) : DecodedFile(impl), m_impl{std::move(impl)} {} diff --git a/src/odr/file.hpp b/src/odr/file.hpp index 72013e19..1506fb04 100644 --- a/src/odr/file.hpp +++ b/src/odr/file.hpp @@ -98,7 +98,7 @@ enum class FileLocation { enum class DecoderEngine { odr, poppler, - wv_ware, + wvware, }; /// @brief Preference for decoding files. @@ -267,6 +267,8 @@ class DocumentFile final : public DecodedFile { [[nodiscard]] Document document() const; + [[nodiscard]] std::shared_ptr impl() const; + private: std::shared_ptr m_impl; }; diff --git a/src/odr/html.cpp b/src/odr/html.cpp index b3dd6a78..5caeebf8 100644 --- a/src/odr/html.cpp +++ b/src/odr/html.cpp @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include @@ -75,11 +77,9 @@ Html html::translate(const DecodedFile &decoded_file, } else if (decoded_file.is_image_file()) { return translate(decoded_file.image_file(), output_path, config); } else if (decoded_file.is_archive_file()) { - return translate(decoded_file.archive_file().archive(), output_path, - config); + return translate(decoded_file.archive_file(), output_path, config); } else if (decoded_file.is_document_file()) { - return translate(decoded_file.document_file().document(), output_path, - config); + return translate(decoded_file.document_file(), output_path, config); } else if (decoded_file.is_pdf_file()) { return translate(decoded_file.pdf_file(), output_path, config); } @@ -99,17 +99,24 @@ Html html::translate(const ImageFile &image_file, return internal::html::translate_image_file(image_file, output_path, config); } -Html html::translate(const Archive &archive, const std::string &output_path, - const HtmlConfig &config) { - fs::create_directories(output_path); - return internal::html::translate_filesystem( - FileType::unknown, archive.filesystem(), output_path, config); +Html html::translate(const ArchiveFile &archive_file, + const std::string &output_path, const HtmlConfig &config) { + return translate(archive_file.archive(), output_path, config); } -Html html::translate(const Document &document, const std::string &output_path, - const HtmlConfig &config) { - fs::create_directories(output_path); - return internal::html::translate_document(document, output_path, config); +Html html::translate(const DocumentFile &document_file, + const std::string &output_path, const HtmlConfig &config) { + auto document_file_impl = document_file.impl(); + + if (auto wv_document_file = + std::dynamic_pointer_cast( + document_file_impl)) { + fs::create_directories(output_path); + return internal::html::translate_wvware_oldms_file(*wv_document_file, + output_path, config); + } + + return translate(document_file.document(), output_path, config); } Html html::translate(const PdfFile &pdf_file, const std::string &output_path, @@ -126,6 +133,19 @@ Html html::translate(const PdfFile &pdf_file, const std::string &output_path, return internal::html::translate_pdf_file(pdf_file, output_path, config); } +Html html::translate(const Archive &archive, const std::string &output_path, + const HtmlConfig &config) { + fs::create_directories(output_path); + return internal::html::translate_filesystem( + FileType::unknown, archive.filesystem(), output_path, config); +} + +Html html::translate(const Document &document, const std::string &output_path, + const HtmlConfig &config) { + fs::create_directories(output_path); + return internal::html::translate_document(document, output_path, config); +} + void html::edit(const Document &document, const char *diff) { auto json = nlohmann::json::parse(diff); for (const auto &[key, value] : json["modifiedText"].items()) { diff --git a/src/odr/html.hpp b/src/odr/html.hpp index ecf9199c..7996cb18 100644 --- a/src/odr/html.hpp +++ b/src/odr/html.hpp @@ -144,20 +144,20 @@ Html translate(const ImageFile &image_file, const std::string &output_path, const HtmlConfig &config); /// @brief Translates an archive to HTML. /// -/// @param archive Archive to translate. +/// @param archive Archive file to translate. /// @param output_path Path to save the HTML output. /// @param config Configuration for the HTML output. /// @return HTML output. -Html translate(const Archive &archive, const std::string &output_path, +Html translate(const ArchiveFile &archive_file, const std::string &output_path, const HtmlConfig &config); /// @brief Translates a document to HTML. /// -/// @param document Document to translate. +/// @param document_file Document file to translate. /// @param output_path Path to save the HTML output. /// @param config Configuration for the HTML output. /// @return HTML output. -Html translate(const Document &document, const std::string &output_path, - const HtmlConfig &config); +Html translate(const DocumentFile &document_file, + const std::string &output_path, const HtmlConfig &config); /// @brief Translates a PDF file to HTML. /// /// @param pdf_file PDF file to translate. @@ -167,6 +167,23 @@ Html translate(const Document &document, const std::string &output_path, Html translate(const PdfFile &pdf_file, const std::string &output_path, const HtmlConfig &config); +/// @brief Translates an archive to HTML. +/// +/// @param archive Archive to translate. +/// @param output_path Path to save the HTML output. +/// @param config Configuration for the HTML output. +/// @return HTML output. +Html translate(const Archive &archive, const std::string &output_path, + const HtmlConfig &config); +/// @brief Translates a document to HTML. +/// +/// @param document Document to translate. +/// @param output_path Path to save the HTML output. +/// @param config Configuration for the HTML output. +/// @return HTML output. +Html translate(const Document &document, const std::string &output_path, + const HtmlConfig &config); + /// @brief Edits a document with a diff. /// /// @note The diff is generated by our JavaScript code in the browser. diff --git a/src/odr/internal/oldms_wvware/wvware_oldms_file.cpp b/src/odr/internal/oldms_wvware/wvware_oldms_file.cpp index d5177692..6dcd6d1d 100644 --- a/src/odr/internal/oldms_wvware/wvware_oldms_file.cpp +++ b/src/odr/internal/oldms_wvware/wvware_oldms_file.cpp @@ -6,6 +6,8 @@ #include #include +#include +#include #include namespace odr::internal { @@ -13,9 +15,34 @@ namespace odr::internal { WvWareLegacyMicrosoftFile::WvWareLegacyMicrosoftFile( std::shared_ptr file) : m_file{std::move(file)} { + GError *error = nullptr; + + m_gsf_input = + gsf_input_stdio_new(m_file->disk_path()->string().c_str(), &error); + + if (m_gsf_input == nullptr) { + throw std::runtime_error("gsf_input_stdio_new failed"); + } + + open(); +} + +WvWareLegacyMicrosoftFile::WvWareLegacyMicrosoftFile( + std::shared_ptr file) + : m_file{std::move(file)} { + m_gsf_input = gsf_input_memory_new( + reinterpret_cast(m_file->memory_data()), + static_cast(m_file->size()), false); + + open(); +} + +WvWareLegacyMicrosoftFile::~WvWareLegacyMicrosoftFile() { wvOLEFree(&m_ps); } + +void WvWareLegacyMicrosoftFile::open() { wvInit(); - char *path = const_cast(m_file->disk_path()->string().c_str()); - int ret = wvInitParser(&m_ps, path); + + int ret = wvInitParser_gsf(&m_ps, m_gsf_input); // check if password is required if ((ret & 0x8000) != 0) { @@ -27,7 +54,7 @@ WvWareLegacyMicrosoftFile::WvWareLegacyMicrosoftFile( ret = 0; } } else { - m_encryption_state = EncryptionState::decrypted; + m_encryption_state = EncryptionState::not_encrypted; } if (ret != 0) { @@ -36,32 +63,28 @@ WvWareLegacyMicrosoftFile::WvWareLegacyMicrosoftFile( } } -WvWareLegacyMicrosoftFile::~WvWareLegacyMicrosoftFile() { wvOLEFree(&m_ps); } - std::shared_ptr WvWareLegacyMicrosoftFile::file() const noexcept { return m_file; } FileType WvWareLegacyMicrosoftFile::file_type() const noexcept { - return {}; // TODO + return FileType::legacy_word_document; } FileMeta WvWareLegacyMicrosoftFile::file_meta() const noexcept { - return {}; // TODO + return {file_type(), password_encrypted(), document_meta()}; } DecoderEngine WvWareLegacyMicrosoftFile::decoder_engine() const noexcept { - return DecoderEngine::wv_ware; + return DecoderEngine::wvware; } DocumentType WvWareLegacyMicrosoftFile::document_type() const { - return {}; // TODO + return DocumentType::text; } -DocumentMeta WvWareLegacyMicrosoftFile::document_meta() const { - return {}; // TODO -} +DocumentMeta WvWareLegacyMicrosoftFile::document_meta() const { return {}; } bool WvWareLegacyMicrosoftFile::password_encrypted() const noexcept { return m_encryption_state == EncryptionState::encrypted || diff --git a/src/odr/internal/oldms_wvware/wvware_oldms_file.hpp b/src/odr/internal/oldms_wvware/wvware_oldms_file.hpp index 3d80717f..6b553bf5 100644 --- a/src/odr/internal/oldms_wvware/wvware_oldms_file.hpp +++ b/src/odr/internal/oldms_wvware/wvware_oldms_file.hpp @@ -13,6 +13,7 @@ namespace odr::internal::common { class DiskFile; +class MemoryFile; } // namespace odr::internal::common namespace odr::internal { @@ -20,6 +21,7 @@ namespace odr::internal { class WvWareLegacyMicrosoftFile final : public abstract::DocumentFile { public: explicit WvWareLegacyMicrosoftFile(std::shared_ptr file); + explicit WvWareLegacyMicrosoftFile(std::shared_ptr file); ~WvWareLegacyMicrosoftFile() final; [[nodiscard]] std::shared_ptr file() const noexcept final; @@ -40,12 +42,15 @@ class WvWareLegacyMicrosoftFile final : public abstract::DocumentFile { [[nodiscard]] wvParseStruct &parse_struct() const; private: - std::shared_ptr m_file; + std::shared_ptr m_file; + GsfInput *m_gsf_input{}; EncryptionState m_encryption_state{EncryptionState::unknown}; wvParseStruct m_ps{}; int m_encryption_flag{}; + + void open(); }; } // namespace odr::internal diff --git a/src/odr/internal/open_strategy.cpp b/src/odr/internal/open_strategy.cpp index aab82396..aa50941f 100644 --- a/src/odr/internal/open_strategy.cpp +++ b/src/odr/internal/open_strategy.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -101,10 +102,8 @@ open_strategy::engines(const std::shared_ptr &file, result.push_back(DecoderEngine::odr); - if (as == FileType::legacy_word_document || - as == FileType::legacy_powerpoint_presentation || - as == FileType::legacy_excel_worksheets) { - result.push_back(DecoderEngine::wv_ware); + if (as == FileType::legacy_word_document) { + result.push_back(DecoderEngine::wvware); } if (as == FileType::portable_document_format) { @@ -251,6 +250,15 @@ open_strategy::open_file(std::shared_ptr file, FileType as, } return nullptr; } + if (with == DecoderEngine::wvware) { + try { + auto memory_file = std::make_shared(*file); + return std::make_unique( + std::move(memory_file)); + } catch (...) { + } + return nullptr; + } return nullptr; } @@ -363,11 +371,10 @@ open_strategy::open_file(std::shared_ptr file, FileType as, std::unique_ptr open_strategy::open_file(std::shared_ptr file, const DecodePreference &preference) { - std::vector probe_types = - preference.as_file_type.has_value() - ? std::vector{*preference.as_file_type} - : preference.file_type_priority; - { + std::vector probe_types; + if (preference.as_file_type.has_value()) { + probe_types.push_back(*preference.as_file_type); + } else { std::vector detected_types = types(file); probe_types.insert(probe_types.end(), detected_types.begin(), detected_types.end()); @@ -376,11 +383,10 @@ open_strategy::open_file(std::shared_ptr file, } for (FileType as : probe_types) { - std::vector probe_engines = - preference.with_engine.has_value() - ? std::vector{*preference.with_engine} - : preference.engine_priority; - { + std::vector probe_engines; + if (preference.with_engine.has_value()) { + probe_engines.push_back(*preference.with_engine); + } else { std::vector detected_engines = engines(file, as); probe_engines.insert(probe_engines.end(), detected_engines.begin(), detected_engines.end()); diff --git a/src/odr/open_document_reader.cpp b/src/odr/open_document_reader.cpp index 52b35c3f..d2da7fa6 100644 --- a/src/odr/open_document_reader.cpp +++ b/src/odr/open_document_reader.cpp @@ -169,8 +169,8 @@ std::string OpenDocumentReader::engine_to_string(const DecoderEngine engine) { return "odr"; } else if (engine == DecoderEngine::poppler) { return "poppler"; - } else if (engine == DecoderEngine::wv_ware) { - return "wv_ware"; + } else if (engine == DecoderEngine::wvware) { + return "wvware"; } throw UnknownDecoderEngine(); } @@ -180,8 +180,8 @@ DecoderEngine OpenDocumentReader::engine_by_name(const std::string &name) { return DecoderEngine::odr; } else if (name == "poppler") { return DecoderEngine::poppler; - } else if (name == "wv_ware") { - return DecoderEngine::wv_ware; + } else if (name == "wvware") { + return DecoderEngine::wvware; } throw UnknownDecoderEngine(); } diff --git a/test/src/html_output_test.cpp b/test/src/html_output_test.cpp index 80c033e3..6430f0be 100644 --- a/test/src/html_output_test.cpp +++ b/test/src/html_output_test.cpp @@ -35,7 +35,6 @@ using HtmlOutputTests = ::testing::TestWithParam; TEST_P(HtmlOutputTests, html_meta) { const TestParams ¶ms = GetParam(); const TestFile &test_file = params.test_file; - const std::string &test_file_path = params.path; const DecoderEngine engine = params.engine; const std::string &test_repo = params.test_repo; const std::string &output_path = params.output_path; @@ -48,7 +47,6 @@ TEST_P(HtmlOutputTests, html_meta) { // these files cannot be opened if (util::string::ends_with(test_file.short_path, ".sxw") || - (test_file.type == FileType::legacy_word_document) || (test_file.type == FileType::legacy_powerpoint_presentation) || (test_file.type == FileType::legacy_excel_worksheets) || (test_file.type == FileType::word_perfect) || @@ -187,6 +185,10 @@ std::vector list_test_params() { if (test_file.type == FileType::portable_document_format) { engines.push_back(DecoderEngine::poppler); } + if (test_file.type == FileType::legacy_word_document) { + engines.clear(); + engines.push_back(DecoderEngine::wvware); + } for (const DecoderEngine engine : engines) { params.push_back(create_test_params(test_file, engine));