diff --git a/kindle2latex/.gitignore b/kindle2latex/.gitignore
new file mode 100644
index 0000000..4a0b530
--- /dev/null
+++ b/kindle2latex/.gitignore
@@ -0,0 +1,74 @@
+# This file is used to ignore files which are generated
+# ----------------------------------------------------------------------------
+
+*~
+*.autosave
+*.a
+*.core
+*.moc
+*.o
+*.obj
+*.orig
+*.rej
+*.so
+*.so.*
+*_pch.h.cpp
+*_resource.rc
+*.qm
+.#*
+*.*#
+core
+!core/
+tags
+.DS_Store
+.directory
+*.debug
+Makefile*
+*.prl
+*.app
+moc_*.cpp
+ui_*.h
+qrc_*.cpp
+Thumbs.db
+*.res
+*.rc
+/.qmake.cache
+/.qmake.stash
+
+# qtcreator generated files
+*.pro.user*
+CMakeLists.txt.user*
+
+# xemacs temporary files
+*.flc
+
+# Vim temporary files
+.*.swp
+
+# Visual Studio generated files
+*.ib_pdb_index
+*.idb
+*.ilk
+*.pdb
+*.sln
+*.suo
+*.vcproj
+*vcproj.*.*.user
+*.ncb
+*.sdf
+*.opensdf
+*.vcxproj
+*vcxproj.*
+
+# MinGW generated files
+*.Debug
+*.Release
+
+# Python byte code
+*.pyc
+
+# Binaries
+# --------
+*.dll
+*.exe
+
diff --git a/kindle2latex/fsm.h b/kindle2latex/fsm.h
new file mode 100644
index 0000000..d674496
--- /dev/null
+++ b/kindle2latex/fsm.h
@@ -0,0 +1,295 @@
+#pragma once
+
+#include
+#include
+#include
+
+/** @brief What to do when no transition is defined for a (State, Event) pair. */
+enum class missing_transition_policy {
+    /** @brief Pair is required: validate_events() static_asserts that its transition exists. */
+    strict,
+
+    /** @brief Pair may have no transition; dispatching the event is then a logged no-op. */
+    ignore
+};
+
+/** @brief Marker type: the transition table yields it when no transition is defined. */
+struct no_transition {};
+
+/** @brief Default guard: lets every transition through (always returns true). */
+struct always_allow {
+    template
+    bool operator() (TContext &, const TFrom &, const TEvent &) const noexcept {
+        return true;
+    }
+};
+
+/** @brief Default action: do nothing. */
+struct do_nothing {
+    template
+    void operator() (TContext &, const TFrom &, const TTo &, const TEvent &) const noexcept {
+    }
+};
+
+/**
+ * @brief Transition descriptor: bundles target state, guard and action as member types.
+ *
+ * @tparam TTo Destination state type.
+ * @tparam TGuard Guard functor type.
+ * @tparam TAction Action functor type.
+ */
+template
+struct transition_to {
+    using to_state = TTo;
+    using guard = TGuard;
+    using action = TAction;
+};
+
+/**
+ * @brief Transition table mapping: specialize to define transitions.
+ * Default: no_transition (the pair has no transition).
+ */
+template
+struct transition {
+    using type = no_transition;
+};
+
+/** @brief Optional display name of a state or event for logger calls; "?" unless specialized. */
+template
+struct name_of {
+    static constexpr const char *value = "?";
+};
+
+/**
+ * @brief Logger interface (default no-op).
+ *
+ * You can implement:
+ * - ignored_transition(from, event)   // missing transition, policy=ignore
+ * - unhandled_transition(from, event) // policy=strict but missing transition at runtime
+ * - guard_blocked(from, event)        // transition exists but its guard returned false
+ */
+struct null_logger {
+    void ignored_transition (const char *, const char *) noexcept {}
+    void unhandled_transition (const char *, const char *) noexcept {}
+    void guard_blocked (const char *, const char *) noexcept {}
+};
+
+/** @brief Hooks: state-only (independent of the event); empty unless specialized. */
+template
+struct on_exit_state {
+    template
+    static void call (TContext &, const TState &) noexcept {}
+};
+
+template
+struct on_entry_state {
+    template
+    static void call (TContext &, const TState &) noexcept {}
+};
+
+/** @brief Hooks: event-specific (also receive the event); empty unless specialized. */
+template
+struct on_exit_event {
+    template
+    static void call (TContext &, const TState &, const TEvent &) noexcept {}
+};
+
+template
+struct on_entry_event {
+    template
+    static void call (TContext &, const TState &, const TEvent &) noexcept {}
+};
+
+/**
+ * @brief Factory: construct destination state.
+ *
+ * TContext is a parameter of the struct itself, so make() is not a template.
+ *
+ * Default: To{}.
+ * Specialize state_factory as needed.
+ */
+template
+struct state_factory {
+    static TTo make (TContext &, const TFrom &, const TEvent &) {
+        return TTo{};
+    }
+};
+
+/**
+ * @brief Per (State,Event) missing transition policy.
+ *
+ * Default is ignore. Mark only required pairs strict.
+ */
+template
+struct missing_transition {
+    static constexpr missing_transition_policy policy = missing_transition_policy::ignore;
+};
+
+/** @brief Helper: is T one of Ts... */
+template
+struct is_one_of : std::disjunction...> {};
+
+/** @brief Helper: true when the transition table entry is not no_transition. */
+template
+struct has_transition
+    : std::bool_constant < !std::is_same_v::type, no_transition >> {};
+
+/** @brief Factory validation (call-form): primary template, used when make(...) is not callable. */
+template
+struct is_factory_invocable : std::false_type {};
+
+template
+struct is_factory_invocable <
+    TContext, TFrom, TEvent, TTo,
+std::void_t::make (
+    std::declval(),
+    std::declval(),
+    std::declval())) >>
+    : std::bool_constant::make (
+    std::declval(),
+    std::declval(),
+    std::declval())),
+    TTo>> {};
+
+/**
+ * @brief FSM with runtime state storage and explicit compile-time validation.
+ *
+ * dispatch():
+ * - never static_asserts on missing transitions (avoids State x Event explosion with std::visit)
+ * - handles missing transitions via missing_transition::policy (ignore/strict)
+ *
+ * validate_events():
+ * - compile-time checks ONLY for pairs marked strict.
+ */
+template
+class fsm {
+  public:
+    using state_variant = std::variant;
+
+    template ::value>>
+    explicit fsm (TContext &ctx, TInitial initial, TLogger logger = TLogger{})
+        : m_ctx (ctx)
+        , m_logger (std::move (logger))
+        , m_state (std::move (initial)) {
+        std::visit (init_entry_visitor{*this}, m_state);
+    }
+
+    template
+    bool dispatch (const TEvent &ev) {
+        dispatch_visitor v{*this, ev};
+        return std::visit (v, m_state);
+    }
+
+    template
+    static constexpr void validate_events() {
+        (validate_one_event(), ...);
+    }
+
+    const state_variant &state() const noexcept {
+        return m_state;
+    }
+    state_variant &state() noexcept {
+        return m_state;
+    }
+
+  private:
+    template
+    struct dispatch_visitor {
+        fsm &self;
+        const TEvent &ev;
+
+        template
+        bool operator() (TFrom &from) const {
+            return self.template dispatch_from (from, ev);
+        }
+    };
+
+    struct init_entry_visitor {
+        fsm &self;
+        template
+        void operator() (TState &st) const {
+            on_entry_state::call (self.m_ctx, st);
+        }
+    };
+
+    template
+    struct entry_after_commit_visitor {
+        fsm &self;
+        const TEvent &ev;
+
+        template
+        void operator() (TState &st) const {
+            on_entry_state::call (self.m_ctx, st);
+            on_entry_event::call (self.m_ctx, st, ev);
+        }
+    };
+
+    template
+    bool dispatch_from (TFrom &from, const TEvent &ev) {
+        using tr = typename transition::type;
+
+        if constexpr (std::is_same_v) {
+            if constexpr (missing_transition::policy == missing_transition_policy::strict)
+                m_logger.unhandled_transition (name_of::value, name_of::value);
+            else
+                m_logger.ignored_transition (name_of::value, name_of::value);
+            return false;
+        } else {
+            using to_state = typename tr::to_state;
+            using guard_t = typename tr::guard;
+            using action_t = typename tr::action;
+
+            static_assert (is_one_of::value,
+                           "FSM: transition target state not in FSM state list.");
+
+            static_assert (std::is_invocable_r_v,
+                           "FSM: guard must be callable as bool(Context&, const From&, const Event&).");
+
+            static_assert (std::is_invocable_r_v,
+                           "FSM: action must be callable as void(Context&, const From&, const To&, const Event&).");
+
+            static_assert (is_factory_invocable::value,
+                           "FSM: state_factory::make must be callable as "
+                           "To make(Context&, const From&, const Event&).");
+
+            guard_t guard{};
+            if (!guard (m_ctx, from, ev)) {
+                m_logger.guard_blocked (name_of::value, name_of::value);
+                return false;
+            }
+
+            on_exit_event::call (m_ctx, from, ev);
+            on_exit_state::call (m_ctx, from);
+
+            to_state to = state_factory::make (m_ctx, from, ev);
+
+            action_t action{};
+            action (m_ctx, from, to, ev);
+
+            m_state = std::move (to); // 'from' aliases the old content of m_state: do not use it below
+
+            std::visit (entry_after_commit_visitor {*this, ev}, m_state);
+            return true;
+        }
+    }
+
+    template
+    static constexpr void validate_one_event() {
+        (validate_pair(), ...);
+    }
+
+    template
+    static constexpr void validate_pair() {
+        if constexpr (missing_transition::policy == missing_transition_policy::strict) {
+            static_assert (has_transition::value,
+                           "FSM validation: required (strict) transition is missing for (State, Event).");
+        }
+    }
+
+  private:
+    TContext &m_ctx;
+    TLogger m_logger;
+    state_variant m_state;
+};
diff --git a/kindle2latex/kindle2latex.pro b/kindle2latex/kindle2latex.pro
new file mode 100644
index 0000000..51a0ecc
--- /dev/null
+++ b/kindle2latex/kindle2latex.pro
@@ -0,0 +1,10 @@
+TEMPLATE = app
+CONFIG += console c++17
+CONFIG -= app_bundle
+CONFIG -= qt
+
+SOURCES += \
+    main.cpp
+
+HEADERS += \
+    fsm.h
diff --git a/kindle2latex/main.cpp b/kindle2latex/main.cpp
new file mode 100644
index 0000000..131c0bc
--- /dev/null
+++ b/kindle2latex/main.cpp
@@ -0,0 +1,415 @@
+#include "fsm.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+// ------------------------------------------------------------
+// Data model
+// ------------------------------------------------------------
+
+/** @brief One clipping: its metadata line and the text lines collected after it. */
+struct Quote {
+    std::string meta;
+    std::vector text_lines;
+};
+
+/** @brief Drop a single trailing '\r' so that CRLF-terminated lines read like LF ones. */
+static std::string rstrip_cr (std::string s) {
+    if (!s.empty() && s.back() == '\r')
+        s.pop_back();
+    return s;
+}
+
+/**
+ * @brief Remove a leading UTF-8 byte-order mark (bytes EF BB BF) from a line.
+ *
+ * Clipping files are commonly saved as UTF-8 with a BOM; left in place it would
+ * become part of the title used as grouping key and leak into the LaTeX output.
+ */
+static std::string strip_utf8_bom (std::string s) {
+    if (s.compare (0, 3, "\xEF\xBB\xBF") == 0)
+        s.erase (0, 3);
+    return s;
+}
+
+/** @brief Return a copy of s with LaTeX special characters replaced by escaped forms. */
+static std::string latex_escape (const std::string &s) {
+    // Conservative escaping for LaTeX special chars.
+    std::string out;
+    out.reserve (s.size() + s.size() / 8);
+    for (const unsigned char ch : s) {
+        switch (ch) {
+        case '\\':
+            out += "\\textbackslash{}";
+            break;
+        case '{':
+            out += "\\{";
+            break;
+        case '}':
+            out += "\\}";
+            break;
+        case '%':
+            out += "\\%";
+            break;
+        case '$':
+            out += "\\$";
+            break;
+        case '#':
+            out += "\\#";
+            break;
+        case '&':
+            out += "\\&";
+            break;
+        case '_':
+            out += "\\_";
+            break;
+        case '^':
+            out += "\\textasciicircum{}";
+            break;
+        case '~':
+            out += "\\textasciitilde{}";
+            break;
+        default:
+            out.push_back (static_cast (ch));
+            break;
+        }
+    }
+    return out;
+}
+
+// ------------------------------------------------------------
+// FSM: parsing Kindle "My Clippings" style format
+// ------------------------------------------------------------
+
+struct StExpectTitle {};
+struct StExpectMeta {};
+struct StExpectBlank {};
+struct StCollectText {};
+
+struct EvTitle {
+    std::string s;
+};
+struct EvMeta {
+    std::string s;
+};
+struct EvBlank {};
+struct EvText {
+    std::string s;
+};
+struct EvSep {};
+struct EvEof {};
+
+/** @brief Parser state shared with the FSM actions: finished quotes per title plus the quote in progress. */
+struct ParserContext {
+    // group storage
+    std::vector order;
+    std::unordered_map> by_title;
+    std::unordered_map seen;
+
+    // currently parsed quote
+    std::string cur_title;
+    std::string cur_meta;
+    std::vector cur_text;
+
+    void start_title (std::string t) {
+        cur_title = std::move (t);
+        cur_meta.clear();
+        cur_text.clear();
+    }
+    void set_meta (std::string m) {
+        cur_meta = std::move (m);
+    }
+    void add_text (std::string line) {
+        cur_text.push_back (std::move (line));
+    }
+
+    /**
+     * @brief Store the current quote under its title, then reset meta and text.
+     *
+     * A quote without a title or without a meta line is dropped. cur_title is kept.
+     */
+    void finalize_quote_if_ready() {
+        if (cur_title.empty() || cur_meta.empty()) {
+            cur_meta.clear();
+            cur_text.clear();
+            return;
+        }
+
+        if (seen.find (cur_title) == seen.end()) {
+            seen.emplace (cur_title, order.size());
+            order.push_back (cur_title);
+        }
+
+        by_title[cur_title].push_back (Quote{std::move (cur_meta), std::move (cur_text)});
+
+        cur_meta.clear(); // moved-from: clear() puts both back into a known empty state
+        cur_text.clear();
+    }
+};
+
+struct SilentLogger : null_logger {};
+
+// Actions
+struct ActSetTitle {
+    void operator() (ParserContext &ctx, const StExpectTitle &, const StExpectMeta &, const EvTitle &ev) const {
+        ctx.start_title (ev.s);
+    }
+};
+
+struct ActSetMeta {
+    void operator() (ParserContext &ctx, const StExpectMeta &, const StExpectBlank &, const EvMeta &ev) const {
+        ctx.set_meta (ev.s);
+    }
+};
+
+struct ActAddText {
+    template
+    void operator() (ParserContext &ctx, const TFrom &, const StCollectText &, const EvText &ev) const {
+        ctx.add_text (ev.s);
+    }
+};
+
+struct ActFinalizeOnSep {
+    template
+    void operator() (ParserContext &ctx, const TFrom &, const StExpectTitle &, const EvSep &) const {
+        ctx.finalize_quote_if_ready();
+    }
+};
+
+struct ActFinalizeOnEof {
+    template
+    void operator() (ParserContext &ctx, const TFrom &, const StExpectTitle &, const EvEof &) const {
+        ctx.finalize_quote_if_ready();
+    }
+};
+
+// Transitions
+template <> struct transition {
+    using type = transition_to;
+};
+template <> struct transition {
+    using type = transition_to;
+};
+template <> struct transition {
+    using type = transition_to;
+};
+
+template <> struct transition {
+    using type = transition_to;
+};
+
+template <> struct transition {
+    using type = transition_to;
+};
+template <> struct transition {
+    using type = transition_to;
+};
+
+template <> struct transition {
+    using type = transition_to;
+};
+template <> struct transition {
+    using type = transition_to;
+};
+template <> struct transition {
+    using type = transition_to;
+};
+
+// Strict contract for the pairs we dispatch.
+template <> struct missing_transition {
+    static constexpr missing_transition_policy policy = missing_transition_policy::strict;
+};
+template <> struct missing_transition {
+    static constexpr missing_transition_policy policy = missing_transition_policy::strict;
+};
+template <> struct missing_transition {
+    static constexpr missing_transition_policy policy = missing_transition_policy::strict;
+};
+
+template <> struct missing_transition {
+    static constexpr missing_transition_policy policy = missing_transition_policy::strict;
+};
+
+template <> struct missing_transition {
+    static constexpr missing_transition_policy policy = missing_transition_policy::strict;
+};
+template <> struct missing_transition {
+    static constexpr missing_transition_policy policy = missing_transition_policy::strict;
+};
+
+template <> struct missing_transition {
+    static constexpr missing_transition_policy policy = missing_transition_policy::strict;
+};
+template <> struct missing_transition {
+    static constexpr missing_transition_policy policy = missing_transition_policy::strict;
+};
+template <> struct missing_transition {
+    static constexpr missing_transition_policy policy = missing_transition_policy::strict;
+};
+
+using ParserFsm = fsm;
+
+/** @brief Instantiates the compile-time check that every pair marked strict has a transition. */
+static constexpr void validate_fsm_contract() {
+    ParserFsm::validate_events();
+}
+
+// ------------------------------------------------------------
+// CLI
+// ------------------------------------------------------------
+
+struct CliArgs {
+    std::string input;
+    std::string output;
+};
+
+/** @brief Print the one-line usage text to stderr. */
+static void print_usage (const char *argv0) {
+    std::cerr << "Usage: " << argv0 << " --input <clippings.txt> --output <out.tex>\n";
+}
+
+/**
+ * @brief Parse --input/-i, --output/-o and --help/-h into out.
+ *
+ * @return true when both paths were given; false (usage already printed) otherwise,
+ *         including when help was requested or an unknown option was seen.
+ */
+static bool parse_args (int argc, char **argv, CliArgs &out) {
+    static option long_opts[] = {
+        {"input", required_argument, nullptr, 'i'},
+        {"output", required_argument, nullptr, 'o'},
+        {"help", no_argument, nullptr, 'h'},
+        {nullptr, 0, nullptr, 0 }
+    };
+
+    int c = 0;
+    while ((c = ::getopt_long (argc, argv, "i:o:h", long_opts, nullptr)) != -1) {
+        switch (c) {
+        case 'i':
+            out.input = optarg;
+            break;
+        case 'o':
+            out.output = optarg;
+            break;
+        case 'h':
+            print_usage (argv[0]);
+            return false;
+        default:
+            print_usage (argv[0]);
+            return false;
+        }
+    }
+
+    if (out.input.empty() || out.output.empty()) {
+        print_usage (argv[0]);
+        return false;
+    }
+    return true;
+}
+
+// ------------------------------------------------------------
+// Conversion
+// ------------------------------------------------------------
+
+/**
+ * @brief Parse the clippings file at in_path and write LaTeX grouped by title to out_path.
+ *
+ * @return 0 on success, 2 if the input cannot be opened, 3 if the output cannot
+ *         be opened, 4 if writing the output fails.
+ */
+static int convert (const std::string &in_path, const std::string &out_path) {
+    std::ifstream in (in_path);
+    if (!in.is_open()) {
+        std::cerr << "Failed to open input file: " << in_path << " (" << std::strerror (errno) << ")\n";
+        return 2;
+    }
+
+    ParserContext ctx{};
+    ParserFsm fsm (ctx, StExpectTitle{}, SilentLogger{});
+
+    std::string line;
+    while (std::getline (in, line)) {
+        line = strip_utf8_bom (rstrip_cr (std::move (line)));
+
+        if (line == "==========") {
+            fsm.dispatch (EvSep{});
+            continue;
+        }
+
+        const bool is_blank = line.empty();
+        const auto &st = fsm.state();
+
+        if (std::holds_alternative (st)) {
+            if (is_blank)
+                fsm.dispatch (EvBlank{});
+            else
+                fsm.dispatch (EvTitle{line});
+        } else if (std::holds_alternative (st)) {
+            // Kindle format expects meta here. If missing, keep going.
+            fsm.dispatch (EvMeta{line});
+        } else if (std::holds_alternative (st)) {
+            if (is_blank)
+                fsm.dispatch (EvBlank{});
+            else
+                fsm.dispatch (EvText{line}); // some clippings have no blank line
+        } else {
+            // StCollectText
+            fsm.dispatch (EvText{line});
+        }
+    }
+
+    fsm.dispatch (EvEof{});
+
+    std::ofstream out (out_path, std::ios::trunc);
+    if (!out.is_open()) {
+        std::cerr << "Failed to open output file: " << out_path << " (" << std::strerror (errno) << ")\n";
+        return 3;
+    }
+
+    for (std::size_t i = 0; i < ctx.order.size(); ++i) {
+        const auto &title = ctx.order[i];
+        out << "\\section {" << latex_escape (title) << "}\n";
+
+        const auto it = ctx.by_title.find (title);
+        if (it == ctx.by_title.end())
+            continue;
+
+        for (const auto &q : it->second) {
+            out << " \\subsection {" << latex_escape (q.meta) << "}\n";
+            for (const auto &tl : q.text_lines)
+                out << " " << latex_escape (tl) << "\n";
+            out << " \\subsubsection{notes}\n\n";
+        }
+
+        if (i + 1 < ctx.order.size())
+            out << "\n";
+    }
+
+    // Buffered writes can fail late (e.g. disk full); report that instead of returning 0.
+    out.flush();
+    if (!out) {
+        std::cerr << "Failed to write output file: " << out_path << "\n";
+        return 4;
+    }
+
+    return 0;
+}
+
+/** @brief Entry point: returns 1 on bad or missing arguments, otherwise the result of convert(). */
+int main (int argc, char **argv) {
+    validate_fsm_contract();
+
+    CliArgs args;
+    if (!parse_args (argc, argv, args))
+        return 1;
+
+    return convert (args.input, args.output);
+}