#include "fsm.h"
// NOTE(review): the header names of the includes below are not visible in this
// copy of the file — restore them from the original before building.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

// ------------------------------------------------------------
// Data model
// ------------------------------------------------------------

// One parsed clipping: the metadata line plus the text lines collected for it.
struct Quote {
    std::string meta;
    // NOTE(review): the element type of this vector is not visible here —
    // presumably std::string (it is filled from ParserContext::cur_text); confirm.
    std::vector text_lines;
};

// Returns `s` with one trailing '\r' removed, if it ends with one.
// The argument is taken by value, modified in place and returned.
static std::string rstrip_cr (std::string s) {
    if (!s.empty() && s.back() == '\r')
        s.pop_back();
    return s;
}

// Returns a copy of `s` in which each of the characters
//   \ { } % $ # & _ ^ ~
// is replaced by a LaTeX escape sequence; every other byte is copied unchanged.
static std::string latex_escape (const std::string &s) {
    // Conservative escaping for LaTeX special chars.
    std::string out;
    // Reserve the input length plus one eighth as headroom for the escapes.
    out.reserve (s.size() + s.size() / 8);
    for (const unsigned char ch : s) {
        switch (ch) {
        case '\\': out += "\\textbackslash{}"; break;
        case '{': out += "\\{"; break;
        case '}': out += "\\}"; break;
        case '%': out += "\\%"; break;
        case '$': out += "\\$"; break;
        case '#': out += "\\#"; break;
        case '&': out += "\\&"; break;
        case '_': out += "\\_"; break;
        case '^': out += "\\textasciicircum{}"; break;
        case '~': out += "\\textasciitilde{}"; break;
        // NOTE(review): the target type of this static_cast is not visible
        // here — presumably char; confirm.
        default: out.push_back (static_cast (ch)); break;
        }
    }
    return out;
}

// ------------------------------------------------------------
// FSM: parsing Kindle "My Clippings" style format
// ------------------------------------------------------------

// Parser states (empty tag types).
struct StExpectTitle {};
struct StExpectMeta {};
struct StExpectBlank {};
struct StCollectText {};

// Events dispatched to the FSM. Events that carry a payload hold the input
// line in `s`; EvSep is dispatched for a "==========" line and EvEof once
// after the last input line (see convert()).
struct EvTitle { std::string s; };
struct EvMeta { std::string s; };
struct EvBlank {};
struct EvText { std::string s; };
struct EvSep {};
struct EvEof {};

// Mutable state shared by the FSM actions: the quotes collected so far and
// the quote that is currently being assembled.
struct ParserContext {
    // group storage
    // NOTE(review): the template arguments of the three containers below are
    // not visible here. From their use in finalize_quote_if_ready():
    //   order    — titles in the order they were first seen,
    //   by_title — title -> quotes recorded under that title,
    //   seen     — title -> index of that title in `order`.
    std::vector order;
    std::unordered_map> by_title;
    std::unordered_map seen;

    // currently parsed quote
    std::string cur_title;
    std::string cur_meta;
    // NOTE(review): element type not visible here — presumably std::string; confirm.
    std::vector cur_text;

    // Begins a new quote: stores the title and discards any meta/text that
    // was still pending.
    void start_title (std::string t) {
        cur_title = std::move (t);
        cur_meta.clear();
        cur_text.clear();
    }

    // Stores the metadata line of the current quote.
    void set_meta (std::string m) {
        cur_meta = std::move (m);
    }

    // Appends one text line to the current quote.
    void add_text (std::string line) {
        cur_text.push_back (std::move (line));
    }

    // Records the current quote under its title if both title and meta are
    // non-empty; otherwise the pending meta/text are dropped. In both cases
    // cur_meta and cur_text are cleared afterwards; cur_title is left as is.
    void finalize_quote_if_ready() {
        if (cur_title.empty() || cur_meta.empty()) {
            // Incomplete quote: discard what was collected.
            cur_meta.clear();
            cur_text.clear();
            return;
        }
        // First quote for this title: remember the title's position so the
        // output can follow first-seen order.
        if (seen.find (cur_title) == seen.end()) {
            seen.emplace (cur_title, order.size());
            order.push_back (cur_title);
        }
        by_title[cur_title].push_back (Quote{cur_meta, cur_text});
        cur_meta.clear();
        cur_text.clear();
    }
};

// Logger type handed to the FSM; adds nothing to null_logger.
// NOTE(review): null_logger is not declared in this file — presumably it comes
// from fsm.h; confirm.
struct SilentLogger : null_logger {};

// Actions
// Each action is a functor taking (context, source state, target state, event).
// NOTE(review): presumably the FSM from fsm.h invokes them when the matching
// transition fires — confirm against fsm.h.

// StExpectTitle -> StExpectMeta on EvTitle: starts a new quote with the title.
struct ActSetTitle {
    void operator() (ParserContext &ctx, const StExpectTitle &, const StExpectMeta &, const EvTitle &ev) const {
        ctx.start_title (ev.s);
    }
};

// StExpectMeta -> StExpectBlank on EvMeta: stores the metadata line.
struct ActSetMeta {
    void operator() (ParserContext &ctx, const StExpectMeta &, const StExpectBlank &, const EvMeta &ev) const {
        ctx.set_meta (ev.s);
    }
};

// Any source state -> StCollectText on EvText: appends the text line.
struct ActAddText {
    // NOTE(review): the template parameter list is not visible here —
    // presumably it declares TFrom; confirm.
    template
    void operator() (ParserContext &ctx, const TFrom &, const StCollectText &, const EvText &ev) const {
        ctx.add_text (ev.s);
    }
};

// Any source state -> StExpectTitle on EvSep: finalizes the pending quote.
struct ActFinalizeOnSep {
    // NOTE(review): template parameter list not visible — presumably declares TFrom.
    template
    void operator() (ParserContext &ctx, const TFrom &, const StExpectTitle &, const EvSep &) const {
        ctx.finalize_quote_if_ready();
    }
};

// Any source state -> StExpectTitle on EvEof: finalizes the pending quote.
struct ActFinalizeOnEof {
    // NOTE(review): template parameter list not visible — presumably declares TFrom.
    template
    void operator() (ParserContext &ctx, const TFrom &, const StExpectTitle &, const EvEof &) const {
        ctx.finalize_quote_if_ready();
    }
};

// Transitions
// NOTE(review): the template arguments of every specialization below (and of
// transition_to) are not visible in this copy of the file, so the concrete
// state/event/action wiring cannot be read from here — restore it from the
// original.
template <> struct transition { using type = transition_to; };
template <> struct transition { using type = transition_to; };
template <> struct transition { using type = transition_to; };
template <> struct transition { using type = transition_to; };
template <> struct transition { using type = transition_to; };
template <> struct transition { using type = transition_to; };
template <> struct transition { using type = transition_to; };
template <> struct transition { using type = transition_to; };
template <> struct transition { using type = transition_to; };

// Strict contract for the pairs we dispatch.
// Each specialization below selects missing_transition_policy::strict.
// NOTE(review): the template arguments of these specializations are not
// visible in this copy of the file, so it cannot be told from here which
// state/event pairs they cover — restore them from the original.
template <> struct missing_transition { static constexpr missing_transition_policy policy = missing_transition_policy::strict; };
template <> struct missing_transition { static constexpr missing_transition_policy policy = missing_transition_policy::strict; };
template <> struct missing_transition { static constexpr missing_transition_policy policy = missing_transition_policy::strict; };
template <> struct missing_transition { static constexpr missing_transition_policy policy = missing_transition_policy::strict; };
template <> struct missing_transition { static constexpr missing_transition_policy policy = missing_transition_policy::strict; };
template <> struct missing_transition { static constexpr missing_transition_policy policy = missing_transition_policy::strict; };
template <> struct missing_transition { static constexpr missing_transition_policy policy = missing_transition_policy::strict; };
template <> struct missing_transition { static constexpr missing_transition_policy policy = missing_transition_policy::strict; };
template <> struct missing_transition { static constexpr missing_transition_policy policy = missing_transition_policy::strict; };

// The concrete FSM type used by convert().
// NOTE(review): the template arguments of `fsm` are not visible here — confirm
// against the original.
using ParserFsm = fsm;

// Calls ParserFsm::validate_events(); main() runs this before anything else.
// NOTE(review): any template arguments of validate_events are not visible
// here, and what it checks is defined in fsm.h — confirm there.
static constexpr void validate_fsm_contract() {
    ParserFsm::validate_events();
}

// ------------------------------------------------------------
// CLI
// ------------------------------------------------------------

// Command-line arguments: path of the input file and path of the output file.
struct CliArgs {
    std::string input;
    std::string output;
};

// Writes a one-line usage message for program name `argv0` to std::cerr.
// NOTE(review): the message shows no value placeholders after --input and
// --output — they may have been lost from this copy; confirm.
static void print_usage (const char *argv0) {
    std::cerr << "Usage: " << argv0 << " --input --output \n";
}

// Parses -i/--input, -o/--output and -h/--help with getopt_long and stores
// the option values in `out`.
// Returns true only when both an input and an output path were given.
// Returns false, after printing the usage message, when help was requested,
// an unrecognized option was seen, or either path is empty.
static bool parse_args (int argc, char **argv, CliArgs &out) {
    static option long_opts[] = {
        {"input", required_argument, nullptr, 'i'},
        {"output", required_argument, nullptr, 'o'},
        {"help", no_argument, nullptr, 'h'},
        {nullptr, 0, nullptr, 0 } // all-zero terminator entry
    };
    int c = 0;
    while ((c = ::getopt_long (argc, argv, "i:o:h", long_opts, nullptr)) != -1) {
        switch (c) {
        case 'i': out.input = optarg; break;
        case 'o': out.output = optarg; break;
        case 'h': print_usage (argv[0]); return false; // help is reported as failure too
        default: print_usage (argv[0]); return false;
        }
    }
    if (out.input.empty() || out.output.empty()) {
        print_usage (argv[0]);
        return false;
    }
    return true;
}

// ------------------------------------------------------------
// Conversion
// ------------------------------------------------------------

// Reads the clippings file at `in_path`, feeds it line by line to the parser
// FSM, and writes the collected quotes as LaTeX to `out_path`.
// Returns 0 on success, 2 if the input file cannot be opened, 3 if the output
// file cannot be opened. The output file is only opened (and truncated) after
// the whole input has been parsed.
static int convert (const std::string &in_path, const std::string &out_path) {
    std::ifstream in (in_path);
    if (!in.is_open()) {
        std::cerr << "Failed to open input file: " << in_path << " (" << std::strerror (errno) << ")\n";
        return 2;
    }

    ParserContext ctx{};
    // The FSM starts in StExpectTitle and operates on `ctx`.
    ParserFsm fsm (ctx, StExpectTitle{}, SilentLogger{});

    std::string line;
    while (std::getline (in, line)) {
        // Drop a trailing '\r' so files with CRLF line endings parse the same.
        line = rstrip_cr (std::move (line));

        // A line consisting of exactly ten '=' separates two clippings.
        if (line == "==========") {
            fsm.dispatch (EvSep{});
            continue;
        }

        const bool is_blank = line.empty();
        const auto &st = fsm.state();
        // Which event a line becomes depends on the current FSM state.
        // NOTE(review): the state types tested by the holds_alternative calls
        // below are not visible in this copy — judging by the events
        // dispatched they are presumably StExpectTitle, StExpectMeta and
        // StExpectBlank, in that order; confirm.
        if (std::holds_alternative (st)) {
            if (is_blank)
                fsm.dispatch (EvBlank{});
            else
                fsm.dispatch (EvTitle{line});
        } else if (std::holds_alternative (st)) {
            // Kindle format expects meta here. If missing, keep going.
            // The line is forwarded as meta even when it is empty.
            fsm.dispatch (EvMeta{line});
        } else if (std::holds_alternative (st)) {
            if (is_blank)
                fsm.dispatch (EvBlank{});
            else
                fsm.dispatch (EvText{line}); // some clippings have no blank line
        } else { // StCollectText
            // Every line, blank ones included, is forwarded as text.
            fsm.dispatch (EvText{line});
        }
    }
    // Signal end of input to the FSM.
    fsm.dispatch (EvEof{});

    std::ofstream out (out_path, std::ios::trunc);
    if (!out.is_open()) {
        std::cerr << "Failed to open output file: " << out_path << " (" << std::strerror (errno) << ")\n";
        return 3;
    }

    // One \section per title, in the order kept in ctx.order; one \subsection
    // per quote, followed by its escaped text lines and a "notes" subsubsection.
    for (std::size_t i = 0; i < ctx.order.size(); ++i) {
        const auto &title = ctx.order[i];
        out << "\\section {" << latex_escape (title) << "}\n";
        const auto it = ctx.by_title.find (title);
        if (it == ctx.by_title.end())
            continue; // title without recorded quotes: section header only
        for (const auto &q : it->second) {
            out << " \\subsection {" << latex_escape (q.meta) << "}\n";
            for (const auto &tl : q.text_lines)
                out << " " << latex_escape (tl) << "\n";
            out << " \\subsubsection{notes}\n\n";
        }
        // Extra blank line between sections, but not after the last one.
        if (i + 1 < ctx.order.size())
            out << "\n";
    }
    return 0;
}

// Entry point: runs the FSM contract check, parses the command line and
// performs the conversion.
// Returns 1 when parse_args() fails, otherwise the result of convert().
int main (int argc, char **argv) {
    validate_fsm_contract();
    CliArgs args;
    if (!parse_args (argc, argv, args))
        return 1;
    return convert (args.input, args.output);
}