move to root folder
This commit is contained in:
372
main.cpp
Normal file
372
main.cpp
Normal file
@@ -0,0 +1,372 @@
|
||||
#include "fsm.h"
|
||||
|
||||
#include <cerrno>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <getopt.h>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// Data model
|
||||
// ------------------------------------------------------------
|
||||
|
||||
/// One clipping entry as stored by the parser: its metadata line plus its body.
/// Kept as a plain aggregate; it is brace-initialised as `Quote{meta, text_lines}`.
struct Quote {
    /// The entry's metadata line, stored verbatim.
    std::string meta;

    /// The entry's body text, one element per input line.
    std::vector<std::string> text_lines;
};
|
||||
|
||||
/// Returns `s` with a single trailing carriage return removed, if there is one.
/// Only the last character is inspected; at most one '\r' is dropped.
static std::string rstrip_cr (std::string s) {
    const bool ends_with_cr = !s.empty() && s.back() == '\r';
    if (ends_with_cr) {
        s.erase (s.size() - 1);
    }
    return s;
}
|
||||
|
||||
/// Returns a copy of `s` in which the LaTeX special characters
/// \ { } % $ # & _ ^ ~ are replaced by their escaped spellings.
/// Every other byte (including non-ASCII bytes) is copied through unchanged.
static std::string latex_escape (const std::string &s) {
    // Yields the replacement text for a special character, or nullptr when the
    // character needs no escaping.
    const auto replacement_for = [] (char c) -> const char * {
        switch (c) {
            case '\\': return "\\textbackslash{}";
            case '{':  return "\\{";
            case '}':  return "\\}";
            case '%':  return "\\%";
            case '$':  return "\\$";
            case '#':  return "\\#";
            case '&':  return "\\&";
            case '_':  return "\\_";
            case '^':  return "\\textasciicircum{}";
            case '~':  return "\\textasciitilde{}";
            default:   return nullptr;
        }
    };

    std::string escaped;
    // Leave some headroom for the extra characters introduced by escaping.
    escaped.reserve (s.size() + s.size() / 8);

    for (const char c : s) {
        if (const char *replacement = replacement_for (c)) {
            escaped += replacement;
        } else {
            escaped += c;
        }
    }
    return escaped;
}
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// FSM: parsing Kindle "My Clippings" style format
|
||||
// ------------------------------------------------------------
|
||||
|
||||
// Parser states. Each one is an empty tag type; the state machine stores
// exactly one of them at a time.

/// Waiting for the title line that opens the next entry.
struct StExpectTitle {};

/// A title has been read; the next line is taken as the metadata line.
struct StExpectMeta {};

/// Metadata has been read; a blank line is expected before the text.
struct StExpectBlank {};

/// Collecting the text lines of the current entry.
struct StCollectText {};
|
||||
|
||||
// Parser events. Events that carry a line of input hold it in `s`; the
// others are empty tag types.

/// A title line was read.
struct EvTitle {
    std::string s;
};

/// A metadata line was read.
struct EvMeta {
    std::string s;
};

/// An empty line was read.
struct EvBlank {};

/// A line of entry text was read.
struct EvText {
    std::string s;
};

/// The entry separator line was read.
struct EvSep {};

/// The end of the input was reached.
struct EvEof {};
|
||||
|
||||
struct ParserContext {
|
||||
// group storage
|
||||
std::vector<std::string> order;
|
||||
std::unordered_map<std::string, std::vector<Quote>> by_title;
|
||||
std::unordered_map<std::string, std::size_t> seen;
|
||||
|
||||
// currently parsed quote
|
||||
std::string cur_title;
|
||||
std::string cur_meta;
|
||||
std::vector<std::string> cur_text;
|
||||
|
||||
void start_title (std::string t) {
|
||||
cur_title = std::move (t);
|
||||
cur_meta.clear();
|
||||
cur_text.clear();
|
||||
}
|
||||
void set_meta (std::string m) {
|
||||
cur_meta = std::move (m);
|
||||
}
|
||||
void add_text (std::string line) {
|
||||
cur_text.push_back (std::move (line));
|
||||
}
|
||||
|
||||
void finalize_quote_if_ready() {
|
||||
if (cur_title.empty() || cur_meta.empty()) {
|
||||
cur_meta.clear();
|
||||
cur_text.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
if (seen.find (cur_title) == seen.end()) {
|
||||
seen.emplace (cur_title, order.size());
|
||||
order.push_back (cur_title);
|
||||
}
|
||||
|
||||
by_title[cur_title].push_back (Quote{cur_meta, cur_text});
|
||||
|
||||
cur_meta.clear();
|
||||
cur_text.clear();
|
||||
}
|
||||
};
|
||||
|
||||
// Logger type handed to the parser state machine. It adds nothing of its own
// to `null_logger`, which is declared outside this file (presumably in fsm.h —
// its behavior is not visible here).
struct SilentLogger : null_logger {};
|
||||
|
||||
// Actions
|
||||
struct ActSetTitle {
|
||||
void operator() (ParserContext &ctx, const StExpectTitle &, const StExpectMeta &, const EvTitle &ev) const {
|
||||
ctx.start_title (ev.s);
|
||||
}
|
||||
};
|
||||
|
||||
struct ActSetMeta {
|
||||
void operator() (ParserContext &ctx, const StExpectMeta &, const StExpectBlank &, const EvMeta &ev) const {
|
||||
ctx.set_meta (ev.s);
|
||||
}
|
||||
};
|
||||
|
||||
struct ActAddText {
|
||||
template <typename TFrom>
|
||||
void operator() (ParserContext &ctx, const TFrom &, const StCollectText &, const EvText &ev) const {
|
||||
ctx.add_text (ev.s);
|
||||
}
|
||||
};
|
||||
|
||||
struct ActFinalizeOnSep {
|
||||
template <typename TFrom>
|
||||
void operator() (ParserContext &ctx, const TFrom &, const StExpectTitle &, const EvSep &) const {
|
||||
ctx.finalize_quote_if_ready();
|
||||
}
|
||||
};
|
||||
|
||||
struct ActFinalizeOnEof {
|
||||
template <typename TFrom>
|
||||
void operator() (ParserContext &ctx, const TFrom &, const StExpectTitle &, const EvEof &) const {
|
||||
ctx.finalize_quote_if_ready();
|
||||
}
|
||||
};
|
||||
|
||||
// Transitions
|
||||
template <> struct transition<StExpectTitle, EvTitle> {
|
||||
using type = transition_to<StExpectMeta, always_allow, ActSetTitle>;
|
||||
};
|
||||
template <> struct transition<StExpectTitle, EvBlank> {
|
||||
using type = transition_to<StExpectTitle>;
|
||||
};
|
||||
template <> struct transition<StExpectTitle, EvEof> {
|
||||
using type = transition_to<StExpectTitle, always_allow, ActFinalizeOnEof>;
|
||||
};
|
||||
|
||||
template <> struct transition<StExpectMeta, EvMeta> {
|
||||
using type = transition_to<StExpectBlank, always_allow, ActSetMeta>;
|
||||
};
|
||||
|
||||
template <> struct transition<StExpectBlank, EvBlank> {
|
||||
using type = transition_to<StCollectText>;
|
||||
};
|
||||
template <> struct transition<StExpectBlank, EvText> {
|
||||
using type = transition_to<StCollectText, always_allow, ActAddText>;
|
||||
};
|
||||
|
||||
template <> struct transition<StCollectText, EvText> {
|
||||
using type = transition_to<StCollectText, always_allow, ActAddText>;
|
||||
};
|
||||
template <> struct transition<StCollectText, EvSep> {
|
||||
using type = transition_to<StExpectTitle, always_allow, ActFinalizeOnSep>;
|
||||
};
|
||||
template <> struct transition<StCollectText, EvEof> {
|
||||
using type = transition_to<StExpectTitle, always_allow, ActFinalizeOnEof>;
|
||||
};
|
||||
|
||||
// Strict contract for the pairs we dispatch.
|
||||
template <> struct missing_transition<StExpectTitle, EvTitle> {
|
||||
static constexpr missing_transition_policy policy = missing_transition_policy::strict;
|
||||
};
|
||||
template <> struct missing_transition<StExpectTitle, EvBlank> {
|
||||
static constexpr missing_transition_policy policy = missing_transition_policy::strict;
|
||||
};
|
||||
template <> struct missing_transition<StExpectTitle, EvEof> {
|
||||
static constexpr missing_transition_policy policy = missing_transition_policy::strict;
|
||||
};
|
||||
|
||||
template <> struct missing_transition<StExpectMeta, EvMeta> {
|
||||
static constexpr missing_transition_policy policy = missing_transition_policy::strict;
|
||||
};
|
||||
|
||||
template <> struct missing_transition<StExpectBlank, EvBlank> {
|
||||
static constexpr missing_transition_policy policy = missing_transition_policy::strict;
|
||||
};
|
||||
template <> struct missing_transition<StExpectBlank, EvText> {
|
||||
static constexpr missing_transition_policy policy = missing_transition_policy::strict;
|
||||
};
|
||||
|
||||
template <> struct missing_transition<StCollectText, EvText> {
|
||||
static constexpr missing_transition_policy policy = missing_transition_policy::strict;
|
||||
};
|
||||
template <> struct missing_transition<StCollectText, EvSep> {
|
||||
static constexpr missing_transition_policy policy = missing_transition_policy::strict;
|
||||
};
|
||||
template <> struct missing_transition<StCollectText, EvEof> {
|
||||
static constexpr missing_transition_policy policy = missing_transition_policy::strict;
|
||||
};
|
||||
|
||||
// The concrete state machine used by the parser: ParserContext as the context
// type, SilentLogger as the logger type, followed by the four parser states.
using ParserFsm = fsm<ParserContext, SilentLogger, StExpectTitle, StExpectMeta, StExpectBlank, StCollectText>;
|
||||
|
||||
static constexpr void validate_fsm_contract() {
|
||||
ParserFsm::validate_events<EvTitle, EvMeta, EvBlank, EvText, EvSep, EvEof>();
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// CLI
|
||||
// ------------------------------------------------------------
|
||||
|
||||
/// Values collected from the command line.
struct CliArgs {
    /// Argument of --input / -i.
    std::string input;

    /// Argument of --output / -o.
    std::string output;
};
|
||||
|
||||
/// Writes the one-line usage message to standard error.
/// @param argv0 program name to show in the message.
static void print_usage (const char *argv0) {
    std::ostream &err = std::cerr;
    err << "Usage: ";
    err << argv0;
    err << " --input <file> --output <file>\n";
}
|
||||
|
||||
static bool parse_args (int argc, char **argv, CliArgs &out) {
|
||||
static option long_opts[] = {
|
||||
{"input", required_argument, nullptr, 'i'},
|
||||
{"output", required_argument, nullptr, 'o'},
|
||||
{"help", no_argument, nullptr, 'h'},
|
||||
{nullptr, 0, nullptr, 0 }
|
||||
};
|
||||
|
||||
int c = 0;
|
||||
while ((c = ::getopt_long (argc, argv, "i:o:h", long_opts, nullptr)) != -1) {
|
||||
switch (c) {
|
||||
case 'i':
|
||||
out.input = optarg;
|
||||
break;
|
||||
case 'o':
|
||||
out.output = optarg;
|
||||
break;
|
||||
case 'h':
|
||||
print_usage (argv[0]);
|
||||
return false;
|
||||
default:
|
||||
print_usage (argv[0]);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (out.input.empty() || out.output.empty()) {
|
||||
print_usage (argv[0]);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// Conversion
|
||||
// ------------------------------------------------------------
|
||||
|
||||
static int convert (const std::string &in_path, const std::string &out_path) {
|
||||
std::ifstream in (in_path);
|
||||
if (!in.is_open()) {
|
||||
std::cerr << "Failed to open input file: " << in_path << " (" << std::strerror (errno) << ")\n";
|
||||
return 2;
|
||||
}
|
||||
|
||||
ParserContext ctx{};
|
||||
ParserFsm fsm (ctx, StExpectTitle{}, SilentLogger{});
|
||||
|
||||
std::string line;
|
||||
while (std::getline (in, line)) {
|
||||
line = rstrip_cr (std::move (line));
|
||||
|
||||
if (line == "==========") {
|
||||
fsm.dispatch (EvSep{});
|
||||
continue;
|
||||
}
|
||||
|
||||
const bool is_blank = line.empty();
|
||||
const auto &st = fsm.state();
|
||||
|
||||
if (std::holds_alternative<StExpectTitle> (st)) {
|
||||
if (is_blank)
|
||||
fsm.dispatch (EvBlank{});
|
||||
else
|
||||
fsm.dispatch (EvTitle{line});
|
||||
} else if (std::holds_alternative<StExpectMeta> (st)) {
|
||||
// Kindle format expects meta here. If missing, keep going.
|
||||
fsm.dispatch (EvMeta{line});
|
||||
} else if (std::holds_alternative<StExpectBlank> (st)) {
|
||||
if (is_blank)
|
||||
fsm.dispatch (EvBlank{});
|
||||
else
|
||||
fsm.dispatch (EvText{line}); // some clippings have no blank line
|
||||
} else {
|
||||
// StCollectText
|
||||
fsm.dispatch (EvText{line});
|
||||
}
|
||||
}
|
||||
|
||||
fsm.dispatch (EvEof{});
|
||||
|
||||
std::ofstream out (out_path, std::ios::trunc);
|
||||
if (!out.is_open()) {
|
||||
std::cerr << "Failed to open output file: " << out_path << " (" << std::strerror (errno) << ")\n";
|
||||
return 3;
|
||||
}
|
||||
|
||||
for (std::size_t i = 0; i < ctx.order.size(); ++i) {
|
||||
const auto &title = ctx.order[i];
|
||||
out << "\\section {" << latex_escape (title) << "}\n";
|
||||
|
||||
const auto it = ctx.by_title.find (title);
|
||||
if (it == ctx.by_title.end())
|
||||
continue;
|
||||
|
||||
for (const auto &q : it->second) {
|
||||
out << " \\subsection {" << latex_escape (q.meta) << "}\n";
|
||||
for (const auto &tl : q.text_lines)
|
||||
out << " " << latex_escape (tl) << "\n";
|
||||
out << " \\subsubsection{notes}\n\n";
|
||||
}
|
||||
|
||||
if (i + 1 < ctx.order.size())
|
||||
out << "\n";
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (int argc, char **argv) {
|
||||
validate_fsm_contract();
|
||||
|
||||
CliArgs args;
|
||||
if (!parse_args (argc, argv, args))
|
||||
return 1;
|
||||
|
||||
return convert (args.input, args.output);
|
||||
}
|
||||
Reference in New Issue
Block a user