type factory based kindle 2 latex converter initial commit.
This commit is contained in:
295
fsm.h
295
fsm.h
@@ -1,295 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
#include <variant>
|
||||
|
||||
/**
 * @brief How a (state, event) pair without a transition is treated.
 */
enum class missing_transition_policy {
    strict, ///< Validation requires a transition to exist for the pair.
    ignore  ///< No transition is required; the event is simply ignored.
};
|
||||
|
||||
/** @brief Tag type standing for "no transition is defined". */
struct no_transition {
};
|
||||
|
||||
/** @brief Guard that never blocks: every invocation returns true. */
struct always_allow {
    template <typename TContext, typename TFrom, typename TEvent>
    bool operator() (TContext & /*ctx*/, const TFrom & /*from*/, const TEvent & /*ev*/) const noexcept {
        constexpr bool allowed = true;
        return allowed;
    }
};
|
||||
|
||||
/** @brief Action with an empty body; it leaves all of its arguments untouched. */
struct do_nothing {
    template <typename TContext, typename TFrom, typename TTo, typename TEvent>
    void operator() (TContext & /*ctx*/, const TFrom & /*from*/, const TTo & /*to*/,
                     const TEvent & /*ev*/) const noexcept {
        // intentionally empty
    }
};
|
||||
|
||||
/**
|
||||
* @brief Transition descriptor.
|
||||
*
|
||||
* @tparam TTo Destination state type.
|
||||
* @tparam TGuard Guard functor type.
|
||||
* @tparam TAction Action functor type.
|
||||
*/
|
||||
template <typename TTo, typename TGuard = always_allow, typename TAction = do_nothing>
|
||||
struct transition_to {
|
||||
using to_state = TTo;
|
||||
using guard = TGuard;
|
||||
using action = TAction;
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Transition table mapping: specialize to define transitions.
|
||||
* Default: no_transition.
|
||||
*/
|
||||
template <typename TFrom, typename TEvent>
|
||||
struct transition {
|
||||
using type = no_transition;
|
||||
};
|
||||
|
||||
/**
 * @brief Printable name of a state or event type, used in log callbacks.
 *
 * The primary template yields "?"; specialize it to supply a real name.
 */
template <typename T>
struct name_of {
    static constexpr auto value = "?";
};
|
||||
|
||||
/**
 * @brief Logger whose callbacks all have empty bodies (the default logger shape).
 *
 * A custom logger can implement the same three callbacks:
 *  - ignored_transition(from, event)
 *  - unhandled_transition(from, event)  // policy=strict but the transition is missing at runtime
 *  - guard_blocked(from, event)
 */
struct null_logger {
    void ignored_transition (const char * /*from*/, const char * /*event*/) noexcept {
    }

    void unhandled_transition (const char * /*from*/, const char * /*event*/) noexcept {
    }

    void guard_blocked (const char * /*from*/, const char * /*event*/) noexcept {
    }
};
|
||||
|
||||
/**
 * @brief Hook keyed on the state type only, called when TState is left.
 *
 * The primary template does nothing; specialize it to react.
 */
template <typename TState>
struct on_exit_state {
    template <typename TContext>
    static void call (TContext & /*ctx*/, const TState & /*state*/) noexcept {
    }
};

/**
 * @brief Hook keyed on the state type only, called when TState is entered.
 *
 * The primary template does nothing; specialize it to react.
 */
template <typename TState>
struct on_entry_state {
    template <typename TContext>
    static void call (TContext & /*ctx*/, const TState & /*state*/) noexcept {
    }
};
|
||||
|
||||
/**
 * @brief Hook keyed on (state, event), called when TState is left because of TEvent.
 *
 * The primary template does nothing; specialize it to react.
 */
template <typename TState, typename TEvent>
struct on_exit_event {
    template <typename TContext>
    static void call (TContext & /*ctx*/, const TState & /*state*/, const TEvent & /*ev*/) noexcept {
    }
};

/**
 * @brief Hook keyed on (state, event), called when TState is entered because of TEvent.
 *
 * The primary template does nothing; specialize it to react.
 */
template <typename TState, typename TEvent>
struct on_entry_event {
    template <typename TContext>
    static void call (TContext & /*ctx*/, const TState & /*state*/, const TEvent & /*ev*/) noexcept {
    }
};
|
||||
|
||||
/**
 * @brief Builds the destination state object of a transition.
 *
 * TContext is a parameter of the class template itself, so make() is an
 * ordinary static function rather than a member template.
 *
 * The primary template returns a value-initialized TTo and ignores its
 * arguments. Specialize state_factory<TContext, From, Event, To> when the
 * new state has to be built from the context, the old state or the event.
 */
template <typename TContext, typename TFrom, typename TEvent, typename TTo>
struct state_factory {
    static TTo make (TContext & /*ctx*/, const TFrom & /*from*/, const TEvent & /*ev*/) {
        return TTo{};
    }
};
|
||||
|
||||
/**
|
||||
* @brief Per (State,Event) missing transition policy.
|
||||
*
|
||||
* Default is ignore. Mark only required pairs strict.
|
||||
*/
|
||||
template <typename TFrom, typename TEvent>
|
||||
struct missing_transition {
|
||||
static constexpr missing_transition_policy policy = missing_transition_policy::ignore;
|
||||
};
|
||||
|
||||
/** @brief Trait: `value` is true when T is the same type as at least one of Ts... */
template <typename T, typename... Ts>
struct is_one_of : std::bool_constant<(std::is_same_v<T, Ts> || ...)> {};
|
||||
|
||||
/** @brief Helper: transition exists? */
|
||||
template <typename TFrom, typename TEvent>
|
||||
struct has_transition
|
||||
: std::bool_constant < !std::is_same_v<typename transition<TFrom, TEvent>::type, no_transition >> {};
|
||||
|
||||
/** @brief Factory validation (call-form). */
|
||||
template <typename TContext, typename TFrom, typename TEvent, typename TTo, typename = void>
|
||||
struct is_factory_invocable : std::false_type {};
|
||||
|
||||
template <typename TContext, typename TFrom, typename TEvent, typename TTo>
|
||||
struct is_factory_invocable <
|
||||
TContext, TFrom, TEvent, TTo,
|
||||
std::void_t<decltype (state_factory<TContext, TFrom, TEvent, TTo>::make (
|
||||
std::declval<TContext &>(),
|
||||
std::declval<const TFrom &>(),
|
||||
std::declval<const TEvent &>())) >>
|
||||
: std::bool_constant<std::is_same_v<
|
||||
decltype (state_factory<TContext, TFrom, TEvent, TTo>::make (
|
||||
std::declval<TContext &>(),
|
||||
std::declval<const TFrom &>(),
|
||||
std::declval<const TEvent &>())),
|
||||
TTo>> {};
|
||||
|
||||
/**
|
||||
* @brief FSM with runtime state storage and explicit compile-time validation.
|
||||
*
|
||||
* dispatch():
|
||||
* - never static_asserts on missing transitions (avoids State×Event explosion with std::visit)
|
||||
* - handles missing transitions via missing_transition<State,Event>::policy (ignore/strict)
|
||||
*
|
||||
* validate_events<Ev...>():
|
||||
* - compile-time checks ONLY for pairs marked strict.
|
||||
*/
|
||||
template <typename TContext, typename TLogger, typename... TStates>
|
||||
class fsm {
|
||||
public:
|
||||
using state_variant = std::variant<TStates...>;
|
||||
|
||||
template <typename TInitial,
|
||||
typename = std::enable_if_t<is_one_of<TInitial, TStates...>::value>>
|
||||
explicit fsm (TContext &ctx, TInitial initial, TLogger logger = TLogger{})
|
||||
: m_ctx (ctx)
|
||||
, m_logger (std::move (logger))
|
||||
, m_state (std::move (initial)) {
|
||||
std::visit (init_entry_visitor{*this}, m_state);
|
||||
}
|
||||
|
||||
template <typename TEvent>
|
||||
bool dispatch (const TEvent &ev) {
|
||||
dispatch_visitor<TEvent> v{*this, ev};
|
||||
return std::visit (v, m_state);
|
||||
}
|
||||
|
||||
template <typename... TEvents>
|
||||
static constexpr void validate_events() {
|
||||
(validate_one_event<TEvents>(), ...);
|
||||
}
|
||||
|
||||
const state_variant &state() const noexcept {
|
||||
return m_state;
|
||||
}
|
||||
state_variant &state() noexcept {
|
||||
return m_state;
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename TEvent>
|
||||
struct dispatch_visitor {
|
||||
fsm &self;
|
||||
const TEvent &ev;
|
||||
|
||||
template <typename TFrom>
|
||||
bool operator() (TFrom &from) const {
|
||||
return self.template dispatch_from<TFrom> (from, ev);
|
||||
}
|
||||
};
|
||||
|
||||
struct init_entry_visitor {
|
||||
fsm &self;
|
||||
template <typename TState>
|
||||
void operator() (TState &st) const {
|
||||
on_entry_state<TState>::call (self.m_ctx, st);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename TEvent>
|
||||
struct entry_after_commit_visitor {
|
||||
fsm &self;
|
||||
const TEvent &ev;
|
||||
|
||||
template <typename TState>
|
||||
void operator() (TState &st) const {
|
||||
on_entry_state<TState>::call (self.m_ctx, st);
|
||||
on_entry_event<TState, TEvent>::call (self.m_ctx, st, ev);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename TFrom, typename TEvent>
|
||||
bool dispatch_from (TFrom &from, const TEvent &ev) {
|
||||
using tr = typename transition<TFrom, TEvent>::type;
|
||||
|
||||
if constexpr (std::is_same_v<tr, no_transition>) {
|
||||
if constexpr (missing_transition<TFrom, TEvent>::policy == missing_transition_policy::strict)
|
||||
m_logger.unhandled_transition (name_of<TFrom>::value, name_of<TEvent>::value);
|
||||
else
|
||||
m_logger.ignored_transition (name_of<TFrom>::value, name_of<TEvent>::value);
|
||||
return false;
|
||||
} else {
|
||||
using to_state = typename tr::to_state;
|
||||
using guard_t = typename tr::guard;
|
||||
using action_t = typename tr::action;
|
||||
|
||||
static_assert (is_one_of<to_state, TStates...>::value,
|
||||
"FSM: transition target state not in FSM state list.");
|
||||
|
||||
static_assert (std::is_invocable_r_v<bool, guard_t, TContext &, const TFrom &, const TEvent &>,
|
||||
"FSM: guard must be callable as bool(Context&, const From&, const Event&).");
|
||||
|
||||
static_assert (std::is_invocable_r_v<void, action_t, TContext &, const TFrom &, const to_state &, const TEvent &>,
|
||||
"FSM: action must be callable as void(Context&, const From&, const To&, const Event&).");
|
||||
|
||||
static_assert (is_factory_invocable<TContext, TFrom, TEvent, to_state>::value,
|
||||
"FSM: state_factory<Context,From,Event,To>::make must be callable as "
|
||||
"To make(Context&, const From&, const Event&).");
|
||||
|
||||
guard_t guard{};
|
||||
if (!guard (m_ctx, from, ev)) {
|
||||
m_logger.guard_blocked (name_of<TFrom>::value, name_of<TEvent>::value);
|
||||
return false;
|
||||
}
|
||||
|
||||
on_exit_event<TFrom, TEvent>::call (m_ctx, from, ev);
|
||||
on_exit_state<TFrom>::call (m_ctx, from);
|
||||
|
||||
to_state to = state_factory<TContext, TFrom, TEvent, to_state>::make (m_ctx, from, ev);
|
||||
|
||||
action_t action{};
|
||||
action (m_ctx, from, to, ev);
|
||||
|
||||
m_state = std::move (to);
|
||||
|
||||
std::visit (entry_after_commit_visitor<TEvent> {*this, ev}, m_state);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TEvent>
|
||||
static constexpr void validate_one_event() {
|
||||
(validate_pair<TStates, TEvent>(), ...);
|
||||
}
|
||||
|
||||
template <typename TState, typename TEvent>
|
||||
static constexpr void validate_pair() {
|
||||
if constexpr (missing_transition<TState, TEvent>::policy == missing_transition_policy::strict) {
|
||||
static_assert (has_transition<TState, TEvent>::value,
|
||||
"FSM validation: required (strict) transition is missing for (State, Event).");
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
TContext &m_ctx;
|
||||
TLogger m_logger;
|
||||
state_variant m_state;
|
||||
};
|
||||
372
main.cpp
372
main.cpp
@@ -1,372 +0,0 @@
|
||||
#include "fsm.h"
|
||||
|
||||
#include <cerrno>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <getopt.h>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// Data model
|
||||
// ------------------------------------------------------------
|
||||
|
||||
/** @brief One parsed clipping: its metadata line and the lines of its text. */
struct Quote {
    std::string meta;                    ///< metadata line of the clipping
    std::vector<std::string> text_lines; ///< text of the clipping, one entry per input line
};
|
||||
|
||||
/**
 * @brief Removes one trailing carriage return, if present.
 * @param s Line to clean (taken by value).
 * @return s without its final '\r'; unchanged when it does not end in '\r'.
 */
static std::string rstrip_cr (std::string s) {
    const bool ends_with_cr = !s.empty() && s.back() == '\r';
    if (ends_with_cr)
        s.erase (s.size() - 1);
    return s;
}
|
||||
|
||||
/**
 * @brief Conservatively escapes the LaTeX special characters in a string.
 *
 * Replaces \ { } % $ # & _ ^ ~ with their escaped forms; every other
 * byte is copied through unchanged.
 *
 * @param s Raw text.
 * @return Escaped copy of s.
 */
static std::string latex_escape (const std::string &s) {
    // Escaped form of a special character, or nullptr for an ordinary one.
    const auto replacement_for = [] (char c) -> const char * {
        switch (c) {
            case '\\': return "\\textbackslash{}";
            case '{':  return "\\{";
            case '}':  return "\\}";
            case '%':  return "\\%";
            case '$':  return "\\$";
            case '#':  return "\\#";
            case '&':  return "\\&";
            case '_':  return "\\_";
            case '^':  return "\\textasciicircum{}";
            case '~':  return "\\textasciitilde{}";
            default:   return nullptr;
        }
    };

    std::string escaped;
    escaped.reserve (s.size() + s.size() / 8);
    for (const char c : s) {
        if (const char *replacement = replacement_for (c))
            escaped += replacement;
        else
            escaped.push_back (c);
    }
    return escaped;
}
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// FSM: parsing Kindle "My Clippings" style format
|
||||
// ------------------------------------------------------------
|
||||
|
||||
// Parser states (empty tag types).

/** @brief The next non-blank line is taken as a title. */
struct StExpectTitle {
};

/** @brief The next line is taken as the metadata line. */
struct StExpectMeta {
};

/** @brief A blank line may follow; a text line is accepted as well. */
struct StExpectBlank {
};

/** @brief Lines are collected as quote text. */
struct StCollectText {
};
|
||||
|
||||
// Parser events. Events that carry an input line store it in `s`.

/** @brief A title line was read. */
struct EvTitle {
    std::string s;
};

/** @brief A metadata line was read. */
struct EvMeta {
    std::string s;
};

/** @brief A blank line was read. */
struct EvBlank {
};

/** @brief A line of quote text was read. */
struct EvText {
    std::string s;
};

/** @brief A separator line was read. */
struct EvSep {
};

/** @brief The end of the input was reached. */
struct EvEof {
};
|
||||
|
||||
struct ParserContext {
|
||||
// group storage
|
||||
std::vector<std::string> order;
|
||||
std::unordered_map<std::string, std::vector<Quote>> by_title;
|
||||
std::unordered_map<std::string, std::size_t> seen;
|
||||
|
||||
// currently parsed quote
|
||||
std::string cur_title;
|
||||
std::string cur_meta;
|
||||
std::vector<std::string> cur_text;
|
||||
|
||||
void start_title (std::string t) {
|
||||
cur_title = std::move (t);
|
||||
cur_meta.clear();
|
||||
cur_text.clear();
|
||||
}
|
||||
void set_meta (std::string m) {
|
||||
cur_meta = std::move (m);
|
||||
}
|
||||
void add_text (std::string line) {
|
||||
cur_text.push_back (std::move (line));
|
||||
}
|
||||
|
||||
void finalize_quote_if_ready() {
|
||||
if (cur_title.empty() || cur_meta.empty()) {
|
||||
cur_meta.clear();
|
||||
cur_text.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
if (seen.find (cur_title) == seen.end()) {
|
||||
seen.emplace (cur_title, order.size());
|
||||
order.push_back (cur_title);
|
||||
}
|
||||
|
||||
by_title[cur_title].push_back (Quote{cur_meta, cur_text});
|
||||
|
||||
cur_meta.clear();
|
||||
cur_text.clear();
|
||||
}
|
||||
};
|
||||
|
||||
/** @brief Logger used by the parser FSM; it adds nothing to null_logger. */
struct SilentLogger : null_logger {
};
|
||||
|
||||
// Actions
|
||||
struct ActSetTitle {
|
||||
void operator() (ParserContext &ctx, const StExpectTitle &, const StExpectMeta &, const EvTitle &ev) const {
|
||||
ctx.start_title (ev.s);
|
||||
}
|
||||
};
|
||||
|
||||
struct ActSetMeta {
|
||||
void operator() (ParserContext &ctx, const StExpectMeta &, const StExpectBlank &, const EvMeta &ev) const {
|
||||
ctx.set_meta (ev.s);
|
||||
}
|
||||
};
|
||||
|
||||
struct ActAddText {
|
||||
template <typename TFrom>
|
||||
void operator() (ParserContext &ctx, const TFrom &, const StCollectText &, const EvText &ev) const {
|
||||
ctx.add_text (ev.s);
|
||||
}
|
||||
};
|
||||
|
||||
struct ActFinalizeOnSep {
|
||||
template <typename TFrom>
|
||||
void operator() (ParserContext &ctx, const TFrom &, const StExpectTitle &, const EvSep &) const {
|
||||
ctx.finalize_quote_if_ready();
|
||||
}
|
||||
};
|
||||
|
||||
struct ActFinalizeOnEof {
|
||||
template <typename TFrom>
|
||||
void operator() (ParserContext &ctx, const TFrom &, const StExpectTitle &, const EvEof &) const {
|
||||
ctx.finalize_quote_if_ready();
|
||||
}
|
||||
};
|
||||
|
||||
// Transitions
|
||||
template <> struct transition<StExpectTitle, EvTitle> {
|
||||
using type = transition_to<StExpectMeta, always_allow, ActSetTitle>;
|
||||
};
|
||||
template <> struct transition<StExpectTitle, EvBlank> {
|
||||
using type = transition_to<StExpectTitle>;
|
||||
};
|
||||
template <> struct transition<StExpectTitle, EvEof> {
|
||||
using type = transition_to<StExpectTitle, always_allow, ActFinalizeOnEof>;
|
||||
};
|
||||
|
||||
template <> struct transition<StExpectMeta, EvMeta> {
|
||||
using type = transition_to<StExpectBlank, always_allow, ActSetMeta>;
|
||||
};
|
||||
|
||||
template <> struct transition<StExpectBlank, EvBlank> {
|
||||
using type = transition_to<StCollectText>;
|
||||
};
|
||||
template <> struct transition<StExpectBlank, EvText> {
|
||||
using type = transition_to<StCollectText, always_allow, ActAddText>;
|
||||
};
|
||||
|
||||
template <> struct transition<StCollectText, EvText> {
|
||||
using type = transition_to<StCollectText, always_allow, ActAddText>;
|
||||
};
|
||||
template <> struct transition<StCollectText, EvSep> {
|
||||
using type = transition_to<StExpectTitle, always_allow, ActFinalizeOnSep>;
|
||||
};
|
||||
template <> struct transition<StCollectText, EvEof> {
|
||||
using type = transition_to<StExpectTitle, always_allow, ActFinalizeOnEof>;
|
||||
};
|
||||
|
||||
// Strict contract for the pairs we dispatch.
|
||||
template <> struct missing_transition<StExpectTitle, EvTitle> {
|
||||
static constexpr missing_transition_policy policy = missing_transition_policy::strict;
|
||||
};
|
||||
template <> struct missing_transition<StExpectTitle, EvBlank> {
|
||||
static constexpr missing_transition_policy policy = missing_transition_policy::strict;
|
||||
};
|
||||
template <> struct missing_transition<StExpectTitle, EvEof> {
|
||||
static constexpr missing_transition_policy policy = missing_transition_policy::strict;
|
||||
};
|
||||
|
||||
template <> struct missing_transition<StExpectMeta, EvMeta> {
|
||||
static constexpr missing_transition_policy policy = missing_transition_policy::strict;
|
||||
};
|
||||
|
||||
template <> struct missing_transition<StExpectBlank, EvBlank> {
|
||||
static constexpr missing_transition_policy policy = missing_transition_policy::strict;
|
||||
};
|
||||
template <> struct missing_transition<StExpectBlank, EvText> {
|
||||
static constexpr missing_transition_policy policy = missing_transition_policy::strict;
|
||||
};
|
||||
|
||||
template <> struct missing_transition<StCollectText, EvText> {
|
||||
static constexpr missing_transition_policy policy = missing_transition_policy::strict;
|
||||
};
|
||||
template <> struct missing_transition<StCollectText, EvSep> {
|
||||
static constexpr missing_transition_policy policy = missing_transition_policy::strict;
|
||||
};
|
||||
template <> struct missing_transition<StCollectText, EvEof> {
|
||||
static constexpr missing_transition_policy policy = missing_transition_policy::strict;
|
||||
};
|
||||
|
||||
using ParserFsm = fsm<ParserContext, SilentLogger, StExpectTitle, StExpectMeta, StExpectBlank, StCollectText>;
|
||||
|
||||
static constexpr void validate_fsm_contract() {
|
||||
ParserFsm::validate_events<EvTitle, EvMeta, EvBlank, EvText, EvSep, EvEof>();
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// CLI
|
||||
// ------------------------------------------------------------
|
||||
|
||||
/** @brief Values collected from the command line. */
struct CliArgs {
    std::string input;  ///< argument of --input / -i
    std::string output; ///< argument of --output / -o
};
|
||||
|
||||
/**
 * @brief Writes the one-line usage message to std::cerr.
 * @param argv0 Program name shown in the message.
 */
static void print_usage (const char *argv0) {
    std::ostream &err = std::cerr;
    err << "Usage: ";
    err << argv0;
    err << " --input <file> --output <file>\n";
}
|
||||
|
||||
static bool parse_args (int argc, char **argv, CliArgs &out) {
|
||||
static option long_opts[] = {
|
||||
{"input", required_argument, nullptr, 'i'},
|
||||
{"output", required_argument, nullptr, 'o'},
|
||||
{"help", no_argument, nullptr, 'h'},
|
||||
{nullptr, 0, nullptr, 0 }
|
||||
};
|
||||
|
||||
int c = 0;
|
||||
while ((c = ::getopt_long (argc, argv, "i:o:h", long_opts, nullptr)) != -1) {
|
||||
switch (c) {
|
||||
case 'i':
|
||||
out.input = optarg;
|
||||
break;
|
||||
case 'o':
|
||||
out.output = optarg;
|
||||
break;
|
||||
case 'h':
|
||||
print_usage (argv[0]);
|
||||
return false;
|
||||
default:
|
||||
print_usage (argv[0]);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (out.input.empty() || out.output.empty()) {
|
||||
print_usage (argv[0]);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// Conversion
|
||||
// ------------------------------------------------------------
|
||||
|
||||
static int convert (const std::string &in_path, const std::string &out_path) {
|
||||
std::ifstream in (in_path);
|
||||
if (!in.is_open()) {
|
||||
std::cerr << "Failed to open input file: " << in_path << " (" << std::strerror (errno) << ")\n";
|
||||
return 2;
|
||||
}
|
||||
|
||||
ParserContext ctx{};
|
||||
ParserFsm fsm (ctx, StExpectTitle{}, SilentLogger{});
|
||||
|
||||
std::string line;
|
||||
while (std::getline (in, line)) {
|
||||
line = rstrip_cr (std::move (line));
|
||||
|
||||
if (line == "==========") {
|
||||
fsm.dispatch (EvSep{});
|
||||
continue;
|
||||
}
|
||||
|
||||
const bool is_blank = line.empty();
|
||||
const auto &st = fsm.state();
|
||||
|
||||
if (std::holds_alternative<StExpectTitle> (st)) {
|
||||
if (is_blank)
|
||||
fsm.dispatch (EvBlank{});
|
||||
else
|
||||
fsm.dispatch (EvTitle{line});
|
||||
} else if (std::holds_alternative<StExpectMeta> (st)) {
|
||||
// Kindle format expects meta here. If missing, keep going.
|
||||
fsm.dispatch (EvMeta{line});
|
||||
} else if (std::holds_alternative<StExpectBlank> (st)) {
|
||||
if (is_blank)
|
||||
fsm.dispatch (EvBlank{});
|
||||
else
|
||||
fsm.dispatch (EvText{line}); // some clippings have no blank line
|
||||
} else {
|
||||
// StCollectText
|
||||
fsm.dispatch (EvText{line});
|
||||
}
|
||||
}
|
||||
|
||||
fsm.dispatch (EvEof{});
|
||||
|
||||
std::ofstream out (out_path, std::ios::trunc);
|
||||
if (!out.is_open()) {
|
||||
std::cerr << "Failed to open output file: " << out_path << " (" << std::strerror (errno) << ")\n";
|
||||
return 3;
|
||||
}
|
||||
|
||||
for (std::size_t i = 0; i < ctx.order.size(); ++i) {
|
||||
const auto &title = ctx.order[i];
|
||||
out << "\\section {" << latex_escape (title) << "}\n";
|
||||
|
||||
const auto it = ctx.by_title.find (title);
|
||||
if (it == ctx.by_title.end())
|
||||
continue;
|
||||
|
||||
for (const auto &q : it->second) {
|
||||
out << " \\subsection {" << latex_escape (q.meta) << "}\n";
|
||||
for (const auto &tl : q.text_lines)
|
||||
out << " " << latex_escape (tl) << "\n";
|
||||
out << " \\subsubsection{notes}\n\n";
|
||||
}
|
||||
|
||||
if (i + 1 < ctx.order.size())
|
||||
out << "\n";
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (int argc, char **argv) {
|
||||
validate_fsm_contract();
|
||||
|
||||
CliArgs args;
|
||||
if (!parse_args (argc, argv, args))
|
||||
return 1;
|
||||
|
||||
return convert (args.input, args.output);
|
||||
}
|
||||
74
typefactory/.gitignore
vendored
Normal file
74
typefactory/.gitignore
vendored
Normal file
@@ -0,0 +1,74 @@
|
||||
# This file is used to ignore files which are generated
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
*~
|
||||
*.autosave
|
||||
*.a
|
||||
*.core
|
||||
*.moc
|
||||
*.o
|
||||
*.obj
|
||||
*.orig
|
||||
*.rej
|
||||
*.so
|
||||
*.so.*
|
||||
*_pch.h.cpp
|
||||
*_resource.rc
|
||||
*.qm
|
||||
.#*
|
||||
*.*#
|
||||
core
|
||||
!core/
|
||||
tags
|
||||
.DS_Store
|
||||
.directory
|
||||
*.debug
|
||||
Makefile*
|
||||
*.prl
|
||||
*.app
|
||||
moc_*.cpp
|
||||
ui_*.h
|
||||
qrc_*.cpp
|
||||
Thumbs.db
|
||||
*.res
|
||||
*.rc
|
||||
/.qmake.cache
|
||||
/.qmake.stash
|
||||
|
||||
# qtcreator generated files
|
||||
*.pro.user*
|
||||
CMakeLists.txt.user*
|
||||
|
||||
# xemacs temporary files
|
||||
*.flc
|
||||
|
||||
# Vim temporary files
|
||||
.*.swp
|
||||
|
||||
# Visual Studio generated files
|
||||
*.ib_pdb_index
|
||||
*.idb
|
||||
*.ilk
|
||||
*.pdb
|
||||
*.sln
|
||||
*.suo
|
||||
*.vcproj
|
||||
*vcproj.*.*.user
|
||||
*.ncb
|
||||
*.sdf
|
||||
*.opensdf
|
||||
*.vcxproj
|
||||
*vcxproj.*
|
||||
|
||||
# MinGW generated files
|
||||
*.Debug
|
||||
*.Release
|
||||
|
||||
# Python byte code
|
||||
*.pyc
|
||||
|
||||
# Binaries
|
||||
# --------
|
||||
*.dll
|
||||
*.exe
|
||||
|
||||
7
typefactory/kindle2latex.pro
Normal file
7
typefactory/kindle2latex.pro
Normal file
@@ -0,0 +1,7 @@
|
||||
TEMPLATE = app
|
||||
CONFIG += console c++17
|
||||
CONFIG -= app_bundle
|
||||
CONFIG -= qt
|
||||
|
||||
SOURCES += \
|
||||
main.cpp
|
||||
325
typefactory/main.cpp
Normal file
325
typefactory/main.cpp
Normal file
@@ -0,0 +1,325 @@
|
||||
#include <array>
#include <cerrno>
#include <cstring>
#include <fstream>
#include <getopt.h>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "typefactory.h" // <-- your header
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// Helpers
|
||||
// ------------------------------------------------------------
|
||||
|
||||
/**
 * @brief Drops a single trailing carriage return from a line.
 * @param s Line to clean (taken by value).
 * @return s without its last character when that character is '\r',
 *         otherwise s unchanged.
 */
static std::string rstrip_cr (std::string s) {
    if (s.empty() || s.back() != '\r')
        return s;
    s.resize (s.size() - 1);
    return s;
}
|
||||
|
||||
/**
 * @brief Escapes the LaTeX special characters \ { } % $ # & _ ^ ~ in a string.
 *
 * All other bytes are copied to the result unchanged.
 *
 * @param s Raw text.
 * @return Escaped copy of s.
 */
static std::string latex_escape (const std::string &s) {
    // Escaped spelling of a special character; nullptr means "copy as is".
    const auto escaped_form = [] (char c) -> const char * {
        switch (c) {
            case '\\': return "\\textbackslash{}";
            case '{':  return "\\{";
            case '}':  return "\\}";
            case '%':  return "\\%";
            case '$':  return "\\$";
            case '#':  return "\\#";
            case '&':  return "\\&";
            case '_':  return "\\_";
            case '^':  return "\\textasciicircum{}";
            case '~':  return "\\textasciitilde{}";
            default:   return nullptr;
        }
    };

    std::string result;
    result.reserve (s.size() + s.size() / 8);
    for (const char c : s) {
        const char *form = escaped_form (c);
        if (form != nullptr)
            result.append (form);
        else
            result.push_back (c);
    }
    return result;
}
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// Model
|
||||
// ------------------------------------------------------------
|
||||
|
||||
/** @brief A single clipping: the metadata line plus the lines of its text. */
struct Quote {
    std::string meta;                    ///< metadata line
    std::vector<std::string> text_lines; ///< text, one entry per input line
};
|
||||
|
||||
struct ParseContext {
|
||||
std::vector<std::string> order;
|
||||
std::unordered_map<std::string, std::vector<Quote>> by_title;
|
||||
std::unordered_map<std::string, std::size_t> seen;
|
||||
|
||||
std::string cur_title;
|
||||
std::string cur_meta;
|
||||
std::vector<std::string> cur_text;
|
||||
|
||||
void start_title (std::string t) {
|
||||
cur_title = std::move (t);
|
||||
cur_meta.clear();
|
||||
cur_text.clear();
|
||||
}
|
||||
|
||||
void set_meta (std::string m) {
|
||||
cur_meta = std::move (m);
|
||||
}
|
||||
void add_text (std::string line) {
|
||||
cur_text.push_back (std::move (line));
|
||||
}
|
||||
|
||||
void finalize_quote() {
|
||||
if (cur_title.empty() || cur_meta.empty()) {
|
||||
cur_meta.clear();
|
||||
cur_text.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
if (seen.find (cur_title) == seen.end()) {
|
||||
seen.emplace (cur_title, order.size());
|
||||
order.push_back (cur_title);
|
||||
}
|
||||
|
||||
by_title[cur_title].push_back (Quote{cur_meta, cur_text});
|
||||
|
||||
cur_meta.clear();
|
||||
cur_text.clear();
|
||||
}
|
||||
};
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// Handlers
|
||||
// ------------------------------------------------------------
|
||||
|
||||
/** @brief Parsing stage; it selects the handler that receives the next line. */
enum class Stage {
    Title,
    Meta,
    Body
};

/** @brief Every Stage value, for code that has to visit all of them. */
static constexpr std::array<Stage, 3> kAllStages = {{Stage::Title, Stage::Meta, Stage::Body}};
|
||||
|
||||
struct ILineHandler {
|
||||
virtual ~ILineHandler() = default;
|
||||
virtual void handle (const std::string &line, ParseContext &ctx, Stage &next) = 0;
|
||||
};
|
||||
|
||||
struct TitleHandler final : ILineHandler {
|
||||
void handle (const std::string &line, ParseContext &ctx, Stage &next) override {
|
||||
if (line.empty()) {
|
||||
next = Stage::Title; // пропускаем пустые между блоками
|
||||
return;
|
||||
}
|
||||
ctx.start_title (line);
|
||||
next = Stage::Meta;
|
||||
}
|
||||
};
|
||||
|
||||
struct MetaHandler final : ILineHandler {
|
||||
void handle (const std::string &line, ParseContext &ctx, Stage &next) override {
|
||||
ctx.set_meta (line);
|
||||
next = Stage::Body;
|
||||
}
|
||||
};
|
||||
|
||||
struct BodyHandler final : ILineHandler {
|
||||
void handle (const std::string &line, ParseContext &ctx, Stage &next) override {
|
||||
if (line == "==========") {
|
||||
ctx.finalize_quote();
|
||||
next = Stage::Title;
|
||||
return;
|
||||
}
|
||||
ctx.add_text (line); // в том числе пустые строки внутри цитаты
|
||||
next = Stage::Body;
|
||||
}
|
||||
};
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// Factory wiring (using your typefactory.h) + caching handlers
|
||||
// ------------------------------------------------------------
|
||||
|
||||
using HandlerPtr = std::shared_ptr<ILineHandler>;
|
||||
using HandlerMap = std::unordered_map<Stage, HandlerPtr>;
|
||||
|
||||
/**
 * @brief Creates a factory with one handler type registered per stage.
 * @return Factory with TitleHandler, MetaHandler and BodyHandler registered
 *         under Stage::Title, Stage::Meta and Stage::Body.
 */
static TypeFactory<Stage, ILineHandler> build_factory() {
    TypeFactory<Stage, ILineHandler> factory;

    factory.registerType<TitleHandler> (Stage::Title);
    factory.registerType<MetaHandler> (Stage::Meta);
    factory.registerType<BodyHandler> (Stage::Body);

    return factory;
}
|
||||
|
||||
/**
 * @brief Creates one handler per stage through the factory and caches them.
 *
 * @param factory Factory expected to have a handler registered for every stage.
 * @return Map holding a non-null handler for each entry of kAllStages.
 * @throws std::runtime_error when the factory throws std::out_of_range for a
 *         stage, returns a null handler, or the cache ends up with the
 *         wrong number of entries.
 */
static HandlerMap build_handlers_cache (const TypeFactory<Stage, ILineHandler> &factory) {
    HandlerMap cache;
    cache.reserve (kAllStages.size());

    // Runtime validation: a handler must be creatable for every Stage.
    for (const Stage stage : kAllStages) {
        try {
            auto handler = factory.create (stage); // shared_ptr<ILineHandler>
            if (!handler)
                throw std::runtime_error ("Factory returned null handler");
            cache.emplace (stage, std::move (handler));
        } catch (const std::out_of_range &) {
            throw std::runtime_error ("Missing handler registration for some Stage");
        }
    }

    // Extra check that all stages are present (in case emplace dropped a duplicate key).
    const bool all_present = (cache.size() == kAllStages.size());
    if (!all_present)
        throw std::runtime_error ("Handler cache size mismatch");

    return cache;
}
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// CLI
|
||||
// ------------------------------------------------------------
|
||||
|
||||
/** @brief Paths taken from the command line. */
struct CliArgs {
    std::string input;  ///< value of --input / -i
    std::string output; ///< value of --output / -o
};
|
||||
|
||||
/**
 * @brief Prints the usage line to std::cerr.
 * @param argv0 Program name to show.
 */
static void print_usage (const char *argv0) {
    std::ostream &err = std::cerr;
    err << "Usage: ";
    err << argv0;
    err << " --input <file> --output <file>\n";
}
|
||||
|
||||
static bool parse_args (int argc, char **argv, CliArgs &out) {
|
||||
static option long_opts[] = {
|
||||
{"input", required_argument, nullptr, 'i'},
|
||||
{"output", required_argument, nullptr, 'o'},
|
||||
{"help", no_argument, nullptr, 'h'},
|
||||
{nullptr, 0, nullptr, 0 }
|
||||
};
|
||||
|
||||
int c = 0;
|
||||
while ((c = ::getopt_long (argc, argv, "i:o:h", long_opts, nullptr)) != -1) {
|
||||
switch (c) {
|
||||
case 'i':
|
||||
out.input = optarg;
|
||||
break;
|
||||
case 'o':
|
||||
out.output = optarg;
|
||||
break;
|
||||
case 'h':
|
||||
print_usage (argv[0]);
|
||||
return false;
|
||||
default:
|
||||
print_usage (argv[0]);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (out.input.empty() || out.output.empty()) {
|
||||
print_usage (argv[0]);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------
|
||||
// Convert
|
||||
// ------------------------------------------------------------
|
||||
|
||||
static int convert (const std::string &in_path, const std::string &out_path) {
|
||||
std::ifstream in (in_path);
|
||||
if (!in.is_open()) {
|
||||
std::cerr << "Failed to open input file: " << in_path
|
||||
<< " (" << std::strerror (errno) << ")\n";
|
||||
return 2;
|
||||
}
|
||||
|
||||
// Build factory + cache handlers once
|
||||
TypeFactory<Stage, ILineHandler> factory = build_factory();
|
||||
|
||||
HandlerMap handlers;
|
||||
try {
|
||||
handlers = build_handlers_cache (factory);
|
||||
} catch (const std::exception &e) {
|
||||
std::cerr << "Internal error while building handler cache: " << e.what() << "\n";
|
||||
return 4;
|
||||
}
|
||||
|
||||
ParseContext ctx{};
|
||||
Stage stage = Stage::Title;
|
||||
|
||||
std::string line;
|
||||
while (std::getline (in, line)) {
|
||||
line = rstrip_cr (std::move (line));
|
||||
|
||||
auto it = handlers.find (stage);
|
||||
if (it == handlers.end() || !it->second) {
|
||||
std::cerr << "Internal error: handler missing at runtime\n";
|
||||
return 4;
|
||||
}
|
||||
|
||||
Stage next = stage;
|
||||
it->second->handle (line, ctx, next);
|
||||
stage = next;
|
||||
}
|
||||
|
||||
// EOF: если файл не закончился "==========", всё равно зафиксируем последний блок
|
||||
ctx.finalize_quote();
|
||||
|
||||
std::ofstream out (out_path, std::ios::trunc);
|
||||
if (!out.is_open()) {
|
||||
std::cerr << "Failed to open output file: " << out_path
|
||||
<< " (" << std::strerror (errno) << ")\n";
|
||||
return 3;
|
||||
}
|
||||
|
||||
for (std::size_t i = 0; i < ctx.order.size(); ++i) {
|
||||
const auto &title = ctx.order[i];
|
||||
out << "\\section {" << latex_escape (title) << "}\n";
|
||||
|
||||
const auto it = ctx.by_title.find (title);
|
||||
if (it == ctx.by_title.end())
|
||||
continue;
|
||||
|
||||
for (const auto &q : it->second) {
|
||||
out << " \\subsection {" << latex_escape (q.meta) << "}\n";
|
||||
for (const auto &tl : q.text_lines)
|
||||
out << " " << latex_escape (tl) << "\n";
|
||||
out << " \\subsubsection{notes}\n\n";
|
||||
}
|
||||
|
||||
if (i + 1 < ctx.order.size())
|
||||
out << "\n";
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (int argc, char **argv) {
|
||||
CliArgs args;
|
||||
if (!parse_args (argc, argv, args))
|
||||
return 1;
|
||||
return convert (args.input, args.output);
|
||||
}
|
||||
118
typefactory/typefactory.h
Normal file
118
typefactory/typefactory.h
Normal file
@@ -0,0 +1,118 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2026 Dim Himro
|
||||
|
||||
#ifndef TYPEFACTORY_H
|
||||
#define TYPEFACTORY_H
|
||||
|
||||
#include <memory>
|
||||
#include <type_traits>
|
||||
#include <unordered_map>
|
||||
#include <utility> // std::move
|
||||
|
||||
/**
 * @brief Registry-based factory for creating objects by runtime identifier.
 *
 * Allows registering derived types and instantiating them using a ClassId key.
 *
 * Notes:
 *  - Not thread-safe. Register types during initialization phase only.
 *  - Re-registering the same id overwrites the previous entry ("last wins").
 *  - ClassId must be hashable (std::hash<ClassId> specialization must exist).
 *  - Arguments are forwarded with std::forward<Args>, so value types are
 *    moved into the constructor while reference types (e.g. `Foo&`) are
 *    passed through as references.
 *
 * @tparam ClassId   Unique class identifier type (e.g. std::string, int, enum).
 * @tparam BaseClass Base type for all created objects.
 * @tparam Args      Constructor argument types forwarded to registered derived types.
 *
 * Usage example:
 * @code
 *  #include <string>
 *
 *  struct Base {
 *      virtual ~Base() = default;
 *      virtual int get() = 0;
 *  };
 *
 *  struct Derived1 : Base {
 *      explicit Derived1(int start) : m_start(start) {}
 *      int get() override { return m_start + 1; }
 *      int m_start = 0;
 *  };
 *
 *  struct Derived2 : Base {
 *      explicit Derived2(int start) : m_start(start) {}
 *      int get() override { return m_start + 2; }
 *      int m_start = 0;
 *  };
 *
 *  TypeFactory<std::string, Base, int> factory;
 *  factory.registerType<Derived1>("one");
 *  factory.registerType<Derived2>("two");
 *
 *  auto a = factory.create("one", 10);
 *  auto b = factory.creator("two")(10); // advanced API: get creator function
 *
 *  (void)a->get();
 *  (void)b->get();
 * @endcode
 */
template <class ClassId, class BaseClass, class... Args>
class TypeFactory {
public:
    using BasePtr = std::shared_ptr<BaseClass>;
    using CreatorFn = BasePtr (*)(Args...);

    TypeFactory() = default;

    /**
     * @brief Registers a Derived type under the given id.
     *
     * Requirements (checked at compile-time):
     *  - Derived must inherit from BaseClass
     *  - Derived must be constructible from Args...
     *
     * Re-registering the same id overwrites the previous entry ("last wins").
     */
    template <class Derived>
    void registerType(const ClassId& id) {
        static_assert(std::is_base_of_v<BaseClass, Derived>,
                      "Derived must inherit from BaseClass");
        static_assert(std::is_constructible_v<Derived, Args...>,
                      "Derived must be constructible from Args...");

        classes.insert_or_assign(id, &instantiate<Derived>);
    }

    /**
     * @brief Returns creator function for the given id.
     * @throws std::out_of_range when id is not registered.
     */
    [[nodiscard]] CreatorFn creator(const ClassId& id) const {
        return classes.at(id);
    }

    /**
     * @brief Creates an instance for the given id using provided arguments.
     * @throws std::out_of_range when id is not registered.
     */
    [[nodiscard]] BasePtr create(const ClassId& id, Args... args) const {
        // std::forward<Args> (not std::move): for a reference type in Args an
        // rvalue would not bind to the creator's `T&` parameter.
        return creator(id)(std::forward<Args>(args)...);
    }

    /**
     * @brief Checks whether an id is registered.
     */
    [[nodiscard]] bool contains(const ClassId& id) const {
        return classes.find(id) != classes.end();
    }

protected:
    // Protected by design: derived factories may extend/inspect the registry.
    std::unordered_map<ClassId, CreatorFn> classes;

private:
    /**
     * @brief Creator stored in the registry: constructs Derived from Args.
     */
    template <class Derived>
    static BasePtr instantiate(Args... args) {
        // Same reasoning as in create(): preserve the declared value category.
        return std::make_shared<Derived>(std::forward<Args>(args)...);
    }
};
|
||||
#endif // TYPEFACTORY_H
|
||||
Reference in New Issue
Block a user