separate fsm version from other

This commit is contained in:
2026-02-28 21:44:47 -05:00
parent 956e031ad2
commit 4400641430
3 changed files with 677 additions and 0 deletions

295
fsm/fsm.h Normal file
View File

@@ -0,0 +1,295 @@
#pragma once
#include <type_traits>
#include <utility>
#include <variant>
/**
 * @brief Policy applied to a (state, event) pair that has no transition defined.
 *
 * The FSM never changes state for such a pair; the policy only selects which
 * logger callback is invoked at runtime and whether validate_events() treats the
 * missing transition as a compile-time error.
 */
enum class missing_transition_policy {
/** @brief Missing transition is considered required by validation. */
strict,
/** @brief Missing transition is allowed (event ignored). */
ignore
};
/**
 * @brief Marker for undefined transition.
 *
 * The primary transition<From, Event> template exposes this tag as its `type`;
 * has_transition and the FSM dispatcher compare against it to detect that no
 * transition was specialized for the pair.
 */
struct no_transition {};
/**
 * @brief Guard that never blocks a transition.
 *
 * Default guard of transition_to. Accepts any context, source state and event
 * and unconditionally answers true.
 */
struct always_allow {
    template <typename Ctx, typename Src, typename Ev>
    bool operator() (Ctx & /*context*/, const Src & /*from*/, const Ev & /*event*/) const noexcept {
        constexpr bool permitted = true;
        return permitted;
    }
};
/**
 * @brief Action with no effect.
 *
 * Default action of transition_to. Accepts any context, source state,
 * destination state and event, and leaves all of them untouched.
 */
struct do_nothing {
    template <typename Ctx, typename Src, typename Dst, typename Ev>
    void operator() (Ctx & /*context*/, const Src & /*from*/, const Dst & /*to*/,
                     const Ev & /*event*/) const noexcept {
        // Intentionally empty.
    }
};
/**
 * @brief Transition descriptor: names the destination state together with the
 * guard and action types used when the transition fires.
 *
 * The FSM default-constructs the guard and the action each time it dispatches,
 * so both types must be default-constructible. The guard is invoked as
 * bool(Context&, const From&, const Event&) and the action as
 * void(Context&, const From&, const To&, const Event&).
 *
 * @tparam TTo Destination state type.
 * @tparam TGuard Guard functor type.
 * @tparam TAction Action functor type.
 */
template <typename TTo, typename TGuard = always_allow, typename TAction = do_nothing>
struct transition_to {
using to_state = TTo;
using guard = TGuard;
using action = TAction;
};
/**
 * @brief Transition table mapping: specialize to define transitions.
 *
 * Specialize transition<From, Event> and set `type` to a transition_to<...>
 * to declare what happens when Event arrives while the FSM holds From.
 * Default: no_transition (the pair is treated as having no transition).
 *
 * @tparam TFrom Source state type.
 * @tparam TEvent Event type.
 */
template <typename TFrom, typename TEvent>
struct transition {
using type = no_transition;
};
/**
 * @brief Optional mapping to names for logs.
 *
 * The FSM passes name_of<State>::value and name_of<Event>::value to the logger
 * callbacks. Specialize for a type to give it a readable name; unspecialized
 * types are reported as "?".
 */
template <typename T>
struct name_of {
static constexpr const char *value = "?";
};
/**
 * @brief Logger interface (default no-op).
 *
 * A logger passed to fsm must provide these three member functions; each one
 * receives name_of<From>::value and name_of<Event>::value, in that order.
 *
 * You can implement:
 * - ignored_transition(from, event)   // no transition defined, policy=ignore
 * - unhandled_transition(from, event) // policy=strict but missing transition at runtime
 * - guard_blocked(from, event)        // transition defined but its guard returned false
 */
struct null_logger {
// Called when an event has no transition and the pair's policy is ignore.
void ignored_transition (const char *, const char *) noexcept {}
// Called when an event has no transition and the pair's policy is strict.
void unhandled_transition (const char *, const char *) noexcept {}
// Called when a transition exists but its guard rejected the event.
void guard_blocked (const char *, const char *) noexcept {}
};
/** @brief Hooks: state-only. */
/**
 * @brief Exit hook for a state, independent of the triggering event.
 *
 * The FSM calls on_exit_state<From>::call(ctx, from) once a transition's guard
 * has passed, after on_exit_event and before the destination state is
 * constructed. Specialize for a state to run code when it is left; the default
 * does nothing.
 */
template <typename TState>
struct on_exit_state {
template <typename TContext>
static void call (TContext &, const TState &) noexcept {}
};
/**
 * @brief Entry hook for a state, independent of the triggering event.
 *
 * The FSM calls on_entry_state<State>::call(ctx, state) for the initial state
 * during construction and for the destination state after every committed
 * transition (before on_entry_event). Specialize for a state to run code when
 * it is entered; the default does nothing.
 */
template <typename TState>
struct on_entry_state {
template <typename TContext>
static void call (TContext &, const TState &) noexcept {}
};
/** @brief Hooks: event-specific. */
/**
 * @brief Exit hook for a specific (state, event) pair.
 *
 * The FSM calls on_exit_event<From, Event>::call(ctx, from, ev) as the first
 * step after a transition's guard has passed, before on_exit_state. Specialize
 * for a pair to observe the event that causes the state to be left; the
 * default does nothing.
 */
template <typename TState, typename TEvent>
struct on_exit_event {
template <typename TContext>
static void call (TContext &, const TState &, const TEvent &) noexcept {}
};
/**
 * @brief Entry hook for a specific (state, event) pair.
 *
 * The FSM calls on_entry_event<To, Event>::call(ctx, to, ev) as the last step
 * of a committed transition, after on_entry_state. It is not called for the
 * initial state, which is entered without an event. The default does nothing.
 */
template <typename TState, typename TEvent>
struct on_entry_event {
template <typename TContext>
static void call (TContext &, const TState &, const TEvent &) noexcept {}
};
/**
 * @brief Factory: construct destination state.
 *
 * IMPORTANT: Now includes TContext in template parameters to avoid templated make<TContext>().
 *
 * Default: To{} (the destination must then be value-initializable).
 * Specialize state_factory<TContext, From, Event, To> as needed, e.g. to build
 * the destination from data carried by the source state or the event.
 * A specialization's make() must return exactly To; the FSM checks this with
 * is_factory_invocable.
 *
 * @tparam TContext Context type of the FSM.
 * @tparam TFrom Source state type.
 * @tparam TEvent Event type that triggers the transition.
 * @tparam TTo Destination state type to construct.
 */
template <typename TContext, typename TFrom, typename TEvent, typename TTo>
struct state_factory {
static TTo make (TContext &, const TFrom &, const TEvent &) {
return TTo{};
}
};
/**
 * @brief Per (State,Event) missing transition policy.
 *
 * Default is ignore. Mark only required pairs strict by specializing this
 * template and setting `policy` to missing_transition_policy::strict.
 *
 * @tparam TFrom State type of the pair.
 * @tparam TEvent Event type of the pair.
 */
template <typename TFrom, typename TEvent>
struct missing_transition {
static constexpr missing_transition_policy policy = missing_transition_policy::ignore;
};
/**
 * @brief Type-list membership test.
 *
 * `value` is true when T is exactly the same type as at least one element of
 * Ts..., and false otherwise (including for an empty Ts... pack).
 */
template <typename T, typename... Ts>
struct is_one_of : std::bool_constant<(std::is_same_v<T, Ts> || ...)> {};
/** @brief Helper: transition exists? */
template <typename TFrom, typename TEvent>
struct has_transition
: std::bool_constant < !std::is_same_v<typename transition<TFrom, TEvent>::type, no_transition >> {};
/** @brief Factory validation (call-form). */
template <typename TContext, typename TFrom, typename TEvent, typename TTo, typename = void>
struct is_factory_invocable : std::false_type {};
template <typename TContext, typename TFrom, typename TEvent, typename TTo>
struct is_factory_invocable <
TContext, TFrom, TEvent, TTo,
std::void_t<decltype (state_factory<TContext, TFrom, TEvent, TTo>::make (
std::declval<TContext &>(),
std::declval<const TFrom &>(),
std::declval<const TEvent &>())) >>
: std::bool_constant<std::is_same_v<
decltype (state_factory<TContext, TFrom, TEvent, TTo>::make (
std::declval<TContext &>(),
std::declval<const TFrom &>(),
std::declval<const TEvent &>())),
TTo>> {};
/**
* @brief FSM with runtime state storage and explicit compile-time validation.
*
* dispatch():
* - never static_asserts on missing transitions (avoids State×Event explosion with std::visit)
* - handles missing transitions via missing_transition<State,Event>::policy (ignore/strict)
*
* validate_events<Ev...>():
* - compile-time checks ONLY for pairs marked strict.
*/
template <typename TContext, typename TLogger, typename... TStates>
class fsm {
public:
using state_variant = std::variant<TStates...>;
template <typename TInitial,
typename = std::enable_if_t<is_one_of<TInitial, TStates...>::value>>
explicit fsm (TContext &ctx, TInitial initial, TLogger logger = TLogger{})
: m_ctx (ctx)
, m_logger (std::move (logger))
, m_state (std::move (initial)) {
std::visit (init_entry_visitor{*this}, m_state);
}
template <typename TEvent>
bool dispatch (const TEvent &ev) {
dispatch_visitor<TEvent> v{*this, ev};
return std::visit (v, m_state);
}
template <typename... TEvents>
static constexpr void validate_events() {
(validate_one_event<TEvents>(), ...);
}
const state_variant &state() const noexcept {
return m_state;
}
state_variant &state() noexcept {
return m_state;
}
private:
template <typename TEvent>
struct dispatch_visitor {
fsm &self;
const TEvent &ev;
template <typename TFrom>
bool operator() (TFrom &from) const {
return self.template dispatch_from<TFrom> (from, ev);
}
};
struct init_entry_visitor {
fsm &self;
template <typename TState>
void operator() (TState &st) const {
on_entry_state<TState>::call (self.m_ctx, st);
}
};
template <typename TEvent>
struct entry_after_commit_visitor {
fsm &self;
const TEvent &ev;
template <typename TState>
void operator() (TState &st) const {
on_entry_state<TState>::call (self.m_ctx, st);
on_entry_event<TState, TEvent>::call (self.m_ctx, st, ev);
}
};
template <typename TFrom, typename TEvent>
bool dispatch_from (TFrom &from, const TEvent &ev) {
using tr = typename transition<TFrom, TEvent>::type;
if constexpr (std::is_same_v<tr, no_transition>) {
if constexpr (missing_transition<TFrom, TEvent>::policy == missing_transition_policy::strict)
m_logger.unhandled_transition (name_of<TFrom>::value, name_of<TEvent>::value);
else
m_logger.ignored_transition (name_of<TFrom>::value, name_of<TEvent>::value);
return false;
} else {
using to_state = typename tr::to_state;
using guard_t = typename tr::guard;
using action_t = typename tr::action;
static_assert (is_one_of<to_state, TStates...>::value,
"FSM: transition target state not in FSM state list.");
static_assert (std::is_invocable_r_v<bool, guard_t, TContext &, const TFrom &, const TEvent &>,
"FSM: guard must be callable as bool(Context&, const From&, const Event&).");
static_assert (std::is_invocable_r_v<void, action_t, TContext &, const TFrom &, const to_state &, const TEvent &>,
"FSM: action must be callable as void(Context&, const From&, const To&, const Event&).");
static_assert (is_factory_invocable<TContext, TFrom, TEvent, to_state>::value,
"FSM: state_factory<Context,From,Event,To>::make must be callable as "
"To make(Context&, const From&, const Event&).");
guard_t guard{};
if (!guard (m_ctx, from, ev)) {
m_logger.guard_blocked (name_of<TFrom>::value, name_of<TEvent>::value);
return false;
}
on_exit_event<TFrom, TEvent>::call (m_ctx, from, ev);
on_exit_state<TFrom>::call (m_ctx, from);
to_state to = state_factory<TContext, TFrom, TEvent, to_state>::make (m_ctx, from, ev);
action_t action{};
action (m_ctx, from, to, ev);
m_state = std::move (to);
std::visit (entry_after_commit_visitor<TEvent> {*this, ev}, m_state);
return true;
}
}
template <typename TEvent>
static constexpr void validate_one_event() {
(validate_pair<TStates, TEvent>(), ...);
}
template <typename TState, typename TEvent>
static constexpr void validate_pair() {
if constexpr (missing_transition<TState, TEvent>::policy == missing_transition_policy::strict) {
static_assert (has_transition<TState, TEvent>::value,
"FSM validation: required (strict) transition is missing for (State, Event).");
}
}
private:
TContext &m_ctx;
TLogger m_logger;
state_variant m_state;
};

10
fsm/kindle2latex.pro Normal file
View File

@@ -0,0 +1,10 @@
TEMPLATE = app
CONFIG += console c++17
CONFIG -= app_bundle
CONFIG -= qt
SOURCES += \
main.cpp
HEADERS += \
fsm.h

372
fsm/main.cpp Normal file
View File

@@ -0,0 +1,372 @@
#include "fsm.h"
#include <cerrno>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <getopt.h>
#include <iostream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
// ------------------------------------------------------------
// Data model
// ------------------------------------------------------------
/** @brief One parsed clipping, stored under its title in ParserContext::by_title. */
struct Quote {
// Metadata line of the clipping; emitted as the \subsection heading.
std::string meta;
// Body lines of the clipping, in input order; emitted one per output line.
std::vector<std::string> text_lines;
};
/**
 * @brief Drop a single trailing carriage return, if present.
 *
 * Lets lines read with std::getline from CRLF-terminated input compare equal
 * to their LF-terminated counterparts. At most one '\r' is removed.
 *
 * @param s Line to clean (taken by value).
 * @return The line without its final '\r'; unchanged when it does not end in one.
 */
static std::string rstrip_cr (std::string s) {
    const bool ends_with_cr = !s.empty() && s.back() == '\r';

    if (ends_with_cr)
        s.erase (s.size() - 1);

    return s;
}
/**
 * @brief Escape the characters LaTeX treats specially.
 *
 * Replaces \ { } % $ # & _ ^ ~ with their LaTeX escape sequences and copies
 * every other byte through unchanged.
 *
 * @param s Raw text.
 * @return Text with the special characters escaped.
 */
static std::string latex_escape (const std::string &s) {
    // Maps a byte to its LaTeX replacement, or nullptr when it needs no escaping.
    const auto replacement_for = [] (unsigned char c) -> const char * {
        switch (c) {
            case '\\': return "\\textbackslash{}";
            case '{':  return "\\{";
            case '}':  return "\\}";
            case '%':  return "\\%";
            case '$':  return "\\$";
            case '#':  return "\\#";
            case '&':  return "\\&";
            case '_':  return "\\_";
            case '^':  return "\\textasciicircum{}";
            case '~':  return "\\textasciitilde{}";
            default:   return nullptr;
        }
    };

    std::string escaped;
    // Leave some headroom for the escape sequences to limit reallocations.
    escaped.reserve (s.size() + s.size() / 8);

    for (const char raw : s) {
        if (const char *replacement = replacement_for (static_cast<unsigned char> (raw)))
            escaped += replacement;
        else
            escaped.push_back (raw);
    }

    return escaped;
}
// ------------------------------------------------------------
// FSM: parsing Kindle "My Clippings" style format
// ------------------------------------------------------------
// Parser states. All are empty tag types; the parsed data lives in ParserContext.

// Initial state: waiting for the title line of the next clipping.
struct StExpectTitle {};
// A title was read; the next line is taken as the clipping's metadata.
struct StExpectMeta {};
// Metadata was read; a blank line is expected before the text (text is accepted too).
struct StExpectBlank {};
// Collecting the clipping's text lines.
struct StCollectText {};
// Parser events. convert() classifies each input line into exactly one of these.

// A title line; s holds the line content.
struct EvTitle {
std::string s;
};
// A metadata line; s holds the line content.
struct EvMeta {
std::string s;
};
// An empty line.
struct EvBlank {};
// A line of clipping text; s holds the line content.
struct EvText {
std::string s;
};
// The "==========" record separator line.
struct EvSep {};
// End of input.
struct EvEof {};
/**
 * @brief Mutable parsing state shared by all FSM actions.
 *
 * Holds the quotes collected so far, grouped by title in first-seen order,
 * plus the pieces of the quote that is currently being read.
 */
struct ParserContext {
    // group storage
    std::vector<std::string> order;                                 // titles in first-seen order
    std::unordered_map<std::string, std::vector<Quote>> by_title;   // quotes per title
    std::unordered_map<std::string, std::size_t> seen;              // title -> index in `order`

    // currently parsed quote
    std::string cur_title;
    std::string cur_meta;
    std::vector<std::string> cur_text;

    /** @brief Begin a new quote under title @p t, discarding any pending meta/text. */
    void start_title (std::string t) {
        cur_title = std::move (t);
        cur_meta.clear();
        cur_text.clear();
    }

    /** @brief Record the metadata line of the current quote. */
    void set_meta (std::string m) {
        cur_meta = std::move (m);
    }

    /** @brief Append one text line to the current quote. */
    void add_text (std::string line) {
        cur_text.push_back (std::move (line));
    }

    /**
     * @brief Store the current quote if it has both a title and a meta line.
     *
     * An incomplete quote is dropped. In both cases the pending meta and text
     * are cleared afterwards; the current title is kept.
     */
    void finalize_quote_if_ready() {
        if (!cur_title.empty() && !cur_meta.empty()) {
            // emplace() reports whether the title is new, so one lookup suffices.
            if (seen.emplace (cur_title, order.size()).second)
                order.push_back (cur_title);

            // The buffers are reset right below, so hand them over instead of copying.
            by_title[cur_title].push_back (Quote{std::move (cur_meta), std::move (cur_text)});
        }

        // clear() puts the (possibly moved-from) buffers back into a known empty state.
        cur_meta.clear();
        cur_text.clear();
    }
};
// Logger used by the clippings parser: inherits the no-op callbacks of
// null_logger, so ignored events and blocked guards produce no output.
struct SilentLogger : null_logger {};
// Actions
// Action for StExpectTitle -> StExpectMeta on EvTitle: starts a new quote in
// the context using the title text carried by the event.
struct ActSetTitle {
void operator() (ParserContext &ctx, const StExpectTitle &, const StExpectMeta &, const EvTitle &ev) const {
ctx.start_title (ev.s);
}
};
// Action for StExpectMeta -> StExpectBlank on EvMeta: stores the metadata
// line carried by the event as the current quote's meta.
struct ActSetMeta {
void operator() (ParserContext &ctx, const StExpectMeta &, const StExpectBlank &, const EvMeta &ev) const {
ctx.set_meta (ev.s);
}
};
// Action for any transition into StCollectText on EvText: appends the event's
// line to the current quote's text. Templated on the source state so the same
// action serves every state that accepts text.
struct ActAddText {
template <typename TFrom>
void operator() (ParserContext &ctx, const TFrom &, const StCollectText &, const EvText &ev) const {
ctx.add_text (ev.s);
}
};
// Action for any transition into StExpectTitle on EvSep: the separator ends
// the current record, so the pending quote is stored if it is complete.
// Templated on the source state so it can be reused from any state.
struct ActFinalizeOnSep {
template <typename TFrom>
void operator() (ParserContext &ctx, const TFrom &, const StExpectTitle &, const EvSep &) const {
ctx.finalize_quote_if_ready();
}
};
// Action for any transition into StExpectTitle on EvEof: end of input also
// ends the current record, so the pending quote is stored if it is complete.
// Templated on the source state so it can be reused from any state.
struct ActFinalizeOnEof {
template <typename TFrom>
void operator() (ParserContext &ctx, const TFrom &, const StExpectTitle &, const EvEof &) const {
ctx.finalize_quote_if_ready();
}
};
// Transitions
//
// Regular record layout: title, meta, blank line, text lines, separator.
// The separator and end of input are unambiguous record boundaries, so they
// are accepted from every state in which a record can be open. Without that, a
// record cut short (separator right after the title or the meta line) would
// leave the parser stuck and the following record's title/meta would be
// consumed as meta/text of the truncated one.

// --- StExpectTitle ---
template <> struct transition<StExpectTitle, EvTitle> {
    using type = transition_to<StExpectMeta, always_allow, ActSetTitle>;
};
// Blank lines before a title are skipped.
template <> struct transition<StExpectTitle, EvBlank> {
    using type = transition_to<StExpectTitle>;
};
template <> struct transition<StExpectTitle, EvEof> {
    using type = transition_to<StExpectTitle, always_allow, ActFinalizeOnEof>;
};

// --- StExpectMeta ---
template <> struct transition<StExpectMeta, EvMeta> {
    using type = transition_to<StExpectBlank, always_allow, ActSetMeta>;
};
// Record ended right after its title: nothing complete to store, resynchronize.
template <> struct transition<StExpectMeta, EvSep> {
    using type = transition_to<StExpectTitle, always_allow, ActFinalizeOnSep>;
};
template <> struct transition<StExpectMeta, EvEof> {
    using type = transition_to<StExpectTitle, always_allow, ActFinalizeOnEof>;
};

// --- StExpectBlank ---
template <> struct transition<StExpectBlank, EvBlank> {
    using type = transition_to<StCollectText>;
};
// Some clippings have no blank line between meta and text.
template <> struct transition<StExpectBlank, EvText> {
    using type = transition_to<StCollectText, always_allow, ActAddText>;
};
// Record ended right after its meta line: store it with empty text.
template <> struct transition<StExpectBlank, EvSep> {
    using type = transition_to<StExpectTitle, always_allow, ActFinalizeOnSep>;
};
template <> struct transition<StExpectBlank, EvEof> {
    using type = transition_to<StExpectTitle, always_allow, ActFinalizeOnEof>;
};

// --- StCollectText ---
template <> struct transition<StCollectText, EvText> {
    using type = transition_to<StCollectText, always_allow, ActAddText>;
};
template <> struct transition<StCollectText, EvSep> {
    using type = transition_to<StExpectTitle, always_allow, ActFinalizeOnSep>;
};
template <> struct transition<StCollectText, EvEof> {
    using type = transition_to<StExpectTitle, always_allow, ActFinalizeOnEof>;
};
// Strict contract for the pairs we dispatch.
// Marking a pair strict makes validate_events() fail to compile if its
// transition specialization is ever removed. Pairs not listed here keep the
// default ignore policy.

// Pairs starting in StExpectTitle.
template <> struct missing_transition<StExpectTitle, EvTitle> {
static constexpr missing_transition_policy policy = missing_transition_policy::strict;
};
template <> struct missing_transition<StExpectTitle, EvBlank> {
static constexpr missing_transition_policy policy = missing_transition_policy::strict;
};
template <> struct missing_transition<StExpectTitle, EvEof> {
static constexpr missing_transition_policy policy = missing_transition_policy::strict;
};
// Pairs starting in StExpectMeta.
template <> struct missing_transition<StExpectMeta, EvMeta> {
static constexpr missing_transition_policy policy = missing_transition_policy::strict;
};
// Pairs starting in StExpectBlank.
template <> struct missing_transition<StExpectBlank, EvBlank> {
static constexpr missing_transition_policy policy = missing_transition_policy::strict;
};
template <> struct missing_transition<StExpectBlank, EvText> {
static constexpr missing_transition_policy policy = missing_transition_policy::strict;
};
// Pairs starting in StCollectText.
template <> struct missing_transition<StCollectText, EvText> {
static constexpr missing_transition_policy policy = missing_transition_policy::strict;
};
template <> struct missing_transition<StCollectText, EvSep> {
static constexpr missing_transition_policy policy = missing_transition_policy::strict;
};
template <> struct missing_transition<StCollectText, EvEof> {
static constexpr missing_transition_policy policy = missing_transition_policy::strict;
};
// Concrete FSM for the clippings parser: ParserContext as context, a silent
// logger, and the four parser states.
using ParserFsm = fsm<ParserContext, SilentLogger, StExpectTitle, StExpectMeta, StExpectBlank, StCollectText>;
// Instantiates the compile-time validation for every event type the parser
// dispatches. The checks are static_asserts, so any violation is reported
// while compiling; the function has no runtime effect.
static constexpr void validate_fsm_contract() {
ParserFsm::validate_events<EvTitle, EvMeta, EvBlank, EvText, EvSep, EvEof>();
}
// ------------------------------------------------------------
// CLI
// ------------------------------------------------------------
// Command-line arguments; both fields must be non-empty for parse_args() to succeed.
struct CliArgs {
// Path given with -i / --input.
std::string input;
// Path given with -o / --output.
std::string output;
};
// Writes a one-line usage summary to stderr.
// argv0: program name to show in the summary (normally argv[0]).
static void print_usage (const char *argv0) {
std::cerr << "Usage: " << argv0 << " --input <file> --output <file>\n";
}
/**
 * @brief Parse the command line with getopt_long.
 *
 * Recognizes -i/--input <file>, -o/--output <file> and -h/--help. The usage
 * text is printed whenever false is returned.
 *
 * @param argc Argument count as passed to main.
 * @param argv Argument vector as passed to main.
 * @param out Receives the input and output paths.
 * @return true when both paths were supplied; false for --help, for an
 *         unknown or malformed option, or when a path is missing.
 */
static bool parse_args (int argc, char **argv, CliArgs &out) {
    static option long_opts[] = {
        {"input", required_argument, nullptr, 'i'},
        {"output", required_argument, nullptr, 'o'},
        {"help", no_argument, nullptr, 'h'},
        {nullptr, 0, nullptr, 0 }
    };

    for (;;) {
        const int opt = ::getopt_long (argc, argv, "i:o:h", long_opts, nullptr);

        if (opt == -1)
            break;

        if (opt == 'i') {
            out.input = optarg;
        } else if (opt == 'o') {
            out.output = optarg;
        } else {
            // -h/--help and anything getopt rejects end up here.
            print_usage (argv[0]);
            return false;
        }
    }

    const bool have_both_paths = !out.input.empty() && !out.output.empty();

    if (!have_both_paths)
        print_usage (argv[0]);

    return have_both_paths;
}
// ------------------------------------------------------------
// Conversion
// ------------------------------------------------------------
static int convert (const std::string &in_path, const std::string &out_path) {
std::ifstream in (in_path);
if (!in.is_open()) {
std::cerr << "Failed to open input file: " << in_path << " (" << std::strerror (errno) << ")\n";
return 2;
}
ParserContext ctx{};
ParserFsm fsm (ctx, StExpectTitle{}, SilentLogger{});
std::string line;
while (std::getline (in, line)) {
line = rstrip_cr (std::move (line));
if (line == "==========") {
fsm.dispatch (EvSep{});
continue;
}
const bool is_blank = line.empty();
const auto &st = fsm.state();
if (std::holds_alternative<StExpectTitle> (st)) {
if (is_blank)
fsm.dispatch (EvBlank{});
else
fsm.dispatch (EvTitle{line});
} else if (std::holds_alternative<StExpectMeta> (st)) {
// Kindle format expects meta here. If missing, keep going.
fsm.dispatch (EvMeta{line});
} else if (std::holds_alternative<StExpectBlank> (st)) {
if (is_blank)
fsm.dispatch (EvBlank{});
else
fsm.dispatch (EvText{line}); // some clippings have no blank line
} else {
// StCollectText
fsm.dispatch (EvText{line});
}
}
fsm.dispatch (EvEof{});
std::ofstream out (out_path, std::ios::trunc);
if (!out.is_open()) {
std::cerr << "Failed to open output file: " << out_path << " (" << std::strerror (errno) << ")\n";
return 3;
}
for (std::size_t i = 0; i < ctx.order.size(); ++i) {
const auto &title = ctx.order[i];
out << "\\section {" << latex_escape (title) << "}\n";
const auto it = ctx.by_title.find (title);
if (it == ctx.by_title.end())
continue;
for (const auto &q : it->second) {
out << " \\subsection {" << latex_escape (q.meta) << "}\n";
for (const auto &tl : q.text_lines)
out << " " << latex_escape (tl) << "\n";
out << " \\subsubsection{notes}\n\n";
}
if (i + 1 < ctx.order.size())
out << "\n";
}
return 0;
}
// Program entry point.
// Returns 1 when the command line is rejected (this includes --help),
// otherwise the status code returned by convert().
int main (int argc, char **argv) {
// Forces instantiation of the compile-time FSM checks; does nothing at runtime.
validate_fsm_contract();
CliArgs args;
if (!parse_args (argc, argv, args))
return 1;
return convert (args.input, args.output);
}