Compare commits
5 Commits
1ebdde1ea2
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| cfcacee550 | |||
| 3ef56d2c52 | |||
| 6a003d5df4 | |||
| c2f42fd1aa | |||
| c980049751 |
136
README.md
136
README.md
@@ -1,3 +1,135 @@
|
|||||||
# kindle2latex
|
# Kindle Clippings → LaTeX Converter
|
||||||
|
|
||||||
Convert amazon kindle clippings to latex
|
A small console utility that converts Amazon Kindle *My Clippings* text exports into structured LaTeX.
|
||||||
|
|
||||||
|
The tool parses Kindle highlights and groups them by book title, producing a LaTeX structure with:
|
||||||
|
|
||||||
|
* `\section{}` — per book
|
||||||
|
* `\subsection{}` — per highlight (metadata line)
|
||||||
|
* Highlight text — inserted as plain LaTeX content
|
||||||
|
* `\subsubsection{notes}` — placeholder for future comments
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
This project demonstrates **two different parsing approaches** solving the same problem:
|
||||||
|
|
||||||
|
### 1️⃣ FSM-based parser
|
||||||
|
|
||||||
|
Implemented using a template-based finite state machine (`fsm.h`).
|
||||||
|
|
||||||
|
Characteristics:
|
||||||
|
|
||||||
|
* Compile-time validated transitions
|
||||||
|
* Strong type safety
|
||||||
|
* Explicit state/event model
|
||||||
|
* Strict contract enforcement
|
||||||
|
|
||||||
|
This version is useful when:
|
||||||
|
|
||||||
|
* The input format is more complex
|
||||||
|
* You want compile-time guarantees for state transitions
|
||||||
|
* The parsing logic may grow over time
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 2️⃣ TypeFactory-based parser
|
||||||
|
|
||||||
|
Implemented using a registration-based factory (`typefactory.h`).
|
||||||
|
|
||||||
|
Characteristics:
|
||||||
|
|
||||||
|
* Stage-driven pipeline
|
||||||
|
* One handler per parsing stage
|
||||||
|
* Runtime validation of handler registration
|
||||||
|
* No per-line handler allocations (handlers are created once and cached)
|
||||||
|
|
||||||
|
This version is:
|
||||||
|
|
||||||
|
* Simpler
|
||||||
|
* More readable
|
||||||
|
* Easier to debug
|
||||||
|
* Well suited for linear, protocol-like formats
|
||||||
|
|
||||||
|
Both implementations produce identical LaTeX output.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Input Format
|
||||||
|
|
||||||
|
Expected input is a standard Kindle `My Clippings.txt` export.
|
||||||
|
|
||||||
|
Each clipping block follows this structure:
|
||||||
|
|
||||||
|
```
|
||||||
|
Book Title
|
||||||
|
- Your Highlight on Location 123-125 | Added on ...
|
||||||
|
|
||||||
|
Highlighted text line 1
|
||||||
|
Highlighted text line 2
|
||||||
|
==========
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Output Format
|
||||||
|
|
||||||
|
Generated LaTeX structure:
|
||||||
|
|
||||||
|
```latex
|
||||||
|
\section{Book Title}
|
||||||
|
|
||||||
|
\subsection{- Your Highlight on Location 123-125 | Added on ...}
|
||||||
|
Highlighted text line 1
|
||||||
|
Highlighted text line 2
|
||||||
|
\subsubsection{notes}
|
||||||
|
```
|
||||||
|
|
||||||
|
Highlights are grouped by book title.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Build
|
||||||
|
|
||||||
|
Requires a C++17-compatible compiler and a platform that provides `getopt_long` (e.g. Linux or macOS).
|
||||||
|
|
||||||
|
```bash
|
||||||
|
g++ -std=gnu++17 -Wall -Wextra -O2 -o kindle2latex main.cpp
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./kindle2latex --input input.txt --output output.tex
|
||||||
|
```
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
|
||||||
|
| Argument | Description |
|
||||||
|
| ---------- | ----------------------------- |
|
||||||
|
| `--input` | Path to Kindle clippings file |
|
||||||
|
| `--output` | Path to generated LaTeX file |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Design Notes
|
||||||
|
|
||||||
|
* No handler allocations per input line (handlers are created once and cached).
|
||||||
|
* Order of books is preserved as in the original file.
|
||||||
|
* LaTeX special characters are escaped automatically.
|
||||||
|
* Incomplete clipping blocks (missing title or metadata line) are safely ignored.
|
||||||
|
* The final block is flushed even if the file does not end with `==========`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Why Two Implementations?
|
||||||
|
|
||||||
|
This repository intentionally keeps two different parsing styles:
|
||||||
|
|
||||||
|
* The FSM version demonstrates strict compile-time state control.
|
||||||
|
* The TypeFactory version demonstrates a clean, extensible runtime pipeline.
|
||||||
|
|
||||||
|
The goal is architectural exploration and comparison, not just solving the parsing task.
|
||||||
|
|||||||
@@ -10,7 +10,7 @@
|
|||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "typefactory.h" // <-- твой хедер
|
#include "typefactory.h"
|
||||||
|
|
||||||
// ------------------------------------------------------------
|
// ------------------------------------------------------------
|
||||||
// Helpers
|
// Helpers
|
||||||
@@ -66,7 +66,7 @@ static std::string latex_escape (const std::string &s) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ------------------------------------------------------------
|
// ------------------------------------------------------------
|
||||||
// Model
|
// Data model
|
||||||
// ------------------------------------------------------------
|
// ------------------------------------------------------------
|
||||||
|
|
||||||
struct Quote {
|
struct Quote {
|
||||||
@@ -97,12 +97,14 @@ struct ParseContext {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void finalize_quote() {
|
void finalize_quote() {
|
||||||
|
// Ignore incomplete entries
|
||||||
if (cur_title.empty() || cur_meta.empty()) {
|
if (cur_title.empty() || cur_meta.empty()) {
|
||||||
cur_meta.clear();
|
cur_meta.clear();
|
||||||
cur_text.clear();
|
cur_text.clear();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Preserve insertion order of sections
|
||||||
if (seen.find (cur_title) == seen.end()) {
|
if (seen.find (cur_title) == seen.end()) {
|
||||||
seen.emplace (cur_title, order.size());
|
seen.emplace (cur_title, order.size());
|
||||||
order.push_back (cur_title);
|
order.push_back (cur_title);
|
||||||
@@ -116,52 +118,71 @@ struct ParseContext {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// ------------------------------------------------------------
|
// ------------------------------------------------------------
|
||||||
// Handlers
|
// Parsing stages
|
||||||
// ------------------------------------------------------------
|
// ------------------------------------------------------------
|
||||||
|
|
||||||
enum class Stage { Title, Meta, Body };
|
enum class Stage { Title, Meta, Body };
|
||||||
|
|
||||||
|
// Explicit list of all supported stages for runtime validation
|
||||||
static constexpr std::array<Stage, 3> kAllStages = {
|
static constexpr std::array<Stage, 3> kAllStages = {
|
||||||
Stage::Title, Stage::Meta, Stage::Body
|
Stage::Title, Stage::Meta, Stage::Body
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ILineHandler {
|
struct ILineHandler {
|
||||||
virtual ~ILineHandler() = default;
|
virtual ~ILineHandler() = default;
|
||||||
virtual void handle (const std::string &line, ParseContext &ctx, Stage &next) = 0;
|
|
||||||
|
// Process a single line and decide the next stage
|
||||||
|
virtual void handle (const std::string &line,
|
||||||
|
ParseContext &ctx,
|
||||||
|
Stage &next) = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct TitleHandler final : ILineHandler {
|
struct TitleHandler final : ILineHandler {
|
||||||
void handle (const std::string &line, ParseContext &ctx, Stage &next) override {
|
void handle (const std::string &line,
|
||||||
|
ParseContext &ctx,
|
||||||
|
Stage &next) override {
|
||||||
|
|
||||||
|
// Skip empty lines between entries
|
||||||
if (line.empty()) {
|
if (line.empty()) {
|
||||||
next = Stage::Title; // пропускаем пустые между блоками
|
next = Stage::Title;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx.start_title (line);
|
ctx.start_title (line);
|
||||||
next = Stage::Meta;
|
next = Stage::Meta;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct MetaHandler final : ILineHandler {
|
struct MetaHandler final : ILineHandler {
|
||||||
void handle (const std::string &line, ParseContext &ctx, Stage &next) override {
|
void handle (const std::string &line,
|
||||||
|
ParseContext &ctx,
|
||||||
|
Stage &next) override {
|
||||||
|
|
||||||
ctx.set_meta (line);
|
ctx.set_meta (line);
|
||||||
next = Stage::Body;
|
next = Stage::Body;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct BodyHandler final : ILineHandler {
|
struct BodyHandler final : ILineHandler {
|
||||||
void handle (const std::string &line, ParseContext &ctx, Stage &next) override {
|
void handle (const std::string &line,
|
||||||
|
ParseContext &ctx,
|
||||||
|
Stage &next) override {
|
||||||
|
|
||||||
|
// Separator marks the end of a clipping block
|
||||||
if (line == "==========") {
|
if (line == "==========") {
|
||||||
ctx.finalize_quote();
|
ctx.finalize_quote();
|
||||||
next = Stage::Title;
|
next = Stage::Title;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
ctx.add_text (line); // в том числе пустые строки внутри цитаты
|
|
||||||
|
// Keep body lines as-is (including empty ones)
|
||||||
|
ctx.add_text (line);
|
||||||
next = Stage::Body;
|
next = Stage::Body;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// ------------------------------------------------------------
|
// ------------------------------------------------------------
|
||||||
// Factory wiring (using your typefactory.h) + caching handlers
|
// Factory wiring + handler caching
|
||||||
// ------------------------------------------------------------
|
// ------------------------------------------------------------
|
||||||
|
|
||||||
using HandlerPtr = std::shared_ptr<ILineHandler>;
|
using HandlerPtr = std::shared_ptr<ILineHandler>;
|
||||||
@@ -169,29 +190,33 @@ using HandlerMap = std::unordered_map<Stage, HandlerPtr>;
|
|||||||
|
|
||||||
static TypeFactory<Stage, ILineHandler> build_factory() {
|
static TypeFactory<Stage, ILineHandler> build_factory() {
|
||||||
TypeFactory<Stage, ILineHandler> f;
|
TypeFactory<Stage, ILineHandler> f;
|
||||||
|
|
||||||
f.registerType<TitleHandler> (Stage::Title);
|
f.registerType<TitleHandler> (Stage::Title);
|
||||||
f.registerType<MetaHandler> (Stage::Meta);
|
f.registerType<MetaHandler> (Stage::Meta);
|
||||||
f.registerType<BodyHandler> (Stage::Body);
|
f.registerType<BodyHandler> (Stage::Body);
|
||||||
|
|
||||||
return f;
|
return f;
|
||||||
}
|
}
|
||||||
|
|
||||||
static HandlerMap build_handlers_cache (const TypeFactory<Stage, ILineHandler> &factory) {
|
static HandlerMap build_handlers_cache (
|
||||||
|
const TypeFactory<Stage, ILineHandler> &factory) {
|
||||||
|
|
||||||
HandlerMap handlers;
|
HandlerMap handlers;
|
||||||
handlers.reserve (kAllStages.size());
|
handlers.reserve (kAllStages.size());
|
||||||
|
|
||||||
// Рантайм-валидация: для каждого Stage обязаны уметь создать handler
|
// Runtime validation: ensure each stage has a registered handler
|
||||||
for (Stage st : kAllStages) {
|
for (Stage st : kAllStages) {
|
||||||
try {
|
try {
|
||||||
auto h = factory.create (st); // shared_ptr<ILineHandler>
|
auto h = factory.create (st);
|
||||||
if (!h)
|
if (!h)
|
||||||
throw std::runtime_error ("Factory returned null handler");
|
throw std::runtime_error ("Factory returned null handler");
|
||||||
handlers.emplace (st, std::move (h));
|
handlers.emplace (st, std::move (h));
|
||||||
} catch (const std::out_of_range &) {
|
} catch (const std::out_of_range &) {
|
||||||
throw std::runtime_error ("Missing handler registration for some Stage");
|
throw std::runtime_error ("Missing handler registration for Stage");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Доп.проверка: ensure all are present (на случай коллизий/ошибок emplace)
|
// Extra consistency check
|
||||||
if (handlers.size() != kAllStages.size())
|
if (handlers.size() != kAllStages.size())
|
||||||
throw std::runtime_error ("Handler cache size mismatch");
|
throw std::runtime_error ("Handler cache size mismatch");
|
||||||
|
|
||||||
@@ -199,7 +224,7 @@ static HandlerMap build_handlers_cache (const TypeFactory<Stage, ILineHandler> &
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ------------------------------------------------------------
|
// ------------------------------------------------------------
|
||||||
// CLI
|
// CLI handling
|
||||||
// ------------------------------------------------------------
|
// ------------------------------------------------------------
|
||||||
|
|
||||||
struct CliArgs {
|
struct CliArgs {
|
||||||
@@ -208,10 +233,14 @@ struct CliArgs {
|
|||||||
};
|
};
|
||||||
|
|
||||||
static void print_usage (const char *argv0) {
|
static void print_usage (const char *argv0) {
|
||||||
std::cerr << "Usage: " << argv0 << " --input <file> --output <file>\n";
|
std::cerr << "Usage: " << argv0
|
||||||
|
<< " --input <file> --output <file>\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool parse_args (int argc, char **argv, CliArgs &out) {
|
static bool parse_args (int argc,
|
||||||
|
char **argv,
|
||||||
|
CliArgs &out) {
|
||||||
|
|
||||||
static option long_opts[] = {
|
static option long_opts[] = {
|
||||||
{"input", required_argument, nullptr, 'i'},
|
{"input", required_argument, nullptr, 'i'},
|
||||||
{"output", required_argument, nullptr, 'o'},
|
{"output", required_argument, nullptr, 'o'},
|
||||||
@@ -220,7 +249,10 @@ static bool parse_args (int argc, char **argv, CliArgs &out) {
|
|||||||
};
|
};
|
||||||
|
|
||||||
int c = 0;
|
int c = 0;
|
||||||
while ((c = ::getopt_long (argc, argv, "i:o:h", long_opts, nullptr)) != -1) {
|
while ((c = ::getopt_long (argc, argv,
|
||||||
|
"i:o:h",
|
||||||
|
long_opts,
|
||||||
|
nullptr)) != -1) {
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case 'i':
|
case 'i':
|
||||||
out.input = optarg;
|
out.input = optarg;
|
||||||
@@ -241,29 +273,34 @@ static bool parse_args (int argc, char **argv, CliArgs &out) {
|
|||||||
print_usage (argv[0]);
|
print_usage (argv[0]);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ------------------------------------------------------------
|
// ------------------------------------------------------------
|
||||||
// Convert
|
// Conversion logic
|
||||||
// ------------------------------------------------------------
|
// ------------------------------------------------------------
|
||||||
|
|
||||||
static int convert (const std::string &in_path, const std::string &out_path) {
|
static int convert (const std::string &in_path,
|
||||||
|
const std::string &out_path) {
|
||||||
|
|
||||||
std::ifstream in (in_path);
|
std::ifstream in (in_path);
|
||||||
if (!in.is_open()) {
|
if (!in.is_open()) {
|
||||||
std::cerr << "Failed to open input file: " << in_path
|
std::cerr << "Failed to open input file: "
|
||||||
|
<< in_path
|
||||||
<< " (" << std::strerror (errno) << ")\n";
|
<< " (" << std::strerror (errno) << ")\n";
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build factory + cache handlers once
|
// Build factory and cache handlers once
|
||||||
TypeFactory<Stage, ILineHandler> factory = build_factory();
|
TypeFactory<Stage, ILineHandler> factory = build_factory();
|
||||||
|
|
||||||
HandlerMap handlers;
|
HandlerMap handlers;
|
||||||
try {
|
try {
|
||||||
handlers = build_handlers_cache (factory);
|
handlers = build_handlers_cache (factory);
|
||||||
} catch (const std::exception &e) {
|
} catch (const std::exception &e) {
|
||||||
std::cerr << "Internal error while building handler cache: " << e.what() << "\n";
|
std::cerr << "Internal error while building handler cache: "
|
||||||
|
<< e.what() << "\n";
|
||||||
return 4;
|
return 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -276,7 +313,7 @@ static int convert (const std::string &in_path, const std::string &out_path) {
|
|||||||
|
|
||||||
auto it = handlers.find (stage);
|
auto it = handlers.find (stage);
|
||||||
if (it == handlers.end() || !it->second) {
|
if (it == handlers.end() || !it->second) {
|
||||||
std::cerr << "Internal error: handler missing at runtime\n";
|
std::cerr << "Internal error: missing handler at runtime\n";
|
||||||
return 4;
|
return 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -285,16 +322,18 @@ static int convert (const std::string &in_path, const std::string &out_path) {
|
|||||||
stage = next;
|
stage = next;
|
||||||
}
|
}
|
||||||
|
|
||||||
// EOF: если файл не закончился "==========", всё равно зафиксируем последний блок
|
// Ensure the last entry is flushed if file does not end with separator
|
||||||
ctx.finalize_quote();
|
ctx.finalize_quote();
|
||||||
|
|
||||||
std::ofstream out (out_path, std::ios::trunc);
|
std::ofstream out (out_path, std::ios::trunc);
|
||||||
if (!out.is_open()) {
|
if (!out.is_open()) {
|
||||||
std::cerr << "Failed to open output file: " << out_path
|
std::cerr << "Failed to open output file: "
|
||||||
|
<< out_path
|
||||||
<< " (" << std::strerror (errno) << ")\n";
|
<< " (" << std::strerror (errno) << ")\n";
|
||||||
return 3;
|
return 3;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Generate LaTeX output
|
||||||
for (std::size_t i = 0; i < ctx.order.size(); ++i) {
|
for (std::size_t i = 0; i < ctx.order.size(); ++i) {
|
||||||
const auto &title = ctx.order[i];
|
const auto &title = ctx.order[i];
|
||||||
out << "\\section {" << latex_escape (title) << "}\n";
|
out << "\\section {" << latex_escape (title) << "}\n";
|
||||||
@@ -305,8 +344,10 @@ static int convert (const std::string &in_path, const std::string &out_path) {
|
|||||||
|
|
||||||
for (const auto &q : it->second) {
|
for (const auto &q : it->second) {
|
||||||
out << " \\subsection {" << latex_escape (q.meta) << "}\n";
|
out << " \\subsection {" << latex_escape (q.meta) << "}\n";
|
||||||
|
|
||||||
for (const auto &tl : q.text_lines)
|
for (const auto &tl : q.text_lines)
|
||||||
out << " " << latex_escape (tl) << "\n";
|
out << " " << latex_escape (tl) << "\n";
|
||||||
|
|
||||||
out << " \\subsubsection{notes}\n\n";
|
out << " \\subsubsection{notes}\n\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -321,5 +362,6 @@ int main (int argc, char **argv) {
|
|||||||
CliArgs args;
|
CliArgs args;
|
||||||
if (!parse_args (argc, argv, args))
|
if (!parse_args (argc, argv, args))
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
return convert (args.input, args.output);
|
return convert (args.input, args.output);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user