From 6ae5f83fc9739d364bee67577a464311aed43598 Mon Sep 17 00:00:00 2001 From: deeaitch Date: Mon, 18 May 2026 16:46:07 -0400 Subject: [PATCH] Issue #1. Build example files --- .../examples/two_sum_logs_demo.cpp | 223 ------------------ .../examples/two_sum_logs_demo/.gitignore | 74 ++++++ .../examples/two_sum_logs_demo/main.cpp | 206 ++++++++++++++++ .../two_sum_logs_demo/two_sum_logs_demo.pro | 7 + 4 files changed, 287 insertions(+), 223 deletions(-) delete mode 100644 analysis/03-Two_Sum_Is_Not_About_Numbers/examples/two_sum_logs_demo.cpp create mode 100644 analysis/03-Two_Sum_Is_Not_About_Numbers/examples/two_sum_logs_demo/.gitignore create mode 100644 analysis/03-Two_Sum_Is_Not_About_Numbers/examples/two_sum_logs_demo/main.cpp create mode 100644 analysis/03-Two_Sum_Is_Not_About_Numbers/examples/two_sum_logs_demo/two_sum_logs_demo.pro diff --git a/analysis/03-Two_Sum_Is_Not_About_Numbers/examples/two_sum_logs_demo.cpp b/analysis/03-Two_Sum_Is_Not_About_Numbers/examples/two_sum_logs_demo.cpp deleted file mode 100644 index 7102a6f..0000000 --- a/analysis/03-Two_Sum_Is_Not_About_Numbers/examples/two_sum_logs_demo.cpp +++ /dev/null @@ -1,223 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct Event { - std::int64_t timestamp_ms; - std::string service; - std::string event; - int latency_ms; - std::string request_id; - std::string raw_line; -}; - -struct PairResult { - bool found = false; - Event first; - Event second; -}; - -std::int64_t parseTimestampMs(const std::string& timestamp) -{ - // Expected format: - // YYYY-MM-DDTHH:MM:SS.mmmZ - // For this demo we only convert the HH:MM:SS.mmm part to milliseconds. - const std::size_t t_pos = timestamp.find('T'); - const std::size_t z_pos = timestamp.find('Z'); - - if (t_pos == std::string::npos || z_pos == std::string::npos) { - throw std::runtime_error("Invalid timestamp: " + timestamp); - } - - const std::string time_part = timestamp.substr(t_pos + 1, z_pos - t_pos - 1); - - int hours = 0; - int minutes = 0; - int seconds = 0; - int millis = 0; - char colon1 = '\0'; - char colon2 = '\0'; - char dot = '\0'; - - std::istringstream iss(time_part); - iss >> hours >> colon1 >> minutes >> colon2 >> seconds >> dot >> millis; - - if (!iss || colon1 != ':' || colon2 != ':' || dot != '.') { - throw std::runtime_error("Invalid time part: " + time_part); - } - - return (((hours * 60LL) + minutes) * 60LL + seconds) * 1000LL + millis; -} - -Event parseLogLine(const std::string& line) -{ - std::istringstream iss(line); - - std::string timestamp; - std::string service_token; - std::string event_token; - std::string latency_token; - std::string request_token; - - if (!(iss >> timestamp >> service_token >> event_token >> latency_token >> request_token)) { - throw std::runtime_error("Cannot parse log line: " + line); - } - - auto valueAfterEquals = [](const std::string& token) -> std::string { - const std::size_t pos = token.find('='); - if (pos == std::string::npos || pos + 1 >= token.size()) { - throw std::runtime_error("Invalid token: " + token); - } - return token.substr(pos + 1); - }; - - Event result; - result.timestamp_ms = parseTimestampMs(timestamp); - result.service = valueAfterEquals(service_token); - result.event = valueAfterEquals(event_token); - - std::string latency_value = valueAfterEquals(latency_token); - if (latency_value.size() < 3 || latency_value.substr(latency_value.size() - 2) != "ms") { - throw std::runtime_error("Invalid latency token: " + latency_token); - } - latency_value.erase(latency_value.size() - 2); - result.latency_ms = std::stoi(latency_value); - - result.request_id = valueAfterEquals(request_token); - result.raw_line = line; - - return result; -} - -std::vector parseLogs(const std::vector& lines) -{ - std::vector events; - events.reserve(lines.size()); - - for (const std::string& line : lines) { - events.push_back(parseLogLine(line)); - } - - return events; -} - -void printPair(const PairResult& result, const std::string& label) -{ - std::cout << label << '\n'; - - if (!result.found) { - std::cout << " no pair found\n\n"; - return; - } - - std::cout << " first : " << result.first.raw_line << '\n'; - std::cout << " second: " << result.second.raw_line << '\n'; - std::cout << " combined latency: " - << (result.first.latency_ms + result.second.latency_ms) - << "ms\n\n"; -} - -PairResult interviewStyleReduction(const std::vector& events, int threshold_ms) -{ - // Intentionally wrong for the real-world problem: - // it ignores request_id and time. - for (std::size_t i = 0; i < events.size(); ++i) { - for (std::size_t j = i + 1; j < events.size(); ++j) { - if (events[i].latency_ms + events[j].latency_ms > threshold_ms) { - return PairResult{true, events[i], events[j]}; - } - } - } - - return PairResult{}; -} - -PairResult realSlidingWindowDetection(const std::vector& events, - int threshold_ms, - std::int64_t window_ms) -{ - std::unordered_map > active_events; - - for (const Event& current : events) { - std::deque& bucket = active_events[current.request_id]; - - while (!bucket.empty() && - (current.timestamp_ms - bucket.front().timestamp_ms) > window_ms) { - bucket.pop_front(); - } - - for (const Event& previous : bucket) { - const std::int64_t delta = current.timestamp_ms - previous.timestamp_ms; - - if (delta >= 0 && delta <= window_ms && - previous.latency_ms + current.latency_ms > threshold_ms) { - return PairResult{true, previous, current}; - } - } - - bucket.push_back(current); - } - - return PairResult{}; -} - -void printEvents(const std::vector& events) -{ - std::cout << "Synthetic log stream:\n"; - for (const Event& event : events) { - std::cout << " " << event.raw_line << '\n'; - } - std::cout << '\n'; -} - -int main() -{ - try { - const std::vector raw_logs = { - "2026-04-16T10:15:01.100Z service=api event=parse_input latency=12ms request_id=req-1001", - "2026-04-16T10:15:01.110Z service=cache event=cache_miss latency=48ms request_id=req-1001", - "2026-04-16T10:15:01.120Z service=auth event=token_check latency=58ms request_id=req-2001", - "2026-04-16T10:15:01.130Z service=db event=read_user latency=43ms request_id=req-3001", - "2026-04-16T10:15:01.135Z service=db event=read_user latency=55ms request_id=req-1001", - "2026-04-16T10:15:01.144Z service=net event=external_call latency=47ms request_id=req-1001", - "2026-04-16T10:15:01.200Z service=cache event=cache_miss latency=60ms request_id=req-3001", - "2026-04-16T10:15:01.260Z service=net event=external_call latency=52ms request_id=req-3001" - }; - - const int threshold_ms = 100; - const std::int64_t window_ms = 20; - - const std::vector events = parseLogs(raw_logs); - - printEvents(events); - - std::cout << "Threshold: " << threshold_ms << "ms\n"; - std::cout << "Time window: " << window_ms << "ms\n\n"; - - const PairResult naive_result = interviewStyleReduction(events, threshold_ms); - printPair(naive_result, "Interview-style reduction (ignores request_id and time):"); - - const PairResult real_result = realSlidingWindowDetection(events, threshold_ms, window_ms); - printPair(real_result, "Streaming sliding-window detection:"); - - std::cout << "Notes:\n"; - std::cout << " - The interview-style version can produce a false correlation.\n"; - std::cout << " - In this dataset, it first matches 58ms from req-2001 with 43ms from req-3001.\n"; - std::cout << " - That pair exceeds the threshold, but it is operationally meaningless.\n"; - std::cout << " - The streaming version only correlates events from the same request_id\n"; - std::cout << " and only within the configured time window.\n"; - - return 0; - } - catch (const std::exception& ex) { - std::cerr << "Error: " << ex.what() << '\n'; - return 1; - } -} diff --git a/analysis/03-Two_Sum_Is_Not_About_Numbers/examples/two_sum_logs_demo/.gitignore b/analysis/03-Two_Sum_Is_Not_About_Numbers/examples/two_sum_logs_demo/.gitignore new file mode 100644 index 0000000..4a0b530 --- /dev/null +++ b/analysis/03-Two_Sum_Is_Not_About_Numbers/examples/two_sum_logs_demo/.gitignore @@ -0,0 +1,74 @@ +# This file is used to ignore files which are generated +# ---------------------------------------------------------------------------- + +*~ +*.autosave +*.a +*.core +*.moc +*.o +*.obj +*.orig +*.rej +*.so +*.so.* +*_pch.h.cpp +*_resource.rc +*.qm +.#* +*.*# +core +!core/ +tags +.DS_Store +.directory +*.debug +Makefile* +*.prl +*.app +moc_*.cpp +ui_*.h +qrc_*.cpp +Thumbs.db +*.res +*.rc +/.qmake.cache +/.qmake.stash + +# qtcreator generated files +*.pro.user* +CMakeLists.txt.user* + +# xemacs temporary files +*.flc + +# Vim temporary files +.*.swp + +# Visual Studio generated files +*.ib_pdb_index +*.idb +*.ilk +*.pdb +*.sln +*.suo +*.vcproj +*vcproj.*.*.user +*.ncb +*.sdf +*.opensdf +*.vcxproj +*vcxproj.* + +# MinGW generated files +*.Debug +*.Release + +# Python byte code +*.pyc + +# Binaries +# -------- +*.dll +*.exe + diff --git a/analysis/03-Two_Sum_Is_Not_About_Numbers/examples/two_sum_logs_demo/main.cpp b/analysis/03-Two_Sum_Is_Not_About_Numbers/examples/two_sum_logs_demo/main.cpp new file mode 100644 index 0000000..d6bd8b2 --- /dev/null +++ b/analysis/03-Two_Sum_Is_Not_About_Numbers/examples/two_sum_logs_demo/main.cpp @@ -0,0 +1,206 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct Event { + std::int64_t timestamp_ms; + std::string service; + std::string event; + int latency_ms; + std::string request_id; + std::string raw_line; +}; + +struct PairResult { + bool found = false; + Event first; + Event second; +}; + +std::int64_t parseTimestampMs (const std::string ×tamp) { + // Expected format: + // YYYY-MM-DDTHH:MM:SS.mmmZ + // For this demo we only convert the HH:MM:SS.mmm part to milliseconds. + const std::size_t t_pos = timestamp.find ('T'); + const std::size_t z_pos = timestamp.find ('Z'); + + if (t_pos == std::string::npos || z_pos == std::string::npos) + throw std::runtime_error ("Invalid timestamp: " + timestamp); + + const std::string time_part = timestamp.substr (t_pos + 1, z_pos - t_pos - 1); + + int hours = 0; + int minutes = 0; + int seconds = 0; + int millis = 0; + char colon1 = '\0'; + char colon2 = '\0'; + char dot = '\0'; + + std::istringstream iss (time_part); + iss >> hours >> colon1 >> minutes >> colon2 >> seconds >> dot >> millis; + + if (!iss || colon1 != ':' || colon2 != ':' || dot != '.') + throw std::runtime_error ("Invalid time part: " + time_part); + + return (((hours * 60LL) + minutes) * 60LL + seconds) * 1000LL + millis; +} + +Event parseLogLine (const std::string &line) { + std::istringstream iss (line); + + std::string timestamp; + std::string service_token; + std::string event_token; + std::string latency_token; + std::string request_token; + + if (! (iss >> timestamp >> service_token >> event_token >> latency_token >> request_token)) + throw std::runtime_error ("Cannot parse log line: " + line); + + auto valueAfterEquals = [] (const std::string & token) -> std::string { + const std::size_t pos = token.find ('='); + if (pos == std::string::npos || pos + 1 >= token.size()) + throw std::runtime_error ("Invalid token: " + token); + return token.substr (pos + 1); + }; + + Event result; + result.timestamp_ms = parseTimestampMs (timestamp); + result.service = valueAfterEquals (service_token); + result.event = valueAfterEquals (event_token); + + std::string latency_value = valueAfterEquals (latency_token); + if (latency_value.size() < 3 || latency_value.substr (latency_value.size() - 2) != "ms") + throw std::runtime_error ("Invalid latency token: " + latency_token); + latency_value.erase (latency_value.size() - 2); + result.latency_ms = std::stoi (latency_value); + + result.request_id = valueAfterEquals (request_token); + result.raw_line = line; + + return result; +} + +std::vector parseLogs (const std::vector &lines) { + std::vector events; + events.reserve (lines.size()); + + for (const std::string &line : lines) + events.push_back (parseLogLine (line)); + + return events; +} + +void printPair (const PairResult &result, const std::string &label) { + std::cout << label << '\n'; + + if (!result.found) { + std::cout << " no pair found\n\n"; + return; + } + + std::cout << " first : " << result.first.raw_line << '\n'; + std::cout << " second: " << result.second.raw_line << '\n'; + std::cout << " combined latency: " + << (result.first.latency_ms + result.second.latency_ms) + << "ms\n\n"; +} + +PairResult interviewStyleReduction (const std::vector &events, int threshold_ms) { + // Intentionally wrong for the real-world problem: + // it ignores request_id and time. + for (std::size_t i = 0; i < events.size(); ++i) { + for (std::size_t j = i + 1; j < events.size(); ++j) { + if (events[i].latency_ms + events[j].latency_ms > threshold_ms) { + return PairResult{true, events[i], events[j]}; + } + } + } + + return PairResult{}; +} + +PairResult realSlidingWindowDetection (const std::vector &events, + int threshold_ms, + std::int64_t window_ms) { + std::unordered_map > active_events; + + for (const Event ¤t : events) { + std::deque &bucket = active_events[current.request_id]; + + while (!bucket.empty() && + (current.timestamp_ms - bucket.front().timestamp_ms) > window_ms) + bucket.pop_front(); + + for (const Event &previous : bucket) { + const std::int64_t delta = current.timestamp_ms - previous.timestamp_ms; + + if (delta >= 0 && delta <= window_ms && + previous.latency_ms + current.latency_ms > threshold_ms) { + return PairResult{true, previous, current}; + } + } + + bucket.push_back (current); + } + + return PairResult{}; +} + +void printEvents (const std::vector &events) { + std::cout << "Synthetic log stream:\n"; + for (const Event &event : events) + std::cout << " " << event.raw_line << '\n'; + std::cout << '\n'; +} + +int main() { + try { + const std::vector raw_logs = { + "2026-04-16T10:15:01.100Z service=api event=parse_input latency=12ms request_id=req-1001", + "2026-04-16T10:15:01.110Z service=cache event=cache_miss latency=48ms request_id=req-1001", + "2026-04-16T10:15:01.120Z service=auth event=token_check latency=58ms request_id=req-2001", + "2026-04-16T10:15:01.130Z service=db event=read_user latency=43ms request_id=req-3001", + "2026-04-16T10:15:01.135Z service=db event=read_user latency=55ms request_id=req-1001", + "2026-04-16T10:15:01.144Z service=net event=external_call latency=47ms request_id=req-1001", + "2026-04-16T10:15:01.200Z service=cache event=cache_miss latency=60ms request_id=req-3001", + "2026-04-16T10:15:01.260Z service=net event=external_call latency=52ms request_id=req-3001" + }; + + const int threshold_ms = 100; + const std::int64_t window_ms = 20; + + const std::vector events = parseLogs (raw_logs); + + printEvents (events); + + std::cout << "Threshold: " << threshold_ms << "ms\n"; + std::cout << "Time window: " << window_ms << "ms\n\n"; + + const PairResult naive_result = interviewStyleReduction (events, threshold_ms); + printPair (naive_result, "Interview-style reduction (ignores request_id and time):"); + + const PairResult real_result = realSlidingWindowDetection (events, threshold_ms, window_ms); + printPair (real_result, "Streaming sliding-window detection:"); + + std::cout << "Notes:\n"; + std::cout << " - The interview-style version can produce a false correlation.\n"; + std::cout << " - In this dataset, it first matches 58ms from req-2001 with 43ms from req-3001.\n"; + std::cout << " - That pair exceeds the threshold, but it is operationally meaningless.\n"; + std::cout << " - The streaming version only correlates events from the same request_id\n"; + std::cout << " and only within the configured time window.\n"; + + return 0; + } catch (const std::exception &ex) { + std::cerr << "Error: " << ex.what() << '\n'; + return 1; + } +} diff --git a/analysis/03-Two_Sum_Is_Not_About_Numbers/examples/two_sum_logs_demo/two_sum_logs_demo.pro b/analysis/03-Two_Sum_Is_Not_About_Numbers/examples/two_sum_logs_demo/two_sum_logs_demo.pro new file mode 100644 index 0000000..595bdaa --- /dev/null +++ b/analysis/03-Two_Sum_Is_Not_About_Numbers/examples/two_sum_logs_demo/two_sum_logs_demo.pro @@ -0,0 +1,7 @@ +TEMPLATE = app +CONFIG += console c++17 +CONFIG -= app_bundle +CONFIG -= qt + +SOURCES += \ + main.cpp