Issue #1. Build example files

This commit is contained in:
2026-05-18 16:46:07 -04:00
parent 41be5a2e24
commit 6ae5f83fc9
4 changed files with 287 additions and 223 deletions

View File

@@ -1,223 +0,0 @@
#include <algorithm>
#include <cstdint>
#include <deque>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>
struct Event {
std::int64_t timestamp_ms;
std::string service;
std::string event;
int latency_ms;
std::string request_id;
std::string raw_line;
};
struct PairResult {
bool found = false;
Event first;
Event second;
};
std::int64_t parseTimestampMs(const std::string& timestamp)
{
// Expected format:
// YYYY-MM-DDTHH:MM:SS.mmmZ
// For this demo we only convert the HH:MM:SS.mmm part to milliseconds.
const std::size_t t_pos = timestamp.find('T');
const std::size_t z_pos = timestamp.find('Z');
if (t_pos == std::string::npos || z_pos == std::string::npos) {
throw std::runtime_error("Invalid timestamp: " + timestamp);
}
const std::string time_part = timestamp.substr(t_pos + 1, z_pos - t_pos - 1);
int hours = 0;
int minutes = 0;
int seconds = 0;
int millis = 0;
char colon1 = '\0';
char colon2 = '\0';
char dot = '\0';
std::istringstream iss(time_part);
iss >> hours >> colon1 >> minutes >> colon2 >> seconds >> dot >> millis;
if (!iss || colon1 != ':' || colon2 != ':' || dot != '.') {
throw std::runtime_error("Invalid time part: " + time_part);
}
return (((hours * 60LL) + minutes) * 60LL + seconds) * 1000LL + millis;
}
Event parseLogLine(const std::string& line)
{
std::istringstream iss(line);
std::string timestamp;
std::string service_token;
std::string event_token;
std::string latency_token;
std::string request_token;
if (!(iss >> timestamp >> service_token >> event_token >> latency_token >> request_token)) {
throw std::runtime_error("Cannot parse log line: " + line);
}
auto valueAfterEquals = [](const std::string& token) -> std::string {
const std::size_t pos = token.find('=');
if (pos == std::string::npos || pos + 1 >= token.size()) {
throw std::runtime_error("Invalid token: " + token);
}
return token.substr(pos + 1);
};
Event result;
result.timestamp_ms = parseTimestampMs(timestamp);
result.service = valueAfterEquals(service_token);
result.event = valueAfterEquals(event_token);
std::string latency_value = valueAfterEquals(latency_token);
if (latency_value.size() < 3 || latency_value.substr(latency_value.size() - 2) != "ms") {
throw std::runtime_error("Invalid latency token: " + latency_token);
}
latency_value.erase(latency_value.size() - 2);
result.latency_ms = std::stoi(latency_value);
result.request_id = valueAfterEquals(request_token);
result.raw_line = line;
return result;
}
std::vector<Event> parseLogs(const std::vector<std::string>& lines)
{
std::vector<Event> events;
events.reserve(lines.size());
for (const std::string& line : lines) {
events.push_back(parseLogLine(line));
}
return events;
}
void printPair(const PairResult& result, const std::string& label)
{
std::cout << label << '\n';
if (!result.found) {
std::cout << " no pair found\n\n";
return;
}
std::cout << " first : " << result.first.raw_line << '\n';
std::cout << " second: " << result.second.raw_line << '\n';
std::cout << " combined latency: "
<< (result.first.latency_ms + result.second.latency_ms)
<< "ms\n\n";
}
PairResult interviewStyleReduction(const std::vector<Event>& events, int threshold_ms)
{
// Intentionally wrong for the real-world problem:
// it ignores request_id and time.
for (std::size_t i = 0; i < events.size(); ++i) {
for (std::size_t j = i + 1; j < events.size(); ++j) {
if (events[i].latency_ms + events[j].latency_ms > threshold_ms) {
return PairResult{true, events[i], events[j]};
}
}
}
return PairResult{};
}
PairResult realSlidingWindowDetection(const std::vector<Event>& events,
int threshold_ms,
std::int64_t window_ms)
{
std::unordered_map<std::string, std::deque<Event> > active_events;
for (const Event& current : events) {
std::deque<Event>& bucket = active_events[current.request_id];
while (!bucket.empty() &&
(current.timestamp_ms - bucket.front().timestamp_ms) > window_ms) {
bucket.pop_front();
}
for (const Event& previous : bucket) {
const std::int64_t delta = current.timestamp_ms - previous.timestamp_ms;
if (delta >= 0 && delta <= window_ms &&
previous.latency_ms + current.latency_ms > threshold_ms) {
return PairResult{true, previous, current};
}
}
bucket.push_back(current);
}
return PairResult{};
}
void printEvents(const std::vector<Event>& events)
{
std::cout << "Synthetic log stream:\n";
for (const Event& event : events) {
std::cout << " " << event.raw_line << '\n';
}
std::cout << '\n';
}
int main()
{
try {
const std::vector<std::string> raw_logs = {
"2026-04-16T10:15:01.100Z service=api event=parse_input latency=12ms request_id=req-1001",
"2026-04-16T10:15:01.110Z service=cache event=cache_miss latency=48ms request_id=req-1001",
"2026-04-16T10:15:01.120Z service=auth event=token_check latency=58ms request_id=req-2001",
"2026-04-16T10:15:01.130Z service=db event=read_user latency=43ms request_id=req-3001",
"2026-04-16T10:15:01.135Z service=db event=read_user latency=55ms request_id=req-1001",
"2026-04-16T10:15:01.144Z service=net event=external_call latency=47ms request_id=req-1001",
"2026-04-16T10:15:01.200Z service=cache event=cache_miss latency=60ms request_id=req-3001",
"2026-04-16T10:15:01.260Z service=net event=external_call latency=52ms request_id=req-3001"
};
const int threshold_ms = 100;
const std::int64_t window_ms = 20;
const std::vector<Event> events = parseLogs(raw_logs);
printEvents(events);
std::cout << "Threshold: " << threshold_ms << "ms\n";
std::cout << "Time window: " << window_ms << "ms\n\n";
const PairResult naive_result = interviewStyleReduction(events, threshold_ms);
printPair(naive_result, "Interview-style reduction (ignores request_id and time):");
const PairResult real_result = realSlidingWindowDetection(events, threshold_ms, window_ms);
printPair(real_result, "Streaming sliding-window detection:");
std::cout << "Notes:\n";
std::cout << " - The interview-style version can produce a false correlation.\n";
std::cout << " - In this dataset, it first matches 58ms from req-2001 with 43ms from req-3001.\n";
std::cout << " - That pair exceeds the threshold, but it is operationally meaningless.\n";
std::cout << " - The streaming version only correlates events from the same request_id\n";
std::cout << " and only within the configured time window.\n";
return 0;
}
catch (const std::exception& ex) {
std::cerr << "Error: " << ex.what() << '\n';
return 1;
}
}

View File

@@ -0,0 +1,74 @@
# This file is used to ignore files which are generated
# ----------------------------------------------------------------------------
*~
*.autosave
*.a
*.core
*.moc
*.o
*.obj
*.orig
*.rej
*.so
*.so.*
*_pch.h.cpp
*_resource.rc
*.qm
.#*
*.*#
core
!core/
tags
.DS_Store
.directory
*.debug
Makefile*
*.prl
*.app
moc_*.cpp
ui_*.h
qrc_*.cpp
Thumbs.db
*.res
*.rc
/.qmake.cache
/.qmake.stash
# qtcreator generated files
*.pro.user*
CMakeLists.txt.user*
# xemacs temporary files
*.flc
# Vim temporary files
.*.swp
# Visual Studio generated files
*.ib_pdb_index
*.idb
*.ilk
*.pdb
*.sln
*.suo
*.vcproj
*vcproj.*.*.user
*.ncb
*.sdf
*.opensdf
*.vcxproj
*vcxproj.*
# MinGW generated files
*.Debug
*.Release
# Python byte code
*.pyc
# Binaries
# --------
*.dll
*.exe

View File

@@ -0,0 +1,206 @@
#include <algorithm>
#include <cstdint>
#include <deque>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>
struct Event {
std::int64_t timestamp_ms;
std::string service;
std::string event;
int latency_ms;
std::string request_id;
std::string raw_line;
};
struct PairResult {
bool found = false;
Event first;
Event second;
};
std::int64_t parseTimestampMs (const std::string &timestamp) {
// Expected format:
// YYYY-MM-DDTHH:MM:SS.mmmZ
// For this demo we only convert the HH:MM:SS.mmm part to milliseconds.
const std::size_t t_pos = timestamp.find ('T');
const std::size_t z_pos = timestamp.find ('Z');
if (t_pos == std::string::npos || z_pos == std::string::npos)
throw std::runtime_error ("Invalid timestamp: " + timestamp);
const std::string time_part = timestamp.substr (t_pos + 1, z_pos - t_pos - 1);
int hours = 0;
int minutes = 0;
int seconds = 0;
int millis = 0;
char colon1 = '\0';
char colon2 = '\0';
char dot = '\0';
std::istringstream iss (time_part);
iss >> hours >> colon1 >> minutes >> colon2 >> seconds >> dot >> millis;
if (!iss || colon1 != ':' || colon2 != ':' || dot != '.')
throw std::runtime_error ("Invalid time part: " + time_part);
return (((hours * 60LL) + minutes) * 60LL + seconds) * 1000LL + millis;
}
Event parseLogLine (const std::string &line) {
std::istringstream iss (line);
std::string timestamp;
std::string service_token;
std::string event_token;
std::string latency_token;
std::string request_token;
if (! (iss >> timestamp >> service_token >> event_token >> latency_token >> request_token))
throw std::runtime_error ("Cannot parse log line: " + line);
auto valueAfterEquals = [] (const std::string & token) -> std::string {
const std::size_t pos = token.find ('=');
if (pos == std::string::npos || pos + 1 >= token.size())
throw std::runtime_error ("Invalid token: " + token);
return token.substr (pos + 1);
};
Event result;
result.timestamp_ms = parseTimestampMs (timestamp);
result.service = valueAfterEquals (service_token);
result.event = valueAfterEquals (event_token);
std::string latency_value = valueAfterEquals (latency_token);
if (latency_value.size() < 3 || latency_value.substr (latency_value.size() - 2) != "ms")
throw std::runtime_error ("Invalid latency token: " + latency_token);
latency_value.erase (latency_value.size() - 2);
result.latency_ms = std::stoi (latency_value);
result.request_id = valueAfterEquals (request_token);
result.raw_line = line;
return result;
}
std::vector<Event> parseLogs (const std::vector<std::string> &lines) {
std::vector<Event> events;
events.reserve (lines.size());
for (const std::string &line : lines)
events.push_back (parseLogLine (line));
return events;
}
void printPair (const PairResult &result, const std::string &label) {
std::cout << label << '\n';
if (!result.found) {
std::cout << " no pair found\n\n";
return;
}
std::cout << " first : " << result.first.raw_line << '\n';
std::cout << " second: " << result.second.raw_line << '\n';
std::cout << " combined latency: "
<< (result.first.latency_ms + result.second.latency_ms)
<< "ms\n\n";
}
PairResult interviewStyleReduction (const std::vector<Event> &events, int threshold_ms) {
// Intentionally wrong for the real-world problem:
// it ignores request_id and time.
for (std::size_t i = 0; i < events.size(); ++i) {
for (std::size_t j = i + 1; j < events.size(); ++j) {
if (events[i].latency_ms + events[j].latency_ms > threshold_ms) {
return PairResult{true, events[i], events[j]};
}
}
}
return PairResult{};
}
PairResult realSlidingWindowDetection (const std::vector<Event> &events,
int threshold_ms,
std::int64_t window_ms) {
std::unordered_map<std::string, std::deque<Event> > active_events;
for (const Event &current : events) {
std::deque<Event> &bucket = active_events[current.request_id];
while (!bucket.empty() &&
(current.timestamp_ms - bucket.front().timestamp_ms) > window_ms)
bucket.pop_front();
for (const Event &previous : bucket) {
const std::int64_t delta = current.timestamp_ms - previous.timestamp_ms;
if (delta >= 0 && delta <= window_ms &&
previous.latency_ms + current.latency_ms > threshold_ms) {
return PairResult{true, previous, current};
}
}
bucket.push_back (current);
}
return PairResult{};
}
void printEvents (const std::vector<Event> &events) {
std::cout << "Synthetic log stream:\n";
for (const Event &event : events)
std::cout << " " << event.raw_line << '\n';
std::cout << '\n';
}
int main() {
try {
const std::vector<std::string> raw_logs = {
"2026-04-16T10:15:01.100Z service=api event=parse_input latency=12ms request_id=req-1001",
"2026-04-16T10:15:01.110Z service=cache event=cache_miss latency=48ms request_id=req-1001",
"2026-04-16T10:15:01.120Z service=auth event=token_check latency=58ms request_id=req-2001",
"2026-04-16T10:15:01.130Z service=db event=read_user latency=43ms request_id=req-3001",
"2026-04-16T10:15:01.135Z service=db event=read_user latency=55ms request_id=req-1001",
"2026-04-16T10:15:01.144Z service=net event=external_call latency=47ms request_id=req-1001",
"2026-04-16T10:15:01.200Z service=cache event=cache_miss latency=60ms request_id=req-3001",
"2026-04-16T10:15:01.260Z service=net event=external_call latency=52ms request_id=req-3001"
};
const int threshold_ms = 100;
const std::int64_t window_ms = 20;
const std::vector<Event> events = parseLogs (raw_logs);
printEvents (events);
std::cout << "Threshold: " << threshold_ms << "ms\n";
std::cout << "Time window: " << window_ms << "ms\n\n";
const PairResult naive_result = interviewStyleReduction (events, threshold_ms);
printPair (naive_result, "Interview-style reduction (ignores request_id and time):");
const PairResult real_result = realSlidingWindowDetection (events, threshold_ms, window_ms);
printPair (real_result, "Streaming sliding-window detection:");
std::cout << "Notes:\n";
std::cout << " - The interview-style version can produce a false correlation.\n";
std::cout << " - In this dataset, it first matches 58ms from req-2001 with 43ms from req-3001.\n";
std::cout << " - That pair exceeds the threshold, but it is operationally meaningless.\n";
std::cout << " - The streaming version only correlates events from the same request_id\n";
std::cout << " and only within the configured time window.\n";
return 0;
} catch (const std::exception &ex) {
std::cerr << "Error: " << ex.what() << '\n';
return 1;
}
}

View File

@@ -0,0 +1,7 @@
TEMPLATE = app
CONFIG += console c++17
CONFIG -= app_bundle
CONFIG -= qt
SOURCES += \
main.cpp