This commit is contained in:
2026-05-18 15:31:52 -04:00
parent 269d560847
commit 41be5a2e24
3 changed files with 384 additions and 239 deletions

View File

@@ -0,0 +1,223 @@
#include <algorithm>
#include <cstdint>
#include <deque>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>
struct Event {
std::int64_t timestamp_ms;
std::string service;
std::string event;
int latency_ms;
std::string request_id;
std::string raw_line;
};
struct PairResult {
bool found = false;
Event first;
Event second;
};
std::int64_t parseTimestampMs(const std::string& timestamp)
{
// Expected format:
// YYYY-MM-DDTHH:MM:SS.mmmZ
// For this demo we only convert the HH:MM:SS.mmm part to milliseconds.
const std::size_t t_pos = timestamp.find('T');
const std::size_t z_pos = timestamp.find('Z');
if (t_pos == std::string::npos || z_pos == std::string::npos) {
throw std::runtime_error("Invalid timestamp: " + timestamp);
}
const std::string time_part = timestamp.substr(t_pos + 1, z_pos - t_pos - 1);
int hours = 0;
int minutes = 0;
int seconds = 0;
int millis = 0;
char colon1 = '\0';
char colon2 = '\0';
char dot = '\0';
std::istringstream iss(time_part);
iss >> hours >> colon1 >> minutes >> colon2 >> seconds >> dot >> millis;
if (!iss || colon1 != ':' || colon2 != ':' || dot != '.') {
throw std::runtime_error("Invalid time part: " + time_part);
}
return (((hours * 60LL) + minutes) * 60LL + seconds) * 1000LL + millis;
}
Event parseLogLine(const std::string& line)
{
std::istringstream iss(line);
std::string timestamp;
std::string service_token;
std::string event_token;
std::string latency_token;
std::string request_token;
if (!(iss >> timestamp >> service_token >> event_token >> latency_token >> request_token)) {
throw std::runtime_error("Cannot parse log line: " + line);
}
auto valueAfterEquals = [](const std::string& token) -> std::string {
const std::size_t pos = token.find('=');
if (pos == std::string::npos || pos + 1 >= token.size()) {
throw std::runtime_error("Invalid token: " + token);
}
return token.substr(pos + 1);
};
Event result;
result.timestamp_ms = parseTimestampMs(timestamp);
result.service = valueAfterEquals(service_token);
result.event = valueAfterEquals(event_token);
std::string latency_value = valueAfterEquals(latency_token);
if (latency_value.size() < 3 || latency_value.substr(latency_value.size() - 2) != "ms") {
throw std::runtime_error("Invalid latency token: " + latency_token);
}
latency_value.erase(latency_value.size() - 2);
result.latency_ms = std::stoi(latency_value);
result.request_id = valueAfterEquals(request_token);
result.raw_line = line;
return result;
}
std::vector<Event> parseLogs(const std::vector<std::string>& lines)
{
std::vector<Event> events;
events.reserve(lines.size());
for (const std::string& line : lines) {
events.push_back(parseLogLine(line));
}
return events;
}
void printPair(const PairResult& result, const std::string& label)
{
std::cout << label << '\n';
if (!result.found) {
std::cout << " no pair found\n\n";
return;
}
std::cout << " first : " << result.first.raw_line << '\n';
std::cout << " second: " << result.second.raw_line << '\n';
std::cout << " combined latency: "
<< (result.first.latency_ms + result.second.latency_ms)
<< "ms\n\n";
}
PairResult interviewStyleReduction(const std::vector<Event>& events, int threshold_ms)
{
// Intentionally wrong for the real-world problem:
// it ignores request_id and time.
for (std::size_t i = 0; i < events.size(); ++i) {
for (std::size_t j = i + 1; j < events.size(); ++j) {
if (events[i].latency_ms + events[j].latency_ms > threshold_ms) {
return PairResult{true, events[i], events[j]};
}
}
}
return PairResult{};
}
PairResult realSlidingWindowDetection(const std::vector<Event>& events,
int threshold_ms,
std::int64_t window_ms)
{
std::unordered_map<std::string, std::deque<Event> > active_events;
for (const Event& current : events) {
std::deque<Event>& bucket = active_events[current.request_id];
while (!bucket.empty() &&
(current.timestamp_ms - bucket.front().timestamp_ms) > window_ms) {
bucket.pop_front();
}
for (const Event& previous : bucket) {
const std::int64_t delta = current.timestamp_ms - previous.timestamp_ms;
if (delta >= 0 && delta <= window_ms &&
previous.latency_ms + current.latency_ms > threshold_ms) {
return PairResult{true, previous, current};
}
}
bucket.push_back(current);
}
return PairResult{};
}
void printEvents(const std::vector<Event>& events)
{
std::cout << "Synthetic log stream:\n";
for (const Event& event : events) {
std::cout << " " << event.raw_line << '\n';
}
std::cout << '\n';
}
int main()
{
try {
const std::vector<std::string> raw_logs = {
"2026-04-16T10:15:01.100Z service=api event=parse_input latency=12ms request_id=req-1001",
"2026-04-16T10:15:01.110Z service=cache event=cache_miss latency=48ms request_id=req-1001",
"2026-04-16T10:15:01.120Z service=auth event=token_check latency=58ms request_id=req-2001",
"2026-04-16T10:15:01.130Z service=db event=read_user latency=43ms request_id=req-3001",
"2026-04-16T10:15:01.135Z service=db event=read_user latency=55ms request_id=req-1001",
"2026-04-16T10:15:01.144Z service=net event=external_call latency=47ms request_id=req-1001",
"2026-04-16T10:15:01.200Z service=cache event=cache_miss latency=60ms request_id=req-3001",
"2026-04-16T10:15:01.260Z service=net event=external_call latency=52ms request_id=req-3001"
};
const int threshold_ms = 100;
const std::int64_t window_ms = 20;
const std::vector<Event> events = parseLogs(raw_logs);
printEvents(events);
std::cout << "Threshold: " << threshold_ms << "ms\n";
std::cout << "Time window: " << window_ms << "ms\n\n";
const PairResult naive_result = interviewStyleReduction(events, threshold_ms);
printPair(naive_result, "Interview-style reduction (ignores request_id and time):");
const PairResult real_result = realSlidingWindowDetection(events, threshold_ms, window_ms);
printPair(real_result, "Streaming sliding-window detection:");
std::cout << "Notes:\n";
std::cout << " - The interview-style version can produce a false correlation.\n";
std::cout << " - In this dataset, it first matches 58ms from req-2001 with 43ms from req-3001.\n";
std::cout << " - That pair exceeds the threshold, but it is operationally meaningless.\n";
std::cout << " - The streaming version only correlates events from the same request_id\n";
std::cout << " and only within the configured time window.\n";
return 0;
}
catch (const std::exception& ex) {
std::cerr << "Error: " << ex.what() << '\n';
return 1;
}
}