Closes #01
This commit is contained in:
@@ -0,0 +1,223 @@
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <deque>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
struct Event {
|
||||
std::int64_t timestamp_ms;
|
||||
std::string service;
|
||||
std::string event;
|
||||
int latency_ms;
|
||||
std::string request_id;
|
||||
std::string raw_line;
|
||||
};
|
||||
|
||||
struct PairResult {
|
||||
bool found = false;
|
||||
Event first;
|
||||
Event second;
|
||||
};
|
||||
|
||||
std::int64_t parseTimestampMs(const std::string& timestamp)
|
||||
{
|
||||
// Expected format:
|
||||
// YYYY-MM-DDTHH:MM:SS.mmmZ
|
||||
// For this demo we only convert the HH:MM:SS.mmm part to milliseconds.
|
||||
const std::size_t t_pos = timestamp.find('T');
|
||||
const std::size_t z_pos = timestamp.find('Z');
|
||||
|
||||
if (t_pos == std::string::npos || z_pos == std::string::npos) {
|
||||
throw std::runtime_error("Invalid timestamp: " + timestamp);
|
||||
}
|
||||
|
||||
const std::string time_part = timestamp.substr(t_pos + 1, z_pos - t_pos - 1);
|
||||
|
||||
int hours = 0;
|
||||
int minutes = 0;
|
||||
int seconds = 0;
|
||||
int millis = 0;
|
||||
char colon1 = '\0';
|
||||
char colon2 = '\0';
|
||||
char dot = '\0';
|
||||
|
||||
std::istringstream iss(time_part);
|
||||
iss >> hours >> colon1 >> minutes >> colon2 >> seconds >> dot >> millis;
|
||||
|
||||
if (!iss || colon1 != ':' || colon2 != ':' || dot != '.') {
|
||||
throw std::runtime_error("Invalid time part: " + time_part);
|
||||
}
|
||||
|
||||
return (((hours * 60LL) + minutes) * 60LL + seconds) * 1000LL + millis;
|
||||
}
|
||||
|
||||
Event parseLogLine(const std::string& line)
|
||||
{
|
||||
std::istringstream iss(line);
|
||||
|
||||
std::string timestamp;
|
||||
std::string service_token;
|
||||
std::string event_token;
|
||||
std::string latency_token;
|
||||
std::string request_token;
|
||||
|
||||
if (!(iss >> timestamp >> service_token >> event_token >> latency_token >> request_token)) {
|
||||
throw std::runtime_error("Cannot parse log line: " + line);
|
||||
}
|
||||
|
||||
auto valueAfterEquals = [](const std::string& token) -> std::string {
|
||||
const std::size_t pos = token.find('=');
|
||||
if (pos == std::string::npos || pos + 1 >= token.size()) {
|
||||
throw std::runtime_error("Invalid token: " + token);
|
||||
}
|
||||
return token.substr(pos + 1);
|
||||
};
|
||||
|
||||
Event result;
|
||||
result.timestamp_ms = parseTimestampMs(timestamp);
|
||||
result.service = valueAfterEquals(service_token);
|
||||
result.event = valueAfterEquals(event_token);
|
||||
|
||||
std::string latency_value = valueAfterEquals(latency_token);
|
||||
if (latency_value.size() < 3 || latency_value.substr(latency_value.size() - 2) != "ms") {
|
||||
throw std::runtime_error("Invalid latency token: " + latency_token);
|
||||
}
|
||||
latency_value.erase(latency_value.size() - 2);
|
||||
result.latency_ms = std::stoi(latency_value);
|
||||
|
||||
result.request_id = valueAfterEquals(request_token);
|
||||
result.raw_line = line;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<Event> parseLogs(const std::vector<std::string>& lines)
|
||||
{
|
||||
std::vector<Event> events;
|
||||
events.reserve(lines.size());
|
||||
|
||||
for (const std::string& line : lines) {
|
||||
events.push_back(parseLogLine(line));
|
||||
}
|
||||
|
||||
return events;
|
||||
}
|
||||
|
||||
void printPair(const PairResult& result, const std::string& label)
|
||||
{
|
||||
std::cout << label << '\n';
|
||||
|
||||
if (!result.found) {
|
||||
std::cout << " no pair found\n\n";
|
||||
return;
|
||||
}
|
||||
|
||||
std::cout << " first : " << result.first.raw_line << '\n';
|
||||
std::cout << " second: " << result.second.raw_line << '\n';
|
||||
std::cout << " combined latency: "
|
||||
<< (result.first.latency_ms + result.second.latency_ms)
|
||||
<< "ms\n\n";
|
||||
}
|
||||
|
||||
PairResult interviewStyleReduction(const std::vector<Event>& events, int threshold_ms)
|
||||
{
|
||||
// Intentionally wrong for the real-world problem:
|
||||
// it ignores request_id and time.
|
||||
for (std::size_t i = 0; i < events.size(); ++i) {
|
||||
for (std::size_t j = i + 1; j < events.size(); ++j) {
|
||||
if (events[i].latency_ms + events[j].latency_ms > threshold_ms) {
|
||||
return PairResult{true, events[i], events[j]};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return PairResult{};
|
||||
}
|
||||
|
||||
PairResult realSlidingWindowDetection(const std::vector<Event>& events,
|
||||
int threshold_ms,
|
||||
std::int64_t window_ms)
|
||||
{
|
||||
std::unordered_map<std::string, std::deque<Event> > active_events;
|
||||
|
||||
for (const Event& current : events) {
|
||||
std::deque<Event>& bucket = active_events[current.request_id];
|
||||
|
||||
while (!bucket.empty() &&
|
||||
(current.timestamp_ms - bucket.front().timestamp_ms) > window_ms) {
|
||||
bucket.pop_front();
|
||||
}
|
||||
|
||||
for (const Event& previous : bucket) {
|
||||
const std::int64_t delta = current.timestamp_ms - previous.timestamp_ms;
|
||||
|
||||
if (delta >= 0 && delta <= window_ms &&
|
||||
previous.latency_ms + current.latency_ms > threshold_ms) {
|
||||
return PairResult{true, previous, current};
|
||||
}
|
||||
}
|
||||
|
||||
bucket.push_back(current);
|
||||
}
|
||||
|
||||
return PairResult{};
|
||||
}
|
||||
|
||||
void printEvents(const std::vector<Event>& events)
|
||||
{
|
||||
std::cout << "Synthetic log stream:\n";
|
||||
for (const Event& event : events) {
|
||||
std::cout << " " << event.raw_line << '\n';
|
||||
}
|
||||
std::cout << '\n';
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
try {
|
||||
const std::vector<std::string> raw_logs = {
|
||||
"2026-04-16T10:15:01.100Z service=api event=parse_input latency=12ms request_id=req-1001",
|
||||
"2026-04-16T10:15:01.110Z service=cache event=cache_miss latency=48ms request_id=req-1001",
|
||||
"2026-04-16T10:15:01.120Z service=auth event=token_check latency=58ms request_id=req-2001",
|
||||
"2026-04-16T10:15:01.130Z service=db event=read_user latency=43ms request_id=req-3001",
|
||||
"2026-04-16T10:15:01.135Z service=db event=read_user latency=55ms request_id=req-1001",
|
||||
"2026-04-16T10:15:01.144Z service=net event=external_call latency=47ms request_id=req-1001",
|
||||
"2026-04-16T10:15:01.200Z service=cache event=cache_miss latency=60ms request_id=req-3001",
|
||||
"2026-04-16T10:15:01.260Z service=net event=external_call latency=52ms request_id=req-3001"
|
||||
};
|
||||
|
||||
const int threshold_ms = 100;
|
||||
const std::int64_t window_ms = 20;
|
||||
|
||||
const std::vector<Event> events = parseLogs(raw_logs);
|
||||
|
||||
printEvents(events);
|
||||
|
||||
std::cout << "Threshold: " << threshold_ms << "ms\n";
|
||||
std::cout << "Time window: " << window_ms << "ms\n\n";
|
||||
|
||||
const PairResult naive_result = interviewStyleReduction(events, threshold_ms);
|
||||
printPair(naive_result, "Interview-style reduction (ignores request_id and time):");
|
||||
|
||||
const PairResult real_result = realSlidingWindowDetection(events, threshold_ms, window_ms);
|
||||
printPair(real_result, "Streaming sliding-window detection:");
|
||||
|
||||
std::cout << "Notes:\n";
|
||||
std::cout << " - The interview-style version can produce a false correlation.\n";
|
||||
std::cout << " - In this dataset, it first matches 58ms from req-2001 with 43ms from req-3001.\n";
|
||||
std::cout << " - That pair exceeds the threshold, but it is operationally meaningless.\n";
|
||||
std::cout << " - The streaming version only correlates events from the same request_id\n";
|
||||
std::cout << " and only within the configured time window.\n";
|
||||
|
||||
return 0;
|
||||
}
|
||||
catch (const std::exception& ex) {
|
||||
std::cerr << "Error: " << ex.what() << '\n';
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user