From ef720744c6849f99313b558d60c157edf8ca7384 Mon Sep 17 00:00:00 2001 From: swenzel Date: Thu, 23 Jan 2025 17:29:13 +0100 Subject: [PATCH] Change bad-data-interval parsing to account for more requirements Apparently the separator can change each line and be part of the comment message. Hence, simple csv parsing does not work. --- MC/bin/o2dpg_sim_workflow_anchored.py | 34 +++++++++++++++++---------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/MC/bin/o2dpg_sim_workflow_anchored.py b/MC/bin/o2dpg_sim_workflow_anchored.py index b2c1c3d34..1fe68e6c2 100755 --- a/MC/bin/o2dpg_sim_workflow_anchored.py +++ b/MC/bin/o2dpg_sim_workflow_anchored.py @@ -14,7 +14,6 @@ import json import math import pandas as pd -import csv # Creates a time anchored MC workflow; positioned within a given run-number (as function of production size etc) @@ -318,17 +317,28 @@ def exclude_timestamp(ts, orbit, run, filename): if not os.path.isfile(filename): return False - # Function to detect the delimiter automatically - def detect_delimiter(file_path): - with open(file_path, 'r') as csvfile: - sample = csvfile.read(1024) # Read a small sample of the file - sniffer = csv.Sniffer() - delimiter = sniffer.sniff(sample).delimiter - return delimiter - return ',' # a reasonable default - - # read txt file into a pandas dataframe ---> if this fails catch exception and return - df = pd.read_csv(filename, header=None, names=["Run", "From", "To", "Message"], sep=detect_delimiter(filename)) + def parse_file(filename): + parsed_data = [] + with open(filename, 'r') as file: + for line in file: + # Split the line into exactly 4 parts (first three numbers + comment) + columns = re.split(r'[,\s;\t]+', line.strip(), maxsplit=3) + + if len(columns) < 3: + continue # Skip lines with insufficient columns + + try: + # Extract the first three columns as numbers + num1, num2, num3 = map(int, columns[:3]) # Assuming integers in the data + comment = columns[3] if len(columns) > 3 else "" + parsed_data.append({"Run" : num1, "From" : num2, "To" : num3, "Message" : comment}) + except ValueError: + continue # Skip lines where first three columns are not numeric + return parsed_data + + data = parse_file(filename) + # print (data) + df = pd.DataFrame(data) # convert to data frame for easy handling # extract data for this run number filtered = df[df['Run'] == run]