From 63425013cb2d30ad8fb68a562e6eae4116f93c1c Mon Sep 17 00:00:00 2001 From: Anita Caron Date: Thu, 10 Oct 2024 17:09:30 -0300 Subject: [PATCH 1/3] get sample of each level type on robot report --- util/create_report_html.py | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/util/create_report_html.py b/util/create_report_html.py index d1ace55..ec3e4d6 100755 --- a/util/create_report_html.py +++ b/util/create_report_html.py @@ -38,20 +38,48 @@ def main(args): error_count_rule = {} error_count_level = {} + report_filtered = pd.DataFrame() try: report = pd.read_csv(args.report, sep="\t") + + # Get sample of each level only for ROBOT report if "Level" in report.columns and "Rule Name" in report.columns: error_count_level = report["Level"].value_counts() error_count_rule = report["Rule Name"].value_counts() - except Exception: - print("No report") + + if error_count_level["ERROR"] < args.limitlines: + rest = args.limitlines - error_count_level["ERROR"] + + # Calculate the sample number for each level based on group size + def calculate_sample_size(group, rest): + if group["Level"].iloc[0] == "ERROR": + return group.shape[0] + + return min(group.shape[0], rest) + + # Get a sample of each Level type + report_filtered = report.groupby(by="Level")[ + ["Level", "Rule Name", "Subject", "Property", "Value"] + ].apply( + lambda x: x.sample(calculate_sample_size(x, rest)) + ).reset_index(drop=True) + else: + report_filtered = report.head(args.limitlines) + else: + report_filtered = report.head(args.limitlines) + + if len(report_filtered) > args.limitlines: + report_filtered.to_csv(args.report, sep="\t", index=False) + + except Exception as e: + print(e) # Load Jinja2 template template = Template(args.template.read()) # Generate the HTML output - res = template.render(contents=report.head(args.limitlines), + res = template.render(contents=report_filtered.reset_index(drop=True), maybe_get_link=maybe_get_link, context=context, title=args.title, From 32c9dbb2be693b38b551680504f145e1104ae655 Mon Sep 17 00:00:00 2001 From: Anita Caron Date: Thu, 10 Oct 2024 17:14:44 -0300 Subject: [PATCH 2/3] removing the `truncate` step to download the same sample created --- Makefile | 14 +++++++------- util/dashboard_config.py | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index 411da26..285619b 100644 --- a/Makefile +++ b/Makefile @@ -18,13 +18,13 @@ clean: rm -rf build dashboard dependencies # Truncate potentially huge robot reports -truncate_reports_for_github: - $(eval REPORTS := $(wildcard dashboard/*/robot_report.tsv)) - for REP in $(REPORTS); do \ - touch $$REP; \ - cat $$REP | head -$(REPORT_LENGTH_LIMIT) > $$REP.tmp; \ - mv $$REP.tmp $$REP; \ - done +# truncate_reports_for_github: +# $(eval REPORTS := $(wildcard dashboard/*/robot_report.tsv)) +# for REP in $(REPORTS); do \ +# touch $$REP; \ +# cat $$REP | head -$(REPORT_LENGTH_LIMIT) > $$REP.tmp; \ +# mv $$REP.tmp $$REP; \ +# done # ------------------- # ### DIRECTORY SETUP ### diff --git a/util/dashboard_config.py b/util/dashboard_config.py index 4ba59ec..c974eec 100755 --- a/util/dashboard_config.py +++ b/util/dashboard_config.py @@ -75,8 +75,8 @@ def rundashboard(configfile, clean): prepare_ontologies(ontologies['ontologies'], ontology_dir, dashboard_dir, make_parameters, config) logging.info("Building the dashboard") runcmd(f"make dashboard {make_parameters} -B", config.get_dashboard_report_timeout_seconds()) - logging.info("Postprocess files for github") - runcmd(f"make truncate_reports_for_github {make_parameters} -B", config.get_dashboard_report_timeout_seconds()) + # logging.info("Postprocess files for github") + # runcmd(f"make truncate_reports_for_github {make_parameters} -B", config.get_dashboard_report_timeout_seconds()) info_usage_namespace = 'Info: Usage of namespaces in axioms' From 634ede81f4235111aa310023a4b70e6a52fdcd4f Mon Sep 17 00:00:00 2001 From: Anita Caron Date: Sat, 7 Jun 2025 15:59:13 -0300 Subject: [PATCH 3/3] Enhance error handling and logging in create_report_html.py --- util/create_report_html.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/util/create_report_html.py b/util/create_report_html.py index ec3e4d6..4be9b6d 100755 --- a/util/create_report_html.py +++ b/util/create_report_html.py @@ -2,6 +2,7 @@ import argparse import json +import logging import os import re import sys @@ -9,13 +10,15 @@ import pandas as pd from jinja2 import Template +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') + def main(args): """ """ parser = argparse.ArgumentParser(description='Create a report HTML page') parser.add_argument('report', - type=argparse.FileType('r'), + type=argparse.FileType('r+'), help='TSV report to convert to HTML') parser.add_argument('context', type=argparse.FileType('r'), @@ -48,7 +51,8 @@ def main(args): error_count_level = report["Level"].value_counts() error_count_rule = report["Rule Name"].value_counts() - if error_count_level["ERROR"] < args.limitlines: + error_count_error = error_count_level.get("ERROR", 0) + if error_count_error < args.limitlines: rest = args.limitlines - error_count_level["ERROR"] # Calculate the sample number for each level based on group size @@ -58,8 +62,13 @@ def calculate_sample_size(group, rest): return min(group.shape[0], rest) + required_columns = ["Level", "Rule Name", "Subject", "Property", "Value"] + missing_columns = [col for col in required_columns if col not in report.columns] + if missing_columns: + raise KeyError(f"Missing columns in report: {missing_columns}") + # Get a sample of each Level type - report_filtered = report.groupby(by="Level")[ + report_filtered = report.groupby(by=["Level","Rule Name","Subject"])[ ["Level", "Rule Name", "Subject", "Property", "Value"] ].apply( lambda x: x.sample(calculate_sample_size(x, rest)) @@ -72,8 +81,12 @@ def calculate_sample_size(group, rest): if len(report_filtered) > args.limitlines: report_filtered.to_csv(args.report, sep="\t", index=False) + except pd.errors.EmptyDataError as e: + logging.error("Empty data error: %s", e) + except FileNotFoundError as e: + logging.error("File not found: %s", e) except Exception as e: - print(e) + logging.error("An unexpected error occurred: %s", e) # Load Jinja2 template template = Template(args.template.read())