From 63425013cb2d30ad8fb68a562e6eae4116f93c1c Mon Sep 17 00:00:00 2001
From: Anita Caron <anitacaron@alumni.usp.br>
Date: Thu, 10 Oct 2024 17:09:30 -0300
Subject: [PATCH 1/3] get sample of each level type on robot report

---
 util/create_report_html.py | 34 +++++++++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/util/create_report_html.py b/util/create_report_html.py
index d1ace55..ec3e4d6 100755
--- a/util/create_report_html.py
+++ b/util/create_report_html.py
@@ -38,20 +38,48 @@ def main(args):
 
     error_count_rule = {}
     error_count_level = {}
+    report_filtered = pd.DataFrame()
 
     try:
         report = pd.read_csv(args.report, sep="\t")
+
+        # Get sample of each level only for ROBOT report
         if "Level" in report.columns and "Rule Name" in report.columns:
             error_count_level = report["Level"].value_counts()
             error_count_rule = report["Rule Name"].value_counts()
-    except Exception:
-        print("No report")
+
+            if error_count_level["ERROR"] < args.limitlines:
+                rest = args.limitlines - error_count_level["ERROR"]
+
+                # Calculate the sample number for each level based on group size
+                def calculate_sample_size(group, rest):
+                    if group["Level"].iloc[0] == "ERROR":
+                        return group.shape[0]
+
+                    return min(group.shape[0], rest)
+
+                # Get a sample of each Level type
+                report_filtered = report.groupby(by="Level")[
+                    ["Level", "Rule Name", "Subject", "Property", "Value"]
+                ].apply(
+                    lambda x: x.sample(calculate_sample_size(x, rest))
+                ).reset_index(drop=True)
+            else:
+                report_filtered = report.head(args.limitlines)
+        else:
+            report_filtered = report.head(args.limitlines)
+
+        if len(report_filtered) > args.limitlines:
+            report_filtered.to_csv(args.report, sep="\t", index=False)
+
+    except Exception as e:
+        print(e)
 
     # Load Jinja2 template
     template = Template(args.template.read())
 
     # Generate the HTML output
-    res = template.render(contents=report.head(args.limitlines),
+    res = template.render(contents=report_filtered.reset_index(drop=True),
                           maybe_get_link=maybe_get_link,
                           context=context,
                           title=args.title,

From 32c9dbb2be693b38b551680504f145e1104ae655 Mon Sep 17 00:00:00 2001
From: Anita Caron <anitacaron@alumni.usp.br>
Date: Thu, 10 Oct 2024 17:14:44 -0300
Subject: [PATCH 2/3] Remove the `truncate` step so the downloaded report
 matches the generated sample

---
 Makefile                 | 14 +++++++-------
 util/dashboard_config.py |  4 ++--
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/Makefile b/Makefile
index 411da26..285619b 100644
--- a/Makefile
+++ b/Makefile
@@ -18,13 +18,13 @@ clean:
 	rm -rf build dashboard dependencies
 
 # Truncate potentially huge robot reports
-truncate_reports_for_github:
-	$(eval REPORTS := $(wildcard dashboard/*/robot_report.tsv))
-	for REP in $(REPORTS); do \
-		touch $$REP; \
-		cat $$REP | head -$(REPORT_LENGTH_LIMIT) > $$REP.tmp; \
-		mv $$REP.tmp $$REP; \
-	done
+# truncate_reports_for_github:
+# 	$(eval REPORTS := $(wildcard dashboard/*/robot_report.tsv))
+# 	for REP in $(REPORTS); do \
+# 		touch $$REP; \
+# 		cat $$REP | head -$(REPORT_LENGTH_LIMIT) > $$REP.tmp; \
+# 		mv $$REP.tmp $$REP; \
+# 	done
 
 # ------------------- #
 ### DIRECTORY SETUP ###
diff --git a/util/dashboard_config.py b/util/dashboard_config.py
index 4ba59ec..c974eec 100755
--- a/util/dashboard_config.py
+++ b/util/dashboard_config.py
@@ -75,8 +75,8 @@ def rundashboard(configfile, clean):
     prepare_ontologies(ontologies['ontologies'], ontology_dir, dashboard_dir, make_parameters, config)
     logging.info("Building the dashboard")
     runcmd(f"make dashboard {make_parameters} -B", config.get_dashboard_report_timeout_seconds())
-    logging.info("Postprocess files for github")
-    runcmd(f"make truncate_reports_for_github {make_parameters} -B", config.get_dashboard_report_timeout_seconds())
+    # logging.info("Postprocess files for github")
+    # runcmd(f"make truncate_reports_for_github {make_parameters} -B", config.get_dashboard_report_timeout_seconds())
 
 info_usage_namespace = 'Info: Usage of namespaces in axioms'
 

From 634ede81f4235111aa310023a4b70e6a52fdcd4f Mon Sep 17 00:00:00 2001
From: Anita Caron <anitacaron@alumni.usp.br>
Date: Sat, 7 Jun 2025 15:59:13 -0300
Subject: [PATCH 3/3] Enhance error handling and logging in
 create_report_html.py

---
 util/create_report_html.py | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/util/create_report_html.py b/util/create_report_html.py
index ec3e4d6..4be9b6d 100755
--- a/util/create_report_html.py
+++ b/util/create_report_html.py
@@ -2,6 +2,7 @@
 
 import argparse
 import json
+import logging
 import os
 import re
 import sys
@@ -9,13 +10,15 @@
 import pandas as pd
 from jinja2 import Template
 
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
 
 def main(args):
     """
     """
     parser = argparse.ArgumentParser(description='Create a report HTML page')
     parser.add_argument('report',
-                        type=argparse.FileType('r'),
+                        type=argparse.FileType('r+'),
                         help='TSV report to convert to HTML')
     parser.add_argument('context',
                         type=argparse.FileType('r'),
@@ -48,7 +51,8 @@ def main(args):
             error_count_level = report["Level"].value_counts()
             error_count_rule = report["Rule Name"].value_counts()
 
-            if error_count_level["ERROR"] < args.limitlines:
-                rest = args.limitlines - error_count_level["ERROR"]
+            error_count_error = error_count_level.get("ERROR", 0)
+            if error_count_error < args.limitlines:
+                rest = args.limitlines - error_count_error  # "ERROR" may be absent from the counts
 
                 # Calculate the sample number for each level based on group size
@@ -58,8 +62,13 @@ def calculate_sample_size(group, rest):
 
                     return min(group.shape[0], rest)
 
+                required_columns = ["Level", "Rule Name", "Subject", "Property", "Value"]
+                missing_columns = [col for col in required_columns if col not in report.columns]
+                if missing_columns:
+                    raise KeyError(f"Missing columns in report: {missing_columns}")
+
                 # Get a sample of each Level type
-                report_filtered = report.groupby(by="Level")[
+                report_filtered = report.groupby(by=["Level","Rule Name","Subject"])[
                     ["Level", "Rule Name", "Subject", "Property", "Value"]
                 ].apply(
                     lambda x: x.sample(calculate_sample_size(x, rest))
@@ -72,8 +81,12 @@ def calculate_sample_size(group, rest):
         if len(report_filtered) > args.limitlines:
             report_filtered.to_csv(args.report, sep="\t", index=False)
 
+    except pd.errors.EmptyDataError as e:
+        logging.error("Empty data error: %s", e)
+    except FileNotFoundError as e:
+        logging.error("File not found: %s", e)
     except Exception as e:
-        print(e)
+        logging.error("An unexpected error occurred: %s", e)
 
     # Load Jinja2 template
     template = Template(args.template.read())