Merged
2 changes: 2 additions & 0 deletions .codespell-ignore.txt
@@ -40,6 +40,8 @@ Strack
Tennant
Udo
Yau
Mathes
Smal

# Technical or code related terms
aCount
6,667 changes: 5,817 additions & 850 deletions content/contributors/tenzing.md

Large diffs are not rendered by default.

45 changes: 45 additions & 0 deletions scripts/forrt_contribs/README.md
@@ -0,0 +1,45 @@
# FORRT Contributors Data Generation

This directory contains the script and template for generating the Contributors page.

## Files

- `tenzing.py` - Python script that fetches contributor data from Google Sheets and generates the `tenzing.md` file
- `tenzing_template.md` - Template file with frontmatter, page structure, and CSS styles
- `tenzing.md` - Generated output file (copied to `content/contributors/tenzing.md` after generation)

The JavaScript file implementing filtering features is located at `static/js/contributor-filter.js`.
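
As a quick reference, `tenzing.py` embeds the filter options in the page as a `window.filterData` object, which `contributor-filter.js` reads to populate the dropdown menus. A minimal sketch of its shape (the project and role labels below are hypothetical examples; the real lists are built from the sheets at generation time):

```
# Shape of the data exposed as window.filterData; labels are hypothetical examples.
filter_data = {
    "projects": [
        {"value": "forrt-glossary", "label": "FORRT Glossary"},
        {"value": "replications-and-reversals", "label": "Replications & Reversals"},
    ],
    "roles": [
        {"value": "conceptualization", "label": "Conceptualization"},
        {"value": "project-manager", "label": "Project Manager"},
    ],
}
```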

## How the Data is Generated

The `tenzing.py` script:

1. Fetches data from:
- The Tenzing index ("Tenzing Automation Source" sheet)
- The "FORRT Lead Tenzing Sheet"

**Error handling:** If any project sheets fail to load, the script logs the failures to `tenzing_failures.json`, which triggers a GitHub workflow to create an issue for investigation.

2. Processes the data to:
- Consolidate each person's contributions across FORRT projects
- Generate HTML for display on the Contributors page
- Add `data-*` attributes to enable filtering by project/role (see the sketch after this list)
- Add `id` attributes (when ORCID is available) to enable anchor links (e.g., `https://forrt.org/contributors#0000-0000-0000-0000`)
- Generate a JSON object with all unique projects and roles to populate filter dropdown menus

3. Creates the final output by:
- Reading `tenzing_template.md`
- Appending the generated HTML
- Writing to `tenzing.md`
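
For illustration, the sketch below shows how a single contribution row becomes a filterable list item. It is a simplified extract of the logic in `tenzing.py`; the project name, URL, and roles are hypothetical:

```
import html
import re

def normalize_for_attribute(text):
    """Lowercase, replace '&' with 'and', and collapse non-alphanumeric runs to hyphens."""
    name = text.lower().strip().replace('&', 'and')
    name = re.sub(r'[^a-z0-9]+', '-', name)
    return name.strip('-')

# Hypothetical row values
project_name = "Replications & Reversals"
project_url = "https://forrt.org/example-project/"
roles = ["Conceptualization", "Project Manager"]

projects_attr = html.escape(normalize_for_attribute(project_name), quote=True)
roles_attr = html.escape(','.join(normalize_for_attribute(r) for r in roles), quote=True)

print(
    f'<li class="contribution" data-projects="{projects_attr}" '
    f'data-roles="{roles_attr}"><a href="{project_url}">{project_name}</a> '
    f'with <em>Conceptualization</em> and <em>Project Manager</em></li>'
)
# data-projects="replications-and-reversals", data-roles="conceptualization,project-manager"
```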


**Important:** `tenzing.md` is auto-generated and should never be edited manually.

## Local Development

When working with `tenzing.py` locally, copy the generated file to the content directory before rendering the site:

```
cp scripts/forrt_contribs/tenzing.md content/contributors/tenzing.md
hugo server
```
6,605 changes: 5,818 additions & 787 deletions scripts/forrt_contribs/tenzing.md

Large diffs are not rendered by default.

208 changes: 180 additions & 28 deletions scripts/forrt_contribs/tenzing.py
@@ -1,5 +1,7 @@
import pandas as pd
import os
import re
import html
import json

def print_failures(failed_sheets):
@@ -41,7 +43,6 @@ def print_failures(failed_sheets):
# Make sure each URL is transformed into a CSV export URL as shown above
data_frame = pd.read_csv(url)

# --- LOGGING ADDED HERE ---
# Log the number of contributors read from the current project
print(f"✓ Read {len(data_frame)} contributors from '{project_name}'.")

@@ -79,8 +80,16 @@ def print_failures(failed_sheets):

def concatenate_true_columns(row, columns):
true_columns = [col for col in columns if pd.notna(row[col]) and row[col]]
if 'Project Managers' in true_columns:
other_columns = [f'*{col}*' for col in true_columns if col != 'Project Managers']

# Check for both "Project Managers" and "Project manager" (case variations)
pm_column = None
for col in true_columns:
if col.lower() == 'project managers' or col.lower() == 'project manager':
pm_column = col
break

if pm_column:
other_columns = [f'*{col}*' for col in true_columns if col != pm_column]
if other_columns:
return 'as Project Manager and with ' + ', '.join(other_columns[:-1]) + (' and ' if len(other_columns) > 1 else '') + other_columns[-1]
else:
@@ -90,6 +99,7 @@ def concatenate_true_columns(row, columns):

# List of column names to check for TRUE values
fields_url = "https://docs.google.com/spreadsheets/d/e/2PACX-1vT_IaXiYtB3iAmtDZ_XiQKrToRkxOlkXNAeNU2SIT_J9PxvsQyptga6Gg9c8mSvDZpwY6d8skswIQYh/pub?output=csv&gid=277271370"

try:
column_mappings = pd.read_csv(fields_url)
print(f"✓ Successfully loaded column mappings with {len(column_mappings)} fields")
@@ -107,6 +117,7 @@ def concatenate_true_columns(row, columns):
# Filtering rows based on the updated columns_to_check list
# Note: columns_to_check needs to be updated to the renamed columns for the filter to work correctly
columns_to_check = [rename_dict[col] for col in columns_to_check if col in rename_dict]

# Remove columns not present
columns_present = [col for col in columns_to_check if col in merged_data.columns]
columns_dropped = set(columns_to_check) - set(columns_present)
@@ -141,7 +152,7 @@ def concatenate_true_columns(row, columns):

# Sort based on surname
merged_data['sort_order'] = merged_data['Surname']
merged_data = merged_data.sort_values(by='sort_order')
merged_data = merged_data.sort_values(by='sort_order')
merged_data = merged_data.drop(columns='sort_order')

# Strip spaces from 'ORCID iD' in merged data
@@ -152,7 +163,7 @@ def format_name(row):
# Extract the first name, middle name initial, and surname
first_name = row['First name'].strip() if pd.notna(row['First name']) else ""
middle_name = row['Middle name']
surname = row['Surname'].strip() if pd.notna(row['Surname']) else ""
surname = row['Surname'].strip().rstrip('*') if pd.notna(row['Surname']) else ""

# Check if the middle name is not NaN and not an empty string
if pd.notna(middle_name) and middle_name != '':
@@ -169,35 +180,129 @@ def format_name(row):
# Propagate ORCID iD within each contributor's grouping
merged_data['ORCID iD'] = merged_data.groupby('full_name')['ORCID iD'].transform(lambda x: x.ffill().bfill())

# Helper function to normalize project/role names for data attributes
def normalize_for_attribute(text):
"""Normalize text for use in HTML data-* attributes."""
if pd.isna(text) or text == '':
return ''

# Lowercase + trim
name = text.lower().strip()

# Replace & with 'and'
name = name.replace('&', 'and')

# Replace ANY non-alphanumeric sequence with a hyphen
name = re.sub(r'[^a-z0-9]+', '-', name)

# Collapse multiple hyphens
name = re.sub(r'-+', '-', name)

# Remove leading/trailing hyphens
name = name.strip('-')

return name


# Group by 'ORCID iD' and concatenate the contributions
def concatenate_contributions(group):

# Find the minimum original order for the group
def concatenate_contributions(group):
# Minimum original order for sorting later
min_order = group['original_order'].min()

# Format the full name once per group
# Format name once
full_name = format_name(group.iloc[0])
group = group.sort_values(by='special_role', ascending=False)

# Create the contributions string for each project
contributions = [
f"{row['Project Name']} {('as' if row['special_role'] else '')} {row['Contributions']}" if pd.isna(row['Project URL']) or row['Project URL'] == ''
else f"[{row['Project Name']}]({row['Project URL']}) {('as' if row['special_role'] else '')} {row['Contributions']}"
for _, row in group.iterrows()
]

# Add numbering only if there are more than 1 contributions
if len(contributions) > 1:
contributions = [f"{i+1}. {contribution}" for i, contribution in enumerate(contributions)]

# Turn contributions into multiline list or single line
contributions_str = contributions[0] if len(contributions) == 1 else '\n ' + '\n '.join(contributions) + '\n' + '{{<rawhtml>}}<br/>&nbsp;<br/> {{</rawhtml>}}'

orcid_id = group.iloc[0]['ORCID iD']

# Build name HTML
if orcid_id:
return min_order, f"- **[{full_name}]({'https://orcid.org/' + orcid_id.strip()})** contributed to {contributions_str}"
name_html = f'<strong><a href="https://orcid.org/{orcid_id.strip()}">{full_name}</a></strong>'
else:
return min_order, f"- **{full_name}** contributed to {contributions_str}"
name_html = f'<strong>{full_name}</strong>'

# Build individual contribution items
contribution_items = []

for _, row in group.iterrows():
project_name = row['Project Name']
if pd.isna(project_name) or project_name == '':
continue

# Normalize for data attributes
normalized_project = normalize_for_attribute(project_name)

# Extract roles for this specific contribution
contribution_roles = []
contributions_text = row['Contributions']

if pd.notna(contributions_text):
# Extract "Project Manager" if present (regardless of special_role)
pm_match = re.search(r'as\s+Project\s+Manager(?:\s+and\s+with)?', contributions_text, re.IGNORECASE)
if pm_match:
if 'project-manager' not in contribution_roles:
contribution_roles.append('project-manager')

# Extract special roles (for special_role=True cases)
if row['special_role']:
special_role_match = re.search(r'(?:as\s+)?(.+?)(?:\s+and\s+with|\s+and|$)', contributions_text)
if special_role_match:
special_role_text = special_role_match.group(1).strip()
normalized_special = normalize_for_attribute(special_role_text)
if normalized_special not in contribution_roles:
contribution_roles.append(normalized_special)

# Extract roles marked with *
role_matches = re.findall(r'\*([^*]+)\*', contributions_text)
for role_match in role_matches:
normalized_role = normalize_for_attribute(role_match)
if normalized_role not in contribution_roles:
contribution_roles.append(normalized_role)

# Build project HTML
if pd.notna(row['Project URL']) and row['Project URL'] != '':
project_html = f'<a href="{row["Project URL"]}">{project_name}</a>'
else:
project_html = project_name

# Convert *role* → <em>role</em>
contrib_html = re.sub(r'\*([^*]+)\*', r'<em>\1</em>', contributions_text) if pd.notna(contributions_text) else ''

# Handle special role phrasing
if row['special_role']:
# Use the actual special role text from contributions_text
full_contrib = f'{project_html} as {contributions_text}'
else:
full_contrib = f'{project_html} {contrib_html}'

# Create data attributes for this contribution
projects_attr = html.escape(normalized_project, quote=True)
roles_attr = html.escape(','.join(contribution_roles), quote=True)

# Build the contribution <li>
contribution_items.append(
f' <li class="contribution" data-projects="{projects_attr}" '
f'data-roles="{roles_attr}">{full_contrib}</li>'
)

# Build the complete contributor group HTML
contributions_list = '\n'.join(contribution_items)

# Add id attribute if ORCID exists
id_attr = f' id="{orcid_id.strip()}"' if orcid_id else ''

final_html = (
f'<li class="contributor-group"{id_attr}>\n'
f' {name_html} contributed to\n'
f' <ul class="contributions-list">\n'
f'{contributions_list}\n'
f' </ul>\n'
f'</li>\n'
)

return min_order, final_html


def extract_orcid_id(value):
if not isinstance(value, str) or len(value) < 5:
@@ -208,7 +313,6 @@ def extract_orcid_id(value):

return value

# Assuming 'data' is your DataFrame
merged_data['ORCID iD'] = merged_data['ORCID iD'].apply(extract_orcid_id)

# Creating a new column for the concatenated name
@@ -217,6 +321,9 @@ def extract_orcid_id(value):
# Apply the function to each group and create a summary DataFrame
merged_data['original_order'] = range(len(merged_data))

# Move Flavio to the end of the list
merged_data.loc[merged_data["ORCID iD"] == "0000-0001-9000-8513", 'original_order'] = 99999

# Perform the groupby operation without sorting
summary = (merged_data.groupby(merged_data['ORCID iD'].fillna(merged_data['Name']), sort=False)
.apply(concatenate_contributions)
@@ -235,7 +342,52 @@ def extract_orcid_id(value):
summary = summary.reset_index(drop=True)
summary_string = '\n\n'.join(summary['Contributions'])

# --- LOGGING ADDED HERE ---

# Get project and role names for dropdown filters

project_names = sorted(merged_data["Project Name"].dropna().unique())


role_names = list(set(columns_to_check + df_roles["Contributions"].dropna().unique().tolist()))

projects_list = sorted(
[
{"value": normalize_for_attribute(p), "label": p}
for p in project_names
if p not in (None, "")
],
key=lambda x: x["label"]
)

# Deduplicate roles by 'value', keeping the first label encountered (for solving Project Manager / Project manager issue)
roles_dict = {}
for r in role_names:
if r not in (None, ""):
normalized = normalize_for_attribute(r)
if normalized not in roles_dict:
roles_dict[normalized] = r

roles_list = sorted(
[{"value": k, "label": v} for k, v in roles_dict.items()],
key=lambda x: x["label"]
)

# Save in json format
filter_data = {
"projects": projects_list,
"roles": roles_list
}

# Add closing tags and JavaScript include
footer_content = f"""
</ul>
<script>
// Value-labels for filtering menus
window.filterData = {json.dumps(filter_data, indent=2)};
</script>
<script src="/js/contributor-filter.js"></script>
"""

# Log the final deduplicated number of contributors
print("\n--- Processing Complete ---")
print(f"Total number of unique contributors after deduplication: {len(summary)}")
@@ -255,8 +407,8 @@ def extract_orcid_id(value):
with open(template_path, 'r') as file:
template_content = file.read()

# Combine the template content with the new summary string
combined_content = template_content + summary_string
# Combine the template content with the new summary string and footer
combined_content = template_content + summary_string + footer_content

# Save the combined content to 'tenzing.md'
with open(output_path, 'w') as file: