Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 161 additions & 5 deletions mlperf_logging/result_summarizer/result_summarizer.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
'''
Summarizes a set of results.
'''

from __future__ import print_function

import argparse
import glob
import json
Expand All @@ -13,6 +10,10 @@
import itertools
import pandas as pd
import yaml
import hashlib
import math
import operator
import uuid as uuidlib

from ..compliance_checker import mlp_compliance
from ..compliance_checker.mlp_compliance import usage_choices, rule_choices
Expand Down Expand Up @@ -280,6 +281,7 @@ def _get_column_schema(usage, ruleset, weak_scaling=False):
'accelerators_count': int,
'framework': str,
'notes': str,
'private_id': str
}
if weak_scaling == True:
benchmarks = get_allowed_benchmarks(usage, ruleset)
Expand Down Expand Up @@ -672,6 +674,16 @@ def _load_system_desc(folder, system):
raise FileNotFoundError('ERROR: Missing {}'.format(system_file))
return _read_json_file(system_file)

def _update_system_desc_with_id(folder, system, id):
    """Persist a private id into a system description JSON file.

    Args:
        folder: Submission folder containing the 'systems' subfolder.
        system: System name (basename of the JSON file, without '.json').
        id: Private id string to store under the 'private_id' key.

    Raises:
        FileNotFoundError: If the system description file does not exist.
    """
    systems_folder = os.path.join(folder, 'systems')
    system_file = os.path.join(systems_folder, '{}.json'.format(system))
    if not os.path.exists(system_file):
        raise FileNotFoundError('ERROR: Missing {}'.format(system_file))
    json_file_contents = _read_json_file(system_file)
    # Only rewrite the file when the id is actually being added; a file
    # that already carries a private_id is left untouched (avoids churning
    # file contents/mtimes on every summarization run).
    if "private_id" not in json_file_contents:
        json_file_contents["private_id"] = id
        with open(system_file, 'w') as f:
            json.dump(json_file_contents, f, indent=4)

def _fill_empty_benchmark_scores(
benchmark_scores,
Expand All @@ -691,6 +703,128 @@ def _fill_empty_benchmark_scores(
benchmark_scores[benchmark] = None


def _get_id_from_sysinfo(summary):
"""Generate private id from system information.

Args:
summary (dictionary): Sysinfo Dictionary
"""


# Code from humanhash3, which is public domain.
DEFAULT_WORDLIST = (
'ack', 'alabama', 'alanine', 'alaska', 'alpha', 'angel', 'apart', 'april',
'arizona', 'arkansas', 'artist', 'asparagus', 'aspen', 'august', 'autumn',
'avocado', 'bacon', 'bakerloo', 'batman', 'beer', 'berlin', 'beryllium',
'black', 'blossom', 'blue', 'bluebird', 'bravo', 'bulldog', 'burger',
'butter', 'california', 'carbon', 'cardinal', 'carolina', 'carpet', 'cat',
'ceiling', 'charlie', 'chicken', 'coffee', 'cola', 'cold', 'colorado',
'comet', 'connecticut', 'crazy', 'cup', 'dakota', 'december', 'delaware',
'delta', 'diet', 'don', 'double', 'early', 'earth', 'east', 'echo',
'edward', 'eight', 'eighteen', 'eleven', 'emma', 'enemy', 'equal',
'failed', 'fanta', 'fifteen', 'fillet', 'finch', 'fish', 'five', 'fix',
'floor', 'florida', 'football', 'four', 'fourteen', 'foxtrot', 'freddie',
'friend', 'fruit', 'gee', 'georgia', 'glucose', 'golf', 'green', 'grey',
'hamper', 'happy', 'harry', 'hawaii', 'helium', 'high', 'hot', 'hotel',
'hydrogen', 'idaho', 'illinois', 'india', 'indigo', 'ink', 'iowa',
'island', 'item', 'jersey', 'jig', 'johnny', 'juliet', 'july', 'jupiter',
'kansas', 'kentucky', 'kilo', 'king', 'kitten', 'lactose', 'lake', 'lamp',
'lemon', 'leopard', 'lima', 'lion', 'lithium', 'london', 'louisiana',
'low', 'magazine', 'magnesium', 'maine', 'mango', 'march', 'mars',
'maryland', 'massachusetts', 'may', 'mexico', 'michigan', 'mike',
'minnesota', 'mirror', 'mississippi', 'missouri', 'mobile', 'mockingbird',
'monkey', 'montana', 'moon', 'mountain', 'muppet', 'music', 'nebraska',
'neptune', 'network', 'nevada', 'nine', 'nineteen', 'nitrogen', 'north',
'november', 'nuts', 'october', 'ohio', 'oklahoma', 'one', 'orange',
'oranges', 'oregon', 'oscar', 'oven', 'oxygen', 'papa', 'paris', 'pasta',
'pennsylvania', 'pip', 'pizza', 'pluto', 'potato', 'princess', 'purple',
'quebec', 'queen', 'quiet', 'red', 'river', 'robert', 'robin', 'romeo',
'rugby', 'sad', 'salami', 'saturn', 'september', 'seven', 'seventeen',
'shade', 'sierra', 'single', 'sink', 'six', 'sixteen', 'skylark', 'snake',
'social', 'sodium', 'solar', 'south', 'spaghetti', 'speaker', 'spring',
'stairway', 'steak', 'stream', 'summer', 'sweet', 'table', 'tango', 'ten',
'tennessee', 'tennis', 'texas', 'thirteen', 'three', 'timing', 'triple',
'twelve', 'twenty', 'two', 'uncle', 'undress', 'uniform', 'uranus', 'utah',
'vegan', 'venus', 'vermont', 'victor', 'video', 'violet', 'virginia',
'washington', 'west', 'whiskey', 'white', 'william', 'winner', 'winter',
'wisconsin', 'wolfram', 'wyoming', 'xray', 'yankee', 'yellow', 'zebra',
'zulu')

class HumanHasher(object):

def __init__(self, wordlist=DEFAULT_WORDLIST):
self.wordlist = wordlist

def humanize_list(self, hexdigest, words=4):
# Gets a list of byte values between 0-255.
bytes_ = map(lambda x: int(x, 16),
map(''.join, zip(hexdigest[::2], hexdigest[1::2])))
# Compress an arbitrary number of bytes to `words`.
compressed = self.compress(bytes_, words)

return [str(self.wordlist[byte]) for byte in compressed]

def humanize(self, hexdigest, words=4, separator='-'):
# Map the compressed byte values through the word list.
return separator.join(self.humanize_list(hexdigest, words))

@staticmethod
def compress(bytes_, target):
bytes_list = list(bytes_)

length = len(bytes_list)
# If there are less than the target number bytes, return input bytes
if target >= length:
return bytes_

# Split `bytes` evenly into `target` segments
# Each segment hashes `seg_size` bytes, rounded down for some
seg_size = float(length) / float(target)
# Initialize `target` number of segments
segments = [0] * target
seg_num = 0

# Use a simple XOR checksum-like function for compression
for i, byte in enumerate(bytes_list):
# Divide the byte index by the segment size to assign its segment
# Floor to create a valid segment index
# Min to ensure the index is within `target`
seg_num = min(int(math.floor(i / seg_size)), target-1)
# Apply XOR to the existing segment and the byte
segments[seg_num] = operator.xor(segments[seg_num], byte)

return segments

def uuid(self, **params):
digest = str(uuidlib.uuid4()).replace('-', '')
return self.humanize(digest, **params), digest



def get_hash(row):
columns_for_hashing = [
'division',
'submitter',
'system_name',
'number_of_nodes',
'host_processor_model_name',
'host_processors_per_node',
'accelerator_model_name',
'accelerators_per_node',
'framework'
]
to_hash = ''.join(str(row[c]) for c in columns_for_hashing)
return hashlib.sha256(to_hash.encode('utf-8')).hexdigest()

hash = get_hash(summary)
humanhasha = HumanHasher()
summary = humanhasha.humanize(hash)

return summary




def summarize_results(folder, usage, ruleset, csv_file=None, **kwargs):
"""Summarizes a set of results.

Expand All @@ -713,6 +847,17 @@ def summarize_results(folder, usage, ruleset, csv_file=None, **kwargs):
# Load corresponding system description.
try:
desc = _load_system_desc(folder, system)

# Generate private id and update system desc to match
if kwargs.get('generate_private_ids'):
id = _get_id_from_sysinfo(desc)
desc['private_id'] = id if 'private_id' not in desc else desc['private_id']
_update_system_desc_with_id(folder, system, id)
else:
if 'private_id' in desc:
print(f"WARNING: Found private_id in system desc for {system} but not generating private ids. To generate private ids, please use the --generate_private_ids flag.")
desc['private_id'] = ''

except (json.JSONDecodeError, FileNotFoundError) as e:
print(e)
continue
Expand All @@ -729,6 +874,7 @@ def _check_and_update_system_specs(desc_keys, column_name, query=None):
# Construct prefix portion of the row.
try:
_check_and_update_system_specs('division', 'division')
_check_and_update_system_specs('private_id', 'private_id')
# Map availability if requested
if "availability" in kwargs:
_check_and_update_system_specs('status', 'availability', lambda desc: _map_availability(desc["status"], kwargs["availability"]))
Expand Down Expand Up @@ -837,6 +983,8 @@ def _check_and_update_system_specs(desc_keys, column_name, query=None):
return strong_scaling_summary, weak_scaling_summary, power_summary, power_weak_scaling_summary




def get_parser():
parser = argparse.ArgumentParser(
prog='mlperf_logging.result_summarizer',
Expand All @@ -857,6 +1005,11 @@ def get_parser():
type=str,
choices=rule_choices(),
help='the ruleset such as 0.6.0, 0.7.0, or 1.0.0')

parser.add_argument('--generate_private_ids',
action='store_true',
help='Generate private IDs for each run.')

parser.add_argument('--werror',
action='store_true',
help='Treat warnings as errors')
Expand All @@ -874,6 +1027,7 @@ def get_parser():
'--xlsx',
type=str,
help='Exports a xlsx of the results to the path specified')


return parser

Expand All @@ -896,13 +1050,15 @@ def _update_summaries(folder):
folder,
args.usage,
args.ruleset,
availability = config["availability"]
availability = config["availability"],
generate_private_ids = args.generate_private_ids,
)
else:
strong_scaling_summary, weak_scaling_summary, power_summary, power_weak_scaling_summary = summarize_results(
folder,
args.usage,
args.ruleset,
generate_private_ids = args.generate_private_ids,
)
strong_scaling_summaries.append(strong_scaling_summary)
if len(weak_scaling_summary) > 0:
Expand Down Expand Up @@ -1042,7 +1198,7 @@ def _print_and_write(summaries, weak_scaling=False, mode='w', power = False):

# Sort rows by their values
summaries = summaries.sort_values(by=cols)
print(summaries)

if args.csv is not None:
csv = args.csv
assert csv.endswith(".csv")
Expand Down
Loading