#!/usr/bin/env python3

# Copyright (C) 2015-2016 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# "MDAPI" xml files are an XML schema for maintaining meta data about Gen
# graphics Observability counters, where MD API is the name of a library shared
# by Intel GPA and Intel VTune.
#
# These files aren't publicly documented and have some historical baggage that
# adds some complexity as well as being inconsistent in a number of ways that
# makes it quite a bit of effort to parse/use the data. We also don't have
# guarantees about how this schema is maintained.
#
# We've taken the opportunity to find ways to simplify the input data and to
# make it more consistent to hopefully reduce the effort involved in using the
# data downstream.
#

import argparse
import copy
import hashlib
from operator import itemgetter
import re
import sys
import time
import uuid
import xml.etree.ElementTree as et
import xml.sax.saxutils as saxutils

import oa_guid_registry as oa_registry

# MDAPI configs include writes to some non-config registers,
# thus the blacklists...

gen8_11_chipset_params = {
    'a_offset': 16,
    'b_offset': 192,
    'c_offset': 224,
    'oa_report_size': 256,
    'config_reg_blacklist': {
        0x2364, # OACTXID
    },
}

chipsets = {
    'HSW': {
        'a_offset': 12,
        'b_offset': 192,
        'c_offset': 224,
        'oa_report_size': 256,
        'registers': {
            # TODO extend the symbol table for nicer output...
            0x2710: { 'name': 'OASTARTTRIG1' },
            0x2714: { 'name': 'OASTARTTRIG2' },
            0x2718: { 'name': 'OASTARTTRIG3' },
            0x271c: { 'name': 'OASTARTTRIG4' },
            0x2720: { 'name': 'OASTARTTRIG5' },
            0x2724: { 'name': 'OASTARTTRIG6' },
            0x2728: { 'name': 'OASTARTTRIG7' },
            0x272c: { 'name': 'OASTARTTRIG8' },

            0x2740: { 'name': 'OAREPORTTRIG1' },
            0x2744: { 'name': 'OAREPORTTRIG2' },
            0x2748: { 'name': 'OAREPORTTRIG3' },
            0x274c: { 'name': 'OAREPORTTRIG4' },
            0x2750: { 'name': 'OAREPORTTRIG5' },
            0x2754: { 'name': 'OAREPORTTRIG6' },
            0x2758: { 'name': 'OAREPORTTRIG7' },
            0x275c: { 'name': 'OAREPORTTRIG8' },

            0x2770: { 'name': 'OACEC0_0' },
            0x2774: { 'name': 'OACEC0_1' },
            0x2778: { 'name': 'OACEC1_0' },
            0x277c: { 'name': 'OACEC1_1' },
            0x2780: { 'name': 'OACEC2_0' },
            0x2784: { 'name': 'OACEC2_1' },
            0x2788: { 'name': 'OACEC3_0' },
            0x278c: { 'name': 'OACEC3_1' },
            0x2790: { 'name': 'OACEC4_0' },
            0x2794: { 'name': 'OACEC4_1' },
            0x2798: { 'name': 'OACEC5_0' },
            0x279c: { 'name': 'OACEC5_1' },
            0x27a0: { 'name': 'OACEC6_0' },
            0x27a4: { 'name': 'OACEC6_1' },
            0x27a8: { 'name': 'OACEC7_0' },
            0x27ac: { 'name': 'OACEC7_1' },
        },
        'config_reg_blacklist': {
            0x2364, # OASTATUS1 register
        },
    },
    'BDW': gen8_11_chipset_params,
    'CHV': gen8_11_chipset_params,
    'SKL': gen8_11_chipset_params,
    'BXT': gen8_11_chipset_params,
    'KBL': gen8_11_chipset_params,
    'GLK': gen8_11_chipset_params,
    'CFL': gen8_11_chipset_params,
    'CNL': gen8_11_chipset_params,
    'ICL': gen8_11_chipset_params,
    'EHL': gen8_11_chipset_params,
    'TGL': gen8_11_chipset_params,
    'RKL': gen8_11_chipset_params,
    'DG1': gen8_11_chipset_params,
    'ADL': gen8_11_chipset_params,
}

register_types = { 'OA', 'NOA', 'FLEX', 'PM' }

default_set_blacklist = { "RenderDX1x", # TODO: rename to something non 'DX'
                                        # specific if this config is generally
                                        # useful
                          "RenderBalance", # XXX: missing register config
                          "PipelineTimestamps", # Covered by API timestamp queries
                        }

counter_blacklist = {
    "DramLlcThroughput", # TODO: The max equation of this counter
                         # requires dram throughput value. Need to
                         # investigate how to get this value.
}

sys_vars = { "EuCoresTotalCount",
             "EuSlicesTotalCount",
             "SamplersTotalCount",
             "EuThreadsCount",
             "GpuMinFrequencyMHz",
             "GpuMaxFrequencyMHz",
             "GpuTimestampFrequency",
             "SliceMask",
             "SubsliceMask",
             "EuSubslicesTotalCount"
           }


def underscore(name):
    """Convert a CamelCase (optionally dotted) symbol name to snake_case."""
    # 'MHz' is normalised first so it becomes '_mhz' rather than '_m_hz'.
    s = re.sub('MHz', 'Mhz', name)
    s = re.sub(r'\.', '_', s)
    s = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', s)
    return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s).lower()


def print_err(*args):
    """Write the space-joined arguments plus a newline to stderr."""
    sys.stderr.write(' '.join(map(str, args)) + '\n')


read_register_offsets = {
    0x1f0: 'PERFCNT 0',
    0x1f8: 'PERFCNT 1',
}


def read_value(chipset, offset):
    """Return the RPN operand for a read beyond the OA report counters.

    Raises AssertionError (after logging) for an offset that is not listed
    in read_register_offsets. The chipset argument is currently unused.
    """
    if offset in read_register_offsets:
        return read_register_offsets[offset]
    print_err("Unknown offset register at offset {0}".format(offset))
    # Raised explicitly so the check is not stripped by `python -O`.
    raise AssertionError("Unknown offset register at offset {0}".format(offset))


def read_token_to_rpn_read(chipset, token, raw_offsets):
    """Translate one '<width>@<hex offset>' token into an RPN READ op.

    raw_offsets selects the layout the offset refers to: True for byte
    offsets into a raw OA report, False for offsets into the array of
    accumulated 64bit counters (the 'delta report').
    """
    width, offset_str = token.split('@')

    # For Broadwell the raw read notation was extended for 40 bit
    # counters: rd40@<32bit_part1_offset>:<8bit_part2_offset>
    if width == "rd40":
        offset_str, _ = offset_str.split(':')

    offset = int(offset_str, 16)

    if raw_offsets:
        a_offset = chipsets[chipset]['a_offset']
        b_offset = chipsets[chipset]['b_offset']
        c_offset = chipsets[chipset]['c_offset']
        report_size = chipsets[chipset]['oa_report_size']

        if offset < a_offset:
            if offset == 4:
                return "GPU_TIME 0 READ"
            if offset == 12:
                assert chipset != "HSW" # Only for Gen8+
                return "GPU_CLOCK 0 READ"
            raise AssertionError("Unexpected report header offset {0}".format(offset))
        if offset < b_offset:
            return "A " + str((offset - a_offset) // 4) + " READ"
        if offset < c_offset:
            return "B " + str((offset - b_offset) // 4) + " READ"
        if offset < report_size:
            return "C " + str((offset - c_offset) // 4) + " READ"
        return "{0} READ".format(read_value(chipset, offset))

    idx = offset // 8
    if chipset == "HSW":
        # On Haswell accumulated counters are assumed to start
        # with GPU_TIME followed by 45 A counters, then 8 B
        # counters and finally 8 C counters.
        if idx < 1:
            return "GPU_TIME 0 READ"
        if idx < 46:
            return "A " + str(idx - 1) + " READ"
        if idx < 54:
            return "B " + str(idx - 46) + " READ"
        if idx < 62:
            return "C " + str(idx - 54) + " READ"
        return "{0} READ".format(read_value(chipset, offset))

    # For Gen8+ the array of accumulated counters is
    # assumed to start with a GPU_TIME then GPU_CLOCK,
    # then 36 A counters, then 8 B counters and finally
    # 8 C counters.
    if idx == 0:
        return "GPU_TIME 0 READ"
    if idx == 1:
        return "GPU_CLOCK 0 READ"
    if idx < 38:
        return "A " + str(idx - 2) + " READ"
    if idx < 46:
        return "B " + str(idx - 38) + " READ"
    if idx < 54:
        return "C " + str(idx - 46) + " READ"
    return "{0} READ".format(read_value(chipset, offset))


def replace_read_tokens_with_rpn_read_ops(chipset, equation, raw_offsets):
    """Rewrite every '@' read token of an equation as an RPN READ op.

    Every token of the result is preceded by a single space (so the result
    has a leading space); callers compare and substitute these strings
    as-is and strip only when storing the final equation.
    """
    # MDAPI MetricSet equations use tokens like 'dw@0xff' for reading raw
    # values from snapshots, but this doesn't seem convenient for a few
    # reasons:
    #
    # 1) The offsets hide the particular a, b, or c counter they
    #    correspond to which in turn makes it awkward to experiment
    #    with different report sizes which trade off how many a, b and
    #    c counters are available
    #
    # 2) Raw reads could be represented as RPN operations too, and
    #    the consistency could make them slightly easier for tools to
    #    handle, E.g:
    #
    #      "A 5 READ" = read A counter 5
    #
    # We replace dw@ address tokens with GPU_TIME, A, B or C READ ops...
    #
    parts = []
    for token in equation.split():
        if '@' in token:
            token = read_token_to_rpn_read(chipset, token, raw_offsets)
        parts.append(" " + token)
    return "".join(parts)


parser = argparse.ArgumentParser()
parser.add_argument("xml", nargs="+", help="XML description of metrics")
parser.add_argument("--guids", required=True, help="Metric set GUID registry")
parser.add_argument("--whitelist", help="Only output for given, space-separated, sets")
parser.add_argument("--blacklist", help="Don't generate anything for given metric sets")
parser.add_argument("--merge", help="Additional meta data to merge into the result")
parser.add_argument("--dry-run", action="store_true",
                    help="Not generate new XML but to check any errors")

args = parser.parse_args()

metrics = et.Element('metrics')
tree = et.ElementTree(metrics)


def apply_aliases(text, aliases):
    """Apply comma-separated 'from|to' aliases to text as whole words.

    Each alias is applied as written and again in lower case. Returns text
    unchanged when aliases is None.
    """
    if aliases is None:
        return text

    for alias in aliases.split(','):
        (a, b) = alias.split('|')
        text = re.sub(r"\b%s\b" % re.escape(a), b, text)
        a = a.lower()
        b = b.lower()
        text = re.sub(r"\b%s\b" % re.escape(a), b, text)
    return text


def strip_dx_apis(text):
    """Return the space-separated API list without entries starting 'DX'."""
    if text is None:
        return ""
    return " ".join(api for api in text.split() if api[:2] != "DX")


# For recursively appending counters in order of dependencies...
def append_deps_and_counter(mdapi_counter, mdapi_counters, deps,
                            sorted_array, sorted_set):
    """Append a counter's known dependencies, then the counter itself.

    sorted_set records the symbol names already appended so each counter
    is emitted once.
    """
    symbol_name = mdapi_counter.get('SymbolName')

    if symbol_name in sorted_set:
        return

    for dep_name in deps[symbol_name]:
        if dep_name in mdapi_counters:
            append_deps_and_counter(mdapi_counters[dep_name], mdapi_counters,
                                    deps, sorted_array, sorted_set)

    sorted_array.append(mdapi_counter)
    sorted_set[symbol_name] = mdapi_counter


def sort_counters(mdapi_counters, deps):
    """Return the counters ordered so dependencies precede their users."""
    sorted_array = []
    sorted_set = {} # counters in here have been added to array
    for symbol_name in mdapi_counters:
        append_deps_and_counter(mdapi_counters[symbol_name], mdapi_counters,
                                deps, sorted_array, sorted_set)

    return sorted_array


def expand_macros(equation):
    """Expand the 'GpuDuration' and 'EuAggrDuration' equation macros."""
    equation = equation.replace('GpuDuration', "$Self 100 UMUL $GpuCoreClocks FDIV")
    equation = equation.replace('EuAggrDuration', "$Self $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV")
    return equation


def fixup_equation(equation):
    """Correct the '$SubliceMask' misspelling; None passes through."""
    if equation is None:
        return None
    return equation.replace('$SubliceMask', '$SubsliceMask')


# The MDAPI XML files sometimes duplicate the same Flex EU/OA regs
# between configs with different AvailabilityEquations even though the
# availability checks are only expected to affect the MUX configs
#
# We iterate all the configs to filter out the FLEX/OA configs and
# double check that there's never any variations between repeated
# configs
#
def filter_single_config_registers_of_type(mdapi_metric_set, type):
    """Return the (address, value) registers of the given type for a set.

    Reads the module-level `chipset` to pick the register blacklist. Exits
    the process with status 1 if two configs of the set disagree.
    """
    regs = []
    for mdapi_reg_config in mdapi_metric_set.findall("RegConfigStart"):
        tmp_regs = []
        for mdapi_reg in mdapi_reg_config.findall("Register"):
            reg = (int(mdapi_reg.get('offset'), 16), int(mdapi_reg.get('value'), 16))

            if reg[0] in chipsets[chipset]['config_reg_blacklist']:
                continue

            if mdapi_reg.get('type') == type:
                tmp_regs.append(reg)

        if not tmp_regs:
            continue

        if not regs:
            regs = tmp_regs
        elif regs != tmp_regs:
            print_err("ERROR: multiple, differing FLEX/OA configs for one set: MetricSet=\"" + mdapi_metric_set.get('ShortName'))
            sys.exit(1)

    return regs


# We only have a very small number of IDs, but we aren't assuming they
# start from zero or are contiguous in the MDAPI XML files. Python
# doesn't seem to have a built in sparse array type so we just
# loop over the entries we have:
def get_mux_id_group(id_groups, id):
    """Return the group with the given id, appending a new empty one if absent."""
    for group in id_groups:
        if group['id'] == id:
            return group

    new_group = { 'id': id, 'configs': [] }
    id_groups.append(new_group)

    return new_group


def process_mux_configs(mdapi_set):
    """Collect the NOA/PM (MUX) register configs of a metric set.

    Returns a list of {'priority', 'availability', 'registers'} dicts, or an
    empty tuple when the set has none. Reads the module-level `chipset` and
    exits the process with status 1 on inputs the script cannot represent.
    """
    allow_missing_id = True

    mux_config_id_groups = []

    for mdapi_reg_config in mdapi_set.findall("RegConfigStart"):

        mux_regs = []
        for mdapi_reg in mdapi_reg_config.findall("Register"):
            address = int(mdapi_reg.get('offset'), 16)

            if address in chipsets[chipset]['config_reg_blacklist']:
                continue

            reg_type = mdapi_reg.get('type')

            if reg_type not in register_types:
                # str(): reg_type is None when the attribute is missing.
                print_err("ERROR: unknown register type=\"" + str(reg_type) + "\": MetricSet=\"" + mdapi_set.get('ShortName'))
                sys.exit(1)

            if reg_type != 'NOA' and reg_type != 'PM':
                continue

            reg = (address, int(mdapi_reg.get('value'), 16))
            mux_regs.append(reg)

        if not mux_regs:
            continue

        availability = mdapi_reg_config.get('AvailabilityEquation')
        if availability == "":
            availability = None

        if mdapi_reg_config.get('ConfigPriority') is not None:
            reg_config_priority = int(mdapi_reg_config.get('ConfigPriority'))
        else:
            reg_config_priority = 0

        if mdapi_reg_config.get('ConfigId') is not None:
            reg_config_id = int(mdapi_reg_config.get('ConfigId'))
            allow_missing_id = False
        elif allow_missing_id:
            reg_config_id = 0
        else:
            # It will spell trouble if there's a mixture of explicit and
            # implied config IDs...
            print_err("ERROR: register configs mixing implied/explicit IDs: MetricSet=\"" + mdapi_set.get('ShortName'))
            sys.exit(1)

        mux_config = { 'priority': reg_config_priority,
                       'availability': availability,
                       'registers': mux_regs }
        mux_config_id_group = get_mux_id_group(mux_config_id_groups, reg_config_id)
        mux_config_id_group['configs'].append(mux_config)

    mux_config_id_groups.sort(key=itemgetter('id'))

    # The only special case we currently support for more than one group of NOA
    # MUX configs is for the Broadwell ComputeExtended metric set with two Id
    # groups and the second just has a single unconditional config that can
    # logically be appended to all the conditional configs of the first group
    if len(mux_config_id_groups) > 1:
        if len(mux_config_id_groups) != 2:
            print_err("ERROR: Script doesn't currently allow more than two groups of NOA MUX configs for a single metric set: MetricSet=\"" + mdapi_set.get('ShortName'))
            sys.exit(1)

        last_id_group = mux_config_id_groups[-1]
        if len(last_id_group['configs']) != 1:
            print_err("ERROR: Script currently only allows up to two Ids for NOA MUX configs if second Id only contains a single unconditional config: MetricSet=\"" + mdapi_set.get('ShortName'))
            sys.exit(1)

        tail_config = last_id_group['configs'][0]
        for mux_config in mux_config_id_groups[0]['configs']:
            mux_config['registers'] = mux_config['registers'] + tail_config['registers']

        mux_config_id_groups = [mux_config_id_groups[0]]

    # The original compared the configs list with the integer 0, which is
    # never true; test for emptiness instead.
    if not mux_config_id_groups or not mux_config_id_groups[0]['configs']:
        return ()

    mux_configs = mux_config_id_groups[0]['configs']
    assert isinstance(mux_configs, list)
    assert len(mux_configs) >= 1
    assert len(mux_configs[0]['registers']) > 1 # > 1 registers

    return mux_configs


def add_register_config(set, priority, availability, regs, type):
    """Append a <register_config> element holding regs to the set element.

    priority and availability are recorded only when availability is not
    None, which is only expected for "NOA" configs.
    """
    reg_config = et.SubElement(set, 'register_config')

    reg_config.set('type', type)

    if availability is not None:
        assert type == "NOA"
        reg_config.set('priority', str(priority))
        reg_config.set('availability', availability)

    for reg in regs:
        elem = et.SubElement(reg_config, 'register')
        elem.set('type', type)
        elem.set('address', "0x%08X" % reg[0])
        elem.set('value', "0x%08X" % reg[1])


def to_text(value):
    """Return value, or the empty string when value is None."""
    if value is None:
        return ""
    return value


def format_xml_attrs(elem, separator):
    """Return elem's attributes as name="value" pairs joined by separator."""
    return separator.join(name + "=" + saxutils.quoteattr(str(value))
                          for name, value in elem.items())


# There are duplicated metric sets with the same symbol name so we
# keep track of the sets we've read so we can skip duplicates...
sets = {}

# Chipset each emitted set was converted for, keyed by set symbol name. The
# emission loop needs this because `chipset` only holds the value of the
# last MetricSet visited by the conversion loop.
set_chipsets = {}

guids = {}

guids_xml = et.parse(args.guids)
for guid in guids_xml.findall(".//guid"):
    hashing_key = oa_registry.Registry.chipset_derive_hash(guid.get('chipset'), guid.get('mdapi_config_hash'))
    guids[hashing_key] = guid.get('id')

for arg in args.xml:
    mdapi = et.parse(arg)

    concurrent_group = mdapi.find(".//ConcurrentGroup")

    for mdapi_set in mdapi.findall(".//MetricSet"):

        # A missing SupportedAPI attribute is treated as "no supported API".
        apis = mdapi_set.get('SupportedAPI') or ""
        if "OGL" not in apis and "OCL" not in apis and "MEDIA" not in apis:
            continue

        set_symbol_name = mdapi_set.get('SymbolName')

        if set_symbol_name in sets:
            print_err("WARNING: duplicate set named \"" + set_symbol_name + "\" (SKIPPING)")
            continue

        # NOTE: `chipset` must stay a module-level name; process_mux_configs
        # and filter_single_config_registers_of_type read it as a global.
        chipset = oa_registry.Registry.chipset_name(mdapi_set.get('SupportedHW'))
        chipset_fullname = chipset
        if concurrent_group is not None and concurrent_group.get('SupportedGT') is not None:
            chipset_fullname = chipset_fullname + oa_registry.Registry.gt_name(concurrent_group.get('SupportedGT'))
        if chipset not in chipsets:
            print_err("WARNING: unsupported chipset {0}, consider updating {1}".format(chipset, __file__))
            continue

        if args.whitelist:
            set_whitelist = args.whitelist.split()
            if set_symbol_name not in set_whitelist:
                continue

        if args.blacklist:
            set_blacklist = args.blacklist.split()
        else:
            set_blacklist = default_set_blacklist
        if set_symbol_name in set_blacklist:
            continue

        if mdapi_set.get('SnapshotReportSize') != '256':
            print_err("WARNING: skipping metric set '{0}', report size {1} invalid".format(set_symbol_name, mdapi_set.get('SnapshotReportSize')))
            continue

        metric_set = et.SubElement(metrics, 'set')

        metric_set.set('chipset', chipset_fullname)

        metric_set.set('name', mdapi_set.get('ShortName'))
        metric_set.set('symbol_name', set_symbol_name)
        metric_set.set('underscore_name', underscore(mdapi_set.get('SymbolName')))
        metric_set.set('mdapi_supported_apis', strip_dx_apis(mdapi_set.get('SupportedAPI')))

        # Look at the hardware register config before looking at the counters.
        #
        # The hardware configuration is used as a key to lookup up a GUID which
        # is used by applications to lookup the corresponding counter
        # normalization equations.
        #
        # We want to skip over any metric sets that don't yet have a registered
        # GUID in guids.xml.

        # There can be multiple NOA MUX configs, since they may have associated
        # availability tests to match particular systems.
        #
        # Unlike the MDAPI XML files we only support tracking one group of
        # mutually exclusive MUX configs, whereas the MDAPI XML files
        # theoretically allow a single metric set to be associated with ordered
        # groups of mutually exclusive configs. So far there is only one
        # Broadwell, ComputeExtended metric set which uses this, but that
        # particular case can be expressed in less general terms.
        #
        # Being a bit simpler here should make it easier for downstream tools
        # to deal with. (At least we got the handling of the Broadwell
        # ComputeExtended example wrong and it took several email exchanges and
        # a conference call to confirm how to interpret this case)
        mux_configs = process_mux_configs(mdapi_set)

        # Unlike for MUX registers, we only expect one set of FLEX/OA
        # registers per metric set (even though they are sometimes duplicated
        # between configs in MDAPI XML files.
        #
        # This filter function, extracts the register of a certain type but
        # also double checks that if they are repeated in separate configs that
        # they don't vary. (Notably the current i915 perf Linux driver would
        # need some adapting to support multiple OA/FLEX configs with different
        # availability expressions)
        #
        flex_regs = filter_single_config_registers_of_type(mdapi_set, "FLEX")
        oa_regs = filter_single_config_registers_of_type(mdapi_set, "OA")

        # Note: we ignore Perfmon registers

        for mux_config in mux_configs:
            add_register_config(metric_set, mux_config['priority'], mux_config['availability'], mux_config['registers'], "NOA")
        if oa_regs:
            add_register_config(metric_set, 0, None, oa_regs, "OA")
        if flex_regs:
            add_register_config(metric_set, 0, None, flex_regs, "FLEX")

        mdapi_hw_config_hash = oa_registry.Registry.mdapi_hw_config_hash(mdapi_set)
        guid_hash = oa_registry.Registry.chipset_derive_hash(chipset_fullname.lower(),
                                                             mdapi_hw_config_hash)
        hw_config_hash = oa_registry.Registry.hw_config_hash(metric_set)

        if guid_hash in guids:
            metric_set.set('hw_config_guid', guids[guid_hash])
        else:
            print_err("WARNING: No GUID found for metric set " + chipset_fullname + ", " + set_symbol_name + " (SKIPPING)")
            print_err("WARNING: If this is a new config add the following to guids.xml:")
            # NOTE(review): the hint line had lost its markup (it printed an
            # empty string, leaving hw_config_hash and uuid unused). It is
            # rebuilt from the attributes the guids reader above consumes;
            # confirm 'config_hash' and 'name' against the guids.xml schema.
            print_err("<guid config_hash=\"" + str(hw_config_hash) +
                      "\" mdapi_config_hash=\"" + str(mdapi_hw_config_hash) +
                      "\" id=\"" + str(uuid.uuid4()) +
                      "\" chipset=\"" + chipset_fullname.lower() +
                      "\" name=\"" + set_symbol_name + "\" />")
            metrics.remove(metric_set)
            continue

        sets[set_symbol_name] = metric_set
        set_chipsets[set_symbol_name] = chipset

        counters = {}
        normalization_equations = {}
        raw_equations = {}

        # Awkwardly we can't assume metrics are in dependency order and have to
        # sort them manually. We start by associating a list of dependencies with
        # each counter...

        mdapi_counters = {}
        mdapi_counter_deps = {}

        for mdapi_counter in mdapi_set.findall("Metrics/Metric"):
            symbol_name = mdapi_counter.get('SymbolName')

            if symbol_name in counter_blacklist:
                continue

            # Have seen at least one MetricSet with a duplicate GpuCoreClocks counter...
            if symbol_name in mdapi_counters:
                print_err("WARNING: Skipping duplicate counter \"" + symbol_name + \
                        "\" in " + metric_set.get('name') + " :: " + mdapi_counter.get('ShortName'))
                continue

            deps = []
            equations = fixup_equation(str(mdapi_counter.get('SnapshotReportReadEquation'))) + " " + \
                        fixup_equation(str(mdapi_counter.get('SnapshotReportDeltaEquation'))) + " " + \
                        fixup_equation(str(mdapi_counter.get('DeltaReportReadEquation'))) + " " + \
                        fixup_equation(str(mdapi_counter.get('NormalizationEquation')))
            equations = expand_macros(equations)
            equations = equations.replace('$$', "$")
            for token in equations.split():
                if token.startswith('$') and token[1:] not in sys_vars and token[1:] != "Self":
                    deps.append(token[1:])

            mdapi_counters[symbol_name] = mdapi_counter
            mdapi_counter_deps[symbol_name] = deps

        sorted_mdapi_counters = sort_counters(mdapi_counters, mdapi_counter_deps)

        for mdapi_counter in sorted_mdapi_counters:

            aliases = mdapi_counter.get('Alias')

            skip_counter = False

            # We don't currently support configuring and reading perfmon registers
            signal = mdapi_counter.get('SignalName')
            if signal and "perfmon" in signal:
                continue

            # A few things to fixup with this common counter...
            if mdapi_counter.get('SymbolName') == "AvgGpuCoreFrequencyMHz":
                # To avoid requiring a special case in tools, add a max value
                # equation for the gpu frequency...
                mdapi_counter.set('MaxValueEquation', "$GpuMaxFrequency")

                # Don't include units in the name
                mdapi_counter.set('SymbolName', "AvgGpuCoreFrequency")

                # Use canonical, first order of magnitude units specifier
                mdapi_counter.set('MetricUnits', 'Hz')
                mdapi_counter.set('NormalizationEquation', '$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV')
                #mdapi_counter.set('DeltaReportReadEquation', '$GpuCoreClocks $GpuTime UDIV')

            symbol_name = mdapi_counter.get('SymbolName')

            counter = et.SubElement(metric_set, 'counter')
            counter.set('name', apply_aliases(mdapi_counter.get('ShortName'), aliases))
            counter.set('symbol_name', mdapi_counter.get('SymbolName'))
            counter.set('underscore_name', underscore(mdapi_counter.get('SymbolName')))
            counter.set('description', apply_aliases(mdapi_counter.get('LongName'), aliases))
            counter.set('mdapi_group', apply_aliases(to_text(mdapi_counter.get('Group')), aliases))
            counter.set('mdapi_usage_flags', to_text(mdapi_counter.get('UsageFlags')))
            counter.set('mdapi_supported_apis', strip_dx_apis(mdapi_counter.get('SupportedAPI')))
            low = mdapi_counter.get('LowWatermark')
            if low:
                counter.set('low_watermark', low)
            high = to_text(mdapi_counter.get('HighWatermark'))
            if high:
                counter.set('high_watermark', high)
            counter.set('data_type', mdapi_counter.get('ResultType').lower())

            max_eq = fixup_equation(mdapi_counter.get('MaxValueEquation'))
            if max_eq:
                counter.set('max_equation', max_eq)

            # XXX Not sure why EU metrics tend to just be bundled under 'gpu'
            counter.set('mdapi_hw_unit_type', mdapi_counter.get('HWUnitType').lower())

            # There are counters representing cycle counts that have a semantic
            # type of 'duration' which doesn't seem to make sense...
            units = mdapi_counter.get('MetricUnits').lower()
            if units == "cycles":
                semantic_type = "event"
            else:
                semantic_type = mdapi_counter.get('MetricType').lower()

            counter.set('units', units)
            counter.set('semantic_type', semantic_type)

            # MDAPI MetricSets have 3 different kinds of counter read equations:
            #
            # 1) One for reading a raw (unnormalized) value from a hardware report
            #
            #    The line between normalized and raw isn't always clear
            #    as the raw equation may e.g. read and ADD multiple counters
            #
            #    Not all counters have a raw equation if they are instead
            #    derived through $CounterName references to other counters
            #    in a normalized value equation
            #
            # 2) One for reading an unnormalized value from the accumulated 'delta reports'
            #
            #    Seems to duplicate the raw equation but with delta report
            #    offsets and referencing 64bit values
            #
            #    The normalized value equations are always based on these
            #    accumulated delta values
            #
            # 3) One for reading a normalized value
            #
            #    These may start with a reference to "$Self" which is
            #    effectively a macro for the above delta report equation
            #
            #    If this is missing the delta report equation is effectively
            #    the normalized equation too
            #
            # XXX: Beware that there are some inconsistent counters that
            # have a normalization equation with a $Self reference and a
            # raw equation but no delta report equation. This seems
            # pretty sketchy, but (at least for 'MEDIA' metrics) we will
            # substitute the raw equation for $Self in this case along
            # with a warning to double check the results.
            #
            # Currently there doesn't appear to be a clear reason to
            # differentiate these equations and the separation seems to
            # complicate things for tools wanting to generate code from this
            # data.
            #
            # We instead aim to have one normalized equation per counter that
            # always reference accumulated counter values.

            # XXX: As a special case, we override the raw and delta report
            # equations for the GpuTime counters, which seem inconsistent
            if mdapi_counter.get('SymbolName') == "GpuTime":
                mdapi_counter.set('SnapshotReportReadEquation', "dw@0x04 1000000000 UMUL $GpuTimestampFrequency UDIV")
                mdapi_counter.set('DeltaReportReadEquation', "qw@0x0 1000000000 UMUL $GpuTimestampFrequency UDIV")

            availability = fixup_equation(mdapi_counter.get('AvailabilityEquation'))
            if availability == "":
                availability = None

            # We prefer to only look at the equations that reference the raw
            # reports since the mapping of offsets back to A,B,C counters is
            # unambiguous, but if necessary we will fallback to mapping
            # delta report offsets (accumulated 64bit values that correspond
            # to the 32bit or 40bit values from raw reports)
            #
            # Empty and missing equations are both normalised to None. The
            # original only attempted this inside an `if <equation>:` guard,
            # where the empty-string test could never be true.
            raw_read_eq = fixup_equation(mdapi_counter.get('SnapshotReportReadEquation')) or None
            if raw_read_eq:
                raw_read_eq = replace_read_tokens_with_rpn_read_ops(chipset,
                                                                    raw_read_eq,
                                                                    True) #raw offsets

            delta_read_eq = fixup_equation(mdapi_counter.get('DeltaReportReadEquation')) or None
            if delta_read_eq:
                delta_read_eq = replace_read_tokens_with_rpn_read_ops(chipset,
                                                                      delta_read_eq,
                                                                      False) #delta offsets

            if raw_read_eq and not delta_read_eq:
                print_err("WARNING: Counter with raw equation but no delta report equation: MetricSet=\"" + \
                          mdapi_set.get('ShortName') + "\" Metric=\"" + mdapi_counter.get('SymbolName') + \
                          "(" + mdapi_counter.get('ShortName') + ")" + "\"")
                # Media metric counters currently have no delta equation even
                # though they have normalization equations that reference $Self
                if "MEDIA" in apis:
                    print_err("WARNING: -> Treating inconsistent media metric's 'raw' equation as a 'delta report' equation, but results should be double checked!")
                    delta_read_eq = raw_read_eq
                else:
                    metric_set.remove(counter)
                    continue

            # Some counters are sourced from register values that are
            # not put into the OA reports. This is why some counters
            # will have a delta equation but not a raw equation. These
            # counters are typically only available in query mode. For
            # this reason we put a particular availability value.
            if delta_read_eq and not raw_read_eq:
                assert availability is None
                availability = "true $QueryMode &&"
                raw_read_eq = delta_read_eq

            # After replacing read tokens with RPN counter READ ops the raw and
            # delta equations are expected to be identical so warn if that's
            # not true...
            if raw_read_eq != delta_read_eq:
                print_err("WARNING: Inconsistent raw and delta report equations for " + \
                          mdapi_set.get('ShortName') + " :: " + mdapi_counter.get('SymbolName') + \
                          "(" + mdapi_counter.get('ShortName') + ")" + ": raw=\"" + str(raw_read_eq) + \
                          "\" delta=\"" + str(delta_read_eq) + "\" (SKIPPING)")
                metric_set.remove(counter)
                continue

            normalize_eq = fixup_equation(mdapi_counter.get('NormalizationEquation')) or None

            if normalize_eq:
                # Some normalization equations are represented with macros such as
                # 'GpuDuration' corresponding to:
                #
                #   "$Self 100 UMUL $GpuCoreClocks FDIV"
                #
                # We expand macros here so tools don't need to care about them...
                #
                equation = expand_macros(normalize_eq)
                if raw_read_eq:
                    equation = equation.replace('$Self', raw_read_eq)
            else:
                # A counter without any equation yields an empty equation
                # instead of crashing on the membership tests below.
                equation = delta_read_eq or ""

            if '$Self' in equation:
                print_err("WARNING: Counter equation (\"" + equation + "\") with unexpanded $Self token: MetricSet=\"" + \
                          mdapi_set.get('ShortName') + "\" Metric=\"" + mdapi_counter.get('SymbolName') + \
                          "(" + mdapi_counter.get('ShortName') + ")" + "\" (SKIPPING)")
                metric_set.remove(counter)
                continue

            # $$CounterName vs $CounterName in an equation is intended to
            # differentiate referencing the normalized or raw value of another
            # counter.
            #
            # Since we are only keeping a single (normalized) equation for
            # counters we only need one form, but we want to be careful to
            # check if any equations really depend on the raw value of another
            # counter so we can expand those variables now
            #
            tmp = equation
            for token in tmp.split():
                # startswith() rather than indexing so a bare '$' token is
                # reported as an unknown variable instead of raising.
                if token.startswith('$') and not token.startswith('$$'):
                    if token[1:] in normalization_equations:
                        raw_eq = raw_equations[token[1:]]
                        equation = equation.replace(token, raw_eq)
                    #if token[1:] not in raw_equations:
                    #    print_err("WARNING: Counter equation (\"" + equation + "\") references un-kept raw equation of another counter : MetricSet=\"" + \
                    #              mdapi_set.get('ShortName') + "\" Metric=\"" + mdapi_counter.get('ShortName') + "\"")
                    elif token[1:] not in raw_equations and token[1:] not in sys_vars:
                        print_err("Unknown variable name: \"" + token + "\" in equation \"" + equation + "\"")

            symbol_name = counter.get('symbol_name')

            # Make sure that every variable in the equation is a known sys_var or counter name
            equation = equation.replace('$$', "$")
            for token in equation.split():
                if token.startswith('$'):
                    if token[1:] not in counters and token[1:] not in sys_vars:
                        print_err("WARNING: Counter equation (\"" + equation + "\") with unknown variable " + \
                                  token + " (maybe skipped counter): MetricSet=\"" + mdapi_set.get('ShortName') + \
                                  "\" Metric=\"" + mdapi_counter.get('SymbolName') + "(" + mdapi_counter.get('ShortName') + \
                                  ")" + "\" (SKIPPING)")
                        metric_set.remove(counter)
                        skip_counter = True
                        break
            if skip_counter:
                continue

            counter.set('equation', equation.strip())

            if availability is not None:
                counter.set('availability', availability)

            counters[symbol_name] = counter
            if normalize_eq:
                normalization_equations[symbol_name] = normalize_eq
            if raw_read_eq:
                raw_equations[symbol_name] = raw_read_eq


if args.dry_run:
    sys.exit(0)

# Merge in any custom meta data we have...
if args.merge:
    merge = et.parse(args.merge)

    for merge_set in merge.findall(".//set"):
        pattern = ".//set[@symbol_name=\"" + merge_set.get('symbol_name') + "\"][@chipset=\"" + merge_set.get('chipset') + "\"]"
        real_set = metrics.find(pattern)

        if real_set is not None:
            for set_attr in merge_set.items():
                real_set.set(set_attr[0], set_attr[1])

            for merge_elem in merge_set:
                if merge_elem.tag == "counter":
                    merge_counter = merge_elem

                    pattern = "counter[@symbol_name=\"" + merge_counter.get('symbol_name') + "\"]"
                    real_counter = real_set.find(pattern)
                    if real_counter is not None:
                        for counter_attr in merge_counter.items():
                            real_counter.set(counter_attr[0], counter_attr[1])
                    else:
                        real_set.append(merge_counter)
                        real_counter = merge_counter
                else:
                    real_set.append(merge_elem)

    # Hash the file that was actually merged (the original always hashed a
    # hard-coded "merge.xml") and close it deterministically.
    with open(args.merge, 'rb') as merge_file:
        merge_md5 = hashlib.md5(merge_file.read()).hexdigest()
else:
    merge_md5 = ""

# For consistency + readability print everything manually...
#
# NOTE(review): every literal in this section had lost its XML markup (the
# statements printed only whitespace and left merge_md5, time and saxutils
# unused). The elements are re-emitted from whatever attributes the
# conversion above stored on them; confirm attribute layout and the root
# 'version' attribute against the expected output format.
print("<?xml version=\"1.0\"?>")
print("<metrics version=\"" + str(int(time.time())) + "\" merge_md5=\"" + merge_md5 + "\">")

for metric_set in metrics.findall(".//set"):
    # Symbol table of the chipset this particular set was generated for.
    set_chipset = set_chipsets.get(metric_set.get('symbol_name'))
    known_registers = chipsets.get(set_chipset, {}).get('registers', {})

    print("  <set " + format_xml_attrs(metric_set, "\n" + " " * 7) + ">")

    for counter in metric_set.findall("counter"):
        print("    <counter " + format_xml_attrs(counter, "\n" + " " * 13) + "/>")

    for config in metric_set.findall("register_config"):
        print("    <register_config " + format_xml_attrs(config, "\n" + " " * 21) + ">")
        for reg in config.findall("register"):
            addr = int(reg.get('address'), 16)
            if addr in known_registers:
                comment = ' <!-- ' + known_registers[addr]['name'] + ' -->'
            else:
                comment = ''
            print("        <register " + format_xml_attrs(reg, " ") + " />" + comment)
        print("    </register_config>")

    print("  </set>\n")

print("</metrics>")