#!/usr/bin/env python3
# Copyright (C) 2015-2016 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# "MDAPI" xml files are an XML schema for maintaining meta data about Gen
# graphics Ovservability counters, where MD API is the name of a library shared
# by Intel GPA and Intel VTune.
#
# These files aren't publicly documented and have some historical baggage that
# adds some complexity as well as being inconsistent in a number of ways that
# makes it quite a bit of effort to parse/use the data. We also don't have
# guarantees about how this schema is maintained.
#
# We've taken the opportunity to find ways to simplify the input data and to
# make it more consistent to hopefully reduce the effort involved in using the
# data downstream.
#
import argparse
import copy
import hashlib
from operator import itemgetter
import re
import sys
import time
import uuid
import xml.etree.ElementTree as et
import xml.sax.saxutils as saxutils
import oa_guid_registry as oa_registry
# MDAPI configs include writes to some non-config registers,
# thus the blacklists...
gen8_11_chipset_params = {
'a_offset': 16,
'b_offset': 192,
'c_offset': 224,
'oa_report_size': 256,
'config_reg_blacklist': {
0x2364, # OACTXID
},
}
chipsets = {
'HSW': {
'a_offset': 12,
'b_offset': 192,
'c_offset': 224,
'oa_report_size': 256,
'registers': {
# TODO extend the symbol table for nicer output...
0x2710: { 'name': 'OASTARTTRIG1' },
0x2714: { 'name': 'OASTARTTRIG1' },
0x2718: { 'name': 'OASTARTTRIG1' },
0x271c: { 'name': 'OASTARTTRIG1' },
0x2720: { 'name': 'OASTARTTRIG1' },
0x2724: { 'name': 'OASTARTTRIG6' },
0x2728: { 'name': 'OASTARTTRIG7' },
0x272c: { 'name': 'OASTARTTRIG8' },
0x2740: { 'name': 'OAREPORTTRIG1' },
0x2744: { 'name': 'OAREPORTTRIG2' },
0x2748: { 'name': 'OAREPORTTRIG3' },
0x274c: { 'name': 'OAREPORTTRIG4' },
0x2750: { 'name': 'OAREPORTTRIG5' },
0x2754: { 'name': 'OAREPORTTRIG6' },
0x2758: { 'name': 'OAREPORTTRIG7' },
0x275c: { 'name': 'OAREPORTTRIG8' },
0x2770: { 'name': 'OACEC0_0' },
0x2774: { 'name': 'OACEC0_1' },
0x2778: { 'name': 'OACEC1_0' },
0x277c: { 'name': 'OACEC1_1' },
0x2780: { 'name': 'OACEC2_0' },
0x2784: { 'name': 'OACEC2_1' },
0x2788: { 'name': 'OACEC3_0' },
0x278c: { 'name': 'OACEC3_1' },
0x2790: { 'name': 'OACEC4_0' },
0x2794: { 'name': 'OACEC4_1' },
0x2798: { 'name': 'OACEC5_0' },
0x279c: { 'name': 'OACEC5_1' },
0x27a0: { 'name': 'OACEC6_0' },
0x27a4: { 'name': 'OACEC6_1' },
0x27a8: { 'name': 'OACEC7_0' },
0x27ac: { 'name': 'OACEC7_1' },
},
'config_reg_blacklist': {
0x2364, # OASTATUS1 register
},
},
'BDW': gen8_11_chipset_params,
'CHV': gen8_11_chipset_params,
'SKL': gen8_11_chipset_params,
'BXT': gen8_11_chipset_params,
'KBL': gen8_11_chipset_params,
'GLK': gen8_11_chipset_params,
'CFL': gen8_11_chipset_params,
'CNL': gen8_11_chipset_params,
'ICL': gen8_11_chipset_params,
'EHL': gen8_11_chipset_params,
'TGL': gen8_11_chipset_params,
'RKL': gen8_11_chipset_params,
'DG1': gen8_11_chipset_params,
'ADL': gen8_11_chipset_params,
}
register_types = { 'OA', 'NOA', 'FLEX', 'PM' }
default_set_blacklist = { "RenderDX1x", # TODO: rename to something non 'DX'
# specific if this config is generally
# usefull
"RenderBalance", # XXX: missing register config
"PipelineTimestamps", # Covered by API timestamp queries
}
counter_blacklist = {
"DramLlcThroughput", # TODO: The max equation of this counter
# requires dram throughtput value. Need to
# investiguate how to get this value.
}
sys_vars = { "EuCoresTotalCount",
"EuSlicesTotalCount",
"SamplersTotalCount",
"EuThreadsCount",
"GpuMinFrequencyMHz",
"GpuMaxFrequencyMHz",
"GpuTimestampFrequency",
"SliceMask",
"SubsliceMask",
"EuSubslicesTotalCount"
}
def underscore(name):
s = re.sub('MHz', 'Mhz', name)
s = re.sub('\.', '_', s)
s = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', s)
return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s).lower()
def print_err(*args):
sys.stderr.write(' '.join(map(str,args)) + '\n')
read_register_offsets = {
0x1f0: 'PERFCNT 0',
0x1f8: 'PERFCNT 1',
}
def read_value(chipset, offset):
if offset in read_register_offsets:
return read_register_offsets[offset]
print_err("Unknown offset register at offset {0}".format(offset))
assert 0
def read_token_to_rpn_read(chipset, token, raw_offsets):
width, offset_str = token.split('@')
# For Broadwell the raw read notation was extended for 40 bit
# counters: rd40@<32bit_part1_offset>:<8bit_part2_offset>
if width == "rd40":
offset_32_str, offset_8_str = offset_str.split(':')
offset_str = offset_32_str
offset = int(offset_str, 16)
if raw_offsets:
a_offset = chipsets[chipset]['a_offset']
b_offset = chipsets[chipset]['b_offset']
c_offset = chipsets[chipset]['c_offset']
report_size = chipsets[chipset]['oa_report_size']
if offset < a_offset:
if offset == 4:
return "GPU_TIME 0 READ"
elif offset == 12:
assert chipset != "HSW" # Only for Gen8+
return "GPU_CLOCK 0 READ"
else:
assert 0
elif offset < b_offset:
return "A " + str(int((offset - a_offset) / 4)) + " READ"
elif offset < c_offset:
return "B " + str(int((offset - b_offset) / 4)) + " READ"
elif offset < report_size:
return "C " + str(int((offset - c_offset) / 4)) + " READ"
else:
return "{0} READ".format(read_value(chipset, offset))
else:
idx = int(offset / 8)
if chipset == "HSW":
# On Haswell accumulated counters are assumed to start
# with GPU_TIME followed by 45 A counters, then 8 B
# counters and finally 8 C counters.
if idx < 1:
return "GPU_TIME 0 READ"
elif idx < 46:
return "A " + str(idx - 1) + " READ"
elif idx < 54:
return "B " + str(idx - 46) + " READ"
elif idx < 62:
return "C " + str(idx - 54) + " READ"
else:
return "{0} READ".format(read_value(chipset, offset))
else:
# For Gen8+ the array of accumulated counters is
# assumed to start with a GPU_TIME then GPU_CLOCK,
# then 36 A counters, then 8 B counters and finally
# 8 C counters.
if idx == 0:
return "GPU_TIME 0 READ"
elif idx == 1:
return "GPU_CLOCK 0 READ"
elif idx < 38:
return "A " + str(idx - 2) + " READ"
elif idx < 46:
return "B " + str(idx - 38) + " READ"
elif idx < 54:
return "C " + str(idx - 46) + " READ"
else:
return "{0} READ".format(read_value(chipset, offset))
assert 0
def replace_read_tokens_with_rpn_read_ops(chipset, equation, raw_offsets):
# MDAPI MetricSet equations use tokens like 'dw@0xff' for reading raw
# values from snapshots, but this doesn't seem convenient for a few
# reasons:
#
# 1) The offsets hide the particular a, b, or c counter they
# correspond to which in turn makes it awkward to experiment
# with different report sizes which trade off how many a, b and
# c counters are available
#
# 2) Raw reads could be represented as RPN operations too, and
# the consistency could make them slightly easier for tools to
# handle, E.g:
#
# "A 5 READ" = read A counter 5
#
# We replace dw@ address tokens with GPU_TIME, A, B or C READ ops...
#
tokens = equation.split()
equation = ""
for token in tokens:
if '@' in token:
read_exp = read_token_to_rpn_read(chipset, token, raw_offsets)
equation = equation + " " + read_exp
else:
equation = equation + " " + token
return equation
parser = argparse.ArgumentParser()
parser.add_argument("xml", nargs="+", help="XML description of metrics")
parser.add_argument("--guids", required=True, help="Metric set GUID registry")
parser.add_argument("--whitelist", help="Only output for given, space-separated, sets")
parser.add_argument("--blacklist", help="Don't generate anything for given metric sets")
parser.add_argument("--merge", help="Additional meta data to merge into the result")
parser.add_argument("--dry-run", action="store_true",
help="Not generate new XML but to check any errors")
args = parser.parse_args()
metrics = et.Element('metrics')
tree = et.ElementTree(metrics)
def apply_aliases(text, aliases):
if aliases == None:
return text
for alias in aliases.split(','):
(a, b) = alias.split('|')
text = re.sub(r"\b%s\b" % re.escape(a), b, text)
a = a.lower()
b = b.lower()
text = re.sub(r"\b%s\b" % re.escape(a), b, text)
return text
def strip_dx_apis(text):
if text == None:
return ""
stripped = ""
apis = text.split()
for api in apis:
if api[:2] != "DX":
stripped = stripped + " " + api
return stripped.strip()
# For recursively appending counters in order of dependencies...
def append_deps_and_counter(mdapi_counter, mdapi_counters, deps,
sorted_array, sorted_set):
symbol_name = mdapi_counter.get('SymbolName')
if symbol_name in sorted_set:
return
for dep_name in deps[symbol_name]:
if dep_name in mdapi_counters:
append_deps_and_counter(mdapi_counters[dep_name], mdapi_counters, deps,
sorted_array, sorted_set)
sorted_array.append(mdapi_counter)
sorted_set[symbol_name] = mdapi_counter
def sort_counters(mdapi_counters, deps):
sorted_array = []
sorted_set = {} # counters in here have been added to array
for symbol_name in mdapi_counters:
append_deps_and_counter(mdapi_counters[symbol_name], mdapi_counters, deps,
sorted_array, sorted_set)
return sorted_array
def expand_macros(equation):
equation = equation.replace('GpuDuration', "$Self 100 UMUL $GpuCoreClocks FDIV")
equation = equation.replace('EuAggrDuration', "$Self $EuCoresTotalCount UDIV 100 UMUL $GpuCoreClocks FDIV")
return equation
def fixup_equation(equation):
if equation is None:
return None
return equation.replace('$SubliceMask', '$SubsliceMask')
# The MDAPI XML files sometimes duplicate the same Flex EU/OA regs
# between configs with different AvailabilityEquations even though the
# availability checks are only expected to affect the MUX configs
#
# We iterate all the configs to filter out the FLEX/OA configs and
# double check that there's never any variations between repeated
# configs
#
def filter_single_config_registers_of_type(mdapi_metric_set, type):
regs = []
for mdapi_reg_config in mdapi_metric_set.findall("RegConfigStart"):
tmp_regs = []
for mdapi_reg in mdapi_reg_config.findall("Register"):
reg = (int(mdapi_reg.get('offset'),16), int(mdapi_reg.get('value'),16))
if reg[0] in chipsets[chipset]['config_reg_blacklist']:
continue
if mdapi_reg.get('type') == type:
tmp_regs.append(reg)
if len(tmp_regs) > 0:
bad = False
if len(regs) == 0:
regs = tmp_regs
elif len(regs) != len(tmp_regs):
bad = True
else:
for i in range(0, len(regs)):
if regs[i] != tmp_regs[i]:
bad = True
break
if bad:
print_err("ERROR: multiple, differing FLEX/OA configs for one set: MetricSet=\"" + mdapi_metric_set.get('ShortName'))
sys.exit(1)
return regs
# We only have a very small number of IDs, but we aren't assuming they
# start from zero or are contiguous in the MDAPI XML files. Python
# doesn't seem to have a built in sparse array type so we just
# loop over the entries we have:
def get_mux_id_group(id_groups, id):
for group in id_groups:
if group['id'] == id:
return group
new_group = { 'id': id, 'configs': [] }
id_groups.append(new_group)
return new_group
def process_mux_configs(mdapi_set):
allow_missing_id = True
mux_config_id_groups = []
for mdapi_reg_config in mdapi_set.findall("RegConfigStart"):
mux_regs = []
for mdapi_reg in mdapi_reg_config.findall("Register"):
address = int(mdapi_reg.get('offset'), 16)
if address in chipsets[chipset]['config_reg_blacklist']:
continue
reg_type = mdapi_reg.get('type')
if reg_type not in register_types:
print_err("ERROR: unknown register type=\"" + reg_type + "\": MetricSet=\"" + mdapi_set.get('ShortName'))
sys.exit(1)
if reg_type != 'NOA' and reg_type != 'PM':
continue
reg = (address, int(mdapi_reg.get('value'), 16))
mux_regs.append(reg)
if len(mux_regs) == 0:
continue
availability = mdapi_reg_config.get('AvailabilityEquation')
if availability == "":
availability = None
if mdapi_reg_config.get('ConfigPriority') != None:
reg_config_priority = int(mdapi_reg_config.get('ConfigPriority'))
else:
reg_config_priority = 0
if mdapi_reg_config.get('ConfigId') != None:
reg_config_id = int(mdapi_reg_config.get('ConfigId'))
allow_missing_id = False
elif mdapi_reg_config.get('ConfigId') == None and allow_missing_id == True:
reg_config_id = 0
else:
# It will spell trouble if there's a mixture of explicit and
# implied config IDs...
print_err("ERROR: register configs mixing implied/explicit IDs: MetricSet=\"" + mdapi_set.get('ShortName'))
sys.exit(1)
mux_config = { 'priority': reg_config_priority,
'availability': availability,
'registers': mux_regs }
mux_config_id_group = get_mux_id_group(mux_config_id_groups, reg_config_id)
mux_config_id_group['configs'].append(mux_config)
mux_config_id_groups.sort(key=itemgetter('id'))
# The only special case we currently support for more than one group of NOA
# MUX configs is for the Broadwell ComputeExtended metric set with two Id
# groups and the second just has a single unconditional config that can
# logically be appended to all the conditional configs of the first group
if len(mux_config_id_groups) > 1:
if len(mux_config_id_groups) != 2:
print_err("ERROR: Script doesn't currently allow more than two groups of NOA MUX configs for a single metric set: MetricSet=\"" + mdapi_set.get('ShortName'))
sys.exit(1)
last_id_group = mux_config_id_groups[-1]
if len(last_id_group['configs']) != 1:
print_err("ERROR: Script currently only allows up to two Ids for NOA MUX configs if second Id only contains a single unconditional config: MetricSet=\"" + mdapi_set.get('ShortName'))
sys.exit(1)
tail_config = last_id_group['configs'][0]
for mux_config in mux_config_id_groups[0]['configs']:
mux_config['registers'] = mux_config['registers'] + tail_config['registers']
mux_config_id_groups = [mux_config_id_groups[0]]
if len(mux_config_id_groups) == 0 or mux_config_id_groups[0]['configs'] == 0:
return ()
mux_configs = mux_config_id_groups[0]['configs']
assert isinstance(mux_configs, list)
assert len(mux_configs) >= 1
assert len(mux_configs[0]['registers']) > 1 # > 1 registers
return mux_configs
def add_register_config(set, priority, availability, regs, type):
reg_config = et.SubElement(set, 'register_config')
reg_config.set('type', type)
if availability != None:
assert type == "NOA"
reg_config.set('priority', str(priority))
reg_config.set('availability', availability)
for reg in regs:
elem = et.SubElement(reg_config, 'register')
elem.set('type', type)
elem.set('address', "0x%08X" % reg[0])
elem.set('value', "0x%08X" % reg[1])
def to_text(value):
if value == None:
return ""
return value
# There are duplicated metric sets with the same symbol name so we
# keep track of the sets we've read so we can skip duplicates...
sets = {}
guids = {}
guids_xml = et.parse(args.guids)
for guid in guids_xml.findall(".//guid"):
hashing_key = oa_registry.Registry.chipset_derive_hash(guid.get('chipset'),
guid.get('mdapi_config_hash'))
guids[hashing_key] = guid.get('id')
for arg in args.xml:
mdapi = et.parse(arg)
concurrent_group = mdapi.find(".//ConcurrentGroup")
for mdapi_set in mdapi.findall(".//MetricSet"):
apis = mdapi_set.get('SupportedAPI')
if "OGL" not in apis and "OCL" not in apis and "MEDIA" not in apis:
continue
set_symbol_name = mdapi_set.get('SymbolName')
if set_symbol_name in sets:
print_err("WARNING: duplicate set named \"" + set_symbol_name + "\" (SKIPPING)")
continue
chipset = oa_registry.Registry.chipset_name(mdapi_set.get('SupportedHW'))
chipset_fullname = chipset
if concurrent_group.get('SupportedGT') != None:
chipset_fullname = chipset_fullname + oa_registry.Registry.gt_name(concurrent_group.get('SupportedGT'))
if chipset not in chipsets:
print_err("WARNING: unsupported chipset {0}, consider updating {1}".format(chipset, __file__))
continue
if args.whitelist:
set_whitelist = args.whitelist.split()
if set_symbol_name not in set_whitelist:
continue
if args.blacklist:
set_blacklist = args.blacklist.split()
else:
set_blacklist = default_set_blacklist
if set_symbol_name in set_blacklist:
continue
if mdapi_set.get('SnapshotReportSize') != '256':
print_err("WARNING: skipping metric set '{0}', report size {1} invalid".format(set_symbol_name, mdapi_set.get('SnapshotReportSize')))
continue
set = et.SubElement(metrics, 'set')
set.set('chipset', chipset_fullname)
set.set('name', mdapi_set.get('ShortName'))
set.set('symbol_name', set_symbol_name)
set.set('underscore_name', underscore(mdapi_set.get('SymbolName')))
set.set('mdapi_supported_apis', strip_dx_apis(mdapi_set.get('SupportedAPI')))
# Look at the hardware register config before looking at the counters.
#
# The hardware configuration is used as a key to lookup up a GUID which
# is used by applications to lookup the corresponding counter
# normalization equations.
#
# We want to skip over any metric sets that don't yet have a registered
# GUID in guids.xml.
# There can be multiple NOA MUX configs, since they may have associated
# availability tests to match particular systems.
#
# Unlike the MDAPI XML files we only support tracking one group of
# mutually exclusive MUX configs, whereas the MDAPI XML files
# theoretically allow a single metric set to be associated with ordered
# groups of mutually exclusive configs. So far there is only one
# Broadwell, ComputeExtended metric set which uses this, but that
# particular case can be expressed in less general terms.
#
# Being a bit simpler here should make it easier for downstream tools
# to deal with. (At least we got the handling of the Broadwell
# ComputeExtended example wrong and it took several email exchanges and
# a conference call to confirm how to interpret this case)
mux_configs = process_mux_configs(mdapi_set)
# Unlike for MUX registers, we only expect one set of FLEX/OA
# registers per metric set (even though they are sometimes duplicated
# between configs in MDAPI XML files.
#
# This filter function, extracts the register of a certain type but
# also double checks that if they are repeated in separate configs that
# they don't vary. (Notably the current i915 perf Linux driver would
# need some adapting to support multiple OA/FLEX configs with different
# availability expressions)
#
flex_regs = filter_single_config_registers_of_type(mdapi_set, "FLEX")
oa_regs = filter_single_config_registers_of_type(mdapi_set, "OA")
# Note: we ignore Perfmon registers
for mux_config in mux_configs:
add_register_config(set, mux_config['priority'], mux_config['availability'], mux_config['registers'], "NOA")
if len(oa_regs) > 0:
add_register_config(set, 0, None, oa_regs, "OA")
if len(flex_regs) > 0:
add_register_config(set, 0, None, flex_regs, "FLEX")
mdapi_hw_config_hash = oa_registry.Registry.mdapi_hw_config_hash(mdapi_set)
guid_hash = oa_registry.Registry.chipset_derive_hash(chipset_fullname.lower(),
mdapi_hw_config_hash)
hw_config_hash = oa_registry.Registry.hw_config_hash(set)
if guid_hash in guids:
set.set('hw_config_guid', guids[guid_hash])
else:
print_err("WARNING: No GUID found for metric set " + chipset_fullname + ", " + set_symbol_name + " (SKIPPING)")
print_err("WARNING: If this is a new config add the following to guids.xml:")
print_err("")
metrics.remove(set)
continue
sets[set_symbol_name] = set
counters = {}
normalization_equations = {}
raw_equations = {}
# Awkwardly we can't assume metrics are in dependency order and have to
# sort them manually. We start by associating a list of dependencies with
# each counter...
mdapi_counters = {}
mdapi_counter_deps = {}
for mdapi_counter in mdapi_set.findall("Metrics/Metric"):
symbol_name = mdapi_counter.get('SymbolName')
if symbol_name in counter_blacklist:
continue;
# Have seen at least one MetricSet with a duplicate GpuCoreClocks counter...
if symbol_name in mdapi_counters:
print_err("WARNING: Skipping duplicate counter \"" + symbol_name + \
"\" in " + set.get('name') + " :: " + mdapi_counter.get('ShortName'))
continue;
deps = []
equations = fixup_equation(str(mdapi_counter.get('SnapshotReportReadEquation'))) + " " + \
fixup_equation(str(mdapi_counter.get('SnapshotReportDeltaEquation'))) + " " + \
fixup_equation(str(mdapi_counter.get('DeltaReportReadEquation'))) + " " + \
fixup_equation(str(mdapi_counter.get('NormalizationEquation')))
equations = expand_macros(equations)
equations = equations.replace('$$', "$")
for token in equations.split():
if token[0] == '$' and token[1:] not in sys_vars and token[1:] != "Self":
deps.append(token[1:])
mdapi_counters[symbol_name] = mdapi_counter
mdapi_counter_deps[symbol_name] = deps
sorted_mdapi_counters = sort_counters(mdapi_counters, mdapi_counter_deps)
for mdapi_counter in sorted_mdapi_counters:
aliases = mdapi_counter.get('Alias')
skip_counter = False
# We don't currently support configuring and reading perfmon registers
signal = mdapi_counter.get('SignalName')
if signal and "perfmon" in signal:
continue;
# A few things to fixup with this common counter...
if mdapi_counter.get('SymbolName') == "AvgGpuCoreFrequencyMHz":
# To avoid requiring a special case in tools, add a max value
# equation for the gpu frequency...
mdapi_counter.set('MaxValueEquation', "$GpuMaxFrequency")
# Don't include units in the name
mdapi_counter.set('SymbolName', "AvgGpuCoreFrequency")
# Use canonical, first order of magnitude units specifier
mdapi_counter.set('MetricUnits', 'Hz')
mdapi_counter.set('NormalizationEquation', '$GpuCoreClocks 1000000000 UMUL $GpuTime UDIV')
#mdapi_counter.set('DeltaReportReadEquation', '$GpuCoreClocks $GpuTime UDIV')
symbol_name = mdapi_counter.get('SymbolName')
counter = et.SubElement(set, 'counter')
counter.set('name', apply_aliases(mdapi_counter.get('ShortName'), aliases))
counter.set('symbol_name', mdapi_counter.get('SymbolName'))
counter.set('underscore_name', underscore(mdapi_counter.get('SymbolName')))
counter.set('description', apply_aliases(mdapi_counter.get('LongName'), aliases))
counter.set('mdapi_group', apply_aliases(to_text(mdapi_counter.get('Group')), aliases))
counter.set('mdapi_usage_flags', to_text(mdapi_counter.get('UsageFlags')))
counter.set('mdapi_supported_apis', strip_dx_apis(mdapi_counter.get('SupportedAPI')))
low = mdapi_counter.get('LowWatermark')
if low:
counter.set('low_watermark', low)
high = to_text(mdapi_counter.get('HighWatermark'))
if high:
counter.set('high_watermark', high)
counter.set('data_type', mdapi_counter.get('ResultType').lower())
max_eq = fixup_equation(mdapi_counter.get('MaxValueEquation'))
if max_eq:
counter.set('max_equation', max_eq)
# XXX Not sure why EU metrics tend to just be bundled under 'gpu'
counter.set('mdapi_hw_unit_type', mdapi_counter.get('HWUnitType').lower())
# There are counters representing cycle counts that have a semantic
# type of 'duration' which doesn't seem to make sense...
units = mdapi_counter.get('MetricUnits').lower()
if units == "cycles":
semantic_type = "event"
else:
semantic_type = mdapi_counter.get('MetricType').lower()
counter.set('units', units)
counter.set('semantic_type', semantic_type)
# MDAPI MetricSets have 3 different kinds of counter read equations:
#
# 1) One for reading a raw (unnormalized) value from a hardware report
#
# The line between normalized and raw isn't always clear
# as the raw equation may e.g. read and ADD multiple counters
#
# Not all counters have a raw equation if they are instead
# derived through $CounterName references to other counters
# in a normalized value equation
#
# 2) One for reading an unnormalized value from the accumulated 'delta reports'
#
# Seems to duplicate the raw equation but with delta report
# offsets and referencing 64bit values
#
# The normalized value equations are always based on these
# accumulated delta values
#
# 3) One for reading a normalized value
#
# These may start with a reference to "$Self" which is
# effectively a macro for the above delta report equation
#
# If this is missing the delta report equation is effectively
# the normalized equation too
#
# XXX: Beware that there are some inconsistent counters that
# have a normalization equation with a $Self reference and a
# raw equation but no delta report equation. This seems
# pretty sketchy, but (at least for 'MEDIA' metrics) we will
# substitute the raw equation for $Self in this case along
# with a warning to double check the results.
#
# Currently there doesn't appear to be a clear reason to
# differentiate these equations and the separation seems to
# complicate things for tools wanting to generate code from this
# data.
#
# We instead aim to have one normalized equation per counter that
# always reference accumulated counter values.
# XXX: As a special case, we override the raw and delta report
# equations for the GpuTime counters, which seem inconsistent
if mdapi_counter.get('SymbolName') == "GpuTime":
mdapi_counter.set('SnapshotReportReadEquation', "dw@0x04 1000000000 UMUL $GpuTimestampFrequency UDIV")
mdapi_counter.set('DeltaReportReadEquation', "qw@0x0 1000000000 UMUL $GpuTimestampFrequency UDIV")
availability = fixup_equation(mdapi_counter.get('AvailabilityEquation'))
if availability == "":
availability = None
# We prefer to only look at the equations that reference the raw
# reports since the mapping of offsets back to A,B,C counters is
# unambiguous, but if necessary we will fallback to mapping
# delta report offsets (accumulated 64bit values that correspond
# to the 32bit or 40bit values from raw repots)
raw_read_eq = fixup_equation(mdapi_counter.get('SnapshotReportReadEquation'))
if raw_read_eq:
if raw_read_eq == "":
raw_read_eq = None
else:
raw_read_eq = replace_read_tokens_with_rpn_read_ops(chipset,
raw_read_eq,
True) #raw offsets
delta_read_eq = fixup_equation(mdapi_counter.get('DeltaReportReadEquation'))
if delta_read_eq:
if delta_read_eq == "":
delta_read_eq = None
else:
delta_read_eq = replace_read_tokens_with_rpn_read_ops(chipset,
delta_read_eq,
False) #delta offsets
if raw_read_eq and not delta_read_eq:
print_err("WARNING: Counter with raw equation but no delta report equation: MetricSet=\"" + \
mdapi_set.get('ShortName') + "\" Metric=\"" + mdapi_counter.get('SymbolName') + \
"(" + mdapi_counter.get('ShortName') + ")" + "\"")
# Media metric counters currently have no delta equation even
# though they have normalization equations that reference $Self
if "MEDIA" in apis:
print_err("WARNING: -> Treating inconsistent media metric's 'raw' equation as a 'delta report' equation, but results should be double checked!")
delta_read_eq = raw_read_eq
else:
set.remove(counter)
continue
# Some counters are sourced from register values that are
# not put into the OA reports. This is why some counters
# will have a delta equation but not a raw equation. These
# counters are typically only available in query mode. For
# this reason we put a particular availability value.
if delta_read_eq and not raw_read_eq:
assert availability == None
availability = "true $QueryMode &&"
raw_read_eq = delta_read_eq
# After replacing read tokens with RPN counter READ ops the raw and
# delta equations are expected to be identical so warn if that's
# not true...
if bool(raw_read_eq) ^ bool(delta_read_eq) or raw_read_eq != delta_read_eq:
print_err("WARNING: Inconsistent raw and delta report equations for " + \
mdapi_set.get('ShortName') + " :: " + mdapi_counter.get('SymbolName') + \
"(" + mdapi_counter.get('ShortName') + ")" + ": raw=\"" + str(raw_read_eq) + \
"\" delta=\"" + str(delta_read_eq) + "\" (SKIPPING)")
set.remove(counter)
continue
normalize_eq = fixup_equation(mdapi_counter.get('NormalizationEquation'))
if normalize_eq and normalize_eq == "":
normalize_eq = None
if normalize_eq:
# Some normalization equations are represented with macros such as
# 'GpuDuration' corresponding to:
#
# "$Self 100 UMUL $GpuCoreClocks FDIV"
#
# We expand macros here so tools don't need to care about them...
#
equation = normalize_eq
equation = expand_macros(equation)
if raw_read_eq:
equation = equation.replace('$Self', raw_read_eq)
else:
equation = delta_read_eq
if '$Self' in equation:
print_err("WARNING: Counter equation (\"" + equation + "\") with unexpanded $Self token: MetricSet=\"" + \
mdapi_set.get('ShortName') + "\" Metric=\"" + mdapi_counter.get('SymbolName') + \
"(" + mdapi_counter.get('ShortName') + ")" + "\" (SKIPPING)")
set.remove(counter)
continue
# $$CounterName vs $CounterName in an equation is intended to
# differentiate referencing the normalized or raw value of another
# counter.
#
# Since we are only keeping a single (normalized) equation for
# counters we only need one form, but we want to be careful to
# check if any equations really depend on the raw value of another
# counter so we can expand those variables now
#
tmp = equation
for token in tmp.split():
if token[0] == '$' and token[1] != '$':
if token[1:] in normalization_equations:
raw_eq = raw_equations[token[1:]]
equation = equation.replace(token, raw_eq)
#if token[1:] not in raw_equations:
# print_err("WARNING: Counter equation (\"" + equation + "\") references un-kept raw equation of another counter : MetricSet=\"" + \
# mdapi_set.get('ShortName') + "\" Metric=\"" + mdapi_counter.get('ShortName') + "\"")
elif token[1:] not in raw_equations and token[1:] not in sys_vars:
print_err("Unknown variable name: \"" + token + "\" in equation \"" + equation + "\"")
symbol_name = counter.get('symbol_name')
# Make sure that every variable in the equation is a known sys_var or counter name
equation = equation.replace('$$', "$")
for token in equation.split():
if token[0] == '$':
if token[1:] not in counters and token[1:] not in sys_vars:
print_err("WARNING: Counter equation (\"" + equation + "\") with unknown variable " + \
token + " (maybe skipped counter): MetricSet=\"" + mdapi_set.get('ShortName') + \
"\" Metric=\"" + mdapi_counter.get('SymbolName') + "(" + mdapi_counter.get('ShortName') + \
")" + "\" (SKIPPING)")
set.remove(counter)
skip_counter = True
break
if skip_counter:
continue
counter.set('equation', equation.strip())
if availability != None:
counter.set('availability', availability)
counters[symbol_name] = counter;
if normalize_eq:
normalization_equations[symbol_name] = normalize_eq
if raw_read_eq:
raw_equations[symbol_name] = raw_read_eq
if args.dry_run:
sys.exit(0)
# Merge in any custom meta data we have...
if args.merge:
merge = et.parse(args.merge)
merge_metrics = merge.getroot()
for merge_set in merge.findall(".//set"):
pattern = ".//set[@symbol_name=\"" + merge_set.get('symbol_name') + "\"][@chipset=\"" + merge_set.get('chipset') + "\"]"
real_set = metrics.find(pattern)
if real_set is not None:
for set_attr in merge_set.items():
real_set.set(set_attr[0], set_attr[1])
for merge_elem in merge_set:
if merge_elem.tag == "counter":
merge_counter = merge_elem
pattern = "counter[@symbol_name=\"" + merge_counter.get('symbol_name') + "\"]"
real_counter = real_set.find(pattern)
if real_counter is not None:
for counter_attr in merge_counter.items():
real_counter.set(counter_attr[0], counter_attr[1])
else:
real_set.append(merge_counter)
real_counter = merge_counter
else:
real_set.append(merge_elem)
# For consistency + readability print everything manually...
merge_md5 = hashlib.md5(open("merge.xml", 'rb').read()).hexdigest()
else:
merge_md5 = ""
print ("")
print("")
for set in metrics.findall(".//set"):
print(" ")
for counter in set.findall("counter"):
print(" ")
for config in set.findall("register_config"):
if config.get('availability') != None:
print(" ")
else:
print(" ")
for reg in config.findall("register"):
addr = int(reg.get('address'), 16)
if 'registers' in chipsets[chipset] and addr in chipsets[chipset]['registers']:
reg_info = chipsets[chipset]['registers'][addr]
comment = ' '
else:
comment = ''
print(" " + comment)
print(" ")
print(" \n")
print("")