From 5ae9be68d9f1a628fdc920b647257f94afb77887 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 25 Dec 2020 15:57:30 -0700 Subject: [PATCH] bpo-36876: [c-analyzer tool] Additional CLI updates for "capi" command. (gh-23929) https://bugs.python.org/issue36876 --- Tools/c-analyzer/cpython/__main__.py | 36 ++- Tools/c-analyzer/cpython/_capi.py | 362 ++++++++++++++++++++------- Tools/c-analyzer/must-resolve.sh | 75 ++++++ 3 files changed, 380 insertions(+), 93 deletions(-) create mode 100755 Tools/c-analyzer/must-resolve.sh diff --git a/Tools/c-analyzer/cpython/__main__.py b/Tools/c-analyzer/cpython/__main__.py index 9d29b13ed8f9c6..a11b687214d2f4 100644 --- a/Tools/c-analyzer/cpython/__main__.py +++ b/Tools/c-analyzer/cpython/__main__.py @@ -241,7 +241,7 @@ def process_kinds(args, *, argv=None): for raw in args.kinds or (): for kind in raw.replace(',', ' ').strip().split(): if kind in _capi.KINDS: - kind.append(kind) + kinds.append(kind) else: parser.error(f'expected KIND to be one of {sorted(_capi.KINDS)}, got {kind!r}') args.kinds = set(kinds) @@ -249,7 +249,7 @@ def process_kinds(args, *, argv=None): parser.add_argument('--group-by', dest='groupby', choices=['level', 'kind']) - parser.add_argument('--format', default='brief') + parser.add_argument('--format', default='table') parser.add_argument('--summary', dest='format', action='store_const', const='summary') def process_format(args, *, argv=None): @@ -259,12 +259,27 @@ def process_format(args, *, argv=None): if args.format not in _capi._FORMATS: parser.error(f'unsupported format {orig!r}') + parser.add_argument('--show-empty', dest='showempty', action='store_true') + parser.add_argument('--no-show-empty', dest='showempty', action='store_false') + parser.set_defaults(showempty=None) + + # XXX Add --sort-by, --sort and --no-sort. + + parser.add_argument('--ignore', dest='ignored', action='append') + def process_ignored(args, *, argv=None): + ignored = [] + for raw in args.ignored or (): + ignored.extend(raw.replace(',', ' ').strip().split()) + args.ignored = ignored or None + parser.add_argument('filenames', nargs='*', metavar='FILENAME') process_progress = add_progress_cli(parser) return [ process_levels, + process_kinds, process_format, + process_ignored, process_progress, ] @@ -273,7 +288,9 @@ def cmd_capi(filenames=None, *, levels=None, kinds=None, groupby='kind', - format='brief', + format='table', + showempty=None, + ignored=None, track_progress=None, verbosity=VERBOSITY, **kwargs @@ -282,7 +299,7 @@ def cmd_capi(filenames=None, *, filenames = _files.iter_header_files(filenames, levels=levels) #filenames = (file for file, _ in main_for_filenames(filenames)) - if track_progress is not None: + if track_progress: filenames = track_progress(filenames) items = _capi.iter_capi(filenames) if levels: @@ -290,7 +307,16 @@ def cmd_capi(filenames=None, *, if kinds: items = (item for item in items if item.kind in kinds) - lines = render(items, groupby=groupby, verbose=verbosity > VERBOSITY) + filter = _capi.resolve_filter(ignored) + if filter: + items = (item for item in items if filter(item, log=lambda msg: logger.log(1, msg))) + + lines = render( + items, + groupby=groupby, + showempty=showempty, + verbose=verbosity > VERBOSITY, + ) print() for line in lines: print(line) diff --git a/Tools/c-analyzer/cpython/_capi.py b/Tools/c-analyzer/cpython/_capi.py index 38d7cd3c514657..df8159a8cc169f 100644 --- a/Tools/c-analyzer/cpython/_capi.py +++ b/Tools/c-analyzer/cpython/_capi.py @@ -1,4 +1,5 @@ from collections import namedtuple +import logging import os import os.path import re @@ -10,6 +11,9 @@ from . import REPO_ROOT +logger = logging.getLogger(__name__) + + INCLUDE_ROOT = os.path.join(REPO_ROOT, 'Include') INCLUDE_CPYTHON = os.path.join(INCLUDE_ROOT, 'cpython') INCLUDE_INTERNAL = os.path.join(INCLUDE_ROOT, 'internal') @@ -122,31 +126,34 @@ def _parse_line(line, prev=None): results = zip(KINDS, m.groups()) for kind, name in results: if name: - clean = last.split('//')[0].strip() + clean = last.split('//')[0].rstrip() if clean.endswith('*/'): clean = clean.split('/*')[0].rstrip() + if kind == 'macro' or kind == 'constant': - if clean.endswith('\\'): - return line # the new "prev" + if not clean.endswith('\\'): + return name, kind elif kind == 'inline': - if not prev: - if not clean.endswith('}'): - return line # the new "prev" - elif clean != '}': - return line # the new "prev" - elif not clean.endswith(';'): - return line # the new "prev" - return name, kind + if clean.endswith('}'): + if not prev or clean == '}': + return name, kind + elif kind == 'func' or kind == 'data': + if clean.endswith(';'): + return name, kind + else: + # This should not be reached. + raise NotImplementedError + return line # the new "prev" # It was a plain #define. return None -LEVELS = { +LEVELS = [ 'stable', 'cpython', 'private', 'internal', -} +] def _get_level(filename, name, *, _cpython=INCLUDE_CPYTHON + os.path.sep, @@ -165,6 +172,12 @@ def _get_level(filename, name, *, #return '???' +GROUPINGS = { + 'kind': KINDS, + 'level': LEVELS, +} + + class CAPIItem(namedtuple('CAPIItem', 'file lno name kind level')): @classmethod @@ -231,34 +244,70 @@ def _parse_groupby(raw): else: raise NotImplementedError - if not all(v in ('kind', 'level') for v in groupby): + if not all(v in GROUPINGS for v in groupby): raise ValueError(f'invalid groupby value {raw!r}') return groupby -def summarize(items, *, groupby='kind'): - summary = {} +def _resolve_full_groupby(groupby): + if isinstance(groupby, str): + groupby = [groupby] + groupings = [] + for grouping in groupby + list(GROUPINGS): + if grouping not in groupings: + groupings.append(grouping) + return groupings + + +def summarize(items, *, groupby='kind', includeempty=True, minimize=None): + if minimize is None: + if includeempty is None: + minimize = True + includeempty = False + else: + minimize = includeempty + elif includeempty is None: + includeempty = minimize + elif minimize and includeempty: + raise ValueError(f'cannot minimize and includeempty at the same time') groupby = _parse_groupby(groupby)[0] - if groupby == 'kind': - outers = KINDS - inners = LEVELS - def increment(item): - summary[item.kind][item.level] += 1 - elif groupby == 'level': - outers = LEVELS - inners = KINDS - def increment(item): - summary[item.level][item.kind] += 1 - else: - raise NotImplementedError + _outer, _inner = _resolve_full_groupby(groupby) + outers = GROUPINGS[_outer] + inners = GROUPINGS[_inner] + + summary = { + 'totals': { + 'all': 0, + 'subs': {o: 0 for o in outers}, + 'bygroup': {o: {i: 0 for i in inners} + for o in outers}, + }, + } - for outer in outers: - summary[outer] = _outer = {} - for inner in inners: - _outer[inner] = 0 for item in items: - increment(item) + outer = getattr(item, _outer) + inner = getattr(item, _inner) + # Update totals. + summary['totals']['all'] += 1 + summary['totals']['subs'][outer] += 1 + summary['totals']['bygroup'][outer][inner] += 1 + + if not includeempty: + subtotals = summary['totals']['subs'] + bygroup = summary['totals']['bygroup'] + for outer in outers: + if subtotals[outer] == 0: + del subtotals[outer] + del bygroup[outer] + continue + + for inner in inners: + if bygroup[outer][inner] == 0: + del bygroup[outer][inner] + if minimize: + if len(bygroup[outer]) == 1: + del bygroup[outer] return summary @@ -289,20 +338,72 @@ def iter_capi(filenames=None): yield item -def _collate(items, groupby): +def resolve_filter(ignored): + if not ignored: + return None + ignored = set(_resolve_ignored(ignored)) + def filter(item, *, log=None): + if item.name not in ignored: + return True + if log is not None: + log(f'ignored {item.name!r}') + return False + return filter + + +def _resolve_ignored(ignored): + if isinstance(ignored, str): + ignored = [ignored] + for raw in ignored: + if isinstance(raw, str): + if raw.startswith('|'): + yield raw[1:] + elif raw.startswith('<') and raw.endswith('>'): + filename = raw[1:-1] + try: + infile = open(filename) + except Exception as exc: + logger.error(f'ignore file failed: {exc}') + continue + logger.log(1, f'reading ignored names from {filename!r}') + with infile: + for line in infile: + if not line: + continue + if line[0].isspace(): + continue + line = line.partition('#')[0].rstrip() + if line: + # XXX Recurse? + yield line + else: + raw = raw.strip() + if raw: + yield raw + else: + raise NotImplementedError + + +def _collate(items, groupby, includeempty): groupby = _parse_groupby(groupby)[0] maxfilename = maxname = maxkind = maxlevel = 0 + collated = {} + groups = GROUPINGS[groupby] + for group in groups: + collated[group] = [] + for item in items: key = getattr(item, groupby) - if key in collated: - collated[key].append(item) - else: - collated[key] = [item] + collated[key].append(item) maxfilename = max(len(item.relfile), maxfilename) maxname = max(len(item.name), maxname) maxkind = max(len(item.kind), maxkind) maxlevel = max(len(item.level), maxlevel) + if not includeempty: + for group in groups: + if not collated[group]: + del collated[group] maxextra = { 'kind': maxkind, 'level': maxlevel, @@ -310,27 +411,55 @@ def _collate(items, groupby): return collated, groupby, maxfilename, maxname, maxextra +def _get_sortkey(sort, _groupby, _columns): + if sort is True or sort is None: + # For now: + def sortkey(item): + return ( + item.level == 'private', + LEVELS.index(item.level), + KINDS.index(item.kind), + os.path.dirname(item.file), + os.path.basename(item.file), + item.name, + ) + return sortkey + + sortfields = 'not-private level kind dirname basename name'.split() + elif isinstance(sort, str): + sortfields = sort.replace(',', ' ').strip().split() + elif callable(sort): + return sort + else: + raise NotImplementedError + + # XXX Build a sortkey func from sortfields. + raise NotImplementedError + + ################################## # CLI rendering -_LEVEL_MARKERS = { - 'S': 'stable', - 'C': 'cpython', - 'P': 'private', - 'I': 'internal', -} -_KIND_MARKERS = { - 'F': 'func', - 'D': 'data', - 'I': 'inline', - 'M': 'macro', - 'C': 'constant', +_MARKERS = { + 'level': { + 'S': 'stable', + 'C': 'cpython', + 'P': 'private', + 'I': 'internal', + }, + 'kind': { + 'F': 'func', + 'D': 'data', + 'I': 'inline', + 'M': 'macro', + 'C': 'constant', + }, } def resolve_format(format): if not format: - return 'brief' + return 'table' elif isinstance(format, str) and format in _FORMATS: return format else: @@ -350,19 +479,29 @@ def render(items, **kwargs): return render -def render_table(items, *, columns=None, groupby='kind', verbose=False): +def render_table(items, *, + columns=None, + groupby='kind', + sort=True, + showempty=False, + verbose=False, + ): + if groupby is None: + groupby = 'kind' + if showempty is None: + showempty = False + if groupby: - collated, groupby, maxfilename, maxname, maxextra = _collate(items, groupby) - if groupby == 'kind': - groups = KINDS - extras = ['level'] - markers = {'level': _LEVEL_MARKERS} - elif groupby == 'level': - groups = LEVELS - extras = ['kind'] - markers = {'kind': _KIND_MARKERS} - else: - raise NotImplementedError + (collated, groupby, maxfilename, maxname, maxextra, + ) = _collate(items, groupby, showempty) + for grouping in GROUPINGS: + maxextra[grouping] = max(len(g) for g in GROUPINGS[grouping]) + + _, extra = _resolve_full_groupby(groupby) + extras = [extra] + markers = {extra: _MARKERS[extra]} + + groups = GROUPINGS[groupby] else: # XXX Support no grouping? raise NotImplementedError @@ -373,8 +512,6 @@ def get_extra(item): for extra in ('kind', 'level')} else: if verbose: - maxextra['kind'] = max(len(kind) for kind in KINDS) - maxextra['level'] = max(len(level) for level in LEVELS) extracols = [f'{extra}:{maxextra[extra]}' for extra in extras] def get_extra(item): @@ -404,32 +541,51 @@ def get_extra(item): ] header, div, fmt = build_table(columns) + if sort: + sortkey = _get_sortkey(sort, groupby, columns) + total = 0 - for group in groups: - if group not in collated: + for group, grouped in collated.items(): + if not showempty and group not in collated: continue yield '' yield f' === {group} ===' yield '' yield header yield div - for item in collated[group]: - yield fmt.format( - filename=item.relfile, - name=item.name, - **get_extra(item), - ) + if grouped: + if sort: + grouped = sorted(grouped, key=sortkey) + for item in grouped: + yield fmt.format( + filename=item.relfile, + name=item.name, + **get_extra(item), + ) yield div - subtotal = len(collated[group]) + subtotal = len(grouped) yield f' sub-total: {subtotal}' total += subtotal yield '' yield f'total: {total}' -def render_full(items, *, groupby=None, verbose=False): +def render_full(items, *, + groupby='kind', + sort=None, + showempty=None, + verbose=False, + ): + if groupby is None: + groupby = 'kind' + if showempty is None: + showempty = False + + if sort: + sortkey = _get_sortkey(sort, groupby, None) + if groupby: - collated, groupby, _, _, _ = _collate(items, groupby) + collated, groupby, _, _, _ = _collate(items, groupby, showempty) for group, grouped in collated.items(): yield '#' * 25 yield f'# {group} ({len(grouped)})' @@ -437,10 +593,14 @@ def render_full(items, *, groupby=None, verbose=False): yield '' if not grouped: continue + if sort: + grouped = sorted(grouped, key=sortkey) for item in grouped: yield from _render_item_full(item, groupby, verbose) yield '' else: + if sort: + items = sorted(items, key=sortkey) for item in items: yield from _render_item_full(item, None, verbose) yield '' @@ -459,21 +619,47 @@ def _render_item_full(item, groupby, verbose): print(' ---------------------------------------') -def render_summary(items, *, groupby='kind', verbose=False): - total = 0 - summary = summarize(items, groupby=groupby) - # XXX Stablize the sorting to match KINDS/LEVELS. - for outer, counts in summary.items(): - subtotal = sum(c for _, c in counts.items()) - yield f'{outer + ":":20} ({subtotal})' - for inner, count in counts.items(): - yield f' {inner + ":":9} {count}' - total += subtotal - yield f'{"total:":20} ({total})' +def render_summary(items, *, + groupby='kind', + sort=None, + showempty=None, + verbose=False, + ): + if groupby is None: + groupby = 'kind' + summary = summarize( + items, + groupby=groupby, + includeempty=showempty, + minimize=None if showempty else not verbose, + ) + + subtotals = summary['totals']['subs'] + bygroup = summary['totals']['bygroup'] + lastempty = False + for outer, subtotal in subtotals.items(): + if bygroup: + subtotal = f'({subtotal})' + yield f'{outer + ":":20} {subtotal:>8}' + else: + yield f'{outer + ":":10} {subtotal:>8}' + if outer in bygroup: + for inner, count in bygroup[outer].items(): + yield f' {inner + ":":9} {count}' + lastempty = False + else: + lastempty = True + + total = f'*{summary["totals"]["all"]}*' + label = '*total*:' + if bygroup: + yield f'{label:20} {total:>8}' + else: + yield f'{label:10} {total:>9}' _FORMATS = { - 'brief': render_table, + 'table': render_table, 'full': render_full, 'summary': render_summary, } diff --git a/Tools/c-analyzer/must-resolve.sh b/Tools/c-analyzer/must-resolve.sh new file mode 100755 index 00000000000000..191e42241f5eec --- /dev/null +++ b/Tools/c-analyzer/must-resolve.sh @@ -0,0 +1,75 @@ +#!/usr/bin/env bash + +# Any PyObject exposed via the public API is problematic since it must +# be made per-interpreter. This involves the following: +# +# singletons: +# - None +# - True +# - False +# - NotImplemented +# - Ellipsis +# PyTypeObject: +# - PyExc* [97] +# - static types [81] +# +# In the non-stable API we could use #defines to do the conversion +# transparently (though Py_None is perhaps problematic for performance +# reasons). However, we can't take that approach with the stable API. +# That means we must find all functions (& macros) in the stable API +# (and probably the full public API, for sanity sake) and adjust them. +# This will involve internally converting from the public object to the +# corresponding per-interpreter object. +# +# Note that the only place this solution fails is with direct pointer +# equality checks with the public objects. + +# XXX What about saying that the stable API is not sub-interpreter +# compatible? + + +function run_capi() { + ./python Tools/c-analyzer/c-analyzer.py capi \ + --no-progress \ + --group-by kind \ + --func --inline --macro \ + --no-show-empty \ + --ignore '' \ + $@ +} + +echo '' +echo '#################################################' +echo '# All API' +echo '#################################################' +run_capi --format summary Include/*.h Include/cpython/*.h +run_capi --format table Include/*.h Include/cpython/*.h +echo '' +echo '' +echo '#################################################' +echo '# stable API' +echo '#################################################' +echo '' +echo '# public:' +run_capi --format summary --public --no-show-empty Include/*.h +echo '' +echo '# private:' +run_capi --format summary --private --no-show-empty Include/*.h +echo '' +run_capi --format full -v Include/*.h +#run_capi --format full -v --public Include/*.h +#run_capi --format full -v --private Include/*.h +echo '' +echo '#################################################' +echo '# cpython API' +echo '#################################################' +echo '' +echo '# public:' +run_capi --format summary --public --no-show-empty Include/cpython/*.h +echo '' +echo '# private:' +run_capi --format summary --private --no-show-empty Include/cpython/*.h +echo '' +run_capi --format full -v Include/cpython/*.h +#run_capi --format full -v --public Include/cpython/*.h +#run_capi --format full -v --private Include/cpython/*.h