From 4fe72090deb7fb7bc09bfa56c92f6b3b0967d395 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 30 Oct 2020 15:46:52 -0600 Subject: [PATCH] bpo-36876: Small adjustments to the C-analyzer tool. (GH-23045) This is a little bit of clean-up, small fixes, and additional helpers prior to building an updated & accurate list of globals to eliminate. --- Tools/c-analyzer/c_analyzer/__init__.py | 8 +- Tools/c-analyzer/c_analyzer/__main__.py | 104 +++++---- Tools/c-analyzer/c_analyzer/analyze.py | 6 +- Tools/c-analyzer/c_analyzer/datafiles.py | 3 +- Tools/c-analyzer/c_analyzer/info.py | 42 +--- Tools/c-analyzer/c_analyzer/match.py | 212 +++++++++++++++++++ Tools/c-analyzer/c_common/scriptutil.py | 51 ++++- Tools/c-analyzer/c_parser/datafiles.py | 2 +- Tools/c-analyzer/c_parser/info.py | 166 ++++----------- Tools/c-analyzer/c_parser/match.py | 177 ++++++++++++++++ Tools/c-analyzer/c_parser/parser/__init__.py | 6 +- Tools/c-analyzer/c_parser/parser/_info.py | 15 ++ Tools/c-analyzer/c_parser/parser/_regexes.py | 3 +- Tools/c-analyzer/cpython/__main__.py | 5 +- Tools/c-analyzer/cpython/_analyzer.py | 7 +- Tools/c-analyzer/cpython/_parser.py | 44 +++- 16 files changed, 633 insertions(+), 218 deletions(-) create mode 100644 Tools/c-analyzer/c_analyzer/match.py create mode 100644 Tools/c-analyzer/c_parser/match.py diff --git a/Tools/c-analyzer/c_analyzer/__init__.py b/Tools/c-analyzer/c_analyzer/__init__.py index 4a01cd396f5f5f..171fa25102bffc 100644 --- a/Tools/c-analyzer/c_analyzer/__init__.py +++ b/Tools/c-analyzer/c_analyzer/__init__.py @@ -4,10 +4,12 @@ from c_parser.info import ( KIND, TypeDeclaration, - filter_by_kind, - collate_by_kind_group, resolve_parsed, ) +from c_parser.match import ( + filter_by_kind, + group_by_kinds, +) from . 
import ( analyze as _analyze, datafiles as _datafiles, @@ -55,7 +57,7 @@ def analyze_decls(decls, known, *, ) decls = list(decls) - collated = collate_by_kind_group(decls) + collated = group_by_kinds(decls) types = {decl: None for decl in collated['type']} typespecs = _analyze.get_typespecs(types) diff --git a/Tools/c-analyzer/c_analyzer/__main__.py b/Tools/c-analyzer/c_analyzer/__main__.py index 1fd45b985d9bcf..4cff1d4efb5fe9 100644 --- a/Tools/c-analyzer/c_analyzer/__main__.py +++ b/Tools/c-analyzer/c_analyzer/__main__.py @@ -1,5 +1,6 @@ import io import logging +import os import os.path import re import sys @@ -9,6 +10,7 @@ add_verbosity_cli, add_traceback_cli, add_sepval_cli, + add_progress_cli, add_files_cli, add_commands_cli, process_args_by_key, @@ -17,11 +19,13 @@ filter_filenames, iter_marks, ) -from c_parser.info import KIND, is_type_decl +from c_parser.info import KIND +from c_parser.match import is_type_decl +from .match import filter_forward from . import ( analyze as _analyze, - check_all as _check_all, datafiles as _datafiles, + check_all as _check_all, ) @@ -44,7 +48,7 @@ TABLE_SECTIONS = { 'types': ( ['kind', 'name', 'data', 'file'], - is_type_decl, + KIND.is_type_decl, (lambda v: (v.kind.value, v.filename or '', v.name)), ), 'typedefs': 'types', @@ -167,9 +171,7 @@ def handle_failure(failure, data): print(f'{data.filename}:{name} - {failure}') elif fmt == 'summary': def handle_failure(failure, data): - parent = data.parent or '' - funcname = parent if isinstance(parent, str) else parent.name - print(f'{data.filename:35}\t{funcname or "-":35}\t{data.name:40}\t{failure}') + print(_fmt_one_summary(data, failure)) elif fmt == 'full': div = '' def handle_failure(failure, data): @@ -230,6 +232,15 @@ def section(name): yield f'grand total: {total}' +def _fmt_one_summary(item, extra=None): + parent = item.parent or '' + funcname = parent if isinstance(parent, str) else parent.name + if extra: + return f'{item.filename:35}\t{funcname or 
"-":35}\t{item.name:40}\t{extra}' + else: + return f'{item.filename:35}\t{funcname or "-":35}\t{item.name}' + + def fmt_full(analysis): # XXX Support sorting. items = sorted(analysis, key=lambda v: v.key) @@ -272,10 +283,12 @@ def process_checks(args): args.checks = [check] else: process_checks = add_checks_cli(parser, checks=checks) + process_progress = add_progress_cli(parser) process_output = add_output_cli(parser, default=None) process_files = add_files_cli(parser, **kwargs) return [ process_checks, + process_progress, process_output, process_files, ] @@ -288,6 +301,7 @@ def cmd_check(filenames, *, relroot=None, failfast=False, iter_filenames=None, + track_progress=None, verbosity=VERBOSITY, _analyze=_analyze, _CHECKS=CHECKS, @@ -304,36 +318,53 @@ def cmd_check(filenames, *, ) = _get_check_handlers(fmt, printer, verbosity) filenames = filter_filenames(filenames, iter_filenames) + if track_progress: + filenames = track_progress(filenames) - logger.info('analyzing...') + logger.info('analyzing files...') analyzed = _analyze(filenames, **kwargs) if relroot: analyzed.fix_filenames(relroot) + decls = filter_forward(analyzed, markpublic=True) - logger.info('checking...') - numfailed = 0 - for data, failure in _check_all(analyzed, checks, failfast=failfast): + logger.info('checking analysis results...') + failed = [] + for data, failure in _check_all(decls, checks, failfast=failfast): if data is None: printer.info('stopping after one failure') break - if div is not None and numfailed > 0: + if div is not None and len(failed) > 0: printer.info(div) - numfailed += 1 + failed.append(data) handle_failure(failure, data) handle_after() printer.info('-------------------------') - logger.info(f'total failures: {numfailed}') + logger.info(f'total failures: {len(failed)}') logger.info('done checking') - if numfailed > 0: - sys.exit(numfailed) + if fmt == 'summary': + print('Categorized by storage:') + print() + from .match import group_by_storage + grouped = 
group_by_storage(failed, ignore_non_match=False) + for group, decls in grouped.items(): + print() + print(group) + for decl in decls: + print(' ', _fmt_one_summary(decl)) + print(f'subtotal: {len(decls)}') + + if len(failed) > 0: + sys.exit(len(failed)) def _cli_analyze(parser, **kwargs): + process_progress = add_progress_cli(parser) process_output = add_output_cli(parser) process_files = add_files_cli(parser, **kwargs) return [ + process_progress, process_output, process_files, ] @@ -343,6 +374,7 @@ def _cli_analyze(parser, **kwargs): def cmd_analyze(filenames, *, fmt=None, iter_filenames=None, + track_progress=None, verbosity=None, _analyze=_analyze, formats=FORMATS, @@ -356,49 +388,46 @@ def cmd_analyze(filenames, *, raise ValueError(f'unsupported fmt {fmt!r}') filenames = filter_filenames(filenames, iter_filenames) - if verbosity == 2: - def iter_filenames(filenames=filenames): - marks = iter_marks() - for filename in filenames: - print(next(marks), end='') - yield filename - filenames = iter_filenames() - elif verbosity > 2: - def iter_filenames(filenames=filenames): - for filename in filenames: - print(f'<{filename}>') - yield filename - filenames = iter_filenames() - - logger.info('analyzing...') + if track_progress: + filenames = track_progress(filenames) + + logger.info('analyzing files...') analyzed = _analyze(filenames, **kwargs) + decls = filter_forward(analyzed, markpublic=True) - for line in do_fmt(analyzed): + for line in do_fmt(decls): print(line) def _cli_data(parser, filenames=None, known=None): ArgumentParser = type(parser) common = ArgumentParser(add_help=False) - if filenames is None: - common.add_argument('filenames', metavar='FILE', nargs='+') + # These flags will get processed by the top-level parse_args(). 
+ add_verbosity_cli(common) + add_traceback_cli(common) subs = parser.add_subparsers(dest='datacmd') sub = subs.add_parser('show', parents=[common]) if known is None: sub.add_argument('--known', required=True) + if filenames is None: + sub.add_argument('filenames', metavar='FILE', nargs='+') - sub = subs.add_parser('dump') + sub = subs.add_parser('dump', parents=[common]) if known is None: sub.add_argument('--known') sub.add_argument('--show', action='store_true') + process_progress = add_progress_cli(sub) - sub = subs.add_parser('check') + sub = subs.add_parser('check', parents=[common]) if known is None: sub.add_argument('--known', required=True) - return None + def process_args(args): + if args.datacmd == 'dump': + process_progress(args) + return process_args def cmd_data(datacmd, filenames, known=None, *, @@ -406,6 +435,7 @@ def cmd_data(datacmd, filenames, known=None, *, formats=FORMATS, extracolumns=None, relroot=None, + track_progress=None, **kwargs ): kwargs.pop('verbosity', None) @@ -417,6 +447,8 @@ def cmd_data(datacmd, filenames, known=None, *, for line in do_fmt(known): print(line) elif datacmd == 'dump': + if track_progress: + filenames = track_progress(filenames) analyzed = _analyze(filenames, **kwargs) if known is None or usestdout: outfile = io.StringIO() diff --git a/Tools/c-analyzer/c_analyzer/analyze.py b/Tools/c-analyzer/c_analyzer/analyze.py index d8ae915e420029..267d058e07abdb 100644 --- a/Tools/c-analyzer/c_analyzer/analyze.py +++ b/Tools/c-analyzer/c_analyzer/analyze.py @@ -3,15 +3,19 @@ TypeDeclaration, POTSType, FuncPtr, +) +from c_parser.match import ( is_pots, is_funcptr, ) from .info import ( IGNORED, UNKNOWN, - is_system_type, SystemType, ) +from .match import ( + is_system_type, +) def get_typespecs(typedecls): diff --git a/Tools/c-analyzer/c_analyzer/datafiles.py b/Tools/c-analyzer/c_analyzer/datafiles.py index 0de438cce470fd..d37a4eefe351ad 100644 --- a/Tools/c-analyzer/c_analyzer/datafiles.py +++ 
b/Tools/c-analyzer/c_analyzer/datafiles.py @@ -1,5 +1,6 @@ import c_common.tables as _tables import c_parser.info as _info +import c_parser.match as _match import c_parser.datafiles as _parser from . import analyze as _analyze @@ -17,7 +18,7 @@ def analyze_known(known, *, handle_unresolved=True, ): knowntypes = knowntypespecs = {} - collated = _info.collate_by_kind_group(known) + collated = _match.group_by_kinds(known) types = {decl: None for decl in collated['type']} typespecs = _analyze.get_typespecs(types) def analyze_decl(decl): diff --git a/Tools/c-analyzer/c_analyzer/info.py b/Tools/c-analyzer/c_analyzer/info.py index 23d77611a4c3ca..be9281502d250d 100644 --- a/Tools/c-analyzer/c_analyzer/info.py +++ b/Tools/c-analyzer/c_analyzer/info.py @@ -7,7 +7,11 @@ HighlevelParsedItem, Declaration, TypeDeclaration, +) +from c_parser.match import ( is_type_decl, +) +from .match import ( is_process_global, ) @@ -16,44 +20,6 @@ UNKNOWN = _misc.Labeled('UNKNOWN') -# XXX Use known.tsv for these? -SYSTEM_TYPES = { - 'int8_t', - 'uint8_t', - 'int16_t', - 'uint16_t', - 'int32_t', - 'uint32_t', - 'int64_t', - 'uint64_t', - 'size_t', - 'ssize_t', - 'intptr_t', - 'uintptr_t', - 'wchar_t', - '', - # OS-specific - 'pthread_cond_t', - 'pthread_mutex_t', - 'pthread_key_t', - 'atomic_int', - 'atomic_uintptr_t', - '', - # lib-specific - 'WINDOW', # curses - 'XML_LChar', - 'XML_Size', - 'XML_Parser', - 'enum XML_Error', - 'enum XML_Status', - '', -} - - -def is_system_type(typespec): - return typespec in SYSTEM_TYPES - - class SystemType(TypeDeclaration): def __init__(self, name): diff --git a/Tools/c-analyzer/c_analyzer/match.py b/Tools/c-analyzer/c_analyzer/match.py new file mode 100644 index 00000000000000..5c27e4a224afc8 --- /dev/null +++ b/Tools/c-analyzer/c_analyzer/match.py @@ -0,0 +1,212 @@ +import os.path + +from c_parser import ( + info as _info, + match as _match, +) + + +_KIND = _info.KIND + + +# XXX Use known.tsv for these? 
+SYSTEM_TYPES = { + 'int8_t', + 'uint8_t', + 'int16_t', + 'uint16_t', + 'int32_t', + 'uint32_t', + 'int64_t', + 'uint64_t', + 'size_t', + 'ssize_t', + 'intptr_t', + 'uintptr_t', + 'wchar_t', + '', + # OS-specific + 'pthread_cond_t', + 'pthread_mutex_t', + 'pthread_key_t', + 'atomic_int', + 'atomic_uintptr_t', + '', + # lib-specific + 'WINDOW', # curses + 'XML_LChar', + 'XML_Size', + 'XML_Parser', + 'enum XML_Error', + 'enum XML_Status', + '', +} + + +def is_system_type(typespec): + return typespec in SYSTEM_TYPES + + +################################## +# decl matchers + +def is_public(decl): + if not decl.filename.endswith('.h'): + return False + if 'Include' not in decl.filename.split(os.path.sep): + return False + return True + + +def is_process_global(vardecl): + kind, storage, _, _, _ = _info.get_parsed_vartype(vardecl) + if kind is not _KIND.VARIABLE: + raise NotImplementedError(vardecl) + if 'static' in (storage or ''): + return True + + if hasattr(vardecl, 'parent'): + parent = vardecl.parent + else: + parent = vardecl.get('parent') + return not parent + + +def is_fixed_type(vardecl): + if not vardecl: + return None + _, _, _, typespec, abstract = _info.get_parsed_vartype(vardecl) + if 'typeof' in typespec: + raise NotImplementedError(vardecl) + elif not abstract: + return True + + if '*' not in abstract: + # XXX What about []? + return True + elif _match._is_funcptr(abstract): + return True + else: + for after in abstract.split('*')[1:]: + if not after.lstrip().startswith('const'): + return False + else: + return True + + +def is_immutable(vardecl): + if not vardecl: + return None + if not is_fixed_type(vardecl): + return False + _, _, typequal, _, _ = _info.get_parsed_vartype(vardecl) + # If there, it can only be "const" or "volatile". 
+ return typequal == 'const' + + +def is_public_api(decl): + if not is_public(decl): + return False + if decl.kind is _KIND.TYPEDEF: + return True + elif _match.is_type_decl(decl): + return not _match.is_forward_decl(decl) + else: + return _match.is_external_reference(decl) + + +def is_public_declaration(decl): + if not is_public(decl): + return False + if decl.kind is _KIND.TYPEDEF: + return True + elif _match.is_type_decl(decl): + return _match.is_forward_decl(decl) + else: + return _match.is_external_reference(decl) + + +def is_public_definition(decl): + if not is_public(decl): + return False + if decl.kind is _KIND.TYPEDEF: + return True + elif _match.is_type_decl(decl): + return not _match.is_forward_decl(decl) + else: + return not _match.is_external_reference(decl) + + +def is_public_impl(decl): + if not _KIND.is_decl(decl.kind): + return False + # See filter_forward() about "is_public". + return getattr(decl, 'is_public', False) + + +def is_module_global_decl(decl): + if is_public_impl(decl): + return False + if _match.is_forward_decl(decl): + return False + return not _match.is_local_var(decl) + + +################################## +# filtering with matchers + +def filter_forward(items, *, markpublic=False): + if markpublic: + public = set() + actual = [] + for item in items: + if is_public_api(item): + public.add(item.id) + elif not _match.is_forward_decl(item): + actual.append(item) + else: + # non-public duplicate! + # XXX + raise Exception(item) + for item in actual: + _info.set_flag(item, 'is_public', item.id in public) + yield item + else: + for item in items: + if _match.is_forward_decl(item): + continue + yield item + + +################################## +# grouping with matchers + +def group_by_storage(decls, **kwargs): + def is_module_global(decl): + if not is_module_global_decl(decl): + return False + if decl.kind == _KIND.VARIABLE: + if _info.get_effective_storage(decl) == 'static': + # This is covered by is_static_module_global(). 
+ return False + return True + def is_static_module_global(decl): + if not _match.is_global_var(decl): + return False + return _info.get_effective_storage(decl) == 'static' + def is_static_local(decl): + if not _match.is_local_var(decl): + return False + return _info.get_effective_storage(decl) == 'static' + #def is_local(decl): + # if not _match.is_local_var(decl): + # return False + # return _info.get_effective_storage(decl) != 'static' + categories = { + #'extern': is_extern, + 'published': is_public_impl, + 'module-global': is_module_global, + 'static-module-global': is_static_module_global, + 'static-local': is_static_local, + } + return _match.group_by_category(decls, categories, **kwargs) diff --git a/Tools/c-analyzer/c_common/scriptutil.py b/Tools/c-analyzer/c_common/scriptutil.py index 939a85003b2964..222059015d76ec 100644 --- a/Tools/c-analyzer/c_common/scriptutil.py +++ b/Tools/c-analyzer/c_common/scriptutil.py @@ -10,6 +10,9 @@ from . import fsutil, strutil, iterutil, logging as loggingutil +_NOT_SET = object() + + def get_prog(spec=None, *, absolute=False, allowsuffix=True): if spec is None: _, spec = _find_script() @@ -313,6 +316,22 @@ def _parse_files(filenames): yield filename.strip() +def add_progress_cli(parser, *, threshold=VERBOSITY, **kwargs): + parser.add_argument('--progress', dest='track_progress', action='store_const', const=True) + parser.add_argument('--no-progress', dest='track_progress', action='store_false') + parser.set_defaults(track_progress=True) + + def process_args(args): + if args.track_progress: + ns = vars(args) + verbosity = ns.get('verbosity', VERBOSITY) + if verbosity <= threshold: + args.track_progress = track_progress_compact + else: + args.track_progress = track_progress_flat + return process_args + + def add_failure_filtering_cli(parser, pool, *, default=False): parser.add_argument('--fail', action='append', metavar=f'"{{all|{"|".join(sorted(pool))}}},..."') @@ -551,13 +570,39 @@ def _iter_filenames(filenames, 
iter_files): raise NotImplementedError -def iter_marks(mark='.', *, group=5, groups=2, lines=10, sep=' '): +def track_progress_compact(items, *, groups=5, **mark_kwargs): + last = os.linesep + marks = iter_marks(groups=groups, **mark_kwargs) + for item in items: + last = next(marks) + print(last, end='', flush=True) + yield item + if not last.endswith(os.linesep): + print() + + +def track_progress_flat(items, fmt='<{}>'): + for item in items: + print(fmt.format(item), flush=True) + yield item + + +def iter_marks(mark='.', *, group=5, groups=2, lines=_NOT_SET, sep=' '): mark = mark or '' + group = group if group and group > 1 else 1 + groups = groups if groups and groups > 1 else 1 + sep = f'{mark}{sep}' if sep else mark end = f'{mark}{os.linesep}' div = os.linesep perline = group * groups - perlines = perline * lines + if lines is _NOT_SET: + # By default we try to put about 100 in each line group. + perlines = 100 // perline * perline + elif not lines or lines < 0: + perlines = None + else: + perlines = perline * lines if perline == 1: yield end @@ -568,7 +613,7 @@ def iter_marks(mark='.', *, group=5, groups=2, lines=10, sep=' '): while True: if count % perline == 0: yield end - if count % perlines == 0: + if perlines and count % perlines == 0: yield div elif count % group == 0: yield sep diff --git a/Tools/c-analyzer/c_parser/datafiles.py b/Tools/c-analyzer/c_parser/datafiles.py index 5bdb946b1772ab..cdd69b1f9b2d8a 100644 --- a/Tools/c-analyzer/c_parser/datafiles.py +++ b/Tools/c-analyzer/c_parser/datafiles.py @@ -92,7 +92,7 @@ def write_decls_tsv(decls, outfile, extracolumns=None, *, **kwargs ): # XXX Move the row rendering here. 
- _write_decls_tsv(rows, outfile, extracolumns, relroot, kwargs) + _write_decls_tsv(decls, outfile, extracolumns, relroot, kwargs) def _iter_decls_tsv(infile, extracolumns=None, relroot=None): diff --git a/Tools/c-analyzer/c_parser/info.py b/Tools/c-analyzer/c_parser/info.py index a07ce2e0ccb8d3..798a45d2e08e71 100644 --- a/Tools/c-analyzer/c_parser/info.py +++ b/Tools/c-analyzer/c_parser/info.py @@ -7,85 +7,12 @@ import c_common.misc as _misc import c_common.strutil as _strutil import c_common.tables as _tables -from .parser._regexes import SIMPLE_TYPE +from .parser._regexes import SIMPLE_TYPE, _STORAGE FIXED_TYPE = _misc.Labeled('FIXED_TYPE') -POTS_REGEX = re.compile(rf'^{SIMPLE_TYPE}$', re.VERBOSE) - - -def is_pots(typespec): - if not typespec: - return None - if type(typespec) is not str: - _, _, _, typespec, _ = get_parsed_vartype(typespec) - return POTS_REGEX.match(typespec) is not None - - -def is_funcptr(vartype): - if not vartype: - return None - _, _, _, _, abstract = get_parsed_vartype(vartype) - return _is_funcptr(abstract) - - -def _is_funcptr(declstr): - if not declstr: - return None - # XXX Support "(*)(". - return '(*)(' in declstr.replace(' ', '') - - -def is_exported_symbol(decl): - _, storage, _, _, _ = get_parsed_vartype(decl) - raise NotImplementedError - - -def is_process_global(vardecl): - kind, storage, _, _, _ = get_parsed_vartype(vardecl) - if kind is not KIND.VARIABLE: - raise NotImplementedError(vardecl) - if 'static' in (storage or ''): - return True - - if hasattr(vardecl, 'parent'): - parent = vardecl.parent - else: - parent = vardecl.get('parent') - return not parent - - -def is_fixed_type(vardecl): - if not vardecl: - return None - _, _, _, typespec, abstract = get_parsed_vartype(vardecl) - if 'typeof' in typespec: - raise NotImplementedError(vardecl) - elif not abstract: - return True - - if '*' not in abstract: - # XXX What about []? 
- return True - elif _is_funcptr(abstract): - return True - else: - for after in abstract.split('*')[1:]: - if not after.lstrip().startswith('const'): - return False - else: - return True - - -def is_immutable(vardecl): - if not vardecl: - return None - if not is_fixed_type(vardecl): - return False - _, _, typequal, _, _ = get_parsed_vartype(vardecl) - # If there, it can only be "const" or "volatile". - return typequal == 'const' +STORAGE = frozenset(_STORAGE) ############################# @@ -214,58 +141,8 @@ def resolve_group(cls, group): KIND._GROUPS.update((k.value, {k}) for k in KIND) -# The module-level kind-related helpers (below) deal with .kind: - -def is_type_decl(kind): - # Handle ParsedItem, Declaration, etc.. - kind = getattr(kind, 'kind', kind) - return KIND.is_type_decl(kind) - - -def is_decl(kind): - # Handle ParsedItem, Declaration, etc.. - kind = getattr(kind, 'kind', kind) - return KIND.is_decl(kind) - - -def filter_by_kind(items, kind): - if kind == 'type': - kinds = KIND._TYPE_DECLS - elif kind == 'decl': - kinds = KIND._TYPE_DECLS - try: - okay = kind in KIND - except TypeError: - kinds = set(kind) - else: - kinds = {kind} if okay else set(kind) - for item in items: - if item.kind in kinds: - yield item - - -def collate_by_kind(items): - collated = {kind: [] for kind in KIND} - for item in items: - try: - collated[item.kind].append(item) - except KeyError: - raise ValueError(f'unsupported kind in {item!r}') - return collated - - -def get_kind_group(kind): - # Handle ParsedItem, Declaration, etc.. 
- kind = getattr(kind, 'kind', kind) - return KIND.get_group(kind) - - -def collate_by_kind_group(items): - collated = {KIND.get_group(k): [] for k in KIND} - for item in items: - group = KIND.get_group(item.kind) - collated[group].append(item) - return collated +def get_kind_group(item): + return KIND.get_group(item.kind) ############################# @@ -484,6 +361,27 @@ def get_parsed_vartype(decl): return kind, storage, typequal, typespec, abstract +def get_default_storage(decl): + if decl.kind not in (KIND.VARIABLE, KIND.FUNCTION): + return None + return 'extern' if decl.parent is None else 'auto' + + +def get_effective_storage(decl, *, default=None): + # Note that "static" limits access to just that C module + # and "extern" (the default for module-level) allows access + # outside the C module. + if default is None: + default = get_default_storage(decl) + if default is None: + return None + try: + storage = decl.storage + except AttributeError: + storage, _ = _get_vartype(decl.data) + return storage or default + + ############################# # high-level @@ -997,7 +895,7 @@ def _unformat_data(cls, datastr, fmt=None): def __init__(self, file, name, data, parent=None, storage=None): super().__init__(file, name, data, parent, - _extra={'storage': storage}, + _extra={'storage': storage or None}, _shortkey=f'({parent.name}).{name}' if parent else name, _key=(str(file), # Tilde comes after all other ascii characters. @@ -1005,6 +903,11 @@ def __init__(self, file, name, data, parent=None, storage=None): name, ), ) + if storage: + if storage not in STORAGE: + # The parser must need an update. + raise NotImplementedError(storage) + # Otherwise we trust the compiler to have validated it. 
@property def vartype(self): @@ -1413,6 +1316,13 @@ def resolve_parsed(parsed): return cls.from_parsed(parsed) +def set_flag(item, name, value): + try: + setattr(item, name, value) + except AttributeError: + object.__setattr__(item, name, value) + + ############################# # composite diff --git a/Tools/c-analyzer/c_parser/match.py b/Tools/c-analyzer/c_parser/match.py new file mode 100644 index 00000000000000..3b5068fd11b685 --- /dev/null +++ b/Tools/c-analyzer/c_parser/match.py @@ -0,0 +1,177 @@ +import re + +from . import info as _info +from .parser._regexes import SIMPLE_TYPE + + +_KIND = _info.KIND + + +def match_storage(decl, expected): + default = _info.get_default_storage(decl) + #assert default + if expected is None: + expected = {default} + elif isinstance(expected, str): + expected = {expected or default} + elif not expected: + expected = _info.STORAGE + else: + expected = {v or default for v in expected} + storage = _info.get_effective_storage(decl, default=default) + return storage in expected + + +################################## +# decl matchers + +def is_type_decl(item): + return _KIND.is_type_decl(item.kind) + + +def is_decl(item): + return _KIND.is_decl(item.kind) + + +def is_pots(typespec, *, + _regex=re.compile(rf'^{SIMPLE_TYPE}$', re.VERBOSE), + ): + + if not typespec: + return None + if type(typespec) is not str: + _, _, _, typespec, _ = _info.get_parsed_vartype(typespec) + return _regex.match(typespec) is not None + + +def is_funcptr(vartype): + if not vartype: + return None + _, _, _, _, abstract = _info.get_parsed_vartype(vartype) + return _is_funcptr(abstract) + + +def _is_funcptr(declstr): + if not declstr: + return None + # XXX Support "(*)(". + return '(*)(' in declstr.replace(' ', '') + + +def is_forward_decl(decl): + if decl.kind is _KIND.TYPEDEF: + return False + elif is_type_decl(decl): + return not decl.data + elif decl.kind is _KIND.FUNCTION: + # XXX This doesn't work with ParsedItem. 
+ return decl.signature.isforward + elif decl.kind is _KIND.VARIABLE: + # No var decls are considered forward (or all are...). + return False + else: + raise NotImplementedError(decl) + + +def can_have_symbol(decl): + return decl.kind in (_KIND.VARIABLE, _KIND.FUNCTION) + + +def has_external_symbol(decl): + if not can_have_symbol(decl): + return False + if _info.get_effective_storage(decl) != 'extern': + return False + if decl.kind is _KIND.FUNCTION: + return not decl.signature.isforward + else: + # It must be a variable, which can only be implicitly extern here. + return decl.storage != 'extern' + + +def has_internal_symbol(decl): + if not can_have_symbol(decl): + return False + return _info.get_actual_storage(decl) == 'static' + + +def is_external_reference(decl): + if not can_have_symbol(decl): + return False + # We have to check the declared storage rather than the effective. + if decl.storage != 'extern': + return False + if decl.kind is _KIND.FUNCTION: + return decl.signature.isforward + # Otherwise it's a variable. + return True + + +def is_local_var(decl): + if not decl.kind is _KIND.VARIABLE: + return False + return True if decl.parent else False + + +def is_global_var(decl): + if not decl.kind is _KIND.VARIABLE: + return False + return False if decl.parent else True + + +################################## +# filtering with matchers + +def filter_by_kind(items, kind): + if kind == 'type': + kinds = _KIND._TYPE_DECLS + elif kind == 'decl': + kinds = _KIND._TYPE_DECLS + try: + okay = kind in _KIND + except TypeError: + kinds = set(kind) + else: + kinds = {kind} if okay else set(kind) + for item in items: + if item.kind in kinds: + yield item + + +################################## +# grouping with matchers + +def group_by_category(decls, categories, *, ignore_non_match=True): + collated = {} + for decl in decls: + # Matchers should be mutually exclusive. (First match wins.) 
+ for category, match in categories.items(): + if match(decl): + if category not in collated: + collated[category] = [decl] + else: + collated[category].append(decl) + break + else: + if not ignore_non_match: + raise Exception(f'no match for {decl!r}') + return collated + + +def group_by_kind(items): + collated = {kind: [] for kind in _KIND} + for item in items: + try: + collated[item.kind].append(item) + except KeyError: + raise ValueError(f'unsupported kind in {item!r}') + return collated + + +def group_by_kinds(items): + # Collate into kind groups (decl, type, etc.). + collated = {_KIND.get_group(k): [] for k in _KIND} + for item in items: + group = _KIND.get_group(item.kind) + collated[group].append(item) + return collated diff --git a/Tools/c-analyzer/c_parser/parser/__init__.py b/Tools/c-analyzer/c_parser/parser/__init__.py index 7cb34caf09eba8..4b201c6354023c 100644 --- a/Tools/c-analyzer/c_parser/parser/__init__.py +++ b/Tools/c-analyzer/c_parser/parser/__init__.py @@ -163,6 +163,8 @@ def _parse(srclines, anon_name): def _iter_source(lines, *, maxtext=20_000, maxlines=700, showtext=False): + maxtext = maxtext if maxtext and maxtext > 0 else None + maxlines = maxlines if maxlines and maxlines > 0 else None filestack = [] allinfo = {} # "lines" should be (fileinfo, data), as produced by the preprocessor code. 
@@ -181,9 +183,7 @@ def _iter_source(lines, *, maxtext=20_000, maxlines=700, showtext=False): _logger.debug(f'-> {line}') srcinfo._add_line(line, fileinfo.lno) - if len(srcinfo.text) > maxtext: - break - if srcinfo.end - srcinfo.start > maxlines: + if srcinfo.too_much(maxtext, maxlines): break while srcinfo._used(): yield srcinfo diff --git a/Tools/c-analyzer/c_parser/parser/_info.py b/Tools/c-analyzer/c_parser/parser/_info.py index 2dcd5e5e760b7c..cc21931b66cc57 100644 --- a/Tools/c-analyzer/c_parser/parser/_info.py +++ b/Tools/c-analyzer/c_parser/parser/_info.py @@ -1,3 +1,5 @@ +import re + from ..info import KIND, ParsedItem, FileInfo @@ -121,6 +123,19 @@ def resolve(self, kind, data, name, parent=None): def done(self): self._set_ready() + def too_much(self, maxtext, maxlines): + if maxtext and len(self.text) > maxtext: + pass + elif maxlines and self.end - self.start > maxlines: + pass + else: + return False + + #if re.fullmatch(r'[^;]+\[\][ ]*=[ ]*[{]([ ]*\d+,)*([ ]*\d+,?)\s*', + # self._current.text): + # return False + return True + def _set_ready(self): if self._current is None: self._ready = False diff --git a/Tools/c-analyzer/c_parser/parser/_regexes.py b/Tools/c-analyzer/c_parser/parser/_regexes.py index e9bc31d335a7d5..cb85a59aaa16c2 100644 --- a/Tools/c-analyzer/c_parser/parser/_regexes.py +++ b/Tools/c-analyzer/c_parser/parser/_regexes.py @@ -137,7 +137,8 @@ def _ind(text, level=1, edges='both'): ####################################### # variable declarations -STORAGE_CLASS = r'(?: \b (?: auto | register | static | extern ) \b )' +_STORAGE = 'auto register static extern'.split() +STORAGE_CLASS = rf'(?: \b (?: {" | ".join(_STORAGE)} ) \b )' TYPE_QUALIFIER = r'(?: \b (?: const | volatile ) \b )' PTR_QUALIFIER = rf'(?: [*] (?: \s* {TYPE_QUALIFIER} )? 
)' diff --git a/Tools/c-analyzer/cpython/__main__.py b/Tools/c-analyzer/cpython/__main__.py index 23a3de06f639c1..23ce29776ca68e 100644 --- a/Tools/c-analyzer/cpython/__main__.py +++ b/Tools/c-analyzer/cpython/__main__.py @@ -31,6 +31,9 @@ def _resolve_filenames(filenames): return resolved +####################################### +# the formats + def fmt_summary(analysis): # XXX Support sorting and grouping. supported = [] @@ -179,7 +182,7 @@ def analyze(files, **kwargs): analyze_resolved=_analyzer.analyze_resolved, ) return _analyzer.Analysis.from_results(results) - else: + else: # check known = _analyzer.read_known() def analyze(files, **kwargs): return _analyzer.iter_decls(files, **kwargs) diff --git a/Tools/c-analyzer/cpython/_analyzer.py b/Tools/c-analyzer/cpython/_analyzer.py index 98f8888651e579..978831d1fd9496 100644 --- a/Tools/c-analyzer/cpython/_analyzer.py +++ b/Tools/c-analyzer/cpython/_analyzer.py @@ -11,9 +11,14 @@ Struct, Member, FIXED_TYPE, +) +from c_parser.match import ( is_type_decl, is_pots, is_funcptr, +) +from c_analyzer.match import ( + is_system_type, is_process_global, is_fixed_type, is_immutable, @@ -246,7 +251,7 @@ def _check_typespec(decl, typedecl, types, knowntypes): # Fall back to default known types. 
if is_pots(typespec): return None - elif _info.is_system_type(typespec): + elif is_system_type(typespec): return None elif is_funcptr(decl.vartype): return None diff --git a/Tools/c-analyzer/cpython/_parser.py b/Tools/c-analyzer/cpython/_parser.py index 35fa296251e2ee..7c8c2966653989 100644 --- a/Tools/c-analyzer/cpython/_parser.py +++ b/Tools/c-analyzer/cpython/_parser.py @@ -46,10 +46,14 @@ def clean_lines(text): GLOBS = [ 'Include/*.h', 'Include/internal/*.h', + 'Modules/**/*.h', 'Modules/**/*.c', + 'Objects/**/*.h', 'Objects/**/*.c', + 'Python/**/*.h', + 'Parser/**/*.c', + 'Python/**/*.h', 'Parser/**/*.c', - 'Python/**/*.c', ] EXCLUDED = clean_lines(''' @@ -67,11 +71,24 @@ def clean_lines(text): Modules/_winapi.c # windows.h Modules/overlapped.c # winsock.h Python/dynload_win.c # windows.h +Modules/expat/winconfig.h +Python/thread_nt.h # other OS-dependent Python/dynload_dl.c # dl.h Python/dynload_hpux.c # dl.h Python/dynload_aix.c # sys/ldr.h +Python/thread_pthread.h + +# only huge constants (safe but parsing is slow) +Modules/_ssl_data.h +Modules/unicodedata_db.h +Modules/unicodename_db.h +Modules/cjkcodecs/mappings_*.h +Objects/unicodetype_db.h +Python/importlib.h +Python/importlib_external.h +Python/importlib_zipimport.h # @end=conf@ ''') @@ -80,6 +97,17 @@ def clean_lines(text): EXCLUDED += clean_lines(''' # The tool should be able to parse these... 
+Modules/hashlib.h +Objects/stringlib/codecs.h +Objects/stringlib/count.h +Objects/stringlib/ctype.h +Objects/stringlib/fastsearch.h +Objects/stringlib/find.h +Objects/stringlib/find_max_char.h +Objects/stringlib/partition.h +Objects/stringlib/replace.h +Objects/stringlib/split.h + Modules/_dbmmodule.c Modules/cjkcodecs/_codecs_*.c Modules/expat/xmlrole.c @@ -134,6 +162,9 @@ def clean_lines(text): Modules/_ctypes/cfield.c Py_BUILD_CORE 1 Modules/_heapqmodule.c Py_BUILD_CORE 1 Modules/_posixsubprocess.c Py_BUILD_CORE 1 +Objects/stringlib/codecs.h Py_BUILD_CORE 1 +Python/ceval_gil.h Py_BUILD_CORE 1 +Python/condvar.h Py_BUILD_CORE 1 Modules/_json.c Py_BUILD_CORE_BUILTIN 1 Modules/_pickle.c Py_BUILD_CORE_BUILTIN 1 @@ -177,6 +208,12 @@ def clean_lines(text): Python/import.c PyMODINIT_FUNC PyObject* Modules/_testcapimodule.c PyAPI_FUNC(RTYPE) RTYPE Python/getargs.c PyAPI_FUNC(RTYPE) RTYPE +Objects/stringlib/unicode_format.h Py_LOCAL_INLINE(type) static inline type + +# implied include of pymacro.h +*/clinic/*.c.h PyDoc_VAR(name) static const char name[] +*/clinic/*.c.h PyDoc_STR(str) str +*/clinic/*.c.h PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str) # implied include of exports.h #Modules/_io/bytesio.c Py_EXPORTED_SYMBOL /* */ @@ -212,6 +249,11 @@ def clean_lines(text): Modules/expat/xmlparse.c XML_POOR_ENTROPY 1 Modules/_dbmmodule.c HAVE_GDBM_DASH_NDBM_H 1 +# others +Modules/sre_lib.h LOCAL(type) static inline type +Modules/sre_lib.h SRE(F) sre_ucs2_##F +Objects/stringlib/codecs.h STRINGLIB_IS_UNICODE 1 + # @end=tsv@ ''')[1:]