From 795f673c3d510bf36724180cfe49d72d9d7bf10c Mon Sep 17 00:00:00 2001 From: Matthias Kramm Date: Fri, 19 Aug 2016 13:20:17 -0700 Subject: [PATCH] Allow a custom type comment prefix. Instead of hardcoding it, we expect callers to explicitly do something like ast27.register_type_comment_prefix("# type: ") before parsing. --- ast27/Custom/typed_ast.c | 11 +++++++++ ast27/Parser/tokenizer.c | 47 ++++++++++++++++++++++++++------------ ast27/Python/Python-ast.c | 6 ++++- ast35/Custom/typed_ast.c | 11 +++++++++ ast35/Parser/tokenizer.c | 48 ++++++++++++++++++++++++++------------- ast35/Python/Python-ast.c | 6 ++++- typed_ast/ast27.py | 7 ++++++ typed_ast/ast35.py | 7 ++++++ 8 files changed, 111 insertions(+), 32 deletions(-) diff --git a/ast27/Custom/typed_ast.c b/ast27/Custom/typed_ast.c index df130ff7..5b80fe2d 100644 --- a/ast27/Custom/typed_ast.c +++ b/ast27/Custom/typed_ast.c @@ -9,6 +9,7 @@ #include "errcode.h" extern grammar _Ta27Parser_Grammar; /* from graminit.c */ +extern void tokenizer_register_type_comment_prefix(const char* pattern); /* from tokenizer.c */ // from Python/bltinmodule.c static const char * @@ -311,3 +312,13 @@ ast27_parse(PyObject *self, PyObject *args) return return_value; } + +PyObject * +ast27_register_type_comment_prefix(PyObject *self, PyObject *args) +{ + const char* prefix; + if (!PyArg_ParseTuple(args, "s", &prefix)) + return NULL; + tokenizer_register_type_comment_prefix(prefix); + Py_RETURN_NONE; /* hand the caller its own reference to None */ +} diff --git a/ast27/Parser/tokenizer.c b/ast27/Parser/tokenizer.c index e435d497..9accc067 100644 --- a/ast27/Parser/tokenizer.c +++ b/ast27/Parser/tokenizer.c @@ -101,7 +101,20 @@ char *_Ta27Parser_TokenNames[] = { /* Spaces in this constant are treated as "zero or more spaces or tabs" when tokenizing. 
*/ -static const char* type_comment_prefix = "# type: "; +static struct type_comment_prefix { + const char* pattern; + struct type_comment_prefix* next; +} *type_comment_prefixes = NULL; + +/* Register PATTERN as a type comment prefix. The string is copied; if allocation fails nothing is registered. */ +void +tokenizer_register_type_comment_prefix(const char* pattern) +{ + struct type_comment_prefix* prefix = malloc(sizeof(*prefix)); + if (prefix == NULL || (prefix->pattern = strdup(pattern)) == NULL) { free(prefix); return; } + prefix->next = type_comment_prefixes; + type_comment_prefixes = prefix; +} /* Create and initialize a new tok_state structure */ @@ -1377,24 +1390,30 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end) c = tok_nextc(tok); /* check for type comment */ - const char *prefix, *p, *type_start; - p = tok->start; - prefix = type_comment_prefix; - while (*prefix && p < tok->cur) { - if (*prefix == ' ') { - while (*p == ' ' || *p == '\t') + struct type_comment_prefix* prefix = type_comment_prefixes; + const char *p = NULL; + while (prefix) { + p = tok->start; + const char *pattern = prefix->pattern; + while (*pattern && p < tok->cur) { + if (*pattern == ' ') { + while (*p == ' ' || *p == '\t') + p++; + } else if (*pattern == *p) { p++; - } else if (*prefix == *p) { - p++; - } else { - break; + } else { + break; + } + pattern++; } - - prefix++; + if (!*pattern) + break; + prefix = prefix->next; } /* This is a type comment if we matched all of type_comment_prefix. 
*/ - if (!*prefix) { + if (prefix) { + const char *type_start; int is_type_ignore = 1; tok_backup(tok, c); /* don't eat the newline or EOF */ diff --git a/ast27/Python/Python-ast.c b/ast27/Python/Python-ast.c index ec9b340f..f155dad8 100644 --- a/ast27/Python/Python-ast.c +++ b/ast27/Python/Python-ast.c @@ -6850,9 +6850,13 @@ obj2ast_type_ignore(PyObject* obj, type_ignore_ty* out, PyArena* arena) } +PyObject *ast27_register_type_comment_prefix(PyObject *self, PyObject *args); PyObject *ast27_parse(PyObject *self, PyObject *args); static PyMethodDef ast27_methods[] = { - {"parse", ast27_parse, METH_VARARGS, "Parse string into typed AST."}, + {"parse", ast27_parse, + METH_VARARGS, "Parse string into typed AST."}, + {"register_type_comment_prefix", ast27_register_type_comment_prefix, + METH_VARARGS, "Register a prefix to treat as a typecomment."}, {NULL, NULL, 0, NULL} }; static struct PyModuleDef _astmodule27 = { diff --git a/ast35/Custom/typed_ast.c b/ast35/Custom/typed_ast.c index 80172aa2..3c04b95a 100644 --- a/ast35/Custom/typed_ast.c +++ b/ast35/Custom/typed_ast.c @@ -9,6 +9,7 @@ #include "errcode.h" extern grammar _Ta35Parser_Grammar; /* from graminit.c */ +extern void tokenizer_register_type_comment_prefix(const char* pattern); /* from tokenizer.c */ // from Python/bltinmodule.c static const char * @@ -320,3 +321,13 @@ ast35_parse(PyObject *self, PyObject *args) return return_value; } + +PyObject * +ast35_register_type_comment_prefix(PyObject *self, PyObject *args) +{ + const char* prefix; + if (!PyArg_ParseTuple(args, "s", &prefix)) + return NULL; + tokenizer_register_type_comment_prefix(prefix); + Py_RETURN_NONE; /* hand the caller its own reference to None */ +} diff --git a/ast35/Parser/tokenizer.c b/ast35/Parser/tokenizer.c index a23546ed..26f60efa 100644 --- a/ast35/Parser/tokenizer.c +++ b/ast35/Parser/tokenizer.c @@ -117,8 +117,20 @@ const char *_Ta35Parser_TokenNames[] = { /* Spaces in this constant are treated as "zero or more spaces or tabs" when tokenizing. 
*/ -static const char* type_comment_prefix = "# type: "; +static struct type_comment_prefix { + const char* pattern; + struct type_comment_prefix* next; +} *type_comment_prefixes = NULL; +/* Register PATTERN as a type comment prefix. The string is copied; if allocation fails nothing is registered. */ +void +tokenizer_register_type_comment_prefix(const char* pattern) +{ + struct type_comment_prefix* prefix = malloc(sizeof(*prefix)); + if (prefix == NULL || (prefix->pattern = strdup(pattern)) == NULL) { free(prefix); return; } + prefix->next = type_comment_prefixes; + type_comment_prefixes = prefix; +} /* Create and initialize a new tok_state structure */ @@ -1473,32 +1485,36 @@ tok_get(struct tok_state *tok, char **p_start, char **p_end) /* Skip comment, unless it's a type comment */ if (c == '#') { - const char *prefix, *p, *type_start; - while (c != EOF && c != '\n') c = tok_nextc(tok); - p = tok->start; - prefix = type_comment_prefix; - while (*prefix && p < tok->cur) { - if (*prefix == ' ') { - while (*p == ' ' || *p == '\t') + struct type_comment_prefix* prefix = type_comment_prefixes; + const char *p = NULL; + while (prefix) { + p = tok->start; + const char *pattern = prefix->pattern; + while (*pattern && p < tok->cur) { + if (*pattern == ' ') { + while (*p == ' ' || *p == '\t') + p++; + } else if (*pattern == *p) { p++; - } else if (*prefix == *p) { - p++; - } else { - break; + } else { + break; + } + pattern++; } - - prefix++; + if (!*pattern) + break; + prefix = prefix->next; } /* This is a type comment if we matched all of type_comment_prefix. 
*/ - if (!*prefix) { + if (prefix) { int is_type_ignore = 1; tok_backup(tok, c); /* don't eat the newline or EOF */ - type_start = p; + const char *type_start = p; is_type_ignore = tok->cur >= p + 6 && memcmp(p, "ignore", 6) == 0; p += 6; diff --git a/ast35/Python/Python-ast.c b/ast35/Python/Python-ast.c index eb866be9..8bf6c9db 100644 --- a/ast35/Python/Python-ast.c +++ b/ast35/Python/Python-ast.c @@ -7556,8 +7556,12 @@ obj2ast_type_ignore(PyObject* obj, type_ignore_ty* out, PyArena* arena) PyObject *ast35_parse(PyObject *self, PyObject *args); +PyObject *ast35_register_type_comment_prefix(PyObject *self, PyObject *args); static PyMethodDef ast35_methods[] = { - {"_parse", ast35_parse, METH_VARARGS, "Parse string into typed AST."}, + {"_parse", ast35_parse, + METH_VARARGS, "Parse string into typed AST."}, + {"register_type_comment_prefix", ast35_register_type_comment_prefix, + METH_VARARGS, "Register a prefix to treat as a typecomment."}, {NULL, NULL, 0, NULL} }; static struct PyModuleDef _astmodule35 = { diff --git a/typed_ast/ast27.py b/typed_ast/ast27.py index a1a15f80..becf2a8b 100644 --- a/typed_ast/ast27.py +++ b/typed_ast/ast27.py @@ -37,6 +37,13 @@ def parse(source, filename='', mode='exec'): return _ast27.parse(source, filename, mode) +def register_type_comment_prefix(prefix): + """ + Register a keyword to scan for in comments, for things like: # type: ignore + """ + return _ast27.register_type_comment_prefix(prefix) + + def literal_eval(node_or_string): """ Safely evaluate an expression node or a string containing a Python diff --git a/typed_ast/ast35.py b/typed_ast/ast35.py index 0865a2d9..4acab1c5 100644 --- a/typed_ast/ast35.py +++ b/typed_ast/ast35.py @@ -46,6 +46,13 @@ def parse(source, filename='', mode='exec'): return _ast35._parse(source, filename, mode) +def register_type_comment_prefix(prefix): + """ + Register a keyword to scan for in comments, for things like: # type: ignore + """ + return _ast35.register_type_comment_prefix(prefix) + + def 
literal_eval(node_or_string): """ Safely evaluate an expression node or a string containing a Python