From f14add0259fe73483983c60cea7969407ec8cbe7 Mon Sep 17 00:00:00 2001 From: wim glenn Date: Thu, 23 Mar 2023 23:36:06 -0500 Subject: [PATCH 1/4] modern packaging --- .github/workflows/test.yml | 16 ++++++++++++---- MANIFEST.in | 2 -- pyproject.toml | 28 ++++++++++++++++++++++++++++ setup.py | 36 ------------------------------------ test_parse.py | 0 tox.ini | 6 ++++-- 6 files changed, 44 insertions(+), 44 deletions(-) delete mode 100644 MANIFEST.in create mode 100644 pyproject.toml delete mode 100755 setup.py mode change 100755 => 100644 test_parse.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a2ee85f..610f710 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,5 +1,11 @@ name: parse -on: [push] +on: + push: + branches: + - master + pull_request: + branches: + - master jobs: run-test: runs-on: ${{ matrix.os }} @@ -8,10 +14,12 @@ jobs: os: [ubuntu-latest, macos-latest, windows-latest] python-version: [3.5, 3.6, 3.7, 3.8, 3.9, pypy3] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - name: Run tests - run: python test_parse.py + run: | + python test_parse.py + python -m doctest README.rst diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index d4691d8..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1,2 +0,0 @@ -include README.rst LICENSE -include *.py diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..be00d23 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,28 @@ +[build-system] +requires = ["setuptools>=61.2"] +build-backend = "setuptools.build_meta" + +[project] +name = "parse" +dynamic = ["version"] +readme = "README.rst" +authors = [{name = "Richard Jones", email = "richard@python.org"}] +description = "parse() is the opposite of format()" +license = {file = "LICENSE"} +classifiers = [ 
+ "Environment :: Web Environment", + "Intended Audience :: Developers", + "Programming Language :: Python :: 3", + "Topic :: Software Development :: Code Generators", + "Topic :: Software Development :: Libraries :: Python Modules", + "License :: OSI Approved :: MIT License", +] + +[project.urls] +homepage = "https://github.com/r1chardj0n3s/parse" + +[tool.setuptools] +py-modules = ["parse"] + +[tool.setuptools.dynamic] +version = {attr = "parse.__version__"} diff --git a/setup.py b/setup.py deleted file mode 100755 index fadc186..0000000 --- a/setup.py +++ /dev/null @@ -1,36 +0,0 @@ -#! /usr/bin/env python - -from __future__ import with_statement - -from setuptools import setup - -from parse import __version__, __doc__ - -with open('README.rst', 'w') as f: - f.write(__doc__) - -# perform the setup action -setup( - name = "parse", - version = __version__, - description = "parse() is the opposite of format()", - long_description = __doc__, - author = "Richard Jones", - author_email = "richard@python.org", - py_modules = ['parse'], - url = 'https://github.com/r1chardj0n3s/parse', - classifiers = [ - 'Environment :: Web Environment', - 'Intended Audience :: Developers', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Topic :: Software Development :: Code Generators', - 'Topic :: Software Development :: Libraries :: Python Modules', - 'License :: OSI Approved :: BSD License', - ], -) - -# vim: set filetype=python ts=4 sw=4 et si diff --git a/test_parse.py b/test_parse.py old mode 100755 new mode 100644 diff --git a/tox.ini b/tox.ini index ff54428..9b82252 100644 --- a/tox.ini +++ b/tox.ini @@ -10,7 +10,9 @@ basepython=python3.6 commands = python -mdoctest README.rst [testenv:readme] -deps = twine +deps = + twine + build commands = - python setup.py sdist + python -m build twine check dist/* From 
547c2b12e81597cd6cafbe0039e0bf61f1863a45 Mon Sep 17 00:00:00 2001 From: wim glenn Date: Thu, 23 Mar 2023 23:40:31 -0500 Subject: [PATCH 2/4] remove EOL pythons from CI --- .github/workflows/test.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 610f710..e8e21df 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -12,7 +12,13 @@ jobs: strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python-version: [3.5, 3.6, 3.7, 3.8, 3.9, pypy3] + python-version: + - "3.7" + - "3.8" + - "3.9" + - "3.10" + - "3.11" + - "pypy3" steps: - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} From a5ac5662233a3658c85fcf8cce85cbd3026d03ae Mon Sep 17 00:00:00 2001 From: wim glenn Date: Fri, 24 Mar 2023 00:43:39 -0500 Subject: [PATCH 3/4] specify pypy-3.9 explicitly --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e8e21df..e58f328 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -18,7 +18,7 @@ jobs: - "3.9" - "3.10" - "3.11" - - "pypy3" + - "pypy-3.9" steps: - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} From 523fc43de287bcfcfd311d4e47261eef8246f403 Mon Sep 17 00:00:00 2001 From: wim glenn Date: Fri, 16 Jun 2023 23:36:28 -0500 Subject: [PATCH 4/4] remove module docstring since it's duplicated in README.rst, and the idea of keeping them in sync by using the setup.py execution will not work with a declarative build system --- README.rst | 2 +- parse.py | 469 ----------------------------------------------------- 2 files changed, 1 insertion(+), 470 deletions(-) diff --git a/README.rst b/README.rst index df881ea..eebcdd1 100644 --- a/README.rst +++ b/README.rst @@ -3,7 +3,7 @@ Parse strings using a specification based on the Python format() syntax. 
``parse()`` is the opposite of ``format()`` The module is set up to only export ``parse()``, ``search()``, ``findall()``, -and ``with_pattern()`` when ``import \*`` is used: +and ``with_pattern()`` when ``import *`` is used: >>> from parse import * diff --git a/parse.py b/parse.py index e5db401..9a301e6 100644 --- a/parse.py +++ b/parse.py @@ -1,472 +1,3 @@ -r'''Parse strings using a specification based on the Python format() syntax. - - ``parse()`` is the opposite of ``format()`` - -The module is set up to only export ``parse()``, ``search()``, ``findall()``, -and ``with_pattern()`` when ``import \*`` is used: - ->>> from parse import * - -From there it's a simple thing to parse a string: - -.. code-block:: pycon - - >>> parse("It's {}, I love it!", "It's spam, I love it!") - - >>> _[0] - 'spam' - -Or to search a string for some pattern: - -.. code-block:: pycon - - >>> search('Age: {:d}\n', 'Name: Rufus\nAge: 42\nColor: red\n') - - -Or find all the occurrences of some pattern in a string: - -.. code-block:: pycon - - >>> ''.join(r[0] for r in findall(">{}<", "

<p>the <b>bold</b> text</p>

")) - 'the bold text' - -If you're going to use the same pattern to match lots of strings you can -compile it once: - -.. code-block:: pycon - - >>> from parse import compile - >>> p = compile("It's {}, I love it!") - >>> print(p) - - >>> p.parse("It's spam, I love it!") - - -("compile" is not exported for ``import *`` usage as it would override the -built-in ``compile()`` function) - -The default behaviour is to match strings case insensitively. You may match with -case by specifying `case_sensitive=True`: - -.. code-block:: pycon - - >>> parse('SPAM', 'spam', case_sensitive=True) is None - True - - -Format Syntax -------------- - -A basic version of the `Format String Syntax`_ is supported with anonymous -(fixed-position), named and formatted fields:: - - {[field name]:[format spec]} - -Field names must be a valid Python identifiers, including dotted names; -element indexes imply dictionaries (see below for example). - -Numbered fields are also not supported: the result of parsing will include -the parsed fields in the order they are parsed. - -The conversion of fields to types other than strings is done based on the -type in the format specification, which mirrors the ``format()`` behaviour. -There are no "!" field conversions like ``format()`` has. - -Some simple parse() format string examples: - -.. code-block:: pycon - - >>> parse("Bring me a {}", "Bring me a shrubbery") - - >>> r = parse("The {} who {} {}", "The knights who say Ni!") - >>> print(r) - - >>> print(r.fixed) - ('knights', 'say', 'Ni!') - >>> print(r[0]) - knights - >>> print(r[1:]) - ('say', 'Ni!') - >>> r = parse("Bring out the holy {item}", "Bring out the holy hand grenade") - >>> print(r) - - >>> print(r.named) - {'item': 'hand grenade'} - >>> print(r['item']) - hand grenade - >>> 'item' in r - True - -Note that `in` only works if you have named fields. - -Dotted names and indexes are possible with some limits. Only word identifiers -are supported (ie. 
no numeric indexes) and the application must make additional -sense of the result: - -.. code-block:: pycon - - >>> r = parse("Mmm, {food.type}, I love it!", "Mmm, spam, I love it!") - >>> print(r) - - >>> print(r.named) - {'food.type': 'spam'} - >>> print(r['food.type']) - spam - >>> r = parse("My quest is {quest[name]}", "My quest is to seek the holy grail!") - >>> print(r) - - >>> print(r['quest']) - {'name': 'to seek the holy grail!'} - >>> print(r['quest']['name']) - to seek the holy grail! - -If the text you're matching has braces in it you can match those by including -a double-brace ``{{`` or ``}}`` in your format string, just like format() does. - - -Format Specification --------------------- - -Most often a straight format-less ``{}`` will suffice where a more complex -format specification might have been used. - -Most of `format()`'s `Format Specification Mini-Language`_ is supported: - - [[fill]align][0][width][.precision][type] - -The differences between `parse()` and `format()` are: - -- The align operators will cause spaces (or specified fill character) to be - stripped from the parsed value. The width is not enforced; it just indicates - there may be whitespace or "0"s to strip. -- Numeric parsing will automatically handle a "0b", "0o" or "0x" prefix. - That is, the "#" format character is handled automatically by d, b, o - and x formats. For "d" any will be accepted, but for the others the correct - prefix must be present if at all. -- Numeric sign is handled automatically. -- The thousands separator is handled automatically if the "n" type is used. -- The types supported are a slightly different mix to the format() types. Some - format() types come directly over: "d", "n", "%", "f", "e", "b", "o" and "x". - In addition some regular expression character group types "D", "w", "W", "s" - and "S" are also available. -- The "e" and "g" types are case-insensitive so there is not need for - the "E" or "G" types. 
The "e" type handles Fortran formatted numbers (no - leading 0 before the decimal point). - -===== =========================================== ======== -Type Characters Matched Output -===== =========================================== ======== -l Letters (ASCII) str -w Letters, numbers and underscore str -W Not letters, numbers and underscore str -s Whitespace str -S Non-whitespace str -d Digits (effectively integer numbers) int -D Non-digit str -n Numbers with thousands separators (, or .) int -% Percentage (converted to value/100.0) float -f Fixed-point numbers float -F Decimal numbers Decimal -e Floating-point numbers with exponent float - e.g. 1.1e-10, NAN (all case insensitive) -g General number format (either d, f or e) float -b Binary numbers int -o Octal numbers int -x Hexadecimal numbers (lower and upper case) int -ti ISO 8601 format date/time datetime - e.g. 1972-01-20T10:21:36Z ("T" and "Z" - optional) -te RFC2822 e-mail format date/time datetime - e.g. Mon, 20 Jan 1972 10:21:36 +1000 -tg Global (day/month) format date/time datetime - e.g. 20/1/1972 10:21:36 AM +1:00 -ta US (month/day) format date/time datetime - e.g. 1/20/1972 10:21:36 PM +10:30 -tc ctime() format date/time datetime - e.g. Sun Sep 16 01:03:52 1973 -th HTTP log format date/time datetime - e.g. 21/Nov/2011:00:07:11 +0000 -ts Linux system log format date/time datetime - e.g. Nov 9 03:37:44 -tt Time time - e.g. 10:21:36 PM -5:30 -===== =========================================== ======== - -Some examples of typed parsing with ``None`` returned if the typing -does not match: - -.. code-block:: pycon - - >>> parse('Our {:d} {:w} are...', 'Our 3 weapons are...') - - >>> parse('Our {:d} {:w} are...', 'Our three weapons are...') - >>> parse('Meet at {:tg}', 'Meet at 1/2/2011 11:00 PM') - - -And messing about with alignment: - -.. 
code-block:: pycon - - >>> parse('with {:>} herring', 'with a herring') - - >>> parse('spam {:^} spam', 'spam lovely spam') - - -Note that the "center" alignment does not test to make sure the value is -centered - it just strips leading and trailing whitespace. - -Width and precision may be used to restrict the size of matched text -from the input. Width specifies a minimum size and precision specifies -a maximum. For example: - -.. code-block:: pycon - - >>> parse('{:.2}{:.2}', 'look') # specifying precision - - >>> parse('{:4}{:4}', 'look at that') # specifying width - - >>> parse('{:4}{:.4}', 'look at that') # specifying both - - >>> parse('{:2d}{:2d}', '0440') # parsing two contiguous numbers - - -Some notes for the date and time types: - -- the presence of the time part is optional (including ISO 8601, starting - at the "T"). A full datetime object will always be returned; the time - will be set to 00:00:00. You may also specify a time without seconds. -- when a seconds amount is present in the input fractions will be parsed - to give microseconds. -- except in ISO 8601 the day and month digits may be 0-padded. -- the date separator for the tg and ta formats may be "-" or "/". -- named months (abbreviations or full names) may be used in the ta and tg - formats in place of numeric months. -- as per RFC 2822 the e-mail format may omit the day (and comma), and the - seconds but nothing else. -- hours greater than 12 will be happily accepted. -- the AM/PM are optional, and if PM is found then 12 hours will be added - to the datetime object's hours amount - even if the hour is greater - than 12 (for consistency.) -- in ISO 8601 the "Z" (UTC) timezone part may be a numeric offset -- timezones are specified as "+HH:MM" or "-HH:MM". The hour may be one or two - digits (0-padded is OK.) Also, the ":" is optional. -- the timezone is optional in all except the e-mail format (it defaults to - UTC.) -- named timezones are not handled yet. 
- -Note: attempting to match too many datetime fields in a single parse() will -currently result in a resource allocation issue. A TooManyFields exception -will be raised in this instance. The current limit is about 15. It is hoped -that this limit will be removed one day. - -.. _`Format String Syntax`: - http://docs.python.org/library/string.html#format-string-syntax -.. _`Format Specification Mini-Language`: - http://docs.python.org/library/string.html#format-specification-mini-language - - -Result and Match Objects ------------------------- - -The result of a ``parse()`` and ``search()`` operation is either ``None`` (no match), a -``Result`` instance or a ``Match`` instance if ``evaluate_result`` is False. - -The ``Result`` instance has three attributes: - -``fixed`` - A tuple of the fixed-position, anonymous fields extracted from the input. -``named`` - A dictionary of the named fields extracted from the input. -``spans`` - A dictionary mapping the names and fixed position indices matched to a - 2-tuple slice range of where the match occurred in the input. - The span does not include any stripped padding (alignment or width). - -The ``Match`` instance has one method: - -``evaluate_result()`` - Generates and returns a ``Result`` instance for this ``Match`` object. - - - -Custom Type Conversions ------------------------ - -If you wish to have matched fields automatically converted to your own type you -may pass in a dictionary of type conversion information to ``parse()`` and -``compile()``. - -The converter will be passed the field string matched. Whatever it returns -will be substituted in the ``Result`` instance for that field. - -Your custom type conversions may override the builtin types if you supply one -with the same identifier: - -.. code-block:: pycon - - >>> def shouty(string): - ... return string.upper() - ... 
- >>> parse('{:shouty} world', 'hello world', dict(shouty=shouty)) - - -If the type converter has the optional ``pattern`` attribute, it is used as -regular expression for better pattern matching (instead of the default one): - -.. code-block:: pycon - - >>> def parse_number(text): - ... return int(text) - >>> parse_number.pattern = r'\d+' - >>> parse('Answer: {number:Number}', 'Answer: 42', dict(Number=parse_number)) - - >>> _ = parse('Answer: {:Number}', 'Answer: Alice', dict(Number=parse_number)) - >>> assert _ is None, "MISMATCH" - -You can also use the ``with_pattern(pattern)`` decorator to add this -information to a type converter function: - -.. code-block:: pycon - - >>> from parse import with_pattern - >>> @with_pattern(r'\d+') - ... def parse_number(text): - ... return int(text) - >>> parse('Answer: {number:Number}', 'Answer: 42', dict(Number=parse_number)) - - -A more complete example of a custom type might be: - -.. code-block:: pycon - - >>> yesno_mapping = { - ... "yes": True, "no": False, - ... "on": True, "off": False, - ... "true": True, "false": False, - ... } - >>> @with_pattern(r"|".join(yesno_mapping)) - ... def parse_yesno(text): - ... return yesno_mapping[text.lower()] - - -If the type converter ``pattern`` uses regex-grouping (with parenthesis), -you should indicate this by using the optional ``regex_group_count`` parameter -in the ``with_pattern()`` decorator: - -.. code-block:: pycon - - >>> @with_pattern(r'((\d+))', regex_group_count=2) - ... def parse_number2(text): - ... return int(text) - >>> parse('Answer: {:Number2} {:Number2}', 'Answer: 42 43', dict(Number2=parse_number2)) - - -Otherwise, this may cause parsing problems with unnamed/fixed parameters. - - -Potential Gotchas ------------------ - -``parse()`` will always match the shortest text necessary (from left to right) -to fulfil the parse pattern, so for example: - - -.. 
code-block:: pycon - - >>> pattern = '{dir1}/{dir2}' - >>> data = 'root/parent/subdir' - >>> sorted(parse(pattern, data).named.items()) - [('dir1', 'root'), ('dir2', 'parent/subdir')] - -So, even though `{'dir1': 'root/parent', 'dir2': 'subdir'}` would also fit -the pattern, the actual match represents the shortest successful match for -``dir1``. - ----- - -- 1.19.0 Added slice access to fixed results (thanks @jonathangjertsen). - Also corrected matching of *full string* vs. *full line* (thanks @giladreti) - Fix issue with using digit field numbering and types -- 1.18.0 Correct bug in int parsing introduced in 1.16.0 (thanks @maxxk) -- 1.17.0 Make left- and center-aligned search consume up to next space -- 1.16.0 Make compiled parse objects pickleable (thanks @martinResearch) -- 1.15.0 Several fixes for parsing non-base 10 numbers (thanks @vladikcomper) -- 1.14.0 More broad acceptance of Fortran number format (thanks @purpleskyfall) -- 1.13.1 Project metadata correction. -- 1.13.0 Handle Fortran formatted numbers with no leading 0 before decimal - point (thanks @purpleskyfall). - Handle comparison of FixedTzOffset with other types of object. -- 1.12.1 Actually use the `case_sensitive` arg in compile (thanks @jacquev6) -- 1.12.0 Do not assume closing brace when an opening one is found (thanks @mattsep) -- 1.11.1 Revert having unicode char in docstring, it breaks Bamboo builds(?!) -- 1.11.0 Implement `__contains__` for Result instances. -- 1.10.0 Introduce a "letters" matcher, since "w" matches numbers - also. -- 1.9.1 Fix deprecation warnings around backslashes in regex strings - (thanks Mickael Schoentgen). Also fix some documentation formatting - issues. -- 1.9.0 We now honor precision and width specifiers when parsing numbers - and strings, allowing parsing of concatenated elements of fixed width - (thanks Julia Signell) -- 1.8.4 Add LICENSE file at request of packagers. - Correct handling of AM/PM to follow most common interpretation. 
- Correct parsing of hexadecimal that looks like a binary prefix. - Add ability to parse case sensitively. - Add parsing of numbers to Decimal with "F" (thanks John Vandenberg) -- 1.8.3 Add regex_group_count to with_pattern() decorator to support - user-defined types that contain brackets/parenthesis (thanks Jens Engel) -- 1.8.2 add documentation for including braces in format string -- 1.8.1 ensure bare hexadecimal digits are not matched -- 1.8.0 support manual control over result evaluation (thanks Timo Furrer) -- 1.7.0 parse dict fields (thanks Mark Visser) and adapted to allow - more than 100 re groups in Python 3.5+ (thanks David King) -- 1.6.6 parse Linux system log dates (thanks Alex Cowan) -- 1.6.5 handle precision in float format (thanks Levi Kilcher) -- 1.6.4 handle pipe "|" characters in parse string (thanks Martijn Pieters) -- 1.6.3 handle repeated instances of named fields, fix bug in PM time - overflow -- 1.6.2 fix logging to use local, not root logger (thanks Necku) -- 1.6.1 be more flexible regarding matched ISO datetimes and timezones in - general, fix bug in timezones without ":" and improve docs -- 1.6.0 add support for optional ``pattern`` attribute in user-defined types - (thanks Jens Engel) -- 1.5.3 fix handling of question marks -- 1.5.2 fix type conversion error with dotted names (thanks Sebastian Thiel) -- 1.5.1 implement handling of named datetime fields -- 1.5 add handling of dotted field names (thanks Sebastian Thiel) -- 1.4.1 fix parsing of "0" in int conversion (thanks James Rowe) -- 1.4 add __getitem__ convenience access on Result. -- 1.3.3 fix Python 2.5 setup.py issue. -- 1.3.2 fix Python 3.2 setup.py issue. -- 1.3.1 fix a couple of Python 3.2 compatibility issues. -- 1.3 added search() and findall(); removed compile() from ``import *`` - export as it overwrites builtin. 
-- 1.2 added ability for custom and override type conversions to be - provided; some cleanup -- 1.1.9 to keep things simpler number sign is handled automatically; - significant robustification in the face of edge-case input. -- 1.1.8 allow "d" fields to have number base "0x" etc. prefixes; - fix up some field type interactions after stress-testing the parser; - implement "%" type. -- 1.1.7 Python 3 compatibility tweaks (2.5 to 2.7 and 3.2 are supported). -- 1.1.6 add "e" and "g" field types; removed redundant "h" and "X"; - removed need for explicit "#". -- 1.1.5 accept textual dates in more places; Result now holds match span - positions. -- 1.1.4 fixes to some int type conversion; implemented "=" alignment; added - date/time parsing with a variety of formats handled. -- 1.1.3 type conversion is automatic based on specified field types. Also added - "f" and "n" types. -- 1.1.2 refactored, added compile() and limited ``from parse import *`` -- 1.1.1 documentation improvements -- 1.1.0 implemented more of the `Format Specification Mini-Language`_ - and removed the restriction on mixing fixed-position and named fields -- 1.0.0 initial release - -This code is copyright 2012-2021 Richard Jones -See the end of the source file for the license of use. -''' - from __future__ import absolute_import __version__ = '1.19.0'