From 4c2b626e0ab648dcf8d362cb921ba225af819e52 Mon Sep 17 00:00:00 2001 From: Lena Garber Date: Wed, 13 Nov 2024 14:26:56 -0500 Subject: [PATCH 1/3] Improve description parsing logic --- linodecli/baked/parsing.py | 52 ++++++++++++++++++++++---------------- tests/unit/test_parsing.py | 2 +- 2 files changed, 31 insertions(+), 23 deletions(-) diff --git a/linodecli/baked/parsing.py b/linodecli/baked/parsing.py index dd41af5ef..602977918 100644 --- a/linodecli/baked/parsing.py +++ b/linodecli/baked/parsing.py @@ -5,15 +5,15 @@ import functools import re from html import unescape -from typing import List, Tuple +from typing import List, Optional, Tuple # Sentence delimiter, split on a period followed by any type of # whitespace (space, new line, tab, etc.) -REGEX_SENTENCE_DELIMITER = re.compile(r"\W(?:\s|$)") +REGEX_SENTENCE_DELIMITER = re.compile(r"\.(?:\s|$)", flags=re.M) # Matches on pattern __prefix__ at the beginning of a description # or after a comma -REGEX_TECHDOCS_PREFIX = re.compile(r"(?:, |\A)__([\w-]+)__") +REGEX_TECHDOCS_PREFIX = re.compile(r"(?:, |\A)__([^_]+)__") # Matches on pattern [link title](https://.../) REGEX_MARKDOWN_LINK = re.compile(r"\[(?P.*?)]\((?P.*?)\)") @@ -121,23 +121,35 @@ def get_short_description(description: str) -> str: :rtype: set """ - target_lines = description.splitlines() - relevant_lines = None - - for i, line in enumerate(target_lines): + def __simplify(sentence: str) -> Optional[str]: # Edge case for descriptions starting with a note - if line.lower().startswith("__note__"): - continue + if sentence.lower().startswith("__note__"): + return None + + sentence = strip_techdocs_prefixes(sentence) - relevant_lines = target_lines[i:] - break + # Check that the sentence still has content after stripping prefixes + if len(sentence) < 2: + return None - if relevant_lines is None: + return sentence + "." + + # Find the first relevant sentence + result = next( + simplified + for simplified in iter( + __simplify(sentence) + for sentence in REGEX_SENTENCE_DELIMITER.split(description) + ) + if simplified is not None + ) + + if result is None: raise ValueError( f"description does not contain any relevant lines: {description}", ) - return REGEX_SENTENCE_DELIMITER.split("\n".join(relevant_lines), 1)[0] + "." + return result def strip_techdocs_prefixes(description: str) -> str: @@ -150,11 +162,7 @@ def strip_techdocs_prefixes(description: str) -> str: :returns: The stripped description :rtype: str """ - result_description = REGEX_TECHDOCS_PREFIX.sub( - "", description.lstrip() - ).lstrip() - - return result_description + return REGEX_TECHDOCS_PREFIX.sub("", description.lstrip()).lstrip() def process_arg_description(description: str) -> Tuple[str, str]: @@ -173,12 +181,12 @@ def process_arg_description(description: str) -> Tuple[str, str]: return "", "" result = get_short_description(description) - result = strip_techdocs_prefixes(result) result = result.replace("\n", " ").replace("\r", " ") - description, links = extract_markdown_links(result) + # NOTE: Links should only be separated from Rich Markdown links + result_no_links, links = extract_markdown_links(result) if len(links) > 0: - description += f" See: {'; '.join(links)}" + result_no_links += f" See: {'; '.join(links)}" - return unescape(markdown_to_rich_markup(description)), unescape(description) + return unescape(markdown_to_rich_markup(result_no_links)), unescape(result) diff --git a/tests/unit/test_parsing.py b/tests/unit/test_parsing.py index f33bf874c..817a8a7b3 100644 --- a/tests/unit/test_parsing.py +++ b/tests/unit/test_parsing.py @@ -65,7 +65,7 @@ def test_get_first_sentence(self): assert ( get_short_description( - "__Note__. This might be a sentence.\nThis is a sentence." + "__Note__ This might be a sentence.\nThis is a sentence." ) == "This is a sentence." ) From 71141ef4859be9bf4ea1d664b6433d3fb71e177d Mon Sep 17 00:00:00 2001 From: Lena Garber Date: Wed, 13 Nov 2024 14:49:32 -0500 Subject: [PATCH 2/3] Tidy up logic --- linodecli/baked/operation.py | 9 +++++++-- linodecli/baked/parsing.py | 2 +- linodecli/baked/request.py | 4 ++-- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/linodecli/baked/operation.py b/linodecli/baked/operation.py index 218b9059c..d6402ff75 100644 --- a/linodecli/baked/operation.py +++ b/linodecli/baked/operation.py @@ -17,6 +17,7 @@ import openapi3.paths from openapi3.paths import Operation, Parameter +from linodecli.baked.parsing import simplify_description from linodecli.baked.request import OpenAPIFilteringRequest, OpenAPIRequest from linodecli.baked.response import OpenAPIResponse from linodecli.exit_codes import ExitCodes @@ -356,8 +357,12 @@ def __init__( self.action_aliases = {} self.action = action - self.summary = operation.summary - self.description = operation.description.split(".")[0] + # Ensure the summary has punctuation + self.summary = operation.summary.rstrip(".") + "." + + self.description_rich, self.description = simplify_description( + operation.description or "" + ) # The apiVersion attribute should not be specified as a positional argument self.params = [ diff --git a/linodecli/baked/parsing.py b/linodecli/baked/parsing.py index 602977918..9846b064e 100644 --- a/linodecli/baked/parsing.py +++ b/linodecli/baked/parsing.py @@ -165,7 +165,7 @@ def strip_techdocs_prefixes(description: str) -> str: return REGEX_TECHDOCS_PREFIX.sub("", description.lstrip()).lstrip() -def process_arg_description(description: str) -> Tuple[str, str]: +def simplify_description(description: str) -> Tuple[str, str]: """ Processes the given raw request argument description into one suitable for help pages, etc. diff --git a/linodecli/baked/request.py b/linodecli/baked/request.py index 9cf45c207..b0bf07340 100644 --- a/linodecli/baked/request.py +++ b/linodecli/baked/request.py @@ -2,7 +2,7 @@ Request details for a CLI Operation """ -from linodecli.baked.parsing import process_arg_description +from linodecli.baked.parsing import simplify_description class OpenAPIRequestArg: @@ -46,7 +46,7 @@ def __init__( #: the larger response model self.path = prefix + "." + name if prefix else name - description_rich, description = process_arg_description( + description_rich, description = simplify_description( schema.description or "" ) From d940426cedf0930c12f8788edad19c7f90e06924 Mon Sep 17 00:00:00 2001 From: Lena Garber Date: Wed, 13 Nov 2024 15:03:01 -0500 Subject: [PATCH 3/3] Add test_simplify_description --- tests/unit/test_parsing.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/unit/test_parsing.py b/tests/unit/test_parsing.py index 817a8a7b3..7f1df97c6 100644 --- a/tests/unit/test_parsing.py +++ b/tests/unit/test_parsing.py @@ -2,6 +2,7 @@ extract_markdown_links, get_short_description, markdown_to_rich_markup, + simplify_description, strip_techdocs_prefixes, ) @@ -101,3 +102,29 @@ def test_markdown_to_rich_markup(self): == "very [i]cool[/] [b]test[/] [i]string[/]*\n[b]wow[/] [i]cool[/]* " "[italic deep_pink3 on grey15]code block[/] `" ) + + def test_simplify_description(self): + # This description was not parsed correctly prior to PR #680. + assert simplify_description( + "The authentication methods that are allowed when connecting to " + "[the Linode Shell (Lish)](https://www.linode.com/docs/guides/lish/).\n" + "\n" + "- `keys_only` is the most secure if you intend to use Lish.\n" + "- `disabled` is recommended if you do not intend to use Lish at all.\n" + "- If this account's Cloud Manager authentication type is set to a Third-Party Authentication method, " + "`password_keys` cannot be used as your Lish authentication method. To view this account's Cloud Manager " + "`authentication_type` field, send a request to the " + "[Get a profile](https://techdocs.akamai.com/linode-api/reference/get-profile) operation." + ) == ( + "The authentication methods that are allowed when connecting to the Linode Shell (Lish). " + "See: https://www.linode.com/docs/guides/lish/", + "The authentication methods that are allowed when connecting to " + "[the Linode Shell (Lish)](https://www.linode.com/docs/guides/lish/).", + ) + + assert simplify_description( + "A unique, user-defined `string` referring to the Managed Database." + ) == ( + "A unique, user-defined [italic deep_pink3 on grey15]string[/] referring to the Managed Database.", + "A unique, user-defined `string` referring to the Managed Database.", + )