Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve parsing & formatting of help page descriptions #680

Open
wants to merge 3 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions linodecli/baked/operation.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import openapi3.paths
from openapi3.paths import Operation, Parameter

from linodecli.baked.parsing import simplify_description
from linodecli.baked.request import OpenAPIFilteringRequest, OpenAPIRequest
from linodecli.baked.response import OpenAPIResponse
from linodecli.exit_codes import ExitCodes
Expand Down Expand Up @@ -356,8 +357,12 @@ def __init__(
self.action_aliases = {}
self.action = action

self.summary = operation.summary
self.description = operation.description.split(".")[0]
# Ensure the summary has punctuation
self.summary = operation.summary.rstrip(".") + "."

self.description_rich, self.description = simplify_description(
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't consume OpenAPIOperation(...).description currently but I figured it'd be good to future-proof 🙂

operation.description or ""
)

# The apiVersion attribute should not be specified as a positional argument
self.params = [
Expand Down
54 changes: 31 additions & 23 deletions linodecli/baked/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@
import functools
Copy link
Contributor Author

@lgarber-akamai lgarber-akamai Nov 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The changes in this file originated in #639, which provides a bit more context for why they were made.

import re
from html import unescape
from typing import List, Tuple
from typing import List, Optional, Tuple

# Sentence delimiter, split on a period followed by any type of
# whitespace (space, new line, tab, etc.)
REGEX_SENTENCE_DELIMITER = re.compile(r"\W(?:\s|$)")
REGEX_SENTENCE_DELIMITER = re.compile(r"\.(?:\s|$)", flags=re.M)

# Matches on pattern __prefix__ at the beginning of a description
# or after a comma
REGEX_TECHDOCS_PREFIX = re.compile(r"(?:, |\A)__([\w-]+)__")
REGEX_TECHDOCS_PREFIX = re.compile(r"(?:, |\A)__([^_]+)__")

# Matches on pattern [link title](https://.../)
REGEX_MARKDOWN_LINK = re.compile(r"\[(?P<text>.*?)]\((?P<link>.*?)\)")
Expand Down Expand Up @@ -121,23 +121,35 @@ def get_short_description(description: str) -> str:
:rtype: set
"""

target_lines = description.splitlines()
relevant_lines = None

for i, line in enumerate(target_lines):
def __simplify(sentence: str) -> Optional[str]:
# Edge case for descriptions starting with a note
if line.lower().startswith("__note__"):
continue
if sentence.lower().startswith("__note__"):
return None

sentence = strip_techdocs_prefixes(sentence)

relevant_lines = target_lines[i:]
break
# Check that the sentence still has content after stripping prefixes
if len(sentence) < 2:
return None

if relevant_lines is None:
return sentence + "."

# Find the first relevant sentence
result = next(
simplified
for simplified in iter(
__simplify(sentence)
for sentence in REGEX_SENTENCE_DELIMITER.split(description)
)
if simplified is not None
)

if result is None:
raise ValueError(
f"description does not contain any relevant lines: {description}",
)

return REGEX_SENTENCE_DELIMITER.split("\n".join(relevant_lines), 1)[0] + "."
return result


def strip_techdocs_prefixes(description: str) -> str:
Expand All @@ -150,14 +162,10 @@ def strip_techdocs_prefixes(description: str) -> str:
:returns: The stripped description
:rtype: str
"""
result_description = REGEX_TECHDOCS_PREFIX.sub(
"", description.lstrip()
).lstrip()

return result_description
return REGEX_TECHDOCS_PREFIX.sub("", description.lstrip()).lstrip()


def process_arg_description(description: str) -> Tuple[str, str]:
def simplify_description(description: str) -> Tuple[str, str]:
"""
Processes the given raw request argument description into one suitable
for help pages, etc.
Expand All @@ -173,12 +181,12 @@ def process_arg_description(description: str) -> Tuple[str, str]:
return "", ""

result = get_short_description(description)
result = strip_techdocs_prefixes(result)
result = result.replace("\n", " ").replace("\r", " ")

description, links = extract_markdown_links(result)
# NOTE: Links should only be separated from Rich Markdown links
result_no_links, links = extract_markdown_links(result)

if len(links) > 0:
description += f" See: {'; '.join(links)}"
result_no_links += f" See: {'; '.join(links)}"

return unescape(markdown_to_rich_markup(description)), unescape(description)
return unescape(markdown_to_rich_markup(result_no_links)), unescape(result)
4 changes: 2 additions & 2 deletions linodecli/baked/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Request details for a CLI Operation
"""

from linodecli.baked.parsing import process_arg_description
from linodecli.baked.parsing import simplify_description


class OpenAPIRequestArg:
Expand Down Expand Up @@ -46,7 +46,7 @@ def __init__(
#: the larger response model
self.path = prefix + "." + name if prefix else name

description_rich, description = process_arg_description(
description_rich, description = simplify_description(
schema.description or ""
)

Expand Down
29 changes: 28 additions & 1 deletion tests/unit/test_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
extract_markdown_links,
get_short_description,
markdown_to_rich_markup,
simplify_description,
strip_techdocs_prefixes,
)

Expand Down Expand Up @@ -65,7 +66,7 @@ def test_get_first_sentence(self):

assert (
get_short_description(
"__Note__. This might be a sentence.\nThis is a sentence."
"__Note__ This might be a sentence.\nThis is a sentence."
)
== "This is a sentence."
)
Expand Down Expand Up @@ -101,3 +102,29 @@ def test_markdown_to_rich_markup(self):
== "very [i]cool[/] [b]test[/] [i]string[/]*\n[b]wow[/] [i]cool[/]* "
"[italic deep_pink3 on grey15]code block[/] `"
)

def test_simplify_description(self):
# This description was not parsed correctly prior to PR #680.
assert simplify_description(
"The authentication methods that are allowed when connecting to "
"[the Linode Shell (Lish)](https://www.linode.com/docs/guides/lish/).\n"
"\n"
"- `keys_only` is the most secure if you intend to use Lish.\n"
"- `disabled` is recommended if you do not intend to use Lish at all.\n"
"- If this account's Cloud Manager authentication type is set to a Third-Party Authentication method, "
"`password_keys` cannot be used as your Lish authentication method. To view this account's Cloud Manager "
"`authentication_type` field, send a request to the "
"[Get a profile](https://techdocs.akamai.com/linode-api/reference/get-profile) operation."
) == (
"The authentication methods that are allowed when connecting to the Linode Shell (Lish). "
"See: https://www.linode.com/docs/guides/lish/",
"The authentication methods that are allowed when connecting to "
"[the Linode Shell (Lish)](https://www.linode.com/docs/guides/lish/).",
)

assert simplify_description(
"A unique, user-defined `string` referring to the Managed Database."
) == (
"A unique, user-defined [italic deep_pink3 on grey15]string[/] referring to the Managed Database.",
"A unique, user-defined `string` referring to the Managed Database.",
)
Loading