Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: strip prefixes and suffixes in links #5486

Merged
merged 7 commits into from
Jul 14, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
- Minor: Add channel points indication for new bits power-up redemptions. (#5471)
- Minor: Added `/warn <username> <reason>` command for mods. This prevents the user from chatting until they acknowledge the warning. (#5474)
- Minor: Introduce HTTP API for plugins. (#5383)
- Minor: Links can now have prefixes and suffixes such as parentheses. (#5486)
- Bugfix: Fixed tab move animation occasionally failing to start after closing a tab. (#5426)
- Bugfix: If a network request errors with 200 OK, Qt's error code is now reported instead of the HTTP status. (#5378)
- Bugfix: Fixed restricted users usernames not being clickable. (#5405)
Expand Down
77 changes: 57 additions & 20 deletions src/common/LinkParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,19 +113,57 @@ bool startsWithPort(QStringView string)
return true;
}

/// @brief Strips ignored characters off @a source
///
/// As per https://github.github.com/gfm/#autolinks-extension-:
///
/// '<', '*', '_', '~', and '(' are ignored at the beginning
/// '>', '?', '!', '.', ',', ':', '*', '_', '~', and ')' are ignored at the end
///
/// A difference to GFM is that the source isn't scanned for parentheses.
void strip(QStringView &source)
{
while (!source.isEmpty())
{
auto c = source.first();
if (c == u'<' || c == u'*' || c == u'_' || c == u'~' || c == u'(')
{
source = source.mid(1);
continue;
}
break;
}

while (!source.isEmpty())
{
auto c = source.last();
if (c == u'>' || c == u'?' || c == u'!' || c == u'.' || c == u',' ||
c == u':' || c == u'*' || c == u'_' || c == u'~' || c == u')')
{
source.chop(1);
continue;
}
break;
}
}

} // namespace

namespace chatterino {
namespace chatterino::linkparser {

LinkParser::LinkParser(const QString &unparsedString)
std::optional<Parsed> parse(const QString &source) noexcept
{
ParsedLink result;
std::optional<Parsed> result;
// This is not implemented with a regex to increase performance.
QStringView remaining(unparsedString);
QStringView protocol(remaining);

QStringView link{source};
strip(link);

QStringView remaining = link;
QStringView protocol;

// Check protocol for https?://
if (remaining.startsWith(QStringLiteral("http"), Qt::CaseInsensitive) &&
if (remaining.startsWith(u"http", Qt::CaseInsensitive) &&
remaining.length() >= 4 + 3 + 1) // 'http' + '://' + [any]
{
// optimistic view assuming there's a protocol (http or https)
Expand All @@ -136,11 +174,11 @@ LinkParser::LinkParser(const QString &unparsedString)
withProto = withProto.mid(1);
}

if (withProto.startsWith(QStringLiteral("://")))
if (withProto.startsWith(u"://"))
{
// there's really a protocol => consume it
remaining = withProto.mid(3);
result.protocol = {protocol.begin(), remaining.begin()};
protocol = {link.begin(), remaining.begin()};
}
}

Expand All @@ -161,7 +199,7 @@ LinkParser::LinkParser(const QString &unparsedString)
{
if (lastWasDot) // no double dots ..
{
return;
return result;
}
lastDotPos = i;
lastWasDot = true;
Expand All @@ -181,7 +219,7 @@ LinkParser::LinkParser(const QString &unparsedString)

if (!startsWithPort(remaining))
{
return;
return result;
}

break;
Expand All @@ -198,23 +236,22 @@ LinkParser::LinkParser(const QString &unparsedString)

if (lastWasDot || lastDotPos <= 0)
{
return;
return result;
}

// check host/tld
if ((nDots == 3 && isValidIpv4(host)) ||
isValidTld(host.mid(lastDotPos + 1)))
{
result.host = host;
result.rest = rest;
result.source = unparsedString;
this->result_ = std::move(result);
result = Parsed{
.protocol = protocol,
.host = host,
.rest = rest,
.link = link,
};
}
}

const std::optional<ParsedLink> &LinkParser::result() const
{
return this->result_;
return result;
}

} // namespace chatterino
} // namespace chatterino::linkparser
117 changes: 99 additions & 18 deletions src/common/LinkParser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,43 +4,124 @@

#include <optional>

namespace chatterino {
namespace chatterino::linkparser {

struct ParsedLink {
/// @brief Represents a parsed link
///
/// A parsed link is represented as views over the source string for its
/// different segments. In this simplified model, a link consists of an optional
/// @a protocol, a mandatory @a host and an optional @a rest. These segments are
/// always next to eachother in the input string, however together, they don't
/// span the whole input as it could contain prefixes or suffixes.
///
/// Prefixes and suffixes are identical to the ones in GitHub Flavored Markdown
/// (GFM - https://github.github.com/gfm/#autolinks-extension-).
/// Matching is done case insensitive (e.g. 'HTTp://a.com' would be valid).
///
/// A @a protocol can either be empty, "http://", or "https://".
/// A @a host can either be an IPv4 address or a hostname. The hostname must end
/// in a valid top level domain. Otherwise, there are no restrictions on it.
/// The @a rest can start with an optional port followed by either a '/', '?',
/// or '#'.
///
/// @b Example
///
/// ```text
/// (https://wiki.chatterino.com/Help/#overview)
/// ▏▏proto ▕ host ▏ rest ▏▏
/// ▏▏ link ▏▏
/// ▏ source ▏
/// ```
struct Parsed {
/// The parsed protocol of the link. Can be empty.
///
/// ```text
/// https://www.forsen.tv/commands
/// ^------^
/// ▏╌╌╌╌╌╌▕
///
/// www.forsen.tv/commands
/// (empty)
/// ```
QStringView protocol;

/// The parsed host of the link. Can not be empty.
///
/// ```text
/// https://www.forsen.tv/commands
/// ^-----------^
/// ▏╌╌╌╌╌╌╌╌╌╌╌▕
/// ```
QStringView host;

/// The remainder of the link. Can be empty.
///
/// ```text
/// https://www.forsen.tv/commands
/// ^-------^
/// ▏╌╌╌╌╌╌╌▕
///
/// https://www.forsen.tv
/// (empty)
/// ```
QStringView rest;

/// The original unparsed link.
/// The matched link. Can not be empty.
///
/// https://www.forsen.tv/commands
/// ^----------------------------^
QString source;
};
/// ```text
/// (https://www.forsen.tv/commands)
/// ▏╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌▕
/// ```
QStringView link;

/// Checks if the parsed link contains a prefix
bool hasPrefix(const QString &source) const noexcept
{
return this->link.begin() != source.begin();
}

class LinkParser
{
public:
explicit LinkParser(const QString &unparsedString);
/// The prefix before the parsed link inside @a source. May be empty.
///
/// ```text
/// (https://www.forsen.tv/commands)
/// ^
///
/// https://www.forsen.tv/commands
/// (empty)
/// ```
QStringView prefix(const QString &source) const noexcept
{
return {source.data(), this->link.begin()};
}

const std::optional<ParsedLink> &result() const;
/// Checks if the parsed link contains a suffix
bool hasSuffix(const QString &source) const noexcept
{
return this->link.end() != source.end();
}

private:
std::optional<ParsedLink> result_{};
/// The suffix after the parsed link inside @a source. May be empty.
///
/// ```text
/// (https://www.forsen.tv/commands)
/// ^
///
/// https://www.forsen.tv/commands
/// (empty)
/// ```
QStringView suffix(const QString &source) const noexcept
{
return {
this->link.begin() + this->link.size(),
source.data() + source.length(),
};
}
};

} // namespace chatterino
/// @brief Parses a link from @a source into its segments
///
/// If no link is contained in @a source, `std::nullopt` will be returned.
/// The returned value is valid as long as @a source exists, as it contains
/// views into @a source.
///
/// For the accepted links, see Parsed.
std::optional<Parsed> parse(const QString &source) noexcept;

} // namespace chatterino::linkparser
6 changes: 3 additions & 3 deletions src/controllers/commands/builtin/twitch/SendWhisper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,10 +152,10 @@ bool appendWhisperMessageWordsLocally(const QStringList &words)
void operator()(const QString &string,
MessageBuilder &b) const
{
LinkParser parser(string);
if (parser.result())
auto link = linkparser::parse(string);
if (link)
{
b.addLink(*parser.result());
b.addLink(*link, string);
}
else
{
Expand Down
Loading
Loading