Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Performance: Improve speed of automatic instance redirection #4193

Merged
merged 4 commits into from
Oct 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/invidious.cr
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,8 @@ Invidious::Jobs.register Invidious::Jobs::NotificationJob.new(CONNECTION_CHANNEL

Invidious::Jobs.register Invidious::Jobs::ClearExpiredItemsJob.new

# Registers the job that rebuilds the list of instances used for
# cross-instance redirects (see Invidious::Jobs::InstanceListRefreshJob).
Invidious::Jobs.register Invidious::Jobs::InstanceListRefreshJob.new

# NOTE(review): presumably launches every job registered above — confirm
# against Invidious::Jobs.start_all.
Invidious::Jobs.start_all

def popular_videos
Expand Down
62 changes: 0 additions & 62 deletions src/invidious/helpers/utils.cr
Original file line number Diff line number Diff line change
Expand Up @@ -323,68 +323,6 @@ def parse_range(range)
return 0_i64, nil
end

# Picks a random public instance to redirect a visitor to.
#
# Downloads the instance list from api.invidious.io and keeps the `https`
# instances that:
# - publish statistics (a `stats` object with a `software.version` string),
# - report a version whose embedded date is within 30 days of `CURRENT_VERSION`,
# - have a health ratio above 90 in the first entry of `monitor.dailyRatios`.
#
# Instances whose monitoring data is missing or malformed are kept, because
# their health cannot be judged.
#
# Returns the domain of one randomly chosen instance, or
# "redirect.invidious.io" when no instance passed the filters (which includes
# the case where the list could not be fetched or parsed).
def fetch_random_instance
  instance_list = [] of JSON::Any
  instance_api_client = nil

  begin
    instance_api_client = make_client(URI.parse("https://api.invidious.io"))

    # Timeouts
    instance_api_client.connect_timeout = 10.seconds
    instance_api_client.dns_timeout = 10.seconds

    parsed = JSON.parse(instance_api_client.get("/instances.json").body)
    # A payload that is not a JSON array is treated like a failed fetch.
    instance_list = parsed.as_a? || instance_list
  rescue Socket::ConnectError | IO::TimeoutError | JSON::ParseException
    # Keep the empty list; the fallback domain is returned below.
  ensure
    # Close the connection even when the request or the parsing failed.
    instance_api_client.try &.close
  end

  filtered_instance_list = [] of String

  instance_list.each do |data|
    # Each entry is a `[domain, info]` pair.
    info = data[1]

    # TODO Check if current URL is onion instance and use .onion types if so.
    next unless info["type"]? == "https"

    # Instances can have statistics disabled (null or absent `stats`), which
    # makes version validation impossible.
    stats = info["stats"]?.try &.as_h?
    next if stats.nil?

    # The stats endpoint could also lack the software dict or its version.
    remote_version = stats["software"]?.try(&.as_h?).try(&.["version"]?).try(&.as_s?)
    next if remote_version.nil?

    # Makes sure the instance isn't too outdated.
    date_match = remote_version.match(/\d{4}\.\d{2}\.\d{2}/)
    next if date_match.nil?

    remote_commit_date = Time.parse(date_match[0], "%Y.%m.%d", Time::Location::UTC)
    local_commit_date = Time.parse(CURRENT_VERSION, "%Y.%m.%d", Time::Location::UTC)

    next if (remote_commit_date - local_commit_date).abs.days > 30

    # Walk the monitoring data without raising: any missing or malformed
    # level yields nil instead of an exception.
    health = info["monitor"]?
      .try(&.as_h?)
      .try(&.["dailyRatios"]?)
      .try(&.as_a?)
      .try(&.first?)
      .try(&.as_h?)
      .try(&.["ratio"]?)
      .try(&.to_s.to_f?)

    # We can't check the health if the monitoring is broken. Thus we'll just add it to the list
    # and move on. Ideally we'll ignore any instance that has broken health monitoring but due to the fact that
    # it's an error that often occurs with all the instances at the same time, we have to just skip the check.
    if health.nil? || health > 90
      filtered_instance_list << data[0].as_s
    end
  end

  # If for some reason no instances managed to get fetched successfully then we'll just redirect to redirect.invidious.io
  return "redirect.invidious.io" if filtered_instance_list.empty?

  filtered_instance_list.sample
end

def reduce_uri(uri : URI | String, max_length : Int32 = 50, suffix : String = "…") : String
str = uri.to_s.sub(/^https?:\/\//, "")
if str.size > max_length
Expand Down
97 changes: 97 additions & 0 deletions src/invidious/jobs/instance_refresh_job.cr
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# Background job that periodically rebuilds the list of instances used for
# automatic cross-instance redirects.
class Invidious::Jobs::InstanceListRefreshJob < Invidious::Jobs::BaseJob
  # The constant itself is never reassigned; instead we replace the value
  # stored inside the hash so that the latest list can be read from anywhere
  # within the codebase.
  #
  # "INSTANCES" => Array(Tuple(String, String)) # region, instance
  INSTANCES = {"INSTANCES" => [] of Tuple(String, String)}

  def initialize
  end

  # Job entry point: refreshes the instance list, then sleeps for 30 minutes,
  # in an endless loop.
  def begin
    loop do
      refresh_instances
      LOGGER.info("InstanceListRefreshJob: Done, sleeping for 30 minutes")
      sleep 30.minute
      Fiber.yield
    end
  end

  # Refreshes the list of instances used for redirects.
  #
  # Does the following three checks for each instance
  # - Is it a clear-net instance?
  # - Is it an instance with a good uptime?
  # - Is it an updated instance?
  #
  # An instance whose data cannot be parsed is logged and skipped. The
  # published list is only replaced when at least one instance passed, so a
  # failed fetch keeps the previous list in place.
  private def refresh_instances
    raw_instance_list = fetch_instances
    filtered_instance_list = [] of Tuple(String, String)

    raw_instance_list.each do |instance_data|
      # TODO allow Tor hidden service instances when the current instance
      # is also a hidden service. Same for i2p and any other non-clearnet instances.
      begin
        # Each entry is a `[domain, info]` pair.
        domain = instance_data[0]
        info = instance_data[1]
        stats = info["stats"]

        next unless info["type"] == "https"
        next if bad_uptime?(info["monitor"])
        next if outdated?(stats["software"]["version"])

        filtered_instance_list << {info["region"].as_s, domain.as_s}
      rescue ex
        # `domain` is nil here when the failure happened before it was read.
        if domain
          LOGGER.info("InstanceListRefreshJob: failed to parse information from '#{domain}' because \"#{ex}\"\n\"#{ex.backtrace.join('\n')}\" ")
        else
          LOGGER.info("InstanceListRefreshJob: failed to parse information from an instance because \"#{ex}\"\n\"#{ex.backtrace.join('\n')}\" ")
        end
      end
    end

    if !filtered_instance_list.empty?
      INSTANCES["INSTANCES"] = filtered_instance_list
    end
  end

  # Fetches information regarding instances from api.invidious.io.
  #
  # Returns the parsed top-level JSON array, or an empty array when the
  # connection fails, times out, or the response is not a JSON array.
  private def fetch_instances : Array(JSON::Any)
    # We directly call the stdlib HTTP::Client here as it allows us to negate the effects
    # of the force_resolve config option. This is needed as api.invidious.io does not support ipv6
    # and as such the following request raises if we were to use force_resolve with the ipv6 value.
    instance_api_client = HTTP::Client.new(URI.parse("https://api.invidious.io"))

    # Timeouts
    instance_api_client.connect_timeout = 10.seconds
    instance_api_client.dns_timeout = 10.seconds

    begin
      parsed = JSON.parse(instance_api_client.get("/instances.json").body)
      # A payload that is not an array is treated like a failed fetch rather
      # than raising out of the refresh loop.
      parsed.as_a? || Array(JSON::Any).new
    rescue Socket::ConnectError | IO::TimeoutError | JSON::ParseException
      Array(JSON::Any).new
    ensure
      # Close the connection even when the request or the parsing failed.
      instance_api_client.close
    end
  end

  # Checks if the given target instance is outdated.
  #
  # Returns true when the date embedded in the version string differs from
  # the date in `CURRENT_VERSION` by more than 30 days. A version string
  # without a `YYYY.MM.DD` date is treated as not outdated.
  private def outdated?(target_instance_version) : Bool
    remote_commit_date = target_instance_version.as_s.match(/\d{4}\.\d{2}\.\d{2}/)
    return false if !remote_commit_date

    remote_commit_date = Time.parse(remote_commit_date[0], "%Y.%m.%d", Time::Location::UTC)
    local_commit_date = Time.parse(CURRENT_VERSION, "%Y.%m.%d", Time::Location::UTC)

    (remote_commit_date - local_commit_date).abs.days > 30
  end

  # Returns true when the health monitor reports the instance as down, or
  # when its `uptime` value is below 90.
  #
  # NOTE(review): the period that `uptime` covers is defined by the upstream
  # API and is not visible here — confirm before relying on it.
  private def bad_uptime?(target_instance_health_monitor) : Bool
    return true if target_instance_health_monitor["down"].as_bool

    target_instance_health_monitor["uptime"].as_f < 90
  end
end
11 changes: 10 additions & 1 deletion src/invidious/routes/misc.cr
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,16 @@ module Invidious::Routes::Misc

# Redirects the request to a randomly chosen instance taken from the list
# maintained by Invidious::Jobs::InstanceListRefreshJob, falling back to
# "redirect.invidious.io" when that list is empty. The value returned by
# get_referer(env) is appended to the target URL.
def self.cross_instance_redirect(env)
referer = get_referer(env)
instance_url = fetch_random_instance

instance_list = Invidious::Jobs::InstanceListRefreshJob::INSTANCES["INSTANCES"]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess this should be used to add some sites for reddit comments eventually as well

# TODO: Use something like #479 for a static list of instances to use here
query = URI::Params.encode({q: "(url:3D#{id} OR url:#{id}) AND (site:invidio.us OR site:youtube.com OR site:youtu.be)"})

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good to me, this can be used for the eventual Lemmy source as well.

Though does Reddit's search functionality even support filtering for that many sites at once?

if instance_list.empty?
instance_url = "redirect.invidious.io"
else
# Sample returns an array
# Instances are packaged as {region, domain} in the instance list
instance_url = instance_list.sample(1)[0][1]
end

env.redirect "https://#{instance_url}#{referer}"
end
end
Loading