fix: increase timeout rate of lora metadata/model downloads #211

Merged: 5 commits, Mar 7, 2024
Changes from 2 commits
56 changes: 41 additions & 15 deletions hordelib/model_manager/lora.py
@@ -44,14 +44,19 @@ class LoraModelManager(BaseModelManager):
     )
     LORA_API = "https://civitai.com/api/v1/models?types=LORA&sort=Highest%20Rated&primaryFileOnly=true"
     MAX_RETRIES = 10 if not TESTS_ONGOING else 3
-    MAX_DOWNLOAD_THREADS = 3
+    MAX_DOWNLOAD_THREADS = 5 if not TESTS_ONGOING else 15
     RETRY_DELAY = 3 if not TESTS_ONGOING else 0.2
     """The time to wait between retries in seconds"""
-    REQUEST_METADATA_TIMEOUT = 20
-    """The time to wait for a response from the server in seconds"""
-    REQUEST_DOWNLOAD_TIMEOUT = 300
-    """The time to wait for a response from the server in seconds"""
-    THREAD_WAIT_TIME = 2
+    REQUEST_METADATA_TIMEOUT = 20  # Longer because civitai performs poorly on metadata requests for more than 5 models
+    """The maximum time for no data to be received before we give up on a metadata fetch, in seconds"""
+    REQUEST_DOWNLOAD_TIMEOUT = 10 if not TESTS_ONGOING else 1
+    """The maximum time for no data to be received before we give up on a download, in seconds
+
+    This is not the time to download the file, but the time to wait in between data packets. \
+    If we're actively downloading and the connection to the server is alive, this doesn't come into play
+    """
+
+    THREAD_WAIT_TIME = 0.1
     """The time to wait between checking the download queue in seconds"""
 
     _file_lock: multiprocessing_lock | nullcontext
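
Note on the much smaller REQUEST_DOWNLOAD_TIMEOUT: with requests, the timeout argument does not cap the total transfer time; it caps how long we wait to connect and how long we wait between chunks of data. A minimal sketch of that behaviour for a streamed download (illustrative only, not hordelib's downloader; the function and argument names here are made up):

import requests

def download_with_stall_timeout(url: str, dest_path: str, stall_timeout: float = 10.0) -> None:
    # timeout=(connect, read): the read timeout only fires if no bytes arrive
    # for `stall_timeout` seconds, so a healthy multi-minute download never trips it.
    with requests.get(url, stream=True, timeout=(10.0, stall_timeout)) as response:
        response.raise_for_status()
        with open(dest_path, "wb") as outfile:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                outfile.write(chunk)
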
@@ -274,21 +279,42 @@ def _add_lora_ids_to_download_queue(self, lora_ids, adhoc=False, version_compare
     def _get_json(self, url):
         retries = 0
         while retries <= self.MAX_RETRIES:
+            response = None
             try:
-                response = requests.get(url, timeout=self.REQUEST_METADATA_TIMEOUT)
+                response = requests.get(
+                    url,
+                    timeout=self.REQUEST_METADATA_TIMEOUT if len(url) < 200 else self.REQUEST_METADATA_TIMEOUT * 1.5,
+                )
                 response.raise_for_status()
                 # Attempt to decode the response to JSON
                 return response.json()
 
             except (requests.HTTPError, requests.ConnectionError, requests.Timeout, json.JSONDecodeError) as e:
-                # CivitAI Errors when the model ID is too long
-                if response.status_code in [404, 500]:
-                    logger.debug(f"url '{url}' download failed {type(e)} {e}")
+
+                # If this is a 401, 404, or 500, we're not going to get anywhere, just give up
+                # The following are the CivitAI errors encountered so far
+                # (and all of them will not fix themselves with retries as of writing)
+                # [401: requires a token, 404: model ID too long, 500: internal server error]
+                if response is not None and response.status_code in [401, 404, 500]:
+                    logger.debug(f"url '{url}' download failed with status code {response.status_code}")
                     return None
 
+                logger.debug(f"url '{url}' download failed {type(e)} {e}")
+                # The json being invalid is a CivitAI issue, possibly it showing an HTML page and
+                # this isn't likely to change in the next 30 seconds, so we'll try twice more
+                # and give up if it doesn't work
+                if isinstance(e, json.JSONDecodeError):
+                    logger.debug(f"url '{url}' download failed with {type(e)} {e}")
+                    retries += 3
+
+                # If the network connection timed out, then self.REQUEST_METADATA_TIMEOUT seconds passed
+                # and the clock is ticking, so we'll try once more
+                if response is None:
+                    retries += 5
+
                 retries += 1
                 self.total_retries_attempted += 1
 
                 if retries <= self.MAX_RETRIES:
                     time.sleep(self.RETRY_DELAY)
                 else:
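
For reference, a rough sketch of how the retry budget above works out. The +3 and +5 penalties come from the diff; the attempt counts below are just arithmetic on the `while retries <= self.MAX_RETRIES` loop, assuming the non-test MAX_RETRIES of 10:

MAX_RETRIES = 10  # non-test value from the class constants

def max_attempts(extra_penalty: int) -> int:
    # Every failed attempt costs 1 retry plus `extra_penalty`;
    # the loop keeps trying while retries <= MAX_RETRIES.
    attempts, retries = 0, 0
    while retries <= MAX_RETRIES:
        attempts += 1
        retries += 1 + extra_penalty
    return attempts

print(max_attempts(0))  # plain HTTP/connection error: 11 attempts
print(max_attempts(3))  # JSONDecodeError: 3 attempts
print(max_attempts(5))  # timeout with no response at all: 2 attempts
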
@@ -674,8 +700,8 @@ def clear_all_references(self):
     def wait_for_downloads(self, timeout=None):
         rtr = 0
         while not self.are_downloads_complete():
-            time.sleep(0.5)
-            rtr += 0.5
+            time.sleep(self.THREAD_WAIT_TIME)
+            rtr += self.THREAD_WAIT_TIME
             if timeout and rtr > timeout:
                 raise Exception(f"Lora downloads exceeded specified timeout ({timeout})")
         logger.debug("Downloads complete")
@@ -973,7 +999,7 @@ def reset_adhoc_loras(self):
             if self._stop_all_threads:
                 logger.debug("Stopped processing thread")
                 return
-            time.sleep(0.2)
+            time.sleep(self.THREAD_WAIT_TIME)
         now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
         self._adhoc_loras = set()
         unsorted_items = []
@@ -1073,8 +1099,8 @@ def is_adhoc_reset_complete(self):
     def wait_for_adhoc_reset(self, timeout=15):
         rtr = 0
         while not self.is_adhoc_reset_complete():
-            time.sleep(0.2)
-            rtr += 0.2
+            time.sleep(self.THREAD_WAIT_TIME)
+            rtr += self.THREAD_WAIT_TIME
             if timeout and rtr > timeout:
                 raise Exception(f"Lora adhoc reset exceeded specified timeout ({timeout})")
 