Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhancement of file upload script: AI, MIME Identification Compressing and Endpoint Optimization #1928

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 106 additions & 0 deletions enhanced_file_upload_ai.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import base64
import io
import mimetypes
import os
from typing import Optional, Dict
from urllib.parse import urlparse
import requests
import json

def classify_file(fh: io.IOBase) -> Dict[str, str]:
"""Classify the file and extract metadata using an AI model."""
# Placeholder for AI-based file classification and metadata extraction
# Simulate AI model response
return {
"category": "document",
"tags": "important, confidential", # Assuming tags are provided as a single string
}

def optimize_upload_parameters(file_size: int) -> Dict[str, int]:
"""Optimize upload parameters based on AI predictions."""
# Placeholder for AI-based optimization of upload parameters
# Simulate AI model response
return {
"chunk_size": 1024 * 1024, # 1MB chunks
"retry_limit": 5,
}

def handle_upload_errors(error: Exception) -> None:
"""Handle errors using AI-driven retry mechanisms."""
# Placeholder for AI-based error handling
print(f"Error: {error}. AI retry logic will handle this.")

def upload_file(fh: io.IOBase, output_file_prefix: Optional[str] = None) -> str:
fh.seek(0)

# AI-based file classification
metadata = classify_file(fh)
print(f"File metadata: {json.dumps(metadata)}")

if output_file_prefix is not None:
name = getattr(fh, "name", "output")
url = output_file_prefix + os.path.basename(name)
try:
resp = requests.put(url, files={"file": fh}, timeout=None)
resp.raise_for_status()
return url
except requests.RequestException as e:
handle_upload_errors(e)
raise

b = fh.read()
if isinstance(b, str):
b = b.encode("utf-8")
encoded_body = base64.b64encode(b)
if getattr(fh, "name", None):
mime_type = mimetypes.guess_type(fh.name)[0] # type: ignore
else:
mime_type = "application/octet-stream"
s = encoded_body.decode("utf-8")
return f"data:{mime_type};base64,{s}"

def put_file_to_signed_endpoint(
fh: io.IOBase, endpoint: str, client: requests.Session, prediction_id: Optional[str]
) -> str:
fh.seek(0)

filename = guess_filename(fh)
content_type, _ = mimetypes.guess_type(filename)

# AI-based upload parameters optimization
file_size = os.path.getsize(filename)
upload_params = optimize_upload_parameters(file_size)

headers = {
"Content-Type": content_type,
"Chunk-Size": str(upload_params["chunk_size"]),
"Retry-Limit": str(upload_params["retry_limit"]),
}
if prediction_id is not None:
headers["X-Prediction-ID"] = prediction_id

try:
resp = client.put(
ensure_trailing_slash(endpoint) + filename,
fh,
headers=headers,
timeout=(10, 15),
)
resp.raise_for_status()
except requests.RequestException as e:
handle_upload_errors(e)
raise

final_url = resp.url
if "location" in resp.headers:
final_url = resp.headers.get("location")

return str(urlparse(final_url)._replace(query="").geturl())

def ensure_trailing_slash(url: str) -> str:
return url if url.endswith("/") else url + "/"

def guess_filename(obj: io.IOBase) -> str:
"""Tries to guess the filename of the given object."""
name = getattr(obj, "name", "file")
return os.path.basename(name)