Skip to content

Commit

Permalink
Merge pull request #253 from ropable/master
Browse files Browse the repository at this point in the history
Updates to handle differences between local files and blobs
  • Loading branch information
ropable authored Aug 28, 2023
2 parents ec7df1b + 64aba8d commit d677076
Show file tree
Hide file tree
Showing 7 changed files with 50 additions and 14 deletions.
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ FROM python_libs_prs
COPY gunicorn.py manage.py ./
COPY prs2 ./prs2
RUN python manage.py collectstatic --noinput

# Run the application as the www-data user.
USER www-data
EXPOSE 8080
Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,12 @@ By default, PRS assumes that user-uploaded media will be saved to Azure blob
storage. To use local storage, set the environment variable `LOCAL_MEDIA_STORAGE=True`
and ensure that a writeable `media` directory exists in the project directory.

When Azure blob storage is used, its credentials should be defined in the following environment variables:

AZURE_ACCOUNT_NAME=name
AZURE_ACCOUNT_KEY=key
AZURE_CONTAINER=container_name

# Running

Use `runserver` to run a local copy of the application:
Expand Down
23 changes: 19 additions & 4 deletions prs2/indexer/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,25 +66,40 @@ def typesense_index_record(rec, client=None):
try:
# PDF text extraction can be a little error-prone.
# In the event of an exception here, we'll just accept it and pass.
file_content = high_level.extract_text(open(rec.uploaded_file.path, 'rb'))
if settings.LOCAL_MEDIA_STORAGE:
file_content = high_level.extract_text(open(rec.uploaded_file.path, 'rb'))
else:
file_content = high_level.extract_text(open(rec.uploaded_file))
except:
pass

# MSG document content.
if rec.extension == 'MSG':
message = Message(rec.uploaded_file.path)
if settings.LOCAL_MEDIA_STORAGE:
message = Message(rec.uploaded_file.path)
else:
message = Message(rec.uploaded_file)
file_content = '{} {}'.format(message.subject, message.body.replace('\r\n', ' '))

# DOCX document content.
if rec.extension == 'DOCX':
file_content = docx2txt.process(rec.uploaded_file.path)
if settings.LOCAL_MEDIA_STORAGE:
file_content = docx2txt.process(rec.uploaded_file.path)
else:
file_content = docx2txt.process(rec.uploaded_file)

# TXT document content.
if rec.extension == 'TXT':
file_content = open(rec.uploaded_file.path, 'r').read()
if settings.LOCAL_MEDIA_STORAGE:
file_content = open(rec.uploaded_file.path, 'r').read()
else:
file_content = rec.uploaded_file.read()

# Trim down the content of uploaded files a little.
if file_content:
# Decode a bytes object to a string.
if isinstance(file_content, bytes):
file_content = file_content.decode('utf-8')
# Replace punctuation with a space.
file_content = re.sub(r'[^\w\s]', ' ', file_content)
# Replace newlines with a space.
Expand Down
17 changes: 13 additions & 4 deletions prs2/referral/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1116,7 +1116,10 @@ def save(self, force_insert=False, force_update=False, *args, **kwargs):

# If the file is a .MSG we take the sent date of the email and use it for order_date.
if self.extension == "MSG":
msg = Message(os.path.realpath(self.uploaded_file.path))
if settings.LOCAL_MEDIA_STORAGE:
msg = Message(os.path.realpath(self.uploaded_file.path))
else:
msg = Message(self.uploaded_file)
if msg.date:
date = parse(msg.date.replace('GMT ', ''))
if date and self.order_date != date:
Expand All @@ -1132,10 +1135,16 @@ def save(self, force_insert=False, force_update=False, *args, **kwargs):

@property
def filename(self):
if self.uploaded_file and os.path.exists(self.uploaded_file.path):
return self.uploaded_file.name.rsplit("/", 1)[-1]
if settings.LOCAL_MEDIA_STORAGE:
if self.uploaded_file and os.path.exists(self.uploaded_file.path):
return self.uploaded_file.name.rsplit("/", 1)[-1]
else:
return ""
else:
return ""
if self.uploaded_file:
return self.uploaded_file.name.rsplit("/", 1)[-1]
else:
return ""

@property
def extension(self):
Expand Down
2 changes: 2 additions & 0 deletions prs2/settings-test.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
PASSWORD_HASHERS = (
'django.contrib.auth.hashers.MD5PasswordHasher',
)
# Use local media storage
MEDIA_ROOT = os.path.join(BASE_DIR, 'media')
DEFAULT_FILE_STORAGE = 'django.core.files.storage.FileSystemStorage'
MIDDLEWARE = [
'django.middleware.security.SecurityMiddleware',
Expand Down
13 changes: 8 additions & 5 deletions prs2/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,11 @@
ROOT_URLCONF = 'prs2.urls'
WSGI_APPLICATION = 'prs2.wsgi.application'

# Use Azure blob storage for media uploads, unless explicitly set otherwise.
if env('LOCAL_MEDIA_STORAGE', False):
# Assume Azure blob storage is used for media uploads, unless explicitly set as local storage.
LOCAL_MEDIA_STORAGE = env('LOCAL_MEDIA_STORAGE', False)
if LOCAL_MEDIA_STORAGE:
DEFAULT_FILE_STORAGE = 'django.core.files.storage.FileSystemStorage'
MEDIA_ROOT = os.path.join(BASE_DIR, 'media')
else:
DEFAULT_FILE_STORAGE = 'storages.backends.azure_storage.AzureStorage'
AZURE_ACCOUNT_NAME = env('AZURE_ACCOUNT_NAME', 'name')
Expand Down Expand Up @@ -115,7 +117,7 @@
LOGIN_REDIRECT_URL = '/'
APPLICATION_TITLE = 'Planning Referral System'
APPLICATION_ACRONYM = 'PRS'
APPLICATION_VERSION_NO = '2.5.32'
APPLICATION_VERSION_NO = '2.5.33'
APPLICATION_ALERTS_EMAIL = '[email protected]'
SITE_URL = env('SITE_URL', 'localhost')
PRS_USER_GROUP = env('PRS_USER_GROUP', 'PRS user')
Expand Down Expand Up @@ -183,14 +185,15 @@
)

# Static files (CSS, JavaScript, Images)
MEDIA_ROOT = os.path.join(BASE_DIR, 'media')
MEDIA_URL = '/media/'
STATIC_ROOT = os.path.join(BASE_DIR, 'staticfiles')
STATIC_URL = '/static/'
STATICFILES_DIRS = (os.path.join(BASE_DIR, 'prs2', 'static'),)
STATICFILES_STORAGE = 'whitenoise.storage.CompressedManifestStaticFilesStorage'
WHITENOISE_ROOT = STATIC_ROOT

# Media uploads
MEDIA_URL = '/media/'

# This is required to add context variables to all templates:
STATIC_CONTEXT_VARS = {}

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "prs"
version = "2.5.32"
version = "2.5.33"
description = "Planning Referral System corporate application"
authors = ["Ashley Felton <[email protected]>"]
license = "Apache-2.0"
Expand Down

0 comments on commit d677076

Please sign in to comment.