Import working for non-book pages #1478

Merged · 6 commits · Jul 5, 2023
56 changes: 30 additions & 26 deletions wagtailimportexport/exporting.py
@@ -17,10 +17,10 @@
 from wagtail.documents.models import Document


-def export_page(settings = {'root_page': None, 'export_unpublished': False,
-                'export_documents': False, 'export_images': False, 'null_pk': True,
-                'null_fk': False, 'null_users': False
-                }):
+def export_page(settings={'root_page': None, 'export_unpublished': False,
+                          'export_documents': False, 'export_images': False, 'null_pk': True,
+                          'null_fk': False, 'null_users': False
+                          }):
     """
     Exports the root_page as well as its children (if the setting is set).

@@ -37,11 +37,11 @@ def export_page(settings = {'root_page': None, 'export_unpublished': False,
     # If root_page is not set, then set it the main directory as default.
     if not settings['root_page']:
         settings['root_page'] = Page.objects.filter(url_path='/').first()

     # Get the list of the pages, (that are the descendant of the root_page).
     pages = Page.objects.descendant_of(
         settings['root_page'], inclusive=True).order_by('path').specific()

     # Filter the pages if export_unpublished is set to false.
     if not settings['export_unpublished']:
         pages = pages.filter(live=True)
@@ -57,28 +57,29 @@ def export_page(settings = {'root_page': None, 'export_unpublished': False,
         # skip over pages whose parents haven't already been exported
         # (which means that export_unpublished is false and the parent was unpublished)
         if i == 0 or (parent_path in exported_paths):

             # Turn page data to a dictionary.
             data = json.loads(page.to_json())
             locale = data['locale']

             # look up document titles
-            cover = functions.document_title(data['cover'])
-            title_image = functions.document_title(data['title_image'])
-            hi_res_pdf = functions.document_title(data['high_resolution_pdf'])
-            lo_res_pdf = functions.document_title(data['low_resolution_pdf'])
-            community_logo = functions.document_title(data['community_resource_logo'])
-            community_feature_link = functions.document_title(data['community_resource_feature_link'])
-
-            # Get list (and metadata) of images and documents to be exported.
+            if page.content_type.model == 'book':
+                cover = functions.document_title(data['cover'])
+                title_image = functions.document_title(data['title_image'])
+                hi_res_pdf = functions.document_title(data['high_resolution_pdf'])
+                lo_res_pdf = functions.document_title(data['low_resolution_pdf'])
+                community_logo = functions.document_title(data['community_resource_logo'])
+                community_feature_link = functions.document_title(data['community_resource_feature_link'])
+
+            # Get list (and metadata) of images and documents to be exported.
             images = list_fileobjects(page, settings, Image) if settings['export_images'] else {}
             documents = list_fileobjects(page, settings, Document) if settings['export_documents'] else {}

-            #Remove FKs
+            # Remove FKs
             if settings['null_fk']:
                 functions.null_fks(page, data)

-            #Remove the owner of the page.
+            # Remove the owner of the page.
             if settings['null_users'] and not data.get('owner'):
                 data['owner'] = None

@@ -91,12 +92,13 @@ def export_page(settings = {'root_page': None, 'export_unpublished': False,
             data['pk'] = None
             data['locale'] = locale
             # add document titles to data
-            data['cover'] = cover
-            data['title_image'] = title_image
-            data['high_resolution_pdf'] = hi_res_pdf
-            data['low_resolution_pdf'] = lo_res_pdf
-            data['community_resource_logo'] = community_logo
-            data['community_resource_feature_link'] = community_feature_link
+            if page.content_type.model == 'book':
+                data['cover'] = cover
+                data['title_image'] = title_image
+                data['high_resolution_pdf'] = hi_res_pdf
+                data['low_resolution_pdf'] = lo_res_pdf
+                data['community_resource_logo'] = community_logo
+                data['community_resource_feature_link'] = community_feature_link

             # Export page data.
             page_data.append({
@@ -111,6 +113,7 @@ def export_page(settings = {'root_page': None, 'export_unpublished': False,

     return functions.zip_contents(page_data)

+
 def list_fileobjects(page, settings, objtype):
     """
     Returns a dict of all fields that has the related_model of objtype as well as their metadata.
@@ -149,22 +152,23 @@ def list_fileobjects(page, settings, objtype):
                 objects[field.name] = instance_to_data(instance, null_users=settings['null_users'])

             except (FileNotFoundError, objtype.DoesNotExist):
-                logging.error("File for "+str(field.name)+" is not found on the environment, skipping.")
+                logging.error("File for " + str(field.name) + " is not found on the environment, skipping.")
                 objects[field.name] = None

         else:
             objects[field.name] = None

     return objects

+
 def instance_to_data(instance, null_users=False):
     """
     A utility to create JSON-able data from a model instance.

     Arguments:
     instance -- objects.get() object instance.
     null_users -- Whether to null user references.

     Returns:
     A dictionary of metadata of instance.
     """
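Taken together, the exporting changes guard every book-specific document-title lookup behind a `page.content_type.model == 'book'` check, so non-book pages no longer trip over fields they do not have. A minimal usage sketch, not part of the PR; it assumes a configured Django/Wagtail 2.x project (the `wagtail.core` import path is inferred from this diff's own imports):

```python
# Hypothetical usage of export_page, assuming a configured Wagtail 2.x project.
from wagtail.core.models import Page  # import path assumed from wagtail.core usage in this diff

from wagtailimportexport.exporting import export_page

# Export a subtree; book-only document fields are now skipped for non-book pages.
root = Page.objects.filter(url_path='/').first()  # same fallback the function itself uses
archive = export_page(settings={
    'root_page': root,
    'export_unpublished': False,  # drop unpublished pages (and their subtrees)
    'export_documents': True,     # bundle document files into the zip
    'export_images': True,        # bundle image files into the zip
    'null_pk': True,              # clear primary keys so the importer can reassign them
    'null_fk': False,
    'null_users': False,
})

# export_page returns an in-memory zip (io.BytesIO) via functions.zip_contents.
with open('export.zip', 'wb') as out:
    out.write(archive.read())
```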
47 changes: 30 additions & 17 deletions wagtailimportexport/functions.py
@@ -9,6 +9,7 @@
 from django.core.serializers.json import DjangoJSONEncoder
 from django.db.models.fields.related import ForeignKey
 from django.db.models.fields.reverse_related import ManyToOneRel
+from django.contrib.contenttypes.models import ContentType

 from wagtail.core.fields import StreamField
 from wagtail.documents.models import Document
@@ -25,7 +26,7 @@ def null_pks(page, data):
     Returns:
     N/A. Overwrites the argument.
     """

     # Nullify the main ID
     data['id'] = None
     data['pk'] = None
@@ -34,11 +35,12 @@ def null_pks(page, data):
     for field_name, field_val in data.items():
         if type(field_val) != list:
             continue

         for i, sub_item in enumerate(field_val):
             if 'pk' in sub_item:
                 data[field_name][i]['pk'] = None

+
 def find_null_child_blocks(subfield, location, data):
     """
     Recursive function to find all children blocks
@@ -65,9 +67,10 @@ def find_null_child_blocks(subfield, location, data):
     if isinstance(field_val, ForeignKey):
         # TODO: Implement overwriting.
         pass

     # Recursive Calls
-    find_null_child_blocks(field_val, location+[field_key], data)
+    find_null_child_blocks(field_val, location + [field_key], data)

+
 def find_null_child_relations(subfield, location, data):
     """
@@ -95,13 +98,14 @@ def find_null_child_relations(subfield, location, data):
     if isinstance(field, ForeignKey):
         if not location[0] in data:
             continue

         for i, value in enumerate(data[location[0]]):
             if not field.name in data[location[0]][i]:
                 continue

             data[location[0]][i][field.name] = None

+
 def null_fks(page, data):
     """
     Nullifies foreign keys within all supplied fields.
@@ -116,11 +120,11 @@ def null_fks(page, data):

     # Loop through all fields.
     for field in page._meta.get_fields():

         # Check whether the field is a ForeignKey.
         # By nature, owner, content_type, live_revision
         # are foreign keys defined by wagtail core pages.
-        if(isinstance(field, ForeignKey)):
+        if (isinstance(field, ForeignKey)):
             data[field.name] = None

         # StreamFields often have foreign keys associated with them.
@@ -130,7 +134,8 @@ def null_fks(page, data):
     # # Many to One relations often have foreign keys associated with them.
     # if(isinstance(field, ManyToOneRel)):
     #     find_null_child_relations(field, [field.name], data)

+
 def zip_contents(page_contents):
     """
     Creates and returns a zip archive of all supplied items.
@@ -158,42 +163,42 @@ def zip_contents(page_contents):
         'content.json',
         json.dumps(page_contents, indent=2, cls=DjangoJSONEncoder)
     )

     # Loop through pages to explore all used images and documents.
     for page in page_contents:

         # Export all the images.
         for image_def in page['images'].values():
             if not image_def:
                 continue

             filename = image_def['file']['name']

             try:
                 with file_storage.open(filename, 'rb') as f:
                     zf.writestr(filename, f.read())
             except FileNotFoundError:
-                logging.error("File "+str(filename)+" is not found on local file storage and was not exported.")
+                logging.error("File " + str(filename) + " is not found on local file storage and was not exported.")

         # Export all the documents.
         for doc_def in page['documents'].values():
             if not doc_def:
                 continue

             filename = doc_def['file']['name']

             try:
                 with file_storage.open(filename, 'rb') as f:
                     zf.writestr(filename, f.read())
             except FileNotFoundError:
-                logging.error("File "+str(filename)+" is not found on local file storage and was not exported.")
+                logging.error("File " + str(filename) + " is not found on local file storage and was not exported.")

     with open(zfname, 'rb') as zf:
         fd = zf.read()

     return io.BytesIO(fd)

+
 def unzip_contents(zip_contents):
     """
     Extracts all items in the zip archive and returns a mapping
@@ -213,7 +218,7 @@ def unzip_contents(zip_contents):
     zip_contents.extractall(tempdir)

     # Return the mapping of all extracted members.
-    return {member: tempdir+'/'+member for member in zip_contents.namelist()}
+    return {member: tempdir + '/' + member for member in zip_contents.namelist()}


 def document_title(doc_pk):
@@ -230,3 +235,11 @@ def document_id(doc_title):
         return None
     else:
         return doc[0].pk
+
+
+def content_type_by_model(model):
+    content_type = ContentType.objects.all().filter(model=model)
+    if not content_type:
+        return None
+    else:
+        return str(content_type[0].pk)
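The new `content_type_by_model` helper maps a lowercase model name (the `model` field stored with each exported page record) to the primary key of the matching Django `ContentType` row, returned as a string, or `None` when no match exists. A hedged sketch of how a caller might use it, assuming it runs inside a configured Django project:

```python
# Hypothetical usage, assuming a configured Django project.
from wagtailimportexport.functions import content_type_by_model

pk = content_type_by_model('book')  # e.g. '57' (a string) if a 'book' model is registered
if pk is None:
    # No ContentType row matches this model name; an imported record carrying it
    # would need special handling before Page.from_serializable_data is called.
    raise ValueError('unknown model name')
```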
22 changes: 14 additions & 8 deletions wagtailimportexport/importing.py
@@ -1,6 +1,7 @@
 import io
 import json
 import logging
+import traceback
 from zipfile import ZipFile

 from django.apps import apps
@@ -66,10 +67,12 @@ def import_page(uploaded_archive, parent_page, overwrites={}):
     for (i, page_record) in enumerate(contents):

         new_field_datas = {}
+        content_type = functions.content_type_by_model(page_record['model'])
Member: I might be missing it - but how are the functions getting imported?

Contributor (author): Line 18 - from wagtailimportexport import functions

Member: Totally missed it after looking at it 4 times. Ha! I've approved it, thanks for the clarification, just wanted to make sure.

+        #content_type = page_record['content']['content_type']

         # Skip the existing pages.
         if i in existing_pages:
-            error_msg = 'Duplicate slug'
+            error_msg = 'Import stopped. Duplicate slug: ' + str(page_record['content']['slug'])
             continue

         # Reassign image IDs.
@@ -96,13 +99,14 @@ def import_page(uploaded_archive, parent_page, overwrites={}):
         for (field, new_value) in overwrites.items():
             page_record['content'][field] = new_value

-        # look up document ids
-        page_record['content']['cover'] = functions.document_id(page_record['content']['cover'])
-        page_record['content']['title_image'] = functions.document_id(page_record['content']['title_image'])
-        page_record['content']['high_resolution_pdf'] = functions.document_id(page_record['content']['high_resolution_pdf'])
-        page_record['content']['low_resolution_pdf'] = functions.document_id(page_record['content']['low_resolution_pdf'])
-        page_record['content']['community_resource_logo'] = functions.document_id(page_record['content']['community_resource_logo'])
-        page_record['content']['community_resource_feature_link'] = functions.document_id(page_record['content']['community_resource_feature_link'])
+        if page_record['model'] == 'book':
+            # look up document ids
+            page_record['content']['cover'] = functions.document_id(page_record['content']['cover'])
+            page_record['content']['title_image'] = functions.document_id(page_record['content']['title_image'])
+            page_record['content']['high_resolution_pdf'] = functions.document_id(page_record['content']['high_resolution_pdf'])
+            page_record['content']['low_resolution_pdf'] = functions.document_id(page_record['content']['low_resolution_pdf'])
+            page_record['content']['community_resource_logo'] = functions.document_id(page_record['content']['community_resource_logo'])
+            page_record['content']['community_resource_feature_link'] = functions.document_id(page_record['content']['community_resource_feature_link'])

         # set page.pk to null if pk already exists
         pages = Page.objects.all()
@@ -111,6 +115,7 @@ def import_page(uploaded_archive, parent_page, overwrites={}):
             page_record['content']['pk'] = None
                 break

+        page_record['content']['content_type'] = content_type
         # Create page instance.
         page = Page.from_serializable_data(page_record['content'])

@@ -165,6 +170,7 @@ def import_page(uploaded_archive, parent_page, overwrites={}):
         # If content.json does not exist, then return the error,
         # and terminate the import_page.
         logging.error("Importing file failed because file does not exist: " + str(e))
+        traceback.print_exception(type(e), e, e.__traceback__)
         return (0, 1, "File does not exist: " + str(e))

     return (0, 1, "")
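On the import side, the same book-only guard plus the new content-type fix-up means a non-book archive now round-trips: `content_type_by_model(page_record['model'])` resolves the content type once per record, and the document-id lookups run only for book records. A usage sketch, hypothetical, assuming an archive produced by `export_page` and a configured Wagtail 2.x site; the parent slug is invented:

```python
# Hypothetical usage of import_page, assuming a configured Wagtail 2.x project.
from wagtail.core.models import Page  # import path assumed from wagtail.core usage in this diff

from wagtailimportexport.importing import import_page

parent = Page.objects.get(slug='home')  # invented destination slug

with open('export.zip', 'rb') as uploaded_archive:
    # overwrites patches fields on each imported record's content dict,
    # per the `page_record['content'][field] = new_value` loop in this diff.
    result = import_page(uploaded_archive, parent, overwrites={})

# The return statements visible in this diff are 3-tuples whose last element
# is an error message, empty when nothing went wrong.
print(result)
```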