Skip to content

Commit

Permalink
#4469 - Support for MHTML web page archives
Browse files Browse the repository at this point in the history
- Better control over blocking/allowing images in the safety net
- Fix scrollbar breaking out of container on project overview page
  • Loading branch information
reckart committed Jan 30, 2024
1 parent 0557a1b commit 17c6582
Show file tree
Hide file tree
Showing 7 changed files with 123 additions and 63 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ public interface ExternalEditorProperties

boolean isBlockImg();

/**
 * @return which image sources are permitted. The safety net policy consults this setting only
 *         when images are not blocked entirely via {@link #isBlockImg()}.
 */
Source getAllowImgSource();

boolean isBlockEmbed();

boolean isBlockAudio();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ public class ExternalEditorPropertiesImpl
private boolean blockStyle = true;

private boolean blockImg = true;
private Source allowImgSource = Source.NONE;
private boolean blockEmbed = true;
private boolean blockAudio = true;
private boolean blockObject = true;
Expand Down Expand Up @@ -53,6 +54,17 @@ public void setBlockImg(boolean aBlockImg)
blockImg = aBlockImg;
}

/**
 * Returns the image source setting currently stored in this properties object.
 *
 * @return the value of the {@code allowImgSource} field
 */
@Override
public Source getAllowImgSource()
{
    return this.allowImgSource;
}

/**
 * Sets which image sources are permitted when images are not blocked.
 * <p>
 * A {@code null} argument is stored as {@link Source#NONE} so that the getter never returns
 * {@code null}. A {@code null} setting would otherwise raise a {@link NullPointerException} in
 * code that switches over the value, and falling back to the most restrictive option keeps the
 * behavior fail-safe.
 *
 * @param aAllowImgSource
 *            the new setting; {@code null} is treated as {@link Source#NONE}
 */
public void setAllowImgSource(Source aAllowImgSource)
{
    allowImgSource = aAllowImgSource != null ? aAllowImgSource : Source.NONE;
}

@Override
public boolean isBlockEmbed()
{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
* Licensed to the Technische Universität Darmstadt under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The Technische Universität Darmstadt
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.inception.externaleditor.config;

/**
 * Describes from which kind of source a resource (e.g. an image) may be loaded.
 */
public enum Source
{
    /** No source is permitted. */
    NONE,

    /** Only resources embedded into the document itself are permitted. */
    LOCAL,

    /** Any source is permitted; no restriction is applied. */
    ANY
}
Original file line number Diff line number Diff line change
Expand Up @@ -73,38 +73,58 @@ public SafetyNetDocumentPolicy(ExternalEditorProperties aProperties) throws IOEx

/**
 * Builds the default sanitization policy from the external editor properties.
 * <p>
 * The policy is case-insensitive and passes elements and attributes by default. It always
 * disallows the {@code script}, {@code meta}, {@code applet}, {@code link} and {@code iframe}
 * elements. The {@code style}, {@code audio}, {@code embed}, {@code img}, {@code object} and
 * {@code video} elements are disallowed only if the corresponding {@code isBlock...()} property
 * is set. If {@code img} elements are not blocked, their {@code src} attribute is restricted
 * according to {@code getAllowImgSource()}. Finally, attributes that can carry JavaScript are
 * disallowed globally.
 *
 * @return the policy collection built from the rules above
 */
private PolicyCollection makeDefaultPolicy()
{
var builder = PolicyCollectionBuilder //
var policy = PolicyCollectionBuilder //
.caseInsensitive() //
.defaultAttributeAction(AttributeAction.PASS) //
.defaultElementAction(ElementAction.PASS);

// These elements are disallowed regardless of the configuration
builder.disallowElements("script", "meta", "applet", "link", "iframe");
policy.disallowElements("script", "meta", "applet", "link", "iframe");

if (properties.isBlockStyle()) {
builder.disallowElements("style");
policy.disallowElements("style");
}

if (properties.isBlockAudio()) {
builder.disallowElements("audio");
policy.disallowElements("audio");
}

if (properties.isBlockEmbed()) {
builder.disallowElements("embed");
policy.disallowElements("embed");
}

// Either drop img elements entirely, or keep them and restrict where they may load from
if (properties.isBlockImg()) {
builder.disallowElements("img");
policy.disallowElements("img");
}
else {
switch (properties.getAllowImgSource()) {
case NONE:
// Keep the img element but disallow its src attribute
policy.disallowAttributes("src").onElements("img");
break;
case LOCAL:
// The negative lookahead makes the pattern match src values that do NOT start with
// "res?resId=", so only those values are disallowed.
// NOTE(review): presumably "res?resId=" is the URL form used for resources embedded in
// the document - confirm against the code that produces these URLs.
policy.disallowAttributes("src") //
.matching(compile("(?!res[?]resId=).*")) //
.onElements("img");
break;
case ANY:
// No restriction in this case
break;
}
}

if (properties.isBlockObject()) {
builder.disallowElements("object");
policy.disallowElements("object");
}

if (properties.isBlockVideo()) {
builder.disallowElements("video");
policy.disallowElements("video");
}

// Disallow the listed attributes when their value is optional whitespace followed by
// "javascript:"
builder.disallowAttributes(JAVASCRIPT_ACTIVE_ATTRIBUTES) //
policy.disallowAttributes(JAVASCRIPT_ACTIVE_ATTRIBUTES) //
.matching(compile("\\s*javascript:.*")) //
.globally();

// Event handler attributes are disallowed globally, whatever their value
builder.disallowAttributes(JAVASCRIPT_EVENT_ATTRIBUTES).globally();
return builder.build();
policy.disallowAttributes(JAVASCRIPT_EVENT_ATTRIBUTES).globally();
return policy.build();
}

public PolicyCollection getPolicy() throws IOException
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
= MHTML (Web archive)

====
CAUTION: Experimental feature. To use this functionality, you need to enable it first by adding `format.mhtml.enabled=true` to the `settings.properties` file. In order to load images from MHTML files, it is currently also necessary to disable image blocking in the safety net using `ui.external.block-img=false` - use with care as this will also enable loading images from external sources.
CAUTION: Experimental feature. To use this functionality, you need to enable it first by adding `format.mhtml.enabled=true` to the `settings.properties` file. In order to load images from MHTML files, it is currently also necessary to disable image blocking in the safety net using `ui.external.block-img=false` and to set `ui.external.allow-img-source=LOCAL`. This allows loading images
embedded in the documents, but not images from remote servers.
====

link:https://en.wikipedia.org/wiki/MHTML[MHTML] is a format supported by many browsers which stores the website currently shown in the browser along with most resources required to display the page - including but not limited to images.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -202,10 +202,10 @@ void thatAttributesCanBeDroppedSelectively(String aName, PolicyCollectionBuilder
void thatDisallowedAttributesAreDroppped(String aName, PolicyCollectionBuilder aBuilder)
throws Exception
{
QName root = new QName("root");
QName child = new QName("child");
QName attr1 = new QName("attr1");
QName attr2 = new QName("attr2");
var root = new QName("root");
var child = new QName("child");
var attr1 = new QName("attr1");
var attr2 = new QName("attr2");

var buffer = new StringWriter();
var policy = aBuilder //
Expand Down Expand Up @@ -258,9 +258,9 @@ void thatPrunedBranchesAreDropped(String aName, PolicyCollectionBuilder aBuilder
@Test
void thatCaseInsensitiveModeWorks() throws Exception
{
QName root = new QName("root");
QName child = new QName("child");
QName attr1 = new QName("attr1");
var root = new QName("root");
var child = new QName("child");
var attr1 = new QName("attr1");

var buffer = new StringWriter();
var policy = PolicyCollectionBuilder.caseInsensitive() //
Expand All @@ -285,9 +285,9 @@ void thatCaseInsensitiveModeWorks() throws Exception
@Test
void thatCaseSensitiveModeWorks() throws Exception
{
QName root = new QName("ROOT");
QName child = new QName("child");
QName attr1 = new QName("attr1");
var root = new QName("ROOT");
var child = new QName("child");
var attr1 = new QName("attr1");

var buffer = new StringWriter();
var policy = PolicyCollectionBuilder.caseSensitive() //
Expand All @@ -314,7 +314,7 @@ void thatCaseSensitiveModeWorks() throws Exception
@Test
void thatSanitizingDefaultXmlParserWorks() throws Exception
{
QName root = new QName("http://namespace.org", "ROOT");
var root = new QName("http://namespace.org", "ROOT");

var buffer = new StringWriter();
var policy = PolicyCollectionBuilder.caseSensitive() //
Expand All @@ -323,7 +323,7 @@ void thatSanitizingDefaultXmlParserWorks() throws Exception

var sut = new SanitizingContentHandler(makeXmlSerializer(buffer), policy);

String xml = "<ns:ROOT xmlns:ns='http://namespace.org'/>";
var xml = "<ns:ROOT xmlns:ns='http://namespace.org'/>";

var parser = newSaxParser();
parser.parse(toInputStream(xml, UTF_8), new DefaultHandlerToContentHandlerAdapter<>(sut));
Expand All @@ -334,7 +334,7 @@ void thatSanitizingDefaultXmlParserWorks() throws Exception
@Test
void thatNamespaceDeclarationsPass() throws Exception
{
QName root = new QName("http://namespace.org", "ROOT");
var root = new QName("http://namespace.org", "ROOT");

var buffer = new StringWriter();
var policy = PolicyCollectionBuilder.caseSensitive() //
Expand All @@ -343,7 +343,7 @@ void thatNamespaceDeclarationsPass() throws Exception

var sut = new SanitizingContentHandler(makeXmlSerializer(buffer), policy);

String xml = "<ns:ROOT xmlns:ns='http://namespace.org' xmlns:other='otherNs' xmlns='default'/>";
var xml = "<ns:ROOT xmlns:ns='http://namespace.org' xmlns:other='otherNs' xmlns='default'/>";

var parser = newSaxParser();
parser.parse(toInputStream(xml, UTF_8), new DefaultHandlerToContentHandlerAdapter<>(sut));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,48 +64,50 @@
</div>
<ul wicket:id="pagingNavigator" class="me-3"/>
</div>
<div class="scrolling flex-content flex-h-container mx-3 mb-3 card">
<div wicket:id="emptyListLabel" class="no-data-notice flex-content"></div>
<div wicket:id="projects" class="flex-content list-group-flush list-group border-0">
<div wicket:id="project" class="list-group-item">
<div class="d-flex">
<div class="flex-grow-1 d-flex flex-column">
<div>
<a wicket:id="projectLink" class="text-decoration-none">
<wicket:container wicket:id="name"/>
</a>
<div class="d-flex flex-grow-1 overflow-hidden mx-3 mb-3 card">
<div class="scrolling flex-content flex-h-container ">
<div wicket:id="emptyListLabel" class="no-data-notice flex-content"></div>
<div wicket:id="projects" class="flex-content list-group-flush list-group border-0">
<div wicket:id="project" class="list-group-item">
<div class="d-flex">
<div class="flex-grow-1 d-flex flex-column">
<div>
<a wicket:id="projectLink" class="text-decoration-none">
<wicket:container wicket:id="name"/>
</a>
</div>
<div class="list-group-item-text text-muted">
<small><wicket:container wicket:id="description"/></small>
</div>
</div>
<div class="list-group-item-text text-muted">
<small><wicket:container wicket:id="description"/></small>
<div class="d-flex flex-column text-end">
<div class="d-flex flex-nowrap flex-row-reverse">
<span wicket:id="role" class="badge bg-secondary ms-1"><wicket:container wicket:id="label"/></span>
</div>
<div class="text-nowrap mt-auto">
<small class="text-muted" wicket:enclosure="created">Created: <wicket:container wicket:id="created"/></small>
<small class="text-muted" wicket:enclosure="id">ID: <wicket:container wicket:id="id"/></small>
</div>
</div>
</div>
<div class="d-flex flex-column text-end">
<div class="d-flex flex-nowrap flex-row-reverse">
<span wicket:id="role" class="badge bg-secondary ms-1"><wicket:container wicket:id="label"/></span>
<div wicket:id="actionDropdown" class="d-flex">
<div class="dropdown flex-content flex-h-container">
<button class="btn dropdown-toggle flex-content" type="button" data-bs-toggle="dropdown">
<i class="fas fa-ellipsis-v"></i>
</button>
<ul class="dropdown-menu float-end" role="menu">
<li>
<button class="dropdown-item" tabindex="-1" wicket:id="leaveProject" type="button">
<wicket:message key="leaveProject"/>
</button>
</li>
</ul>
</div>
</div>
<div class="text-nowrap mt-auto">
<small class="text-muted" wicket:enclosure="created">Created: <wicket:container wicket:id="created"/></small>
<small class="text-muted" wicket:enclosure="id">ID: <wicket:container wicket:id="id"/></small>
</div>
</div>
<div wicket:id="actionDropdown" class="d-flex">
<div class="dropdown flex-content flex-h-container">
<button class="btn dropdown-toggle flex-content" type="button" data-bs-toggle="dropdown">
<i class="fas fa-ellipsis-v"></i>
</button>
<ul class="dropdown-menu float-end" role="menu">
<li>
<button class="dropdown-item" tabindex="-1" wicket:id="leaveProject" type="button">
<wicket:message key="leaveProject"/>
</button>
</li>
</ul>
</div>
</div>
</div>
</div>
</div>
</div>
<div wicket:id="dialog"></div>
</div>
<div wicket:id="dialog"></div>
</div>
</div>
</wicket:extend>
Expand Down

0 comments on commit 17c6582

Please sign in to comment.