Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[For merging on 30.09] landing page is canonical #513

Merged
merged 12 commits into from
Oct 1, 2019
4 changes: 2 additions & 2 deletions bin/add_revision.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
import tempfile

from anthology.utils import deconstruct_anthology_id, indent
from anthology.data import ANTHOLOGY_URL
from anthology.data import ANTHOLOGY_PDF

import lxml.etree as ET
import urllib.request
Expand Down Expand Up @@ -131,7 +131,7 @@ def main(args):
# (essentially backing up the original version)
revised_file_v1_path = os.path.join(output_dir, f'{args.anthology_id}{change_letter}1.pdf')

current_version = ANTHOLOGY_URL.format(args.anthology_id)
current_version = ANTHOLOGY_PDF.format(args.anthology_id)
if args.do:
try:
print(f'-> Downloading file from {args.path} to {revised_file_v1_path}', file=sys.stderr)
Expand Down
2 changes: 2 additions & 0 deletions bin/anthology/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
ANTHOLOGY_PREFIX = "https://www.aclweb.org/anthology"

ANTHOLOGY_URL = ANTHOLOGY_PREFIX + '/{}'
ANTHOLOGY_PDF = ANTHOLOGY_PREFIX + '/{}.pdf'

ATTACHMENT_URL = ANTHOLOGY_PREFIX + '/attachments/{}'

# Names of XML elements that may appear multiple times
Expand Down
4 changes: 2 additions & 2 deletions bin/anthology/papers.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def from_xml(xml_element, *args):
tag, paper.full_id, item['url']
)
)
item['url'] = data.ANTHOLOGY_URL.format(item['url'])
item['url'] = data.ANTHOLOGY_PDF.format(item['url'])

if 'attachment' in paper.attrib:
for item in paper.attrib['attachment']:
Expand All @@ -90,7 +90,7 @@ def from_xml(xml_element, *args):
paper.attrib['revision'].insert(0, {
"value": "{}v1".format(paper.full_id),
"id": "1",
"url": data.ANTHOLOGY_URL.format( "{}v1".format(paper.full_id)) } )
"url": data.ANTHOLOGY_PDF.format( "{}v1".format(paper.full_id)) } )

paper.attrib["title"] = paper.get_title("plain")
paper.attrib["booktitle"] = paper.get_booktitle("plain")
Expand Down
9 changes: 6 additions & 3 deletions bin/anthology/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,12 +116,12 @@ def remove_extra_whitespace(text):
return re.sub(" +", " ", text.replace("\n", "").strip())


def infer_url(filename, prefix=data.ANTHOLOGY_URL):
def infer_url(filename, prefix=data.ANTHOLOGY_PREFIX):
"""If URL is relative, return the full Anthology URL.
"""
if urlparse(filename).netloc:
return filename
return prefix.format(filename)
return f"{prefix}/{filename}"


def infer_attachment_url(filename, parent_id=None):
Expand Down Expand Up @@ -298,8 +298,11 @@ def parse_element(xml_element):
value = element.text

if tag == "url":
# Use the tag 'pdf' instead of 'url'
tag = 'pdf'

# Convert relative URLs to canonical ones
value = element.text if element.text.startswith('http') else data.ANTHOLOGY_URL.format(element.text)
value = element.text if element.text.startswith('http') else data.ANTHOLOGY_PDF.format(element.text)

if tag in data.LIST_ELEMENTS:
try:
Expand Down
2 changes: 1 addition & 1 deletion bin/create_bibtex.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def create_bibtex(anthology, trgdir, clean=False):
"{}/anthology.bib.gz".format(trgdir), "wt", encoding="utf-8"
) as file_full:
for volume_id, volume in tqdm(anthology.volumes.items()):
volume_dir = "{}/papers/{}/{}".format(trgdir, volume_id[0], volume_id[:3])
volume_dir = trgdir
if not os.path.exists(volume_dir):
os.makedirs(volume_dir)
with open(
Expand Down
4 changes: 4 additions & 0 deletions hugo/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ disablePathToLower = true
staticDir = ["static", "data-export"]


[permalinks]
papers = "/:filename/"


[menu]

[[menu.footer]]
Expand Down
10 changes: 3 additions & 7 deletions hugo/layouts/papers/list-entry.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,13 @@
{{ $paper := index (index $.Site.Data.papers $volume_id) .Params.anthology_id }}
<p class="d-sm-flex align-items-stretch">
<span class="d-block mr-2 text-nowrap list-button-row">
{{- with $paper.url -}}
{{- with $paper.pdf -}}
<a class="badge badge-primary align-middle mr-1" href="{{ . }}" data-toggle="tooltip" data-placement="top" title="Open PDF">
pdf
mjpost marked this conversation as resolved.
Show resolved Hide resolved
</a>
{{- if and (hasPrefix . $.Site.Params.baseURL) (eq . (strings.TrimSuffix ".pdf" .)) -}}
<a class="d-none" href="{{ . }}.pdf" title="Hidden link to PDF with extension">pdf</a>
{{- end -}}
{{- end -}}
mbollmann marked this conversation as resolved.
Show resolved Hide resolved
{{- $bibfile := printf "/papers/%s/%s/%s.bib" (slicestr $volume_id 0 1) $volume_id .Params.anthology_id -}}
{{- if (fileExists (printf "/data-export/%s" $bibfile)) -}}
<a class="badge badge-secondary align-middle mr-1" href="{{ $bibfile | relURL }}" data-toggle="tooltip" data-placement="top" title="Export to BibTeX">
{{- if (fileExists (printf "/data-export/%s.bib" .Params.anthology_id)) -}}
<a class="badge badge-secondary align-middle mr-1" href="{{ (printf "/%s.bib" .Params.anthology_id) | relURL }}" data-toggle="tooltip" data-placement="top" title="Export to BibTeX">
bib
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The bib path is now ${ANTH_ID}.bib, e.g., https://aclweb.org/anthology/P19-1002.bib, instead of the nested version.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're no longer using $bibfile, but the link is still within an if clause that checks for $bibfile.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is fine though, right? If the bibfile exists, we generate the appropriate link to it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't change this because I'm not sure how to fix it. If the bibfile is present, we generate the canonical link to it, which is handled by a redirect internally.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, the question is whether we still want a check for the existence of the file in the Hugo template.

If we do, then the current solution is suboptimal IMO, since it requires us to manually remember and update the actual local path to the file (in the $bibfile := ... line) in case it ever changes.

If we do not, we should just throw out the surrounding lines altogether.

</a>
{{- end -}}
Expand Down
37 changes: 19 additions & 18 deletions hugo/layouts/papers/single.html
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
{{ with $volume.meta_issue }}<meta content="{{ . }}" name="citation_issue" >{{ end }}
{{ with $volume.meta_date }}<meta content="{{ . }}" name="citation_publication_date" >{{ end }}
{{ end }}
{{ with $paper.url }}
{{ with $paper.pdf }}
<meta content="{{ . }}" name="citation_pdf_url" >
{{ end }}
{{ with $paper.page_first }}<meta content="{{ . }}" name="citation_firstpage" >{{ end }}
Expand Down Expand Up @@ -49,10 +49,13 @@
{{ $anthology_id := .Params.anthology_id }}
{{ $volume_id := slicestr .Params.anthology_id 0 3 }}
{{ $paper := index (index .Site.Data.papers $volume_id) .Params.anthology_id }}
{{ $bibfile := printf "/papers/%s/%s/%s.bib" (slicestr $volume_id 0 1) $volume_id $anthology_id }}
<section id="main">
<h2 id="title">
<a href="{{ $paper.url }}">{{ $paper.title_html | safeHTML }}</a>
{{ with $paper.pdf }}
<a href="{{ . }}">{{ $paper.title_html | safeHTML }}</a>
{{ else }}
{{ $paper.title_html | safeHTML }}
{{ end }}
</h2>
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The paper page has a / at the end.

{{ with $paper.author }}
<p class="lead">
Expand Down Expand Up @@ -139,7 +142,7 @@ <h5 class="card-title">Abstract</h5>
<dt>Pages:</dt>
<dd>{{ with $paper.pages }}{{ . }}{{ end }}</dd>
<dt>URL:</dt>
<dd>{{ with $paper.url }}<a href="{{ . }}">{{ . }}</a>{{ end }}</dd>
<dd>{{ with $paper.pdf }}<a href="{{ . }}">{{ . }}</a>{{ end }}</dd>
<dt>DOI:</dt>
<dd>{{ with $paper.doi }}<a href="http://dx.doi.org/{{ . }}" title="To the current version of the paper by DOI">{{ . }}</a>{{ end }}</dd>
<!--
Expand All @@ -150,19 +153,17 @@ <h5 class="card-title">Abstract</h5>
-->
<dt class="acl-button-row">Bib Export formats:</dt>
<dd class="acl-button-row">
{{ if (fileExists (printf "/data-export/%s" $bibfile)) }}
<a class="btn btn-secondary btn-sm" href="{{ $bibfile | relURL }}">BibTeX</a>
{{ if (fileExists (printf "/data-export/%s.bib" $anthology_id)) }}
<a class="btn btn-secondary btn-sm" href="{{ (printf "/%s.bib" $anthology_id) | relURL }}">BibTeX</a>
{{ end }}
{{ $expfile := printf "/papers/%s/%s/%s.xml" (slicestr $volume_id 0 1) $volume_id $anthology_id }}
{{ if (fileExists (printf "/data-export/%s" $expfile)) }}
<a class="btn btn-secondary btn-sm" href="{{ $expfile | relURL }}">MODS XML</a>
{{ if (fileExists (printf "/data-export/%s.xml" $anthology_id)) }}
<a class="btn btn-secondary btn-sm" href="{{ (printf "/%s.xml" $anthology_id) | relURL }}">MODS XML</a>
{{ end }}
{{ $endfile := printf "/papers/%s/%s/%s.endf" (slicestr $volume_id 0 1) $volume_id $anthology_id }}
{{ if (fileExists (printf "/data-export/%s" $endfile)) }}
<a class="btn btn-secondary btn-sm" href="{{ $endfile | relURL }}">EndNote</a>
{{ if (fileExists (printf "/data-export/%s.endf" $anthology_id)) }}
<a class="btn btn-secondary btn-sm" href="{{ (printf "/%s.endf" $anthology_id) | relURL }}">EndNote</a>
{{ end }}
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use the canonical URL prefix instead of the deeply nested prefix.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same concern as with $bibfile above.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So do you suggest that we generate the bib, endf, etc. links in the YAML, and only use those if present?

Copy link
Member Author

@mjpost mjpost Sep 22, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is what I ended up doing for clarity.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is nice, but it has the same issue with local browsing: the links will always point to the live server.

{{ if (fileExists (printf "/data-export/%s" $bibfile)) }}
<button type="button" class="btn btn-clipboard btn-secondary btn-sm d-none" data-clipboard-text="{{ readFile (printf "/data-export/%s" $bibfile) }}"><i class="far fa-clipboard pr-2"></i>Copy BibTeX to Clipboard</button>
{{ if (fileExists (printf "/data-export/%s.bib" $anthology_id)) }}
<button type="button" class="btn btn-clipboard btn-secondary btn-sm d-none" data-clipboard-text="{{ readFile (printf "/data-export/%s.bib" $anthology_id) }}"><i class="far fa-clipboard pr-2"></i>Copy BibTeX to Clipboard</button>
{{ end }}
</dd>

Expand All @@ -187,14 +188,14 @@ <h5 class="card-title">Abstract</h5>
</a>
{{ end }}
{{ else }}
{{ with $paper.url }}
<a class="btn btn-primary" href="{{ . }}" title="Open PDF of '{{ $paper.title | htmlEscape }}'">
{{ with $paper.pdf }}
<a class="btn btn-primary" href="{{ . }}" title="Open PDF of '{{ $paper.title | htmlEscape }}'">
mjpost marked this conversation as resolved.
Show resolved Hide resolved
<i class="far fa-file-pdf"></i><span class="pl-2">PDF</span>
</a>
{{ end }}
{{ end }}
{{ if (fileExists (printf "/data-export/%s" $bibfile)) }}
<a class="btn btn-secondary" href="{{ $bibfile | relURL }}" title="Export '{{ $paper.title | htmlEscape }}' to bib format">
{{ if (fileExists (printf "/data-export/%s.bib" $anthology_id)) }}
<a class="btn btn-secondary" href="{{ (printf "/%s.bib" $anthology_id) | relURL }}" title="Export '{{ $paper.title | htmlEscape }}' to bib format">
<i class="fas fa-file-export"></i><span class="pl-2 transform-lower-sm">Bib</span><span class="d-none d-sm-inline">TeX</span>
</a>
{{ end }}
Expand Down
10 changes: 7 additions & 3 deletions hugo/layouts/volumes/single.html
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@
{{ $paper := index .Site.Data.volumes .Params.anthology_id }}
<section id="main">
<h2 id="title">
<a href="{{ $paper.url }}">{{ $paper.title_html | safeHTML }}</a>
{{ with $paper.pdf }}
<a href="{{ . }}">{{ $paper.title_html | safeHTML }}</a>
{{ else }}
{{ $paper.title_html | safeHTML }}
{{ end }}
</h2>
{{ with $paper.editor }}
<p class="lead">
Expand Down Expand Up @@ -63,7 +67,7 @@ <h2 id="title">
<dt>Publisher:</dt>
<dd>{{ with $paper.publisher }}{{ . }}{{ end }}</dd>
<dt>URL:</dt>
<dd>{{ with $paper.url }}<a href="{{ . }}">{{ . }}</a>{{ end }}</dd>
<dd>{{ with $paper.pdf }}<a href="{{ . }}">{{ . }}</a>{{ end }}</dd>
<dt>DOI:</dt>
<dd>{{ with $paper.doi }}<a href="http://dx.doi.org/{{ . }}" title="To the current version of the paper by DOI">{{ . }}</a>{{ end }}</dd>
<!--
Expand Down Expand Up @@ -92,7 +96,7 @@ <h2 id="title">

<!-- Most of the styling for this block is set in _papers.scss to avoid clutter -->
<div class="acl-paper-link-block">
{{ with $paper.url }}
{{ with $paper.pdf }}
<a class="btn btn-primary" href="{{ . }}" title="Open PDF of '{{ $paper.title | htmlEscape }}'">
<i class="far fa-file-pdf"></i><span class="pl-2">PDF&nbsp;<small>(full)</small></span>
</a>
Expand Down
8 changes: 2 additions & 6 deletions hugo/static/.htaccess
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ RewriteRule ^papers/[A-Za-z]/[A-Za-z][0-9][0-9]/([A-Za-z])([0-9][0-9])\-([0-9][0
#

## PDF redirection
# Canonical URL (a plain ACL ID with no file extension, e.g., P17-1069 loads P/P17/P17-1069.pdf)
RewriteRule ^([A-Za-z])([0-9][0-9])\-([0-9][0-9][0-9][0-9])$ /anthology-files/pdf/$1/$1$2/$1$2-$3.pdf [L,NC]
# Canonical URL (a plain ACL ID with no file extension) -> landing page
RewriteRule ^([A-Za-z])([0-9][0-9])\-([0-9][0-9][0-9][0-9])\/?$ papers/$1/$1$2/$1$2-$3/ [L,NC]

# Volume URLs (e.g., P17-1 loads P/P17/P17-1.pdf)
RewriteRule ^([A-Za-z])([0-9][0-9])\-([0-9]{1,2})$ /anthology-files/pdf/$1/$1$2/$1$2-$3.pdf [L,NC]
Expand All @@ -68,10 +68,6 @@ RewriteRule ^([A-Za-z])([0-9][0-9])\-([0-9][0-9][0-9][0-9])([ve][0-9]+)$ /anthol
# Attachments (e.g., P17-1069.Poster.pdf loads /anthology-files/attachments/P/P17/P17-1069.Poster.pdf)
RewriteRule ^attachments/([A-Za-z])([0-9][0-9])\-([0-9][0-9][0-9][0-9])(\..*)?$ /anthology-files/attachments/$1/$1$2/$1$2-$3$4 [L,NC]

## Paper and author pages and bibtex
# The Paper metadata page (e.g., P17-1069/ loads papers/P/P17/P17-1069/index.html)
RewriteRule ^([A-Za-z])([0-9][0-9])\-([0-9][0-9][0-9][0-9])\/$ papers/$1/$1$2/$1$2-$3/index.html [L,NC]

# Redirects for bib, MODS XML, Endnote (e.g., /P17-1069.bib loads papers/P/P17/P17-1069.bib)
RewriteRule ^([A-Za-z])([0-9][0-9])\-([0-9][0-9][0-9][0-9])\.([a-z]+)$ papers/$1/$1$2/$1$2-$3.$4 [L,NC]

Expand Down