Skip to content

Commit

Permalink
docker: update to pywb rc7
Browse files Browse the repository at this point in the history
content type redirects: support default block list, with specific allow rules, per #54
  • Loading branch information
ikreymer committed May 1, 2020
1 parent af3f601 commit 2a9c078
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 15 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

# Based on standard pywb fork
- FROM webrecorder/pywb:2.4.0-rc6-test
+ FROM webrecorder/pywb:2.4.0-rc7

USER root
WORKDIR /ukwa_pywb
Expand Down
10 changes: 10 additions & 0 deletions config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,20 @@ collections:
Expires: 'Thu, 01 Jan 1970 00:00:00 GMT'

content_type_redirects:
# allow rules: matching content types are served without a redirect
'text/': 'allow'
'image/': 'allow'
'video/': 'allow'
'audio/': 'allow'
'application/javascript': 'allow'

'text/rtf': 'https://example.com/viewer?{query}'
'application/pdf': 'https://example.com/viewer?{query}'
'application/': 'https://example.com/blocked?{query}'

# default redirects
'<any-download>': 'https://example.com/blocked?{query}'
'*': 'https://example.com/blocked?{query}'


# open access (with access controls)
Expand Down
8 changes: 8 additions & 0 deletions ukwa_pywb/test/config_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,18 @@ collections:
Expires: 'Thu, 01 Jan 1970 00:00:00 GMT'

content_type_redirects:
'text/': 'allow'
'image/': 'allow'
'video/': 'allow'
'audio/': 'allow'

'text/rtf': 'https://example.com/viewer?{query}'
'application/pdf': 'https://example.com/viewer?{query}'
'application/': 'https://example.com/blocked?{query}'

# default redirects
'<any-download>': 'https://example.com/blocked?{query}'
'*': 'https://example.com/blocked?{query}'

pywb-no-locks:
index_paths: ./integration-test/test-data/
Expand Down
36 changes: 22 additions & 14 deletions ukwa_pywb/ukwa_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,25 +196,33 @@ def render_content(self, wb_url_str, coll_config, environ):
if default_response.status_headers.get('preference-applied') == 'raw':
return default_response

content_type = default_response.status_headers.get("content-type")

redirect_url = None

if content_type:
content_type = content_type.split(";", 1)[0]
redirect_url = ct_redirects.get(content_type)
if redirect_url is None:
redirect_url = ct_redirects.get(content_type.split("/")[0] + "/")

# if no content-type match, check content-disposition
if not redirect_url:
content_disp = default_response.status_headers.get("content-disposition")
if content_disp and 'attachment' in content_disp:
redirect_url = ct_redirects.get('<any-download>')

if not redirect_url:
# a content-disposition 'attachment' header takes precedence: use the <any-download> rule
content_disp = default_response.status_headers.get("content-disposition")
if content_disp and 'attachment' in content_disp:
redirect_url = ct_redirects.get('<any-download>')

# attempt to find rule by content-type
if redirect_url is None:
content_type = default_response.status_headers.get("content-type")
if content_type:
content_type = content_type.split(";", 1)[0]
redirect_url = ct_redirects.get(content_type)
# find by content-type prefix, eg: text/
if redirect_url is None:
redirect_url = ct_redirects.get(content_type.split("/")[0] + "/")

# default rule if no other matches
if redirect_url is None:
redirect_url = ct_redirects.get('*')

# if no redirect or rule is 'allow', then continue
if not redirect_url or redirect_url == 'allow':
return default_response

# otherwise, redirect to specified url
wb_url = WbUrl(wb_url_str)
wb_url.mod = 'id_'
loc = self.get_full_prefix(environ) + str(wb_url)
Expand Down

0 comments on commit 2a9c078

Please sign in to comment.