From 4d31c17d4ccf55857264476fbd295ad3f43599c5 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Wed, 23 Jul 2014 12:56:25 -0700 Subject: [PATCH 1/5] archivalrouter: make SCRIPT_NAME key lookup default to '', addresses #39 --- pywb/framework/archivalrouter.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pywb/framework/archivalrouter.py b/pywb/framework/archivalrouter.py index 749654ba..861fad90 100644 --- a/pywb/framework/archivalrouter.py +++ b/pywb/framework/archivalrouter.py @@ -49,12 +49,13 @@ class ArchivalRouter(object): def parse_request(self, route, env, matcher, coll, request_uri, use_abs_prefix=False): matched_str = matcher.group(0) + rel_prefix = env.get('SCRIPT_NAME', '') + '/' + if matched_str: - rel_prefix = env['SCRIPT_NAME'] + '/' + matched_str + '/' + rel_prefix += matched_str + '/' # remove the '/' + rel_prefix part of uri wb_url_str = request_uri[len(matched_str) + 2:] else: - rel_prefix = env['SCRIPT_NAME'] + '/' # the request_uri is the wb_url, since no coll wb_url_str = request_uri[1:] @@ -157,7 +158,7 @@ class ReferRedirect: path = ref_split.path - app_path = env['SCRIPT_NAME'] + app_path = env.get('SCRIPT_NAME', '') if app_path: # must start with current app name, if not root From e513b3755c568d2a7280c1b07350f1c6c37bc475 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Wed, 23 Jul 2014 15:27:01 -0700 Subject: [PATCH 2/5] cdxindexing: encode unicode filenames using system encoding, add test for unicode filenames --- pywb/warc/cdxindexer.py | 4 +++- pywb/warc/test/test_indexing.py | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pywb/warc/cdxindexer.py b/pywb/warc/cdxindexer.py index 585b5711..60dd5ad5 100644 --- a/pywb/warc/cdxindexer.py +++ b/pywb/warc/cdxindexer.py @@ -109,7 +109,6 @@ def cdx_filename(filename): #================================================================= def write_multi_cdx_index(output, inputs, **options): - # write one cdx per dir if output != '-' and os.path.isdir(output): for fullpath, filename in iter_file_or_dir(inputs): @@ -145,6 +144,9 @@ def write_multi_cdx_index(output, inputs, **options): def write_cdx_index(outfile, infile, filename, **options): writer_cls = options.get('writer_cls') + if type(filename) is unicode: + filename = filename.encode(sys.getfilesystemencoding()) + if writer_cls: pass elif options.get('sort'): diff --git a/pywb/warc/test/test_indexing.py b/pywb/warc/test/test_indexing.py index 88a3d3ff..b90e9d65 100644 --- a/pywb/warc/test/test_indexing.py +++ b/pywb/warc/test/test_indexing.py @@ -130,8 +130,8 @@ com,example)/?example=1 20140103030321 http://example.com?example=1 text/html 20 org,iana)/domains/example 20140128051539 http://www.iana.org/domains/example text/html 302 JZ622UA23G5ZU6Y3XAKH4LINONUEICEG - - 577 2907 example.warc.gz 4 -# test writing to temp dir ->>> cli_lines_with_dir(TEST_WARC_DIR + 'example.warc.gz') +# test writing to temp dir, also use unicode filename +>>> cli_lines_with_dir(unicode(TEST_WARC_DIR + 'example.warc.gz')) example.cdx com,example)/?example=1 20140103030321 http://example.com?example=1 text/html 200 B2LTWWPUOYAH7UIPQ7ZUPQ4VMBSVC36A - - 1043 333 example.warc.gz org,iana)/domains/example 20140128051539 http://www.iana.org/domains/example text/html 302 JZ622UA23G5ZU6Y3XAKH4LINONUEICEG - - 577 2907 example.warc.gz From 8ea7f5d3a063c38cd0ee2a82dace43c955a72c9f Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Wed, 23 Jul 2014 15:30:01 -0700 Subject: [PATCH 3/5] framed replay: don't use is_timegate to determine frame usage due to potential ambiguity, memento will need to use the mp_ modifier --- pywb/webapp/replay_views.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pywb/webapp/replay_views.py b/pywb/webapp/replay_views.py index 0b8bb528..c4e0f4f3 100644 --- a/pywb/webapp/replay_views.py +++ b/pywb/webapp/replay_views.py @@ -67,8 +67,7 @@ class BaseContentView(object): # (not supported in proxy mode) if (self.is_frame_mode and wbrequest.wb_url and not wbrequest.wb_url.mod and - not wbrequest.options['is_proxy'] and - not wbrequest.options.get('is_timegate', False)): + not wbrequest.options['is_proxy']): embed_url = wbrequest.wb_url.to_str(mod=self._mp_mod) timestamp = datetime_to_timestamp(datetime.datetime.utcnow()) From 22c210131a474ab224f6c83b9bb05ca7ed72f681 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Wed, 23 Jul 2014 15:31:04 -0700 Subject: [PATCH 4/5] bump version to 0.5.1 --- CHANGES.rst | 8 ++++++++ README.rst | 2 +- setup.py | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 21aff406..1e2794df 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,11 @@ +pywb 0.5.1 changelist +~~~~~~~~~~~~~~~~~~~~~ + +* cdxindexer supports unicode filenames + +* SCRIPT_NAME now defaults to '' if not present + + pywb 0.5.0 changelist ~~~~~~~~~~~~~~~~~~~~~ diff --git a/README.rst b/README.rst index 632aca5b..9c4b380d 100644 --- a/README.rst +++ b/README.rst @@ -1,4 +1,4 @@ -PyWb 0.5.0 +PyWb 0.5.1 ========== .. image:: https://travis-ci.org/ikreymer/pywb.png?branch=develop diff --git a/setup.py b/setup.py index 305a432c..3e89abed 100755 --- a/setup.py +++ b/setup.py @@ -34,7 +34,7 @@ class PyTest(TestCommand): setup( name='pywb', - version='0.5.0', + version='0.5.1', url='https://github.com/ikreymer/pywb', author='Ilya Kreymer', author_email='ikreymer@gmail.com', From bdf69ff1a9c7066e131f5bf3556f9eecb23bc607 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Thu, 24 Jul 2014 16:38:39 -0700 Subject: [PATCH 5/5] tweak CHANGES.rst --- CHANGES.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 1e2794df..a7848d64 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,7 +1,8 @@ pywb 0.5.1 changelist ~~~~~~~~~~~~~~~~~~~~~ +minor fixes: -* cdxindexer supports unicode filenames +* cdxindexer accepts unicode filenames, encodes via sys encoding * SCRIPT_NAME now defaults to '' if not present