From e5ef51363c2dfc66e91ecf11491de34e5bf48f92 Mon Sep 17 00:00:00 2001
From: Ilya Kreymer <ikreymer@gmail.com>
Date: Thu, 31 Mar 2016 13:09:57 -0700
Subject: [PATCH] zipnum: backport fix for #173, paths specified in a zipnum
 .loc file are relative to the .loc file, not to the working dir of the
 application; warnings: don't warn on .gz cdx files

---
 pywb/cdx/cdxserver.py                   |  4 ++--
 pywb/cdx/test/test_zipnum.py            |  2 +-
 pywb/cdx/zipnum.py                      | 11 ++++++++++-
 sample_archive/zipcdx/zipnum-bad.loc    |  4 ++--
 sample_archive/zipcdx/zipnum-sample.loc |  2 +-
 5 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/pywb/cdx/cdxserver.py b/pywb/cdx/cdxserver.py
index 5da0d621..ca97fdec 100644
--- a/pywb/cdx/cdxserver.py
+++ b/pywb/cdx/cdxserver.py
@@ -176,8 +176,8 @@ class CDXServer(BaseCDXServer):
         if filename.endswith(('.summary', '.idx')):
             return ZipNumCluster(filename, config)
 
-        # no warning for .loc
-        if not filename.endswith('.loc'):
+        # no warning for .loc or .gz (zipnum)
+        if not filename.endswith(('.loc', '.gz')):
             logging.warn('skipping unrecognized URI: %s', filename)
 
         return None
diff --git a/pywb/cdx/test/test_zipnum.py b/pywb/cdx/test/test_zipnum.py
index 4b0336ae..259b966b 100644
--- a/pywb/cdx/test/test_zipnum.py
+++ b/pywb/cdx/test/test_zipnum.py
@@ -149,7 +149,7 @@ Traceback (most recent call last):
 Exception: No Locations Found for: foo
 
 
->>> zip_test_err(url='http://iana.org/x', matchType='exact')
+>>> zip_test_err(url='http://iana.org/x', matchType='exact')   # doctest: +IGNORE_EXCEPTION_DETAIL
 Traceback (most recent call last):
 IOError: [Errno 2] No such file or directory: './sample_archive/invalid'
 
diff --git a/pywb/cdx/zipnum.py b/pywb/cdx/zipnum.py
index d0b832d2..1b149822 100644
--- a/pywb/cdx/zipnum.py
+++ b/pywb/cdx/zipnum.py
@@ -51,11 +51,20 @@ class LocMapResolver(object):
         # update loc file mtime
         self.loc_mtime = new_mtime
 
+        local_dir = os.path.dirname(self.loc_filename)
+
+        def res_path(pathname):
+            if '://' not in pathname:
+                pathname = os.path.join(local_dir, pathname)
+            return pathname
+
         logging.debug('Loading loc from: ' + self.loc_filename)
         with open(self.loc_filename, 'rb') as fh:
             for line in fh:
                 parts = line.rstrip().split('\t')
-                self.loc_map[parts[0]] = parts[1:]
+
+                paths = [res_path(pathname) for pathname in parts[1:]]
+                self.loc_map[parts[0]] = paths
 
     def __call__(self, part, query):
         return self.loc_map[part]
diff --git a/sample_archive/zipcdx/zipnum-bad.loc b/sample_archive/zipcdx/zipnum-bad.loc
index d113a330..8c6bd330 100644
--- a/sample_archive/zipcdx/zipnum-bad.loc
+++ b/sample_archive/zipcdx/zipnum-bad.loc
@@ -1,3 +1,3 @@
-bar	./sample_archive/invalid
+bar	invalid
 foo2
-zipnum	./sample_archive/x-bad-path-to-ignore-x	./sample_archive/zipcdx/zipnum-sample.cdx.gz
+zipnum	x-bad-path-to-ignore-x	zipnum-sample.cdx.gz
diff --git a/sample_archive/zipcdx/zipnum-sample.loc b/sample_archive/zipcdx/zipnum-sample.loc
index 8a4d1210..c0f24a88 100644
--- a/sample_archive/zipcdx/zipnum-sample.loc
+++ b/sample_archive/zipcdx/zipnum-sample.loc
@@ -1 +1 @@
-zipnum	./sample_archive/x-bad-path-to-ignore-x	./sample_archive/zipcdx/zipnum-sample.cdx.gz
+zipnum	x-bad-path-to-ignore-x	zipnum-sample.cdx.gz