diff --git a/pywb/cdx/cdxserver.py b/pywb/cdx/cdxserver.py index bfdf5741..f0869d0f 100644 --- a/pywb/cdx/cdxserver.py +++ b/pywb/cdx/cdxserver.py @@ -175,8 +175,8 @@ class CDXServer(BaseCDXServer): if filename.endswith(('.summary', '.idx')): return ZipNumCluster(filename, config) - # no warning for .loc - if not filename.endswith('.loc'): + # no warning for .loc or .gz (zipnum) + if not filename.endswith(('.loc', '.gz')): logging.warn('skipping unrecognized URI: %s', filename) return None diff --git a/pywb/cdx/zipnum.py b/pywb/cdx/zipnum.py index f44a6b6a..9a51ae7f 100644 --- a/pywb/cdx/zipnum.py +++ b/pywb/cdx/zipnum.py @@ -54,11 +54,20 @@ class LocMapResolver(object): # update loc file mtime self.loc_mtime = new_mtime + local_dir = os.path.dirname(self.loc_filename) + + def res_path(pathname): + if '://' not in pathname: + pathname = os.path.join(local_dir, pathname) + return pathname + logging.debug('Loading loc from: ' + self.loc_filename) with open(self.loc_filename, 'r') as fh: for line in fh: parts = line.rstrip().split('\t') - self.loc_map[parts[0]] = parts[1:] + + paths = [res_path(pathname) for pathname in parts[1:]] + self.loc_map[parts[0]] = paths def __call__(self, part, query): return self.loc_map[part] diff --git a/sample_archive/zipcdx/zipnum-bad.loc b/sample_archive/zipcdx/zipnum-bad.loc index d113a330..8c6bd330 100644 --- a/sample_archive/zipcdx/zipnum-bad.loc +++ b/sample_archive/zipcdx/zipnum-bad.loc @@ -1,3 +1,3 @@ -bar ./sample_archive/invalid +bar invalid foo2 -zipnum ./sample_archive/x-bad-path-to-ignore-x ./sample_archive/zipcdx/zipnum-sample.cdx.gz +zipnum x-bad-path-to-ignore-x zipnum-sample.cdx.gz diff --git a/sample_archive/zipcdx/zipnum-sample.loc b/sample_archive/zipcdx/zipnum-sample.loc index 8a4d1210..c0f24a88 100644 --- a/sample_archive/zipcdx/zipnum-sample.loc +++ b/sample_archive/zipcdx/zipnum-sample.loc @@ -1 +1 @@ -zipnum ./sample_archive/x-bad-path-to-ignore-x ./sample_archive/zipcdx/zipnum-sample.cdx.gz +zipnum x-bad-path-to-ignore-x zipnum-sample.cdx.gz