1
0
mirror of https://github.com/webrecorder/pywb.git synced 2025-03-15 00:03:28 +01:00

zipnum: when using a .loc file, resolve shard paths relative to the directory containing the .loc file rather than the current working directory (fixes #173)

This commit is contained in:
Ilya Kreymer 2016-03-22 11:31:08 -07:00
parent 33f60efb96
commit 5fd49f35ee
4 changed files with 15 additions and 6 deletions

View File

@ -175,8 +175,8 @@ class CDXServer(BaseCDXServer):
if filename.endswith(('.summary', '.idx')): if filename.endswith(('.summary', '.idx')):
return ZipNumCluster(filename, config) return ZipNumCluster(filename, config)
# no warning for .loc # no warning for .loc or .gz (zipnum)
if not filename.endswith('.loc'): if not filename.endswith(('.loc', '.gz')):
logging.warn('skipping unrecognized URI: %s', filename) logging.warn('skipping unrecognized URI: %s', filename)
return None return None

View File

@ -54,11 +54,20 @@ class LocMapResolver(object):
# update loc file mtime # update loc file mtime
self.loc_mtime = new_mtime self.loc_mtime = new_mtime
local_dir = os.path.dirname(self.loc_filename)
def res_path(pathname):
if '://' not in pathname:
pathname = os.path.join(local_dir, pathname)
return pathname
logging.debug('Loading loc from: ' + self.loc_filename) logging.debug('Loading loc from: ' + self.loc_filename)
with open(self.loc_filename, 'r') as fh: with open(self.loc_filename, 'r') as fh:
for line in fh: for line in fh:
parts = line.rstrip().split('\t') parts = line.rstrip().split('\t')
self.loc_map[parts[0]] = parts[1:]
paths = [res_path(pathname) for pathname in parts[1:]]
self.loc_map[parts[0]] = paths
def __call__(self, part, query): def __call__(self, part, query):
return self.loc_map[part] return self.loc_map[part]

View File

@ -1,3 +1,3 @@
bar ./sample_archive/invalid bar invalid
foo2 foo2
zipnum ./sample_archive/x-bad-path-to-ignore-x ./sample_archive/zipcdx/zipnum-sample.cdx.gz zipnum x-bad-path-to-ignore-x zipnum-sample.cdx.gz

View File

@ -1 +1 @@
zipnum ./sample_archive/x-bad-path-to-ignore-x ./sample_archive/zipcdx/zipnum-sample.cdx.gz zipnum x-bad-path-to-ignore-x zipnum-sample.cdx.gz