mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
Merge pull request #21 from nla/method-filter
add --method-filter option
This commit is contained in:
commit
f948850692
@ -77,6 +77,8 @@ def _build_arg_parser(prog=os.path.basename(sys.argv[0])):
|
||||
default='sha1', help='digest algorithm, one of {}'.format(', '.join(hash_algos)))
|
||||
arg_parser.add_argument('--base32', dest='base32', action='store_true',
|
||||
default=False, help='write digests in Base32 instead of hex')
|
||||
arg_parser.add_argument('--method-filter', metavar='HTTP_METHOD',
|
||||
action='append', help='only record requests with the given http method(s) (can be used more than once)')
|
||||
arg_parser.add_argument('--stats-db-file', dest='stats_db_file',
|
||||
default='./warcprox-stats.db', help='persistent statistics database file; empty string or /dev/null disables statistics tracking')
|
||||
arg_parser.add_argument('-P', '--playback-port', dest='playback_port',
|
||||
|
@ -56,6 +56,7 @@ class WarcWriterThread(threading.Thread):
|
||||
self.listeners = listeners
|
||||
self.options = options
|
||||
self.idle = None
|
||||
self.method_filter = set(method.upper() for method in self.options.method_filter or [])
|
||||
|
||||
def run(self):
|
||||
if self.options.profile:
|
||||
@ -63,6 +64,9 @@ class WarcWriterThread(threading.Thread):
|
||||
else:
|
||||
self._run()
|
||||
|
||||
def _filter_accepts(self, recorded_url):
    """Return True if `recorded_url` passes the HTTP method filter.

    An empty `self.method_filter` accepts everything without inspecting
    `recorded_url`. Otherwise the url's method, upper-cased, must be a
    member of `self.method_filter`.
    """
    allowed_methods = self.method_filter
    if not allowed_methods:
        # no filter configured: accept every url
        return True
    return recorded_url.method.upper() in allowed_methods
|
||||
|
||||
def _run(self):
|
||||
while not self.stop.is_set():
|
||||
try:
|
||||
@ -76,11 +80,12 @@ class WarcWriterThread(threading.Thread):
|
||||
|
||||
recorded_url = self.recorded_url_q.get(block=True, timeout=0.5)
|
||||
self.idle = None
|
||||
if self.dedup_db:
|
||||
warcprox.dedup.decorate_with_dedup_info(self.dedup_db,
|
||||
recorded_url, base32=self.options.base32)
|
||||
records = self.writer_pool.write_records(recorded_url)
|
||||
self._final_tasks(recorded_url, records)
|
||||
if self._filter_accepts(recorded_url):
|
||||
if self.dedup_db:
|
||||
warcprox.dedup.decorate_with_dedup_info(self.dedup_db,
|
||||
recorded_url, base32=self.options.base32)
|
||||
records = self.writer_pool.write_records(recorded_url)
|
||||
self._final_tasks(recorded_url, records)
|
||||
|
||||
# try to release resources in a timely fashion
|
||||
if recorded_url.response_recorder and recorded_url.response_recorder.tempfile:
|
||||
|
Loading…
x
Reference in New Issue
Block a user