From 90031a2058184fd27363fa50315683abd945b137 Mon Sep 17 00:00:00 2001
From: Alex Osborne <ato@meshy.org>
Date: Tue, 15 Nov 2016 23:24:59 +1100
Subject: [PATCH] add --method-filter option

---
 warcprox/main.py         |  2 ++
 warcprox/writerthread.py | 15 ++++++++++-----
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/warcprox/main.py b/warcprox/main.py
index a127016..5045298 100644
--- a/warcprox/main.py
+++ b/warcprox/main.py
@@ -77,6 +77,8 @@ def _build_arg_parser(prog=os.path.basename(sys.argv[0])):
             default='sha1', help='digest algorithm, one of {}'.format(', '.join(hash_algos)))
     arg_parser.add_argument('--base32', dest='base32', action='store_true',
             default=False, help='write digests in Base32 instead of hex')
+    arg_parser.add_argument('--method-filter', metavar='HTTP_METHOD',
+                            action='append', help='only record requests with the given http method(s) (can be used more than once)')
     arg_parser.add_argument('--stats-db-file', dest='stats_db_file',
             default='./warcprox-stats.db', help='persistent statistics database file; empty string or /dev/null disables statistics tracking')
     arg_parser.add_argument('-P', '--playback-port', dest='playback_port',
diff --git a/warcprox/writerthread.py b/warcprox/writerthread.py
index 002b897..92bd416 100644
--- a/warcprox/writerthread.py
+++ b/warcprox/writerthread.py
@@ -56,6 +56,7 @@ class WarcWriterThread(threading.Thread):
         self.listeners = listeners
         self.options = options
         self.idle = None
+        self.method_filter = set(method.upper() for method in self.options.method_filter or [])
 
     def run(self):
         if self.options.profile:
@@ -63,6 +64,9 @@ class WarcWriterThread(threading.Thread):
         else:
             self._run()
 
+    def _filter_accepts(self, recorded_url):  # True if no --method-filter was given, or the url's http method is in the filter
+        return not self.method_filter or recorded_url.method.upper() in self.method_filter  # filter entries are stored upper-cased, so the match is case-insensitive
+
     def _run(self):
         while not self.stop.is_set():
             try:
@@ -76,11 +80,12 @@ class WarcWriterThread(threading.Thread):
 
                         recorded_url = self.recorded_url_q.get(block=True, timeout=0.5)
                         self.idle = None
-                        if self.dedup_db:
-                            warcprox.dedup.decorate_with_dedup_info(self.dedup_db,
-                                    recorded_url, base32=self.options.base32)
-                        records = self.writer_pool.write_records(recorded_url)
-                        self._final_tasks(recorded_url, records)
+                        if self._filter_accepts(recorded_url):
+                            if self.dedup_db:
+                                warcprox.dedup.decorate_with_dedup_info(self.dedup_db,
+                                        recorded_url, base32=self.options.base32)
+                            records = self.writer_pool.write_records(recorded_url)
+                            self._final_tasks(recorded_url, records)
 
                         # try to release resources in a timely fashion
                         if recorded_url.response_recorder and recorded_url.response_recorder.tempfile: