mirror of
https://github.com/internetarchive/warcprox.git
synced 2025-01-18 13:22:09 +01:00
add license headers
This commit is contained in:
parent
ad661dcead
commit
2c65ff89fa
@ -1,4 +1,24 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
#
|
||||||
|
# run-benchmarks.py - some benchmarking code for warcprox
|
||||||
|
#
|
||||||
|
# Copyright (C) 2015-2016 Internet Archive
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
||||||
|
# USA.
|
||||||
|
#
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import aiohttp
|
import aiohttp
|
||||||
@ -89,7 +109,7 @@ class AsyncClient(object):
|
|||||||
logging.debug("finished reading from %s", url)
|
logging.debug("finished reading from %s", url)
|
||||||
r.close()
|
r.close()
|
||||||
break
|
break
|
||||||
|
|
||||||
@asyncio.coroutine
|
@asyncio.coroutine
|
||||||
def one_request(self, url):
|
def one_request(self, url):
|
||||||
logging.debug("issuing request to %s", url)
|
logging.debug("issuing request to %s", url)
|
||||||
|
@ -1,5 +1,23 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# vim:set sw=4 et:
|
#
|
||||||
|
# dump-anydbm - dumps contents of dbm file to stdout
|
||||||
|
#
|
||||||
|
# Copyright (C) 2013-2016 Internet Archive
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
||||||
|
# USA.
|
||||||
#
|
#
|
||||||
|
|
||||||
"""
|
"""
|
||||||
@ -14,7 +32,7 @@ try:
|
|||||||
whichdb = dbm.whichdb
|
whichdb = dbm.whichdb
|
||||||
|
|
||||||
except:
|
except:
|
||||||
import anydbm
|
import anydbm
|
||||||
dbm = anydbm
|
dbm = anydbm
|
||||||
from whichdb import whichdb
|
from whichdb import whichdb
|
||||||
|
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# vim: set sw=4 et:
|
|
||||||
|
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
|
|
||||||
|
22
setup.py
22
setup.py
@ -1,4 +1,24 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
#
|
||||||
|
# setup.py - setuptools installation config for warcprox
|
||||||
|
#
|
||||||
|
# Copyright (C) 2013-2016 Internet Archive
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
||||||
|
# USA.
|
||||||
|
#
|
||||||
|
|
||||||
from setuptools.command.test import test as TestCommand
|
from setuptools.command.test import test as TestCommand
|
||||||
import sys
|
import sys
|
||||||
@ -30,7 +50,7 @@ except:
|
|||||||
deps.append('futures')
|
deps.append('futures')
|
||||||
|
|
||||||
setuptools.setup(name='warcprox',
|
setuptools.setup(name='warcprox',
|
||||||
version='2.0.dev4',
|
version='2.0.dev5',
|
||||||
description='WARC writing MITM HTTP/S proxy',
|
description='WARC writing MITM HTTP/S proxy',
|
||||||
url='https://github.com/internetarchive/warcprox',
|
url='https://github.com/internetarchive/warcprox',
|
||||||
author='Noah Levitt',
|
author='Noah Levitt',
|
||||||
|
@ -1,3 +1,24 @@
|
|||||||
|
#
|
||||||
|
# Dockerfile for warcprox tests
|
||||||
|
#
|
||||||
|
# Copyright (C) 2015-2016 Internet Archive
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
||||||
|
# USA.
|
||||||
|
#
|
||||||
|
|
||||||
FROM phusion/baseimage
|
FROM phusion/baseimage
|
||||||
MAINTAINER Noah Levitt <nlevitt@archive.org>
|
MAINTAINER Noah Levitt <nlevitt@archive.org>
|
||||||
|
|
||||||
|
@ -1,4 +1,24 @@
|
|||||||
# vim:set sw=4 et:
|
#
|
||||||
|
# tests/conftest.py - command line options for warcprox tests
|
||||||
|
#
|
||||||
|
# Copyright (C) 2015-2016 Internet Archive
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
||||||
|
# USA.
|
||||||
|
#
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
def pytest_addoption(parser):
|
def pytest_addoption(parser):
|
||||||
|
@ -1,9 +1,28 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
#
|
#
|
||||||
# Runs tests in a docker container. Also runs a temporary instance of rethinkdb
|
# tests/run-tests.sh - Runs tests in a docker container. Also runs a temporary
|
||||||
# inside the container. The tests run with rethinkdb features enabled, against
|
# instance of rethinkdb inside the container. The tests run with rethinkdb
|
||||||
# that instance of rethinkdb, and also run without rethinkdb features enabled.
|
# features enabled, against that instance of rethinkdb, and also run without
|
||||||
# With python 2.7 and 3.4.
|
# rethinkdb features enabled. With python 2.7 and 3.4.
|
||||||
|
#
|
||||||
|
# tests/conftest.py - command line options for warcprox tests
|
||||||
|
#
|
||||||
|
# Copyright (C) 2015-2016 Internet Archive
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
||||||
|
# USA.
|
||||||
#
|
#
|
||||||
# 😬
|
# 😬
|
||||||
#
|
#
|
||||||
|
@ -1,4 +1,26 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
#
|
||||||
|
# tests/single-threaded-proxy.py - single-threaded recording proxy, useful for
|
||||||
|
# debugging
|
||||||
|
#
|
||||||
|
# Copyright (C) 2015-2016 Internet Archive
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
||||||
|
# USA.
|
||||||
|
#
|
||||||
|
|
||||||
"""Useful for debugging. Does not write warcs."""
|
"""Useful for debugging. Does not write warcs."""
|
||||||
|
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
|
@ -1,4 +1,24 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
#
|
||||||
|
# tests/test_dump-anydbm.py - tests for dump-anydbm
|
||||||
|
#
|
||||||
|
# Copyright (C) 2013-2016 Internet Archive
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
||||||
|
# USA.
|
||||||
|
#
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import os
|
import os
|
||||||
|
@ -1,5 +1,24 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# vim: set sw=4 et:
|
#
|
||||||
|
# tests/test_warcprox.py - automated tests for warcprox
|
||||||
|
#
|
||||||
|
# Copyright (C) 2013-2016 Internet Archive
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
||||||
|
# USA.
|
||||||
|
#
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import threading
|
import threading
|
||||||
|
@ -1,10 +1,33 @@
|
|||||||
|
#
|
||||||
|
# warcprox/__init__.py - warcprox package main file, contains some utility code
|
||||||
|
#
|
||||||
|
# Copyright (C) 2013-2016 Internet Archive
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
||||||
|
# USA.
|
||||||
|
#
|
||||||
|
|
||||||
from argparse import Namespace as _Namespace
|
from argparse import Namespace as _Namespace
|
||||||
from pkg_resources import get_distribution as _get_distribution
|
from pkg_resources import get_distribution as _get_distribution
|
||||||
__version__ = _get_distribution('warcprox').version
|
__version__ = _get_distribution('warcprox').version
|
||||||
|
|
||||||
def digest_str(hash_obj, base32):
|
def digest_str(hash_obj, base32):
|
||||||
import base64
|
import base64
|
||||||
return hash_obj.name.encode('utf-8') + b':' + (base64.b32encode(hash_obj.digest()) if base32 else hash_obj.hexdigest().encode('ascii'))
|
return hash_obj.name.encode('utf-8') + b':' + (
|
||||||
|
base64.b32encode(hash_obj.digest()) if base32
|
||||||
|
else hash_obj.hexdigest().encode('ascii'))
|
||||||
|
|
||||||
class Options(_Namespace):
|
class Options(_Namespace):
|
||||||
def __getattr__(self, name):
|
def __getattr__(self, name):
|
||||||
|
@ -1,3 +1,27 @@
|
|||||||
|
#
|
||||||
|
# warcprox/bigtable.py - module for "big" RethinkDB table for deduplication;
|
||||||
|
# the table is "big" in the sense that it is designed to be usable as an index
|
||||||
|
# for playback software outside of warcprox, and contains information not
|
||||||
|
# needed merely for deduplication
|
||||||
|
#
|
||||||
|
# Copyright (C) 2015-2016 Internet Archive
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
||||||
|
# USA.
|
||||||
|
#
|
||||||
|
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
@ -1,3 +1,27 @@
|
|||||||
|
#
|
||||||
|
# warcprox/controller.py - contains WarcproxController class, responsible for
|
||||||
|
# starting up and shutting down the various components of warcprox, and for
|
||||||
|
# sending heartbeats to the service registry if configured to do so; also has
|
||||||
|
# some memory profiling capabilities
|
||||||
|
#
|
||||||
|
# Copyright (C) 2013-2016 Internet Archive
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
||||||
|
# USA.
|
||||||
|
#
|
||||||
|
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
@ -1,3 +1,24 @@
|
|||||||
|
#
|
||||||
|
# warcprox/dedup.py - identical payload digest deduplication
|
||||||
|
#
|
||||||
|
# Copyright (C) 2013-2016 Internet Archive
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
||||||
|
# USA.
|
||||||
|
#
|
||||||
|
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
@ -1,3 +1,25 @@
|
|||||||
|
#
|
||||||
|
# warcprox/kafkafeed.py - support for publishing information about archived
|
||||||
|
# urls to apache kafka
|
||||||
|
#
|
||||||
|
# Copyright (C) 2015-2016 Internet Archive
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
||||||
|
# USA.
|
||||||
|
#
|
||||||
|
|
||||||
import kafka
|
import kafka
|
||||||
import datetime
|
import datetime
|
||||||
import json
|
import json
|
||||||
|
@ -1,5 +1,25 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# vim:set sw=4 et:
|
#
|
||||||
|
# warcprox/main.py - entrypoint for warcprox executable, parses command line
|
||||||
|
# arguments, initializes components, starts controller, handles signals
|
||||||
|
#
|
||||||
|
# Copyright (C) 2013-2016 Internet Archive
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
||||||
|
# USA.
|
||||||
|
#
|
||||||
|
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
|
|
||||||
|
@ -1,3 +1,29 @@
|
|||||||
|
#
|
||||||
|
# warcprox/mitmproxy.py - man-in-the-middle http/s proxy code, handles http
|
||||||
|
# CONNECT method by creating a snakeoil certificate for the requested site,
|
||||||
|
# calling ssl.wrap_socket() on the client connection; connects to remote
|
||||||
|
# (proxied) host, possibly using tor if host tld is .onion and tor proxy is
|
||||||
|
# configured
|
||||||
|
#
|
||||||
|
# Copyright (C) 2012 Cygnos Corporation
|
||||||
|
# Copyright (C) 2013-2016 Internet Archive
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
||||||
|
# USA.
|
||||||
|
#
|
||||||
|
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -1,4 +1,24 @@
|
|||||||
# vim:set sw=4 et:
|
#
|
||||||
|
# warcprox/playback.py - rudimentary support for playback of urls archived by
|
||||||
|
# warcprox (not much used or maintained)
|
||||||
|
#
|
||||||
|
# Copyright (C) 2013-2016 Internet Archive
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
||||||
|
# USA.
|
||||||
|
#
|
||||||
|
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
|
|
||||||
|
@ -1,3 +1,24 @@
|
|||||||
|
#
|
||||||
|
# warcprox/stats.py - keeps statistics on what has been proxied, archived
|
||||||
|
#
|
||||||
|
# Copyright (C) 2013-2016 Internet Archive
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
||||||
|
# USA.
|
||||||
|
#
|
||||||
|
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
@ -1,3 +1,24 @@
|
|||||||
|
#
|
||||||
|
# warcprox/warc.py - assembles warc records
|
||||||
|
#
|
||||||
|
# Copyright (C) 2013-2016 Internet Archive
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
||||||
|
# USA.
|
||||||
|
#
|
||||||
|
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
@ -1,10 +1,24 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
#
|
#
|
||||||
"""
|
# warcprox/warcproxy.py - recording proxy, extends mitmproxy to record traffic,
|
||||||
WARC writing MITM HTTP/S proxy
|
# enqueue info on the recorded url queue
|
||||||
|
#
|
||||||
See README.rst or https://github.com/internetarchive/warcprox
|
# Copyright (C) 2013-2016 Internet Archive
|
||||||
"""
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
||||||
|
# USA.
|
||||||
|
#
|
||||||
|
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
|
|
||||||
|
@ -1,4 +1,23 @@
|
|||||||
# vim:set sw=4 et:
|
#
|
||||||
|
# warcprox/writer.py - warc writer, manages and writes records to warc files
|
||||||
|
#
|
||||||
|
# Copyright (C) 2013-2016 Internet Archive
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
||||||
|
# USA.
|
||||||
|
#
|
||||||
|
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
|
|
||||||
|
@ -1,3 +1,25 @@
|
|||||||
|
#
|
||||||
|
# warcprox/writerthread.py - warc writer thread, reads from the recorded url
|
||||||
|
# queue, writes warc records, runs final tasks after warc records are written
|
||||||
|
#
|
||||||
|
# Copyright (C) 2013-2016 Internet Archive
|
||||||
|
#
|
||||||
|
# This program is free software; you can redistribute it and/or
|
||||||
|
# modify it under the terms of the GNU General Public License
|
||||||
|
# as published by the Free Software Foundation; either version 2
|
||||||
|
# of the License, or (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
||||||
|
# USA.
|
||||||
|
#
|
||||||
|
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user