From 480768fc2ee3e9e29caf90cda9d160369f7787e7 Mon Sep 17 00:00:00 2001 From: Ed Summers Date: Tue, 11 Apr 2023 08:26:54 -0400 Subject: [PATCH 01/31] updated dependencies --- pywb/apps/rewriterapp.py | 2 +- pywb/rewrite/templateview.py | 10 +++++----- requirements.txt | 6 +++--- test_requirements.txt | 4 ++-- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/pywb/apps/rewriterapp.py b/pywb/apps/rewriterapp.py index a02927325..59b7d0ea4 100644 --- a/pywb/apps/rewriterapp.py +++ b/pywb/apps/rewriterapp.py @@ -64,7 +64,7 @@ def __init__(self, framed_replay=False, jinja_env=None, config=None, paths=None) if not jinja_env: jinja_env = JinjaEnv(globals={'static_path': 'static'}, - extensions=['jinja2.ext.i18n', 'jinja2.ext.with_']) + extensions=['jinja2.ext.i18n']) jinja_env.jinja_env.install_null_translations() self.jinja_env = jinja_env diff --git a/pywb/rewrite/templateview.py b/pywb/rewrite/templateview.py index 7f0cbc88e..39c76ee75 100644 --- a/pywb/rewrite/templateview.py +++ b/pywb/rewrite/templateview.py @@ -5,7 +5,7 @@ from six.moves.urllib.parse import urlsplit, quote -from jinja2 import Environment, TemplateNotFound, contextfunction, select_autoescape +from jinja2 import Environment, TemplateNotFound, pass_context, select_autoescape from jinja2 import FileSystemLoader, PackageLoader, ChoiceLoader from webassets.ext.jinja2 import AssetsExtension @@ -139,7 +139,7 @@ def get_translate(context): return loc_map.get(loc) def override_func(jinja_env, name): - @contextfunction + @pass_context def get_override(context, text): translate = get_translate(context) if not translate: @@ -158,7 +158,7 @@ def get_override(context, text): # Special _Q() function to return %-encoded text, necessary for use # with text in banner - @contextfunction + @pass_context def quote_gettext(context, text): translate = get_translate(context) if not translate: @@ -171,7 +171,7 @@ def quote_gettext(context, text): self.jinja_env.globals['_Q'] = quote_gettext 
self.jinja_env.globals['default_locale'] = default_locale - @contextfunction + @pass_context def switch_locale(context, locale): environ = context.get('env') curr_loc = environ.get('pywb_lang', '') @@ -188,7 +188,7 @@ def switch_locale(context, locale): return app_prefix + '/' + locale + request_uri - @contextfunction + @pass_context def get_locale_prefixes(context): environ = context.get('env') locale_prefixes = {} diff --git a/requirements.txt b/requirements.txt index 557bebfac..da8ba2ed9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,18 +2,18 @@ six warcio>=1.7.1 requests redis<3.0 -jinja2<3.0.0 +jinja2 surt>=0.3.1 brotlipy pyyaml werkzeug webencodings -gevent==21.12.0 +gevent webassets==2.0 portalocker wsgiprox>=1.5.1 fakeredis<1.0 tldextract python-dateutil -markupsafe<2.1.0 +markupsafe ua_parser diff --git a/test_requirements.txt b/test_requirements.txt index 972c8ca8b..a1cc50576 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -3,7 +3,7 @@ WebTest pytest-cov mock urllib3 -httpbin==0.5.0 -flask<2.0 +httpbin +flask ujson lxml From 3ccc7ea759eb5a98420099b799ed2ae0319557a9 Mon Sep 17 00:00:00 2001 From: Ed Summers Date: Wed, 12 Apr 2023 08:34:35 -0400 Subject: [PATCH 02/31] Use httpbin fork Add aaronhmiller/httpbin fork which has had https://github.com/aaronhmiller/httpbin.git applied so it can work with latest werkzeug. 
--- test_requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_requirements.txt b/test_requirements.txt index a1cc50576..ad49ac48a 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -3,7 +3,7 @@ WebTest pytest-cov mock urllib3 -httpbin flask ujson lxml +httpbin @ git+https://github.com/aaronhmiller/httpbin.git From e824ffda8a66d95aa259beb6f3a8e780636ee994 Mon Sep 17 00:00:00 2001 From: Ed Summers Date: Thu, 29 Jun 2023 16:57:44 -0400 Subject: [PATCH 03/31] pop_path_info This was removed in werkzeug 2.3 and exists as shift_path_info in wsgiref: https://github.com/pallets/werkzeug/pull/2415 --- pywb/apps/frontendapp.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pywb/apps/frontendapp.py b/pywb/apps/frontendapp.py index a10c7a423..f3367e05e 100644 --- a/pywb/apps/frontendapp.py +++ b/pywb/apps/frontendapp.py @@ -1,7 +1,7 @@ from gevent.monkey import patch_all; patch_all() from werkzeug.routing import Map, Rule, RequestRedirect, Submount -from werkzeug.wsgi import pop_path_info +from wsgiref.util import shift_path_info from six.moves.urllib.parse import urljoin, parse_qsl from six import iteritems from warcio.utils import to_native_str @@ -558,9 +558,9 @@ def setup_paths(self, environ, coll, record=False): return if coll != '$root': - pop_path_info(environ) + shift_path_info(environ) if record: - pop_path_info(environ) + shift_path_info(environ) paths = [self.warcserver.root_dir] @@ -669,7 +669,7 @@ def handle_request(self, environ, start_response): lang = args.pop('lang', self.default_locale) if lang: - pop_path_info(environ) + shift_path_info(environ) environ['pywb_lang'] = lang response = endpoint(environ, **args) From 5b72d968717db159cc37a56ee430620f1dcfa161 Mon Sep 17 00:00:00 2001 From: Ed Summers Date: Thu, 29 Jun 2023 18:11:34 -0400 Subject: [PATCH 04/31] Spaces in test responses It seems like JSON in responses is now minimized without spaces? 
I'm not sure what changed in the stack to do that. --- tests/test_live_rewriter.py | 16 ++++++++-------- tests/test_record_dedup.py | 4 ++-- tests/test_record_replay.py | 20 ++++++++++---------- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/tests/test_live_rewriter.py b/tests/test_live_rewriter.py index 0fbe2c5f3..29f7fae0e 100644 --- a/tests/test_live_rewriter.py +++ b/tests/test_live_rewriter.py @@ -36,7 +36,7 @@ def header_test_server(environ, start_response): # ============================================================================ def cookie_test_server(environ, start_response): - body = 'cookie value: ' + environ.get('HTTP_COOKIE', '') + body = 'cookie value:' + environ.get('HTTP_COOKIE', '') body = body.encode('utf-8') headers = [('Content-Length', str(len(body))), ('Content-Type', 'text/plain')] @@ -76,14 +76,14 @@ def test_live_live_post(self, fmod_sl): resp = self.post('/live/{0}httpbin.org/post', fmod_sl, {'foo': 'bar', 'test': 'abc'}) assert resp.status_int == 200 resp.charset = 'utf-8' - assert '"foo": "bar"' in resp.text - assert '"test": "abc"' in resp.text + assert '"foo":"bar"' in resp.text + assert '"test":"abc"' in resp.text assert resp.status_int == 200 def test_live_anchor_encode(self, fmod_sl): resp = self.get('/live/{0}httpbin.org/get?val=abc%23%23xyz', fmod_sl) assert 'get?val=abc%23%23xyz"' in resp.text - assert '"val": "abc##xyz"' in resp.text + assert '"val":"abc##xyz"' in resp.text #assert '"http://httpbin.org/anything/abc##xyz"' in resp.text assert resp.status_int == 200 @@ -125,18 +125,18 @@ def test_domain_cookie(self, fmod_sl): headers={'Host': 'example.com'}) assert resp.headers['Set-Cookie'] == 'testcookie=cookie-val; Path=/live/{0}http://localhost:{1}/'.format(fmod_sl, self.cookie_test_serv.port) - assert resp.text == 'cookie value: ' + assert resp.text == 'cookie value:' resp = self.get('/live/{0}http://localhost:%s/' % self.cookie_test_serv.port, fmod_sl, headers={'Host': 'example.com'}) - assert resp.text 
== 'cookie value: testcookie=cookie-val' + assert resp.text == 'cookie value:testcookie=cookie-val' resp = self.get('/live/{0}http://localhost:%s/' % self.cookie_test_serv.port, fmod_sl, headers={'Host': 'sub.example.com'}) assert 'Set-Cookie' not in resp.headers - assert resp.text == 'cookie value: testcookie=cookie-val' + assert resp.text == 'cookie value:testcookie=cookie-val' def test_fetch_page_with_html_title(self, fmod_sl): resp = self.get('/live/{0}http://localhost:%s/html-title' % self.header_test_serv.port, fmod_sl, @@ -178,7 +178,7 @@ def test_live_video_info(self): def test_deflate(self, fmod_sl): resp = self.get('/live/{0}http://httpbin.org/deflate', fmod_sl) - assert b'"deflated": true' in resp.body + assert b'"deflated":true' in resp.body def test_live_origin_and_referrer(self, fmod_sl): headers = {'Referer': 'http://localhost:80/live/{0}http://example.com/test'.format(fmod_sl), diff --git a/tests/test_record_dedup.py b/tests/test_record_dedup.py index 9b01e9e3b..4ed57a518 100644 --- a/tests/test_record_dedup.py +++ b/tests/test_record_dedup.py @@ -26,12 +26,12 @@ def test_init_coll(self): def test_record_1(self): res = self.testapp.get('/test-dedup/record/mp_/http://httpbin.org/get?A=B', headers={"Referer": "http://httpbin.org/"}) - assert '"A": "B"' in res.text + assert '"A":"B"' in res.text time.sleep(1.2) res = self.testapp.get('/test-dedup/record/mp_/http://httpbin.org/get?A=B', headers={"Referer": "http://httpbin.org/"}) - assert '"A": "B"' in res.text + assert '"A":"B"' in res.text def test_single_redis_entry(self): res = self.redis.zrange("pywb:test-dedup:cdxj", 0, -1) diff --git a/tests/test_record_replay.py b/tests/test_record_replay.py index c0f373920..8cd89149f 100644 --- a/tests/test_record_replay.py +++ b/tests/test_record_replay.py @@ -35,7 +35,7 @@ def test_init_coll(self): def test_record_1(self): res = self.testapp.get('/test/record/mp_/http://httpbin.org/get?A=B') - assert '"A": "B"' in res.text + assert '"A":"B"' in res.text def 
test_record_head(self): res = self.testapp.head('/test/record/mp_/http://httpbin.org/get?A=B') @@ -47,7 +47,7 @@ def test_replay_1(self, fmod): fmod_slash = fmod + '/' if fmod else '' res = self.get('/test/{0}http://httpbin.org/get?A=B', fmod_slash) - assert '"A": "B"' in res.text + assert '"A":"B"' in res.text def test_replay_head(self, fmod): fmod_slash = fmod + '/' if fmod else '' @@ -58,25 +58,25 @@ def test_replay_head(self, fmod): def test_record_2(self): res = self.testapp.get('/test2/record/mp_/http://httpbin.org/get?C=D') - assert '"C": "D"' in res.text + assert '"C":"D"' in res.text def test_replay_2(self, fmod): self.ensure_empty() fmod_slash = fmod + '/' if fmod else '' res = self.get('/test2/{0}http://httpbin.org/get?C=D', fmod_slash) - assert '"C": "D"' in res.text + assert '"C":"D"' in res.text def test_record_again_1(self): res = self.testapp.get('/test/record/mp_/http://httpbin.org/get?C=D2') - assert '"C": "D2"' in res.text + assert '"C":"D2"' in res.text def test_replay_again_1(self, fmod): self.ensure_empty() fmod_slash = fmod + '/' if fmod else '' res = self.get('/test/{0}http://httpbin.org/get?C=D2', fmod_slash) - assert '"C": "D2"' in res.text + assert '"C":"D2"' in res.text assert len(os.listdir(os.path.join(self.root_dir, '_test_colls', 'test', 'archive'))) == 1 @@ -94,10 +94,10 @@ def test_replay_all_coll(self, fmod): fmod_slash = fmod + '/' if fmod else '' res = self.get('/all/{0}http://httpbin.org/get?C=D', fmod_slash) - assert '"C": "D"' in res.text + assert '"C":"D"' in res.text res = self.get('/all/mp_/http://httpbin.org/get?A=B', fmod_slash) - assert '"A": "B"' in res.text + assert '"A":"B"' in res.text def test_cdx_all_coll(self): res = self.testapp.get('/all/cdx?url=http://httpbin.org/get*&output=json') @@ -163,7 +163,7 @@ def test_init_and_rec(self): assert os.path.isdir(dir_name) res = self.testapp.get('/test-new/record/mp_/http://httpbin.org/get?A=B') - assert '"A": "B"' in res.text + assert '"A":"B"' in res.text names = 
os.listdir(dir_name) assert len(names) == 1 @@ -176,7 +176,7 @@ def test_init_and_rec(self): def test_no_brotli(self): res = self.testapp.get('/test-new/record/mp_/http://httpbin.org/get?C=D', headers={'Accept-Encoding': 'gzip, deflate, br'}) - assert '"C": "D"' in res.text + assert '"C":"D"' in res.text with open(self.warc_name, 'rb') as fh: for record in ArchiveIterator(fh): From 0bdc96855ae5d6a600b7fcc64079bfa6a69e46df Mon Sep 17 00:00:00 2001 From: Ed Summers Date: Fri, 22 Sep 2023 10:57:05 -0400 Subject: [PATCH 05/31] httpbin adjustments Use the latest httpbin from psf/httpbin and adjust some json responses since the formatting has changed slightly. --- pywb/recorder/test/test_recorder.py | 30 +++++++++++++-------------- pywb/warcserver/test/test_handlers.py | 6 +++--- test_requirements.txt | 2 +- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/pywb/recorder/test/test_recorder.py b/pywb/recorder/test/test_recorder.py index 6cc6e8895..60f337b06 100644 --- a/pywb/recorder/test/test_recorder.py +++ b/pywb/recorder/test/test_recorder.py @@ -150,7 +150,7 @@ def test_record_warc_1(self): resp = self._test_warc_write(recorder_app, 'httpbin.org', '/get?foo=bar') assert b'HTTP/1.1 200 OK' in resp.body - assert b'"foo": "bar"' in resp.body + assert b'"foo":"bar"' in resp.body self._test_all_warcs('/warcs/', 1) @@ -160,7 +160,7 @@ def test_record_warc_2(self): resp = self._test_warc_write(recorder_app, 'httpbin.org', '/get?foo=bar') assert b'HTTP/1.1 200 OK' in resp.body - assert b'"foo": "bar"' in resp.body + assert b'"foo":"bar"' in resp.body self._test_all_warcs('/warcs/', 2) @@ -262,7 +262,7 @@ def test_record_skip_wrong_coll(self): resp = self._test_warc_write(recorder_app, 'httpbin.org', '/get?foo=bar') assert b'HTTP/1.1 200 OK' in resp.body - assert b'"foo": "bar"' in resp.body + assert b'"foo":"bar"' in resp.body self._test_all_warcs('/warcs/', 2) @@ -279,7 +279,7 @@ def test_record_param_user_coll(self): resp = self._test_warc_write(recorder_app, 
'httpbin.org', '/user-agent', '¶m.recorder.user=USER¶m.recorder.coll=COLL') - assert '"user-agent": "{0}"'.format(UA) in resp.text + assert '"user-agent":"{0}"'.format(UA) in resp.text #assert b'HTTP/1.1 200 OK' in resp.body #assert b'"foo": "bar"' in resp.body @@ -312,12 +312,12 @@ def test_record_param_user_coll_same_dir(self): resp = self._test_warc_write(recorder_app, 'httpbin.org', '/get?foo=bar', '¶m.recorder.user=USER2¶m.recorder.coll=COLL2') assert b'HTTP/1.1 200 OK' in resp.body - assert b'"foo": "bar"' in resp.body + assert b'"foo":"bar"' in resp.body resp = self._test_warc_write(recorder_app, 'httpbin.org', '/get?foo=bar', '¶m.recorder.user=USER2¶m.recorder.coll=COLL3') assert b'HTTP/1.1 200 OK' in resp.body - assert b'"foo": "bar"' in resp.body + assert b'"foo":"bar"' in resp.body self._test_all_warcs('/warcs2', 2) @@ -334,7 +334,7 @@ def test_record_param_user_coll_revisit(self): resp = self._test_warc_write(recorder_app, 'httpbin.org', '/user-agent', '¶m.recorder.user=USER¶m.recorder.coll=COLL') - assert '"user-agent": "{0}"'.format(UA) in resp.text + assert '"user-agent":"{0}"'.format(UA) in resp.text #assert b'HTTP/1.1 200 OK' in resp.body #assert b'"foo": "bar"' in resp.body @@ -387,7 +387,7 @@ def test_record_param_user_coll_skip(self): resp = self._test_warc_write(recorder_app, 'httpbin.org', '/user-agent', '¶m.recorder.user=USER¶m.recorder.coll=COLL') - assert '"user-agent": "{0}"'.format(UA) in resp.text + assert '"user-agent":"{0}"'.format(UA) in resp.text #assert b'HTTP/1.1 200 OK' in resp.body #assert b'"foo": "bar"' in resp.body self._test_all_warcs('/warcs/USER/COLL/', 2) @@ -409,7 +409,7 @@ def test_record_param_user_coll_write_dupe_no_revisit(self): resp = self._test_warc_write(recorder_app, 'httpbin.org', '/get?foo=bar', '¶m.recorder.user=USER¶m.recorder.coll=COLL') assert b'HTTP/1.1 200 OK' in resp.body - assert b'"foo": "bar"' in resp.body + assert b'"foo":"bar"' in resp.body self._test_all_warcs('/warcs/USER/COLL/', 3) @@ -432,7 
+432,7 @@ def test_record_file_warc_keep_open(self): resp = self._test_warc_write(recorder_app, 'httpbin.org', '/get?foo=bar') assert b'HTTP/1.1 200 OK' in resp.body - assert b'"foo": "bar"' in resp.body + assert b'"foo":"bar"' in resp.body assert os.path.isfile(path) assert len(writer.fh_cache) == 1 @@ -455,7 +455,7 @@ def test_record_multiple_writes_keep_open(self): '/get?foo=bar', '¶m.recorder.coll=FOO') assert b'HTTP/1.1 200 OK' in resp.body - assert b'"foo": "bar"' in resp.body + assert b'"foo":"bar"' in resp.body # Second Record @@ -463,7 +463,7 @@ def test_record_multiple_writes_keep_open(self): '/get?boo=far', '¶m.recorder.coll=FOO') assert b'HTTP/1.1 200 OK' in resp.body - assert b'"boo": "far"' in resp.body + assert b'"boo":"far"' in resp.body self._test_all_warcs('/warcs/FOO/', 1) @@ -523,14 +523,14 @@ def test_record_multiple_writes_rollover_idle(self): '/get?foo=bar', '¶m.recorder.coll=GOO') assert b'HTTP/1.1 200 OK' in resp.body - assert b'"foo": "bar"' in resp.body + assert b'"foo":"bar"' in resp.body # Second Record resp = self._test_warc_write(recorder_app, 'httpbin.org', '/get?boo=far', '¶m.recorder.coll=GOO') assert b'HTTP/1.1 200 OK' in resp.body - assert b'"boo": "far"' in resp.body + assert b'"boo":"far"' in resp.body self._test_all_warcs('/warcs/GOO/', 1) @@ -542,7 +542,7 @@ def test_record_multiple_writes_rollover_idle(self): '/get?goo=bar', '¶m.recorder.coll=GOO') assert b'HTTP/1.1 200 OK' in resp.body - assert b'"goo": "bar"' in resp.body + assert b'"goo":"bar"' in resp.body self._test_all_warcs('/warcs/GOO/', 2) diff --git a/pywb/warcserver/test/test_handlers.py b/pywb/warcserver/test/test_handlers.py index 74a4a0a94..ee8be49f1 100644 --- a/pywb/warcserver/test/test_handlers.py +++ b/pywb/warcserver/test/test_handlers.py @@ -162,7 +162,7 @@ def test_live_resource(self): assert resp.headers['Memento-Datetime'] != '' assert b'HTTP/1.1 200 OK' in resp.body - assert b'"foo": "bar"' in resp.body + assert b'"foo":"bar"' in resp.body assert 
'ResErrors' not in resp.headers @@ -178,7 +178,7 @@ def test_live_post_resource(self): assert resp.headers['Memento-Datetime'] != '' assert b'HTTP/1.1 200 OK' in resp.body - assert b'"foo": "bar"' in resp.body + assert b'"foo":"bar"' in resp.body assert 'ResErrors' not in resp.headers @@ -288,7 +288,7 @@ def test_agg_live_postreq(self): assert resp.headers['Memento-Datetime'] != '' assert b'HTTP/1.1 200 OK' in resp.body - assert b'"foo": "bar"' in resp.body + assert b'"foo":"bar"' in resp.body #assert json.loads(resp.headers['ResErrors']) == {"rhiz": "NotFoundException('https://webenact.rhizome.org/vvork/http://httpbin.org/get?foo=bar',)"} assert "NotFoundException('https://webenact.rhizome.org/vvork/" in json.loads(resp.headers['ResErrors'])['rhiz'] diff --git a/test_requirements.txt b/test_requirements.txt index ad49ac48a..e24a78680 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -6,4 +6,4 @@ urllib3 flask ujson lxml -httpbin @ git+https://github.com/aaronhmiller/httpbin.git +httpbin From 778556d7a0ac8b5cb4d2238df5c28262575013f9 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Thu, 28 Mar 2024 14:33:10 -0400 Subject: [PATCH 06/31] Add asserts to TestInputReq for app and testapp --- pywb/warcserver/test/test_inputreq.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pywb/warcserver/test/test_inputreq.py b/pywb/warcserver/test/test_inputreq.py index 041cb61e2..b3bc2de1c 100644 --- a/pywb/warcserver/test/test_inputreq.py +++ b/pywb/warcserver/test/test_inputreq.py @@ -41,7 +41,9 @@ def __call__(self, environ, start_response): class TestInputReq(object): def setup(self): self.app = InputReqApp() + assert self.app self.testapp = webtest.TestApp(self.app) + assert self.testapp def test_get_direct(self): res = self.testapp.get('/test/http://example.com/', headers={'Foo': 'Bar'}) From a91cfd9506d2c9d6692f0535871e17ce41337b64 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Thu, 28 Mar 2024 14:47:51 -0400 Subject: [PATCH 07/31] Move asserts --- 
pywb/warcserver/test/test_inputreq.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pywb/warcserver/test/test_inputreq.py b/pywb/warcserver/test/test_inputreq.py index b3bc2de1c..28fab6525 100644 --- a/pywb/warcserver/test/test_inputreq.py +++ b/pywb/warcserver/test/test_inputreq.py @@ -41,11 +41,10 @@ def __call__(self, environ, start_response): class TestInputReq(object): def setup(self): self.app = InputReqApp() - assert self.app self.testapp = webtest.TestApp(self.app) - assert self.testapp def test_get_direct(self): + assert self.testapp res = self.testapp.get('/test/http://example.com/', headers={'Foo': 'Bar'}) assert res.text == '\ GET /test/http://example.com/ HTTP/1.0\r\n\ @@ -55,6 +54,7 @@ def test_get_direct(self): ' def test_post_direct(self): + assert self.testapp res = self.testapp.post('/test/http://example.com/', headers={'Foo': 'Bar'}, params='ABC') lines = res.text.split('\r\n') assert lines[0] == 'POST /test/http://example.com/ HTTP/1.0' @@ -71,6 +71,7 @@ def test_post_req(self): Foo: Bar\r\n\ \r\n\ ' + assert self.testapp res = self.testapp.post('/test-postreq?url=http://example.com/', params=postdata) assert res.text == '\ From a8ed1dbbd585ee93f9276a9f3a87fbe1752554f4 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Thu, 28 Mar 2024 14:48:00 -0400 Subject: [PATCH 08/31] Test Python 3.8-3.11 --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index c48092cda..1fbc61008 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -8,7 +8,7 @@ jobs: strategy: max-parallel: 3 matrix: - python-version: ['3.7', '3.8', '3.9', '3.10'] + python-version: ['3.8', '3.9', '3.10', '3.11'] steps: - name: checkout From 0c6b21eb4edbcaad84bbe5d2ef3e8cd33ffc8b2d Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Thu, 28 Mar 2024 14:48:08 -0400 Subject: [PATCH 09/31] Test Python 3.8-3.11 in tox --- tox.ini | 5 ++--- 1 file changed, 
2 insertions(+), 3 deletions(-) diff --git a/tox.ini b/tox.ini index 94ac6913b..8d511f6bf 100644 --- a/tox.ini +++ b/tox.ini @@ -4,15 +4,14 @@ testpaths = tests [tox] -envlist = py36, py37, py38, py39, py310 +envlist = py38, py39, py310, py311 [gh-actions] python = - 3.6: py36 - 3.7: py37 3.8: py38 3.9: py39 3.10: py310 + 3.11: py311 [testenv] setenv = PYWB_NO_VERIFY_SSL = 1 From 9a5da9b46f28381746a3f9e74a74cf48d3093039 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Thu, 28 Mar 2024 15:26:00 -0400 Subject: [PATCH 10/31] Remove asserts --- pywb/warcserver/test/test_inputreq.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pywb/warcserver/test/test_inputreq.py b/pywb/warcserver/test/test_inputreq.py index 28fab6525..041cb61e2 100644 --- a/pywb/warcserver/test/test_inputreq.py +++ b/pywb/warcserver/test/test_inputreq.py @@ -44,7 +44,6 @@ def setup(self): self.testapp = webtest.TestApp(self.app) def test_get_direct(self): - assert self.testapp res = self.testapp.get('/test/http://example.com/', headers={'Foo': 'Bar'}) assert res.text == '\ GET /test/http://example.com/ HTTP/1.0\r\n\ @@ -54,7 +53,6 @@ def test_get_direct(self): ' def test_post_direct(self): - assert self.testapp res = self.testapp.post('/test/http://example.com/', headers={'Foo': 'Bar'}, params='ABC') lines = res.text.split('\r\n') assert lines[0] == 'POST /test/http://example.com/ HTTP/1.0' @@ -71,7 +69,6 @@ def test_post_req(self): Foo: Bar\r\n\ \r\n\ ' - assert self.testapp res = self.testapp.post('/test-postreq?url=http://example.com/', params=postdata) assert res.text == '\ From d253d2bd4354fe6176a6c12b20ffb70c81532382 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Thu, 28 Mar 2024 15:26:26 -0400 Subject: [PATCH 11/31] Pin to specific versions of werkzeug, flask, jinja2 --- requirements.txt | 4 ++-- test_requirements.txt | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/requirements.txt b/requirements.txt index da8ba2ed9..baee7d4d1 100644 --- a/requirements.txt +++ 
b/requirements.txt @@ -2,11 +2,11 @@ six warcio>=1.7.1 requests redis<3.0 -jinja2 +jinja2>=3.1.2 surt>=0.3.1 brotlipy pyyaml -werkzeug +werkzeug>=2.3.7 webencodings gevent webassets==2.0 diff --git a/test_requirements.txt b/test_requirements.txt index e24a78680..4eb5392ef 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -1,9 +1,9 @@ pytest -WebTest +WebTest==3.0.0 pytest-cov mock urllib3 -flask +flask==2.3.3 ujson lxml -httpbin +httpbin>=0.10.2 From b6e4037fc6e6f2c205d2aa1bd9409f38a7f72022 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Thu, 28 Mar 2024 15:30:42 -0400 Subject: [PATCH 12/31] Temporarily comment out bad content length tests --- tests/test_live_rewriter.py | 41 ++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/tests/test_live_rewriter.py b/tests/test_live_rewriter.py index 29f7fae0e..1dd44b471 100644 --- a/tests/test_live_rewriter.py +++ b/tests/test_live_rewriter.py @@ -91,25 +91,28 @@ def test_live_head(self, fmod_sl): resp = self.head('/live/{0}httpbin.org/get?foo=bar', fmod_sl) assert resp.status_int == 200 - @pytest.mark.skipif(sys.version_info < (3,0), reason='does not respond in 2.7') - def test_live_bad_content_length(self, fmod_sl): - resp = self.get('/live/{0}httpbin.org/response-headers?content-length=149,149', fmod_sl, status=200) - assert resp.headers['Content-Length'] == '149' - - resp = self.get('/live/{0}httpbin.org/response-headers?Content-Length=xyz', fmod_sl, status=200) - assert resp.headers['Content-Length'] == '90' - - @pytest.mark.skipif(sys.version_info < (3,0), reason='does not respond in 2.7') - def test_live_bad_content_length_with_range(self, fmod_sl): - resp = self.get('/live/{0}httpbin.org/response-headers?content-length=149,149', fmod_sl, - headers={'Range': 'bytes=0-'}, status=206) - assert resp.headers['Content-Length'] == '149' - assert resp.headers['Content-Range'] == 'bytes 0-148/149' - - resp = 
self.get('/live/{0}httpbin.org/response-headers?Content-Length=xyz', fmod_sl, - headers={'Range': 'bytes=0-'}, status=206) - assert resp.headers['Content-Length'] == '90' - assert resp.headers['Content-Range'] == 'bytes 0-89/90' + # Following tests are temporarily commented out because latest version of PSF httpbin + # now returns 400 if content-length header isn't parsable as an int + + # @pytest.mark.skipif(sys.version_info < (3,0), reason='does not respond in 2.7') + # def test_live_bad_content_length(self, fmod_sl): + # resp = self.get('/live/{0}httpbin.org/response-headers?content-length=149,149', fmod_sl, status=200) + # assert resp.headers['Content-Length'] == '149' + + # resp = self.get('/live/{0}httpbin.org/response-headers?Content-Length=xyz', fmod_sl, status=200) + # assert resp.headers['Content-Length'] == '90' + + # @pytest.mark.skipif(sys.version_info < (3,0), reason='does not respond in 2.7') + # def test_live_bad_content_length_with_range(self, fmod_sl): + # resp = self.get('/live/{0}httpbin.org/response-headers?content-length=149,149', fmod_sl, + # headers={'Range': 'bytes=0-'}, status=206) + # assert resp.headers['Content-Length'] == '149' + # assert resp.headers['Content-Range'] == 'bytes 0-148/149' + + # resp = self.get('/live/{0}httpbin.org/response-headers?Content-Length=xyz', fmod_sl, + # headers={'Range': 'bytes=0-'}, status=206) + # assert resp.headers['Content-Length'] == '90' + # assert resp.headers['Content-Range'] == 'bytes 0-89/90' def test_custom_unicode_header(self, fmod_sl): value = u'⛄' From 44e253236b469554e1de11a424c470ca771a6b84 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Thu, 28 Mar 2024 15:48:25 -0400 Subject: [PATCH 13/31] Pin gevent to 22.10.1 for now --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index baee7d4d1..a3452227c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,7 +8,7 @@ brotlipy pyyaml werkzeug>=2.3.7 webencodings -gevent 
+gevent==22.10.1 webassets==2.0 portalocker wsgiprox>=1.5.1 From 2f5d1692ac14ce483c81d0adbd0b2d87a3679d16 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Thu, 28 Mar 2024 15:58:19 -0400 Subject: [PATCH 14/31] More dependency pinning --- requirements.txt | 7 ++++--- test_requirements.txt | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index a3452227c..49c43d699 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,13 +2,14 @@ six warcio>=1.7.1 requests redis<3.0 -jinja2>=3.1.2 +jinja2==3.1.2 surt>=0.3.1 brotlipy pyyaml -werkzeug>=2.3.7 +werkzeug==2.3.7 webencodings -gevent==22.10.1 +gevent==23.9.0 +greenlet==2.0.2 webassets==2.0 portalocker wsgiprox>=1.5.1 diff --git a/test_requirements.txt b/test_requirements.txt index 4eb5392ef..895995dc6 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -6,4 +6,4 @@ urllib3 flask==2.3.3 ujson lxml -httpbin>=0.10.2 +httpbin==0.10.2 From b6e8d7f64a52fe7f61bdbf03641cf62eaeec8547 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Thu, 28 Mar 2024 16:09:49 -0400 Subject: [PATCH 15/31] Assert TestInputReq.app exists --- pywb/warcserver/test/test_inputreq.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pywb/warcserver/test/test_inputreq.py b/pywb/warcserver/test/test_inputreq.py index 041cb61e2..872bc01bf 100644 --- a/pywb/warcserver/test/test_inputreq.py +++ b/pywb/warcserver/test/test_inputreq.py @@ -44,6 +44,7 @@ def setup(self): self.testapp = webtest.TestApp(self.app) def test_get_direct(self): + assert self.app res = self.testapp.get('/test/http://example.com/', headers={'Foo': 'Bar'}) assert res.text == '\ GET /test/http://example.com/ HTTP/1.0\r\n\ From 82ae62524053731abc37149ec68b276183bc2eb2 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Thu, 28 Mar 2024 16:37:19 -0400 Subject: [PATCH 16/31] Remove assert --- pywb/warcserver/test/test_inputreq.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pywb/warcserver/test/test_inputreq.py 
b/pywb/warcserver/test/test_inputreq.py index 872bc01bf..041cb61e2 100644 --- a/pywb/warcserver/test/test_inputreq.py +++ b/pywb/warcserver/test/test_inputreq.py @@ -44,7 +44,6 @@ def setup(self): self.testapp = webtest.TestApp(self.app) def test_get_direct(self): - assert self.app res = self.testapp.get('/test/http://example.com/', headers={'Foo': 'Bar'}) assert res.text == '\ GET /test/http://example.com/ HTTP/1.0\r\n\ From ed25b0a06b69baa64c0ed80f3c74581cb5079318 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Thu, 28 Mar 2024 16:37:28 -0400 Subject: [PATCH 17/31] Call tests with pytest rather than py.test --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 8d511f6bf..0c858fdf4 100644 --- a/tox.ini +++ b/tox.ini @@ -21,6 +21,6 @@ deps = -rrequirements.txt -rextra_requirements.txt commands = - py.test --cov-config .coveragerc --cov pywb -v --doctest-modules ./pywb/ tests/ + pytest --cov-config .coveragerc --cov pywb -v --doctest-modules ./pywb/ tests/ From 63560e01d586b48bacfacaed24033240a5dcbba9 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Thu, 28 Mar 2024 17:03:48 -0400 Subject: [PATCH 18/31] Unpin WebTest --- test_requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_requirements.txt b/test_requirements.txt index 895995dc6..856123d42 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -1,5 +1,5 @@ pytest -WebTest==3.0.0 +WebTest pytest-cov mock urllib3 From 543849fc0400ea22cb2be9f9ce18eeb367e54d68 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Thu, 28 Mar 2024 17:10:39 -0400 Subject: [PATCH 19/31] Revert combining if lang statements --- pywb/apps/frontendapp.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pywb/apps/frontendapp.py b/pywb/apps/frontendapp.py index 42b3e76e2..ab57a7018 100644 --- a/pywb/apps/frontendapp.py +++ b/pywb/apps/frontendapp.py @@ -670,6 +670,8 @@ def handle_request(self, environ, start_response): lang = args.pop('lang', '') if 
lang: shift_path_info(environ) + + if lang: environ['pywb_lang'] = lang elif self.default_locale: environ['pywb_lang'] = self.default_locale From 60fd7b2ee859a0451aac6a25811a8bb1e7da370c Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Fri, 29 Mar 2024 12:19:25 -0400 Subject: [PATCH 20/31] Remove flask --- test_requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/test_requirements.txt b/test_requirements.txt index 856123d42..047a16b42 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -3,7 +3,6 @@ WebTest pytest-cov mock urllib3 -flask==2.3.3 ujson lxml httpbin==0.10.2 From 1181f2eeacc8787b7720025575dfdc3e48a0fb06 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Fri, 29 Mar 2024 12:23:30 -0400 Subject: [PATCH 21/31] Unpin redis --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 49c43d699..d369e417d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ six warcio>=1.7.1 requests -redis<3.0 +redis jinja2==3.1.2 surt>=0.3.1 brotlipy From 575b2e53e3e5e076c71bab4e83554e98e070846f Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Fri, 29 Mar 2024 12:31:13 -0400 Subject: [PATCH 22/31] Rename pytest's deprecated setup to setup_method in test classes --- pywb/warcserver/test/test_inputreq.py | 2 +- pywb/warcserver/test/test_upstream.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pywb/warcserver/test/test_inputreq.py b/pywb/warcserver/test/test_inputreq.py index 041cb61e2..a7cc01cac 100644 --- a/pywb/warcserver/test/test_inputreq.py +++ b/pywb/warcserver/test/test_inputreq.py @@ -39,7 +39,7 @@ def __call__(self, environ, start_response): #============================================================================= class TestInputReq(object): - def setup(self): + def setup_method(self): self.app = InputReqApp() self.testapp = webtest.TestApp(self.app) diff --git a/pywb/warcserver/test/test_upstream.py 
b/pywb/warcserver/test/test_upstream.py index 1e8e93ed0..ac5e26258 100644 --- a/pywb/warcserver/test/test_upstream.py +++ b/pywb/warcserver/test/test_upstream.py @@ -18,7 +18,7 @@ class TestUpstream(LiveServerTests, HttpBinLiveTests, BaseTestClass): - def setup(self): + def setup_method(self): app = BaseWarcServer() base_url = 'http://localhost:{0}'.format(self.server.port) From f4ccdfcee322d5e99e207b588f33bc9798e4e328 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Fri, 29 Mar 2024 12:41:00 -0400 Subject: [PATCH 23/31] Update force https test for new behavior --- tests/test_force_https.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_force_https.py b/tests/test_force_https.py index 7fc65facd..a3c0171bd 100644 --- a/tests/test_force_https.py +++ b/tests/test_force_https.py @@ -56,6 +56,6 @@ def test_force_https_root_replay_1(self, fmod): resp = self.get('/20140128051539{0}/http://www.iana.org/domains/example', fmod, headers={'X-Forwarded-Proto': 'https'}) - assert resp.headers['Location'] == 'https://localhost:80/20140128051539{0}/http://www.iana.org/domains/reserved'.format(fmod) + assert resp.headers['Location'] == 'https://localhost:80/20140128051539{0}/http://www.iana.org/help/example-domains'.format(fmod) From 2e3342c42057b3212d5c7a8695c30b08317242a5 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Fri, 29 Mar 2024 12:49:02 -0400 Subject: [PATCH 24/31] Switch some requirements to >= to get minor/patch updates --- requirements.txt | 4 ++-- test_requirements.txt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index d369e417d..1e62ca010 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,14 +2,14 @@ six warcio>=1.7.1 requests redis -jinja2==3.1.2 +jinja2>=3.1.2 surt>=0.3.1 brotlipy pyyaml werkzeug==2.3.7 webencodings gevent==23.9.0 -greenlet==2.0.2 +greenlet>=2.0.2 webassets==2.0 portalocker wsgiprox>=1.5.1 diff --git a/test_requirements.txt b/test_requirements.txt 
index 047a16b42..e34b071d2 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -5,4 +5,4 @@ mock urllib3 ujson lxml -httpbin==0.10.2 +httpbin>=0.10.2 From d69bffd9397f78f6e0a80878f571505e9fdc86d7 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Fri, 29 Mar 2024 13:40:07 -0400 Subject: [PATCH 25/31] Enforce Python>=3.8 required in setup.py --- setup.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/setup.py b/setup.py index 236bc1b5a..85671138b 100755 --- a/setup.py +++ b/setup.py @@ -113,6 +113,7 @@ def get_package_data(): "translate_toolkit" ], }, + python_requires='>=3.8', tests_require=load_requirements("test_requirements.txt"), cmdclass={'test': PyTest}, test_suite='', @@ -131,16 +132,11 @@ def get_package_data(): 'Environment :: Web Environment', 'License :: OSI Approved :: GNU General Public License (GPL)', 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.3', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', 'Topic :: Internet :: Proxy Servers', 'Topic :: Internet :: WWW/HTTP', 'Topic :: Internet :: WWW/HTTP :: WSGI', From 930c901f0001796ef55a8567ae978593a3ab8953 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Fri, 29 Mar 2024 13:51:16 -0400 Subject: [PATCH 26/31] Ensure greenlet 2 not 3 is used with pinned gevent --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 1e62ca010..183d2f69e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,7 +9,7 @@ pyyaml werkzeug==2.3.7 
webencodings gevent==23.9.0 -greenlet>=2.0.2 +greenlet>=2.0.2,<3.0 webassets==2.0 portalocker wsgiprox>=1.5.1 From ea05caa4dfe5ac70afa03b4d6482f77104d18239 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Mon, 1 Apr 2024 13:22:44 -0400 Subject: [PATCH 27/31] Drop werkzeug version to support Python 3.7-3.11 --- .github/workflows/ci.yaml | 2 +- requirements.txt | 2 +- setup.py | 3 ++- tox.ini | 3 ++- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 1fbc61008..4319e5600 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -8,7 +8,7 @@ jobs: strategy: max-parallel: 3 matrix: - python-version: ['3.8', '3.9', '3.10', '3.11'] + python-version: ['3.7', '3.8', '3.9', '3.10', '3.11'] steps: - name: checkout diff --git a/requirements.txt b/requirements.txt index 183d2f69e..381d59382 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,7 @@ jinja2>=3.1.2 surt>=0.3.1 brotlipy pyyaml -werkzeug==2.3.7 +werkzeug==2.2.3 webencodings gevent==23.9.0 greenlet>=2.0.2,<3.0 diff --git a/setup.py b/setup.py index 85671138b..b9f5a8cac 100755 --- a/setup.py +++ b/setup.py @@ -113,7 +113,7 @@ def get_package_data(): "translate_toolkit" ], }, - python_requires='>=3.8', + python_requires='>=3.7,<3.12', tests_require=load_requirements("test_requirements.txt"), cmdclass={'test': PyTest}, test_suite='', @@ -133,6 +133,7 @@ def get_package_data(): 'License :: OSI Approved :: GNU General Public License (GPL)', 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', diff --git a/tox.ini b/tox.ini index 0c858fdf4..eab3d6e3a 100644 --- a/tox.ini +++ b/tox.ini @@ -4,10 +4,11 @@ testpaths = tests [tox] -envlist = py38, py39, py310, py311 +envlist = py37, py38, py39, py310, py311 
[gh-actions] python = + 3.7: py37 3.8: py38 3.9: py39 3.10: py310 From b51c0bb84a0861193b6571e2602b7f388a7b3046 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Mon, 1 Apr 2024 13:25:21 -0400 Subject: [PATCH 28/31] Drop gevent to 22.10.2 to support Python 3.7 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 381d59382..f3df2ffcf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,7 +8,7 @@ brotlipy pyyaml werkzeug==2.2.3 webencodings -gevent==23.9.0 +gevent==22.10.2 greenlet>=2.0.2,<3.0 webassets==2.0 portalocker From 3b554fbcdd3ce12b487c28c44e9acdbe39d15240 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Mon, 1 Apr 2024 15:03:17 -0400 Subject: [PATCH 29/31] Revert spacing changes for older 3.7-supporting werkzeug --- pywb/recorder/test/test_recorder.py | 26 +++++++++++++------------- tests/test_live_rewriter.py | 16 ++++++++-------- tests/test_record_dedup.py | 4 ++-- tests/test_record_replay.py | 20 ++++++++++---------- 4 files changed, 33 insertions(+), 33 deletions(-) diff --git a/pywb/recorder/test/test_recorder.py b/pywb/recorder/test/test_recorder.py index 60f337b06..80b329d91 100644 --- a/pywb/recorder/test/test_recorder.py +++ b/pywb/recorder/test/test_recorder.py @@ -150,7 +150,7 @@ def test_record_warc_1(self): resp = self._test_warc_write(recorder_app, 'httpbin.org', '/get?foo=bar') assert b'HTTP/1.1 200 OK' in resp.body - assert b'"foo":"bar"' in resp.body + assert b'"foo": "bar"' in resp.body self._test_all_warcs('/warcs/', 1) @@ -160,7 +160,7 @@ def test_record_warc_2(self): resp = self._test_warc_write(recorder_app, 'httpbin.org', '/get?foo=bar') assert b'HTTP/1.1 200 OK' in resp.body - assert b'"foo":"bar"' in resp.body + assert b'"foo": "bar"' in resp.body self._test_all_warcs('/warcs/', 2) @@ -279,7 +279,7 @@ def test_record_param_user_coll(self): resp = self._test_warc_write(recorder_app, 'httpbin.org', '/user-agent', 
'&param.recorder.user=USER&param.recorder.coll=COLL') - assert '"user-agent":"{0}"'.format(UA) in resp.text + assert '"user-agent": "{0}"'.format(UA) in resp.text #assert b'HTTP/1.1 200 OK' in resp.body #assert b'"foo": "bar"' in resp.body @@ -312,12 +312,12 @@ def test_record_param_user_coll_same_dir(self): resp = self._test_warc_write(recorder_app, 'httpbin.org', '/get?foo=bar', '&param.recorder.user=USER2&param.recorder.coll=COLL2') assert b'HTTP/1.1 200 OK' in resp.body - assert b'"foo":"bar"' in resp.body + assert b'"foo": "bar"' in resp.body resp = self._test_warc_write(recorder_app, 'httpbin.org', '/get?foo=bar', '&param.recorder.user=USER2&param.recorder.coll=COLL3') assert b'HTTP/1.1 200 OK' in resp.body - assert b'"foo":"bar"' in resp.body + assert b'"foo": "bar"' in resp.body self._test_all_warcs('/warcs2', 2) @@ -334,7 +334,7 @@ def test_record_param_user_coll_revisit(self): resp = self._test_warc_write(recorder_app, 'httpbin.org', '/user-agent', '&param.recorder.user=USER&param.recorder.coll=COLL') - assert '"user-agent":"{0}"'.format(UA) in resp.text + assert '"user-agent": "{0}"'.format(UA) in resp.text #assert b'HTTP/1.1 200 OK' in resp.body #assert b'"foo": "bar"' in resp.body @@ -387,7 +387,7 @@ def test_record_param_user_coll_skip(self): resp = self._test_warc_write(recorder_app, 'httpbin.org', '/user-agent', '&param.recorder.user=USER&param.recorder.coll=COLL') - assert '"user-agent":"{0}"'.format(UA) in resp.text + assert '"user-agent": "{0}"'.format(UA) in resp.text #assert b'HTTP/1.1 200 OK' in resp.body #assert b'"foo": "bar"' in resp.body self._test_all_warcs('/warcs/USER/COLL/', 2) @@ -409,7 +409,7 @@ def test_record_param_user_coll_write_dupe_no_revisit(self): resp = self._test_warc_write(recorder_app, 'httpbin.org', '/get?foo=bar', '&param.recorder.user=USER&param.recorder.coll=COLL') assert b'HTTP/1.1 200 OK' in resp.body - assert b'"foo":"bar"' in resp.body + assert b'"foo": "bar"' in resp.body self._test_all_warcs('/warcs/USER/COLL/', 3) @@ -432,7 +432,7 @@ def
test_record_file_warc_keep_open(self): resp = self._test_warc_write(recorder_app, 'httpbin.org', '/get?foo=bar') assert b'HTTP/1.1 200 OK' in resp.body - assert b'"foo":"bar"' in resp.body + assert b'"foo": "bar"' in resp.body assert os.path.isfile(path) assert len(writer.fh_cache) == 1 @@ -455,7 +455,7 @@ def test_record_multiple_writes_keep_open(self): '/get?foo=bar', '&param.recorder.coll=FOO') assert b'HTTP/1.1 200 OK' in resp.body - assert b'"foo":"bar"' in resp.body + assert b'"foo": "bar"' in resp.body # Second Record @@ -463,7 +463,7 @@ def test_record_multiple_writes_keep_open(self): '/get?boo=far', '&param.recorder.coll=FOO') assert b'HTTP/1.1 200 OK' in resp.body - assert b'"boo":"far"' in resp.body + assert b'"boo": "far"' in resp.body self._test_all_warcs('/warcs/FOO/', 1) @@ -523,14 +523,14 @@ def test_record_multiple_writes_rollover_idle(self): '/get?foo=bar', '&param.recorder.coll=GOO') assert b'HTTP/1.1 200 OK' in resp.body - assert b'"foo":"bar"' in resp.body + assert b'"foo": "bar"' in resp.body # Second Record resp = self._test_warc_write(recorder_app, 'httpbin.org', '/get?boo=far', '&param.recorder.coll=GOO') assert b'HTTP/1.1 200 OK' in resp.body - assert b'"boo":"far"' in resp.body + assert b'"boo": "far"' in resp.body self._test_all_warcs('/warcs/GOO/', 1) diff --git a/tests/test_live_rewriter.py b/tests/test_live_rewriter.py index 1dd44b471..25891e500 100644 --- a/tests/test_live_rewriter.py +++ b/tests/test_live_rewriter.py @@ -36,7 +36,7 @@ def header_test_server(environ, start_response): # ============================================================================ def cookie_test_server(environ, start_response): - body = 'cookie value:' + environ.get('HTTP_COOKIE', '') + body = 'cookie value: ' + environ.get('HTTP_COOKIE', '') body = body.encode('utf-8') headers = [('Content-Length', str(len(body))), ('Content-Type', 'text/plain')] @@ -76,14 +76,14 @@ def test_live_live_post(self, fmod_sl): resp = self.post('/live/{0}httpbin.org/post', fmod_sl, {'foo':
'bar', 'test': 'abc'}) assert resp.status_int == 200 resp.charset = 'utf-8' - assert '"foo":"bar"' in resp.text - assert '"test":"abc"' in resp.text + assert '"foo" :"bar"' in resp.text + assert '"test" :"abc"' in resp.text assert resp.status_int == 200 def test_live_anchor_encode(self, fmod_sl): resp = self.get('/live/{0}httpbin.org/get?val=abc%23%23xyz', fmod_sl) assert 'get?val=abc%23%23xyz"' in resp.text - assert '"val":"abc##xyz"' in resp.text + assert '"val" :"abc##xyz"' in resp.text #assert '"http://httpbin.org/anything/abc##xyz"' in resp.text assert resp.status_int == 200 @@ -128,18 +128,18 @@ def test_domain_cookie(self, fmod_sl): headers={'Host': 'example.com'}) assert resp.headers['Set-Cookie'] == 'testcookie=cookie-val; Path=/live/{0}http://localhost:{1}/'.format(fmod_sl, self.cookie_test_serv.port) - assert resp.text == 'cookie value:' + assert resp.text == 'cookie value: ' resp = self.get('/live/{0}http://localhost:%s/' % self.cookie_test_serv.port, fmod_sl, headers={'Host': 'example.com'}) - assert resp.text == 'cookie value:testcookie=cookie-val' + assert resp.text == 'cookie value: testcookie=cookie-val' resp = self.get('/live/{0}http://localhost:%s/' % self.cookie_test_serv.port, fmod_sl, headers={'Host': 'sub.example.com'}) assert 'Set-Cookie' not in resp.headers - assert resp.text == 'cookie value:testcookie=cookie-val' + assert resp.text == 'cookie value: testcookie=cookie-val' def test_fetch_page_with_html_title(self, fmod_sl): resp = self.get('/live/{0}http://localhost:%s/html-title' % self.header_test_serv.port, fmod_sl, @@ -181,7 +181,7 @@ def test_live_video_info(self): def test_deflate(self, fmod_sl): resp = self.get('/live/{0}http://httpbin.org/deflate', fmod_sl) - assert b'"deflated":true' in resp.body + assert b'"deflated" :true' in resp.body def test_live_origin_and_referrer(self, fmod_sl): headers = {'Referer': 'http://localhost:80/live/{0}http://example.com/test'.format(fmod_sl), diff --git a/tests/test_record_dedup.py 
b/tests/test_record_dedup.py index 4ed57a518..9b01e9e3b 100644 --- a/tests/test_record_dedup.py +++ b/tests/test_record_dedup.py @@ -26,12 +26,12 @@ def test_init_coll(self): def test_record_1(self): res = self.testapp.get('/test-dedup/record/mp_/http://httpbin.org/get?A=B', headers={"Referer": "http://httpbin.org/"}) - assert '"A":"B"' in res.text + assert '"A": "B"' in res.text time.sleep(1.2) res = self.testapp.get('/test-dedup/record/mp_/http://httpbin.org/get?A=B', headers={"Referer": "http://httpbin.org/"}) - assert '"A":"B"' in res.text + assert '"A": "B"' in res.text def test_single_redis_entry(self): res = self.redis.zrange("pywb:test-dedup:cdxj", 0, -1) diff --git a/tests/test_record_replay.py b/tests/test_record_replay.py index 8cd89149f..c0f373920 100644 --- a/tests/test_record_replay.py +++ b/tests/test_record_replay.py @@ -35,7 +35,7 @@ def test_init_coll(self): def test_record_1(self): res = self.testapp.get('/test/record/mp_/http://httpbin.org/get?A=B') - assert '"A":"B"' in res.text + assert '"A": "B"' in res.text def test_record_head(self): res = self.testapp.head('/test/record/mp_/http://httpbin.org/get?A=B') @@ -47,7 +47,7 @@ def test_replay_1(self, fmod): fmod_slash = fmod + '/' if fmod else '' res = self.get('/test/{0}http://httpbin.org/get?A=B', fmod_slash) - assert '"A":"B"' in res.text + assert '"A": "B"' in res.text def test_replay_head(self, fmod): fmod_slash = fmod + '/' if fmod else '' @@ -58,25 +58,25 @@ def test_replay_head(self, fmod): def test_record_2(self): res = self.testapp.get('/test2/record/mp_/http://httpbin.org/get?C=D') - assert '"C":"D"' in res.text + assert '"C": "D"' in res.text def test_replay_2(self, fmod): self.ensure_empty() fmod_slash = fmod + '/' if fmod else '' res = self.get('/test2/{0}http://httpbin.org/get?C=D', fmod_slash) - assert '"C":"D"' in res.text + assert '"C": "D"' in res.text def test_record_again_1(self): res = self.testapp.get('/test/record/mp_/http://httpbin.org/get?C=D2') - assert '"C":"D2"' in 
res.text + assert '"C": "D2"' in res.text def test_replay_again_1(self, fmod): self.ensure_empty() fmod_slash = fmod + '/' if fmod else '' res = self.get('/test/{0}http://httpbin.org/get?C=D2', fmod_slash) - assert '"C":"D2"' in res.text + assert '"C": "D2"' in res.text assert len(os.listdir(os.path.join(self.root_dir, '_test_colls', 'test', 'archive'))) == 1 @@ -94,10 +94,10 @@ def test_replay_all_coll(self, fmod): fmod_slash = fmod + '/' if fmod else '' res = self.get('/all/{0}http://httpbin.org/get?C=D', fmod_slash) - assert '"C":"D"' in res.text + assert '"C": "D"' in res.text res = self.get('/all/mp_/http://httpbin.org/get?A=B', fmod_slash) - assert '"A":"B"' in res.text + assert '"A": "B"' in res.text def test_cdx_all_coll(self): res = self.testapp.get('/all/cdx?url=http://httpbin.org/get*&output=json') @@ -163,7 +163,7 @@ def test_init_and_rec(self): assert os.path.isdir(dir_name) res = self.testapp.get('/test-new/record/mp_/http://httpbin.org/get?A=B') - assert '"A":"B"' in res.text + assert '"A": "B"' in res.text names = os.listdir(dir_name) assert len(names) == 1 @@ -176,7 +176,7 @@ def test_init_and_rec(self): def test_no_brotli(self): res = self.testapp.get('/test-new/record/mp_/http://httpbin.org/get?C=D', headers={'Accept-Encoding': 'gzip, deflate, br'}) - assert '"C":"D"' in res.text + assert '"C": "D"' in res.text with open(self.warc_name, 'rb') as fh: for record in ArchiveIterator(fh): From 7b5544dcc5c7cc6e340d0fe5f497bc8c0ec08492 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Mon, 1 Apr 2024 15:10:54 -0400 Subject: [PATCH 30/31] More spacing test fixes --- pywb/recorder/test/test_recorder.py | 4 ++-- pywb/warcserver/test/test_handlers.py | 6 +++--- tests/test_live_rewriter.py | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pywb/recorder/test/test_recorder.py b/pywb/recorder/test/test_recorder.py index 80b329d91..6cc6e8895 100644 --- a/pywb/recorder/test/test_recorder.py +++ b/pywb/recorder/test/test_recorder.py @@ 
-262,7 +262,7 @@ def test_record_skip_wrong_coll(self): resp = self._test_warc_write(recorder_app, 'httpbin.org', '/get?foo=bar') assert b'HTTP/1.1 200 OK' in resp.body - assert b'"foo":"bar"' in resp.body + assert b'"foo": "bar"' in resp.body self._test_all_warcs('/warcs/', 2) @@ -542,7 +542,7 @@ def test_record_multiple_writes_rollover_idle(self): '/get?goo=bar', '&param.recorder.coll=GOO') assert b'HTTP/1.1 200 OK' in resp.body - assert b'"goo":"bar"' in resp.body + assert b'"goo": "bar"' in resp.body self._test_all_warcs('/warcs/GOO/', 2) diff --git a/pywb/warcserver/test/test_handlers.py b/pywb/warcserver/test/test_handlers.py index ee8be49f1..74a4a0a94 100644 --- a/pywb/warcserver/test/test_handlers.py +++ b/pywb/warcserver/test/test_handlers.py @@ -162,7 +162,7 @@ def test_live_resource(self): assert resp.headers['Memento-Datetime'] != '' assert b'HTTP/1.1 200 OK' in resp.body - assert b'"foo":"bar"' in resp.body + assert b'"foo": "bar"' in resp.body assert 'ResErrors' not in resp.headers @@ -178,7 +178,7 @@ def test_live_post_resource(self): assert resp.headers['Memento-Datetime'] != '' assert b'HTTP/1.1 200 OK' in resp.body - assert b'"foo":"bar"' in resp.body + assert b'"foo": "bar"' in resp.body assert 'ResErrors' not in resp.headers @@ -288,7 +288,7 @@ def test_agg_live_postreq(self): assert resp.headers['Memento-Datetime'] != '' assert b'HTTP/1.1 200 OK' in resp.body - assert b'"foo":"bar"' in resp.body + assert b'"foo": "bar"' in resp.body #assert json.loads(resp.headers['ResErrors']) == {"rhiz": "NotFoundException('https://webenact.rhizome.org/vvork/http://httpbin.org/get?foo=bar',)"} assert "NotFoundException('https://webenact.rhizome.org/vvork/" in json.loads(resp.headers['ResErrors'])['rhiz'] diff --git a/tests/test_live_rewriter.py b/tests/test_live_rewriter.py index 25891e500..ab022d26c 100644 --- a/tests/test_live_rewriter.py +++ b/tests/test_live_rewriter.py @@ -76,14 +76,14 @@ def test_live_live_post(self, fmod_sl): resp =
self.post('/live/{0}httpbin.org/post', fmod_sl, {'foo': 'bar', 'test': 'abc'}) assert resp.status_int == 200 resp.charset = 'utf-8' - assert '"foo" :"bar"' in resp.text - assert '"test" :"abc"' in resp.text + assert '"foo": "bar"' in resp.text + assert '"test": "abc"' in resp.text assert resp.status_int == 200 def test_live_anchor_encode(self, fmod_sl): resp = self.get('/live/{0}httpbin.org/get?val=abc%23%23xyz', fmod_sl) assert 'get?val=abc%23%23xyz"' in resp.text - assert '"val" :"abc##xyz"' in resp.text + assert '"val": "abc##xyz"' in resp.text #assert '"http://httpbin.org/anything/abc##xyz"' in resp.text assert resp.status_int == 200 @@ -181,7 +181,7 @@ def test_live_video_info(self): def test_deflate(self, fmod_sl): resp = self.get('/live/{0}http://httpbin.org/deflate', fmod_sl) - assert b'"deflated" :true' in resp.body + assert b'"deflated": true' in resp.body def test_live_origin_and_referrer(self, fmod_sl): headers = {'Referer': 'http://localhost:80/live/{0}http://example.com/test'.format(fmod_sl), From 8e1cd43ebe48677af7d4822965f16ccf1c15a394 Mon Sep 17 00:00:00 2001 From: Tessa Walsh Date: Tue, 2 Apr 2024 16:07:23 -0400 Subject: [PATCH 31/31] Pin markupsafe>=2.1.1 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index f3df2ffcf..1802001d8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,5 +16,5 @@ wsgiprox>=1.5.1 fakeredis<1.0 tldextract python-dateutil -markupsafe +markupsafe>=2.1.1 ua_parser