Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Don't leak IP address #310

Merged
merged 4 commits into from
Sep 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
},
install_requires=[
'internetarchive',
'urllib3==1.26.13',
'docopt==0.6.2',
'yt-dlp',
]
Expand Down
20 changes: 19 additions & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
import unittest
import os
from tubeup.utils import sanitize_identifier, check_is_file_empty
import json
from tubeup.utils import sanitize_identifier, check_is_file_empty, strip_ip_from_meta

current_path = os.path.dirname(os.path.realpath(__file__))


def get_testfile_path(name):
    """Return the path of fixture *name* inside ``test_tubeup_files``.

    The fixture directory is resolved relative to ``current_path``, the
    directory containing this test module.
    """
    fixtures_dir = os.path.join(current_path, 'test_tubeup_files')
    return os.path.join(fixtures_dir, name)


class UtilsTest(unittest.TestCase):
Expand Down Expand Up @@ -48,3 +55,14 @@ def test_check_is_file_empty_when_file_doesnt_exist(self):
FileNotFoundError,
r"^Path 'file_that_doesnt_exist.txt' doesn't exist$"):
check_is_file_empty('file_that_doesnt_exist.txt')

def test_strip_ip_from_meta(self):
    """strip_ip_from_meta must report a change and drop the fixture's IP.

    The fixture text is read once and parsed twice, so one copy stays
    untouched for comparison (strip_ip_from_meta edits its argument in
    place).
    """
    fixture_path = get_testfile_path(
        'Mountain_3_-_Video_Background_HD_1080p-6iRV8liah8A.'
        'info.json')
    with open(fixture_path, encoding='utf-8') as f:
        raw_meta = f.read()

    # Previously f.read() was called after json.load(f) had already
    # consumed the file, so it returned '' and the inequality check
    # could never fail. Compare real before/after metadata instead.
    original_meta = json.loads(raw_meta)
    mod, new_meta = strip_ip_from_meta(json.loads(raw_meta))

    self.assertTrue(mod)
    self.assertNotEqual(original_meta, new_meta)
    self.assertNotRegex(json.dumps(new_meta), r'36\.73\.93\.234')
7 changes: 6 additions & 1 deletion tubeup/TubeUp.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from internetarchive.config import parse_config_file
from datetime import datetime
from yt_dlp import YoutubeDL
from .utils import (get_itemname, check_is_file_empty,
from .utils import (get_itemname, check_is_file_empty, strip_ip_from_meta,
EMPTY_ANNOTATION_FILE)
from logging import getLogger
from urllib.parse import urlparse
Expand Down Expand Up @@ -324,6 +324,11 @@ def upload_ia(self, videobasename, custom_meta=None):
with open(json_metadata_filepath, 'r', encoding='utf-8') as f:
vid_meta = json.load(f)

mod, new_meta = strip_ip_from_meta(vid_meta)
if mod:
with open(json_metadata_filepath, 'w') as f:
json.dump(new_meta, f)

# Exit if video download did not complete, don't upload .part files to IA
for ext in ['*.part', '*.f303.*', '*.f302.*', '*.ytdl', '*.f251.*', '*.248.*', '*.f247.*', '*.temp']:
if glob.glob(videobasename + ext):
Expand Down
37 changes: 37 additions & 0 deletions tubeup/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import re
from urllib.parse import urlparse, parse_qs, urlencode


EMPTY_ANNOTATION_FILE = ('<?xml version="1.0" encoding="UTF-8" ?>'
Expand Down Expand Up @@ -29,3 +30,39 @@ def check_is_file_empty(filepath):
return os.stat(filepath).st_size == 0
else:
raise FileNotFoundError("Path '%s' doesn't exist" % filepath)


def strip_ip_from_url(url):
    """
    Strip occurrences of an IP address from a URL.

    Two forms are handled:

    - a path segment following ``/ip/`` (as in ``/ip/1.2.3.4/``), which is
      replaced by ``REDACTED``;
    - an ``ip`` query parameter (as in ``?ip=1.2.3.4``), which is removed.

    :param url: the URL string to clean.
    :return: the URL as re-assembled by ``urlparse(...).geturl()`` with
             the IP occurrences redacted.
    """
    u = urlparse(url)
    u = u._replace(path=re.sub(r'/ip/[^/]+', r'/ip/REDACTED', u.path))
    if u.query:
        # keep_blank_values=True: without it, rebuilding the query would
        # also drop every unrelated parameter that has an empty value
        # (e.g. "sig="), changing more of the URL than intended.
        qs = parse_qs(u.query, keep_blank_values=True)
        # Only rewrite the query when an "ip" parameter was actually
        # present, so other URLs keep their original query string.
        if qs.pop('ip', None) is not None:
            u = u._replace(query=urlencode(qs, True))
    return u.geturl()


def strip_ip_from_meta(meta):
    """
    Redact IP addresses from the URLs stored in a video metadata dict.

    The top-level ``url`` entry and, for every entry of ``formats``, the
    ``manifest_url``, ``fragment_base_url`` and ``url`` fields are passed
    through ``strip_ip_from_url``. Fields that are absent are skipped.

    :param meta: metadata dict; it is modified in place.
    :return: a ``(modified, meta)`` tuple, where ``modified`` is True if
             at least one URL was changed and ``meta`` is the same dict
             object that was passed in.
    """
    modified = False
    if 'url' in meta:
        redacted_url = strip_ip_from_url(meta['url'])
        if redacted_url != meta['url']:
            meta['url'] = redacted_url
            modified = True

    # 'formats' is treated as optional, like 'url' above: a missing key
    # or a null value means there is nothing to redact, not an error.
    for _format in meta.get('formats') or []:
        for field in ('manifest_url', 'fragment_base_url', 'url'):
            if field in _format:
                redacted_url = strip_ip_from_url(_format[field])
                if redacted_url != _format[field]:
                    _format[field] = redacted_url
                    modified = True

    return modified, meta