def filename_from_content_disposition(content_disposition):
    """
    Extract and validate the filename from a Content-Disposition header.

    The value is handed to the standard library's e-mail header parser, and
    the filename it reports is then reduced to a bare, non-hidden file name.

    Origin: https://github.com/httpie/httpie/blob/4c8633c6e51f388523ab4fa649040934402a4fc9/httpie/downloads.py#L98
    :param content_disposition: Content-Disposition value, with or without
        the leading ``attachment;`` disposition type
    :type content_disposition: str
    :return: the sanitized filename if present and valid, otherwise `None`
    :rtype: Optional[str]
    :example:
        filename_from_content_disposition('attachment; filename=jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz')
        should return: 'jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz'
        filename_from_content_disposition('attachment; filename=/what/ever/filename.tar.gz')
        should return: 'filename.tar.gz'
        filename_from_content_disposition('attachment; filename=')
        should return: None
    """
    # Function-scope imports keep this helper self-contained.
    import os
    from email import message_from_string

    # A disposition type is always put in front, so that a bare "filename=..."
    # value is parsed as a parameter and not as the disposition type itself.
    msg = message_from_string(f"Content-Disposition: attachment; {content_disposition}")
    filename = msg.get_filename()
    if not filename:
        return None

    # The name is supplied by a remote server: keep only the last path
    # component, and treat "\" as a separator on every platform because
    # os.path.basename() on POSIX only splits on "/".
    filename = os.path.basename(filename.replace("\\", "/")).strip()

    # Drop leading dots. This has to repeat: removing a dot can uncover
    # whitespace that hides another dot (". .name" -> " .name" -> ".name").
    while filename.startswith("."):
        filename = filename.lstrip(".").lstrip()

    return filename or None
class TestFilenameFromDispositionHeader:
    """Checks for urlgrabber.filename_from_content_disposition()."""

    @pytest.mark.parametrize(
        ("header", "result"),
        [
            # Plain and UTF-8 encoded parameter together: according to the
            # original author the first (not UTF-8 encoded) one is parsed.
            pytest.param(
                "attachment; filename=jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz; filename*=UTF-8''jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz",
                "jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz",
            ),
            # No disposition type, trailing semicolon.
            pytest.param(
                "filename=jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz;",
                "jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz",
            ),
            # Only the UTF-8 encoded parameter.
            pytest.param(
                "filename*=UTF-8''jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz",
                "jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz",
            ),
            # Unquoted value.
            pytest.param(
                "attachment; filename=jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz",
                "jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz",
            ),
            # Quoted value.
            pytest.param(
                'attachment; filename="jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz"',
                "jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz",
            ),
            # Directory components are expected to be removed.
            pytest.param(
                "attachment; filename=/what/ever/filename.tar.gz",
                "filename.tar.gz",
            ),
            # Empty filename parameter.
            pytest.param(
                "attachment; filename=",
                None,
            ),
        ],
    )
    def test_filename_from_disposition_header(self, header, result):
        """Each header value must yield the expected filename, or None."""
        parsed = urlgrabber.filename_from_content_disposition(header)
        assert parsed == result