Browse Source

Correctly parse the filename in content-disposition header. (#1946)

* Implement regex to match the filename in the content-disposition header.

The following strings will match:
filename=Zombie.Land.Saga.Revenge.S02E12.480p.x264-mSD.nzb; filename*=UTF-8''Zombie.Land.Saga.Revenge.S02E12.480p.x264-mSD.nzb
filename=Zombie.Land.Saga.Revenge.S02E12.480p.x264-mSD.nzb;
filename*=UTF-8''Zombie.Land.Saga.Revenge.S02E12.480p.x264-mSD.nzb

* Missed quote

* Implement the mailbox/Message solution
* Add basic tests

* Add `attachment;`

* Add example with attachment.

* Fix some linting.
* Added edge case tests.

* Added comment.

* Added test to include path elements.

* Only try the content-disposition header when it has `filename` in it

* Project uses double quotes.

* Update test.
* Add `attachment;`

* black formatter

* remove release names.

* trailing commas

* quote enclosures
pull/1951/head
p0ps 4 years ago
committed by GitHub
parent
commit
c1b9b727e6
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 26
      sabnzbd/urlgrabber.py
  2. 40
      tests/test_urlgrabber.py

26
sabnzbd/urlgrabber.py

@ -28,6 +28,7 @@ import urllib.request
import urllib.error import urllib.error
import urllib.parse import urllib.parse
from http.client import IncompleteRead, HTTPResponse from http.client import IncompleteRead, HTTPResponse
from mailbox import Message
from threading import Thread from threading import Thread
import base64 import base64
from typing import Tuple, Optional from typing import Tuple, Optional
@ -59,6 +60,27 @@ _RARTING_FIELDS = (
) )
def filename_from_content_disposition(content_disposition: str) -> Optional[str]:
    """
    Extract and validate filename from a Content-Disposition header.

    Origin: https://github.com/httpie/httpie/blob/4c8633c6e51f388523ab4fa649040934402a4fc9/httpie/downloads.py#L98

    The value is handed to the standard library header parser (through
    ``mailbox.Message``), so quoted values and the ``filename*=`` form are
    handled there instead of by hand-written string slicing.

    :param content_disposition: Content-Disposition value
    :return: the filename if present and valid, otherwise `None`

    :example:
        filename_from_content_disposition('attachment; filename=jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz')
        should return: 'jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz'
    """
    # "attachment;" is always prepended so that a value consisting only of
    # "filename=..." is still parsed as a parameter of the header; a value
    # that already starts with "attachment;" simply carries it twice.
    msg = Message(f"Content-Disposition: attachment; {content_disposition}")
    filename = msg.get_filename()
    if not filename:
        return None

    # Basic sanitation: drop any path elements and surrounding whitespace.
    filename = os.path.basename(filename).strip()
    # Remove every leading dot. Whitespace is stripped again after each pass
    # because a name such as ". .hidden" would otherwise keep a leading dot.
    while filename.startswith("."):
        filename = filename.lstrip(".").lstrip()

    return filename or None
class URLGrabber(Thread): class URLGrabber(Thread):
def __init__(self): def __init__(self):
super().__init__() super().__init__()
@ -190,8 +212,8 @@ class URLGrabber(Thread):
nzo_info[item] = value nzo_info[item] = value
# Get filename from Content-Disposition header # Get filename from Content-Disposition header
if not filename and "filename=" in value: if not filename and "filename" in value:
filename = value[value.index("filename=") + 9 :].strip(";").strip('"') filename = filename_from_content_disposition(value)
if wait: if wait:
# For sites that have a rate-limiting attribute # For sites that have a rate-limiting attribute

40
tests/test_urlgrabber.py

@ -158,3 +158,43 @@ class TestBuildRequest:
self._runner(self.httpbin.url + "/status/404", 404) self._runner(self.httpbin.url + "/status/404", 404)
with pytest.raises(urllib.error.HTTPError): with pytest.raises(urllib.error.HTTPError):
self._runner(self.httpbin.url + "/no/such/file", 404) self._runner(self.httpbin.url + "/no/such/file", 404)
class TestFilenameFromDispositionHeader:
    """Checks for ``urlgrabber.filename_from_content_disposition``."""

    # Pairs of (raw header value, filename expected back from the parser).
    _CASES = [
        # Plain and UTF-8 encoded filename together:
        # in this case the first filename (not the UTF-8 encoded) is parsed.
        (
            "attachment; filename=jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz; filename*=UTF-8''jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz",
            "jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz",
        ),
        # Only the plain filename, no disposition type, trailing semicolon.
        (
            "filename=jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz;",
            "jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz",
        ),
        # Only the UTF-8 encoded filename.
        (
            "filename*=UTF-8''jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz",
            "jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz",
        ),
        # Disposition type followed by an unquoted filename.
        (
            "attachment; filename=jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz",
            "jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz",
        ),
        # Disposition type followed by a quoted filename.
        (
            'attachment; filename="jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz"',
            "jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz",
        ),
        # Path elements must be dropped.
        (
            "attachment; filename=/what/ever/filename.tar.gz",
            "filename.tar.gz",
        ),
        # Empty filename yields nothing.
        (
            "attachment; filename=",
            None,
        ),
    ]

    @pytest.mark.parametrize("header, result", _CASES)
    def test_filename_from_disposition_header(self, header, result):
        """Each disposition header must be parsed into the expected filename."""
        parsed = urlgrabber.filename_from_content_disposition(header)
        assert parsed == result

Loading…
Cancel
Save