Browse Source

Correctly parse the filename in content-disposition header. (#1946)

* Implement regex to match the filename in the content-disposition header.

The following strings will match:
filename=Zombie.Land.Saga.Revenge.S02E12.480p.x264-mSD.nzb; filename*=UTF-8''Zombie.Land.Saga.Revenge.S02E12.480p.x264-mSD.nzb
filename=Zombie.Land.Saga.Revenge.S02E12.480p.x264-mSD.nzb;
filename*=UTF-8''Zombie.Land.Saga.Revenge.S02E12.480p.x264-mSD.nzb

* Missed quote

* Implement the mailbox/Message solution
* Add basic tests

* Add `attachment;`

* Add example with attachment.

* Fix some linting.
* Added edge case tests.

* Added comment.

* Added test to include path elements.

* Only try the content-disposition header when it has `filename` in it

* Project uses double quotes.

* Update test.
* Add `attachment;`

* black formatter

* remove release names.

* trailing commas

* quote enclosures
pull/1951/head
p0ps 4 years ago
committed by GitHub
parent
commit
c1b9b727e6
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 26
      sabnzbd/urlgrabber.py
  2. 40
      tests/test_urlgrabber.py

26
sabnzbd/urlgrabber.py

@ -28,6 +28,7 @@ import urllib.request
import urllib.error
import urllib.parse
from http.client import IncompleteRead, HTTPResponse
from mailbox import Message
from threading import Thread
import base64
from typing import Tuple, Optional
@ -59,6 +60,27 @@ _RARTING_FIELDS = (
)
def filename_from_content_disposition(content_disposition):
    """
    Extract and validate filename from a Content-Disposition header.

    Origin: https://github.com/httpie/httpie/blob/4c8633c6e51f388523ab4fa649040934402a4fc9/httpie/downloads.py#L98

    The value is handed to the e-mail header parser, so both the plain
    ``filename=`` parameter and the RFC 2231 encoded ``filename*=`` parameter
    are understood, with or without surrounding double quotes.

    The parsed name is then sanitised:

    * directory components are dropped, treating both ``/`` and ``\\`` as
      separators regardless of the platform this runs on;
    * surrounding whitespace and leading dots are removed, in any combination.

    :param content_disposition: Content-Disposition value
    :type content_disposition: str
    :return: the filename if present and valid, otherwise `None`

    :example:
        filename_from_content_disposition('attachment; filename=jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz')
        should return: 'jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz'
    """
    # "attachment;" is prepended so that a value consisting only of parameters
    # (e.g. "filename=foo.nzb") still has a disposition type in front of them.
    msg = Message(f"Content-Disposition: attachment; {content_disposition}")
    filename = msg.get_filename()
    if not filename:
        return None

    # Basic sanitation: keep only the last path element. os.path.basename does
    # not split on backslashes on POSIX, so normalise them first to get the
    # same result on every platform.
    filename = os.path.basename(filename.replace("\\", "/"))

    # Remove surrounding whitespace and leading dots. Repeat until nothing
    # changes, so that whitespace hiding a leading dot (" .name") or dots
    # hiding whitespace (". name") cannot slip through.
    while True:
        cleaned = filename.strip().lstrip(".")
        if cleaned == filename:
            break
        filename = cleaned

    return filename or None
class URLGrabber(Thread):
def __init__(self):
super().__init__()
@ -190,8 +212,8 @@ class URLGrabber(Thread):
nzo_info[item] = value
# Get filename from Content-Disposition header
if not filename and "filename=" in value:
filename = value[value.index("filename=") + 9 :].strip(";").strip('"')
if not filename and "filename" in value:
filename = filename_from_content_disposition(value)
if wait:
# For sites that have a rate-limiting attribute

40
tests/test_urlgrabber.py

@ -158,3 +158,43 @@ class TestBuildRequest:
self._runner(self.httpbin.url + "/status/404", 404)
with pytest.raises(urllib.error.HTTPError):
self._runner(self.httpbin.url + "/no/such/file", 404)
class TestFilenameFromDispositionHeader:
    """Tests for ``urlgrabber.filename_from_content_disposition``."""

    # Pairs of (Content-Disposition value, expected return value)
    _HEADER_CASES = [
        (
            # In this case the first filename (not the UTF-8 encoded) is parsed.
            "attachment; filename=jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz; filename*=UTF-8''jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz",
            "jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz",
        ),
        (
            # Only the plain parameter, with a trailing semicolon
            "filename=jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz;",
            "jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz",
        ),
        (
            # Only the UTF-8 encoded parameter
            "filename*=UTF-8''jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz",
            "jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz",
        ),
        (
            # Disposition type followed by an unquoted filename
            "attachment; filename=jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz",
            "jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz",
        ),
        (
            # Disposition type followed by a double-quoted filename
            'attachment; filename="jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz"',
            "jakubroztocil-httpie-0.4.1-20-g40bd8f6.tar.gz",
        ),
        (
            # Path elements are expected to be removed
            "attachment; filename=/what/ever/filename.tar.gz",
            "filename.tar.gz",
        ),
        (
            # An empty filename is expected to yield None
            "attachment; filename=",
            None,
        ),
    ]

    @pytest.mark.parametrize("header, result", _HEADER_CASES)
    def test_filename_from_disposition_header(self, header, result):
        """Each header value must be parsed to its expected filename."""
        parsed = urlgrabber.filename_from_content_disposition(header)
        assert parsed == result

Loading…
Cancel
Save