diff --git a/README.md b/README.md index 2bc7b4f..cfd58d4 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ If you want to know more you can head over to our website: https://sabnzbd.org. SABnzbd has a few dependencies you'll need before you can get running. If you've previously run SABnzbd from one of the various Linux packages, then you likely already have all the needed dependencies. If not, here's what you're looking for: - `python` (Python 3.6 and higher, often called `python3`) -- Python modules listed in `requirements.txt` +- Python modules listed in `requirements.txt`. Install with `python3 -m pip install -r requirements.txt -U` - `par2` (Multi-threaded par2 installation guide can be found [here](https://sabnzbd.org/wiki/installation/multicore-par2)) - `unrar` (make sure you get the "official" non-free version of unrar) diff --git a/requirements.txt b/requirements.txt index 9afe136..cf802a5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,6 +8,7 @@ cherrypy portend chardet notify2 +puremagic # Windows system integration pywin32>=227; sys_platform == 'win32' diff --git a/sabnzbd/deobfuscate_filenames.py b/sabnzbd/deobfuscate_filenames.py index 68acc2d..9040879 100755 --- a/sabnzbd/deobfuscate_filenames.py +++ b/sabnzbd/deobfuscate_filenames.py @@ -35,6 +35,7 @@ import re from sabnzbd.filesystem import get_unique_filename, renamer, get_ext from sabnzbd.par2file import is_parfile, parse_par2_file +import sabnzbd.utils.file_extension as file_extension # Files to exclude and minimal file size for renaming EXCLUDED_FILE_EXTS = (".vob", ".rar", ".par2", ".mts", ".m2ts", ".cpi", ".clpi", ".mpl", ".mpls", ".bdm", ".bdmv") @@ -134,13 +135,18 @@ def is_probably_obfuscated(myinputfilename): def deobfuscate_list(filelist, usefulname): """Check all files in filelist, and if wanted, deobfuscate: rename to filename based on usefulname""" + # Methods + # 1. based on par2 (if any) + # 2. if no meaningful extension, add it + # 3. 
based on detecting obfuscated filenames + # to be sure, only keep really exsiting files: - filelist = [f for f in filelist if os.path.exists(f)] + filelist = [f for f in filelist if os.path.isfile(f)] # Search for par2 files in the filelist par2_files = [f for f in filelist if f.endswith(".par2")] # Found any par2 files we can use? - run_renamer = True + par2_renaming_done = False if not par2_files: logging.debug("No par2 files found to process, running renamer") else: @@ -150,65 +156,88 @@ def deobfuscate_list(filelist, usefulname): logging.debug("Deobfuscate par2: handling %s", par2_file) if decode_par2(par2_file): logging.debug("Deobfuscate par2 repair/verify finished") - run_renamer = False + par2_renaming_done = True else: logging.debug("Deobfuscate par2 repair/verify did not find anything to rename") - # No par2 files? Then we try to rename qualifying (big, not-excluded, obfuscated) files to the job-name - if run_renamer: - excluded_file_exts = EXCLUDED_FILE_EXTS - # If there is a collection with bigger files with the same extension, we don't want to rename it - extcounter = {} - for file in filelist: - if os.path.getsize(file) < MIN_FILE_SIZE: - # too small to care - continue - ext = get_ext(file) - if ext in extcounter: - extcounter[ext] += 1 - else: - extcounter[ext] = 1 - if extcounter[ext] >= 3 and ext not in excluded_file_exts: - # collection, and extension not yet in excluded_file_exts, so add it - excluded_file_exts = (*excluded_file_exts, ext) - logging.debug( - "Found a collection of at least %s files with extension %s, so not renaming those files", - extcounter[ext], - ext, - ) - - logging.debug("Trying to see if there are qualifying files to be deobfuscated") - # We start with he biggest file ... 
probably the most important file - filelist = sorted(filelist, key=os.path.getsize, reverse=True) - for filename in filelist: - # check that file is still there (and not renamed by the secondary renaming process below) - if not os.path.isfile(filename): - continue - logging.debug("Deobfuscate inspecting %s", filename) - # Do we need to rename this file? - # Criteria: big, not-excluded extension, obfuscated (in that order) - if ( - os.path.getsize(filename) > MIN_FILE_SIZE - and get_ext(filename) not in excluded_file_exts - and is_probably_obfuscated(filename) # this as last test to avoid unnecessary analysis - ): - # Rename and make sure the new filename is unique - path, file = os.path.split(filename) - # construct new_name: - new_name = get_unique_filename("%s%s" % (os.path.join(path, usefulname), get_ext(filename))) - logging.info("Deobfuscate renaming %s to %s", filename, new_name) - renamer(filename, new_name) - # find other files with the same basename in filelist, and rename them in the same way: - basedirfile, _ = os.path.splitext(filename) # something like "/home/this/myiso" - for otherfile in filelist: - if otherfile.startswith(basedirfile + ".") and os.path.isfile(otherfile): - # yes, same basedirfile, only different extension - remainingextension = otherfile.replace(basedirfile, "") # might be long ext, like ".dut.srt" - new_name = get_unique_filename("%s%s" % (os.path.join(path, usefulname), remainingextension)) - logging.info("Deobfuscate renaming %s to %s", otherfile, new_name) - # Rename and make sure the new filename is unique - renamer(otherfile, new_name) + if par2_renaming_done: + # TODO really needed to quit here? We could also proceed with the other actions. 
Anyway: + return # done + + # let's see if there are files with uncommon/unpopular (so: obfuscated) extensions + # if so, let's give them a better extension based on their internal content/info + # Example: if 'kjladsflkjadf.adsflkjads' is probably a PNG, rename to 'kjladsflkjadf.adsflkjads.png' + newlist = [] + for file in filelist: + if file_extension.has_popular_extension(file): + # common extension, like .doc or .iso, so assume OK and change nothing + logging.debug("extension of %s looks common", file) + newlist.append(file) + else: + # uncommon (so: obfuscated) extension + new_extension_to_add = file_extension.what_is_most_likely_extension(file) + if new_extension_to_add: + new_name = get_unique_filename("%s%s" % (file, new_extension_to_add)) + logging.info("Deobfuscate renaming (adding extension) %s to %s", file, new_name) + renamer(file, new_name) + newlist.append(new_name) else: - logging.debug("%s excluded from deobfuscation based on size, extension or non-obfuscation", filename) - else: - logging.info("No qualifying files found to deobfuscate") + # no new extension found + newlist.append(file) + filelist = newlist + + # Now we try to rename qualifying (big, not-excluded, obfuscated) files to the job-name + excluded_file_exts = EXCLUDED_FILE_EXTS + # If there is a collection with bigger files with the same extension, we don't want to rename it + extcounter = {} + for file in filelist: + if os.path.getsize(file) < MIN_FILE_SIZE: + # too small to care + continue + ext = get_ext(file) + if ext in extcounter: + extcounter[ext] += 1 + else: + extcounter[ext] = 1 + if extcounter[ext] >= 3 and ext not in excluded_file_exts: + # collection, and extension not yet in excluded_file_exts, so add it + excluded_file_exts = (*excluded_file_exts, ext) + logging.debug( + "Found a collection of at least %s files with extension %s, so not renaming those files", + extcounter[ext], + ext, + ) + + logging.debug("Trying to see if there are qualifying files to be deobfuscated") + 
# We start with the biggest file ... probably the most important file + filelist = sorted(filelist, key=os.path.getsize, reverse=True) + for filename in filelist: + # check that file is still there (and not renamed by the secondary renaming process below) + if not os.path.isfile(filename): + continue + logging.debug("Deobfuscate inspecting %s", filename) + # Do we need to rename this file? + # Criteria: big, not-excluded extension, obfuscated (in that order) + if ( + os.path.getsize(filename) > MIN_FILE_SIZE + and get_ext(filename) not in excluded_file_exts + and is_probably_obfuscated(filename) # this as last test to avoid unnecessary analysis + ): + # Rename and make sure the new filename is unique + path, file = os.path.split(filename) + # construct new_name: + new_name = get_unique_filename("%s%s" % (os.path.join(path, usefulname), get_ext(filename))) + logging.info("Deobfuscate renaming %s to %s", filename, new_name) + renamer(filename, new_name) + # find other files with the same basename in filelist, and rename them in the same way: + basedirfile, _ = os.path.splitext(filename) # something like "/home/this/myiso" + for otherfile in filelist: + if otherfile.startswith(basedirfile + ".") and os.path.isfile(otherfile): + # yes, same basedirfile, only different extension + remainingextension = otherfile.replace(basedirfile, "") # might be long ext, like ".dut.srt" + new_name = get_unique_filename("%s%s" % (os.path.join(path, usefulname), remainingextension)) + logging.info("Deobfuscate renaming %s to %s", otherfile, new_name) + # Rename and make sure the new filename is unique + renamer(otherfile, new_name) + else: + logging.debug("%s excluded from deobfuscation based on size, extension or non-obfuscation", filename) diff --git a/sabnzbd/utils/diskspeed.py b/sabnzbd/utils/diskspeed.py index da76720..e5001b9 100644 --- a/sabnzbd/utils/diskspeed.py +++ b/sabnzbd/utils/diskspeed.py @@ -1,6 +1,6 @@ #!/usr/bin/python3 -""" Measure writing speed of disk specifiec, or 
working directory if not specified""" +""" Measure writing speed of disk specified (or working directory if not specified)""" import time import os diff --git a/sabnzbd/utils/file_extension.py b/sabnzbd/utils/file_extension.py new file mode 100644 index 0000000..79aa984 --- /dev/null +++ b/sabnzbd/utils/file_extension.py @@ -0,0 +1,314 @@ +#!/usr/bin/python3 + +""" function to check and find correct extension of a (deobfuscated) file +Note: extension always contains a leading dot +""" + + +import puremagic +import os +import sys +from typing import List +from pathlib import Path +from sabnzbd.filesystem import get_ext + +# common extension from https://www.computerhope.com/issues/ch001789.htm +POPULAR_EXT = ( + "3g2", + "3gp", + "7z", + "ai", + "aif", + "apk", + "arj", + "asp", + "aspx", + "avi", + "bak", + "bat", + "bin", + "bmp", + "c", + "cab", + "cda", + "cer", + "cfg", + "cfm", + "cgi", + "cgi", + "cgi", + "class", + "com", + "cpl", + "cpp", + "cs", + "css", + "csv", + "cur", + "dat", + "db", + "dbf", + "deb", + "dll", + "dmg", + "dmp", + "doc", + "docx", + "drv", + "email", + "eml", + "emlx", + "exe", + "flv", + "fnt", + "fon", + "gadget", + "gif", + "h", + "h264", + "htm", + "html", + "icns", + "ico", + "ico", + "ini", + "iso", + "jar", + "java", + "jpeg", + "jpg", + "js", + "jsp", + "key", + "lnk", + "log", + "m4v", + "mdb", + "mid", + "midi", + "mkv", + "mov", + "mp3", + "mp4", + "mpa", + "mpeg", + "mpg", + "msg", + "msi", + "msi", + "odp", + "ods", + "odt", + "oft", + "ogg", + "ost", + "otf", + "part", + "pdf", + "php", + "php", + "pkg", + "pl", + "pl", + "pl", + "png", + "pps", + "ppt", + "pptx", + "ps", + "psd", + "pst", + "py", + "py", + "py", + "rar", + "rm", + "rpm", + "rss", + "rtf", + "sav", + "sh", + "sql", + "svg", + "swf", + "swift", + "sys", + "tar", + "tar", + "gz", + "tex", + "tif", + "tiff", + "tmp", + "toast", + "ttf", + "txt", + "vb", + "vcd", + "vcf", + "vob", + "wav", + "wma", + "wmv", + "wpd", + "wpl", + "wsf", + "xhtml", + "xls", + 
"xlsm", + "xlsx", + "z", + "zip", +) + +DOWNLOAD_EXT = ( + "ass", + "avi", + "bat", + "bdmv", + "bin", + "bup", + "clpi", + "crx", + "db", + "diz", + "djvu", + "docx", + "epub", + "exe", + "flac", + "gif", + "gz", + "htm", + "html", + "icns", + "ico", + "idx", + "ifo", + "img", + "inf", + "info", + "ini", + "iso", + "jpg", + "log", + "m2ts", + "m3u", + "m4a", + "mkv", + "mp3", + "mp4", + "mpls", + "mx", + "nfo", + "nib", + "nzb", + "otf", + "par2", + "part", + "pdf", + "pem", + "php", + "plist", + "png", + "py", + "rar", + "releaseinfo", + "rev", + "sfv", + "sh", + "srr", + "srs", + "srt", + "strings", + "sub", + "sup", + "sys", + "tif", + "ttf", + "txt", + "url", + "vob", + "website", + "wmv", + "xpi", +) + +# combine to one tuple, with unique entries: +ALL_EXT = tuple(set(POPULAR_EXT + DOWNLOAD_EXT)) +# prepend a dot to each extension, because we work with a leading dot in extensions +ALL_EXT = tuple(["." + i for i in ALL_EXT]) + + +def has_popular_extension(file_path: str) -> bool: + """returns boolean if the extension of file_path is a popular, well-known extension""" + file_extension = get_ext(file_path) + return file_extension in ALL_EXT + + +def all_possible_extensions(file_path: str) -> List[str]: + """returns a list with all possible extensions (with leading dot) for given file_path as reported by puremagic""" + extension_list = [] + for i in puremagic.magic_file(file_path): + extension_list.append(i.extension) + return extension_list + + +def what_is_most_likely_extension(file_path: str) -> str: + """Returns most_likely extension, with a leading dot""" + for possible_extension in all_possible_extensions(file_path): + # let's see if technically-suggested extension by puremagic is also likely IRL + if possible_extension in ALL_EXT: + # Yes, looks likely + return possible_extension + + # Check if text or NZB, as puremagic is not good at that. + try: + txt = Path(file_path).read_text() + # Yes, a text file ... 
so let's check if it's even an NZB: + if txt.lower().find("<nzb xmlns=") >= 0: + # yes, contains NZB signals: + return ".nzb" + else: + return ".txt" + except UnicodeDecodeError: + # not txt (and not nzb) + pass + + # no popular extension found, so just trust puremagic and return the first extension (if any) + try: + return all_possible_extensions(file_path)[0] + except IndexError: + return "" + + +if __name__ == "__main__": + privacy = False + + # parse all parameters on CLI as files to be ext-checked + for i in range(1, len(sys.argv)): + if sys.argv[i] == "-p": + # privacy, please ... so only print last 10 chars of a file + privacy = True + continue + + file_path = sys.argv[i] + + if privacy: + to_be_printed = file_path[-10:] + else: + to_be_printed = file_path + + if has_popular_extension(file_path): + # a common extension, so let's see what puremagic says, so that we can learn + filename, file_extension = os.path.splitext(file_path) + file_extension = file_extension[1:].lower() + + print( + "IRL-ext", + file_extension, + "most_likely", + what_is_most_likely_extension(file_path), + "puremagic", + all_possible_extensions(file_path), + ) diff --git a/tests/data/test_file_extension/apeeengeee b/tests/data/test_file_extension/apeeengeee new file mode 100644 index 0000000..2b4840c Binary files /dev/null and b/tests/data/test_file_extension/apeeengeee differ diff --git a/tests/data/test_file_extension/my_matroska b/tests/data/test_file_extension/my_matroska new file mode 100644 index 0000000..6486fc0 Binary files /dev/null and b/tests/data/test_file_extension/my_matroska differ diff --git a/tests/data/test_file_extension/some_nzb_file b/tests/data/test_file_extension/some_nzb_file new file mode 100644 index 0000000..fd4c82d --- /dev/null +++ b/tests/data/test_file_extension/some_nzb_file @@ -0,0 +1,84 @@ + + + + + + alt.binaries.test + + + QoEbWuJpTnYmReOxUbFmBvLx-1623601671928@nyuu + + + + + alt.binaries.test + + + OfUzNpRoQlEkAkJwUoHxJlJj-1623601671929@nyuu + + + + + 
alt.binaries.test + + + TsNlKcDyMiCiNeHrMhFrQwPu-1623601671929@nyuu + + + + + alt.binaries.test + + + RvFtBzLeVzYhCiSjNkYqPkYv-1623601672004@nyuu + + + + + alt.binaries.test + + + CyBcLhFsErVvWhKaJbKySsLh-1623601672003@nyuu + + + + + alt.binaries.test + + + ZtFjLqEiBmQgZyHyRjIvLmDq-1623601671925@nyuu + + + + + alt.binaries.test + + + ZbBmMqCmJyRgOjAiSgMmFhUs-1623601672012@nyuu + + + + + alt.binaries.test + + + OmEhDrElGwEkYrHsTcFlYeYp-1623601672019@nyuu + + + + + alt.binaries.test + + + SkUsGaAkBjNpHoCsLtLiBcYn-1623601672044@nyuu + + + + + alt.binaries.test + + + PfYdNqVpPpLvOqTvYrXoRbQi-1623601672045@nyuu + + + diff --git a/tests/data/test_file_extension/somepeedeef b/tests/data/test_file_extension/somepeedeef new file mode 100644 index 0000000..ccd76aa Binary files /dev/null and b/tests/data/test_file_extension/somepeedeef differ diff --git a/tests/data/test_file_extension/sometxtfile b/tests/data/test_file_extension/sometxtfile new file mode 100644 index 0000000..5a05b61 --- /dev/null +++ b/tests/data/test_file_extension/sometxtfile @@ -0,0 +1,4 @@ +Yes, this is a text file. + +The END + diff --git a/tests/test_deobfuscate_filenames.py b/tests/test_deobfuscate_filenames.py index 951e194..7729226 100644 --- a/tests/test_deobfuscate_filenames.py +++ b/tests/test_deobfuscate_filenames.py @@ -86,7 +86,7 @@ class TestDeobfuscateFinalResult: os.mkdir(dirname) # Create a big enough file with a non-useful, obfuscated filename - output_file1 = os.path.join(dirname, "111c1c9e2bdfb5114044bf25152b7eab.bla") + output_file1 = os.path.join(dirname, "111c1c9e2bdfb5114044bf25152b7eab.bin") create_big_file(output_file1) assert os.path.isfile(output_file1) @@ -99,9 +99,8 @@ class TestDeobfuscateFinalResult: # Check original files: assert not os.path.isfile(output_file1) # original filename should not be there anymore - # Check the renaming - assert os.path.isfile(os.path.join(dirname, jobname + ".bla")) # ... 
it should be renamed to the jobname + assert os.path.isfile(os.path.join(dirname, jobname + ".bin")) # ... it should be renamed to the jobname # Done. Remove (non-empty) directory shutil.rmtree(dirname) @@ -115,12 +114,12 @@ class TestDeobfuscateFinalResult: os.mkdir(dirname) # Create a big enough file with a non-useful filename - output_file1 = os.path.join(dirname, "111c1c9e2bdfb5114044bf25152b7eaa.bla") + output_file1 = os.path.join(dirname, "111c1c9e2bdfb5114044bf25152b7eaa.bin") create_big_file(output_file1) assert os.path.isfile(output_file1) # and another one - output_file2 = os.path.join(dirname, "222c1c9e2bdfb5114044bf25152b7eaa.bla") + output_file2 = os.path.join(dirname, "222c1c9e2bdfb5114044bf25152b7eaa.bin") create_big_file(output_file2) assert os.path.isfile(output_file2) @@ -128,11 +127,11 @@ class TestDeobfuscateFinalResult: myfilelist = [output_file1, output_file2] # Create some extra files ... that will not be in the list - output_file3 = os.path.join(dirname, "333c1c9e2bdfb5114044bf25152b7eaa.bla") + output_file3 = os.path.join(dirname, "333c1c9e2bdfb5114044bf25152b7eaa.bin") create_big_file(output_file3) assert os.path.isfile(output_file3) - output_file4 = os.path.join(dirname, "This Great Download 2020.bla") + output_file4 = os.path.join(dirname, "This Great Download 2020.bin") create_big_file(output_file4) assert os.path.isfile(output_file4) @@ -147,8 +146,8 @@ class TestDeobfuscateFinalResult: assert os.path.isfile(output_file4) # and this one too # Check the renaming - assert os.path.isfile(os.path.join(dirname, jobname + ".bla")) # ... it should be renamed to the jobname - assert os.path.isfile(os.path.join(dirname, jobname + ".1.bla")) # should not be there + assert os.path.isfile(os.path.join(dirname, jobname + ".bin")) # ... it should be renamed to the jobname + assert os.path.isfile(os.path.join(dirname, jobname + ".1.bin")) # should be there (2nd file renamed) # Done. 
Remove (non-empty) directory shutil.rmtree(dirname) @@ -165,7 +164,7 @@ class TestDeobfuscateFinalResult: os.mkdir(subsubdirname) # Create a big enough file with a non-useful, obfuscated filename - output_file1 = os.path.join(subsubdirname, "111c1c9e2bdfb5114044bf25152b7eab.bla") + output_file1 = os.path.join(subsubdirname, "111c1c9e2bdfb5114044bf25152b7eab.bin") create_big_file(output_file1) assert os.path.isfile(output_file1) @@ -180,7 +179,7 @@ class TestDeobfuscateFinalResult: assert not os.path.isfile(output_file1) # original filename should not be there anymore # Check the renaming - assert os.path.isfile(os.path.join(subsubdirname, jobname + ".bla")) # ... it should be renamed to the jobname + assert os.path.isfile(os.path.join(subsubdirname, jobname + ".bin")) # ... it should be renamed to the jobname # Done. Remove (non-empty) directory shutil.rmtree(dirname) @@ -235,7 +234,7 @@ class TestDeobfuscateFinalResult: shutil.rmtree(dirname) def test_deobfuscate_collection_with_same_extension(self): - # input: a collection of bigger files with the same extension + # input: a collection of 3+ bigger files with the same extension # test that there is no renaming on the collection ... 
as that's useless on a collection # Create directory (with a random directory name) @@ -243,24 +242,24 @@ class TestDeobfuscateFinalResult: os.mkdir(dirname) # Create big enough files with a non-useful filenames, all with same extension - file1 = os.path.join(dirname, "file1.bla") + file1 = os.path.join(dirname, "file1.bin") create_big_file(file1) assert os.path.isfile(file1) - file2 = os.path.join(dirname, "file2.bla") + file2 = os.path.join(dirname, "file2.bin") create_big_file(file2) assert os.path.isfile(file2) - file3 = os.path.join(dirname, "file3.bla") + file3 = os.path.join(dirname, "file3.bin") create_big_file(file3) assert os.path.isfile(file3) - file4 = os.path.join(dirname, "file4.bla") + file4 = os.path.join(dirname, "file4.bin") create_big_file(file4) assert os.path.isfile(file4) # other extension ... so this one should get renamed - otherfile = os.path.join(dirname, "other.bin") + otherfile = os.path.join(dirname, "other.iso") create_big_file(otherfile) assert os.path.isfile(otherfile) @@ -281,7 +280,7 @@ class TestDeobfuscateFinalResult: assert not os.path.isfile(otherfile) # should be renamed # Check the renaming - assert os.path.isfile(os.path.join(dirname, jobname + ".bin")) # ... should be renamed to the jobname + assert os.path.isfile(os.path.join(dirname, jobname + ".iso")) # ... should be renamed to the jobname # Done. 
Remove (non-empty) directory shutil.rmtree(dirname) @@ -294,8 +293,8 @@ class TestDeobfuscateFinalResult: jobname = "My Important Download 2020" deobfuscate_list(myfilelist, jobname) - # Create directory with a directory name to could be renamed - dirname = os.path.join(SAB_DATA_DIR, "333c1c9e2bdfb5114044bf25152b7eaa.bla") + # Create directory with a directory name that could be renamed, but should not + dirname = os.path.join(SAB_DATA_DIR, "333c1c9e2bdfb5114044bf25152b7eaa.bin") os.mkdir(dirname) myfilelist = [dirname] jobname = "My Important Download 2020" diff --git a/tests/test_file_extension.py b/tests/test_file_extension.py new file mode 100644 index 0000000..0d1ed00 --- /dev/null +++ b/tests/test_file_extension.py @@ -0,0 +1,54 @@ +#!/usr/bin/python3 -OO +# Copyright 2007-2021 The SABnzbd-Team +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +""" +Testing SABnzbd correct extension functionality module +""" + +import os +from tests.testhelper import * +import sabnzbd.utils.file_extension as file_extension + + +class Test_File_Extension: + def test_has_popular_extension(self): + assert file_extension.has_popular_extension("blabla/blabla.mkv") + assert file_extension.has_popular_extension("blabla/blabla.srt") + assert file_extension.has_popular_extension("djjddj/aaaaa.epub") + assert not file_extension.has_popular_extension("98ads098f098fa.a0ds98f098asdf") + + def test_what_is_most_likely_extension(self): + # These are real-content files, where the contents determine the extension + filename = "tests/data/test_file_extension/apeeengeee" # A PNG + assert os.path.isfile(filename) + assert file_extension.what_is_most_likely_extension(filename) == ".png" + + filename = "tests/data/test_file_extension/somepeedeef" # Some PDF + assert os.path.isfile(filename) + assert file_extension.what_is_most_likely_extension(filename) == ".pdf" + + filename = "tests/data/test_file_extension/my_matroska" # my Matroska MKV + assert os.path.isfile(filename) + assert file_extension.what_is_most_likely_extension(filename) == ".mkv" + + filename = "tests/data/test_file_extension/sometxtfile" # a txt file + assert os.path.isfile(filename) + assert file_extension.what_is_most_likely_extension(filename) == ".txt" + + filename = "tests/data/test_file_extension/some_nzb_file" # a NZB file + assert os.path.isfile(filename) + assert file_extension.what_is_most_likely_extension(filename) == ".nzb"