From f0ef984276bb8a3d88fff420d739f10b626e3634 Mon Sep 17 00:00:00 2001 From: Sander Date: Tue, 26 May 2020 10:52:58 +0200 Subject: [PATCH] find and use obfuscated SFV files (#1459) * isSFVfile(): Checks if given file is a SFV file, and returns result as boolean * Obfuscated SFV files: find and use them * Obfuscated SFV files: rename to is_sfv_file() * is_sfv_file(): regexp based, minor stuff, pytest * is_sfv_file(): pytest according to black --- sabnzbd/newsunpack.py | 45 ++++++++++++++++++++++++++++++++++++++++ sabnzbd/postproc.py | 12 +++++++++-- tests/data/good_sfv_unicode.sfv | 11 ++++++++++ tests/data/one_line.sfv | 1 + tests/data/only_comments.sfv | 4 ++++ tests/data/random.bin | Bin 0 -> 5120 bytes tests/test_is_sfv_file.py | 28 +++++++++++++++++++++++++ 7 files changed, 99 insertions(+), 2 deletions(-) create mode 100644 tests/data/good_sfv_unicode.sfv create mode 100644 tests/data/one_line.sfv create mode 100644 tests/data/only_comments.sfv create mode 100644 tests/data/random.bin create mode 100644 tests/test_is_sfv_file.py diff --git a/sabnzbd/newsunpack.py b/sabnzbd/newsunpack.py index e7d9886..7f2d74e 100644 --- a/sabnzbd/newsunpack.py +++ b/sabnzbd/newsunpack.py @@ -28,7 +28,9 @@ import time import zlib import shutil import functools +import re from subprocess import Popen +from sabnzbd.encoding import ubtou import sabnzbd from sabnzbd.encoding import platform_btou, correct_unknown_encoding @@ -2210,6 +2212,49 @@ def par2_mt_check(par2_path): return False +def is_sfv_file(myfile): + """ Checks if given file is a SFV file, and returns result as boolean """ + + # based on https://stackoverflow.com/a/7392391/5235502 + textchars = bytearray({7, 8, 9, 10, 12, 13, 27} | set(range(0x20, 0x100)) - {0x7F}) + is_ascii_string = lambda bytes: not bool(bytes.translate(None, textchars)) + + # first check if it's plain text (ASCII or Unicode) + try: + with open(myfile, "rb") as f: + # get first 10000 bytes to check + myblock = f.read(10000) + if is_ascii_string(myblock): + # ASCII, so store lines for further inspection + try: + lines = ubtou(myblock).split("\n") + except UnicodeDecodeError: + return False + else: + # non-ASCII, so not SFV + return False + except: + # the with-open() went wrong, so not an existing file, so certainly not a SFV file + return False + + sfv_info_line_counter = 0 + for line in lines: + if re.search("^[^;].*\ +[A-Fa-f0-9]{8}$", line): + # valid, useful SFV line: some text, then one or more space, and a 8-digit hex number + sfv_info_line_counter += 1 + if sfv_info_line_counter >= 10: + # with 10 valid, useful lines we're confident enough + # (note: if we find less lines (even just 1 line), with no negatives, it is OK. See below) + break + elif re.search("^;", line) or re.search("^\ *$", line): + # comment line or just spaces, so continue to next line + continue + else: + # not a valid SFV line, so not a SFV file: + return False + # if we get here, no negatives were found, and at least 1 valid line is OK + return sfv_info_line_counter >= 1 + def sfv_check(sfvs, nzo, workdir): """ Verify files using SFV files """ # Update status diff --git a/sabnzbd/postproc.py b/sabnzbd/postproc.py index a9f7fe6..d4bc3f1 100644 --- a/sabnzbd/postproc.py +++ b/sabnzbd/postproc.py @@ -34,6 +34,7 @@ from sabnzbd.newsunpack import ( sfv_check, build_filelists, rar_sort, + is_sfv_file, ) from threading import Thread from sabnzbd.misc import on_cleanup_list @@ -799,9 +800,16 @@ def try_sfv_check(nzo, workdir): # Get list of SFV names sfvs = globber_full(workdir, "*.sfv") - # Skip if there's no SFV's + # If no files named *.sfv, let's search for obfuscated SFV files if not sfvs: - return None + files = globber_full(workdir, "*") + for file in files: + if sabnzbd.newsunpack.is_sfv_file(file): + logging.debug("Found and will use obfuscated SFV file: %s", file) + sfvs.append(file) + if not sfvs: + # still no SFV, so: + return None result = sfv_check(sfvs, nzo, workdir) if not result: diff --git a/tests/data/good_sfv_unicode.sfv b/tests/data/good_sfv_unicode.sfv new file mode 100644 index 0000000..2762560 --- /dev/null +++ b/tests/data/good_sfv_unicode.sfv @@ -0,0 +1,11 @@ +; Generated by cksfv v1.3.14 on 2020-05-25 at 17:49.33 +; Project web site: http://www.iki.fi/shd/foss/cksfv/ +; +; 10485760 17:47.45 2020-05-25 normalascanbe.bin +; 10485760 17:44.36 2020-05-25 schöne_Türen_öffnen +; 10485760 17:44.05 2020-05-25 this is a file with spaces +; 10485760 17:47.30 2020-05-25 你好世界 +normalascanbe.bin 681718CC +schöne_Türen_öffnen 788E9541 +this is a file with spaces 24041228 +你好世界 78938E11 diff --git a/tests/data/one_line.sfv b/tests/data/one_line.sfv new file mode 100644 index 0000000..dccef67 --- /dev/null +++ b/tests/data/one_line.sfv @@ -0,0 +1 @@ +My Only File ABCD1234 diff --git a/tests/data/only_comments.sfv b/tests/data/only_comments.sfv new file mode 100644 index 0000000..b05fd3d --- /dev/null +++ b/tests/data/only_comments.sfv @@ -0,0 +1,4 @@ +; this is a line +; and this too +; and more +; and more ... diff --git a/tests/data/random.bin b/tests/data/random.bin new file mode 100644 index 0000000000000000000000000000000000000000..566363c27526b3fdfe060f75b656141ae565eac9 GIT binary patch literal 5120 zcmV+b6#whNnAa0RtN~)73YrP03~2LmhK&q=#Z5GK3^ zr^L+SZEql^`0}G~u3y%wNAWk3)M;{858QccrBdrm%9lFpNOCF}xV&BYFqN2@tiyAr0=J!c~S%PQp??6rW_8xE@RP} z7!em?nter%!z9etzm|5&inMB-^$lIWR@v$#V8x2!$%t&jG583di{>xw@M6r`<7}y5 zkJb{6t?>}*`^yJ8N~I&hyg{aQ3hI(Nqxz%#n9mbqll!#Xj>u_o>v%O?)qZ)R0YxOk&?VQ~@oy7FE-39vW1^2*l z0Qb{TH3f=FH?O^DWjr`|g@L4C>gpK9gxL2~jyD8)vwz>lmWHkR)An82YJ4Cseu0L_ z071@Y(R+(W9IQCiGDn!d`=YI8(!Ai{;Vw#i4l~VaPKJV#Ze^WB zxxsy2oGJv+3T@M5-G`VHsR#0Jn^;&hb%LZr=30ZLKEJSP73t55EA)%ZvOIWkmXzBc z=YGQ)+%Ut*hUP=Ow9T%B8{`q99Ztq{pU@*)7v;CQm(_p~uCNRdYh_SAL5&Si}K8=R2}KXxxd zXBaoWUg~iI;5Tw_e$5dPgsDE}K5mvcimZ5%;*tv)@cG!ni*P%Zm2WlT*idx7)xAsYt|5;wro zp^>ThsW9Ap1j*w}0^+Ok%Igxt0k+*{!&-SuEZU!+y(4`TZ8)vu1cb*(Ft^#@H<;w@ zO6vrFX>UqslR{{yF9s-wi34wRSBM6tfI&?ILt6o|AEhRhj@jB!h~c0!WOZ*;*14wL9JHkJLielVzgtgV zVLV`DywAU>CH$-oRRD^7e?h9&?BC}7?kMOg%$p&k--#9n^^UA2`G@aabKUhrBKk)k zwt_u*ZFA>k7rwH4tmMVvR*n7p<>~L@Y!CRz8#>P27cT(^N&w$k!H=fgtZ&<<6OSjb zoIa#YO#sKCbvS>WWu5Wjz+pV=^%q%fnq>P*ApBOCwAYiW;~jH2%Rk-T2oMr*r0~%Z zMUXDT6`8@NbDB9Uu~_6&{w!g(Ffc`dReBFP)aF{%M2^w#HIP~$|3eG&EoY=oOBjP5ZwxG0PoA#nZ((*fxfGPHDf1|3PmEt( zeRZkpk|DoT@N+z&4H9x$tIE|gLa1voO+mtO9T!UMSVKf``-F#@+$Si^)1dBL;JQWE zFL}+dx1=CDh@_05(#A~5ViG@4Fi;30ZQP~1fm~>ixR+Q&9&7(8GSL8IQU>%%p6B%O zyhpCE#qq2(qy#BqH!9qdrvfM3Cz?fS;Qn#wP&N17=vPp+T{q98t7}9JLH-kA2U2FP zyF!~x=7(+VJ`bSS+y0xg;1AcNu z`*2-ax%vAK=S56GzKwJaoi^3EIx)Is$_dp_46nb>^F~OoRtvPj=CcDv0Dy45xeGa~ zv24}up!C(mFD*+gNwNTKG~ zC`VAx3>5DK5w@?UhWD(;LiM>Xc|iTBy~1(F9p{w2jT1#C{gnIoU^KAXF9)XbFim9I zE8upi&wrly!-LI){@19>hVo6$g@VC!gh%l&nma%xaxvGiBY6G3dOqi)qBMEI!0EMt zuk%)R7+s)ecv#}e7zv`FsT=wICW>dTuXX@o0!OgzpC5Mmd~AXV&!e)~)BTeD!=R0m zpA3gK8YfC4O&!~aP1w_Q>=es<*Kl53x&Fa=lVZ6}?-JC3m-AEyzuHvO@ut*py4zJC zgus+w4FJUlOmUaq;C|2D=-r>Spy8B#VbFKKbmo(LeN*nQ&Hn}Ip{8k@Kuf_`>_UdD zKX)kDF+qBWKd=Ufe%Hh{hf8Mg*Ltn4Yb(5vI)BqXa~E3o(ZZ41h5o`fY%__n-!tq~ zujMou$*daninC=mcdn1evP4l=d;mj8*)u}Jf7sf5!CG+TZX*9%+IH^#67R{~l+=qx zjJbwgPP=8o)Z*&vQCZQ*vTR&b2JOVxJ3J%Ts_=x4xFfRnb%Y zDd^8;SefT2B zv3<1$+L@XrSEoq6Xrhd}^gtJ|vl^ZsTQCChXKyjpo2zu-kPqp-=Z?q0?t8bamJ)6F zF&-|yqUpsmL1X6DEQ^0Re=bMw%{M9AYkpGkpceOHSY>9yVdCp0WuzGtLj< z@7Re;+1_6C$Zq)-k%laJ_tkqS{l*yWj0+?{MN$79s_qtrn;t}6CPH1~-Qe8g`HWaf zLZ%t^SN#byQ=OB%+#E3!bM0yXI6n~+zb&`=svT)$9p3ah#eGHbllq{EAenvoEau8& z(5QqSdRpZx{(wg{jJiJjKB zbXsDyFP)W>JRf55LO$s8PS<3bzfW-Jv=S%tJiX{%-Zir!lhJwO_t0RGEJG*IP zF07s`fT+bib18chz@jG~EjDPC3r447DN^>-#4nr3CyEiSFa?p$j&8T_HwKExG>==0 z>4~?oFhag51JHz>&cLzl0ar2WgD7jMH|rFsYjaD&WQdD6Q|SWo3nejJ=S|<3{%Az+ zTZECF;t)RlUiVyQ%j+z_N&6^bB9MeeAUWj9gvecz*^r`J#$o#79Yg5|7tbO?KLVo& z=I=R;j^|DI6yI0$%Xbbn?m!drB>5V&i%L{z@wv()!AYZTw3=r{PNQBdLm6_JO3)Y6 zWMm|BAc0620LzSS3KwRqweIg{ciIaOmwBe(QGV4rxDUD1c0%}l1gXM^c1;$hBxOR( z{nRmD`r7D6j?T;o^BW`^puUYuB1+CDvu2TXdrcuc;1h=KTsP?@ zgbc5*8|+g@Fdo<_uee-jZ*<6M}vMiHtxm_bAnhOV@0nYT8298yFSlH zykNw_SenV1tskLGKv6F(F*xI#GSg;aNn~YH0$PBnfbiCDy`yqc{RIzZ*hj9MEUx-r0R(#Gtw777SB>> zNnK1QYuq0dh>g66e>p9;D72mJF$_(_5DTz zS$-%qxikQ~agj$v`ssga?bw-@Hwtg#Jvbx^ONO3DN&>!V#lG1-BP*kFJ>ecbOpB%C zOvlBxTpBppn^H(pjJt(SPM8^s+H+jAg73@32d)atMi%P`P{z1LSrBFB)KM|Mh+Xhz z|GnWuh5Z}uuh5)Y<**n7Yk8iqqaP%P6!FK<4dav`L|C=(?bgxQ-jX+RAQ{?O;X`f0 zk@virl#b0Lg~70ZFNH0A3?#)X!XN#s`h)GgI(nQ^pQm8lEg`<{8aAtwYim6vIxpGP z8^NmU|0&paDQJt&V(G59bG|fkwpbeEw9JI!H9C{IN!S;8igJ@hD#pLdQFf)dyV@jh z1lQLpX;8M+nj)<{m6l&^&nUx^&X1NbAOtKM(LI#Rhtm}{T1Aick-16nv72lJb2)mX z9gc2DlMDoQ=Z}zdhGR!(UG@>5k|+UxjlKV^=GPx;STyf)e@ z(6xOISLo9Sxp1RgX<)=Fw$)I{q_A{3fdDLdsTxPxg}#8DNj)q~|MJ$VPbw^?i2en# zf`|f4LD`Xlbw^`lep;e* zHl)>Cht=V;hxbJe;_&N|Yy(?Bv8dN02VXcXi&v0VDy%N}f2HdQiQf;uZkt^cn~X$% z27p@wvxYB| z@MxQkx6+yh^yjFvF}wjb-xpxCOFtQVLrB8+(7vAf`}8D!WhQxS0dt#`B&qLS4Z6`& zu!>>W>94_6y=D=0Thh|GHv#n{7G{T$M-xxCJotRG1?hIrV}VU4@Ih2 zJmw+>KK(lP*ja;TFkMk}#p#}Z?)2@YcWkS47Bf1j0G*Yh$7VZJ*?J)qN5++()&c6I zId}oEh`A(y^JH~_LDc`YuDKx8BQ3U~yjFL*8%@GIf7D48-)F+YfMiQGGsE=q3B!b^ z?ygXpd_>vV(;+Ecnm`OwVb`+k+MxpskXMG2e}am-UXJT3!R3jhNWPSRj(?3obf4J0YaUUHD4L+NJ|78YnAw}HP(#!e zQMH+L`L}yCqDsAV=c`nU8GprSQ?eh$D8v~3ht$$)L`wh+yL>^64bdYJ6|4S{`r|qv z21lCg zXg74{Utk4YL^t%+zwR9t3{12JV9eJsZkc;VA>Jd2ERQc89Y4fjUQ`Go!m-EzL*pyY&p>nu-}54 z*OQnjt#cU+wo43#iIC6kHy*3_J+2%t(e^2tIy)^`qx)qMXgMcJt3$Fp_;+Kap^w%1 z{Dot7um_F+ literal 0 HcmV?d00001 diff --git a/tests/test_is_sfv_file.py b/tests/test_is_sfv_file.py new file mode 100644 index 0000000..41c2414 --- /dev/null +++ b/tests/test_is_sfv_file.py @@ -0,0 +1,28 @@ +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +""" +tests.test_is_sfv_file- Testing SABnzbd is_sfv_file() +""" + +from sabnzbd.newsunpack import is_sfv_file + + +class Test_is_sfv_file: + """ Tests of is_sfv_file() against various input files + """ + + def test_valid_unicode_sfv(self): + assert is_sfv_file("tests/data/good_sfv_unicode.sfv") + + def test_valid_one_line_sfv(self): + assert is_sfv_file("tests/data/one_line.sfv") + + def test_only_comments(self): + assert not is_sfv_file("tests/data/only_comments.sfv") + + def test_random_bin(self): + assert not is_sfv_file("tests/data/random.bin")