From 46c62f6151a4c28426d43f11713209b9523312a2 Mon Sep 17 00:00:00 2001 From: JackDandy Date: Tue, 21 Jan 2020 11:17:34 +0000 Subject: [PATCH] Change add rarfile_py3 3.1 (a4202ca). Change backport rarfile_py2; Fixes for multivolume RAR3 with encrypted headers. --- CHANGES.md | 2 + lib/rarfile/__init__.py | 9 + lib/rarfile/__init__.pyi | 0 lib/rarfile/rarfile.py | 3040 ------------------------------------------ lib/rarfile/rarfile.pyi | 258 ++++ lib/rarfile_py2/__init__.py | 0 lib/rarfile_py2/rarfile.py | 3041 ++++++++++++++++++++++++++++++++++++++++++ lib/rarfile_py3/__init__.py | 0 lib/rarfile_py3/rarfile.py | 3054 +++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 6364 insertions(+), 3040 deletions(-) create mode 100644 lib/rarfile/__init__.pyi delete mode 100644 lib/rarfile/rarfile.py create mode 100644 lib/rarfile/rarfile.pyi create mode 100644 lib/rarfile_py2/__init__.py create mode 100644 lib/rarfile_py2/rarfile.py create mode 100644 lib/rarfile_py3/__init__.py create mode 100644 lib/rarfile_py3/rarfile.py diff --git a/CHANGES.md b/CHANGES.md index 9de7173..bb3b232 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,6 +4,8 @@ * Change improve Python performance of handling core objects * Change improve performance for find_show_by_id * Change episode overview, move pulldown from 'Set/Failed' to 'Override/Failed' +* Change add rarfile_py3 3.1 (a4202ca) +* Change backport rarfile_py2; Fixes for multivolume RAR3 with encrypted headers * Update Apprise 0.8.0 (6aa52c3) to 0.8.3 (4aee9de) * Update attr 19.2.0.dev0 (daf2bc8) to 20.1.0.dev0 (9b5e988) * Update Beautiful Soup 4.8.1 (r540) to 4.8.2 (r554) diff --git a/lib/rarfile/__init__.py b/lib/rarfile/__init__.py index 8b13789..bde72de 100644 --- a/lib/rarfile/__init__.py +++ b/lib/rarfile/__init__.py @@ -1 +1,10 @@ +import sys +name = 'rarfile' + +locals()[name] = __import__(name) +if None is not name: + sub_name = name + '_py' + ('3', '2')[2 == sys.version_info[0]] + sys.modules[name] = __import__(sub_name) + 
package = __import__('%s.%s' % (sub_name, name), globals(), locals(), [], 0) + sys.modules.update({name: package, 'lib.%s' % name: package}) diff --git a/lib/rarfile/__init__.pyi b/lib/rarfile/__init__.pyi new file mode 100644 index 0000000..e69de29 diff --git a/lib/rarfile/rarfile.py b/lib/rarfile/rarfile.py deleted file mode 100644 index 2ff5af4..0000000 --- a/lib/rarfile/rarfile.py +++ /dev/null @@ -1,3040 +0,0 @@ -# rarfile.py -# -# Copyright (c) 2005-2019 Marko Kreen -# -# Permission to use, copy, modify, and/or distribute this software for any -# purpose with or without fee is hereby granted, provided that the above -# copyright notice and this permission notice appear in all copies. -# -# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - -r"""RAR archive reader. - -This is Python module for Rar archive reading. The interface -is made as :mod:`zipfile`-like as possible. - -Basic logic: - - Parse archive structure with Python. - - Extract non-compressed files with Python - - Extract compressed files with unrar. - - Optionally write compressed data to temp file to speed up unrar, - otherwise it needs to scan whole archive on each execution. 
- -Example:: - - import rarfile - - rf = rarfile.RarFile('myarchive.rar') - for f in rf.infolist(): - print f.filename, f.file_size - if f.filename == 'README': - print(rf.read(f)) - -Archive files can also be accessed via file-like object returned -by :meth:`RarFile.open`:: - - import rarfile - - with rarfile.RarFile('archive.rar') as rf: - with rf.open('README') as f: - for ln in f: - print(ln.strip()) - -There are few module-level parameters to tune behaviour, -here they are with defaults, and reason to change it:: - - import rarfile - - # Set to full path of unrar.exe if it is not in PATH - rarfile.UNRAR_TOOL = "unrar" - - # Set to '\\' to be more compatible with old rarfile - rarfile.PATH_SEP = '/' - -For more details, refer to source. - -""" - -from __future__ import division, print_function - -## -## Imports and compat - support both Python 2.x and 3.x -## - -import sys -import os -import errno -import struct - -from struct import pack, unpack, Struct -from binascii import crc32, hexlify -from tempfile import mkstemp -from subprocess import Popen, PIPE, STDOUT -from io import RawIOBase -from hashlib import sha1, sha256 -from hmac import HMAC -from datetime import datetime, timedelta, tzinfo - -# fixed offset timezone, for UTC -try: - from datetime import timezone -except ImportError: - class timezone(tzinfo): - """Compat timezone.""" - __slots__ = ('_ofs', '_name') - _DST = timedelta(0) - - def __init__(self, offset, name): - super(timezone, self).__init__() - self._ofs, self._name = offset, name - - def utcoffset(self, dt): - return self._ofs - - def tzname(self, dt): - return self._name - - def dst(self, dt): - return self._DST - -# only needed for encryped headers -try: - try: - from cryptography.hazmat.primitives.ciphers import algorithms, modes, Cipher - from cryptography.hazmat.backends import default_backend - from cryptography.hazmat.primitives import hashes - from cryptography.hazmat.primitives.kdf import pbkdf2 - - class AES_CBC_Decrypt(object): - 
"""Decrypt API""" - def __init__(self, key, iv): - ciph = Cipher(algorithms.AES(key), modes.CBC(iv), default_backend()) - self.decrypt = ciph.decryptor().update - - def pbkdf2_sha256(password, salt, iters): - """PBKDF2 with HMAC-SHA256""" - ctx = pbkdf2.PBKDF2HMAC(hashes.SHA256(), 32, salt, iters, default_backend()) - return ctx.derive(password) - - except ImportError: - from Crypto.Cipher import AES - from Crypto.Protocol import KDF - - class AES_CBC_Decrypt(object): - """Decrypt API""" - def __init__(self, key, iv): - self.decrypt = AES.new(key, AES.MODE_CBC, iv).decrypt - - def pbkdf2_sha256(password, salt, iters): - """PBKDF2 with HMAC-SHA256""" - return KDF.PBKDF2(password, salt, 32, iters, hmac_sha256) - - _have_crypto = 1 -except ImportError: - _have_crypto = 0 - -try: - try: - from hashlib import blake2s - _have_blake2 = True - except ImportError: - from pyblake2 import blake2s - _have_blake2 = True -except ImportError: - _have_blake2 = False - -# compat with 2.x -if sys.hexversion < 0x3000000: - def rar_crc32(data, prev=0): - """CRC32 with unsigned values. - """ - if (prev > 0) and (prev & 0x80000000): - prev -= (1 << 32) - res = crc32(data, prev) - if res < 0: - res += (1 << 32) - return res - tohex = hexlify - _byte_code = ord -else: # pragma: no cover - def tohex(data): - """Return hex string.""" - return hexlify(data).decode('ascii') - rar_crc32 = crc32 - unicode = str - _byte_code = int # noqa - -# don't break 2.6 completely -if sys.hexversion < 0x2070000: - memoryview = lambda x: x # noqa - -try: - from pathlib import Path - _have_pathlib = True -except ImportError: - _have_pathlib = False - -__version__ = '3.1' - -# export only interesting items -__all__ = ['is_rarfile', 'RarInfo', 'RarFile', 'RarExtFile'] - -## -## Module configuration. Can be tuned after importing. 
-## - -#: default fallback charset -DEFAULT_CHARSET = "windows-1252" - -#: list of encodings to try, with fallback to DEFAULT_CHARSET if none succeed -TRY_ENCODINGS = ('utf8', 'utf-16le') - -#: 'unrar', 'rar' or full path to either one -UNRAR_TOOL = "unrar" - -#: Command line args to use for opening file for reading. -OPEN_ARGS = ('p', '-inul') - -#: Command line args to use for extracting file to disk. -EXTRACT_ARGS = ('x', '-y', '-idq') - -#: args for testrar() -TEST_ARGS = ('t', '-idq') - -# -# Allow use of tool that is not compatible with unrar. -# -# By default use 'bsdtar' which is 'tar' program that -# sits on top of libarchive. -# -# Problems with libarchive RAR backend: -# - Does not support solid archives. -# - Does not support password-protected archives. -# - -ALT_TOOL = 'bsdtar' -ALT_OPEN_ARGS = ('-x', '--to-stdout', '-f') -ALT_EXTRACT_ARGS = ('-x', '-f') -ALT_TEST_ARGS = ('-t', '-f') -ALT_CHECK_ARGS = ('--help',) - -#ALT_TOOL = 'unar' -#ALT_OPEN_ARGS = ('-o', '-') -#ALT_EXTRACT_ARGS = () -#ALT_TEST_ARGS = ('-test',) # does not work -#ALT_CHECK_ARGS = ('-v',) - -#: whether to speed up decompression by using tmp archive -USE_EXTRACT_HACK = 1 - -#: limit the filesize for tmp archive usage -HACK_SIZE_LIMIT = 20 * 1024 * 1024 - -#: Separator for path name components. RAR internally uses '\\'. -#: Use '/' to be similar with zipfile. 
-PATH_SEP = '/' - -## -## rar constants -## - -# block types -RAR_BLOCK_MARK = 0x72 # r -RAR_BLOCK_MAIN = 0x73 # s -RAR_BLOCK_FILE = 0x74 # t -RAR_BLOCK_OLD_COMMENT = 0x75 # u -RAR_BLOCK_OLD_EXTRA = 0x76 # v -RAR_BLOCK_OLD_SUB = 0x77 # w -RAR_BLOCK_OLD_RECOVERY = 0x78 # x -RAR_BLOCK_OLD_AUTH = 0x79 # y -RAR_BLOCK_SUB = 0x7a # z -RAR_BLOCK_ENDARC = 0x7b # { - -# flags for RAR_BLOCK_MAIN -RAR_MAIN_VOLUME = 0x0001 -RAR_MAIN_COMMENT = 0x0002 -RAR_MAIN_LOCK = 0x0004 -RAR_MAIN_SOLID = 0x0008 -RAR_MAIN_NEWNUMBERING = 0x0010 -RAR_MAIN_AUTH = 0x0020 -RAR_MAIN_RECOVERY = 0x0040 -RAR_MAIN_PASSWORD = 0x0080 -RAR_MAIN_FIRSTVOLUME = 0x0100 -RAR_MAIN_ENCRYPTVER = 0x0200 - -# flags for RAR_BLOCK_FILE -RAR_FILE_SPLIT_BEFORE = 0x0001 -RAR_FILE_SPLIT_AFTER = 0x0002 -RAR_FILE_PASSWORD = 0x0004 -RAR_FILE_COMMENT = 0x0008 -RAR_FILE_SOLID = 0x0010 -RAR_FILE_DICTMASK = 0x00e0 -RAR_FILE_DICT64 = 0x0000 -RAR_FILE_DICT128 = 0x0020 -RAR_FILE_DICT256 = 0x0040 -RAR_FILE_DICT512 = 0x0060 -RAR_FILE_DICT1024 = 0x0080 -RAR_FILE_DICT2048 = 0x00a0 -RAR_FILE_DICT4096 = 0x00c0 -RAR_FILE_DIRECTORY = 0x00e0 -RAR_FILE_LARGE = 0x0100 -RAR_FILE_UNICODE = 0x0200 -RAR_FILE_SALT = 0x0400 -RAR_FILE_VERSION = 0x0800 -RAR_FILE_EXTTIME = 0x1000 -RAR_FILE_EXTFLAGS = 0x2000 - -# flags for RAR_BLOCK_ENDARC -RAR_ENDARC_NEXT_VOLUME = 0x0001 -RAR_ENDARC_DATACRC = 0x0002 -RAR_ENDARC_REVSPACE = 0x0004 -RAR_ENDARC_VOLNR = 0x0008 - -# flags common to all blocks -RAR_SKIP_IF_UNKNOWN = 0x4000 -RAR_LONG_BLOCK = 0x8000 - -# Host OS types -RAR_OS_MSDOS = 0 -RAR_OS_OS2 = 1 -RAR_OS_WIN32 = 2 -RAR_OS_UNIX = 3 -RAR_OS_MACOS = 4 -RAR_OS_BEOS = 5 - -# Compression methods - '0'..'5' -RAR_M0 = 0x30 -RAR_M1 = 0x31 -RAR_M2 = 0x32 -RAR_M3 = 0x33 -RAR_M4 = 0x34 -RAR_M5 = 0x35 - -# -# RAR5 constants -# - -RAR5_BLOCK_MAIN = 1 -RAR5_BLOCK_FILE = 2 -RAR5_BLOCK_SERVICE = 3 -RAR5_BLOCK_ENCRYPTION = 4 -RAR5_BLOCK_ENDARC = 5 - -RAR5_BLOCK_FLAG_EXTRA_DATA = 0x01 -RAR5_BLOCK_FLAG_DATA_AREA = 0x02 -RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN = 0x04 
-RAR5_BLOCK_FLAG_SPLIT_BEFORE = 0x08 -RAR5_BLOCK_FLAG_SPLIT_AFTER = 0x10 -RAR5_BLOCK_FLAG_DEPENDS_PREV = 0x20 -RAR5_BLOCK_FLAG_KEEP_WITH_PARENT = 0x40 - -RAR5_MAIN_FLAG_ISVOL = 0x01 -RAR5_MAIN_FLAG_HAS_VOLNR = 0x02 -RAR5_MAIN_FLAG_SOLID = 0x04 -RAR5_MAIN_FLAG_RECOVERY = 0x08 -RAR5_MAIN_FLAG_LOCKED = 0x10 - -RAR5_FILE_FLAG_ISDIR = 0x01 -RAR5_FILE_FLAG_HAS_MTIME = 0x02 -RAR5_FILE_FLAG_HAS_CRC32 = 0x04 -RAR5_FILE_FLAG_UNKNOWN_SIZE = 0x08 - -RAR5_COMPR_SOLID = 0x40 - -RAR5_ENC_FLAG_HAS_CHECKVAL = 0x01 - -RAR5_ENDARC_FLAG_NEXT_VOL = 0x01 - -RAR5_XFILE_ENCRYPTION = 1 -RAR5_XFILE_HASH = 2 -RAR5_XFILE_TIME = 3 -RAR5_XFILE_VERSION = 4 -RAR5_XFILE_REDIR = 5 -RAR5_XFILE_OWNER = 6 -RAR5_XFILE_SERVICE = 7 - -RAR5_XTIME_UNIXTIME = 0x01 -RAR5_XTIME_HAS_MTIME = 0x02 -RAR5_XTIME_HAS_CTIME = 0x04 -RAR5_XTIME_HAS_ATIME = 0x08 - -RAR5_XENC_CIPHER_AES256 = 0 - -RAR5_XENC_CHECKVAL = 0x01 -RAR5_XENC_TWEAKED = 0x02 - -RAR5_XHASH_BLAKE2SP = 0 - -RAR5_XREDIR_UNIX_SYMLINK = 1 -RAR5_XREDIR_WINDOWS_SYMLINK = 2 -RAR5_XREDIR_WINDOWS_JUNCTION = 3 -RAR5_XREDIR_HARD_LINK = 4 -RAR5_XREDIR_FILE_COPY = 5 - -RAR5_XREDIR_ISDIR = 0x01 - -RAR5_XOWNER_UNAME = 0x01 -RAR5_XOWNER_GNAME = 0x02 -RAR5_XOWNER_UID = 0x04 -RAR5_XOWNER_GID = 0x08 - -RAR5_OS_WINDOWS = 0 -RAR5_OS_UNIX = 1 - -## -## internal constants -## - -RAR_ID = b"Rar!\x1a\x07\x00" -RAR5_ID = b"Rar!\x1a\x07\x01\x00" -ZERO = b'\0' -EMPTY = b'' -UTC = timezone(timedelta(0), 'UTC') -BSIZE = 32 * 1024 - -def _get_rar_version(xfile): - """Check quickly whether file is rar archive. - """ - with XFile(xfile) as fd: - buf = fd.read(len(RAR5_ID)) - if buf.startswith(RAR_ID): - return 3 - elif buf.startswith(RAR5_ID): - return 5 - return 0 - -## -## Public interface -## - -def is_rarfile(xfile): - """Check quickly whether file is rar archive. 
- """ - return _get_rar_version(xfile) > 0 - -class Error(Exception): - """Base class for rarfile errors.""" - -class BadRarFile(Error): - """Incorrect data in archive.""" - -class NotRarFile(Error): - """The file is not RAR archive.""" - -class BadRarName(Error): - """Cannot guess multipart name components.""" - -class NoRarEntry(Error): - """File not found in RAR""" - -class PasswordRequired(Error): - """File requires password""" - -class NeedFirstVolume(Error): - """Need to start from first volume.""" - -class NoCrypto(Error): - """Cannot parse encrypted headers - no crypto available.""" - -class RarExecError(Error): - """Problem reported by unrar/rar.""" - -class RarWarning(RarExecError): - """Non-fatal error""" - -class RarFatalError(RarExecError): - """Fatal error""" - -class RarCRCError(RarExecError): - """CRC error during unpacking""" - -class RarLockedArchiveError(RarExecError): - """Must not modify locked archive""" - -class RarWriteError(RarExecError): - """Write error""" - -class RarOpenError(RarExecError): - """Open error""" - -class RarUserError(RarExecError): - """User error""" - -class RarMemoryError(RarExecError): - """Memory error""" - -class RarCreateError(RarExecError): - """Create error""" - -class RarNoFilesError(RarExecError): - """No files that match pattern were found""" - -class RarUserBreak(RarExecError): - """User stop""" - -class RarWrongPassword(RarExecError): - """Incorrect password""" - -class RarUnknownError(RarExecError): - """Unknown exit code""" - -class RarSignalExit(RarExecError): - """Unrar exited with signal""" - -class RarCannotExec(RarExecError): - """Executable not found.""" - - -class RarInfo(object): - r"""An entry in rar archive. - - RAR3 extended timestamps are :class:`datetime.datetime` objects without timezone. - RAR5 extended timestamps are :class:`datetime.datetime` objects with UTC timezone. - - Attributes: - - filename - File name with relative path. - Path separator is '/'. Always unicode string. 
- - date_time - File modification timestamp. As tuple of (year, month, day, hour, minute, second). - RAR5 allows archives where it is missing, it's None then. - - file_size - Uncompressed size. - - compress_size - Compressed size. - - compress_type - Compression method: one of :data:`RAR_M0` .. :data:`RAR_M5` constants. - - extract_version - Minimal Rar version needed for decompressing. As (major*10 + minor), - so 2.9 is 29. - - RAR3: 10, 20, 29 - - RAR5 does not have such field in archive, it's simply set to 50. - - host_os - Host OS type, one of RAR_OS_* constants. - - RAR3: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`, :data:`RAR_OS_MSDOS`, - :data:`RAR_OS_OS2`, :data:`RAR_OS_BEOS`. - - RAR5: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`. - - mode - File attributes. May be either dos-style or unix-style, depending on host_os. - - mtime - File modification time. Same value as :attr:`date_time` - but as :class:`datetime.datetime` object with extended precision. - - ctime - Optional time field: creation time. As :class:`datetime.datetime` object. - - atime - Optional time field: last access time. As :class:`datetime.datetime` object. - - arctime - Optional time field: archival time. As :class:`datetime.datetime` object. - (RAR3-only) - - CRC - CRC-32 of uncompressed file, unsigned int. - - RAR5: may be None. - - blake2sp_hash - Blake2SP hash over decompressed data. (RAR5-only) - - comment - Optional file comment field. Unicode string. (RAR3-only) - - file_redir - If not None, file is link of some sort. Contains tuple of (type, flags, target). - (RAR5-only) - - Type is one of constants: - - :data:`RAR5_XREDIR_UNIX_SYMLINK` - unix symlink to target. - :data:`RAR5_XREDIR_WINDOWS_SYMLINK` - windows symlink to target. - :data:`RAR5_XREDIR_WINDOWS_JUNCTION` - windows junction. - :data:`RAR5_XREDIR_HARD_LINK` - hard link to target. - :data:`RAR5_XREDIR_FILE_COPY` - current file is copy of another archive entry. - - Flags may contain :data:`RAR5_XREDIR_ISDIR` bit. 
- - volume - Volume nr, starting from 0. - - volume_file - Volume file name, where file starts. - - """ - - # zipfile-compatible fields - filename = None - file_size = None - compress_size = None - date_time = None - comment = None - CRC = None - volume = None - orig_filename = None - - # optional extended time fields, datetime() objects. - mtime = None - ctime = None - atime = None - - extract_version = None - mode = None - host_os = None - compress_type = None - - # rar3-only fields - comment = None - arctime = None - - # rar5-only fields - blake2sp_hash = None - file_redir = None - - # internal fields - flags = 0 - type = None - - def isdir(self): - """Returns True if entry is a directory. - """ - if self.type == RAR_BLOCK_FILE: - return (self.flags & RAR_FILE_DIRECTORY) == RAR_FILE_DIRECTORY - return False - - def needs_password(self): - """Returns True if data is stored password-protected. - """ - if self.type == RAR_BLOCK_FILE: - return (self.flags & RAR_FILE_PASSWORD) > 0 - return False - - -class RarFile(object): - """Parse RAR structure, provide access to files in archive. - """ - - #: Archive comment. Unicode string or None. - comment = None - - def __init__(self, rarfile, mode="r", charset=None, info_callback=None, - crc_check=True, errors="stop"): - """Open and parse a RAR archive. - - Parameters: - - rarfile - archive file name - mode - only 'r' is supported. - charset - fallback charset to use, if filenames are not already Unicode-enabled. - info_callback - debug callback, gets to see all archive entries. - crc_check - set to False to disable CRC checks - errors - Either "stop" to quietly stop parsing on errors, - or "strict" to raise errors. Default is "stop". 
- """ - if _have_pathlib and isinstance(rarfile, Path): - self._rarfile = str(rarfile) - else: - self._rarfile = rarfile - - self._charset = charset or DEFAULT_CHARSET - self._info_callback = info_callback - self._crc_check = crc_check - self._password = None - self._file_parser = None - - if errors == "stop": - self._strict = False - elif errors == "strict": - self._strict = True - else: - raise ValueError("Invalid value for 'errors' parameter.") - - if mode != "r": - raise NotImplementedError("RarFile supports only mode=r") - - self._parse() - - def __enter__(self): - """Open context.""" - return self - - def __exit__(self, typ, value, traceback): - """Exit context""" - self.close() - - def setpassword(self, password): - """Sets the password to use when extracting. - """ - self._password = password - if self._file_parser: - if self._file_parser.has_header_encryption(): - self._file_parser = None - if not self._file_parser: - self._parse() - else: - self._file_parser.setpassword(self._password) - - def needs_password(self): - """Returns True if any archive entries require password for extraction. - """ - return self._file_parser.needs_password() - - def namelist(self): - """Return list of filenames in archive. - """ - return [f.filename for f in self.infolist()] - - def infolist(self): - """Return RarInfo objects for all files/directories in archive. - """ - return self._file_parser.infolist() - - def volumelist(self): - """Returns filenames of archive volumes. - - In case of single-volume archive, the list contains - just the name of main archive file. - """ - return self._file_parser.volumelist() - - def getinfo(self, fname): - """Return RarInfo for file. - """ - return self._file_parser.getinfo(fname) - - def open(self, fname, mode='r', psw=None): - """Returns file-like object (:class:`RarExtFile`) from where the data can be read. 
- - The object implements :class:`io.RawIOBase` interface, so it can - be further wrapped with :class:`io.BufferedReader` - and :class:`io.TextIOWrapper`. - - On older Python where io module is not available, it implements - only .read(), .seek(), .tell() and .close() methods. - - The object is seekable, although the seeking is fast only on - uncompressed files, on compressed files the seeking is implemented - by reading ahead and/or restarting the decompression. - - Parameters: - - fname - file name or RarInfo instance. - mode - must be 'r' - psw - password to use for extracting. - """ - - if mode != 'r': - raise NotImplementedError("RarFile.open() supports only mode=r") - - # entry lookup - inf = self.getinfo(fname) - if inf.isdir(): - raise TypeError("Directory does not have any data: " + inf.filename) - - # check password - if inf.needs_password(): - psw = psw or self._password - if psw is None: - raise PasswordRequired("File %s requires password" % inf.filename) - else: - psw = None - - return self._file_parser.open(inf, psw) - - def read(self, fname, psw=None): - """Return uncompressed data for archive entry. - - For longer files using :meth:`RarFile.open` may be better idea. - - Parameters: - - fname - filename or RarInfo instance - psw - password to use for extracting. - """ - - with self.open(fname, 'r', psw) as f: - return f.read() - - def close(self): - """Release open resources.""" - pass - - def printdir(self): - """Print archive file list to stdout.""" - for f in self.infolist(): - print(f.filename) - - def extract(self, member, path=None, pwd=None): - """Extract single file into current directory. 
- - Parameters: - - member - filename or :class:`RarInfo` instance - path - optional destination path - pwd - optional password to use - """ - if isinstance(member, RarInfo): - fname = member.filename - elif _have_pathlib and isinstance(member, Path): - fname = str(member) - else: - fname = member - self._extract([fname], path, pwd) - - def extractall(self, path=None, members=None, pwd=None): - """Extract all files into current directory. - - Parameters: - - path - optional destination path - members - optional filename or :class:`RarInfo` instance list to extract - pwd - optional password to use - """ - fnlist = [] - if members is not None: - for m in members: - if isinstance(m, RarInfo): - fnlist.append(m.filename) - else: - fnlist.append(m) - self._extract(fnlist, path, pwd) - - def testrar(self): - """Let 'unrar' test the archive. - """ - cmd = [UNRAR_TOOL] + list(TEST_ARGS) - add_password_arg(cmd, self._password) - cmd.append('--') - with XTempFile(self._rarfile) as rarfile: - cmd.append(rarfile) - p = custom_popen(cmd) - output = p.communicate()[0] - check_returncode(p, output) - - def strerror(self): - """Return error string if parsing failed or None if no problems. 
- """ - if not self._file_parser: - return "Not a RAR file" - return self._file_parser.strerror() - - ## - ## private methods - ## - - def _parse(self): - ver = _get_rar_version(self._rarfile) - if ver == 3: - p3 = RAR3Parser(self._rarfile, self._password, self._crc_check, - self._charset, self._strict, self._info_callback) - self._file_parser = p3 # noqa - elif ver == 5: - p5 = RAR5Parser(self._rarfile, self._password, self._crc_check, - self._charset, self._strict, self._info_callback) - self._file_parser = p5 # noqa - else: - raise BadRarFile("Not a RAR file") - - self._file_parser.parse() - self.comment = self._file_parser.comment - - # call unrar to extract a file - def _extract(self, fnlist, path=None, psw=None): - cmd = [UNRAR_TOOL] + list(EXTRACT_ARGS) - - # pasoword - psw = psw or self._password - add_password_arg(cmd, psw) - cmd.append('--') - - # rar file - with XTempFile(self._rarfile) as rarfn: - cmd.append(rarfn) - - # file list - for fn in fnlist: - if os.sep != PATH_SEP: - fn = fn.replace(PATH_SEP, os.sep) - cmd.append(fn) - - # destination path - if path is not None: - if _have_pathlib and isinstance(path, Path): - path = str(path) - cmd.append(path + os.sep) - - # call - p = custom_popen(cmd) - output = p.communicate()[0] - check_returncode(p, output) - -# -# File format parsing -# - -class CommonParser(object): - """Shared parser parts.""" - _main = None - _hdrenc_main = None - _needs_password = False - _fd = None - _expect_sig = None - _parse_error = None - _password = None - comment = None - - def __init__(self, rarfile, password, crc_check, charset, strict, info_cb): - self._rarfile = rarfile - self._password = password - self._crc_check = crc_check - self._charset = charset - self._strict = strict - self._info_callback = info_cb - self._info_list = [] - self._info_map = {} - self._vol_list = [] - - def has_header_encryption(self): - """Returns True if headers are encrypted - """ - if self._hdrenc_main: - return True - if self._main: - if 
self._main.flags & RAR_MAIN_PASSWORD: - return True - return False - - def setpassword(self, psw): - """Set cached password.""" - self._password = psw - - def volumelist(self): - """Volume files""" - return self._vol_list - - def needs_password(self): - """Is password required""" - return self._needs_password - - def strerror(self): - """Last error""" - return self._parse_error - - def infolist(self): - """List of RarInfo records. - """ - return self._info_list - - def getinfo(self, member): - """Return RarInfo for filename - """ - if isinstance(member, RarInfo): - fname = member.filename - elif _have_pathlib and isinstance(member, Path): - fname = str(member) - else: - fname = member - - # accept both ways here - if PATH_SEP == '/': - fname2 = fname.replace("\\", "/") - else: - fname2 = fname.replace("/", "\\") - - try: - return self._info_map[fname] - except KeyError: - try: - return self._info_map[fname2] - except KeyError: - raise NoRarEntry("No such file: %s" % fname) - - # read rar - def parse(self): - """Process file.""" - self._fd = None - try: - self._parse_real() - finally: - if self._fd: - self._fd.close() - self._fd = None - - def _parse_real(self): - fd = XFile(self._rarfile) - self._fd = fd - sig = fd.read(len(self._expect_sig)) - if sig != self._expect_sig: - if isinstance(self._rarfile, (str, unicode)): - raise NotRarFile("Not a Rar archive: {}".format(self._rarfile)) - raise NotRarFile("Not a Rar archive") - - volume = 0 # first vol (.rar) is 0 - more_vols = False - endarc = False - volfile = self._rarfile - self._vol_list = [self._rarfile] - while 1: - if endarc: - h = None # don't read past ENDARC - else: - h = self._parse_header(fd) - if not h: - if more_vols: - volume += 1 - fd.close() - try: - volfile = self._next_volname(volfile) - fd = XFile(volfile) - except IOError: - self._set_error("Cannot open next volume: %s", volfile) - break - self._fd = fd - sig = fd.read(len(self._expect_sig)) - if sig != self._expect_sig: - 
self._set_error("Invalid volume sig: %s", volfile) - break - more_vols = False - endarc = False - self._vol_list.append(volfile) - continue - break - h.volume = volume - h.volume_file = volfile - - if h.type == RAR_BLOCK_MAIN and not self._main: - self._main = h - if h.flags & RAR_MAIN_NEWNUMBERING: - # RAR 2.x does not set FIRSTVOLUME, - # so check it only if NEWNUMBERING is used - if (h.flags & RAR_MAIN_FIRSTVOLUME) == 0: - if getattr(h, 'main_volume_number', None) is not None: - # rar5 may have more info - raise NeedFirstVolume( - "Need to start from first volume (current: %r)" - % (h.main_volume_number,) - ) - raise NeedFirstVolume("Need to start from first volume") - if h.flags & RAR_MAIN_PASSWORD: - self._needs_password = True - if not self._password: - break - elif h.type == RAR_BLOCK_ENDARC: - more_vols = (h.flags & RAR_ENDARC_NEXT_VOLUME) > 0 - endarc = True - elif h.type == RAR_BLOCK_FILE: - # RAR 2.x does not write RAR_BLOCK_ENDARC - if h.flags & RAR_FILE_SPLIT_AFTER: - more_vols = True - # RAR 2.x does not set RAR_MAIN_FIRSTVOLUME - if volume == 0 and h.flags & RAR_FILE_SPLIT_BEFORE: - raise NeedFirstVolume("Need to start from first volume") - - if h.needs_password(): - self._needs_password = True - - # store it - self.process_entry(fd, h) - - if self._info_callback: - self._info_callback(h) - - # go to next header - if h.add_size > 0: - fd.seek(h.data_offset + h.add_size, 0) - - def process_entry(self, fd, item): - """Examine item, add into lookup cache.""" - raise NotImplementedError() - - def _decrypt_header(self, fd): - raise NotImplementedError('_decrypt_header') - - def _parse_block_header(self, fd): - raise NotImplementedError('_parse_block_header') - - def _open_hack(self, inf, psw): - raise NotImplementedError('_open_hack') - - # read single header - def _parse_header(self, fd): - try: - # handle encrypted headers - if (self._main and self._main.flags & RAR_MAIN_PASSWORD) or self._hdrenc_main: - if not self._password: - return None - fd = 
self._decrypt_header(fd) - - # now read actual header - return self._parse_block_header(fd) - except struct.error: - self._set_error('Broken header in RAR file') - return None - - # given current vol name, construct next one - def _next_volname(self, volfile): - if is_filelike(volfile): - raise IOError("Working on single FD") - if self._main.flags & RAR_MAIN_NEWNUMBERING: - return _next_newvol(volfile) - return _next_oldvol(volfile) - - def _set_error(self, msg, *args): - if args: - msg = msg % args - self._parse_error = msg - if self._strict: - raise BadRarFile(msg) - - def open(self, inf, psw): - """Return stream object for file data.""" - - if inf.file_redir: - # cannot leave to unrar as it expects copied file to exist - if inf.file_redir[0] in (RAR5_XREDIR_FILE_COPY, RAR5_XREDIR_HARD_LINK): - inf = self.getinfo(inf.file_redir[2]) - if not inf: - raise BadRarFile('cannot find copied file') - - if inf.flags & RAR_FILE_SPLIT_BEFORE: - raise NeedFirstVolume("Partial file, please start from first volume: " + inf.filename) - - # is temp write usable? 
- use_hack = 1 - if not self._main: - use_hack = 0 - elif self._main._must_disable_hack(): - use_hack = 0 - elif inf._must_disable_hack(): - use_hack = 0 - elif is_filelike(self._rarfile): - pass - elif inf.file_size > HACK_SIZE_LIMIT: - use_hack = 0 - elif not USE_EXTRACT_HACK: - use_hack = 0 - - # now extract - if inf.compress_type == RAR_M0 and (inf.flags & RAR_FILE_PASSWORD) == 0 and inf.file_redir is None: - return self._open_clear(inf) - elif use_hack: - return self._open_hack(inf, psw) - elif is_filelike(self._rarfile): - return self._open_unrar_membuf(self._rarfile, inf, psw) - else: - return self._open_unrar(self._rarfile, inf, psw) - - def _open_clear(self, inf): - return DirectReader(self, inf) - - def _open_hack_core(self, inf, psw, prefix, suffix): - - size = inf.compress_size + inf.header_size - rf = XFile(inf.volume_file, 0) - rf.seek(inf.header_offset) - - tmpfd, tmpname = mkstemp(suffix='.rar') - tmpf = os.fdopen(tmpfd, "wb") - - try: - tmpf.write(prefix) - while size > 0: - if size > BSIZE: - buf = rf.read(BSIZE) - else: - buf = rf.read(size) - if not buf: - raise BadRarFile('read failed: ' + inf.filename) - tmpf.write(buf) - size -= len(buf) - tmpf.write(suffix) - tmpf.close() - rf.close() - except: - rf.close() - tmpf.close() - os.unlink(tmpname) - raise - - return self._open_unrar(tmpname, inf, psw, tmpname) - - # write in-memory archive to temp file - needed for solid archives - def _open_unrar_membuf(self, memfile, inf, psw): - tmpname = membuf_tempfile(memfile) - return self._open_unrar(tmpname, inf, psw, tmpname, force_file=True) - - # extract using unrar - def _open_unrar(self, rarfile, inf, psw=None, tmpfile=None, force_file=False): - cmd = [UNRAR_TOOL] + list(OPEN_ARGS) - add_password_arg(cmd, psw) - cmd.append("--") - cmd.append(rarfile) - - # not giving filename avoids encoding related problems - if not tmpfile or force_file: - fn = inf.filename - if PATH_SEP != os.sep: - fn = fn.replace(PATH_SEP, os.sep) - cmd.append(fn) - - # read 
from unrar pipe - return PipeReader(self, inf, cmd, tmpfile) - -# -# RAR3 format -# - -class Rar3Info(RarInfo): - """RAR3 specific fields.""" - extract_version = 15 - salt = None - add_size = 0 - header_crc = None - header_size = None - header_offset = None - data_offset = None - _md_class = None - _md_expect = None - - # make sure some rar5 fields are always present - file_redir = None - blake2sp_hash = None - - def _must_disable_hack(self): - if self.type == RAR_BLOCK_FILE: - if self.flags & RAR_FILE_PASSWORD: - return True - elif self.flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER): - return True - elif self.type == RAR_BLOCK_MAIN: - if self.flags & (RAR_MAIN_SOLID | RAR_MAIN_PASSWORD): - return True - return False - - -class RAR3Parser(CommonParser): - """Parse RAR3 file format. - """ - _expect_sig = RAR_ID - _last_aes_key = (None, None, None) # (salt, key, iv) - - def _decrypt_header(self, fd): - if not _have_crypto: - raise NoCrypto('Cannot parse encrypted headers - no crypto') - salt = fd.read(8) - if self._last_aes_key[0] == salt: - key, iv = self._last_aes_key[1:] - else: - key, iv = rar3_s2k(self._password, salt) - self._last_aes_key = (salt, key, iv) - return HeaderDecrypt(fd, key, iv) - - # common header - def _parse_block_header(self, fd): - h = Rar3Info() - h.header_offset = fd.tell() - - # read and parse base header - buf = fd.read(S_BLK_HDR.size) - if not buf: - return None - t = S_BLK_HDR.unpack_from(buf) - h.header_crc, h.type, h.flags, h.header_size = t - - # read full header - if h.header_size > S_BLK_HDR.size: - hdata = buf + fd.read(h.header_size - S_BLK_HDR.size) - else: - hdata = buf - h.data_offset = fd.tell() - - # unexpected EOF? - if len(hdata) != h.header_size: - self._set_error('Unexpected EOF when reading header') - return None - - pos = S_BLK_HDR.size - - # block has data assiciated with it? 
- if h.flags & RAR_LONG_BLOCK: - h.add_size, pos = load_le32(hdata, pos) - else: - h.add_size = 0 - - # parse interesting ones, decide header boundaries for crc - if h.type == RAR_BLOCK_MARK: - return h - elif h.type == RAR_BLOCK_MAIN: - pos += 6 - if h.flags & RAR_MAIN_ENCRYPTVER: - pos += 1 - crc_pos = pos - if h.flags & RAR_MAIN_COMMENT: - self._parse_subblocks(h, hdata, pos) - elif h.type == RAR_BLOCK_FILE: - pos = self._parse_file_header(h, hdata, pos - 4) - crc_pos = pos - if h.flags & RAR_FILE_COMMENT: - pos = self._parse_subblocks(h, hdata, pos) - elif h.type == RAR_BLOCK_SUB: - pos = self._parse_file_header(h, hdata, pos - 4) - crc_pos = h.header_size - elif h.type == RAR_BLOCK_OLD_AUTH: - pos += 8 - crc_pos = pos - elif h.type == RAR_BLOCK_OLD_EXTRA: - pos += 7 - crc_pos = pos - else: - crc_pos = h.header_size - - # check crc - if h.type == RAR_BLOCK_OLD_SUB: - crcdat = hdata[2:] + fd.read(h.add_size) - else: - crcdat = hdata[2:crc_pos] - - calc_crc = rar_crc32(crcdat) & 0xFFFF - - # return good header - if h.header_crc == calc_crc: - return h - - # header parsing failed. 
- self._set_error('Header CRC error (%02x): exp=%x got=%x (xlen = %d)', - h.type, h.header_crc, calc_crc, len(crcdat)) - - # instead panicing, send eof - return None - - # read file-specific header - def _parse_file_header(self, h, hdata, pos): - fld = S_FILE_HDR.unpack_from(hdata, pos) - pos += S_FILE_HDR.size - - h.compress_size = fld[0] - h.file_size = fld[1] - h.host_os = fld[2] - h.CRC = fld[3] - h.date_time = parse_dos_time(fld[4]) - h.mtime = to_datetime(h.date_time) - h.extract_version = fld[5] - h.compress_type = fld[6] - name_size = fld[7] - h.mode = fld[8] - - h._md_class = CRC32Context - h._md_expect = h.CRC - - if h.flags & RAR_FILE_LARGE: - h1, pos = load_le32(hdata, pos) - h2, pos = load_le32(hdata, pos) - h.compress_size |= h1 << 32 - h.file_size |= h2 << 32 - h.add_size = h.compress_size - - name, pos = load_bytes(hdata, name_size, pos) - if h.flags & RAR_FILE_UNICODE: - nul = name.find(ZERO) - h.orig_filename = name[:nul] - u = UnicodeFilename(h.orig_filename, name[nul + 1:]) - h.filename = u.decode() - - # if parsing failed fall back to simple name - if u.failed: - h.filename = self._decode(h.orig_filename) - else: - h.orig_filename = name - h.filename = self._decode(name) - - # change separator, if requested - if PATH_SEP != '\\': - h.filename = h.filename.replace('\\', PATH_SEP) - - if h.flags & RAR_FILE_SALT: - h.salt, pos = load_bytes(hdata, 8, pos) - else: - h.salt = None - - # optional extended time stamps - if h.flags & RAR_FILE_EXTTIME: - pos = _parse_ext_time(h, hdata, pos) - else: - h.mtime = h.atime = h.ctime = h.arctime = None - - return pos - - # find old-style comment subblock - def _parse_subblocks(self, h, hdata, pos): - while pos < len(hdata): - # ordinary block header - t = S_BLK_HDR.unpack_from(hdata, pos) - ___scrc, stype, sflags, slen = t - pos_next = pos + slen - pos += S_BLK_HDR.size - - # corrupt header - if pos_next < pos: - break - - # followed by block-specific header - if stype == RAR_BLOCK_OLD_COMMENT and pos + 
S_COMMENT_HDR.size <= pos_next: - declen, ver, meth, crc = S_COMMENT_HDR.unpack_from(hdata, pos) - pos += S_COMMENT_HDR.size - data = hdata[pos : pos_next] - cmt = rar3_decompress(ver, meth, data, declen, sflags, - crc, self._password) - if not self._crc_check: - h.comment = self._decode_comment(cmt) - elif rar_crc32(cmt) & 0xFFFF == crc: - h.comment = self._decode_comment(cmt) - - pos = pos_next - return pos - - def _read_comment_v3(self, inf, psw=None): - - # read data - with XFile(inf.volume_file) as rf: - rf.seek(inf.data_offset) - data = rf.read(inf.compress_size) - - # decompress - cmt = rar3_decompress(inf.extract_version, inf.compress_type, data, - inf.file_size, inf.flags, inf.CRC, psw, inf.salt) - - # check crc - if self._crc_check: - crc = rar_crc32(cmt) - if crc != inf.CRC: - return None - - return self._decode_comment(cmt) - - def _decode(self, val): - for c in TRY_ENCODINGS: - try: - return val.decode(c) - except UnicodeError: - pass - return val.decode(self._charset, 'replace') - - def _decode_comment(self, val): - return self._decode(val) - - def process_entry(self, fd, item): - if item.type == RAR_BLOCK_FILE: - # use only first part - if (item.flags & RAR_FILE_SPLIT_BEFORE) == 0: - self._info_map[item.filename] = item - self._info_list.append(item) - elif len(self._info_list) > 0: - # final crc is in last block - old = self._info_list[-1] - old.CRC = item.CRC - old._md_expect = item._md_expect - old.compress_size += item.compress_size - - # parse new-style comment - if item.type == RAR_BLOCK_SUB and item.filename == 'CMT': - if item.flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER): - pass - elif item.flags & RAR_FILE_SOLID: - # file comment - cmt = self._read_comment_v3(item, self._password) - if len(self._info_list) > 0: - old = self._info_list[-1] - old.comment = cmt - else: - # archive comment - cmt = self._read_comment_v3(item, self._password) - self.comment = cmt - - if item.type == RAR_BLOCK_MAIN: - if item.flags & RAR_MAIN_COMMENT: - 
self.comment = item.comment - if item.flags & RAR_MAIN_PASSWORD: - self._needs_password = True - - # put file compressed data into temporary .rar archive, and run - # unrar on that, thus avoiding unrar going over whole archive - def _open_hack(self, inf, psw): - # create main header: crc, type, flags, size, res1, res2 - prefix = RAR_ID + S_BLK_HDR.pack(0x90CF, 0x73, 0, 13) + ZERO * (2 + 4) - return self._open_hack_core(inf, psw, prefix, EMPTY) - -# -# RAR5 format -# - -class Rar5Info(RarInfo): - """Shared fields for RAR5 records. - """ - extract_version = 50 - header_crc = None - header_size = None - header_offset = None - data_offset = None - - # type=all - block_type = None - block_flags = None - add_size = 0 - block_extra_size = 0 - - # type=MAIN - volume_number = None - _md_class = None - _md_expect = None - - def _must_disable_hack(self): - return False - - -class Rar5BaseFile(Rar5Info): - """Shared sturct for file & service record. - """ - type = -1 - file_flags = None - file_encryption = (0, 0, 0, EMPTY, EMPTY, EMPTY) - file_compress_flags = None - file_redir = None - file_owner = None - file_version = None - blake2sp_hash = None - - def _must_disable_hack(self): - if self.flags & RAR_FILE_PASSWORD: - return True - if self.block_flags & (RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER): - return True - if self.file_compress_flags & RAR5_COMPR_SOLID: - return True - if self.file_redir: - return True - return False - - -class Rar5FileInfo(Rar5BaseFile): - """RAR5 file record. - """ - type = RAR_BLOCK_FILE - - -class Rar5ServiceInfo(Rar5BaseFile): - """RAR5 service record. - """ - type = RAR_BLOCK_SUB - - -class Rar5MainInfo(Rar5Info): - """RAR5 archive main record. - """ - type = RAR_BLOCK_MAIN - main_flags = None - main_volume_number = None - - def _must_disable_hack(self): - if self.main_flags & RAR5_MAIN_FLAG_SOLID: - return True - return False - - -class Rar5EncryptionInfo(Rar5Info): - """RAR5 archive header encryption record. 
- """ - type = RAR5_BLOCK_ENCRYPTION - encryption_algo = None - encryption_flags = None - encryption_kdf_count = None - encryption_salt = None - encryption_check_value = None - - def needs_password(self): - return True - - -class Rar5EndArcInfo(Rar5Info): - """RAR5 end of archive record. - """ - type = RAR_BLOCK_ENDARC - endarc_flags = None - - -class RAR5Parser(CommonParser): - """Parse RAR5 format. - """ - _expect_sig = RAR5_ID - _hdrenc_main = None - - # AES encrypted headers - _last_aes256_key = (-1, None, None) # (kdf_count, salt, key) - - def _gen_key(self, kdf_count, salt): - if self._last_aes256_key[:2] == (kdf_count, salt): - return self._last_aes256_key[2] - if kdf_count > 24: - raise BadRarFile('Too large kdf_count') - psw = self._password - if isinstance(psw, unicode): - psw = psw.encode('utf8') - key = pbkdf2_sha256(psw, salt, 1 << kdf_count) - self._last_aes256_key = (kdf_count, salt, key) - return key - - def _decrypt_header(self, fd): - if not _have_crypto: - raise NoCrypto('Cannot parse encrypted headers - no crypto') - h = self._hdrenc_main - key = self._gen_key(h.encryption_kdf_count, h.encryption_salt) - iv = fd.read(16) - return HeaderDecrypt(fd, key, iv) - - # common header - def _parse_block_header(self, fd): - header_offset = fd.tell() - - preload = 4 + 3 - start_bytes = fd.read(preload) - header_crc, pos = load_le32(start_bytes, 0) - hdrlen, pos = load_vint(start_bytes, pos) - if hdrlen > 2 * 1024 * 1024: - return None - header_size = pos + hdrlen - - # read full header, check for EOF - hdata = start_bytes + fd.read(header_size - len(start_bytes)) - if len(hdata) != header_size: - self._set_error('Unexpected EOF when reading header') - return None - data_offset = fd.tell() - - calc_crc = rar_crc32(memoryview(hdata)[4:]) - if header_crc != calc_crc: - # header parsing failed. 
- self._set_error('Header CRC error: exp=%x got=%x (xlen = %d)', - header_crc, calc_crc, len(hdata)) - return None - - block_type, pos = load_vint(hdata, pos) - - if block_type == RAR5_BLOCK_MAIN: - h, pos = self._parse_block_common(Rar5MainInfo(), hdata) - h = self._parse_main_block(h, hdata, pos) - elif block_type == RAR5_BLOCK_FILE: - h, pos = self._parse_block_common(Rar5FileInfo(), hdata) - h = self._parse_file_block(h, hdata, pos) - elif block_type == RAR5_BLOCK_SERVICE: - h, pos = self._parse_block_common(Rar5ServiceInfo(), hdata) - h = self._parse_file_block(h, hdata, pos) - elif block_type == RAR5_BLOCK_ENCRYPTION: - h, pos = self._parse_block_common(Rar5EncryptionInfo(), hdata) - h = self._parse_encryption_block(h, hdata, pos) - elif block_type == RAR5_BLOCK_ENDARC: - h, pos = self._parse_block_common(Rar5EndArcInfo(), hdata) - h = self._parse_endarc_block(h, hdata, pos) - else: - h = None - if h: - h.header_offset = header_offset - h.data_offset = data_offset - return h - - def _parse_block_common(self, h, hdata): - h.header_crc, pos = load_le32(hdata, 0) - hdrlen, pos = load_vint(hdata, pos) - h.header_size = hdrlen + pos - h.block_type, pos = load_vint(hdata, pos) - h.block_flags, pos = load_vint(hdata, pos) - - if h.block_flags & RAR5_BLOCK_FLAG_EXTRA_DATA: - h.block_extra_size, pos = load_vint(hdata, pos) - if h.block_flags & RAR5_BLOCK_FLAG_DATA_AREA: - h.add_size, pos = load_vint(hdata, pos) - - h.compress_size = h.add_size - - if h.block_flags & RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN: - h.flags |= RAR_SKIP_IF_UNKNOWN - if h.block_flags & RAR5_BLOCK_FLAG_DATA_AREA: - h.flags |= RAR_LONG_BLOCK - return h, pos - - def _parse_main_block(self, h, hdata, pos): - h.main_flags, pos = load_vint(hdata, pos) - if h.main_flags & RAR5_MAIN_FLAG_HAS_VOLNR: - h.main_volume_number, pos = load_vint(hdata, pos) - - h.flags |= RAR_MAIN_NEWNUMBERING - if h.main_flags & RAR5_MAIN_FLAG_SOLID: - h.flags |= RAR_MAIN_SOLID - if h.main_flags & RAR5_MAIN_FLAG_ISVOL: - h.flags |= 
RAR_MAIN_VOLUME - if h.main_flags & RAR5_MAIN_FLAG_RECOVERY: - h.flags |= RAR_MAIN_RECOVERY - if self._hdrenc_main: - h.flags |= RAR_MAIN_PASSWORD - if h.main_flags & RAR5_MAIN_FLAG_HAS_VOLNR == 0: - h.flags |= RAR_MAIN_FIRSTVOLUME - - return h - - def _parse_file_block(self, h, hdata, pos): - h.file_flags, pos = load_vint(hdata, pos) - h.file_size, pos = load_vint(hdata, pos) - h.mode, pos = load_vint(hdata, pos) - - if h.file_flags & RAR5_FILE_FLAG_HAS_MTIME: - h.mtime, pos = load_unixtime(hdata, pos) - h.date_time = h.mtime.timetuple()[:6] - if h.file_flags & RAR5_FILE_FLAG_HAS_CRC32: - h.CRC, pos = load_le32(hdata, pos) - h._md_class = CRC32Context - h._md_expect = h.CRC - - h.file_compress_flags, pos = load_vint(hdata, pos) - h.file_host_os, pos = load_vint(hdata, pos) - h.orig_filename, pos = load_vstr(hdata, pos) - h.filename = h.orig_filename.decode('utf8', 'replace') - - # use compatible values - if h.file_host_os == RAR5_OS_WINDOWS: - h.host_os = RAR_OS_WIN32 - else: - h.host_os = RAR_OS_UNIX - h.compress_type = RAR_M0 + ((h.file_compress_flags >> 7) & 7) - - if h.block_extra_size: - # allow 1 byte of garbage - while pos < len(hdata) - 1: - xsize, pos = load_vint(hdata, pos) - xdata, pos = load_bytes(hdata, xsize, pos) - self._process_file_extra(h, xdata) - - if h.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE: - h.flags |= RAR_FILE_SPLIT_BEFORE - if h.block_flags & RAR5_BLOCK_FLAG_SPLIT_AFTER: - h.flags |= RAR_FILE_SPLIT_AFTER - if h.file_flags & RAR5_FILE_FLAG_ISDIR: - h.flags |= RAR_FILE_DIRECTORY - if h.file_compress_flags & RAR5_COMPR_SOLID: - h.flags |= RAR_FILE_SOLID - - return h - - def _parse_endarc_block(self, h, hdata, pos): - h.endarc_flags, pos = load_vint(hdata, pos) - if h.endarc_flags & RAR5_ENDARC_FLAG_NEXT_VOL: - h.flags |= RAR_ENDARC_NEXT_VOLUME - return h - - def _parse_encryption_block(self, h, hdata, pos): - h.encryption_algo, pos = load_vint(hdata, pos) - h.encryption_flags, pos = load_vint(hdata, pos) - h.encryption_kdf_count, pos = 
load_byte(hdata, pos) - h.encryption_salt, pos = load_bytes(hdata, 16, pos) - if h.encryption_flags & RAR5_ENC_FLAG_HAS_CHECKVAL: - h.encryption_check_value = load_bytes(hdata, 12, pos) - if h.encryption_algo != RAR5_XENC_CIPHER_AES256: - raise BadRarFile('Unsupported header encryption cipher') - self._hdrenc_main = h - return h - - # file extra record - def _process_file_extra(self, h, xdata): - xtype, pos = load_vint(xdata, 0) - if xtype == RAR5_XFILE_TIME: - self._parse_file_xtime(h, xdata, pos) - elif xtype == RAR5_XFILE_ENCRYPTION: - self._parse_file_encryption(h, xdata, pos) - elif xtype == RAR5_XFILE_HASH: - self._parse_file_hash(h, xdata, pos) - elif xtype == RAR5_XFILE_VERSION: - self._parse_file_version(h, xdata, pos) - elif xtype == RAR5_XFILE_REDIR: - self._parse_file_redir(h, xdata, pos) - elif xtype == RAR5_XFILE_OWNER: - self._parse_file_owner(h, xdata, pos) - elif xtype == RAR5_XFILE_SERVICE: - pass - else: - pass - - # extra block for file time record - def _parse_file_xtime(self, h, xdata, pos): - tflags, pos = load_vint(xdata, pos) - ldr = load_windowstime - if tflags & RAR5_XTIME_UNIXTIME: - ldr = load_unixtime - if tflags & RAR5_XTIME_HAS_MTIME: - h.mtime, pos = ldr(xdata, pos) - h.date_time = h.mtime.timetuple()[:6] - if tflags & RAR5_XTIME_HAS_CTIME: - h.ctime, pos = ldr(xdata, pos) - if tflags & RAR5_XTIME_HAS_ATIME: - h.atime, pos = ldr(xdata, pos) - - # just remember encryption info - def _parse_file_encryption(self, h, xdata, pos): - algo, pos = load_vint(xdata, pos) - flags, pos = load_vint(xdata, pos) - kdf_count, pos = load_byte(xdata, pos) - salt, pos = load_bytes(xdata, 16, pos) - iv, pos = load_bytes(xdata, 16, pos) - checkval = None - if flags & RAR5_XENC_CHECKVAL: - checkval, pos = load_bytes(xdata, 12, pos) - if flags & RAR5_XENC_TWEAKED: - h._md_expect = None - h._md_class = NoHashContext - - h.file_encryption = (algo, flags, kdf_count, salt, iv, checkval) - h.flags |= RAR_FILE_PASSWORD - - def _parse_file_hash(self, h, xdata, 
pos): - hash_type, pos = load_vint(xdata, pos) - if hash_type == RAR5_XHASH_BLAKE2SP: - h.blake2sp_hash, pos = load_bytes(xdata, 32, pos) - if _have_blake2 and (h.file_encryption[1] & RAR5_XENC_TWEAKED) == 0: - h._md_class = Blake2SP - h._md_expect = h.blake2sp_hash - - def _parse_file_version(self, h, xdata, pos): - flags, pos = load_vint(xdata, pos) - version, pos = load_vint(xdata, pos) - h.file_version = (flags, version) - - def _parse_file_redir(self, h, xdata, pos): - redir_type, pos = load_vint(xdata, pos) - redir_flags, pos = load_vint(xdata, pos) - redir_name, pos = load_vstr(xdata, pos) - redir_name = redir_name.decode('utf8', 'replace') - h.file_redir = (redir_type, redir_flags, redir_name) - - def _parse_file_owner(self, h, xdata, pos): - user_name = group_name = user_id = group_id = None - - flags, pos = load_vint(xdata, pos) - if flags & RAR5_XOWNER_UNAME: - user_name, pos = load_vstr(xdata, pos) - if flags & RAR5_XOWNER_GNAME: - group_name, pos = load_vstr(xdata, pos) - if flags & RAR5_XOWNER_UID: - user_id, pos = load_vint(xdata, pos) - if flags & RAR5_XOWNER_GID: - group_id, pos = load_vint(xdata, pos) - - h.file_owner = (user_name, group_name, user_id, group_id) - - def process_entry(self, fd, item): - if item.block_type == RAR5_BLOCK_FILE: - # use only first part - if (item.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE) == 0: - self._info_map[item.filename] = item - self._info_list.append(item) - elif len(self._info_list) > 0: - # final crc is in last block - old = self._info_list[-1] - old.CRC = item.CRC - old._md_expect = item._md_expect - old.blake2sp_hash = item.blake2sp_hash - old.compress_size += item.compress_size - elif item.block_type == RAR5_BLOCK_SERVICE: - if item.filename == 'CMT': - self._load_comment(fd, item) - - def _load_comment(self, fd, item): - if item.block_flags & (RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER): - return None - if item.compress_type != RAR_M0: - return None - - if item.flags & RAR_FILE_PASSWORD: - 
algo, ___flags, kdf_count, salt, iv, ___checkval = item.file_encryption - if algo != RAR5_XENC_CIPHER_AES256: - return None - key = self._gen_key(kdf_count, salt) - f = HeaderDecrypt(fd, key, iv) - cmt = f.read(item.file_size) - else: - # archive comment - with self._open_clear(item) as cmtstream: - cmt = cmtstream.read() - - # rar bug? - appends zero to comment - cmt = cmt.split(ZERO, 1)[0] - self.comment = cmt.decode('utf8') - return None - - def _open_hack(self, inf, psw): - # len, type, blk_flags, flags - main_hdr = b'\x03\x01\x00\x00' - endarc_hdr = b'\x03\x05\x00\x00' - main_hdr = S_LONG.pack(rar_crc32(main_hdr)) + main_hdr - endarc_hdr = S_LONG.pack(rar_crc32(endarc_hdr)) + endarc_hdr - return self._open_hack_core(inf, psw, RAR5_ID + main_hdr, endarc_hdr) - -## -## Utility classes -## - -class UnicodeFilename(object): - """Handle RAR3 unicode filename decompression. - """ - def __init__(self, name, encdata): - self.std_name = bytearray(name) - self.encdata = bytearray(encdata) - self.pos = self.encpos = 0 - self.buf = bytearray() - self.failed = 0 - - def enc_byte(self): - """Copy encoded byte.""" - try: - c = self.encdata[self.encpos] - self.encpos += 1 - return c - except IndexError: - self.failed = 1 - return 0 - - def std_byte(self): - """Copy byte from 8-bit representation.""" - try: - return self.std_name[self.pos] - except IndexError: - self.failed = 1 - return ord('?') - - def put(self, lo, hi): - """Copy 16-bit value to result.""" - self.buf.append(lo) - self.buf.append(hi) - self.pos += 1 - - def decode(self): - """Decompress compressed UTF16 value.""" - hi = self.enc_byte() - flagbits = 0 - while self.encpos < len(self.encdata): - if flagbits == 0: - flags = self.enc_byte() - flagbits = 8 - flagbits -= 2 - t = (flags >> flagbits) & 3 - if t == 0: - self.put(self.enc_byte(), 0) - elif t == 1: - self.put(self.enc_byte(), hi) - elif t == 2: - self.put(self.enc_byte(), self.enc_byte()) - else: - n = self.enc_byte() - if n & 0x80: - c = self.enc_byte() 
- for _ in range((n & 0x7f) + 2): - lo = (self.std_byte() + c) & 0xFF - self.put(lo, hi) - else: - for _ in range(n + 2): - self.put(self.std_byte(), 0) - return self.buf.decode("utf-16le", "replace") - - -class RarExtFile(RawIOBase): - """Base class for file-like object that :meth:`RarFile.open` returns. - - Provides public methods and common crc checking. - - Behaviour: - - no short reads - .read() and .readinfo() read as much as requested. - - no internal buffer, use io.BufferedReader for that. - """ - - #: Filename of the archive entry - name = None - - def __init__(self, parser, inf): - """Open archive entry. - """ - super(RarExtFile, self).__init__() - - # standard io.* properties - self.name = inf.filename - self.mode = 'rb' - - self._parser = parser - self._inf = inf - self._fd = None - self._remain = 0 - self._returncode = 0 - - self._md_context = None - - self._open() - - def _open(self): - if self._fd: - self._fd.close() - md_class = self._inf._md_class or NoHashContext - self._md_context = md_class() - self._fd = None - self._remain = self._inf.file_size - - def read(self, cnt=None): - """Read all or specified amount of data from archive entry.""" - - # sanitize cnt - if cnt is None or cnt < 0: - cnt = self._remain - elif cnt > self._remain: - cnt = self._remain - if cnt == 0: - return EMPTY - - # actual read - data = self._read(cnt) - if data: - self._md_context.update(data) - self._remain -= len(data) - if len(data) != cnt: - raise BadRarFile("Failed the read enough data") - - # done? 
- if not data or self._remain == 0: - # self.close() - self._check() - return data - - def _check(self): - """Check final CRC.""" - final = self._md_context.digest() - exp = self._inf._md_expect - if exp is None: - return - if final is None: - return - if self._returncode: - check_returncode(self, '') - if self._remain != 0: - raise BadRarFile("Failed the read enough data") - if final != exp: - raise BadRarFile("Corrupt file - CRC check failed: %s - exp=%r got=%r" % ( - self._inf.filename, exp, final)) - - def _read(self, cnt): - """Actual read that gets sanitized cnt.""" - raise NotImplementedError("_read") - - def close(self): - """Close open resources.""" - - super(RarExtFile, self).close() - - if self._fd: - self._fd.close() - self._fd = None - - def __del__(self): - """Hook delete to make sure tempfile is removed.""" - self.close() - - def readinto(self, buf): - """Zero-copy read directly into buffer. - - Returns bytes read. - """ - raise NotImplementedError('readinto') - - def tell(self): - """Return current reading position in uncompressed data.""" - return self._inf.file_size - self._remain - - def seek(self, ofs, whence=0): - """Seek in data. - - On uncompressed files, the seeking works by actual - seeks so it's fast. On compresses files its slow - - forward seeking happends by reading ahead, - backwards by re-opening and decompressing from the start. 
- """ - - # disable crc check when seeking - self._md_context = NoHashContext() - - fsize = self._inf.file_size - cur_ofs = self.tell() - - if whence == 0: # seek from beginning of file - new_ofs = ofs - elif whence == 1: # seek from current position - new_ofs = cur_ofs + ofs - elif whence == 2: # seek from end of file - new_ofs = fsize + ofs - else: - raise ValueError('Invalid value for whence') - - # sanity check - if new_ofs < 0: - new_ofs = 0 - elif new_ofs > fsize: - new_ofs = fsize - - # do the actual seek - if new_ofs >= cur_ofs: - self._skip(new_ofs - cur_ofs) - else: - # reopen and seek - self._open() - self._skip(new_ofs) - return self.tell() - - def _skip(self, cnt): - """Read and discard data""" - while cnt > 0: - if cnt > 8192: - buf = self.read(8192) - else: - buf = self.read(cnt) - if not buf: - break - cnt -= len(buf) - - def readable(self): - """Returns True""" - return True - - def writable(self): - """Returns False. - - Writing is not supported. - """ - return False - - def seekable(self): - """Returns True. - - Seeking is supported, although it's slow on compressed files. 
- """ - return True - - def readall(self): - """Read all remaining data""" - # avoid RawIOBase default impl - return self.read() - - -class PipeReader(RarExtFile): - """Read data from pipe, handle tempfile cleanup.""" - - def __init__(self, rf, inf, cmd, tempfile=None): - self._cmd = cmd - self._proc = None - self._tempfile = tempfile - super(PipeReader, self).__init__(rf, inf) - - def _close_proc(self): - if not self._proc: - return - if self._proc.stdout: - self._proc.stdout.close() - if self._proc.stdin: - self._proc.stdin.close() - if self._proc.stderr: - self._proc.stderr.close() - self._proc.wait() - self._returncode = self._proc.returncode - self._proc = None - - def _open(self): - super(PipeReader, self)._open() - - # stop old process - self._close_proc() - - # launch new process - self._returncode = 0 - self._proc = custom_popen(self._cmd) - self._fd = self._proc.stdout - - # avoid situation where unrar waits on stdin - if self._proc.stdin: - self._proc.stdin.close() - - def _read(self, cnt): - """Read from pipe.""" - - # normal read is usually enough - data = self._fd.read(cnt) - if len(data) == cnt or not data: - return data - - # short read, try looping - buf = [data] - cnt -= len(data) - while cnt > 0: - data = self._fd.read(cnt) - if not data: - break - cnt -= len(data) - buf.append(data) - return EMPTY.join(buf) - - def close(self): - """Close open resources.""" - - self._close_proc() - super(PipeReader, self).close() - - if self._tempfile: - try: - os.unlink(self._tempfile) - except OSError: - pass - self._tempfile = None - - def readinto(self, buf): - """Zero-copy read directly into buffer.""" - cnt = len(buf) - if cnt > self._remain: - cnt = self._remain - vbuf = memoryview(buf) - res = got = 0 - while got < cnt: - res = self._fd.readinto(vbuf[got : cnt]) - if not res: - break - self._md_context.update(vbuf[got : got + res]) - self._remain -= res - got += res - return got - - -class DirectReader(RarExtFile): - """Read uncompressed data directly 
from archive. - """ - _cur = None - _cur_avail = None - _volfile = None - - def _open(self): - super(DirectReader, self)._open() - - self._volfile = self._inf.volume_file - self._fd = XFile(self._volfile, 0) - self._fd.seek(self._inf.header_offset, 0) - self._cur = self._parser._parse_header(self._fd) - self._cur_avail = self._cur.add_size - - def _skip(self, cnt): - """RAR Seek, skipping through rar files to get to correct position - """ - - while cnt > 0: - # next vol needed? - if self._cur_avail == 0: - if not self._open_next(): - break - - # fd is in read pos, do the read - if cnt > self._cur_avail: - cnt -= self._cur_avail - self._remain -= self._cur_avail - self._cur_avail = 0 - else: - self._fd.seek(cnt, 1) - self._cur_avail -= cnt - self._remain -= cnt - cnt = 0 - - def _read(self, cnt): - """Read from potentially multi-volume archive.""" - - buf = [] - while cnt > 0: - # next vol needed? - if self._cur_avail == 0: - if not self._open_next(): - break - - # fd is in read pos, do the read - if cnt > self._cur_avail: - data = self._fd.read(self._cur_avail) - else: - data = self._fd.read(cnt) - if not data: - break - - # got some data - cnt -= len(data) - self._cur_avail -= len(data) - buf.append(data) - - if len(buf) == 1: - return buf[0] - return EMPTY.join(buf) - - def _open_next(self): - """Proceed to next volume.""" - - # is the file split over archives? 
- if (self._cur.flags & RAR_FILE_SPLIT_AFTER) == 0: - return False - - if self._fd: - self._fd.close() - self._fd = None - - # open next part - self._volfile = self._parser._next_volname(self._volfile) - fd = open(self._volfile, "rb", 0) - self._fd = fd - sig = fd.read(len(self._parser._expect_sig)) - if sig != self._parser._expect_sig: - raise BadRarFile("Invalid signature") - - # loop until first file header - while 1: - cur = self._parser._parse_header(fd) - if not cur: - raise BadRarFile("Unexpected EOF") - if cur.type in (RAR_BLOCK_MARK, RAR_BLOCK_MAIN): - if cur.add_size: - fd.seek(cur.add_size, 1) - continue - if cur.orig_filename != self._inf.orig_filename: - raise BadRarFile("Did not found file entry") - self._cur = cur - self._cur_avail = cur.add_size - return True - - def readinto(self, buf): - """Zero-copy read directly into buffer.""" - got = 0 - vbuf = memoryview(buf) - while got < len(buf): - # next vol needed? - if self._cur_avail == 0: - if not self._open_next(): - break - - # length for next read - cnt = len(buf) - got - if cnt > self._cur_avail: - cnt = self._cur_avail - - # read into temp view - res = self._fd.readinto(vbuf[got : got + cnt]) - if not res: - break - self._md_context.update(vbuf[got : got + res]) - self._cur_avail -= res - self._remain -= res - got += res - return got - - -class HeaderDecrypt(object): - """File-like object that decrypts from another file""" - def __init__(self, f, key, iv): - self.f = f - self.ciph = AES_CBC_Decrypt(key, iv) - self.buf = EMPTY - - def tell(self): - """Current file pos - works only on block boundaries.""" - return self.f.tell() - - def read(self, cnt=None): - """Read and decrypt.""" - if cnt > 8 * 1024: - raise BadRarFile('Bad count to header decrypt - wrong password?') - - # consume old data - if cnt <= len(self.buf): - res = self.buf[:cnt] - self.buf = self.buf[cnt:] - return res - res = self.buf - self.buf = EMPTY - cnt -= len(res) - - # decrypt new data - blklen = 16 - while cnt > 0: - enc = 
self.f.read(blklen) - if len(enc) < blklen: - break - dec = self.ciph.decrypt(enc) - if cnt >= len(dec): - res += dec - cnt -= len(dec) - else: - res += dec[:cnt] - self.buf = dec[cnt:] - cnt = 0 - - return res - - -# handle (filename|filelike) object -class XFile(object): - """Input may be filename or file object. - """ - __slots__ = ('_fd', '_need_close') - - def __init__(self, xfile, bufsize=1024): - if is_filelike(xfile): - self._need_close = False - self._fd = xfile - self._fd.seek(0) - else: - self._need_close = True - self._fd = open(xfile, 'rb', bufsize) - - def read(self, n=None): - """Read from file.""" - return self._fd.read(n) - - def tell(self): - """Return file pos.""" - return self._fd.tell() - - def seek(self, ofs, whence=0): - """Move file pos.""" - return self._fd.seek(ofs, whence) - - def readinto(self, dst): - """Read into buffer.""" - return self._fd.readinto(dst) - - def close(self): - """Close file object.""" - if self._need_close: - self._fd.close() - - def __enter__(self): - return self - - def __exit__(self, typ, val, tb): - self.close() - - -class NoHashContext(object): - """No-op hash function.""" - def __init__(self, data=None): - """Initialize""" - def update(self, data): - """Update data""" - def digest(self): - """Final hash""" - def hexdigest(self): - """Hexadecimal digest.""" - - -class CRC32Context(object): - """Hash context that uses CRC32.""" - __slots__ = ['_crc'] - - def __init__(self, data=None): - self._crc = 0 - if data: - self.update(data) - - def update(self, data): - """Process data.""" - self._crc = rar_crc32(data, self._crc) - - def digest(self): - """Final hash.""" - return self._crc - - def hexdigest(self): - """Hexadecimal digest.""" - return '%08x' % self.digest() - - -class Blake2SP(object): - """Blake2sp hash context. 
- """ - __slots__ = ['_thread', '_buf', '_cur', '_digest'] - digest_size = 32 - block_size = 64 - parallelism = 8 - - def __init__(self, data=None): - self._buf = b'' - self._cur = 0 - self._digest = None - self._thread = [] - - for i in range(self.parallelism): - ctx = self._blake2s(i, 0, i == (self.parallelism - 1)) - self._thread.append(ctx) - - if data: - self.update(data) - - def _blake2s(self, ofs, depth, is_last): - return blake2s(node_offset=ofs, node_depth=depth, last_node=is_last, - depth=2, inner_size=32, fanout=self.parallelism) - - def _add_block(self, blk): - self._thread[self._cur].update(blk) - self._cur = (self._cur + 1) % self.parallelism - - def update(self, data): - """Hash data. - """ - view = memoryview(data) - bs = self.block_size - if self._buf: - need = bs - len(self._buf) - if len(view) < need: - self._buf += view.tobytes() - return - self._add_block(self._buf + view[:need].tobytes()) - view = view[need:] - while len(view) >= bs: - self._add_block(view[:bs]) - view = view[bs:] - self._buf = view.tobytes() - - def digest(self): - """Return final digest value. 
- """ - if self._digest is None: - if self._buf: - self._add_block(self._buf) - self._buf = EMPTY - ctx = self._blake2s(0, 1, True) - for t in self._thread: - ctx.update(t.digest()) - self._digest = ctx.digest() - return self._digest - - def hexdigest(self): - """Hexadecimal digest.""" - return tohex(self.digest()) - - -class Rar3Sha1(object): - """Bug-compat for SHA1 - """ - digest_size = 20 - block_size = 64 - - _BLK_BE = struct.Struct(b'>16L') - _BLK_LE = struct.Struct(b'<16L') - - __slots__ = ('_nbytes', '_md', '_rarbug') - - def __init__(self, data=b'', rarbug=False): - self._md = sha1() - self._nbytes = 0 - self._rarbug = rarbug - self.update(data) - - def update(self, data): - """Process more data.""" - self._md.update(data) - bufpos = self._nbytes & 63 - self._nbytes += len(data) - - if self._rarbug and len(data) > 64: - dpos = self.block_size - bufpos - while dpos + self.block_size <= len(data): - self._corrupt(data, dpos) - dpos += self.block_size - - def digest(self): - """Return final state.""" - return self._md.digest() - - def hexdigest(self): - """Return final state as hex string.""" - return self._md.hexdigest() - - def _corrupt(self, data, dpos): - """Corruption from SHA1 core.""" - ws = list(self._BLK_BE.unpack_from(data, dpos)) - for t in range(16, 80): - tmp = ws[(t - 3) & 15] ^ ws[(t - 8) & 15] ^ ws[(t - 14) & 15] ^ ws[(t - 16) & 15] - ws[t & 15] = ((tmp << 1) | (tmp >> (32 - 1))) & 0xFFFFFFFF - self._BLK_LE.pack_into(data, dpos, *ws) - - -## -## Utility functions -## - -S_LONG = Struct('<L') -S_SHORT = Struct('<H') -S_BYTE = Struct('<B') - -S_BLK_HDR = Struct('<HBHH') -S_FILE_HDR = Struct('<LLBLLBBHL') -S_COMMENT_HDR = Struct('<HBBH') - -def load_vint(buf, pos): - """Load variable-size int.""" - limit = min(pos + 11, len(buf)) - res = ofs = 0 - while pos < limit: - b = _byte_code(buf[pos]) - res += ((b & 0x7F) << ofs) - pos += 1 - ofs += 7 - if b < 0x80: - return res, pos - raise BadRarFile('cannot load vint') - -def load_byte(buf, pos): - """Load single byte""" - end = pos + 1 - if end > len(buf): - raise BadRarFile('cannot load byte') - return S_BYTE.unpack_from(buf, pos)[0], end - -def load_le32(buf, pos): - """Load little-endian 32-bit integer""" - end = pos + 4 - if end > len(buf): - raise BadRarFile('cannot load le32') - return S_LONG.unpack_from(buf, pos)[0], pos + 4 - -def load_bytes(buf, num, pos): - """Load sequence of bytes""" - end = pos + num - if end > len(buf): - raise BadRarFile('cannot load bytes') - return buf[pos : end], end
- -def load_vstr(buf, pos): - """Load bytes prefixed by vint length""" - slen, pos = load_vint(buf, pos) - return load_bytes(buf, slen, pos) - -def load_dostime(buf, pos): - """Load LE32 dos timestamp""" - stamp, pos = load_le32(buf, pos) - tup = parse_dos_time(stamp) - return to_datetime(tup), pos - -def load_unixtime(buf, pos): - """Load LE32 unix timestamp""" - secs, pos = load_le32(buf, pos) - dt = datetime.fromtimestamp(secs, UTC) - return dt, pos - -def load_windowstime(buf, pos): - """Load LE64 windows timestamp""" - # unix epoch (1970) in seconds from windows epoch (1601) - unix_epoch = 11644473600 - val1, pos = load_le32(buf, pos) - val2, pos = load_le32(buf, pos) - secs, n1secs = divmod((val2 << 32) | val1, 10000000) - dt = datetime.fromtimestamp(secs - unix_epoch, UTC) - dt = dt.replace(microsecond=n1secs // 10) - return dt, pos - -# new-style next volume -def _next_newvol(volfile): - i = len(volfile) - 1 - while i >= 0: - if volfile[i] >= '0' and volfile[i] <= '9': - return _inc_volname(volfile, i) - i -= 1 - raise BadRarName("Cannot construct volume name: " + volfile) - -# old-style next volume -def _next_oldvol(volfile): - # rar -> r00 - if volfile[-4:].lower() == '.rar': - return volfile[:-2] + '00' - return _inc_volname(volfile, len(volfile) - 1) - -# increase digits with carry, otherwise just increment char -def _inc_volname(volfile, i): - fn = list(volfile) - while i >= 0: - if fn[i] != '9': - fn[i] = chr(ord(fn[i]) + 1) - break - fn[i] = '0' - i -= 1 - return ''.join(fn) - -# rar3 extended time fields -def _parse_ext_time(h, data, pos): - # flags and rest of data can be missing - flags = 0 - if pos + 2 <= len(data): - flags = S_SHORT.unpack_from(data, pos)[0] - pos += 2 - - mtime, pos = _parse_xtime(flags >> 3 * 4, data, pos, h.mtime) - h.ctime, pos = _parse_xtime(flags >> 2 * 4, data, pos) - h.atime, pos = _parse_xtime(flags >> 1 * 4, data, pos) - h.arctime, pos = _parse_xtime(flags >> 0 * 4, data, pos) - if mtime: - h.mtime = mtime - 
h.date_time = mtime.timetuple()[:6] - return pos - -# rar3 one extended time field -def _parse_xtime(flag, data, pos, basetime=None): - res = None - if flag & 8: - if not basetime: - basetime, pos = load_dostime(data, pos) - - # load second fractions - rem = 0 - cnt = flag & 3 - for _ in range(cnt): - b, pos = load_byte(data, pos) - rem = (b << 16) | (rem >> 8) - - # convert 100ns units to microseconds - usec = rem // 10 - if usec > 1000000: - usec = 999999 - - # dostime has room for 30 seconds only, correct if needed - if flag & 4 and basetime.second < 59: - res = basetime.replace(microsecond=usec, second=basetime.second + 1) - else: - res = basetime.replace(microsecond=usec) - return res, pos - -def is_filelike(obj): - """Filename or file object? - """ - if _have_pathlib: - filename_types = (bytes, unicode, Path) - else: - filename_types = (bytes, unicode) - - if isinstance(obj, filename_types): - return False - res = True - for a in ('read', 'tell', 'seek'): - res = res and hasattr(obj, a) - if not res: - raise ValueError("Invalid object passed as file") - return True - -def rar3_s2k(psw, salt): - """String-to-key hash for RAR3. - """ - if not isinstance(psw, unicode): - psw = psw.decode('utf8') - seed = bytearray(psw.encode('utf-16le') + salt) - h = Rar3Sha1(rarbug=True) - iv = EMPTY - for i in range(16): - for j in range(0x4000): - cnt = S_LONG.pack(i * 0x4000 + j) - h.update(seed) - h.update(cnt[:3]) - if j == 0: - iv += h.digest()[19:20] - key_be = h.digest()[:16] - key_le = pack("<LLLL", *unpack(">LLLL", key_be)) - return key_le, iv - -def rar3_decompress(vers, meth, data, declen=0, flags=0, crc=0, psw=None, salt=None): - """Decompress blob of compressed data. - - Used for data with non-standard header - eg. comments. - """ - # already uncompressed?
- if meth == RAR_M0 and (flags & RAR_FILE_PASSWORD) == 0: - return data - - # take only necessary flags - flags = flags & (RAR_FILE_PASSWORD | RAR_FILE_SALT | RAR_FILE_DICTMASK) - flags |= RAR_LONG_BLOCK - - # file header - fname = b'data' - date = 0 - mode = 0x20 - fhdr = S_FILE_HDR.pack(len(data), declen, RAR_OS_MSDOS, crc, - date, vers, meth, len(fname), mode) - fhdr += fname - if flags & RAR_FILE_SALT: - if not salt: - return EMPTY - fhdr += salt - - # full header - hlen = S_BLK_HDR.size + len(fhdr) - hdr = S_BLK_HDR.pack(0, RAR_BLOCK_FILE, flags, hlen) + fhdr - hcrc = rar_crc32(hdr[2:]) & 0xFFFF - hdr = S_BLK_HDR.pack(hcrc, RAR_BLOCK_FILE, flags, hlen) + fhdr - - # archive main header - mh = S_BLK_HDR.pack(0x90CF, RAR_BLOCK_MAIN, 0, 13) + ZERO * (2 + 4) - - # decompress via temp rar - tmpfd, tmpname = mkstemp(suffix='.rar') - tmpf = os.fdopen(tmpfd, "wb") - try: - tmpf.write(RAR_ID + mh + hdr + data) - tmpf.close() - - cmd = [UNRAR_TOOL] + list(OPEN_ARGS) - add_password_arg(cmd, psw, (flags & RAR_FILE_PASSWORD)) - cmd.append(tmpname) - - p = custom_popen(cmd) - return p.communicate()[0] - finally: - tmpf.close() - os.unlink(tmpname) - -def to_datetime(t): - """Convert 6-part time tuple into datetime object. - """ - if t is None: - return None - - # extract values - year, mon, day, h, m, s = t - - # assume the values are valid - try: - return datetime(year, mon, day, h, m, s) - except ValueError: - pass - - # sanitize invalid values - mday = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) - if mon < 1: - mon = 1 - if mon > 12: - mon = 12 - if day < 1: - day = 1 - if day > mday[mon]: - day = mday[mon] - if h > 23: - h = 23 - if m > 59: - m = 59 - if s > 59: - s = 59 - if mon == 2 and day == 29: - try: - return datetime(year, mon, day, h, m, s) - except ValueError: - day = 28 - return datetime(year, mon, day, h, m, s) - -def parse_dos_time(stamp): - """Parse standard 32-bit DOS timestamp. 
- """ - sec, stamp = stamp & 0x1F, stamp >> 5 - mn, stamp = stamp & 0x3F, stamp >> 6 - hr, stamp = stamp & 0x1F, stamp >> 5 - day, stamp = stamp & 0x1F, stamp >> 5 - mon, stamp = stamp & 0x0F, stamp >> 4 - yr = (stamp & 0x7F) + 1980 - return (yr, mon, day, hr, mn, sec * 2) - -def custom_popen(cmd): - """Disconnect cmd from parent fds, read only from stdout. - """ - # needed for py2exe - creationflags = 0 - if sys.platform == 'win32': - creationflags = 0x08000000 # CREATE_NO_WINDOW - - # run command - try: - p = Popen(cmd, bufsize=0, stdout=PIPE, stdin=PIPE, stderr=STDOUT, - creationflags=creationflags) - except OSError as ex: - if ex.errno == errno.ENOENT: - raise RarCannotExec("Unrar not installed? (rarfile.UNRAR_TOOL=%r)" % UNRAR_TOOL) - if ex.errno == errno.EACCES or ex.errno == errno.EPERM: - raise RarCannotExec("Cannot execute unrar (rarfile.UNRAR_TOOL=%r)" % UNRAR_TOOL) - raise - return p - -def custom_check(cmd, ignore_retcode=False): - """Run command, collect output, raise error if needed. - """ - p = custom_popen(cmd) - out, _ = p.communicate() - if p.returncode and not ignore_retcode: - raise RarExecError("Check-run failed") - return out - -def add_password_arg(cmd, psw, ___required=False): - """Append password switch to commandline. - """ - if UNRAR_TOOL == ALT_TOOL: - return - if psw is not None: - cmd.append('-p' + psw) - else: - cmd.append('-p-') - -def check_returncode(p, out): - """Raise exception according to unrar exit code. 
- """ - code = p.returncode - if code == 0: - return - - # map return code to exception class, codes from rar.txt - errmap = [None, - RarWarning, RarFatalError, RarCRCError, RarLockedArchiveError, # 1..4 - RarWriteError, RarOpenError, RarUserError, RarMemoryError, # 5..8 - RarCreateError, RarNoFilesError, RarWrongPassword] # 9..11 - if UNRAR_TOOL == ALT_TOOL: - errmap = [None] - if code > 0 and code < len(errmap): - exc = errmap[code] - elif code == 255: - exc = RarUserBreak - elif code < 0: - exc = RarSignalExit - else: - exc = RarUnknownError - - # format message - if out: - msg = "%s [%d]: %s" % (exc.__doc__, p.returncode, out) - else: - msg = "%s [%d]" % (exc.__doc__, p.returncode) - - raise exc(msg) - -def hmac_sha256(key, data): - """HMAC-SHA256""" - return HMAC(key, data, sha256).digest() - -def membuf_tempfile(memfile): - """Write in-memory file object to real file.""" - memfile.seek(0, 0) - - tmpfd, tmpname = mkstemp(suffix='.rar') - tmpf = os.fdopen(tmpfd, "wb") - - try: - while True: - buf = memfile.read(BSIZE) - if not buf: - break - tmpf.write(buf) - tmpf.close() - except: - tmpf.close() - os.unlink(tmpname) - raise - return tmpname - -class XTempFile(object): - """Real file for archive. - """ - __slots__ = ('_tmpfile', '_filename') - - def __init__(self, rarfile): - if is_filelike(rarfile): - self._tmpfile = membuf_tempfile(rarfile) - self._filename = self._tmpfile - else: - self._tmpfile = None - self._filename = rarfile - - def __enter__(self): - return self._filename - - def __exit__(self, exc_type, exc_value, tb): - if self._tmpfile: - try: - os.unlink(self._tmpfile) - except OSError: - pass - self._tmpfile = None - -# -# Check if unrar works -# - -ORIG_UNRAR_TOOL = UNRAR_TOOL -ORIG_OPEN_ARGS = OPEN_ARGS -ORIG_EXTRACT_ARGS = EXTRACT_ARGS -ORIG_TEST_ARGS = TEST_ARGS - -def _check_unrar_tool(): - global UNRAR_TOOL, OPEN_ARGS, EXTRACT_ARGS, TEST_ARGS - try: - # does UNRAR_TOOL work? 
- custom_check([ORIG_UNRAR_TOOL], True) - - UNRAR_TOOL = ORIG_UNRAR_TOOL - OPEN_ARGS = ORIG_OPEN_ARGS - EXTRACT_ARGS = ORIG_EXTRACT_ARGS - TEST_ARGS = ORIG_TEST_ARGS - except RarCannotExec: - try: - # does ALT_TOOL work? - custom_check([ALT_TOOL] + list(ALT_CHECK_ARGS), True) - # replace config - UNRAR_TOOL = ALT_TOOL - OPEN_ARGS = ALT_OPEN_ARGS - EXTRACT_ARGS = ALT_EXTRACT_ARGS - TEST_ARGS = ALT_TEST_ARGS - except RarCannotExec: - # no usable tool, only uncompressed archives work - return False - return True - -_check_unrar_tool() - diff --git a/lib/rarfile/rarfile.pyi b/lib/rarfile/rarfile.pyi new file mode 100644 index 0000000..efdd563 --- /dev/null +++ b/lib/rarfile/rarfile.pyi @@ -0,0 +1,258 @@ +from io import RawIOBase +from typing import Any, Optional + +class AES_CBC_Decrypt: + decrypt: Any = ... + def __init__(self, key: Any, iv: Any) -> None: ... + +class AES_CBC_Decrypt: + decrypt: Any = ... + def __init__(self, key: Any, iv: Any) -> None: ... + +def is_rarfile(xfile: Any): ... +def is_rarfile_sfx(xfile: Any): ... + +class Error(Exception): ... +class BadRarFile(Error): ... +class NotRarFile(Error): ... +class BadRarName(Error): ... +class NoRarEntry(Error): ... +class PasswordRequired(Error): ... +class NeedFirstVolume(Error): ... +class NoCrypto(Error): ... +class RarExecError(Error): ... +class RarWarning(RarExecError): ... +class RarFatalError(RarExecError): ... +class RarCRCError(RarExecError): ... +class RarLockedArchiveError(RarExecError): ... +class RarWriteError(RarExecError): ... +class RarOpenError(RarExecError): ... +class RarUserError(RarExecError): ... +class RarMemoryError(RarExecError): ... +class RarCreateError(RarExecError): ... +class RarNoFilesError(RarExecError): ... +class RarUserBreak(RarExecError): ... +class RarWrongPassword(RarExecError): ... +class RarUnknownError(RarExecError): ... +class RarSignalExit(RarExecError): ... +class RarCannotExec(RarExecError): ... + +class RarInfo: + filename: Any = ... + file_size: Any = ... 
+ compress_size: Any = ... + date_time: Any = ... + comment: Any = ... + CRC: Any = ... + volume: Any = ... + orig_filename: Any = ... + mtime: Any = ... + ctime: Any = ... + atime: Any = ... + extract_version: Any = ... + mode: Any = ... + host_os: Any = ... + compress_type: Any = ... + arctime: Any = ... + blake2sp_hash: Any = ... + file_redir: Any = ... + flags: int = ... + type: Any = ... + def isdir(self): ... + def needs_password(self): ... + +class RarFile: + comment: Any = ... + def __init__(self, rarfile: Any, mode: str = ..., charset: Optional[Any] = ..., info_callback: Optional[Any] = ..., crc_check: bool = ..., errors: str = ...) -> None: ... + def __enter__(self): ... + def __exit__(self, typ: Any, value: Any, traceback: Any) -> None: ... + def setpassword(self, password: Any) -> None: ... + def needs_password(self): ... + def namelist(self): ... + def infolist(self): ... + def volumelist(self): ... + def getinfo(self, fname: Any): ... + def open(self, fname: Any, mode: str = ..., psw: Optional[Any] = ...): ... + def read(self, fname: Any, psw: Optional[Any] = ...): ... + def close(self) -> None: ... + def printdir(self) -> None: ... + def extract(self, member: Any, path: Optional[Any] = ..., pwd: Optional[Any] = ...) -> None: ... + def extractall(self, path: Optional[Any] = ..., members: Optional[Any] = ..., pwd: Optional[Any] = ...) -> None: ... + def testrar(self) -> None: ... + def strerror(self): ... + +class CommonParser: + comment: Any = ... + def __init__(self, rarfile: Any, password: Any, crc_check: Any, charset: Any, strict: Any, info_cb: Any, sfx_offset: Any) -> None: ... + def has_header_encryption(self): ... + def setpassword(self, psw: Any) -> None: ... + def volumelist(self): ... + def needs_password(self): ... + def strerror(self): ... + def infolist(self): ... + def getinfo(self, member: Any): ... + def parse(self) -> None: ... + def process_entry(self, fd: Any, item: Any) -> None: ... + def open(self, inf: Any, psw: Any): ... 
+ +class Rar3Info(RarInfo): + extract_version: int = ... + salt: Any = ... + add_size: int = ... + header_crc: Any = ... + header_size: Any = ... + header_offset: Any = ... + data_offset: Any = ... + file_redir: Any = ... + blake2sp_hash: Any = ... + +class RAR3Parser(CommonParser): + comment: Any = ... + def process_entry(self, fd: Any, item: Any) -> None: ... + +class Rar5Info(RarInfo): + extract_version: int = ... + header_crc: Any = ... + header_size: Any = ... + header_offset: Any = ... + data_offset: Any = ... + block_type: Any = ... + block_flags: Any = ... + add_size: int = ... + block_extra_size: int = ... + volume_number: Any = ... + +class Rar5BaseFile(Rar5Info): + type: int = ... + file_flags: Any = ... + file_encryption: Any = ... + file_compress_flags: Any = ... + file_redir: Any = ... + file_owner: Any = ... + file_version: Any = ... + blake2sp_hash: Any = ... + +class Rar5FileInfo(Rar5BaseFile): + type: Any = ... + +class Rar5ServiceInfo(Rar5BaseFile): + type: Any = ... + +class Rar5MainInfo(Rar5Info): + type: Any = ... + main_flags: Any = ... + main_volume_number: Any = ... + +class Rar5EncryptionInfo(Rar5Info): + type: Any = ... + encryption_algo: Any = ... + encryption_flags: Any = ... + encryption_kdf_count: Any = ... + encryption_salt: Any = ... + encryption_check_value: Any = ... + def needs_password(self): ... + +class Rar5EndArcInfo(Rar5Info): + type: Any = ... + endarc_flags: Any = ... + +class RAR5Parser(CommonParser): + def process_entry(self, fd: Any, item: Any) -> None: ... + +class UnicodeFilename: + std_name: Any = ... + encdata: Any = ... + pos: int = ... + buf: Any = ... + failed: int = ... + def __init__(self, name: Any, encdata: Any) -> None: ... + def enc_byte(self): ... + def std_byte(self): ... + def put(self, lo: Any, hi: Any) -> None: ... + def decode(self): ... + +class RarExtFile(RawIOBase): + name: Any = ... + mode: str = ... + def __init__(self, parser: Any, inf: Any) -> None: ... 
+ def read(self, cnt: Optional[Any] = ...): ... + def close(self) -> None: ... + def __del__(self) -> None: ... + def readinto(self, buf: Any) -> None: ... + def tell(self): ... + def seek(self, ofs: Any, whence: int = ...): ... + def readable(self): ... + def writable(self): ... + def seekable(self): ... + def readall(self): ... + +class PipeReader(RarExtFile): + def __init__(self, rf: Any, inf: Any, cmd: Any, tempfile: Optional[Any] = ...) -> None: ... + def close(self) -> None: ... + def readinto(self, buf: Any): ... + +class DirectReader(RarExtFile): + def readinto(self, buf: Any): ... + +class HeaderDecrypt: + f: Any = ... + ciph: Any = ... + buf: Any = ... + def __init__(self, f: Any, key: Any, iv: Any) -> None: ... + def tell(self): ... + def read(self, cnt: Optional[Any] = ...): ... + +class XFile: + def __init__(self, xfile: Any, bufsize: int = ...) -> None: ... + def read(self, n: Optional[Any] = ...): ... + def tell(self): ... + def seek(self, ofs: Any, whence: int = ...): ... + def readinto(self, dst: Any): ... + def close(self) -> None: ... + def __enter__(self): ... + def __exit__(self, typ: Any, val: Any, tb: Any) -> None: ... + +class NoHashContext: + def __init__(self, data: Optional[Any] = ...) -> None: ... + def update(self, data: Any) -> None: ... + def digest(self) -> None: ... + def hexdigest(self) -> None: ... + +class CRC32Context: + def __init__(self, data: Optional[Any] = ...) -> None: ... + def update(self, data: Any) -> None: ... + def digest(self): ... + def hexdigest(self): ... + +class Blake2SP: + digest_size: int = ... + block_size: int = ... + parallelism: int = ... + def __init__(self, data: Optional[Any] = ...) -> None: ... + def update(self, data: Any) -> None: ... + def digest(self): ... + def hexdigest(self): ... + +class Rar3Sha1: + digest_size: int = ... + block_size: int = ... + def __init__(self, data: bytes = ..., rarbug: bool = ...) -> None: ... + def update(self, data: Any) -> None: ... + def digest(self): ... 
+ def hexdigest(self): ... + +class XTempFile: + def __init__(self, rarfile: Any) -> None: ... + def __enter__(self): ... + def __exit__(self, exc_type: Any, exc_value: Any, tb: Any) -> None: ... + +class ToolSetup: + setup: Any = ... + def __init__(self, setup: Any) -> None: ... + def check(self): ... + def open_cmdline(self, psw: Any, rarfn: Any, filefn: Optional[Any] = ...): ... + def test_cmdline(self, psw: Any, rarfn: Any): ... + def extract_cmdline(self, psw: Any, rarfn: Any, fnlist: Any, path: Any): ... + def get_errmap(self): ... + def get_cmdline(self, key: Any, psw: Any, nodash: bool = ...): ... + def add_password_arg(self, cmdline: Any, psw: Any) -> None: ... diff --git a/lib/rarfile_py2/__init__.py b/lib/rarfile_py2/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/lib/rarfile_py2/rarfile.py b/lib/rarfile_py2/rarfile.py new file mode 100644 index 0000000..7754a59 --- /dev/null +++ b/lib/rarfile_py2/rarfile.py @@ -0,0 +1,3041 @@ +# rarfile.py +# +# Copyright (c) 2005-2019 Marko Kreen +# +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +r"""RAR archive reader. + +This is Python module for Rar archive reading. The interface +is made as :mod:`zipfile`-like as possible. + +Basic logic: + - Parse archive structure with Python. 
+ - Extract non-compressed files with Python + - Extract compressed files with unrar. + - Optionally write compressed data to temp file to speed up unrar, + otherwise it needs to scan whole archive on each execution. + +Example:: + + import rarfile + + rf = rarfile.RarFile('myarchive.rar') + for f in rf.infolist(): + print f.filename, f.file_size + if f.filename == 'README': + print(rf.read(f)) + +Archive files can also be accessed via file-like object returned +by :meth:`RarFile.open`:: + + import rarfile + + with rarfile.RarFile('archive.rar') as rf: + with rf.open('README') as f: + for ln in f: + print(ln.strip()) + +There are few module-level parameters to tune behaviour, +here they are with defaults, and reason to change it:: + + import rarfile + + # Set to full path of unrar.exe if it is not in PATH + rarfile.UNRAR_TOOL = "unrar" + + # Set to '\\' to be more compatible with old rarfile + rarfile.PATH_SEP = '/' + +For more details, refer to source. + +""" + +from __future__ import division, print_function + +## +## Imports and compat - support both Python 2.x and 3.x +## + +import sys +import os +import errno +import struct + +from struct import pack, unpack, Struct +from binascii import crc32, hexlify +from tempfile import mkstemp +from subprocess import Popen, PIPE, STDOUT +from io import RawIOBase +from hashlib import sha1, sha256 +from hmac import HMAC +from datetime import datetime, timedelta, tzinfo + +# fixed offset timezone, for UTC +try: + from datetime import timezone +except ImportError: + class timezone(tzinfo): + """Compat timezone.""" + __slots__ = ('_ofs', '_name') + _DST = timedelta(0) + + def __init__(self, offset, name): + super(timezone, self).__init__() + self._ofs, self._name = offset, name + + def utcoffset(self, dt): + return self._ofs + + def tzname(self, dt): + return self._name + + def dst(self, dt): + return self._DST + +# only needed for encryped headers +try: + try: + from cryptography.hazmat.primitives.ciphers import algorithms, 
modes, Cipher + from cryptography.hazmat.backends import default_backend + from cryptography.hazmat.primitives import hashes + from cryptography.hazmat.primitives.kdf import pbkdf2 + + class AES_CBC_Decrypt(object): + """Decrypt API""" + def __init__(self, key, iv): + ciph = Cipher(algorithms.AES(key), modes.CBC(iv), default_backend()) + self.decrypt = ciph.decryptor().update + + def pbkdf2_sha256(password, salt, iters): + """PBKDF2 with HMAC-SHA256""" + ctx = pbkdf2.PBKDF2HMAC(hashes.SHA256(), 32, salt, iters, default_backend()) + return ctx.derive(password) + + except ImportError: + from Crypto.Cipher import AES + from Crypto.Protocol import KDF + + class AES_CBC_Decrypt(object): + """Decrypt API""" + def __init__(self, key, iv): + self.decrypt = AES.new(key, AES.MODE_CBC, iv).decrypt + + def pbkdf2_sha256(password, salt, iters): + """PBKDF2 with HMAC-SHA256""" + return KDF.PBKDF2(password, salt, 32, iters, hmac_sha256) + + _have_crypto = 1 +except ImportError: + _have_crypto = 0 + +try: + try: + from hashlib import blake2s + _have_blake2 = True + except ImportError: + from pyblake2 import blake2s + _have_blake2 = True +except ImportError: + _have_blake2 = False + +# compat with 2.x +if sys.hexversion < 0x3000000: + def rar_crc32(data, prev=0): + """CRC32 with unsigned values. 
+ """ + if (prev > 0) and (prev & 0x80000000): + prev -= (1 << 32) + res = crc32(data, prev) + if res < 0: + res += (1 << 32) + return res + tohex = hexlify + _byte_code = ord +else: # pragma: no cover + def tohex(data): + """Return hex string.""" + return hexlify(data).decode('ascii') + rar_crc32 = crc32 + unicode = str + _byte_code = int # noqa + +# don't break 2.6 completely +if sys.hexversion < 0x2070000: + memoryview = lambda x: x # noqa + +try: + from pathlib import Path + _have_pathlib = True +except ImportError: + _have_pathlib = False + +__version__ = '3.1' + +# export only interesting items +__all__ = ['is_rarfile', 'RarInfo', 'RarFile', 'RarExtFile'] + +## +## Module configuration. Can be tuned after importing. +## + +#: default fallback charset +DEFAULT_CHARSET = "windows-1252" + +#: list of encodings to try, with fallback to DEFAULT_CHARSET if none succeed +TRY_ENCODINGS = ('utf8', 'utf-16le') + +#: 'unrar', 'rar' or full path to either one +UNRAR_TOOL = "unrar" + +#: Command line args to use for opening file for reading. +OPEN_ARGS = ('p', '-inul') + +#: Command line args to use for extracting file to disk. +EXTRACT_ARGS = ('x', '-y', '-idq') + +#: args for testrar() +TEST_ARGS = ('t', '-idq') + +# +# Allow use of tool that is not compatible with unrar. +# +# By default use 'bsdtar' which is 'tar' program that +# sits on top of libarchive. +# +# Problems with libarchive RAR backend: +# - Does not support solid archives. +# - Does not support password-protected archives. 
+# + +ALT_TOOL = 'bsdtar' +ALT_OPEN_ARGS = ('-x', '--to-stdout', '-f') +ALT_EXTRACT_ARGS = ('-x', '-f') +ALT_TEST_ARGS = ('-t', '-f') +ALT_CHECK_ARGS = ('--help',) + +#ALT_TOOL = 'unar' +#ALT_OPEN_ARGS = ('-o', '-') +#ALT_EXTRACT_ARGS = () +#ALT_TEST_ARGS = ('-test',) # does not work +#ALT_CHECK_ARGS = ('-v',) + +#: whether to speed up decompression by using tmp archive +USE_EXTRACT_HACK = 1 + +#: limit the filesize for tmp archive usage +HACK_SIZE_LIMIT = 20 * 1024 * 1024 + +#: Separator for path name components. RAR internally uses '\\'. +#: Use '/' to be similar with zipfile. +PATH_SEP = '/' + +## +## rar constants +## + +# block types +RAR_BLOCK_MARK = 0x72 # r +RAR_BLOCK_MAIN = 0x73 # s +RAR_BLOCK_FILE = 0x74 # t +RAR_BLOCK_OLD_COMMENT = 0x75 # u +RAR_BLOCK_OLD_EXTRA = 0x76 # v +RAR_BLOCK_OLD_SUB = 0x77 # w +RAR_BLOCK_OLD_RECOVERY = 0x78 # x +RAR_BLOCK_OLD_AUTH = 0x79 # y +RAR_BLOCK_SUB = 0x7a # z +RAR_BLOCK_ENDARC = 0x7b # { + +# flags for RAR_BLOCK_MAIN +RAR_MAIN_VOLUME = 0x0001 +RAR_MAIN_COMMENT = 0x0002 +RAR_MAIN_LOCK = 0x0004 +RAR_MAIN_SOLID = 0x0008 +RAR_MAIN_NEWNUMBERING = 0x0010 +RAR_MAIN_AUTH = 0x0020 +RAR_MAIN_RECOVERY = 0x0040 +RAR_MAIN_PASSWORD = 0x0080 +RAR_MAIN_FIRSTVOLUME = 0x0100 +RAR_MAIN_ENCRYPTVER = 0x0200 + +# flags for RAR_BLOCK_FILE +RAR_FILE_SPLIT_BEFORE = 0x0001 +RAR_FILE_SPLIT_AFTER = 0x0002 +RAR_FILE_PASSWORD = 0x0004 +RAR_FILE_COMMENT = 0x0008 +RAR_FILE_SOLID = 0x0010 +RAR_FILE_DICTMASK = 0x00e0 +RAR_FILE_DICT64 = 0x0000 +RAR_FILE_DICT128 = 0x0020 +RAR_FILE_DICT256 = 0x0040 +RAR_FILE_DICT512 = 0x0060 +RAR_FILE_DICT1024 = 0x0080 +RAR_FILE_DICT2048 = 0x00a0 +RAR_FILE_DICT4096 = 0x00c0 +RAR_FILE_DIRECTORY = 0x00e0 +RAR_FILE_LARGE = 0x0100 +RAR_FILE_UNICODE = 0x0200 +RAR_FILE_SALT = 0x0400 +RAR_FILE_VERSION = 0x0800 +RAR_FILE_EXTTIME = 0x1000 +RAR_FILE_EXTFLAGS = 0x2000 + +# flags for RAR_BLOCK_ENDARC +RAR_ENDARC_NEXT_VOLUME = 0x0001 +RAR_ENDARC_DATACRC = 0x0002 +RAR_ENDARC_REVSPACE = 0x0004 +RAR_ENDARC_VOLNR = 0x0008 + +# flags common 
to all blocks +RAR_SKIP_IF_UNKNOWN = 0x4000 +RAR_LONG_BLOCK = 0x8000 + +# Host OS types +RAR_OS_MSDOS = 0 +RAR_OS_OS2 = 1 +RAR_OS_WIN32 = 2 +RAR_OS_UNIX = 3 +RAR_OS_MACOS = 4 +RAR_OS_BEOS = 5 + +# Compression methods - '0'..'5' +RAR_M0 = 0x30 +RAR_M1 = 0x31 +RAR_M2 = 0x32 +RAR_M3 = 0x33 +RAR_M4 = 0x34 +RAR_M5 = 0x35 + +# +# RAR5 constants +# + +RAR5_BLOCK_MAIN = 1 +RAR5_BLOCK_FILE = 2 +RAR5_BLOCK_SERVICE = 3 +RAR5_BLOCK_ENCRYPTION = 4 +RAR5_BLOCK_ENDARC = 5 + +RAR5_BLOCK_FLAG_EXTRA_DATA = 0x01 +RAR5_BLOCK_FLAG_DATA_AREA = 0x02 +RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN = 0x04 +RAR5_BLOCK_FLAG_SPLIT_BEFORE = 0x08 +RAR5_BLOCK_FLAG_SPLIT_AFTER = 0x10 +RAR5_BLOCK_FLAG_DEPENDS_PREV = 0x20 +RAR5_BLOCK_FLAG_KEEP_WITH_PARENT = 0x40 + +RAR5_MAIN_FLAG_ISVOL = 0x01 +RAR5_MAIN_FLAG_HAS_VOLNR = 0x02 +RAR5_MAIN_FLAG_SOLID = 0x04 +RAR5_MAIN_FLAG_RECOVERY = 0x08 +RAR5_MAIN_FLAG_LOCKED = 0x10 + +RAR5_FILE_FLAG_ISDIR = 0x01 +RAR5_FILE_FLAG_HAS_MTIME = 0x02 +RAR5_FILE_FLAG_HAS_CRC32 = 0x04 +RAR5_FILE_FLAG_UNKNOWN_SIZE = 0x08 + +RAR5_COMPR_SOLID = 0x40 + +RAR5_ENC_FLAG_HAS_CHECKVAL = 0x01 + +RAR5_ENDARC_FLAG_NEXT_VOL = 0x01 + +RAR5_XFILE_ENCRYPTION = 1 +RAR5_XFILE_HASH = 2 +RAR5_XFILE_TIME = 3 +RAR5_XFILE_VERSION = 4 +RAR5_XFILE_REDIR = 5 +RAR5_XFILE_OWNER = 6 +RAR5_XFILE_SERVICE = 7 + +RAR5_XTIME_UNIXTIME = 0x01 +RAR5_XTIME_HAS_MTIME = 0x02 +RAR5_XTIME_HAS_CTIME = 0x04 +RAR5_XTIME_HAS_ATIME = 0x08 + +RAR5_XENC_CIPHER_AES256 = 0 + +RAR5_XENC_CHECKVAL = 0x01 +RAR5_XENC_TWEAKED = 0x02 + +RAR5_XHASH_BLAKE2SP = 0 + +RAR5_XREDIR_UNIX_SYMLINK = 1 +RAR5_XREDIR_WINDOWS_SYMLINK = 2 +RAR5_XREDIR_WINDOWS_JUNCTION = 3 +RAR5_XREDIR_HARD_LINK = 4 +RAR5_XREDIR_FILE_COPY = 5 + +RAR5_XREDIR_ISDIR = 0x01 + +RAR5_XOWNER_UNAME = 0x01 +RAR5_XOWNER_GNAME = 0x02 +RAR5_XOWNER_UID = 0x04 +RAR5_XOWNER_GID = 0x08 + +RAR5_OS_WINDOWS = 0 +RAR5_OS_UNIX = 1 + +## +## internal constants +## + +RAR_ID = b"Rar!\x1a\x07\x00" +RAR5_ID = b"Rar!\x1a\x07\x01\x00" +ZERO = b'\0' +EMPTY = b'' +UTC = timezone(timedelta(0), 'UTC') +BSIZE 
= 32 * 1024 + +def _get_rar_version(xfile): + """Check quickly whether file is rar archive. + """ + with XFile(xfile) as fd: + buf = fd.read(len(RAR5_ID)) + if buf.startswith(RAR_ID): + return 3 + elif buf.startswith(RAR5_ID): + return 5 + return 0 + +## +## Public interface +## + +def is_rarfile(xfile): + """Check quickly whether file is rar archive. + """ + return _get_rar_version(xfile) > 0 + +class Error(Exception): + """Base class for rarfile errors.""" + +class BadRarFile(Error): + """Incorrect data in archive.""" + +class NotRarFile(Error): + """The file is not RAR archive.""" + +class BadRarName(Error): + """Cannot guess multipart name components.""" + +class NoRarEntry(Error): + """File not found in RAR""" + +class PasswordRequired(Error): + """File requires password""" + +class NeedFirstVolume(Error): + """Need to start from first volume.""" + +class NoCrypto(Error): + """Cannot parse encrypted headers - no crypto available.""" + +class RarExecError(Error): + """Problem reported by unrar/rar.""" + +class RarWarning(RarExecError): + """Non-fatal error""" + +class RarFatalError(RarExecError): + """Fatal error""" + +class RarCRCError(RarExecError): + """CRC error during unpacking""" + +class RarLockedArchiveError(RarExecError): + """Must not modify locked archive""" + +class RarWriteError(RarExecError): + """Write error""" + +class RarOpenError(RarExecError): + """Open error""" + +class RarUserError(RarExecError): + """User error""" + +class RarMemoryError(RarExecError): + """Memory error""" + +class RarCreateError(RarExecError): + """Create error""" + +class RarNoFilesError(RarExecError): + """No files that match pattern were found""" + +class RarUserBreak(RarExecError): + """User stop""" + +class RarWrongPassword(RarExecError): + """Incorrect password""" + +class RarUnknownError(RarExecError): + """Unknown exit code""" + +class RarSignalExit(RarExecError): + """Unrar exited with signal""" + +class RarCannotExec(RarExecError): + """Executable not found.""" 
class RarInfo(object):
    r"""An entry in rar archive.

    RAR3 extended timestamps are :class:`datetime.datetime` objects without timezone.
    RAR5 extended timestamps are :class:`datetime.datetime` objects with UTC timezone.

    Attributes:

        filename
            File name with relative path.
            Path separator is '/'.  Always unicode string.

        date_time
            File modification timestamp.  As tuple of (year, month, day, hour, minute, second).
            RAR5 allows archives where it is missing, it's None then.

        file_size
            Uncompressed size.

        compress_size
            Compressed size.

        compress_type
            Compression method: one of :data:`RAR_M0` .. :data:`RAR_M5` constants.

        extract_version
            Minimal Rar version needed for decompressing.  As (major*10 + minor),
            so 2.9 is 29.

            RAR3: 10, 20, 29

            RAR5 does not have such field in archive, it's simply set to 50.

        host_os
            Host OS type, one of RAR_OS_* constants.

            RAR3: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`, :data:`RAR_OS_MSDOS`,
            :data:`RAR_OS_OS2`, :data:`RAR_OS_BEOS`.

            RAR5: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`.

        mode
            File attributes. May be either dos-style or unix-style, depending on host_os.

        mtime
            File modification time.  Same value as :attr:`date_time`
            but as :class:`datetime.datetime` object with extended precision.

        ctime
            Optional time field: creation time.  As :class:`datetime.datetime` object.

        atime
            Optional time field: last access time.  As :class:`datetime.datetime` object.

        arctime
            Optional time field: archival time.  As :class:`datetime.datetime` object.
            (RAR3-only)

        CRC
            CRC-32 of uncompressed file, unsigned int.

            RAR5: may be None.

        blake2sp_hash
            Blake2SP hash over decompressed data.  (RAR5-only)

        comment
            Optional file comment field.  Unicode string.  (RAR3-only)

        file_redir
            If not None, file is link of some sort.  Contains tuple of (type, flags, target).
            (RAR5-only)

            Type is one of constants:

                :data:`RAR5_XREDIR_UNIX_SYMLINK`
                    unix symlink to target.
                :data:`RAR5_XREDIR_WINDOWS_SYMLINK`
                    windows symlink to target.
                :data:`RAR5_XREDIR_WINDOWS_JUNCTION`
                    windows junction.
                :data:`RAR5_XREDIR_HARD_LINK`
                    hard link to target.
                :data:`RAR5_XREDIR_FILE_COPY`
                    current file is copy of another archive entry.

            Flags may contain :data:`RAR5_XREDIR_ISDIR` bit.

        volume
            Volume nr, starting from 0.

        volume_file
            Volume file name, where file starts.

    """

    # zipfile-compatible fields
    filename = None
    file_size = None
    compress_size = None
    date_time = None
    CRC = None
    volume = None
    # documented above; give it a default like every other documented
    # attribute so reading it on a fresh instance does not raise
    volume_file = None
    orig_filename = None

    # optional extended time fields, datetime() objects.
    mtime = None
    ctime = None
    atime = None

    extract_version = None
    mode = None
    host_os = None
    compress_type = None

    # rar3-only fields
    comment = None
    arctime = None

    # rar5-only fields
    blake2sp_hash = None
    file_redir = None

    # internal fields
    flags = 0
    type = None

    def isdir(self):
        """Returns True if entry is a directory.

        Only file blocks (``RAR_BLOCK_FILE``) can be directories; the entry
        counts as one when all bits of ``RAR_FILE_DIRECTORY`` are set in
        its flags.
        """
        if self.type == RAR_BLOCK_FILE:
            return (self.flags & RAR_FILE_DIRECTORY) == RAR_FILE_DIRECTORY
        return False

    def needs_password(self):
        """Returns True if data is stored password-protected.

        Only file blocks are checked; other block types report False.
        """
        if self.type == RAR_BLOCK_FILE:
            return (self.flags & RAR_FILE_PASSWORD) > 0
        return False
class RarFile(object):
    """Parse RAR structure, provide access to files in archive.

    The archive is parsed when the object is created; the actual format
    work is delegated to a RAR3 or RAR5 parser object chosen from the
    file signature.
    """

    #: Archive comment.  Unicode string or None.
    comment = None

    def __init__(self, rarfile, mode="r", charset=None, info_callback=None,
                 crc_check=True, errors="stop"):
        """Open and parse a RAR archive.

        Parameters:

            rarfile
                archive file name
            mode
                only 'r' is supported.
            charset
                fallback charset to use, if filenames are not already Unicode-enabled.
            info_callback
                debug callback, gets to see all archive entries.
            crc_check
                set to False to disable CRC checks
            errors
                Either "stop" to quietly stop parsing on errors,
                or "strict" to raise errors.  Default is "stop".

        Raises ValueError for an unknown *errors* value and
        NotImplementedError for any *mode* other than "r".
        """
        if _have_pathlib and isinstance(rarfile, Path):
            self._rarfile = str(rarfile)
        else:
            self._rarfile = rarfile

        self._charset = charset or DEFAULT_CHARSET
        self._info_callback = info_callback
        self._crc_check = crc_check
        self._password = None
        self._file_parser = None

        if errors == "stop":
            self._strict = False
        elif errors == "strict":
            self._strict = True
        else:
            raise ValueError("Invalid value for 'errors' parameter.")

        if mode != "r":
            raise NotImplementedError("RarFile supports only mode=r")

        self._parse()

    def __enter__(self):
        """Open context."""
        return self

    def __exit__(self, typ, value, traceback):
        """Exit context"""
        self.close()

    def setpassword(self, password):
        """Sets the password to use when extracting.

        When the current parser reports encrypted headers it is dropped and
        the archive is parsed again with the new password; otherwise the
        password is simply handed to the existing parser.
        """
        self._password = password
        if self._file_parser:
            if self._file_parser.has_header_encryption():
                self._file_parser = None
        if not self._file_parser:
            self._parse()
        else:
            self._file_parser.setpassword(self._password)

    def needs_password(self):
        """Returns True if any archive entries require password for extraction.
        """
        return self._file_parser.needs_password()

    def namelist(self):
        """Return list of filenames in archive.
        """
        return [f.filename for f in self.infolist()]

    def infolist(self):
        """Return RarInfo objects for all files/directories in archive.
        """
        return self._file_parser.infolist()

    def volumelist(self):
        """Returns filenames of archive volumes.

        In case of single-volume archive, the list contains
        just the name of main archive file.
        """
        return self._file_parser.volumelist()

    def getinfo(self, fname):
        """Return RarInfo for file.
        """
        return self._file_parser.getinfo(fname)

    def open(self, fname, mode='r', psw=None):
        """Returns file-like object (:class:`RarExtFile`) from where the data can be read.

        The object implements :class:`io.RawIOBase` interface, so it can
        be further wrapped with :class:`io.BufferedReader`
        and :class:`io.TextIOWrapper`.

        On older Python where io module is not available, it implements
        only .read(), .seek(), .tell() and .close() methods.

        The object is seekable, although the seeking is fast only on
        uncompressed files, on compressed files the seeking is implemented
        by reading ahead and/or restarting the decompression.

        Parameters:

            fname
                file name or RarInfo instance.
            mode
                must be 'r'
            psw
                password to use for extracting.

        Raises NotImplementedError for other modes, TypeError when the
        entry is a directory and PasswordRequired when the entry is
        protected and no password is available.
        """

        if mode != 'r':
            raise NotImplementedError("RarFile.open() supports only mode=r")

        # entry lookup
        inf = self.getinfo(fname)
        if inf.isdir():
            raise TypeError("Directory does not have any data: " + inf.filename)

        # check password
        if inf.needs_password():
            # explicit argument wins, archive-wide password is the fallback
            psw = psw or self._password
            if psw is None:
                raise PasswordRequired("File %s requires password" % inf.filename)
        else:
            psw = None

        return self._file_parser.open(inf, psw)

    def read(self, fname, psw=None):
        """Return uncompressed data for archive entry.

        For longer files using :meth:`RarFile.open` may be better idea.

        Parameters:

            fname
                filename or RarInfo instance
            psw
                password to use for extracting.
        """

        with self.open(fname, 'r', psw) as f:
            return f.read()

    def close(self):
        """Release open resources."""
        pass

    def printdir(self):
        """Print archive file list to stdout."""
        for f in self.infolist():
            print(f.filename)

    def extract(self, member, path=None, pwd=None):
        """Extract single file into current directory.

        Parameters:

            member
                filename or :class:`RarInfo` instance
            path
                optional destination path
            pwd
                optional password to use
        """
        if isinstance(member, RarInfo):
            fname = member.filename
        elif _have_pathlib and isinstance(member, Path):
            fname = str(member)
        else:
            fname = member
        self._extract([fname], path, pwd)

    def extractall(self, path=None, members=None, pwd=None):
        """Extract all files into current directory.

        Parameters:

            path
                optional destination path
            members
                optional filename or :class:`RarInfo` instance list to extract
            pwd
                optional password to use
        """
        fnlist = []
        if members is not None:
            for m in members:
                if isinstance(m, RarInfo):
                    fnlist.append(m.filename)
                elif _have_pathlib and isinstance(m, Path):
                    # same conversion extract() applies to a single member
                    fnlist.append(str(m))
                else:
                    fnlist.append(m)
        self._extract(fnlist, path, pwd)

    def testrar(self):
        """Let 'unrar' test the archive.
        """
        cmd = [UNRAR_TOOL] + list(TEST_ARGS)
        add_password_arg(cmd, self._password)
        cmd.append('--')
        with XTempFile(self._rarfile) as rarfile:
            cmd.append(rarfile)
            p = custom_popen(cmd)
            output = p.communicate()[0]
            check_returncode(p, output)

    def strerror(self):
        """Return error string if parsing failed or None if no problems.
        """
        if not self._file_parser:
            return "Not a RAR file"
        return self._file_parser.strerror()

    ##
    ## private methods
    ##

    def _parse(self):
        """Create the parser matching the file signature and run it.

        Raises BadRarFile when the signature is neither RAR3 nor RAR5.
        """
        ver = _get_rar_version(self._rarfile)
        if ver == 3:
            p3 = RAR3Parser(self._rarfile, self._password, self._crc_check,
                            self._charset, self._strict, self._info_callback)
            self._file_parser = p3  # noqa
        elif ver == 5:
            p5 = RAR5Parser(self._rarfile, self._password, self._crc_check,
                            self._charset, self._strict, self._info_callback)
            self._file_parser = p5  # noqa
        else:
            raise BadRarFile("Not a RAR file")

        self._file_parser.parse()
        self.comment = self._file_parser.comment

    # call unrar to extract a file
    def _extract(self, fnlist, path=None, psw=None):
        """Run the unrar tool to extract *fnlist* into *path*.

        An empty *fnlist* adds no file arguments to the command line.
        """
        cmd = [UNRAR_TOOL] + list(EXTRACT_ARGS)

        # password
        psw = psw or self._password
        add_password_arg(cmd, psw)
        cmd.append('--')

        # rar file
        with XTempFile(self._rarfile) as rarfn:
            cmd.append(rarfn)

            # file list
            for fn in fnlist:
                if os.sep != PATH_SEP:
                    fn = fn.replace(PATH_SEP, os.sep)
                cmd.append(fn)

            # destination path
            if path is not None:
                if _have_pathlib and isinstance(path, Path):
                    path = str(path)
                # trailing separator appended to the destination argument
                cmd.append(path + os.sep)

            # call
            p = custom_popen(cmd)
            output = p.communicate()[0]
            check_returncode(p, output)
class CommonParser(object):
    """Shared parser parts.

    Format specific subclasses supply ``_expect_sig``, ``process_entry``,
    ``_decrypt_header``, ``_parse_block_header`` and ``_open_hack``.
    """
    _main = None
    _hdrenc_main = None
    _needs_password = False
    _fd = None
    _expect_sig = None
    _parse_error = None
    _password = None
    comment = None

    def __init__(self, rarfile, password, crc_check, charset, strict, info_cb):
        self._rarfile = rarfile
        self._password = password
        self._crc_check = crc_check
        self._charset = charset
        self._strict = strict
        self._info_callback = info_cb
        self._info_list = []
        self._info_map = {}
        self._vol_list = []

    def has_header_encryption(self):
        """Returns True if headers are encrypted
        """
        if self._hdrenc_main:
            return True
        if self._main:
            if self._main.flags & RAR_MAIN_PASSWORD:
                return True
        return False

    def setpassword(self, psw):
        """Set cached password."""
        self._password = psw

    def volumelist(self):
        """Volume files"""
        return self._vol_list

    def needs_password(self):
        """Is password required"""
        return self._needs_password

    def strerror(self):
        """Last error"""
        return self._parse_error

    def infolist(self):
        """List of RarInfo records.
        """
        return self._info_list

    def getinfo(self, member):
        """Return RarInfo for filename

        *member* may be a RarInfo, a path object or a string.  The name is
        looked up as given first and then with the path separators
        converted.  Raises NoRarEntry when neither form is known.
        """
        if isinstance(member, RarInfo):
            fname = member.filename
        elif _have_pathlib and isinstance(member, Path):
            fname = str(member)
        else:
            fname = member

        # accept both ways here
        if PATH_SEP == '/':
            fname2 = fname.replace("\\", "/")
        else:
            fname2 = fname.replace("/", "\\")

        try:
            return self._info_map[fname]
        except KeyError:
            try:
                return self._info_map[fname2]
            except KeyError:
                raise NoRarEntry("No such file: %s" % fname)

    # read rar
    def parse(self):
        """Process file."""
        self._fd = None
        try:
            self._parse_real()
        finally:
            # whichever volume is open at this point gets closed
            if self._fd:
                self._fd.close()
                self._fd = None

    def _parse_real(self):
        """Walk all headers of the archive, following volumes if needed."""
        fd = XFile(self._rarfile)
        self._fd = fd
        sig = fd.read(len(self._expect_sig))
        if sig != self._expect_sig:
            if isinstance(self._rarfile, (str, unicode)):
                raise NotRarFile("Not a Rar archive: {}".format(self._rarfile))
            raise NotRarFile("Not a Rar archive")

        volume = 0  # first vol (.rar) is 0
        more_vols = False
        endarc = False
        volfile = self._rarfile
        self._vol_list = [self._rarfile]
        while 1:
            if endarc:
                h = None    # don't read past ENDARC
            else:
                h = self._parse_header(fd)
            if not h:
                if more_vols:
                    volume += 1
                    fd.close()
                    try:
                        volfile = self._next_volname(volfile)
                        fd = XFile(volfile)
                    except IOError:
                        self._set_error("Cannot open next volume: %s", volfile)
                        break
                    self._fd = fd
                    sig = fd.read(len(self._expect_sig))
                    if sig != self._expect_sig:
                        self._set_error("Invalid volume sig: %s", volfile)
                        break
                    more_vols = False
                    endarc = False
                    self._vol_list.append(volfile)
                    # Header state belongs to the volume it was read from.
                    # Keeping the previous volume's encryption record would
                    # make _parse_header() try to decrypt the leading
                    # headers of the new volume.
                    self._main = None
                    self._hdrenc_main = None
                    continue
                break
            h.volume = volume
            h.volume_file = volfile

            if h.type == RAR_BLOCK_MAIN and not self._main:
                self._main = h
                if volume == 0 and (h.flags & RAR_MAIN_NEWNUMBERING):
                    # RAR 2.x does not set FIRSTVOLUME,
                    # so check it only if NEWNUMBERING is used
                    if (h.flags & RAR_MAIN_FIRSTVOLUME) == 0:
                        if getattr(h, 'main_volume_number', None) is not None:
                            # rar5 may have more info
                            raise NeedFirstVolume(
                                "Need to start from first volume (current: %r)"
                                % (h.main_volume_number,)
                            )
                        raise NeedFirstVolume("Need to start from first volume")
                if h.flags & RAR_MAIN_PASSWORD:
                    self._needs_password = True
                    if not self._password:
                        break
            elif h.type == RAR_BLOCK_ENDARC:
                more_vols = (h.flags & RAR_ENDARC_NEXT_VOLUME) > 0
                endarc = True
            elif h.type == RAR_BLOCK_FILE:
                # RAR 2.x does not write RAR_BLOCK_ENDARC
                if h.flags & RAR_FILE_SPLIT_AFTER:
                    more_vols = True
                # RAR 2.x does not set RAR_MAIN_FIRSTVOLUME
                if volume == 0 and h.flags & RAR_FILE_SPLIT_BEFORE:
                    raise NeedFirstVolume("Need to start from first volume")

            if h.needs_password():
                self._needs_password = True

            # store it
            self.process_entry(fd, h)

            if self._info_callback:
                self._info_callback(h)

            # go to next header
            if h.add_size > 0:
                fd.seek(h.data_offset + h.add_size, 0)

    def process_entry(self, fd, item):
        """Examine item, add into lookup cache."""
        raise NotImplementedError()

    def _decrypt_header(self, fd):
        raise NotImplementedError('_decrypt_header')

    def _parse_block_header(self, fd):
        raise NotImplementedError('_parse_block_header')

    def _open_hack(self, inf, psw):
        raise NotImplementedError('_open_hack')

    # read single header
    def _parse_header(self, fd):
        """Read one header, decrypting it first when headers are encrypted.

        Returns None when no password is available for encrypted headers
        or when the header data is broken.
        """
        try:
            # handle encrypted headers
            if (self._main and self._main.flags & RAR_MAIN_PASSWORD) or self._hdrenc_main:
                if not self._password:
                    return None
                fd = self._decrypt_header(fd)

            # now read actual header
            return self._parse_block_header(fd)
        except struct.error:
            self._set_error('Broken header in RAR file')
            return None

    # given current vol name, construct next one
    def _next_volname(self, volfile):
        if is_filelike(volfile):
            raise IOError("Working on single FD")
        if self._main.flags & RAR_MAIN_NEWNUMBERING:
            return _next_newvol(volfile)
        return _next_oldvol(volfile)

    def _set_error(self, msg, *args):
        """Remember the error message; raise BadRarFile in strict mode."""
        if args:
            msg = msg % args
        self._parse_error = msg
        if self._strict:
            raise BadRarFile(msg)

    def open(self, inf, psw):
        """Return stream object for file data."""

        if inf.file_redir:
            # cannot leave to unrar as it expects copied file to exist
            if inf.file_redir[0] in (RAR5_XREDIR_FILE_COPY, RAR5_XREDIR_HARD_LINK):
                inf = self.getinfo(inf.file_redir[2])
                if not inf:
                    raise BadRarFile('cannot find copied file')

        if inf.flags & RAR_FILE_SPLIT_BEFORE:
            raise NeedFirstVolume("Partial file, please start from first volume: " + inf.filename)

        # is temp write usable?
        use_hack = 1
        if not self._main:
            use_hack = 0
        elif self._main._must_disable_hack():
            use_hack = 0
        elif inf._must_disable_hack():
            use_hack = 0
        elif is_filelike(self._rarfile):
            pass
        elif inf.file_size > HACK_SIZE_LIMIT:
            use_hack = 0
        elif not USE_EXTRACT_HACK:
            use_hack = 0

        # now extract
        if inf.compress_type == RAR_M0 and (inf.flags & RAR_FILE_PASSWORD) == 0 and inf.file_redir is None:
            return self._open_clear(inf)
        elif use_hack:
            return self._open_hack(inf, psw)
        elif is_filelike(self._rarfile):
            return self._open_unrar_membuf(self._rarfile, inf, psw)
        else:
            return self._open_unrar(self._rarfile, inf, psw)

    def _open_clear(self, inf):
        return DirectReader(self, inf)

    def _open_hack_core(self, inf, psw, prefix, suffix):
        """Copy one entry (header + data) into a temporary archive and open it.

        The temp file consists of *prefix*, the raw bytes of the entry and
        *suffix*; it is removed again when copying fails.
        """

        size = inf.compress_size + inf.header_size
        rf = XFile(inf.volume_file, 0)
        rf.seek(inf.header_offset)

        tmpfd, tmpname = mkstemp(suffix='.rar')
        tmpf = os.fdopen(tmpfd, "wb")

        try:
            tmpf.write(prefix)
            while size > 0:
                if size > BSIZE:
                    buf = rf.read(BSIZE)
                else:
                    buf = rf.read(size)
                if not buf:
                    raise BadRarFile('read failed: ' + inf.filename)
                tmpf.write(buf)
                size -= len(buf)
            tmpf.write(suffix)
            tmpf.close()
            rf.close()
        except:
            # cleanup only, the error is always re-raised
            rf.close()
            tmpf.close()
            os.unlink(tmpname)
            raise

        return self._open_unrar(tmpname, inf, psw, tmpname)

    # write in-memory archive to temp file - needed for solid archives
    def _open_unrar_membuf(self, memfile, inf, psw):
        tmpname = membuf_tempfile(memfile)
        return self._open_unrar(tmpname, inf, psw, tmpname, force_file=True)

    # extract using unrar
    def _open_unrar(self, rarfile, inf, psw=None, tmpfile=None, force_file=False):
        cmd = [UNRAR_TOOL] + list(OPEN_ARGS)
        add_password_arg(cmd, psw)
        cmd.append("--")
        cmd.append(rarfile)

        # not giving filename avoids encoding related problems
        if not tmpfile or force_file:
            fn = inf.filename
            if PATH_SEP != os.sep:
                fn = fn.replace(PATH_SEP, os.sep)
            cmd.append(fn)

        # read from unrar pipe
        return PipeReader(self, inf, cmd, tmpfile)
class Rar3Info(RarInfo):
    """Header record as used by the RAR3 parser."""
    extract_version = 15
    salt = None
    add_size = 0
    header_crc = None
    header_size = None
    header_offset = None
    data_offset = None
    _md_class = None
    _md_expect = None

    # rar5 fields get defaults here too, so they can always be read
    file_redir = None
    blake2sp_hash = None

    def _must_disable_hack(self):
        # pick the flag bits that matter for this block type, then test them
        if self.type == RAR_BLOCK_FILE:
            blockers = RAR_FILE_PASSWORD | RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER
        elif self.type == RAR_BLOCK_MAIN:
            blockers = RAR_MAIN_SOLID | RAR_MAIN_PASSWORD
        else:
            return False
        return (self.flags & blockers) != 0
class RAR3Parser(CommonParser):
    """Parse RAR3 file format.
    """
    _expect_sig = RAR_ID
    _last_aes_key = (None, None, None)     # (salt, key, iv)

    def _decrypt_header(self, fd):
        """Return a decrypting wrapper around *fd* for the next header.

        The 8 byte salt is read from *fd*; key and iv derived from it are
        cached and reused while the salt stays the same.
        """
        if not _have_crypto:
            raise NoCrypto('Cannot parse encrypted headers - no crypto')
        salt = fd.read(8)
        if self._last_aes_key[0] == salt:
            key, iv = self._last_aes_key[1:]
        else:
            key, iv = rar3_s2k(self._password, salt)
            self._last_aes_key = (salt, key, iv)
        return HeaderDecrypt(fd, key, iv)

    # common header
    def _parse_block_header(self, fd):
        """Read and verify one block header.

        Returns the filled Rar3Info, or None on EOF, truncated header or
        header CRC mismatch.
        """
        h = Rar3Info()
        h.header_offset = fd.tell()

        # read and parse base header
        buf = fd.read(S_BLK_HDR.size)
        if not buf:
            return None
        t = S_BLK_HDR.unpack_from(buf)
        h.header_crc, h.type, h.flags, h.header_size = t

        # read full header
        if h.header_size > S_BLK_HDR.size:
            hdata = buf + fd.read(h.header_size - S_BLK_HDR.size)
        else:
            hdata = buf
        h.data_offset = fd.tell()

        # unexpected EOF?
        if len(hdata) != h.header_size:
            self._set_error('Unexpected EOF when reading header')
            return None

        pos = S_BLK_HDR.size

        # block has data associated with it?
        if h.flags & RAR_LONG_BLOCK:
            h.add_size, pos = load_le32(hdata, pos)
        else:
            h.add_size = 0

        # parse interesting ones, decide header boundaries for crc
        if h.type == RAR_BLOCK_MARK:
            return h
        elif h.type == RAR_BLOCK_MAIN:
            pos += 6
            if h.flags & RAR_MAIN_ENCRYPTVER:
                pos += 1
            crc_pos = pos
            if h.flags & RAR_MAIN_COMMENT:
                self._parse_subblocks(h, hdata, pos)
        elif h.type == RAR_BLOCK_FILE:
            pos = self._parse_file_header(h, hdata, pos - 4)
            crc_pos = pos
            if h.flags & RAR_FILE_COMMENT:
                pos = self._parse_subblocks(h, hdata, pos)
        elif h.type == RAR_BLOCK_SUB:
            pos = self._parse_file_header(h, hdata, pos - 4)
            crc_pos = h.header_size
        elif h.type == RAR_BLOCK_OLD_AUTH:
            pos += 8
            crc_pos = pos
        elif h.type == RAR_BLOCK_OLD_EXTRA:
            pos += 7
            crc_pos = pos
        else:
            crc_pos = h.header_size

        # check crc
        if h.type == RAR_BLOCK_OLD_SUB:
            crcdat = hdata[2:] + fd.read(h.add_size)
        else:
            crcdat = hdata[2:crc_pos]

        calc_crc = rar_crc32(crcdat) & 0xFFFF

        # return good header
        if h.header_crc == calc_crc:
            return h

        # header parsing failed.
        self._set_error('Header CRC error (%02x): exp=%x got=%x (xlen = %d)',
                        h.type, h.header_crc, calc_crc, len(crcdat))

        # instead of panicking, send eof
        return None

    # read file-specific header
    def _parse_file_header(self, h, hdata, pos):
        """Fill *h* from the file header at *pos*; return the new position."""
        fld = S_FILE_HDR.unpack_from(hdata, pos)
        pos += S_FILE_HDR.size

        h.compress_size = fld[0]
        h.file_size = fld[1]
        h.host_os = fld[2]
        h.CRC = fld[3]
        h.date_time = parse_dos_time(fld[4])
        h.mtime = to_datetime(h.date_time)
        h.extract_version = fld[5]
        h.compress_type = fld[6]
        name_size = fld[7]
        h.mode = fld[8]

        h._md_class = CRC32Context
        h._md_expect = h.CRC

        if h.flags & RAR_FILE_LARGE:
            # upper 32 bits of both sizes follow the fixed part
            h1, pos = load_le32(hdata, pos)
            h2, pos = load_le32(hdata, pos)
            h.compress_size |= h1 << 32
            h.file_size |= h2 << 32
            h.add_size = h.compress_size

        name, pos = load_bytes(hdata, name_size, pos)
        if h.flags & RAR_FILE_UNICODE:
            nul = name.find(ZERO)
            if nul >= 0:
                # plain name, NUL, then the specially encoded unicode part
                h.orig_filename = name[:nul]
                u = UnicodeFilename(h.orig_filename, name[nul + 1:])
                h.filename = u.decode()

                # if parsing failed fall back to simple name
                if u.failed:
                    h.filename = self._decode(h.orig_filename)
            else:
                # No separator: slicing with the -1 from find() would cut
                # off the last byte.  The whole field is the name, taken
                # here as UTF-8.
                h.orig_filename = name
                h.filename = name.decode('utf8', 'replace')
        else:
            h.orig_filename = name
            h.filename = self._decode(name)

        # change separator, if requested
        if PATH_SEP != '\\':
            h.filename = h.filename.replace('\\', PATH_SEP)

        if h.flags & RAR_FILE_SALT:
            h.salt, pos = load_bytes(hdata, 8, pos)
        else:
            h.salt = None

        # optional extended time stamps
        if h.flags & RAR_FILE_EXTTIME:
            pos = _parse_ext_time(h, hdata, pos)
        else:
            h.mtime = h.atime = h.ctime = h.arctime = None

        return pos

    # find old-style comment subblock
    def _parse_subblocks(self, h, hdata, pos):
        """Scan sub-blocks inside a header, storing an old-style comment on *h*."""
        while pos < len(hdata):
            # ordinary block header
            t = S_BLK_HDR.unpack_from(hdata, pos)
            ___scrc, stype, sflags, slen = t
            pos_next = pos + slen
            pos += S_BLK_HDR.size

            # corrupt header
            if pos_next < pos:
                break

            # followed by block-specific header
            if stype == RAR_BLOCK_OLD_COMMENT and pos + S_COMMENT_HDR.size <= pos_next:
                declen, ver, meth, crc = S_COMMENT_HDR.unpack_from(hdata, pos)
                pos += S_COMMENT_HDR.size
                data = hdata[pos : pos_next]
                cmt = rar3_decompress(ver, meth, data, declen, sflags,
                                      crc, self._password)
                if not self._crc_check:
                    h.comment = self._decode_comment(cmt)
                elif rar_crc32(cmt) & 0xFFFF == crc:
                    h.comment = self._decode_comment(cmt)

            pos = pos_next
        return pos

    def _read_comment_v3(self, inf, psw=None):
        """Load and decode the comment stored in sub-block *inf*.

        Returns None when CRC checking is enabled and the CRC does not match.
        """

        # read data
        with XFile(inf.volume_file) as rf:
            rf.seek(inf.data_offset)
            data = rf.read(inf.compress_size)

        # decompress
        cmt = rar3_decompress(inf.extract_version, inf.compress_type, data,
                              inf.file_size, inf.flags, inf.CRC, psw, inf.salt)

        # check crc
        if self._crc_check:
            crc = rar_crc32(cmt)
            if crc != inf.CRC:
                return None

        return self._decode_comment(cmt)

    def _decode(self, val):
        """Decode bytes with the first working entry of TRY_ENCODINGS.

        Falls back to the configured charset with replacement characters.
        """
        for c in TRY_ENCODINGS:
            try:
                return val.decode(c)
            except UnicodeError:
                pass
        return val.decode(self._charset, 'replace')

    def _decode_comment(self, val):
        return self._decode(val)

    def process_entry(self, fd, item):
        """Store file entries, pick up comments and the password flag."""
        if item.type == RAR_BLOCK_FILE:
            # use only first part
            if (item.flags & RAR_FILE_SPLIT_BEFORE) == 0:
                self._info_map[item.filename] = item
                self._info_list.append(item)
            elif len(self._info_list) > 0:
                # final crc is in last block
                old = self._info_list[-1]
                old.CRC = item.CRC
                old._md_expect = item._md_expect
                old.compress_size += item.compress_size

        # parse new-style comment
        if item.type == RAR_BLOCK_SUB and item.filename == 'CMT':
            if item.flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER):
                pass
            elif item.flags & RAR_FILE_SOLID:
                # file comment
                cmt = self._read_comment_v3(item, self._password)
                if len(self._info_list) > 0:
                    old = self._info_list[-1]
                    old.comment = cmt
            else:
                # archive comment
                cmt = self._read_comment_v3(item, self._password)
                self.comment = cmt

        if item.type == RAR_BLOCK_MAIN:
            if item.flags & RAR_MAIN_COMMENT:
                self.comment = item.comment
            if item.flags & RAR_MAIN_PASSWORD:
                self._needs_password = True

    # put file compressed data into temporary .rar archive, and run
    # unrar on that, thus avoiding unrar going over whole archive
    def _open_hack(self, inf, psw):
        # create main header: crc, type, flags, size, res1, res2
        prefix = RAR_ID + S_BLK_HDR.pack(0x90CF, 0x73, 0, 13) + ZERO * (2 + 4)
        return self._open_hack_core(inf, psw, prefix, EMPTY)
class Rar5Info(RarInfo):
    """Fields common to all RAR5 records.
    """
    extract_version = 50
    header_crc = None
    header_size = None
    header_offset = None
    data_offset = None

    # type=all
    block_type = None
    block_flags = None
    add_size = 0
    block_extra_size = 0

    # type=MAIN
    volume_number = None
    _md_class = None
    _md_expect = None

    def _must_disable_hack(self):
        # the generic record never vetoes the extraction hack
        return False


class Rar5BaseFile(Rar5Info):
    """Common structure of file and service records.
    """
    type = -1
    file_flags = None
    file_encryption = (0, 0, 0, EMPTY, EMPTY, EMPTY)
    file_compress_flags = None
    file_redir = None
    file_owner = None
    file_version = None
    blake2sp_hash = None

    def _must_disable_hack(self):
        # checks run in this order and stop at the first one that holds:
        # password, split across volumes, solid compression, redirection
        split_mask = RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER
        return bool(
            self.flags & RAR_FILE_PASSWORD
            or self.block_flags & split_mask
            or self.file_compress_flags & RAR5_COMPR_SOLID
            or self.file_redir
        )


class Rar5FileInfo(Rar5BaseFile):
    """File record of a RAR5 archive.
    """
    type = RAR_BLOCK_FILE


class Rar5ServiceInfo(Rar5BaseFile):
    """Service record of a RAR5 archive.
    """
    type = RAR_BLOCK_SUB


class Rar5MainInfo(Rar5Info):
    """Main archive record of a RAR5 archive.
    """
    type = RAR_BLOCK_MAIN
    main_flags = None
    main_volume_number = None

    def _must_disable_hack(self):
        # solid archives rule the hack out
        return bool(self.main_flags & RAR5_MAIN_FLAG_SOLID)


class Rar5EncryptionInfo(Rar5Info):
    """Header encryption record of a RAR5 archive.
    """
    type = RAR5_BLOCK_ENCRYPTION
    encryption_algo = None
    encryption_flags = None
    encryption_kdf_count = None
    encryption_salt = None
    encryption_check_value = None

    def needs_password(self):
        # always reports that a password is needed
        return True


class Rar5EndArcInfo(Rar5Info):
    """End-of-archive record of a RAR5 archive.
    """
    type = RAR_BLOCK_ENDARC
    endarc_flags = None
+ self._set_error('Header CRC error: exp=%x got=%x (xlen = %d)', + header_crc, calc_crc, len(hdata)) + return None + + block_type, pos = load_vint(hdata, pos) + + if block_type == RAR5_BLOCK_MAIN: + h, pos = self._parse_block_common(Rar5MainInfo(), hdata) + h = self._parse_main_block(h, hdata, pos) + elif block_type == RAR5_BLOCK_FILE: + h, pos = self._parse_block_common(Rar5FileInfo(), hdata) + h = self._parse_file_block(h, hdata, pos) + elif block_type == RAR5_BLOCK_SERVICE: + h, pos = self._parse_block_common(Rar5ServiceInfo(), hdata) + h = self._parse_file_block(h, hdata, pos) + elif block_type == RAR5_BLOCK_ENCRYPTION: + h, pos = self._parse_block_common(Rar5EncryptionInfo(), hdata) + h = self._parse_encryption_block(h, hdata, pos) + elif block_type == RAR5_BLOCK_ENDARC: + h, pos = self._parse_block_common(Rar5EndArcInfo(), hdata) + h = self._parse_endarc_block(h, hdata, pos) + else: + h = None + if h: + h.header_offset = header_offset + h.data_offset = data_offset + return h + + def _parse_block_common(self, h, hdata): + h.header_crc, pos = load_le32(hdata, 0) + hdrlen, pos = load_vint(hdata, pos) + h.header_size = hdrlen + pos + h.block_type, pos = load_vint(hdata, pos) + h.block_flags, pos = load_vint(hdata, pos) + + if h.block_flags & RAR5_BLOCK_FLAG_EXTRA_DATA: + h.block_extra_size, pos = load_vint(hdata, pos) + if h.block_flags & RAR5_BLOCK_FLAG_DATA_AREA: + h.add_size, pos = load_vint(hdata, pos) + + h.compress_size = h.add_size + + if h.block_flags & RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN: + h.flags |= RAR_SKIP_IF_UNKNOWN + if h.block_flags & RAR5_BLOCK_FLAG_DATA_AREA: + h.flags |= RAR_LONG_BLOCK + return h, pos + + def _parse_main_block(self, h, hdata, pos): + h.main_flags, pos = load_vint(hdata, pos) + if h.main_flags & RAR5_MAIN_FLAG_HAS_VOLNR: + h.main_volume_number, pos = load_vint(hdata, pos) + + h.flags |= RAR_MAIN_NEWNUMBERING + if h.main_flags & RAR5_MAIN_FLAG_SOLID: + h.flags |= RAR_MAIN_SOLID + if h.main_flags & RAR5_MAIN_FLAG_ISVOL: + h.flags |= 
RAR_MAIN_VOLUME + if h.main_flags & RAR5_MAIN_FLAG_RECOVERY: + h.flags |= RAR_MAIN_RECOVERY + if self._hdrenc_main: + h.flags |= RAR_MAIN_PASSWORD + if h.main_flags & RAR5_MAIN_FLAG_HAS_VOLNR == 0: + h.flags |= RAR_MAIN_FIRSTVOLUME + + return h + + def _parse_file_block(self, h, hdata, pos): + h.file_flags, pos = load_vint(hdata, pos) + h.file_size, pos = load_vint(hdata, pos) + h.mode, pos = load_vint(hdata, pos) + + if h.file_flags & RAR5_FILE_FLAG_HAS_MTIME: + h.mtime, pos = load_unixtime(hdata, pos) + h.date_time = h.mtime.timetuple()[:6] + if h.file_flags & RAR5_FILE_FLAG_HAS_CRC32: + h.CRC, pos = load_le32(hdata, pos) + h._md_class = CRC32Context + h._md_expect = h.CRC + + h.file_compress_flags, pos = load_vint(hdata, pos) + h.file_host_os, pos = load_vint(hdata, pos) + h.orig_filename, pos = load_vstr(hdata, pos) + h.filename = h.orig_filename.decode('utf8', 'replace') + + # use compatible values + if h.file_host_os == RAR5_OS_WINDOWS: + h.host_os = RAR_OS_WIN32 + else: + h.host_os = RAR_OS_UNIX + h.compress_type = RAR_M0 + ((h.file_compress_flags >> 7) & 7) + + if h.block_extra_size: + # allow 1 byte of garbage + while pos < len(hdata) - 1: + xsize, pos = load_vint(hdata, pos) + xdata, pos = load_bytes(hdata, xsize, pos) + self._process_file_extra(h, xdata) + + if h.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE: + h.flags |= RAR_FILE_SPLIT_BEFORE + if h.block_flags & RAR5_BLOCK_FLAG_SPLIT_AFTER: + h.flags |= RAR_FILE_SPLIT_AFTER + if h.file_flags & RAR5_FILE_FLAG_ISDIR: + h.flags |= RAR_FILE_DIRECTORY + if h.file_compress_flags & RAR5_COMPR_SOLID: + h.flags |= RAR_FILE_SOLID + + return h + + def _parse_endarc_block(self, h, hdata, pos): + h.endarc_flags, pos = load_vint(hdata, pos) + if h.endarc_flags & RAR5_ENDARC_FLAG_NEXT_VOL: + h.flags |= RAR_ENDARC_NEXT_VOLUME + return h + + def _parse_encryption_block(self, h, hdata, pos): + h.encryption_algo, pos = load_vint(hdata, pos) + h.encryption_flags, pos = load_vint(hdata, pos) + h.encryption_kdf_count, pos = 
load_byte(hdata, pos) + h.encryption_salt, pos = load_bytes(hdata, 16, pos) + if h.encryption_flags & RAR5_ENC_FLAG_HAS_CHECKVAL: + h.encryption_check_value = load_bytes(hdata, 12, pos) + if h.encryption_algo != RAR5_XENC_CIPHER_AES256: + raise BadRarFile('Unsupported header encryption cipher') + self._hdrenc_main = h + return h + + # file extra record + def _process_file_extra(self, h, xdata): + xtype, pos = load_vint(xdata, 0) + if xtype == RAR5_XFILE_TIME: + self._parse_file_xtime(h, xdata, pos) + elif xtype == RAR5_XFILE_ENCRYPTION: + self._parse_file_encryption(h, xdata, pos) + elif xtype == RAR5_XFILE_HASH: + self._parse_file_hash(h, xdata, pos) + elif xtype == RAR5_XFILE_VERSION: + self._parse_file_version(h, xdata, pos) + elif xtype == RAR5_XFILE_REDIR: + self._parse_file_redir(h, xdata, pos) + elif xtype == RAR5_XFILE_OWNER: + self._parse_file_owner(h, xdata, pos) + elif xtype == RAR5_XFILE_SERVICE: + pass + else: + pass + + # extra block for file time record + def _parse_file_xtime(self, h, xdata, pos): + tflags, pos = load_vint(xdata, pos) + ldr = load_windowstime + if tflags & RAR5_XTIME_UNIXTIME: + ldr = load_unixtime + if tflags & RAR5_XTIME_HAS_MTIME: + h.mtime, pos = ldr(xdata, pos) + h.date_time = h.mtime.timetuple()[:6] + if tflags & RAR5_XTIME_HAS_CTIME: + h.ctime, pos = ldr(xdata, pos) + if tflags & RAR5_XTIME_HAS_ATIME: + h.atime, pos = ldr(xdata, pos) + + # just remember encryption info + def _parse_file_encryption(self, h, xdata, pos): + algo, pos = load_vint(xdata, pos) + flags, pos = load_vint(xdata, pos) + kdf_count, pos = load_byte(xdata, pos) + salt, pos = load_bytes(xdata, 16, pos) + iv, pos = load_bytes(xdata, 16, pos) + checkval = None + if flags & RAR5_XENC_CHECKVAL: + checkval, pos = load_bytes(xdata, 12, pos) + if flags & RAR5_XENC_TWEAKED: + h._md_expect = None + h._md_class = NoHashContext + + h.file_encryption = (algo, flags, kdf_count, salt, iv, checkval) + h.flags |= RAR_FILE_PASSWORD + + def _parse_file_hash(self, h, xdata, 
pos): + hash_type, pos = load_vint(xdata, pos) + if hash_type == RAR5_XHASH_BLAKE2SP: + h.blake2sp_hash, pos = load_bytes(xdata, 32, pos) + if _have_blake2 and (h.file_encryption[1] & RAR5_XENC_TWEAKED) == 0: + h._md_class = Blake2SP + h._md_expect = h.blake2sp_hash + + def _parse_file_version(self, h, xdata, pos): + flags, pos = load_vint(xdata, pos) + version, pos = load_vint(xdata, pos) + h.file_version = (flags, version) + + def _parse_file_redir(self, h, xdata, pos): + redir_type, pos = load_vint(xdata, pos) + redir_flags, pos = load_vint(xdata, pos) + redir_name, pos = load_vstr(xdata, pos) + redir_name = redir_name.decode('utf8', 'replace') + h.file_redir = (redir_type, redir_flags, redir_name) + + def _parse_file_owner(self, h, xdata, pos): + user_name = group_name = user_id = group_id = None + + flags, pos = load_vint(xdata, pos) + if flags & RAR5_XOWNER_UNAME: + user_name, pos = load_vstr(xdata, pos) + if flags & RAR5_XOWNER_GNAME: + group_name, pos = load_vstr(xdata, pos) + if flags & RAR5_XOWNER_UID: + user_id, pos = load_vint(xdata, pos) + if flags & RAR5_XOWNER_GID: + group_id, pos = load_vint(xdata, pos) + + h.file_owner = (user_name, group_name, user_id, group_id) + + def process_entry(self, fd, item): + if item.block_type == RAR5_BLOCK_FILE: + # use only first part + if (item.block_flags & RAR5_BLOCK_FLAG_SPLIT_BEFORE) == 0: + self._info_map[item.filename] = item + self._info_list.append(item) + elif len(self._info_list) > 0: + # final crc is in last block + old = self._info_list[-1] + old.CRC = item.CRC + old._md_expect = item._md_expect + old.blake2sp_hash = item.blake2sp_hash + old.compress_size += item.compress_size + elif item.block_type == RAR5_BLOCK_SERVICE: + if item.filename == 'CMT': + self._load_comment(fd, item) + + def _load_comment(self, fd, item): + if item.block_flags & (RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER): + return None + if item.compress_type != RAR_M0: + return None + + if item.flags & RAR_FILE_PASSWORD: + 
algo, ___flags, kdf_count, salt, iv, ___checkval = item.file_encryption + if algo != RAR5_XENC_CIPHER_AES256: + return None + key = self._gen_key(kdf_count, salt) + f = HeaderDecrypt(fd, key, iv) + cmt = f.read(item.file_size) + else: + # archive comment + with self._open_clear(item) as cmtstream: + cmt = cmtstream.read() + + # rar bug? - appends zero to comment + cmt = cmt.split(ZERO, 1)[0] + self.comment = cmt.decode('utf8') + return None + + def _open_hack(self, inf, psw): + # len, type, blk_flags, flags + main_hdr = b'\x03\x01\x00\x00' + endarc_hdr = b'\x03\x05\x00\x00' + main_hdr = S_LONG.pack(rar_crc32(main_hdr)) + main_hdr + endarc_hdr = S_LONG.pack(rar_crc32(endarc_hdr)) + endarc_hdr + return self._open_hack_core(inf, psw, RAR5_ID + main_hdr, endarc_hdr) + +## +## Utility classes +## + +class UnicodeFilename(object): + """Handle RAR3 unicode filename decompression. + """ + def __init__(self, name, encdata): + self.std_name = bytearray(name) + self.encdata = bytearray(encdata) + self.pos = self.encpos = 0 + self.buf = bytearray() + self.failed = 0 + + def enc_byte(self): + """Copy encoded byte.""" + try: + c = self.encdata[self.encpos] + self.encpos += 1 + return c + except IndexError: + self.failed = 1 + return 0 + + def std_byte(self): + """Copy byte from 8-bit representation.""" + try: + return self.std_name[self.pos] + except IndexError: + self.failed = 1 + return ord('?') + + def put(self, lo, hi): + """Copy 16-bit value to result.""" + self.buf.append(lo) + self.buf.append(hi) + self.pos += 1 + + def decode(self): + """Decompress compressed UTF16 value.""" + hi = self.enc_byte() + flagbits = 0 + while self.encpos < len(self.encdata): + if flagbits == 0: + flags = self.enc_byte() + flagbits = 8 + flagbits -= 2 + t = (flags >> flagbits) & 3 + if t == 0: + self.put(self.enc_byte(), 0) + elif t == 1: + self.put(self.enc_byte(), hi) + elif t == 2: + self.put(self.enc_byte(), self.enc_byte()) + else: + n = self.enc_byte() + if n & 0x80: + c = self.enc_byte() 
+ for _ in range((n & 0x7f) + 2): + lo = (self.std_byte() + c) & 0xFF + self.put(lo, hi) + else: + for _ in range(n + 2): + self.put(self.std_byte(), 0) + return self.buf.decode("utf-16le", "replace") + + +class RarExtFile(RawIOBase): + """Base class for file-like object that :meth:`RarFile.open` returns. + + Provides public methods and common crc checking. + + Behaviour: + - no short reads - .read() and .readinfo() read as much as requested. + - no internal buffer, use io.BufferedReader for that. + """ + + #: Filename of the archive entry + name = None + + def __init__(self, parser, inf): + """Open archive entry. + """ + super(RarExtFile, self).__init__() + + # standard io.* properties + self.name = inf.filename + self.mode = 'rb' + + self._parser = parser + self._inf = inf + self._fd = None + self._remain = 0 + self._returncode = 0 + + self._md_context = None + + self._open() + + def _open(self): + if self._fd: + self._fd.close() + md_class = self._inf._md_class or NoHashContext + self._md_context = md_class() + self._fd = None + self._remain = self._inf.file_size + + def read(self, cnt=None): + """Read all or specified amount of data from archive entry.""" + + # sanitize cnt + if cnt is None or cnt < 0: + cnt = self._remain + elif cnt > self._remain: + cnt = self._remain + if cnt == 0: + return EMPTY + + # actual read + data = self._read(cnt) + if data: + self._md_context.update(data) + self._remain -= len(data) + if len(data) != cnt: + raise BadRarFile("Failed the read enough data") + + # done? 
+ if not data or self._remain == 0: + # self.close() + self._check() + return data + + def _check(self): + """Check final CRC.""" + final = self._md_context.digest() + exp = self._inf._md_expect + if exp is None: + return + if final is None: + return + if self._returncode: + check_returncode(self, '') + if self._remain != 0: + raise BadRarFile("Failed the read enough data") + if final != exp: + raise BadRarFile("Corrupt file - CRC check failed: %s - exp=%r got=%r" % ( + self._inf.filename, exp, final)) + + def _read(self, cnt): + """Actual read that gets sanitized cnt.""" + raise NotImplementedError("_read") + + def close(self): + """Close open resources.""" + + super(RarExtFile, self).close() + + if self._fd: + self._fd.close() + self._fd = None + + def __del__(self): + """Hook delete to make sure tempfile is removed.""" + self.close() + + def readinto(self, buf): + """Zero-copy read directly into buffer. + + Returns bytes read. + """ + raise NotImplementedError('readinto') + + def tell(self): + """Return current reading position in uncompressed data.""" + return self._inf.file_size - self._remain + + def seek(self, ofs, whence=0): + """Seek in data. + + On uncompressed files, the seeking works by actual + seeks so it's fast. On compresses files its slow + - forward seeking happends by reading ahead, + backwards by re-opening and decompressing from the start. 
+ """ + + # disable crc check when seeking + self._md_context = NoHashContext() + + fsize = self._inf.file_size + cur_ofs = self.tell() + + if whence == 0: # seek from beginning of file + new_ofs = ofs + elif whence == 1: # seek from current position + new_ofs = cur_ofs + ofs + elif whence == 2: # seek from end of file + new_ofs = fsize + ofs + else: + raise ValueError('Invalid value for whence') + + # sanity check + if new_ofs < 0: + new_ofs = 0 + elif new_ofs > fsize: + new_ofs = fsize + + # do the actual seek + if new_ofs >= cur_ofs: + self._skip(new_ofs - cur_ofs) + else: + # reopen and seek + self._open() + self._skip(new_ofs) + return self.tell() + + def _skip(self, cnt): + """Read and discard data""" + while cnt > 0: + if cnt > 8192: + buf = self.read(8192) + else: + buf = self.read(cnt) + if not buf: + break + cnt -= len(buf) + + def readable(self): + """Returns True""" + return True + + def writable(self): + """Returns False. + + Writing is not supported. + """ + return False + + def seekable(self): + """Returns True. + + Seeking is supported, although it's slow on compressed files. 
+ """ + return True + + def readall(self): + """Read all remaining data""" + # avoid RawIOBase default impl + return self.read() + + +class PipeReader(RarExtFile): + """Read data from pipe, handle tempfile cleanup.""" + + def __init__(self, rf, inf, cmd, tempfile=None): + self._cmd = cmd + self._proc = None + self._tempfile = tempfile + super(PipeReader, self).__init__(rf, inf) + + def _close_proc(self): + if not self._proc: + return + if self._proc.stdout: + self._proc.stdout.close() + if self._proc.stdin: + self._proc.stdin.close() + if self._proc.stderr: + self._proc.stderr.close() + self._proc.wait() + self._returncode = self._proc.returncode + self._proc = None + + def _open(self): + super(PipeReader, self)._open() + + # stop old process + self._close_proc() + + # launch new process + self._returncode = 0 + self._proc = custom_popen(self._cmd) + self._fd = self._proc.stdout + + # avoid situation where unrar waits on stdin + if self._proc.stdin: + self._proc.stdin.close() + + def _read(self, cnt): + """Read from pipe.""" + + # normal read is usually enough + data = self._fd.read(cnt) + if len(data) == cnt or not data: + return data + + # short read, try looping + buf = [data] + cnt -= len(data) + while cnt > 0: + data = self._fd.read(cnt) + if not data: + break + cnt -= len(data) + buf.append(data) + return EMPTY.join(buf) + + def close(self): + """Close open resources.""" + + self._close_proc() + super(PipeReader, self).close() + + if self._tempfile: + try: + os.unlink(self._tempfile) + except OSError: + pass + self._tempfile = None + + def readinto(self, buf): + """Zero-copy read directly into buffer.""" + cnt = len(buf) + if cnt > self._remain: + cnt = self._remain + vbuf = memoryview(buf) + res = got = 0 + while got < cnt: + res = self._fd.readinto(vbuf[got : cnt]) + if not res: + break + self._md_context.update(vbuf[got : got + res]) + self._remain -= res + got += res + return got + + +class DirectReader(RarExtFile): + """Read uncompressed data directly 
from archive. + """ + _cur = None + _cur_avail = None + _volfile = None + + def _open(self): + super(DirectReader, self)._open() + + self._volfile = self._inf.volume_file + self._fd = XFile(self._volfile, 0) + self._fd.seek(self._inf.header_offset, 0) + self._cur = self._parser._parse_header(self._fd) + self._cur_avail = self._cur.add_size + + def _skip(self, cnt): + """RAR Seek, skipping through rar files to get to correct position + """ + + while cnt > 0: + # next vol needed? + if self._cur_avail == 0: + if not self._open_next(): + break + + # fd is in read pos, do the read + if cnt > self._cur_avail: + cnt -= self._cur_avail + self._remain -= self._cur_avail + self._cur_avail = 0 + else: + self._fd.seek(cnt, 1) + self._cur_avail -= cnt + self._remain -= cnt + cnt = 0 + + def _read(self, cnt): + """Read from potentially multi-volume archive.""" + + buf = [] + while cnt > 0: + # next vol needed? + if self._cur_avail == 0: + if not self._open_next(): + break + + # fd is in read pos, do the read + if cnt > self._cur_avail: + data = self._fd.read(self._cur_avail) + else: + data = self._fd.read(cnt) + if not data: + break + + # got some data + cnt -= len(data) + self._cur_avail -= len(data) + buf.append(data) + + if len(buf) == 1: + return buf[0] + return EMPTY.join(buf) + + def _open_next(self): + """Proceed to next volume.""" + + # is the file split over archives? 
+ if (self._cur.flags & RAR_FILE_SPLIT_AFTER) == 0: + return False + + if self._fd: + self._fd.close() + self._fd = None + + # open next part + self._volfile = self._parser._next_volname(self._volfile) + fd = open(self._volfile, "rb", 0) + self._fd = fd + sig = fd.read(len(self._parser._expect_sig)) + if sig != self._parser._expect_sig: + raise BadRarFile("Invalid signature") + + # loop until first file header + while 1: + cur = self._parser._parse_header(fd) + if not cur: + raise BadRarFile("Unexpected EOF") + if cur.type in (RAR_BLOCK_MARK, RAR_BLOCK_MAIN): + if cur.add_size: + fd.seek(cur.add_size, 1) + continue + if cur.orig_filename != self._inf.orig_filename: + raise BadRarFile("Did not found file entry") + self._cur = cur + self._cur_avail = cur.add_size + return True + + def readinto(self, buf): + """Zero-copy read directly into buffer.""" + got = 0 + vbuf = memoryview(buf) + while got < len(buf): + # next vol needed? + if self._cur_avail == 0: + if not self._open_next(): + break + + # length for next read + cnt = len(buf) - got + if cnt > self._cur_avail: + cnt = self._cur_avail + + # read into temp view + res = self._fd.readinto(vbuf[got : got + cnt]) + if not res: + break + self._md_context.update(vbuf[got : got + res]) + self._cur_avail -= res + self._remain -= res + got += res + return got + + +class HeaderDecrypt(object): + """File-like object that decrypts from another file""" + def __init__(self, f, key, iv): + self.f = f + self.ciph = AES_CBC_Decrypt(key, iv) + self.buf = EMPTY + + def tell(self): + """Current file pos - works only on block boundaries.""" + return self.f.tell() + + def read(self, cnt=None): + """Read and decrypt.""" + if cnt > 8 * 1024: + raise BadRarFile('Bad count to header decrypt - wrong password?') + + # consume old data + if cnt <= len(self.buf): + res = self.buf[:cnt] + self.buf = self.buf[cnt:] + return res + res = self.buf + self.buf = EMPTY + cnt -= len(res) + + # decrypt new data + blklen = 16 + while cnt > 0: + enc = 
self.f.read(blklen) + if len(enc) < blklen: + break + dec = self.ciph.decrypt(enc) + if cnt >= len(dec): + res += dec + cnt -= len(dec) + else: + res += dec[:cnt] + self.buf = dec[cnt:] + cnt = 0 + + return res + + +# handle (filename|filelike) object +class XFile(object): + """Input may be filename or file object. + """ + __slots__ = ('_fd', '_need_close') + + def __init__(self, xfile, bufsize=1024): + if is_filelike(xfile): + self._need_close = False + self._fd = xfile + self._fd.seek(0) + else: + self._need_close = True + self._fd = open(xfile, 'rb', bufsize) + + def read(self, n=None): + """Read from file.""" + return self._fd.read(n) + + def tell(self): + """Return file pos.""" + return self._fd.tell() + + def seek(self, ofs, whence=0): + """Move file pos.""" + return self._fd.seek(ofs, whence) + + def readinto(self, dst): + """Read into buffer.""" + return self._fd.readinto(dst) + + def close(self): + """Close file object.""" + if self._need_close: + self._fd.close() + + def __enter__(self): + return self + + def __exit__(self, typ, val, tb): + self.close() + + +class NoHashContext(object): + """No-op hash function.""" + def __init__(self, data=None): + """Initialize""" + def update(self, data): + """Update data""" + def digest(self): + """Final hash""" + def hexdigest(self): + """Hexadecimal digest.""" + + +class CRC32Context(object): + """Hash context that uses CRC32.""" + __slots__ = ['_crc'] + + def __init__(self, data=None): + self._crc = 0 + if data: + self.update(data) + + def update(self, data): + """Process data.""" + self._crc = rar_crc32(data, self._crc) + + def digest(self): + """Final hash.""" + return self._crc + + def hexdigest(self): + """Hexadecimal digest.""" + return '%08x' % self.digest() + + +class Blake2SP(object): + """Blake2sp hash context. 
+ """ + __slots__ = ['_thread', '_buf', '_cur', '_digest'] + digest_size = 32 + block_size = 64 + parallelism = 8 + + def __init__(self, data=None): + self._buf = b'' + self._cur = 0 + self._digest = None + self._thread = [] + + for i in range(self.parallelism): + ctx = self._blake2s(i, 0, i == (self.parallelism - 1)) + self._thread.append(ctx) + + if data: + self.update(data) + + def _blake2s(self, ofs, depth, is_last): + return blake2s(node_offset=ofs, node_depth=depth, last_node=is_last, + depth=2, inner_size=32, fanout=self.parallelism) + + def _add_block(self, blk): + self._thread[self._cur].update(blk) + self._cur = (self._cur + 1) % self.parallelism + + def update(self, data): + """Hash data. + """ + view = memoryview(data) + bs = self.block_size + if self._buf: + need = bs - len(self._buf) + if len(view) < need: + self._buf += view.tobytes() + return + self._add_block(self._buf + view[:need].tobytes()) + view = view[need:] + while len(view) >= bs: + self._add_block(view[:bs]) + view = view[bs:] + self._buf = view.tobytes() + + def digest(self): + """Return final digest value. 
+ """ + if self._digest is None: + if self._buf: + self._add_block(self._buf) + self._buf = EMPTY + ctx = self._blake2s(0, 1, True) + for t in self._thread: + ctx.update(t.digest()) + self._digest = ctx.digest() + return self._digest + + def hexdigest(self): + """Hexadecimal digest.""" + return tohex(self.digest()) + + +class Rar3Sha1(object): + """Bug-compat for SHA1 + """ + digest_size = 20 + block_size = 64 + + _BLK_BE = struct.Struct(b'>16L') + _BLK_LE = struct.Struct(b'<16L') + + __slots__ = ('_nbytes', '_md', '_rarbug') + + def __init__(self, data=b'', rarbug=False): + self._md = sha1() + self._nbytes = 0 + self._rarbug = rarbug + self.update(data) + + def update(self, data): + """Process more data.""" + self._md.update(data) + bufpos = self._nbytes & 63 + self._nbytes += len(data) + + if self._rarbug and len(data) > 64: + dpos = self.block_size - bufpos + while dpos + self.block_size <= len(data): + self._corrupt(data, dpos) + dpos += self.block_size + + def digest(self): + """Return final state.""" + return self._md.digest() + + def hexdigest(self): + """Return final state as hex string.""" + return self._md.hexdigest() + + def _corrupt(self, data, dpos): + """Corruption from SHA1 core.""" + ws = list(self._BLK_BE.unpack_from(data, dpos)) + for t in range(16, 80): + tmp = ws[(t - 3) & 15] ^ ws[(t - 8) & 15] ^ ws[(t - 14) & 15] ^ ws[(t - 16) & 15] + ws[t & 15] = ((tmp << 1) | (tmp >> (32 - 1))) & 0xFFFFFFFF + self._BLK_LE.pack_into(data, dpos, *ws) + + +## +## Utility functions +## + +S_LONG = Struct(' len(buf): + raise BadRarFile('cannot load byte') + return S_BYTE.unpack_from(buf, pos)[0], end + +def load_le32(buf, pos): + """Load little-endian 32-bit integer""" + end = pos + 4 + if end > len(buf): + raise BadRarFile('cannot load le32') + return S_LONG.unpack_from(buf, pos)[0], pos + 4 + +def load_bytes(buf, num, pos): + """Load sequence of bytes""" + end = pos + num + if end > len(buf): + raise BadRarFile('cannot load bytes') + return buf[pos : end], end 
+ +def load_vstr(buf, pos): + """Load bytes prefixed by vint length""" + slen, pos = load_vint(buf, pos) + return load_bytes(buf, slen, pos) + +def load_dostime(buf, pos): + """Load LE32 dos timestamp""" + stamp, pos = load_le32(buf, pos) + tup = parse_dos_time(stamp) + return to_datetime(tup), pos + +def load_unixtime(buf, pos): + """Load LE32 unix timestamp""" + secs, pos = load_le32(buf, pos) + dt = datetime.fromtimestamp(secs, UTC) + return dt, pos + +def load_windowstime(buf, pos): + """Load LE64 windows timestamp""" + # unix epoch (1970) in seconds from windows epoch (1601) + unix_epoch = 11644473600 + val1, pos = load_le32(buf, pos) + val2, pos = load_le32(buf, pos) + secs, n1secs = divmod((val2 << 32) | val1, 10000000) + dt = datetime.fromtimestamp(secs - unix_epoch, UTC) + dt = dt.replace(microsecond=n1secs // 10) + return dt, pos + +# new-style next volume +def _next_newvol(volfile): + i = len(volfile) - 1 + while i >= 0: + if volfile[i] >= '0' and volfile[i] <= '9': + return _inc_volname(volfile, i) + i -= 1 + raise BadRarName("Cannot construct volume name: " + volfile) + +# old-style next volume +def _next_oldvol(volfile): + # rar -> r00 + if volfile[-4:].lower() == '.rar': + return volfile[:-2] + '00' + return _inc_volname(volfile, len(volfile) - 1) + +# increase digits with carry, otherwise just increment char +def _inc_volname(volfile, i): + fn = list(volfile) + while i >= 0: + if fn[i] != '9': + fn[i] = chr(ord(fn[i]) + 1) + break + fn[i] = '0' + i -= 1 + return ''.join(fn) + +# rar3 extended time fields +def _parse_ext_time(h, data, pos): + # flags and rest of data can be missing + flags = 0 + if pos + 2 <= len(data): + flags = S_SHORT.unpack_from(data, pos)[0] + pos += 2 + + mtime, pos = _parse_xtime(flags >> 3 * 4, data, pos, h.mtime) + h.ctime, pos = _parse_xtime(flags >> 2 * 4, data, pos) + h.atime, pos = _parse_xtime(flags >> 1 * 4, data, pos) + h.arctime, pos = _parse_xtime(flags >> 0 * 4, data, pos) + if mtime: + h.mtime = mtime + 
h.date_time = mtime.timetuple()[:6] + return pos + +# rar3 one extended time field +def _parse_xtime(flag, data, pos, basetime=None): + res = None + if flag & 8: + if not basetime: + basetime, pos = load_dostime(data, pos) + + # load second fractions + rem = 0 + cnt = flag & 3 + for _ in range(cnt): + b, pos = load_byte(data, pos) + rem = (b << 16) | (rem >> 8) + + # convert 100ns units to microseconds + usec = rem // 10 + if usec > 1000000: + usec = 999999 + + # dostime has room for 30 seconds only, correct if needed + if flag & 4 and basetime.second < 59: + res = basetime.replace(microsecond=usec, second=basetime.second + 1) + else: + res = basetime.replace(microsecond=usec) + return res, pos + +def is_filelike(obj): + """Filename or file object? + """ + if _have_pathlib: + filename_types = (bytes, unicode, Path) + else: + filename_types = (bytes, unicode) + + if isinstance(obj, filename_types): + return False + res = True + for a in ('read', 'tell', 'seek'): + res = res and hasattr(obj, a) + if not res: + raise ValueError("Invalid object passed as file") + return True + +def rar3_s2k(psw, salt): + """String-to-key hash for RAR3. + """ + if not isinstance(psw, unicode): + psw = psw.decode('utf8') + seed = bytearray(psw.encode('utf-16le') + salt) + h = Rar3Sha1(rarbug=True) + iv = EMPTY + for i in range(16): + for j in range(0x4000): + cnt = S_LONG.pack(i * 0x4000 + j) + h.update(seed) + h.update(cnt[:3]) + if j == 0: + iv += h.digest()[19:20] + key_be = h.digest()[:16] + key_le = pack("LLLL", key_be)) + return key_le, iv + +def rar3_decompress(vers, meth, data, declen=0, flags=0, crc=0, psw=None, salt=None): + """Decompress blob of compressed data. + + Used for data with non-standard header - eg. comments. + """ + # already uncompressed? 
+ if meth == RAR_M0 and (flags & RAR_FILE_PASSWORD) == 0: + return data + + # take only necessary flags + flags = flags & (RAR_FILE_PASSWORD | RAR_FILE_SALT | RAR_FILE_DICTMASK) + flags |= RAR_LONG_BLOCK + + # file header + fname = b'data' + date = 0 + mode = 0x20 + fhdr = S_FILE_HDR.pack(len(data), declen, RAR_OS_MSDOS, crc, + date, vers, meth, len(fname), mode) + fhdr += fname + if flags & RAR_FILE_SALT: + if not salt: + return EMPTY + fhdr += salt + + # full header + hlen = S_BLK_HDR.size + len(fhdr) + hdr = S_BLK_HDR.pack(0, RAR_BLOCK_FILE, flags, hlen) + fhdr + hcrc = rar_crc32(hdr[2:]) & 0xFFFF + hdr = S_BLK_HDR.pack(hcrc, RAR_BLOCK_FILE, flags, hlen) + fhdr + + # archive main header + mh = S_BLK_HDR.pack(0x90CF, RAR_BLOCK_MAIN, 0, 13) + ZERO * (2 + 4) + + # decompress via temp rar + tmpfd, tmpname = mkstemp(suffix='.rar') + tmpf = os.fdopen(tmpfd, "wb") + try: + tmpf.write(RAR_ID + mh + hdr + data) + tmpf.close() + + cmd = [UNRAR_TOOL] + list(OPEN_ARGS) + add_password_arg(cmd, psw, (flags & RAR_FILE_PASSWORD)) + cmd.append(tmpname) + + p = custom_popen(cmd) + return p.communicate()[0] + finally: + tmpf.close() + os.unlink(tmpname) + +def to_datetime(t): + """Convert 6-part time tuple into datetime object. + """ + if t is None: + return None + + # extract values + year, mon, day, h, m, s = t + + # assume the values are valid + try: + return datetime(year, mon, day, h, m, s) + except ValueError: + pass + + # sanitize invalid values + mday = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) + if mon < 1: + mon = 1 + if mon > 12: + mon = 12 + if day < 1: + day = 1 + if day > mday[mon]: + day = mday[mon] + if h > 23: + h = 23 + if m > 59: + m = 59 + if s > 59: + s = 59 + if mon == 2 and day == 29: + try: + return datetime(year, mon, day, h, m, s) + except ValueError: + day = 28 + return datetime(year, mon, day, h, m, s) + +def parse_dos_time(stamp): + """Parse standard 32-bit DOS timestamp. 
+ """ + sec, stamp = stamp & 0x1F, stamp >> 5 + mn, stamp = stamp & 0x3F, stamp >> 6 + hr, stamp = stamp & 0x1F, stamp >> 5 + day, stamp = stamp & 0x1F, stamp >> 5 + mon, stamp = stamp & 0x0F, stamp >> 4 + yr = (stamp & 0x7F) + 1980 + return (yr, mon, day, hr, mn, sec * 2) + +def custom_popen(cmd): + """Disconnect cmd from parent fds, read only from stdout. + """ + # needed for py2exe + creationflags = 0 + if sys.platform == 'win32': + creationflags = 0x08000000 # CREATE_NO_WINDOW + + # run command + try: + p = Popen(cmd, bufsize=0, stdout=PIPE, stdin=PIPE, stderr=STDOUT, + creationflags=creationflags) + except OSError as ex: + if ex.errno == errno.ENOENT: + raise RarCannotExec("Unrar not installed? (rarfile.UNRAR_TOOL=%r)" % UNRAR_TOOL) + if ex.errno == errno.EACCES or ex.errno == errno.EPERM: + raise RarCannotExec("Cannot execute unrar (rarfile.UNRAR_TOOL=%r)" % UNRAR_TOOL) + raise + return p + +def custom_check(cmd, ignore_retcode=False): + """Run command, collect output, raise error if needed. + """ + p = custom_popen(cmd) + out, _ = p.communicate() + if p.returncode and not ignore_retcode: + raise RarExecError("Check-run failed") + return out + +def add_password_arg(cmd, psw, ___required=False): + """Append password switch to commandline. + """ + if UNRAR_TOOL == ALT_TOOL: + return + if psw is not None: + cmd.append('-p' + psw) + else: + cmd.append('-p-') + +def check_returncode(p, out): + """Raise exception according to unrar exit code. 
+ """ + code = p.returncode + if code == 0: + return + + # map return code to exception class, codes from rar.txt + errmap = [None, + RarWarning, RarFatalError, RarCRCError, RarLockedArchiveError, # 1..4 + RarWriteError, RarOpenError, RarUserError, RarMemoryError, # 5..8 + RarCreateError, RarNoFilesError, RarWrongPassword] # 9..11 + if UNRAR_TOOL == ALT_TOOL: + errmap = [None] + if code > 0 and code < len(errmap): + exc = errmap[code] + elif code == 255: + exc = RarUserBreak + elif code < 0: + exc = RarSignalExit + else: + exc = RarUnknownError + + # format message + if out: + msg = "%s [%d]: %s" % (exc.__doc__, p.returncode, out) + else: + msg = "%s [%d]" % (exc.__doc__, p.returncode) + + raise exc(msg) + +def hmac_sha256(key, data): + """HMAC-SHA256""" + return HMAC(key, data, sha256).digest() + +def membuf_tempfile(memfile): + """Write in-memory file object to real file.""" + memfile.seek(0, 0) + + tmpfd, tmpname = mkstemp(suffix='.rar') + tmpf = os.fdopen(tmpfd, "wb") + + try: + while True: + buf = memfile.read(BSIZE) + if not buf: + break + tmpf.write(buf) + tmpf.close() + except: + tmpf.close() + os.unlink(tmpname) + raise + return tmpname + +class XTempFile(object): + """Real file for archive. + """ + __slots__ = ('_tmpfile', '_filename') + + def __init__(self, rarfile): + if is_filelike(rarfile): + self._tmpfile = membuf_tempfile(rarfile) + self._filename = self._tmpfile + else: + self._tmpfile = None + self._filename = rarfile + + def __enter__(self): + return self._filename + + def __exit__(self, exc_type, exc_value, tb): + if self._tmpfile: + try: + os.unlink(self._tmpfile) + except OSError: + pass + self._tmpfile = None + +# +# Check if unrar works +# + +ORIG_UNRAR_TOOL = UNRAR_TOOL +ORIG_OPEN_ARGS = OPEN_ARGS +ORIG_EXTRACT_ARGS = EXTRACT_ARGS +ORIG_TEST_ARGS = TEST_ARGS + +def _check_unrar_tool(): + global UNRAR_TOOL, OPEN_ARGS, EXTRACT_ARGS, TEST_ARGS + try: + # does UNRAR_TOOL work? 
+ custom_check([ORIG_UNRAR_TOOL], True) + + UNRAR_TOOL = ORIG_UNRAR_TOOL + OPEN_ARGS = ORIG_OPEN_ARGS + EXTRACT_ARGS = ORIG_EXTRACT_ARGS + TEST_ARGS = ORIG_TEST_ARGS + except RarCannotExec: + try: + # does ALT_TOOL work? + custom_check([ALT_TOOL] + list(ALT_CHECK_ARGS), True) + # replace config + UNRAR_TOOL = ALT_TOOL + OPEN_ARGS = ALT_OPEN_ARGS + EXTRACT_ARGS = ALT_EXTRACT_ARGS + TEST_ARGS = ALT_TEST_ARGS + except RarCannotExec: + # no usable tool, only uncompressed archives work + return False + return True + +_check_unrar_tool() + diff --git a/lib/rarfile_py3/__init__.py b/lib/rarfile_py3/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/lib/rarfile_py3/rarfile.py b/lib/rarfile_py3/rarfile.py new file mode 100644 index 0000000..a4d7725 --- /dev/null +++ b/lib/rarfile_py3/rarfile.py @@ -0,0 +1,3054 @@ +# rarfile.py +# +# Copyright (c) 2005-2019 Marko Kreen +# +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +r"""RAR archive reader. + +This is Python module for Rar archive reading. The interface +is made as :mod:`zipfile`-like as possible. + +Basic logic: + - Parse archive structure with Python. + - Extract non-compressed files with Python + - Extract compressed files with unrar. 
+ - Optionally write compressed data to temp file to speed up unrar, + otherwise it needs to scan whole archive on each execution. + +Example:: + + import rarfile + + rf = rarfile.RarFile("myarchive.rar") + for f in rf.infolist(): + print(f.filename, f.file_size) + if f.filename == "README": + print(rf.read(f)) + +Archive files can also be accessed via file-like object returned +by :meth:`RarFile.open`:: + + import rarfile + + with rarfile.RarFile("archive.rar") as rf: + with rf.open("README") as f: + for ln in f: + print(ln.strip()) + +For decompression to work, either ``unrar`` or ``unar`` tool must be in PATH. + +""" + +## +## Imports and compat - support various crypto options +## + +import sys +import os +import errno +import struct + +from struct import pack, unpack, Struct +from binascii import crc32 as rar_crc32, hexlify +from tempfile import mkstemp +from subprocess import Popen, PIPE, STDOUT +from io import RawIOBase, BytesIO +from hashlib import sha1, sha256, blake2s +from hmac import HMAC +from datetime import datetime, timedelta, timezone +from pathlib import Path + +# only needed for encryped headers +try: + try: + from cryptography.hazmat.primitives.ciphers import algorithms, modes, Cipher + from cryptography.hazmat.backends import default_backend + from cryptography.hazmat.primitives import hashes + from cryptography.hazmat.primitives.kdf import pbkdf2 + + class AES_CBC_Decrypt(object): + """Decrypt API""" + def __init__(self, key, iv): + ciph = Cipher(algorithms.AES(key), modes.CBC(iv), default_backend()) + self.decrypt = ciph.decryptor().update + + def pbkdf2_sha256(password, salt, iters): + """PBKDF2 with HMAC-SHA256""" + ctx = pbkdf2.PBKDF2HMAC(hashes.SHA256(), 32, salt, iters, default_backend()) + return ctx.derive(password) + + except ImportError: + from Crypto.Cipher import AES + from Crypto.Protocol import KDF + + class AES_CBC_Decrypt(object): + """Decrypt API""" + def __init__(self, key, iv): + self.decrypt = AES.new(key, AES.MODE_CBC, 
iv).decrypt + + def pbkdf2_sha256(password, salt, iters): + """PBKDF2 with HMAC-SHA256""" + return KDF.PBKDF2(password, salt, 32, iters, hmac_sha256) + + _have_crypto = 1 +except ImportError: + _have_crypto = 0 + + +def tohex(data): + """Return hex string.""" + return hexlify(data).decode("ascii") + + +__version__ = "3.1" + +# export only interesting items +__all__ = ["is_rarfile", "is_rarfile_sfx", "RarInfo", "RarFile", "RarExtFile"] + +## +## Module configuration. Can be tuned after importing. +## + + +#: executable for unrar tool +UNRAR_TOOL = "unrar" + +#: executable for unar tool +UNAR_TOOL = "unar" +LSAR_TOOL = "lsar" + +#: executable for bsdtar tool +BSDTAR_TOOL = "bsdtar" + +#: default fallback charset +DEFAULT_CHARSET = "windows-1252" + +#: list of encodings to try, with fallback to DEFAULT_CHARSET if none succeed +TRY_ENCODINGS = ("utf8", "utf-16le") + +#: whether to speed up decompression by using tmp archive +USE_EXTRACT_HACK = 1 + +#: limit the filesize for tmp archive usage +HACK_SIZE_LIMIT = 20 * 1024 * 1024 + +#: set specific directory for mkstemp() used by hack dir usage +HACK_TMP_DIR = None + +#: Separator for path name components. RAR internally uses "\\". +#: Use "/" to be similar with zipfile. 
PATH_SEP = "/"

##
## rar constants
##

# block types
# (RAR3 header type byte; the trailing comment on each line is the ASCII
# character that has the same value)
RAR_BLOCK_MARK = 0x72 # r
RAR_BLOCK_MAIN = 0x73 # s
RAR_BLOCK_FILE = 0x74 # t
RAR_BLOCK_OLD_COMMENT = 0x75 # u
RAR_BLOCK_OLD_EXTRA = 0x76 # v
RAR_BLOCK_OLD_SUB = 0x77 # w
RAR_BLOCK_OLD_RECOVERY = 0x78 # x
RAR_BLOCK_OLD_AUTH = 0x79 # y
RAR_BLOCK_SUB = 0x7a # z
RAR_BLOCK_ENDARC = 0x7b # {

# flags for RAR_BLOCK_MAIN
RAR_MAIN_VOLUME = 0x0001
RAR_MAIN_COMMENT = 0x0002
RAR_MAIN_LOCK = 0x0004
RAR_MAIN_SOLID = 0x0008
# volume naming scheme selector: the parser picks _next_newvol() when this
# bit is set and _next_oldvol() otherwise
RAR_MAIN_NEWNUMBERING = 0x0010
RAR_MAIN_AUTH = 0x0020
RAR_MAIN_RECOVERY = 0x0040
# headers are encrypted; the parser needs a password before it can go on
RAR_MAIN_PASSWORD = 0x0080
RAR_MAIN_FIRSTVOLUME = 0x0100
# main header carries one extra byte that the parser skips
RAR_MAIN_ENCRYPTVER = 0x0200

# flags for RAR_BLOCK_FILE
RAR_FILE_SPLIT_BEFORE = 0x0001
RAR_FILE_SPLIT_AFTER = 0x0002
RAR_FILE_PASSWORD = 0x0004
RAR_FILE_COMMENT = 0x0008
RAR_FILE_SOLID = 0x0010
# RAR_FILE_DICT* values all live inside the RAR_FILE_DICTMASK bits.
# RAR_FILE_DIRECTORY equals the full mask, which is why RarInfo.isdir()
# compares (flags & RAR_FILE_DIRECTORY) for equality instead of truthiness.
RAR_FILE_DICTMASK = 0x00e0
RAR_FILE_DICT64 = 0x0000
RAR_FILE_DICT128 = 0x0020
RAR_FILE_DICT256 = 0x0040
RAR_FILE_DICT512 = 0x0060
RAR_FILE_DICT1024 = 0x0080
RAR_FILE_DICT2048 = 0x00a0
RAR_FILE_DICT4096 = 0x00c0
RAR_FILE_DIRECTORY = 0x00e0
# two extra 32-bit words supply the high halves of compress_size/file_size
RAR_FILE_LARGE = 0x0100
# name field holds a plain name, a NUL byte, then encoded unicode data
RAR_FILE_UNICODE = 0x0200
# an 8-byte salt follows the file name
RAR_FILE_SALT = 0x0400
RAR_FILE_VERSION = 0x0800
# extended timestamps follow (parsed by _parse_ext_time)
RAR_FILE_EXTTIME = 0x1000
RAR_FILE_EXTFLAGS = 0x2000

# flags for RAR_BLOCK_ENDARC
RAR_ENDARC_NEXT_VOLUME = 0x0001
RAR_ENDARC_DATACRC = 0x0002
RAR_ENDARC_REVSPACE = 0x0004
RAR_ENDARC_VOLNR = 0x0008

# flags common to all blocks
RAR_SKIP_IF_UNKNOWN = 0x4000
# block header carries a 32-bit add_size (size of data following the header)
RAR_LONG_BLOCK = 0x8000

# Host OS types
RAR_OS_MSDOS = 0
RAR_OS_OS2 = 1
RAR_OS_WIN32 = 2
RAR_OS_UNIX = 3
RAR_OS_MACOS = 4
RAR_OS_BEOS = 5

# Compression methods - "0".."5"
# (stored as the ASCII codes of those digits; RAR_M0 is handled by the parser
# as data that can be read directly without running the unpack tool)
RAR_M0 = 0x30
RAR_M1 = 0x31
RAR_M2 = 0x32
RAR_M3 = 0x33
RAR_M4 = 0x34
RAR_M5 = 0x35

#
# RAR5 constants
#

# RAR5 block types (read as a vint from the block header)
RAR5_BLOCK_MAIN = 1
RAR5_BLOCK_FILE = 2
RAR5_BLOCK_SERVICE = 3
RAR5_BLOCK_ENCRYPTION = 4
RAR5_BLOCK_ENDARC = 5

# RAR5 common block header flags (the group continues past this point).
# EXTRA_DATA: an extra-area size vint is present in the header.
# DATA_AREA: a data-area size vint is present; it becomes add_size.
RAR5_BLOCK_FLAG_EXTRA_DATA = 0x01
RAR5_BLOCK_FLAG_DATA_AREA = 0x02
RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN = 0x04
RAR5_BLOCK_FLAG_SPLIT_BEFORE = 0x08
RAR5_BLOCK_FLAG_SPLIT_AFTER = 0x10
RAR5_BLOCK_FLAG_DEPENDS_PREV = 0x20
RAR5_BLOCK_FLAG_KEEP_WITH_PARENT = 0x40

RAR5_MAIN_FLAG_ISVOL = 0x01
RAR5_MAIN_FLAG_HAS_VOLNR = 0x02
RAR5_MAIN_FLAG_SOLID = 0x04
RAR5_MAIN_FLAG_RECOVERY = 0x08
RAR5_MAIN_FLAG_LOCKED = 0x10

RAR5_FILE_FLAG_ISDIR = 0x01
RAR5_FILE_FLAG_HAS_MTIME = 0x02
RAR5_FILE_FLAG_HAS_CRC32 = 0x04
RAR5_FILE_FLAG_UNKNOWN_SIZE = 0x08

RAR5_COMPR_SOLID = 0x40

RAR5_ENC_FLAG_HAS_CHECKVAL = 0x01

RAR5_ENDARC_FLAG_NEXT_VOL = 0x01

RAR5_XFILE_ENCRYPTION = 1
RAR5_XFILE_HASH = 2
RAR5_XFILE_TIME = 3
RAR5_XFILE_VERSION = 4
RAR5_XFILE_REDIR = 5
RAR5_XFILE_OWNER = 6
RAR5_XFILE_SERVICE = 7

RAR5_XTIME_UNIXTIME = 0x01
RAR5_XTIME_HAS_MTIME = 0x02
RAR5_XTIME_HAS_CTIME = 0x04
RAR5_XTIME_HAS_ATIME = 0x08

RAR5_XENC_CIPHER_AES256 = 0

RAR5_XENC_CHECKVAL = 0x01
RAR5_XENC_TWEAKED = 0x02

RAR5_XHASH_BLAKE2SP = 0

RAR5_XREDIR_UNIX_SYMLINK = 1
RAR5_XREDIR_WINDOWS_SYMLINK = 2
RAR5_XREDIR_WINDOWS_JUNCTION = 3
RAR5_XREDIR_HARD_LINK = 4
RAR5_XREDIR_FILE_COPY = 5

RAR5_XREDIR_ISDIR = 0x01

RAR5_XOWNER_UNAME = 0x01
RAR5_XOWNER_GNAME = 0x02
RAR5_XOWNER_UID = 0x04
RAR5_XOWNER_GID = 0x08

RAR5_OS_WINDOWS = 0
RAR5_OS_UNIX = 1

##
## internal constants
##

# The two archive signatures share their first six bytes; the SFX scanner
# searches for that common prefix and then compares the full signature.
RAR_ID = b"Rar!\x1a\x07\x00"
RAR5_ID = b"Rar!\x1a\x07\x01\x00"
ZERO = b"\0"
EMPTY = b""
UTC = timezone(timedelta(0), "UTC")
BSIZE = 32 * 1024

SFX_MAX_SIZE = 2 * 1024 * 1024
RAR_V3 = 3
RAR_V5 = 5


def _get_rar_version(xfile):
    """Check quickly whether file is rar archive.

    Reads only as many bytes as the longer signature needs from the start
    of *xfile* and returns RAR_V3, RAR_V5, or 0 when neither matches.
    """
    with XFile(xfile) as fd:
        buf = fd.read(len(RAR5_ID))
    if buf.startswith(RAR_ID):
        return RAR_V3
    if buf.startswith(RAR5_ID):
        return RAR_V5
    return 0


def _find_sfx_header(xfile):
    """Locate a RAR signature anywhere in the leading part of *xfile*.

    Returns a ``(version, offset)`` tuple where version is RAR_V3 or RAR_V5
    and offset is the position of the signature, or ``(0, 0)`` when no
    signature is found.
    """
    # common prefix of RAR_ID and RAR5_ID
    sig = RAR_ID[:-1]
    buf = BytesIO()
    # small first read covers plain archives cheaply; the second read
    # covers archives that sit behind an SFX stub
    steps = (64, SFX_MAX_SIZE)

    with XFile(xfile) as fd:
        for step in steps:
            data = fd.read(step)
            if not data:
                break
            buf.write(data)
            curdata = buf.getvalue()
            # restart from 0: a signature that straddled the previous read
            # boundary could not be confirmed on the earlier pass
            findpos = 0
            while True:
                pos = curdata.find(sig, findpos)
                if pos < 0:
                    break
                if curdata[pos:pos + len(RAR_ID)] == RAR_ID:
                    return RAR_V3, pos
                if curdata[pos:pos + len(RAR5_ID)] == RAR5_ID:
                    return RAR_V5, pos
                findpos = pos + len(sig)
    return 0, 0

##
## Public interface
##


def is_rarfile(xfile):
    """Check quickly whether file is rar archive.
    """
    return _get_rar_version(xfile) > 0


def is_rarfile_sfx(xfile):
    """Check whether file is rar archive with support for SFX.

    It will read 2M from file.
    """
    return _find_sfx_header(xfile)[0] > 0


class Error(Exception):
    """Base class for rarfile errors."""


class BadRarFile(Error):
    """Incorrect data in archive."""


class NotRarFile(Error):
    """The file is not RAR archive."""


class BadRarName(Error):
    """Cannot guess multipart name components."""


class NoRarEntry(Error):
    """File not found in RAR"""


class PasswordRequired(Error):
    """File requires password"""


class NeedFirstVolume(Error):
    """Need to start from first volume."""


class NoCrypto(Error):
    """Cannot parse encrypted headers - no crypto available."""


class RarExecError(Error):
    """Problem reported by unrar/rar."""


class RarWarning(RarExecError):
    """Non-fatal error"""


class RarFatalError(RarExecError):
    """Fatal error"""


class RarCRCError(RarExecError):
    """CRC error during unpacking"""


class RarLockedArchiveError(RarExecError):
    """Must not modify locked archive"""


class RarWriteError(RarExecError):
    """Write error"""


class RarOpenError(RarExecError):
    """Open error"""


class RarUserError(RarExecError):
    """User error"""


class RarMemoryError(RarExecError):
    """Memory error"""


class RarCreateError(RarExecError):
    """Create error"""


class RarNoFilesError(RarExecError):
    """No files that match pattern were found"""


class RarUserBreak(RarExecError):
    """User stop"""


class RarWrongPassword(RarExecError):
    """Incorrect password"""


class RarUnknownError(RarExecError):
    """Unknown exit code"""


class RarSignalExit(RarExecError):
    """Unrar exited with signal"""


class RarCannotExec(RarExecError):
    """Executable not found."""


class RarInfo(object):
    r"""An entry in rar archive.

    RAR3 extended timestamps are :class:`datetime.datetime` objects without timezone.
    RAR5 extended timestamps are :class:`datetime.datetime` objects with UTC timezone.

    Attributes:

        filename
            File name with relative path.
            Path separator is "/".  Always unicode string.

        date_time
            File modification timestamp.  As tuple of (year, month, day, hour, minute, second).
            RAR5 allows archives where it is missing, it's None then.

        file_size
            Uncompressed size.

        compress_size
            Compressed size.

        compress_type
            Compression method: one of :data:`RAR_M0` .. :data:`RAR_M5` constants.

        extract_version
            Minimal Rar version needed for decompressing.  As (major*10 + minor),
            so 2.9 is 29.

            RAR3: 10, 20, 29

            RAR5 does not have such field in archive, it's simply set to 50.

        host_os
            Host OS type, one of RAR_OS_* constants.

            RAR3: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`, :data:`RAR_OS_MSDOS`,
            :data:`RAR_OS_OS2`, :data:`RAR_OS_BEOS`.

            RAR5: :data:`RAR_OS_WIN32`, :data:`RAR_OS_UNIX`.

        mode
            File attributes. May be either dos-style or unix-style, depending on host_os.

        mtime
            File modification time.  Same value as :attr:`date_time`
            but as :class:`datetime.datetime` object with extended precision.

        ctime
            Optional time field: creation time.  As :class:`datetime.datetime` object.

        atime
            Optional time field: last access time.  As :class:`datetime.datetime` object.

        arctime
            Optional time field: archival time.  As :class:`datetime.datetime` object.
            (RAR3-only)

        CRC
            CRC-32 of uncompressed file, unsigned int.

            RAR5: may be None.

        blake2sp_hash
            Blake2SP hash over decompressed data.  (RAR5-only)

        comment
            Optional file comment field.  Unicode string.  (RAR3-only)

        file_redir
            If not None, file is link of some sort.  Contains tuple of (type, flags, target).
            (RAR5-only)

            Type is one of constants:

                :data:`RAR5_XREDIR_UNIX_SYMLINK`
                    unix symlink to target.
                :data:`RAR5_XREDIR_WINDOWS_SYMLINK`
                    windows symlink to target.
                :data:`RAR5_XREDIR_WINDOWS_JUNCTION`
                    windows junction.
                :data:`RAR5_XREDIR_HARD_LINK`
                    hard link to target.
                :data:`RAR5_XREDIR_FILE_COPY`
                    current file is copy of another archive entry.

            Flags may contain :data:`RAR5_XREDIR_ISDIR` bit.

        volume
            Volume nr, starting from 0.

        volume_file
            Volume file name, where file starts.

    """

    # zipfile-compatible fields
    filename = None
    file_size = None
    compress_size = None
    date_time = None
    CRC = None
    volume = None
    # documented above; the parser assigns it per entry, the class default
    # keeps the attribute readable on instances that were never parsed
    volume_file = None
    orig_filename = None

    # optional extended time fields, datetime() objects.
    mtime = None
    ctime = None
    atime = None

    extract_version = None
    mode = None
    host_os = None
    compress_type = None

    # rar3-only fields
    comment = None
    arctime = None

    # rar5-only fields
    blake2sp_hash = None
    file_redir = None

    # internal fields
    flags = 0
    type = None

    def isdir(self):
        """Returns True if entry is a directory.
        """
        if self.type == RAR_BLOCK_FILE:
            # RAR_FILE_DIRECTORY is a multi-bit value, so all of its bits
            # must be set; a plain truthiness test would match DICT* values
            return (self.flags & RAR_FILE_DIRECTORY) == RAR_FILE_DIRECTORY
        return False

    def needs_password(self):
        """Returns True if data is stored password-protected.
        """
        if self.type == RAR_BLOCK_FILE:
            return (self.flags & RAR_FILE_PASSWORD) > 0
        return False


class RarFile(object):
    """Parse RAR structure, provide access to files in archive.
    """

    #: Archive comment.  Unicode string or None.
    comment = None

    def __init__(self, rarfile, mode="r", charset=None, info_callback=None,
                 crc_check=True, errors="stop"):
        """Open and parse a RAR archive.

        Parameters:

            rarfile
                archive file name
            mode
                only "r" is supported.
            charset
                fallback charset to use, if filenames are not already Unicode-enabled.
            info_callback
                debug callback, gets to see all archive entries.
            crc_check
                set to False to disable CRC checks
            errors
                Either "stop" to quietly stop parsing on errors,
                or "strict" to raise errors.  Default is "stop".

        Raises ValueError for an unknown ``errors`` value and
        NotImplementedError for any mode other than "r".
        """
        if isinstance(rarfile, Path):
            self._rarfile = str(rarfile)
        else:
            self._rarfile = rarfile

        self._charset = charset or DEFAULT_CHARSET
        self._info_callback = info_callback
        self._crc_check = crc_check
        self._password = None
        self._file_parser = None

        if errors == "stop":
            self._strict = False
        elif errors == "strict":
            self._strict = True
        else:
            raise ValueError("Invalid value for errors= parameter.")

        if mode != "r":
            raise NotImplementedError("RarFile supports only mode=r")

        self._parse()

    def __enter__(self):
        """Open context."""
        return self

    def __exit__(self, typ, value, traceback):
        """Exit context"""
        self.close()

    def setpassword(self, password):
        """Sets the password to use when extracting.

        When the archive headers themselves are encrypted, the archive is
        parsed again so that entries become visible with the new password.
        """
        self._password = password
        if self._file_parser:
            if self._file_parser.has_header_encryption():
                # drop the parser: headers must be re-read with the password
                self._file_parser = None
        if not self._file_parser:
            self._parse()
        else:
            self._file_parser.setpassword(self._password)

    def needs_password(self):
        """Returns True if any archive entries require password for extraction.
        """
        return self._file_parser.needs_password()

    def namelist(self):
        """Return list of filenames in archive.
        """
        return [f.filename for f in self.infolist()]

    def infolist(self):
        """Return RarInfo objects for all files/directories in archive.
        """
        return self._file_parser.infolist()

    def volumelist(self):
        """Returns filenames of archive volumes.

        In case of single-volume archive, the list contains
        just the name of main archive file.
        """
        return self._file_parser.volumelist()

    def getinfo(self, fname):
        """Return RarInfo for file.
        """
        return self._file_parser.getinfo(fname)

    def open(self, fname, mode="r", psw=None):
        """Returns file-like object (:class:`RarExtFile`) from where the data can be read.

        The object implements :class:`io.RawIOBase` interface, so it can
        be further wrapped with :class:`io.BufferedReader`
        and :class:`io.TextIOWrapper`.

        The object is seekable, although the seeking is fast only on
        uncompressed files, on compressed files the seeking is implemented
        by reading ahead and/or restarting the decompression.

        Parameters:

            fname
                file name or RarInfo instance.
            mode
                must be "r"
            psw
                password to use for extracting.

        Raises NotImplementedError for other modes, TypeError when the entry
        is a directory and PasswordRequired when the entry is encrypted and
        no password is available.
        """

        if mode != "r":
            raise NotImplementedError("RarFile.open() supports only mode=r")

        # entry lookup
        inf = self.getinfo(fname)
        if inf.isdir():
            raise TypeError("Directory does not have any data: " + inf.filename)

        # check password
        if inf.needs_password():
            psw = psw or self._password
            if psw is None:
                raise PasswordRequired("File %s requires password" % inf.filename)
        else:
            # never hand a password to the tool for unencrypted entries
            psw = None

        return self._file_parser.open(inf, psw)

    def read(self, fname, psw=None):
        """Return uncompressed data for archive entry.

        For longer files using :meth:`RarFile.open` may be better idea.

        Parameters:

            fname
                filename or RarInfo instance
            psw
                password to use for extracting.
        """

        with self.open(fname, "r", psw) as f:
            return f.read()

    def close(self):
        """Release open resources."""

    def printdir(self):
        """Print archive file list to stdout."""
        for f in self.infolist():
            print(f.filename)

    @staticmethod
    def _member_name(member):
        """Return the archive entry name for a RarInfo, Path or plain name."""
        if isinstance(member, RarInfo):
            return member.filename
        if isinstance(member, Path):
            return str(member)
        return member

    def extract(self, member, path=None, pwd=None):
        """Extract single file into current directory.

        Parameters:

            member
                filename or :class:`RarInfo` instance
            path
                optional destination path
            pwd
                optional password to use
        """
        self._extract([self._member_name(member)], path, pwd)

    def extractall(self, path=None, members=None, pwd=None):
        """Extract all files into current directory.

        Parameters:

            path
                optional destination path
            members
                optional filename or :class:`RarInfo` instance list to extract
            pwd
                optional password to use
        """
        # Normalise every member exactly like extract() does.  A Path left
        # in the list would reach _extract(), whose fn.replace(...) call
        # means "rename file" on a Path, not string substitution.
        fnlist = []
        if members is not None:
            fnlist = [self._member_name(m) for m in members]
        self._extract(fnlist, path, pwd)

    def testrar(self):
        """Let "unrar" test the archive.
        """
        setup = tool_setup()
        with XTempFile(self._rarfile) as rarfile:
            cmd = setup.test_cmdline(self._password, rarfile)
            p = custom_popen(cmd)
            output = p.communicate()[0]
            check_returncode(p, output)

    def strerror(self):
        """Return error string if parsing failed or None if no problems.
        """
        if not self._file_parser:
            return "Not a RAR file"
        return self._file_parser.strerror()

    ##
    ## private methods
    ##

    def _parse(self):
        """Detect the archive format, build the matching parser and run it."""
        ver, sfx_ofs = _find_sfx_header(self._rarfile)
        if ver == RAR_V3:
            p3 = RAR3Parser(self._rarfile, self._password, self._crc_check,
                            self._charset, self._strict, self._info_callback,
                            sfx_ofs)
            self._file_parser = p3  # noqa
        elif ver == RAR_V5:
            p5 = RAR5Parser(self._rarfile, self._password, self._crc_check,
                            self._charset, self._strict, self._info_callback,
                            sfx_ofs)
            self._file_parser = p5  # noqa
        else:
            raise BadRarFile("Not a RAR file")

        self._file_parser.parse()
        self.comment = self._file_parser.comment

    # call unrar to extract a file
    def _extract(self, fnlist, path=None, psw=None):
        """Run the external tool to extract *fnlist* (names) into *path*."""
        setup = tool_setup()

        # archive names use PATH_SEP; the tool expects the OS separator
        if os.sep != PATH_SEP:
            fnlist = [fn.replace(PATH_SEP, os.sep) for fn in fnlist]

        if path and isinstance(path, Path):
            path = str(path)

        psw = psw or self._password

        # rar file
        with XTempFile(self._rarfile) as rarfn:
            cmd = setup.extract_cmdline(psw, rarfn, fnlist, path)

            # call
            p = custom_popen(cmd)
            output = p.communicate()[0]
            check_returncode(p, output)

#
# File format parsing
#


class CommonParser(object):
    """Shared parser parts."""
    _main = None
    _hdrenc_main = None
    _needs_password = False
    _fd = None
    _expect_sig = None
    _parse_error = None
    _password = None
    comment = None

    def __init__(self, rarfile, password, crc_check, charset, strict, info_cb, sfx_offset):
        """Store parser settings and create empty entry/volume caches."""
        self._rarfile = rarfile
        self._password = password
        self._crc_check = crc_check
        self._charset = charset
        self._strict = strict
        self._info_callback = info_cb
        self._info_list = []
        self._info_map = {}
        self._vol_list = []
        self._sfx_offset = sfx_offset

    def has_header_encryption(self):
        """Returns True if headers are encrypted
        """
        if self._hdrenc_main:
            return True
        if self._main:
            if self._main.flags & RAR_MAIN_PASSWORD:
                return True
        return False

def setpassword(self, psw): + """Set cached password.""" + self._password = psw + + def volumelist(self): + """Volume files""" + return self._vol_list + + def needs_password(self): + """Is password required""" + return self._needs_password + + def strerror(self): + """Last error""" + return self._parse_error + + def infolist(self): + """List of RarInfo records. + """ + return self._info_list + + def getinfo(self, member): + """Return RarInfo for filename + """ + if isinstance(member, RarInfo): + fname = member.filename + elif isinstance(member, Path): + fname = str(member) + else: + fname = member + + # accept both ways here + if PATH_SEP == "/": + fname2 = fname.replace("\\", "/") + else: + fname2 = fname.replace("/", "\\") + + try: + return self._info_map[fname] + except KeyError: + try: + return self._info_map[fname2] + except KeyError: + raise NoRarEntry("No such file: %s" % fname) + + # read rar + def parse(self): + """Process file.""" + self._fd = None + try: + self._parse_real() + finally: + if self._fd: + self._fd.close() + self._fd = None + + def _parse_real(self): + fd = XFile(self._rarfile) + self._fd = fd + fd.seek(self._sfx_offset, 0) + sig = fd.read(len(self._expect_sig)) + if sig != self._expect_sig: + if isinstance(self._rarfile, str): + raise NotRarFile("Not a Rar archive: {}".format(self._rarfile)) + raise NotRarFile("Not a Rar archive") + + volume = 0 # first vol (.rar) is 0 + more_vols = False + endarc = False + volfile = self._rarfile + self._vol_list = [self._rarfile] + while 1: + if endarc: + h = None # don"t read past ENDARC + else: + h = self._parse_header(fd) + if not h: + if more_vols: + volume += 1 + fd.close() + try: + volfile = self._next_volname(volfile) + fd = XFile(volfile) + except IOError: + self._set_error("Cannot open next volume: %s", volfile) + break + self._fd = fd + sig = fd.read(len(self._expect_sig)) + if sig != self._expect_sig: + self._set_error("Invalid volume sig: %s", volfile) + break + more_vols = False + endarc = 
False + self._vol_list.append(volfile) + self._main = None + continue + break + h.volume = volume + h.volume_file = volfile + + if h.type == RAR_BLOCK_MAIN and not self._main: + self._main = h + if volume == 0 and (h.flags & RAR_MAIN_NEWNUMBERING): + # RAR 2.x does not set FIRSTVOLUME, + # so check it only if NEWNUMBERING is used + if (h.flags & RAR_MAIN_FIRSTVOLUME) == 0: + if getattr(h, "main_volume_number", None) is not None: + # rar5 may have more info + raise NeedFirstVolume( + "Need to start from first volume (current: %r)" + % (h.main_volume_number,) + ) + raise NeedFirstVolume("Need to start from first volume") + if h.flags & RAR_MAIN_PASSWORD: + self._needs_password = True + if not self._password: + break + elif h.type == RAR_BLOCK_ENDARC: + more_vols = (h.flags & RAR_ENDARC_NEXT_VOLUME) > 0 + endarc = True + elif h.type == RAR_BLOCK_FILE: + # RAR 2.x does not write RAR_BLOCK_ENDARC + if h.flags & RAR_FILE_SPLIT_AFTER: + more_vols = True + # RAR 2.x does not set RAR_MAIN_FIRSTVOLUME + if volume == 0 and h.flags & RAR_FILE_SPLIT_BEFORE: + raise NeedFirstVolume("Need to start from first volume") + + if h.needs_password(): + self._needs_password = True + + # store it + self.process_entry(fd, h) + + if self._info_callback: + self._info_callback(h) + + # go to next header + if h.add_size > 0: + fd.seek(h.data_offset + h.add_size, 0) + + def process_entry(self, fd, item): + """Examine item, add into lookup cache.""" + raise NotImplementedError() + + def _decrypt_header(self, fd): + raise NotImplementedError("_decrypt_header") + + def _parse_block_header(self, fd): + raise NotImplementedError("_parse_block_header") + + def _open_hack(self, inf, psw): + raise NotImplementedError("_open_hack") + + # read single header + def _parse_header(self, fd): + try: + # handle encrypted headers + if (self._main and self._main.flags & RAR_MAIN_PASSWORD) or self._hdrenc_main: + if not self._password: + return None + fd = self._decrypt_header(fd) + + # now read actual header + 
return self._parse_block_header(fd) + except struct.error: + self._set_error("Broken header in RAR file") + return None + + # given current vol name, construct next one + def _next_volname(self, volfile): + if is_filelike(volfile): + raise IOError("Working on single FD") + if self._main.flags & RAR_MAIN_NEWNUMBERING: + return _next_newvol(volfile) + return _next_oldvol(volfile) + + def _set_error(self, msg, *args): + if args: + msg = msg % args + self._parse_error = msg + if self._strict: + raise BadRarFile(msg) + + def open(self, inf, psw): + """Return stream object for file data.""" + + if inf.file_redir: + # cannot leave to unrar as it expects copied file to exist + if inf.file_redir[0] in (RAR5_XREDIR_FILE_COPY, RAR5_XREDIR_HARD_LINK): + inf = self.getinfo(inf.file_redir[2]) + if not inf: + raise BadRarFile("cannot find copied file") + + if inf.flags & RAR_FILE_SPLIT_BEFORE: + raise NeedFirstVolume("Partial file, please start from first volume: " + inf.filename) + + # is temp write usable? 
+ use_hack = 1 + if not self._main: + use_hack = 0 + elif self._main._must_disable_hack(): + use_hack = 0 + elif inf._must_disable_hack(): + use_hack = 0 + elif is_filelike(self._rarfile): + pass + elif inf.file_size > HACK_SIZE_LIMIT: + use_hack = 0 + elif not USE_EXTRACT_HACK: + use_hack = 0 + + # now extract + if inf.compress_type == RAR_M0 and (inf.flags & RAR_FILE_PASSWORD) == 0 and inf.file_redir is None: + return self._open_clear(inf) + elif use_hack: + return self._open_hack(inf, psw) + elif is_filelike(self._rarfile): + return self._open_unrar_membuf(self._rarfile, inf, psw) + else: + return self._open_unrar(self._rarfile, inf, psw) + + def _open_clear(self, inf): + return DirectReader(self, inf) + + def _open_hack_core(self, inf, psw, prefix, suffix): + + size = inf.compress_size + inf.header_size + rf = XFile(inf.volume_file, 0) + rf.seek(inf.header_offset) + + tmpfd, tmpname = mkstemp(suffix=".rar", dir=HACK_TMP_DIR) + tmpf = os.fdopen(tmpfd, "wb") + + try: + tmpf.write(prefix) + while size > 0: + if size > BSIZE: + buf = rf.read(BSIZE) + else: + buf = rf.read(size) + if not buf: + raise BadRarFile("read failed: " + inf.filename) + tmpf.write(buf) + size -= len(buf) + tmpf.write(suffix) + tmpf.close() + rf.close() + except: + rf.close() + tmpf.close() + os.unlink(tmpname) + raise + + return self._open_unrar(tmpname, inf, psw, tmpname) + + # write in-memory archive to temp file - needed for solid archives + def _open_unrar_membuf(self, memfile, inf, psw): + tmpname = membuf_tempfile(memfile) + return self._open_unrar(tmpname, inf, psw, tmpname, force_file=True) + + # extract using unrar + def _open_unrar(self, rarfile, inf, psw=None, tmpfile=None, force_file=False): + setup = tool_setup() + + # not giving filename avoids encoding related problems + fn = None + if not tmpfile or force_file: + fn = inf.filename + if PATH_SEP != os.sep: + fn = fn.replace(PATH_SEP, os.sep) + + # read from unrar pipe + cmd = setup.open_cmdline(psw, rarfile, fn) + return 
PipeReader(self, inf, cmd, tmpfile) + +# +# RAR3 format +# + +class Rar3Info(RarInfo): + """RAR3 specific fields.""" + extract_version = 15 + salt = None + add_size = 0 + header_crc = None + header_size = None + header_offset = None + data_offset = None + _md_class = None + _md_expect = None + + # make sure some rar5 fields are always present + file_redir = None + blake2sp_hash = None + + def _must_disable_hack(self): + if self.type == RAR_BLOCK_FILE: + if self.flags & RAR_FILE_PASSWORD: + return True + elif self.flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER): + return True + elif self.type == RAR_BLOCK_MAIN: + if self.flags & (RAR_MAIN_SOLID | RAR_MAIN_PASSWORD): + return True + return False + + +class RAR3Parser(CommonParser): + """Parse RAR3 file format. + """ + _expect_sig = RAR_ID + _last_aes_key = (None, None, None) # (salt, key, iv) + + def _decrypt_header(self, fd): + if not _have_crypto: + raise NoCrypto("Cannot parse encrypted headers - no crypto") + salt = fd.read(8) + if self._last_aes_key[0] == salt: + key, iv = self._last_aes_key[1:] + else: + key, iv = rar3_s2k(self._password, salt) + self._last_aes_key = (salt, key, iv) + return HeaderDecrypt(fd, key, iv) + + # common header + def _parse_block_header(self, fd): + h = Rar3Info() + h.header_offset = fd.tell() + + # read and parse base header + buf = fd.read(S_BLK_HDR.size) + if not buf: + return None + t = S_BLK_HDR.unpack_from(buf) + h.header_crc, h.type, h.flags, h.header_size = t + + # read full header + if h.header_size > S_BLK_HDR.size: + hdata = buf + fd.read(h.header_size - S_BLK_HDR.size) + else: + hdata = buf + h.data_offset = fd.tell() + + # unexpected EOF? + if len(hdata) != h.header_size: + self._set_error("Unexpected EOF when reading header") + return None + + pos = S_BLK_HDR.size + + # block has data assiciated with it? 
+ if h.flags & RAR_LONG_BLOCK: + h.add_size, pos = load_le32(hdata, pos) + else: + h.add_size = 0 + + # parse interesting ones, decide header boundaries for crc + if h.type == RAR_BLOCK_MARK: + return h + elif h.type == RAR_BLOCK_MAIN: + pos += 6 + if h.flags & RAR_MAIN_ENCRYPTVER: + pos += 1 + crc_pos = pos + if h.flags & RAR_MAIN_COMMENT: + self._parse_subblocks(h, hdata, pos) + elif h.type == RAR_BLOCK_FILE: + pos = self._parse_file_header(h, hdata, pos - 4) + crc_pos = pos + if h.flags & RAR_FILE_COMMENT: + pos = self._parse_subblocks(h, hdata, pos) + elif h.type == RAR_BLOCK_SUB: + pos = self._parse_file_header(h, hdata, pos - 4) + crc_pos = h.header_size + elif h.type == RAR_BLOCK_OLD_AUTH: + pos += 8 + crc_pos = pos + elif h.type == RAR_BLOCK_OLD_EXTRA: + pos += 7 + crc_pos = pos + else: + crc_pos = h.header_size + + # check crc + if h.type == RAR_BLOCK_OLD_SUB: + crcdat = hdata[2:] + fd.read(h.add_size) + else: + crcdat = hdata[2:crc_pos] + + calc_crc = rar_crc32(crcdat) & 0xFFFF + + # return good header + if h.header_crc == calc_crc: + return h + + # header parsing failed. 
+ self._set_error("Header CRC error (%02x): exp=%x got=%x (xlen = %d)", + h.type, h.header_crc, calc_crc, len(crcdat)) + + # instead panicing, send eof + return None + + # read file-specific header + def _parse_file_header(self, h, hdata, pos): + fld = S_FILE_HDR.unpack_from(hdata, pos) + pos += S_FILE_HDR.size + + h.compress_size = fld[0] + h.file_size = fld[1] + h.host_os = fld[2] + h.CRC = fld[3] + h.date_time = parse_dos_time(fld[4]) + h.mtime = to_datetime(h.date_time) + h.extract_version = fld[5] + h.compress_type = fld[6] + name_size = fld[7] + h.mode = fld[8] + + h._md_class = CRC32Context + h._md_expect = h.CRC + + if h.flags & RAR_FILE_LARGE: + h1, pos = load_le32(hdata, pos) + h2, pos = load_le32(hdata, pos) + h.compress_size |= h1 << 32 + h.file_size |= h2 << 32 + h.add_size = h.compress_size + + name, pos = load_bytes(hdata, name_size, pos) + if h.flags & RAR_FILE_UNICODE: + nul = name.find(ZERO) + h.orig_filename = name[:nul] + u = UnicodeFilename(h.orig_filename, name[nul + 1:]) + h.filename = u.decode() + + # if parsing failed fall back to simple name + if u.failed: + h.filename = self._decode(h.orig_filename) + else: + h.orig_filename = name + h.filename = self._decode(name) + + # change separator, if requested + if PATH_SEP != "\\": + h.filename = h.filename.replace("\\", PATH_SEP) + + if h.flags & RAR_FILE_SALT: + h.salt, pos = load_bytes(hdata, 8, pos) + else: + h.salt = None + + # optional extended time stamps + if h.flags & RAR_FILE_EXTTIME: + pos = _parse_ext_time(h, hdata, pos) + else: + h.mtime = h.atime = h.ctime = h.arctime = None + + return pos + + # find old-style comment subblock + def _parse_subblocks(self, h, hdata, pos): + while pos < len(hdata): + # ordinary block header + t = S_BLK_HDR.unpack_from(hdata, pos) + ___scrc, stype, sflags, slen = t + pos_next = pos + slen + pos += S_BLK_HDR.size + + # corrupt header + if pos_next < pos: + break + + # followed by block-specific header + if stype == RAR_BLOCK_OLD_COMMENT and pos + 
S_COMMENT_HDR.size <= pos_next: + declen, ver, meth, crc = S_COMMENT_HDR.unpack_from(hdata, pos) + pos += S_COMMENT_HDR.size + data = hdata[pos : pos_next] + cmt = rar3_decompress(ver, meth, data, declen, sflags, + crc, self._password) + if not self._crc_check: + h.comment = self._decode_comment(cmt) + elif rar_crc32(cmt) & 0xFFFF == crc: + h.comment = self._decode_comment(cmt) + + pos = pos_next + return pos + + def _read_comment_v3(self, inf, psw=None): + + # read data + with XFile(inf.volume_file) as rf: + rf.seek(inf.data_offset) + data = rf.read(inf.compress_size) + + # decompress + cmt = rar3_decompress(inf.extract_version, inf.compress_type, data, + inf.file_size, inf.flags, inf.CRC, psw, inf.salt) + + # check crc + if self._crc_check: + crc = rar_crc32(cmt) + if crc != inf.CRC: + return None + + return self._decode_comment(cmt) + + def _decode(self, val): + for c in TRY_ENCODINGS: + try: + return val.decode(c) + except UnicodeError: + pass + return val.decode(self._charset, "replace") + + def _decode_comment(self, val): + return self._decode(val) + + def process_entry(self, fd, item): + if item.type == RAR_BLOCK_FILE: + # use only first part + if (item.flags & RAR_FILE_SPLIT_BEFORE) == 0: + self._info_map[item.filename] = item + self._info_list.append(item) + elif len(self._info_list) > 0: + # final crc is in last block + old = self._info_list[-1] + old.CRC = item.CRC + old._md_expect = item._md_expect + old.compress_size += item.compress_size + + # parse new-style comment + if item.type == RAR_BLOCK_SUB and item.filename == "CMT": + if item.flags & (RAR_FILE_SPLIT_BEFORE | RAR_FILE_SPLIT_AFTER): + pass + elif item.flags & RAR_FILE_SOLID: + # file comment + cmt = self._read_comment_v3(item, self._password) + if len(self._info_list) > 0: + old = self._info_list[-1] + old.comment = cmt + else: + # archive comment + cmt = self._read_comment_v3(item, self._password) + self.comment = cmt + + if item.type == RAR_BLOCK_MAIN: + if item.flags & RAR_MAIN_COMMENT: + 
self.comment = item.comment + if item.flags & RAR_MAIN_PASSWORD: + self._needs_password = True + + # put file compressed data into temporary .rar archive, and run + # unrar on that, thus avoiding unrar going over whole archive + def _open_hack(self, inf, psw): + # create main header: crc, type, flags, size, res1, res2 + prefix = RAR_ID + S_BLK_HDR.pack(0x90CF, 0x73, 0, 13) + ZERO * (2 + 4) + return self._open_hack_core(inf, psw, prefix, EMPTY) + +# +# RAR5 format +# + +class Rar5Info(RarInfo): + """Shared fields for RAR5 records. + """ + extract_version = 50 + header_crc = None + header_size = None + header_offset = None + data_offset = None + + # type=all + block_type = None + block_flags = None + add_size = 0 + block_extra_size = 0 + + # type=MAIN + volume_number = None + _md_class = None + _md_expect = None + + def _must_disable_hack(self): + return False + + +class Rar5BaseFile(Rar5Info): + """Shared sturct for file & service record. + """ + type = -1 + file_flags = None + file_encryption = (0, 0, 0, EMPTY, EMPTY, EMPTY) + file_compress_flags = None + file_redir = None + file_owner = None + file_version = None + blake2sp_hash = None + + def _must_disable_hack(self): + if self.flags & RAR_FILE_PASSWORD: + return True + if self.block_flags & (RAR5_BLOCK_FLAG_SPLIT_BEFORE | RAR5_BLOCK_FLAG_SPLIT_AFTER): + return True + if self.file_compress_flags & RAR5_COMPR_SOLID: + return True + if self.file_redir: + return True + return False + + +class Rar5FileInfo(Rar5BaseFile): + """RAR5 file record. + """ + type = RAR_BLOCK_FILE + + +class Rar5ServiceInfo(Rar5BaseFile): + """RAR5 service record. + """ + type = RAR_BLOCK_SUB + + +class Rar5MainInfo(Rar5Info): + """RAR5 archive main record. + """ + type = RAR_BLOCK_MAIN + main_flags = None + main_volume_number = None + + def _must_disable_hack(self): + if self.main_flags & RAR5_MAIN_FLAG_SOLID: + return True + return False + + +class Rar5EncryptionInfo(Rar5Info): + """RAR5 archive header encryption record. 
+ """ + type = RAR5_BLOCK_ENCRYPTION + encryption_algo = None + encryption_flags = None + encryption_kdf_count = None + encryption_salt = None + encryption_check_value = None + + def needs_password(self): + return True + + +class Rar5EndArcInfo(Rar5Info): + """RAR5 end of archive record. + """ + type = RAR_BLOCK_ENDARC + endarc_flags = None + + +class RAR5Parser(CommonParser): + """Parse RAR5 format. + """ + _expect_sig = RAR5_ID + _hdrenc_main = None + + # AES encrypted headers + _last_aes256_key = (-1, None, None) # (kdf_count, salt, key) + + def _gen_key(self, kdf_count, salt): + if self._last_aes256_key[:2] == (kdf_count, salt): + return self._last_aes256_key[2] + if kdf_count > 24: + raise BadRarFile("Too large kdf_count") + psw = self._password + if isinstance(psw, str): + psw = psw.encode("utf8") + key = pbkdf2_sha256(psw, salt, 1 << kdf_count) + self._last_aes256_key = (kdf_count, salt, key) + return key + + def _decrypt_header(self, fd): + if not _have_crypto: + raise NoCrypto("Cannot parse encrypted headers - no crypto") + h = self._hdrenc_main + key = self._gen_key(h.encryption_kdf_count, h.encryption_salt) + iv = fd.read(16) + return HeaderDecrypt(fd, key, iv) + + # common header + def _parse_block_header(self, fd): + header_offset = fd.tell() + + preload = 4 + 3 + start_bytes = fd.read(preload) + header_crc, pos = load_le32(start_bytes, 0) + hdrlen, pos = load_vint(start_bytes, pos) + if hdrlen > 2 * 1024 * 1024: + return None + header_size = pos + hdrlen + + # read full header, check for EOF + hdata = start_bytes + fd.read(header_size - len(start_bytes)) + if len(hdata) != header_size: + self._set_error("Unexpected EOF when reading header") + return None + data_offset = fd.tell() + + calc_crc = rar_crc32(memoryview(hdata)[4:]) + if header_crc != calc_crc: + # header parsing failed. 
def _parse_block_common(self, h, hdata):
    """Fill in the header fields shared by every RAR5 block type.

    Returns ``(h, pos)`` where *pos* is the offset in *hdata* just past
    the common fields.
    """
    h.header_crc, pos = load_le32(hdata, 0)
    hdrlen, pos = load_vint(hdata, pos)
    # total size = bytes consumed so far (crc + length field) + declared length
    h.header_size = pos + hdrlen
    h.block_type, pos = load_vint(hdata, pos)
    block_flags, pos = load_vint(hdata, pos)
    h.block_flags = block_flags

    has_data_area = block_flags & RAR5_BLOCK_FLAG_DATA_AREA
    if block_flags & RAR5_BLOCK_FLAG_EXTRA_DATA:
        h.block_extra_size, pos = load_vint(hdata, pos)
    if has_data_area:
        h.add_size, pos = load_vint(hdata, pos)
    h.compress_size = h.add_size

    # mirror the block flags into the generic flags field
    if block_flags & RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN:
        h.flags |= RAR_SKIP_IF_UNKNOWN
    if has_data_area:
        h.flags |= RAR_LONG_BLOCK
    return h, pos
def _parse_encryption_block(self, h, hdata, pos):
    """Parse the archive header-encryption record into *h*.

    Reads cipher id, flags, KDF count, the 16-byte salt and, when flagged,
    the 12-byte password check value.  The record is remembered in
    ``self._hdrenc_main`` for later header decryption.

    Raises BadRarFile if the cipher is not AES-256.
    """
    h.encryption_algo, pos = load_vint(hdata, pos)
    h.encryption_flags, pos = load_vint(hdata, pos)
    h.encryption_kdf_count, pos = load_byte(hdata, pos)
    h.encryption_salt, pos = load_bytes(hdata, 16, pos)
    if h.encryption_flags & RAR5_ENC_FLAG_HAS_CHECKVAL:
        # load_bytes() returns (data, new_pos); unpack it so only the
        # check bytes are stored rather than the whole tuple
        h.encryption_check_value, pos = load_bytes(hdata, 12, pos)
    if h.encryption_algo != RAR5_XENC_CIPHER_AES256:
        raise BadRarFile("Unsupported header encryption cipher")
    self._hdrenc_main = h
    return h
def _parse_file_owner(self, h, xdata, pos):
    """Parse the owner extra record into ``h.file_owner``.

    The result is ``(user_name, group_name, user_id, group_id)``; fields
    whose flag bit is not set stay None.
    """
    flags, pos = load_vint(xdata, pos)

    # fields appear in this order, each only when its flag bit is set
    layout = (
        (RAR5_XOWNER_UNAME, load_vstr),
        (RAR5_XOWNER_GNAME, load_vstr),
        (RAR5_XOWNER_UID, load_vint),
        (RAR5_XOWNER_GID, load_vint),
    )
    owner = []
    for mask, loader in layout:
        value = None
        if flags & mask:
            value, pos = loader(xdata, pos)
        owner.append(value)

    h.file_owner = tuple(owner)
class UnicodeFilename(object):
    """Handle RAR3 unicode filename decompression.

    *name* is the 8-bit form of the filename and *encdata* the encoded
    stream; decode() combines both into the final text.  ``failed`` is set
    to 1 when either input runs out before decoding is finished.
    """
    def __init__(self, name, encdata):
        self.std_name = bytearray(name)
        self.encdata = bytearray(encdata)
        self.pos = self.encpos = 0
        self.buf = bytearray()
        self.failed = 0

    def enc_byte(self):
        """Consume one byte of the encoded stream (0 once it is exhausted)."""
        try:
            value = self.encdata[self.encpos]
        except IndexError:
            self.failed = 1
            return 0
        self.encpos += 1
        return value

    def std_byte(self):
        """Return the 8-bit name byte at the current output position."""
        try:
            return self.std_name[self.pos]
        except IndexError:
            self.failed = 1
            return ord("?")

    def put(self, lo, hi):
        """Append one UTF-16LE code unit and advance the output position."""
        self.buf.append(lo)
        self.buf.append(hi)
        self.pos += 1

    def decode(self):
        """Decompress compressed UTF16 value."""
        # first byte is the shared high byte used by modes 1 and 3
        hi = self.enc_byte()
        flags = 0
        bits_left = 0
        total = len(self.encdata)
        while self.encpos < total:
            # each flag byte carries four 2-bit mode selectors, MSB first
            if not bits_left:
                flags = self.enc_byte()
                bits_left = 8
            bits_left -= 2
            mode = (flags >> bits_left) & 3
            if mode == 0:
                self.put(self.enc_byte(), 0)
            elif mode == 1:
                self.put(self.enc_byte(), hi)
            elif mode == 2:
                lo = self.enc_byte()
                self.put(lo, self.enc_byte())
            else:
                n = self.enc_byte()
                if n & 0x80:
                    # run of name bytes shifted by a delta, with shared high byte
                    delta = self.enc_byte()
                    for _ in range((n & 0x7f) + 2):
                        self.put((self.std_byte() + delta) & 0xFF, hi)
                else:
                    # run of name bytes copied as-is
                    for _ in range(n + 2):
                        self.put(self.std_byte(), 0)
        return self.buf.decode("utf-16le", "replace")
+ if not data or self._remain == 0: + # self.close() + self._check() + return data + + def _check(self): + """Check final CRC.""" + final = self._md_context.digest() + exp = self._inf._md_expect + if exp is None: + return + if final is None: + return + if self._returncode: + check_returncode(self, "") + if self._remain != 0: + raise BadRarFile("Failed the read enough data") + if final != exp: + raise BadRarFile("Corrupt file - CRC check failed: %s - exp=%r got=%r" % ( + self._inf.filename, exp, final)) + + def _read(self, cnt): + """Actual read that gets sanitized cnt.""" + raise NotImplementedError("_read") + + def close(self): + """Close open resources.""" + + super(RarExtFile, self).close() + + if self._fd: + self._fd.close() + self._fd = None + + def __del__(self): + """Hook delete to make sure tempfile is removed.""" + self.close() + + def readinto(self, buf): + """Zero-copy read directly into buffer. + + Returns bytes read. + """ + raise NotImplementedError("readinto") + + def tell(self): + """Return current reading position in uncompressed data.""" + return self._inf.file_size - self._remain + + def seek(self, ofs, whence=0): + """Seek in data. + + On uncompressed files, the seeking works by actual + seeks so it's fast. On compresses files its slow + - forward seeking happends by reading ahead, + backwards by re-opening and decompressing from the start. 
+ """ + + # disable crc check when seeking + self._md_context = NoHashContext() + + fsize = self._inf.file_size + cur_ofs = self.tell() + + if whence == 0: # seek from beginning of file + new_ofs = ofs + elif whence == 1: # seek from current position + new_ofs = cur_ofs + ofs + elif whence == 2: # seek from end of file + new_ofs = fsize + ofs + else: + raise ValueError("Invalid value for whence") + + # sanity check + if new_ofs < 0: + new_ofs = 0 + elif new_ofs > fsize: + new_ofs = fsize + + # do the actual seek + if new_ofs >= cur_ofs: + self._skip(new_ofs - cur_ofs) + else: + # reopen and seek + self._open() + self._skip(new_ofs) + return self.tell() + + def _skip(self, cnt): + """Read and discard data""" + while cnt > 0: + if cnt > 8192: + buf = self.read(8192) + else: + buf = self.read(cnt) + if not buf: + break + cnt -= len(buf) + + def readable(self): + """Returns True""" + return True + + def writable(self): + """Returns False. + + Writing is not supported. + """ + return False + + def seekable(self): + """Returns True. + + Seeking is supported, although it's slow on compressed files. 
class PipeReader(RarExtFile):
    """Read data from pipe, handle tempfile cleanup."""

    def __init__(self, rf, inf, cmd, tempfile=None):
        """Remember the command and optional temp file, then open the entry."""
        # these must exist before the base constructor runs: it calls _open()
        self._cmd = cmd
        self._proc = None
        self._tempfile = tempfile
        super(PipeReader, self).__init__(rf, inf)

    def _close_proc(self):
        """Close the child's pipes, wait for it and record its exit code."""
        proc = self._proc
        if not proc:
            return
        for stream in (proc.stdout, proc.stdin, proc.stderr):
            if stream:
                stream.close()
        proc.wait()
        self._returncode = proc.returncode
        self._proc = None

    def _open(self):
        """(Re)start the extraction command and read from its stdout."""
        super(PipeReader, self)._open()

        # get rid of a previous process before starting a new one
        self._close_proc()

        self._returncode = 0
        proc = custom_popen(self._cmd)
        self._proc = proc
        self._fd = proc.stdout

        # avoid situation where unrar waits on stdin
        if proc.stdin:
            proc.stdin.close()

    def _read(self, cnt):
        """Read from pipe."""

        # a single read is usually enough
        data = self._fd.read(cnt)
        if len(data) == cnt or not data:
            return data

        # short read: keep asking until satisfied or the pipe is drained
        parts = [data]
        missing = cnt - len(data)
        while missing > 0:
            data = self._fd.read(missing)
            if not data:
                break
            missing -= len(data)
            parts.append(data)
        return EMPTY.join(parts)

    def close(self):
        """Close open resources."""

        self._close_proc()
        super(PipeReader, self).close()

        tmpname = self._tempfile
        if tmpname:
            try:
                os.unlink(tmpname)
            except OSError:
                # best effort: the file may already be gone
                pass
            self._tempfile = None

    def readinto(self, buf):
        """Zero-copy read directly into buffer."""
        limit = min(len(buf), self._remain)
        view = memoryview(buf)
        got = 0
        while got < limit:
            res = self._fd.readinto(view[got : limit])
            if not res:
                break
            self._md_context.update(view[got : got + res])
            self._remain -= res
            got += res
        return got
from archive. + """ + _cur = None + _cur_avail = None + _volfile = None + + def _open(self): + super(DirectReader, self)._open() + + self._volfile = self._inf.volume_file + self._fd = XFile(self._volfile, 0) + self._fd.seek(self._inf.header_offset, 0) + self._cur = self._parser._parse_header(self._fd) + self._cur_avail = self._cur.add_size + + def _skip(self, cnt): + """RAR Seek, skipping through rar files to get to correct position + """ + + while cnt > 0: + # next vol needed? + if self._cur_avail == 0: + if not self._open_next(): + break + + # fd is in read pos, do the read + if cnt > self._cur_avail: + cnt -= self._cur_avail + self._remain -= self._cur_avail + self._cur_avail = 0 + else: + self._fd.seek(cnt, 1) + self._cur_avail -= cnt + self._remain -= cnt + cnt = 0 + + def _read(self, cnt): + """Read from potentially multi-volume archive.""" + + buf = [] + while cnt > 0: + # next vol needed? + if self._cur_avail == 0: + if not self._open_next(): + break + + # fd is in read pos, do the read + if cnt > self._cur_avail: + data = self._fd.read(self._cur_avail) + else: + data = self._fd.read(cnt) + if not data: + break + + # got some data + cnt -= len(data) + self._cur_avail -= len(data) + buf.append(data) + + if len(buf) == 1: + return buf[0] + return EMPTY.join(buf) + + def _open_next(self): + """Proceed to next volume.""" + + # is the file split over archives? 
def readinto(self, buf):
    """Zero-copy read directly into buffer."""
    view = memoryview(buf)
    size = len(buf)
    got = 0
    while got < size:
        # current part exhausted: continue in the next volume, if any
        if self._cur_avail == 0 and not self._open_next():
            break

        # fill the rest of the buffer, but stay inside the current part
        want = min(size - got, self._cur_avail)
        res = self._fd.readinto(view[got : got + want])
        if not res:
            break

        self._md_context.update(view[got : got + res])
        self._cur_avail -= res
        self._remain -= res
        got += res
    return got
class XFile(object):
    """Input may be filename or file object.

    A filename is opened in binary mode and closed again by close(); a
    file object supplied by the caller is rewound to offset 0 and is left
    open by close().
    """
    __slots__ = ("_fd", "_need_close")

    def __init__(self, xfile, bufsize=1024):
        owned = not is_filelike(xfile)
        self._need_close = owned
        if owned:
            self._fd = open(xfile, "rb", bufsize)
        else:
            self._fd = xfile
            xfile.seek(0)

    def read(self, n=None):
        """Read up to *n* bytes from the underlying file."""
        return self._fd.read(n)

    def tell(self):
        """Return the current position of the underlying file."""
        return self._fd.tell()

    def seek(self, ofs, whence=0):
        """Reposition the underlying file."""
        return self._fd.seek(ofs, whence)

    def readinto(self, dst):
        """Read from the underlying file into buffer *dst*."""
        return self._fd.readinto(dst)

    def close(self):
        """Close the underlying file if this object opened it."""
        if not self._need_close:
            return
        self._fd.close()

    def __enter__(self):
        return self

    def __exit__(self, typ, val, tb):
        self.close()
def update(self, data):
    """Hash data.

    Input is cut into ``block_size`` pieces that are handed to
    _add_block(); a trailing partial piece is kept in ``_buf`` until more
    data arrives.
    """
    view = memoryview(data)
    bs = self.block_size
    start = 0

    # first complete the partial block left over from the previous call
    if self._buf:
        need = bs - len(self._buf)
        if len(view) < need:
            self._buf += view.tobytes()
            return
        self._add_block(self._buf + view[:need].tobytes())
        start = need

    # feed all full blocks straight from the caller's buffer
    end = len(view)
    while end - start >= bs:
        self._add_block(view[start : start + bs])
        start += bs

    # keep the remainder for later
    self._buf = view[start:].tobytes()
class Rar3Sha1(object):
    """Bug-compat for SHA1

    Behaves like a plain SHA1 context.  With ``rarbug=True``, update()
    additionally overwrites 64-byte blocks of the caller's buffer in
    place after hashing them, so such input must be writable.
    """
    digest_size = 20
    block_size = 64

    _BLK_BE = struct.Struct(b">16L")
    _BLK_LE = struct.Struct(b"<16L")

    __slots__ = ("_nbytes", "_md", "_rarbug")

    def __init__(self, data=b"", rarbug=False):
        self._md = sha1()
        self._nbytes = 0
        self._rarbug = rarbug
        self.update(data)

    def update(self, data):
        """Process more data."""
        self._md.update(data)
        # offset inside the current 64-byte hash block before this call
        bufpos = self._nbytes & 63
        size = len(data)
        self._nbytes += size

        if not self._rarbug or size <= 64:
            return
        # visit every block that starts at a hash block boundary and lies
        # completely inside data
        bs = self.block_size
        for dpos in range(bs - bufpos, size - bs + 1, bs):
            self._corrupt(data, dpos)

    def digest(self):
        """Return final state."""
        return self._md.digest()

    def hexdigest(self):
        """Return final state as hex string."""
        return self._md.hexdigest()

    def _corrupt(self, data, dpos):
        """Corruption from SHA1 core."""
        words = list(self._BLK_BE.unpack_from(data, dpos))
        for t in range(16, 80):
            slot = t & 15
            mix = (words[(t - 3) & 15] ^ words[(t - 8) & 15]
                   ^ words[(t - 14) & 15] ^ words[slot])
            # rotate left by one bit within 32 bits
            words[slot] = ((mix << 1) | (mix >> 31)) & 0xFFFFFFFF
        self._BLK_LE.pack_into(data, dpos, *words)
+ +def load_vstr(buf, pos): + """Load bytes prefixed by vint length""" + slen, pos = load_vint(buf, pos) + return load_bytes(buf, slen, pos) + +def load_dostime(buf, pos): + """Load LE32 dos timestamp""" + stamp, pos = load_le32(buf, pos) + tup = parse_dos_time(stamp) + return to_datetime(tup), pos + +def load_unixtime(buf, pos): + """Load LE32 unix timestamp""" + secs, pos = load_le32(buf, pos) + dt = datetime.fromtimestamp(secs, UTC) + return dt, pos + +def load_windowstime(buf, pos): + """Load LE64 windows timestamp""" + # unix epoch (1970) in seconds from windows epoch (1601) + unix_epoch = 11644473600 + val1, pos = load_le32(buf, pos) + val2, pos = load_le32(buf, pos) + secs, n1secs = divmod((val2 << 32) | val1, 10000000) + dt = datetime.fromtimestamp(secs - unix_epoch, UTC) + dt = dt.replace(microsecond=n1secs // 10) + return dt, pos + +# new-style next volume +def _next_newvol(volfile): + i = len(volfile) - 1 + while i >= 0: + if volfile[i] >= "0" and volfile[i] <= "9": + return _inc_volname(volfile, i) + i -= 1 + raise BadRarName("Cannot construct volume name: " + volfile) + +# old-style next volume +def _next_oldvol(volfile): + # rar -> r00 + if volfile[-4:].lower() == ".rar": + return volfile[:-2] + "00" + return _inc_volname(volfile, len(volfile) - 1) + +# increase digits with carry, otherwise just increment char +def _inc_volname(volfile, i): + fn = list(volfile) + while i >= 0: + if fn[i] != "9": + fn[i] = chr(ord(fn[i]) + 1) + break + fn[i] = "0" + i -= 1 + return "".join(fn) + +# rar3 extended time fields +def _parse_ext_time(h, data, pos): + # flags and rest of data can be missing + flags = 0 + if pos + 2 <= len(data): + flags = S_SHORT.unpack_from(data, pos)[0] + pos += 2 + + mtime, pos = _parse_xtime(flags >> 3 * 4, data, pos, h.mtime) + h.ctime, pos = _parse_xtime(flags >> 2 * 4, data, pos) + h.atime, pos = _parse_xtime(flags >> 1 * 4, data, pos) + h.arctime, pos = _parse_xtime(flags >> 0 * 4, data, pos) + if mtime: + h.mtime = mtime + 
h.date_time = mtime.timetuple()[:6] + return pos + +# rar3 one extended time field +def _parse_xtime(flag, data, pos, basetime=None): + res = None + if flag & 8: + if not basetime: + basetime, pos = load_dostime(data, pos) + + # load second fractions + rem = 0 + cnt = flag & 3 + for _ in range(cnt): + b, pos = load_byte(data, pos) + rem = (b << 16) | (rem >> 8) + + # convert 100ns units to microseconds + usec = rem // 10 + if usec > 1000000: + usec = 999999 + + # dostime has room for 30 seconds only, correct if needed + if flag & 4 and basetime.second < 59: + res = basetime.replace(microsecond=usec, second=basetime.second + 1) + else: + res = basetime.replace(microsecond=usec) + return res, pos + +def is_filelike(obj): + """Filename or file object? + """ + filename_types = (bytes, str, Path) + + if isinstance(obj, filename_types): + return False + res = True + for a in ("read", "tell", "seek"): + res = res and hasattr(obj, a) + if not res: + raise ValueError("Invalid object passed as file") + return True + +def rar3_s2k(psw, salt): + """String-to-key hash for RAR3. + """ + if not isinstance(psw, str): + psw = psw.decode("utf8") + seed = bytearray(psw.encode("utf-16le") + salt) + h = Rar3Sha1(rarbug=True) + iv = EMPTY + for i in range(16): + for j in range(0x4000): + cnt = S_LONG.pack(i * 0x4000 + j) + h.update(seed) + h.update(cnt[:3]) + if j == 0: + iv += h.digest()[19:20] + key_be = h.digest()[:16] + key_le = pack("LLLL", key_be)) + return key_le, iv + +def rar3_decompress(vers, meth, data, declen=0, flags=0, crc=0, psw=None, salt=None): + """Decompress blob of compressed data. + + Used for data with non-standard header - eg. comments. + """ + # already uncompressed? 
def to_datetime(t):
    """Convert 6-part time tuple into datetime object.

    Returns None for None.  Out-of-range fields are clamped to the nearest
    valid value instead of raising.
    """
    if t is None:
        return None

    year, mon, day, h, m, s = t

    # fast path: the tuple is usually valid as it is
    try:
        return datetime(year, mon, day, h, m, s)
    except ValueError:
        pass

    # clamp each field into its valid range (February allows 29 here)
    days_in_month = (0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
    mon = min(max(mon, 1), 12)
    day = min(max(day, 1), days_in_month[mon])
    h = min(h, 23)
    m = min(m, 59)
    s = min(s, 59)

    # Feb 29 exists only in leap years; fall back to the 28th otherwise
    if mon == 2 and day == 29:
        try:
            return datetime(year, mon, day, h, m, s)
        except ValueError:
            day = 28
    return datetime(year, mon, day, h, m, s)
def custom_popen(cmd):
    """Disconnect cmd from parent fds, read only from stdout.

    Returns the started process; its stderr is merged into stdout.
    Raises RarCannotExec when the executable is missing or may not be run;
    any other OSError is passed through.
    """
    # needed for py2exe: 0x08000000 is CREATE_NO_WINDOW
    creationflags = 0x08000000 if sys.platform == "win32" else 0

    try:
        return Popen(cmd, bufsize=0, stdout=PIPE, stdin=PIPE, stderr=STDOUT,
                     creationflags=creationflags)
    except OSError as ex:
        if ex.errno == errno.ENOENT:
            raise RarCannotExec("Unrar not installed?")
        if ex.errno in (errno.EACCES, errno.EPERM):
            raise RarCannotExec("Cannot execute unrar")
        raise
+ """ + __slots__ = ("_tmpfile", "_filename") + + def __init__(self, rarfile): + if is_filelike(rarfile): + self._tmpfile = membuf_tempfile(rarfile) + self._filename = self._tmpfile + else: + self._tmpfile = None + self._filename = rarfile + + def __enter__(self): + return self._filename + + def __exit__(self, exc_type, exc_value, tb): + if self._tmpfile: + try: + os.unlink(self._tmpfile) + except OSError: + pass + self._tmpfile = None + +# +# Find working command-line tool +# + +class ToolSetup: + def __init__(self, setup): + self.setup = setup + + def check(self): + cmdline = self.get_cmdline("check_cmd", None) + try: + p = custom_popen(cmdline) + out, _ = p.communicate() + return p.returncode == 0 + except RarCannotExec: + return False + + def open_cmdline(self, psw, rarfn, filefn=None): + cmdline = self.get_cmdline("open_cmd", psw) + cmdline.append(rarfn) + if filefn: + cmdline.append(filefn) + return cmdline + + def test_cmdline(self, psw, rarfn): + cmdline = self.get_cmdline("test_cmd", psw) + cmdline.append(rarfn) + return cmdline + + def extract_cmdline(self, psw, rarfn, fnlist, path): + cmdline = self.get_cmdline("extract_cmd", psw, nodash=True) + dstdir = "DSTDIR" in cmdline + if dstdir: + if not path: + path = "." + cmdline[cmdline.index("DSTDIR", 1)] = path + + cmdline.append("--") + + cmdline.append(rarfn) + for fn in fnlist: + cmdline.append(fn) + + if path and not dstdir: + cmdline.append(path + os.sep) + return cmdline + + def get_errmap(self): + return self.setup["errmap"] + + def get_cmdline(self, key, psw, nodash=False): + cmdline = list(self.setup[key]) + cmdline[0] = globals()[cmdline[0]] + self.add_password_arg(cmdline, psw) + if not nodash: + cmdline.append("--") + return cmdline + + def add_password_arg(self, cmdline, psw): + """Append password switch to commandline. 
+ """ + if psw is not None: + if not isinstance(psw, str): + psw = psw.decode("utf8") + args = self.setup["password"] + if isinstance(args, str): + cmdline.append(args + psw) + else: + cmdline.extend(args) + cmdline.append(psw) + else: + cmdline.extend(self.setup["no_password"]) + +UNRAR_CONFIG = { + "open_cmd": ("UNRAR_TOOL", "p", "-inul"), + "extract_cmd": ("UNRAR_TOOL", "x", "-y", "-idq"), + "test_cmd": ("UNRAR_TOOL", "t", "-idq"), + "check_cmd": ("UNRAR_TOOL", "-inul"), + "password": "-p", + "no_password": ("-p-",), + # map return code to exception class, codes from rar.txt + "errmap": [None, + RarWarning, RarFatalError, RarCRCError, RarLockedArchiveError, # 1..4 + RarWriteError, RarOpenError, RarUserError, RarMemoryError, # 5..8 + RarCreateError, RarNoFilesError, RarWrongPassword] # 9..11 +} + +# Problems with unar RAR backend: +# - Does not support RAR2 locked files [fails to read] +# - Does not support RAR5 Blake2sp hash [reading works] +UNAR_CONFIG = { + "open_cmd": ("UNAR_TOOL", "-q", "-o", "-"), + "extract_cmd": ("UNAR_TOOL", "-q", "-f", "-D", "-o", "DSTDIR"), + "test_cmd": ("LSAR_TOOL", "-test"), + "check_cmd": ("UNAR_TOOL", "-version"), + "password": ("-p",), + "no_password": ("-p", ""), + "errmap": [None], +} + +# Problems with libarchive RAR backend: +# - Does not support solid archives. +# - Does not support password-protected archives. +# - Does not support RARVM-based compression filters. +BSDTAR_CONFIG = { + "open_cmd": ("BSDTAR_TOOL", "-x", "--to-stdout", "-f"), + "extract_cmd": ("BSDTAR_TOOL", "-x", "-C", "DSTDIR", "-f"), + "test_cmd": ("BSDTAR_TOOL", "-t", "-f"), + "check_cmd": ("BSDTAR_TOOL", "--version"), + "password": None, + "no_password": (), + "errmap": [None], +} + +CURRENT_SETUP = None + +def tool_setup(unrar=True, unar=True, bsdtar=True, force=False): + """Pick a tool, return cached ToolSetup. 
+ """ + global CURRENT_SETUP + if force: + CURRENT_SETUP = None + if CURRENT_SETUP is not None: + return CURRENT_SETUP + lst = [] + if unrar: + lst.append(UNRAR_CONFIG) + if unar: + lst.append(UNAR_CONFIG) + if bsdtar: + lst.append(BSDTAR_CONFIG) + + for conf in lst: + setup = ToolSetup(conf) + if setup.check(): + CURRENT_SETUP = setup + break + if CURRENT_SETUP is None: + raise RarCannotExec("Cannot find working tool") + return CURRENT_SETUP +