diff --git a/couchpotato/api.py b/couchpotato/api.py index 091de42..e86b127 100644 --- a/couchpotato/api.py +++ b/couchpotato/api.py @@ -110,6 +110,7 @@ class ApiHandler(RequestHandler): if jsonp_callback: self.write(str(jsonp_callback) + '(' + json.dumps(result) + ')') + self.set_header("Content-Type", "text/javascript") elif isinstance(result, tuple) and result[0] == 'redirect': self.redirect(result[1]) else: diff --git a/couchpotato/core/_base/clientscript/main.py b/couchpotato/core/_base/clientscript/main.py index 1b7f163..b80ddcc 100644 --- a/couchpotato/core/_base/clientscript/main.py +++ b/couchpotato/core/_base/clientscript/main.py @@ -34,6 +34,8 @@ class ClientScript(Plugin): 'scripts/library/question.js', 'scripts/library/scrollspy.js', 'scripts/library/spin.js', + 'scripts/library/Array.stableSort.js', + 'scripts/library/async.js', 'scripts/couchpotato.js', 'scripts/api.js', 'scripts/library/history.js', diff --git a/couchpotato/core/_base/scheduler/main.py b/couchpotato/core/_base/scheduler/main.py index 2c97e1b..87b0533 100644 --- a/couchpotato/core/_base/scheduler/main.py +++ b/couchpotato/core/_base/scheduler/main.py @@ -31,8 +31,8 @@ class Scheduler(Plugin): pass def doShutdown(self): - super(Scheduler, self).doShutdown() self.stop() + return super(Scheduler, self).doShutdown() def stop(self): if self.started: diff --git a/couchpotato/core/_base/updater/main.py b/couchpotato/core/_base/updater/main.py index f3b4b19..aecf0c4 100644 --- a/couchpotato/core/_base/updater/main.py +++ b/couchpotato/core/_base/updater/main.py @@ -298,6 +298,7 @@ class SourceUpdater(BaseUpdater): def replaceWith(self, path): app_dir = ss(Env.get('app_dir')) + data_dir = ss(Env.get('data_dir')) # Get list of files we want to overwrite self.deletePyc() @@ -329,12 +330,15 @@ class SourceUpdater(BaseUpdater): log.error('Failed overwriting file "%s": %s', (tofile, traceback.format_exc())) return False - if Env.get('app_dir') not in Env.get('data_dir'): - for still_exists in 
existing_files: - try: - os.remove(still_exists) - except: - log.error('Failed removing non-used file: %s', traceback.format_exc()) + for still_exists in existing_files: + + if data_dir in still_exists: + continue + + try: + os.remove(still_exists) + except: + log.error('Failed removing non-used file: %s', traceback.format_exc()) return True diff --git a/couchpotato/core/_base/updater/static/updater.js b/couchpotato/core/_base/updater/static/updater.js index 0577c78..860ad51 100644 --- a/couchpotato/core/_base/updater/static/updater.js +++ b/couchpotato/core/_base/updater/static/updater.js @@ -24,7 +24,7 @@ var UpdaterBase = new Class({ self.doUpdate(); else { App.unBlockPage(); - App.fireEvent('message', 'No updates available'); + App.on('message', 'No updates available'); } } }) diff --git a/couchpotato/core/downloaders/base.py b/couchpotato/core/downloaders/base.py index 08be4bd..9e24d91 100644 --- a/couchpotato/core/downloaders/base.py +++ b/couchpotato/core/downloaders/base.py @@ -66,36 +66,36 @@ class Downloader(Provider): def getAllDownloadStatus(self): return - def _removeFailed(self, item): + def _removeFailed(self, release_download): if self.isDisabled(manual = True, data = {}): return - if item and item.get('downloader') == self.getName(): + if release_download and release_download.get('downloader') == self.getName(): if self.conf('delete_failed'): - return self.removeFailed(item) + return self.removeFailed(release_download) return False return - def removeFailed(self, item): + def removeFailed(self, release_download): return - def _processComplete(self, item): + def _processComplete(self, release_download): if self.isDisabled(manual = True, data = {}): return - if item and item.get('downloader') == self.getName(): + if release_download and release_download.get('downloader') == self.getName(): if self.conf('remove_complete', default = False): - return self.processComplete(item = item, delete_files = self.conf('delete_files', default = False)) + return 
self.processComplete(release_download = release_download, delete_files = self.conf('delete_files', default = False)) return False return - def processComplete(self, item, delete_files): + def processComplete(self, release_download, delete_files): return - def isCorrectProtocol(self, item_protocol): - is_correct = item_protocol in self.protocol + def isCorrectProtocol(self, protocol): + is_correct = protocol in self.protocol if not is_correct: log.debug("Downloader doesn't support this protocol") @@ -151,20 +151,20 @@ class Downloader(Provider): (d_manual and manual or d_manual is False) and \ (not data or self.isCorrectProtocol(data.get('protocol'))) - def _pause(self, item, pause = True): + def _pause(self, release_download, pause = True): if self.isDisabled(manual = True, data = {}): return - if item and item.get('downloader') == self.getName(): - self.pause(item, pause) + if release_download and release_download.get('downloader') == self.getName(): + self.pause(release_download, pause) return True return False - def pause(self, item, pause): + def pause(self, release_download, pause): return -class StatusList(list): +class ReleaseDownloadList(list): provider = None @@ -173,7 +173,7 @@ class StatusList(list): self.provider = provider self.kwargs = kwargs - super(StatusList, self).__init__() + super(ReleaseDownloadList, self).__init__() def extend(self, results): for r in results: @@ -181,7 +181,7 @@ class StatusList(list): def append(self, result): new_result = self.fillResult(result) - super(StatusList, self).append(new_result) + super(ReleaseDownloadList, self).append(new_result) def fillResult(self, result): @@ -190,6 +190,7 @@ class StatusList(list): 'status': 'busy', 'downloader': self.provider.getName(), 'folder': '', + 'files': '', } return mergeDicts(defaults, result) diff --git a/couchpotato/core/downloaders/blackhole/__init__.py b/couchpotato/core/downloaders/blackhole/__init__.py index 6b5279a..91164d6 100644 --- 
a/couchpotato/core/downloaders/blackhole/__init__.py +++ b/couchpotato/core/downloaders/blackhole/__init__.py @@ -13,7 +13,7 @@ config = [{ 'list': 'download_providers', 'name': 'blackhole', 'label': 'Black hole', - 'description': 'Download the NZB/Torrent to a specific folder.', + 'description': 'Download the NZB/Torrent to a specific folder. Note: Seeding and copying/linking features do not work with Black hole.', 'wizard': True, 'options': [ { diff --git a/couchpotato/core/downloaders/deluge/main.py b/couchpotato/core/downloaders/deluge/main.py index 580ed7f..f3a1238 100644 --- a/couchpotato/core/downloaders/deluge/main.py +++ b/couchpotato/core/downloaders/deluge/main.py @@ -1,12 +1,14 @@ -from base64 import b64encode -from couchpotato.core.downloaders.base import Downloader, StatusList -from couchpotato.core.helpers.encoding import isInt, ss +from base64 import b64encode, b16encode, b32decode +from bencode import bencode as benc, bdecode +from couchpotato.core.downloaders.base import Downloader, ReleaseDownloadList +from couchpotato.core.helpers.encoding import isInt, sp from couchpotato.core.helpers.variable import tryFloat from couchpotato.core.logger import CPLog -from couchpotato.environment import Env from datetime import timedelta +from hashlib import sha1 from synchronousdeluge import DelugeClient import os.path +import re import traceback log = CPLog(__name__) @@ -72,7 +74,7 @@ class Deluge(Downloader): remote_torrent = self.drpc.add_torrent_magnet(data.get('url'), options) else: filename = self.createFileName(data, filedata, movie) - remote_torrent = self.drpc.add_torrent_file(filename, b64encode(filedata), options) + remote_torrent = self.drpc.add_torrent_file(filename, filedata, options) if not remote_torrent: log.error('Failed sending torrent to Deluge') @@ -85,14 +87,10 @@ class Deluge(Downloader): log.debug('Checking Deluge download status.') - if not os.path.isdir(Env.setting('from', 'renamer')): - log.error('Renamer "from" folder doesn\'t to 
exist.') - return - if not self.connect(): return False - statuses = StatusList(self) + release_downloads = ReleaseDownloadList(self) queue = self.drpc.get_alltorrents() @@ -101,50 +99,55 @@ class Deluge(Downloader): return False for torrent_id in queue: - item = queue[torrent_id] - log.debug('name=%s / id=%s / save_path=%s / move_completed_path=%s / hash=%s / progress=%s / state=%s / eta=%s / ratio=%s / stop_ratio=%s / is_seed=%s / is_finished=%s / paused=%s', (item['name'], item['hash'], item['save_path'], item['move_completed_path'], item['hash'], item['progress'], item['state'], item['eta'], item['ratio'], item['stop_ratio'], item['is_seed'], item['is_finished'], item['paused'])) + torrent = queue[torrent_id] + log.debug('name=%s / id=%s / save_path=%s / move_completed_path=%s / hash=%s / progress=%s / state=%s / eta=%s / ratio=%s / stop_ratio=%s / is_seed=%s / is_finished=%s / paused=%s', (torrent['name'], torrent['hash'], torrent['save_path'], torrent['move_completed_path'], torrent['hash'], torrent['progress'], torrent['state'], torrent['eta'], torrent['ratio'], torrent['stop_ratio'], torrent['is_seed'], torrent['is_finished'], torrent['paused'])) # Deluge has no easy way to work out if a torrent is stalled or failing. #status = 'failed' status = 'busy' - if item['is_seed'] and tryFloat(item['ratio']) < tryFloat(item['stop_ratio']): - # We have item['seeding_time'] to work out what the seeding time is, but we do not + if torrent['is_seed'] and tryFloat(torrent['ratio']) < tryFloat(torrent['stop_ratio']): + # We have torrent['seeding_time'] to work out what the seeding time is, but we do not # have access to the downloader seed_time, as with deluge we have no way to pass it # when the torrent is added. So Deluge will only look at the ratio. # See above comment in download(). 
status = 'seeding' - elif item['is_seed'] and item['is_finished'] and item['paused'] and item['state'] == 'Paused': + elif torrent['is_seed'] and torrent['is_finished'] and torrent['paused'] and torrent['state'] == 'Paused': status = 'completed' - download_dir = item['save_path'] - if item['move_on_completed']: - download_dir = item['move_completed_path'] + download_dir = sp(torrent['save_path']) + if torrent['move_on_completed']: + download_dir = torrent['move_completed_path'] + + torrent_files = [] + for file_item in torrent['files']: + torrent_files.append(sp(os.path.join(download_dir, file_item['path']))) - statuses.append({ - 'id': item['hash'], - 'name': item['name'], + release_downloads.append({ + 'id': torrent['hash'], + 'name': torrent['name'], 'status': status, - 'original_status': item['state'], - 'seed_ratio': item['ratio'], - 'timeleft': str(timedelta(seconds = item['eta'])), - 'folder': ss(os.path.join(download_dir, item['name'])), + 'original_status': torrent['state'], + 'seed_ratio': torrent['ratio'], + 'timeleft': str(timedelta(seconds = torrent['eta'])), + 'folder': sp(download_dir if len(torrent_files) == 1 else os.path.join(download_dir, torrent['name'])), + 'files': '|'.join(torrent_files), }) - return statuses + return release_downloads - def pause(self, item, pause = True): + def pause(self, release_download, pause = True): if pause: - return self.drpc.pause_torrent([item['id']]) + return self.drpc.pause_torrent([release_download['id']]) else: - return self.drpc.resume_torrent([item['id']]) + return self.drpc.resume_torrent([release_download['id']]) - def removeFailed(self, item): - log.info('%s failed downloading, deleting...', item['name']) - return self.drpc.remove_torrent(item['id'], True) + def removeFailed(self, release_download): + log.info('%s failed downloading, deleting...', release_download['name']) + return self.drpc.remove_torrent(release_download['id'], True) - def processComplete(self, item, delete_files = False): - 
log.debug('Requesting Deluge to remove the torrent %s%s.', (item['name'], ' and cleanup the downloaded files' if delete_files else '')) - return self.drpc.remove_torrent(item['id'], remove_local_data = delete_files) + def processComplete(self, release_download, delete_files = False): + log.debug('Requesting Deluge to remove the torrent %s%s.', (release_download['name'], ' and cleanup the downloaded files' if delete_files else '')) + return self.drpc.remove_torrent(release_download['id'], remove_local_data = delete_files) class DelugeRPC(object): @@ -171,7 +174,10 @@ class DelugeRPC(object): try: self.connect() torrent_id = self.client.core.add_torrent_magnet(torrent, options).get() - if options['label']: + if not torrent_id: + torrent_id = self._check_torrent(True, torrent) + + if torrent_id and options['label']: self.client.label.set_torrent(torrent_id, options['label']).get() except Exception, err: log.error('Failed to add torrent magnet %s: %s %s', (torrent, err, traceback.format_exc())) @@ -185,8 +191,11 @@ class DelugeRPC(object): torrent_id = False try: self.connect() - torrent_id = self.client.core.add_torrent_file(filename, torrent, options).get() - if options['label']: + torrent_id = self.client.core.add_torrent_file(filename, b64encode(torrent), options).get() + if not torrent_id: + torrent_id = self._check_torrent(False, torrent) + + if torrent_id and options['label']: self.client.label.set_torrent(torrent_id, options['label']).get() except Exception, err: log.error('Failed to add torrent file %s: %s %s', (filename, err, traceback.format_exc())) @@ -242,3 +251,22 @@ class DelugeRPC(object): def disconnect(self): self.client.disconnect() + + def _check_torrent(self, magnet, torrent): + # Torrent not added, check if it already existed. 
+ if magnet: + torrent_hash = re.findall('urn:btih:([\w]{32,40})', torrent)[0] + else: + info = bdecode(torrent)["info"] + torrent_hash = sha1(benc(info)).hexdigest() + + # Convert base 32 to hex + if len(torrent_hash) == 32: + torrent_hash = b16encode(b32decode(torrent_hash)) + + torrent_hash = torrent_hash.lower() + torrent_check = self.client.core.get_torrent_status(torrent_hash, {}).get() + if torrent_check['hash']: + return torrent_hash + + return False diff --git a/couchpotato/core/downloaders/nzbget/main.py b/couchpotato/core/downloaders/nzbget/main.py index b7cf026..f850613 100644 --- a/couchpotato/core/downloaders/nzbget/main.py +++ b/couchpotato/core/downloaders/nzbget/main.py @@ -1,6 +1,6 @@ from base64 import standard_b64encode -from couchpotato.core.downloaders.base import Downloader, StatusList -from couchpotato.core.helpers.encoding import ss +from couchpotato.core.downloaders.base import Downloader, ReleaseDownloadList +from couchpotato.core.helpers.encoding import ss, sp from couchpotato.core.helpers.variable import tryInt, md5 from couchpotato.core.logger import CPLog from datetime import timedelta @@ -99,60 +99,60 @@ class NZBGet(Downloader): log.error('Failed getting data: %s', traceback.format_exc(1)) return False - statuses = StatusList(self) + release_downloads = ReleaseDownloadList(self) - for item in groups: - log.debug('Found %s in NZBGet download queue', item['NZBFilename']) + for nzb in groups: + log.debug('Found %s in NZBGet download queue', nzb['NZBFilename']) try: - nzb_id = [param['Value'] for param in item['Parameters'] if param['Name'] == 'couchpotato'][0] + nzb_id = [param['Value'] for param in nzb['Parameters'] if param['Name'] == 'couchpotato'][0] except: - nzb_id = item['NZBID'] + nzb_id = nzb['NZBID'] timeleft = -1 try: - if item['ActiveDownloads'] > 0 and item['DownloadRate'] > 0 and not (status['DownloadPaused'] or status['Download2Paused']): - timeleft = str(timedelta(seconds = item['RemainingSizeMB'] / 
status['DownloadRate'] * 2 ^ 20)) + if nzb['ActiveDownloads'] > 0 and nzb['DownloadRate'] > 0 and not (status['DownloadPaused'] or status['Download2Paused']): + timeleft = str(timedelta(seconds = nzb['RemainingSizeMB'] / status['DownloadRate'] * 2 ^ 20)) except: pass - statuses.append({ + release_downloads.append({ 'id': nzb_id, - 'name': item['NZBFilename'], - 'original_status': 'DOWNLOADING' if item['ActiveDownloads'] > 0 else 'QUEUED', + 'name': nzb['NZBFilename'], + 'original_status': 'DOWNLOADING' if nzb['ActiveDownloads'] > 0 else 'QUEUED', # Seems to have no native API function for time left. This will return the time left after NZBGet started downloading this item 'timeleft': timeleft, }) - for item in queue: # 'Parameters' is not passed in rpc.postqueue - log.debug('Found %s in NZBGet postprocessing queue', item['NZBFilename']) - statuses.append({ - 'id': item['NZBID'], - 'name': item['NZBFilename'], - 'original_status': item['Stage'], + for nzb in queue: # 'Parameters' is not passed in rpc.postqueue + log.debug('Found %s in NZBGet postprocessing queue', nzb['NZBFilename']) + release_downloads.append({ + 'id': nzb['NZBID'], + 'name': nzb['NZBFilename'], + 'original_status': nzb['Stage'], 'timeleft': str(timedelta(seconds = 0)) if not status['PostPaused'] else -1, }) - for item in history: - log.debug('Found %s in NZBGet history. ParStatus: %s, ScriptStatus: %s, Log: %s', (item['NZBFilename'] , item['ParStatus'], item['ScriptStatus'] , item['Log'])) + for nzb in history: + log.debug('Found %s in NZBGet history. 
ParStatus: %s, ScriptStatus: %s, Log: %s', (nzb['NZBFilename'] , nzb['ParStatus'], nzb['ScriptStatus'] , nzb['Log'])) try: - nzb_id = [param['Value'] for param in item['Parameters'] if param['Name'] == 'couchpotato'][0] + nzb_id = [param['Value'] for param in nzb['Parameters'] if param['Name'] == 'couchpotato'][0] except: - nzb_id = item['NZBID'] - statuses.append({ + nzb_id = nzb['NZBID'] + release_downloads.append({ 'id': nzb_id, - 'name': item['NZBFilename'], - 'status': 'completed' if item['ParStatus'] in ['SUCCESS','NONE'] and item['ScriptStatus'] in ['SUCCESS','NONE'] else 'failed', - 'original_status': item['ParStatus'] + ', ' + item['ScriptStatus'], + 'name': nzb['NZBFilename'], + 'status': 'completed' if nzb['ParStatus'] in ['SUCCESS', 'NONE'] and nzb['ScriptStatus'] in ['SUCCESS', 'NONE'] else 'failed', + 'original_status': nzb['ParStatus'] + ', ' + nzb['ScriptStatus'], 'timeleft': str(timedelta(seconds = 0)), - 'folder': ss(item['DestDir']) + 'folder': sp(nzb['DestDir']) }) - return statuses + return release_downloads - def removeFailed(self, item): + def removeFailed(self, release_download): - log.info('%s failed downloading, deleting...', item['name']) + log.info('%s failed downloading, deleting...', release_download['name']) url = self.url % {'host': self.conf('host'), 'username': self.conf('username'), 'password': self.conf('password')} @@ -179,9 +179,9 @@ class NZBGet(Downloader): for hist in history: for param in hist['Parameters']: - if param['Name'] == 'couchpotato' and param['Value'] == item['id']: + if param['Name'] == 'couchpotato' and param['Value'] == release_download['id']: nzb_id = hist['ID'] - path = hist['DestDir'] + path = hist['DestDir'] if nzb_id and path and rpc.editqueue('HistoryDelete', 0, "", [tryInt(nzb_id)]): shutil.rmtree(path, True) diff --git a/couchpotato/core/downloaders/nzbvortex/main.py b/couchpotato/core/downloaders/nzbvortex/main.py index a652f11..f4e233b 100644 --- a/couchpotato/core/downloaders/nzbvortex/main.py +++ 
b/couchpotato/core/downloaders/nzbvortex/main.py @@ -1,6 +1,6 @@ from base64 import b64encode -from couchpotato.core.downloaders.base import Downloader, StatusList -from couchpotato.core.helpers.encoding import tryUrlencode, ss +from couchpotato.core.downloaders.base import Downloader, ReleaseDownloadList +from couchpotato.core.helpers.encoding import tryUrlencode, sp from couchpotato.core.helpers.variable import cleanHost from couchpotato.core.logger import CPLog from urllib2 import URLError @@ -30,10 +30,10 @@ class NZBVortex(Downloader): # Send the nzb try: nzb_filename = self.createFileName(data, filedata, movie) - self.call('nzb/add', params = {'file': (ss(nzb_filename), filedata)}, multipart = True) + self.call('nzb/add', params = {'file': (nzb_filename, filedata)}, multipart = True) raw_statuses = self.call('nzb') - nzb_id = [item['id'] for item in raw_statuses.get('nzbs', []) if item['name'] == nzb_filename][0] + nzb_id = [nzb['id'] for nzb in raw_statuses.get('nzbs', []) if nzb['name'] == nzb_filename][0] return self.downloadReturnId(nzb_id) except: log.error('Something went wrong sending the NZB file: %s', traceback.format_exc()) @@ -43,33 +43,33 @@ class NZBVortex(Downloader): raw_statuses = self.call('nzb') - statuses = StatusList(self) - for item in raw_statuses.get('nzbs', []): + release_downloads = ReleaseDownloadList(self) + for nzb in raw_statuses.get('nzbs', []): # Check status status = 'busy' - if item['state'] == 20: + if nzb['state'] == 20: status = 'completed' - elif item['state'] in [21, 22, 24]: + elif nzb['state'] in [21, 22, 24]: status = 'failed' - statuses.append({ - 'id': item['id'], - 'name': item['uiTitle'], + release_downloads.append({ + 'id': nzb['id'], + 'name': nzb['uiTitle'], 'status': status, - 'original_status': item['state'], + 'original_status': nzb['state'], 'timeleft':-1, - 'folder': ss(item['destinationPath']), + 'folder': sp(nzb['destinationPath']), }) - return statuses + return release_downloads - def removeFailed(self, 
item): + def removeFailed(self, release_download): - log.info('%s failed downloading, deleting...', item['name']) + log.info('%s failed downloading, deleting...', release_download['name']) try: - self.call('nzb/%s/cancel' % item['id']) + self.call('nzb/%s/cancel' % release_download['id']) except: log.error('Failed deleting: %s', traceback.format_exc(0)) return False diff --git a/couchpotato/core/downloaders/rtorrent/__init__.py b/couchpotato/core/downloaders/rtorrent/__init__.py index b04e689..684ea45 100755 --- a/couchpotato/core/downloaders/rtorrent/__init__.py +++ b/couchpotato/core/downloaders/rtorrent/__init__.py @@ -23,6 +23,8 @@ config = [{ { 'name': 'url', 'default': 'http://localhost:80/RPC2', + 'description': 'XML-RPC Endpoint URI. Usually scgi://localhost:5000 ' + 'or http://localhost:80/RPC2' }, { 'name': 'username', @@ -38,7 +40,7 @@ config = [{ { 'name': 'directory', 'type': 'directory', - 'description': 'Directory where rtorrent should download the files too.', + 'description': 'Download to this directory. Keep empty for default rTorrent download directory.', }, { 'name': 'remove_complete', @@ -49,14 +51,6 @@ config = [{ 'description': 'Remove the torrent after it finishes seeding.', }, { - 'name': 'append_label', - 'label': 'Append Label', - 'default': False, - 'advanced': True, - 'type': 'bool', - 'description': 'Append label to download location. 
Requires you to set the download location above.', - }, - { 'name': 'delete_files', 'label': 'Remove files', 'default': True, diff --git a/couchpotato/core/downloaders/rtorrent/main.py b/couchpotato/core/downloaders/rtorrent/main.py index caf64d5..8381f0a 100755 --- a/couchpotato/core/downloaders/rtorrent/main.py +++ b/couchpotato/core/downloaders/rtorrent/main.py @@ -1,13 +1,13 @@ from base64 import b16encode, b32decode from bencode import bencode, bdecode -from couchpotato.core.downloaders.base import Downloader, StatusList -from couchpotato.core.helpers.encoding import ss +from couchpotato.core.downloaders.base import Downloader, ReleaseDownloadList +from couchpotato.core.helpers.encoding import sp from couchpotato.core.logger import CPLog from datetime import timedelta from hashlib import sha1 from rtorrent import RTorrent from rtorrent.err import MethodError -import shutil, os +import os log = CPLog(__name__) @@ -71,7 +71,7 @@ class rTorrent(Downloader): group.set_command() group.disable() except MethodError, err: - log.error('Unable to set group options: %s', err.message) + log.error('Unable to set group options: %s', err.msg) return False return True @@ -125,9 +125,7 @@ class rTorrent(Downloader): if self.conf('label'): torrent.set_custom(1, self.conf('label')) - if self.conf('directory') and self.conf('append_label'): - torrent.set_directory(os.path.join(self.conf('directory'), self.conf('label'))) - elif self.conf('directory'): + if self.conf('directory'): torrent.set_directory(self.conf('directory')) # Set Ratio Group @@ -151,37 +149,42 @@ class rTorrent(Downloader): try: torrents = self.rt.get_torrents() - statuses = StatusList(self) + release_downloads = ReleaseDownloadList(self) + + for torrent in torrents: + torrent_files = [] + for file_item in torrent.get_files(): + torrent_files.append(sp(os.path.join(torrent.directory, file_item.path))) - for item in torrents: status = 'busy' - if item.complete: - if item.active: + if torrent.complete: + if 
torrent.active: status = 'seeding' else: status = 'completed' - statuses.append({ - 'id': item.info_hash, - 'name': item.name, + release_downloads.append({ + 'id': torrent.info_hash, + 'name': torrent.name, 'status': status, - 'seed_ratio': item.ratio, - 'original_status': item.state, - 'timeleft': str(timedelta(seconds = float(item.left_bytes) / item.down_rate)) if item.down_rate > 0 else -1, - 'folder': ss(item.directory) + 'seed_ratio': torrent.ratio, + 'original_status': torrent.state, + 'timeleft': str(timedelta(seconds = float(torrent.left_bytes) / torrent.down_rate)) if torrent.down_rate > 0 else -1, + 'folder': sp(torrent.directory), + 'files': '|'.join(torrent_files) }) - return statuses + return release_downloads except Exception, err: log.error('Failed to get status from rTorrent: %s', err) return False - def pause(self, download_info, pause = True): + def pause(self, release_download, pause = True): if not self.connect(): return False - torrent = self.rt.find_torrent(download_info['id']) + torrent = self.rt.find_torrent(release_download['id']) if torrent is None: return False @@ -189,23 +192,34 @@ class rTorrent(Downloader): return torrent.pause() return torrent.resume() - def removeFailed(self, item): - log.info('%s failed downloading, deleting...', item['name']) - return self.processComplete(item, delete_files = True) + def removeFailed(self, release_download): + log.info('%s failed downloading, deleting...', release_download['name']) + return self.processComplete(release_download, delete_files = True) - def processComplete(self, item, delete_files): + def processComplete(self, release_download, delete_files): log.debug('Requesting rTorrent to remove the torrent %s%s.', - (item['name'], ' and cleanup the downloaded files' if delete_files else '')) + (release_download['name'], ' and cleanup the downloaded files' if delete_files else '')) + if not self.connect(): return False - torrent = self.rt.find_torrent(item['id']) + torrent = 
self.rt.find_torrent(release_download['id']) + if torrent is None: return False - torrent.erase() # just removes the torrent, doesn't delete data - if delete_files: - shutil.rmtree(item['folder'], True) + for file_item in torrent.get_files(): # will only delete files, not dir/sub-dir + os.unlink(os.path.join(torrent.directory, file_item.path)) + + if torrent.is_multi_file() and torrent.directory.endswith(torrent.name): + # Remove empty directories bottom up + try: + for path, _, _ in os.walk(torrent.directory, topdown = False): + os.rmdir(path) + except OSError: + log.info('Directory "%s" contains extra files, unable to remove', torrent.directory) + + torrent.erase() # just removes the torrent, doesn't delete data return True diff --git a/couchpotato/core/downloaders/sabnzbd/main.py b/couchpotato/core/downloaders/sabnzbd/main.py index 41f9f70..aba2123 100644 --- a/couchpotato/core/downloaders/sabnzbd/main.py +++ b/couchpotato/core/downloaders/sabnzbd/main.py @@ -1,11 +1,12 @@ -from couchpotato.core.downloaders.base import Downloader, StatusList -from couchpotato.core.helpers.encoding import tryUrlencode, ss +from couchpotato.core.downloaders.base import Downloader, ReleaseDownloadList +from couchpotato.core.helpers.encoding import tryUrlencode, ss, sp from couchpotato.core.helpers.variable import cleanHost, mergeDicts from couchpotato.core.logger import CPLog from couchpotato.environment import Env from datetime import timedelta from urllib2 import URLError import json +import os import traceback log = CPLog(__name__) @@ -86,58 +87,58 @@ class Sabnzbd(Downloader): log.error('Failed getting history json: %s', traceback.format_exc(1)) return False - statuses = StatusList(self) + release_downloads = ReleaseDownloadList(self) # Get busy releases - for item in queue.get('slots', []): + for nzb in queue.get('slots', []): status = 'busy' - if 'ENCRYPTED / ' in item['filename']: + if 'ENCRYPTED / ' in nzb['filename']: status = 'failed' - statuses.append({ - 'id': 
item['nzo_id'], - 'name': item['filename'], + release_downloads.append({ + 'id': nzb['nzo_id'], + 'name': nzb['filename'], 'status': status, - 'original_status': item['status'], - 'timeleft': item['timeleft'] if not queue['paused'] else -1, + 'original_status': nzb['status'], + 'timeleft': nzb['timeleft'] if not queue['paused'] else -1, }) # Get old releases - for item in history.get('slots', []): + for nzb in history.get('slots', []): status = 'busy' - if item['status'] == 'Failed' or (item['status'] == 'Completed' and item['fail_message'].strip()): + if nzb['status'] == 'Failed' or (nzb['status'] == 'Completed' and nzb['fail_message'].strip()): status = 'failed' - elif item['status'] == 'Completed': + elif nzb['status'] == 'Completed': status = 'completed' - statuses.append({ - 'id': item['nzo_id'], - 'name': item['name'], + release_downloads.append({ + 'id': nzb['nzo_id'], + 'name': nzb['name'], 'status': status, - 'original_status': item['status'], + 'original_status': nzb['status'], 'timeleft': str(timedelta(seconds = 0)), - 'folder': ss(item['storage']), + 'folder': sp(os.path.dirname(nzb['storage']) if os.path.isfile(nzb['storage']) else nzb['storage']), }) - return statuses + return release_downloads - def removeFailed(self, item): + def removeFailed(self, release_download): - log.info('%s failed downloading, deleting...', item['name']) + log.info('%s failed downloading, deleting...', release_download['name']) try: self.call({ 'mode': 'queue', 'name': 'delete', 'del_files': '1', - 'value': item['id'] + 'value': release_download['id'] }, use_json = False) self.call({ 'mode': 'history', 'name': 'delete', 'del_files': '1', - 'value': item['id'] + 'value': release_download['id'] }, use_json = False) except: log.error('Failed deleting: %s', traceback.format_exc(0)) @@ -145,15 +146,15 @@ class Sabnzbd(Downloader): return True - def processComplete(self, item, delete_files = False): - log.debug('Requesting SabNZBd to remove the NZB %s.', item['name']) + def 
processComplete(self, release_download, delete_files = False): + log.debug('Requesting SabNZBd to remove the NZB %s.', release_download['name']) try: self.call({ 'mode': 'history', 'name': 'delete', 'del_files': '0', - 'value': item['id'] + 'value': release_download['id'] }, use_json = False) except: log.error('Failed removing: %s', traceback.format_exc(0)) diff --git a/couchpotato/core/downloaders/synology/main.py b/couchpotato/core/downloaders/synology/main.py index d5082c7..0721085 100644 --- a/couchpotato/core/downloaders/synology/main.py +++ b/couchpotato/core/downloaders/synology/main.py @@ -3,6 +3,7 @@ from couchpotato.core.helpers.encoding import isInt from couchpotato.core.logger import CPLog import json import requests +import traceback log = CPLog(__name__) @@ -34,12 +35,12 @@ class Synology(Downloader): elif data['protocol'] in ['nzb', 'torrent']: log.info('Adding %s' % data['protocol']) if not filedata: - log.error('No %s data found' % data['protocol']) + log.error('No %s data found', data['protocol']) else: filename = data['name'] + '.' 
+ data['protocol'] response = srpc.create_task(filename = filename, filedata = filedata) - except Exception, err: - log.error('Exception while adding torrent: %s', err) + except: + log.error('Exception while adding torrent: %s', traceback.format_exc()) finally: return response diff --git a/couchpotato/core/downloaders/transmission/main.py b/couchpotato/core/downloaders/transmission/main.py index 1c35996..2eabb2e 100644 --- a/couchpotato/core/downloaders/transmission/main.py +++ b/couchpotato/core/downloaders/transmission/main.py @@ -1,9 +1,8 @@ from base64 import b64encode -from couchpotato.core.downloaders.base import Downloader, StatusList -from couchpotato.core.helpers.encoding import isInt, ss +from couchpotato.core.downloaders.base import Downloader, ReleaseDownloadList +from couchpotato.core.helpers.encoding import isInt, sp from couchpotato.core.helpers.variable import tryInt, tryFloat from couchpotato.core.logger import CPLog -from couchpotato.environment import Env from datetime import timedelta import httplib import json @@ -89,10 +88,10 @@ class Transmission(Downloader): if not self.connect(): return False - statuses = StatusList(self) + release_downloads = ReleaseDownloadList(self) return_params = { - 'fields': ['id', 'name', 'hashString', 'percentDone', 'status', 'eta', 'isStalled', 'isFinished', 'downloadDir', 'uploadRatio', 'secondsSeeding', 'seedIdleLimit'] + 'fields': ['id', 'name', 'hashString', 'percentDone', 'status', 'eta', 'isStalled', 'isFinished', 'downloadDir', 'uploadRatio', 'secondsSeeding', 'seedIdleLimit', 'files'] } queue = self.trpc.get_alltorrents(return_params) @@ -100,47 +99,48 @@ class Transmission(Downloader): log.debug('Nothing in queue or error') return False - for item in queue['torrents']: + for torrent in queue['torrents']: log.debug('name=%s / id=%s / downloadDir=%s / hashString=%s / percentDone=%s / status=%s / eta=%s / uploadRatio=%s / isFinished=%s', - (item['name'], item['id'], item['downloadDir'], item['hashString'], 
item['percentDone'], item['status'], item['eta'], item['uploadRatio'], item['isFinished'])) + (torrent['name'], torrent['id'], torrent['downloadDir'], torrent['hashString'], torrent['percentDone'], torrent['status'], torrent['eta'], torrent['uploadRatio'], torrent['isFinished'])) - if not os.path.isdir(Env.setting('from', 'renamer')): - log.error('Renamer "from" folder doesn\'t to exist.') - return + torrent_files = [] + for file_item in torrent['files']: + torrent_files.append(sp(os.path.join(torrent['downloadDir'], file_item['name']))) status = 'busy' - if item['isStalled'] and self.conf('stalled_as_failed'): + if torrent.get('isStalled') and self.conf('stalled_as_failed'): status = 'failed' - elif item['status'] == 0 and item['percentDone'] == 1: + elif torrent['status'] == 0 and torrent['percentDone'] == 1: status = 'completed' - elif item['status'] in [5, 6]: + elif torrent['status'] in [5, 6]: status = 'seeding' - statuses.append({ - 'id': item['hashString'], - 'name': item['name'], + release_downloads.append({ + 'id': torrent['hashString'], + 'name': torrent['name'], 'status': status, - 'original_status': item['status'], - 'seed_ratio': item['uploadRatio'], - 'timeleft': str(timedelta(seconds = item['eta'])), - 'folder': ss(os.path.join(item['downloadDir'], item['name'])), + 'original_status': torrent['status'], + 'seed_ratio': torrent['uploadRatio'], + 'timeleft': str(timedelta(seconds = torrent['eta'])), + 'folder': sp(torrent['downloadDir'] if len(torrent_files) == 1 else os.path.join(torrent['downloadDir'], torrent['name'])), + 'files': '|'.join(torrent_files) }) - return statuses + return release_downloads - def pause(self, item, pause = True): + def pause(self, release_download, pause = True): if pause: - return self.trpc.stop_torrent(item['id']) + return self.trpc.stop_torrent(release_download['id']) else: - return self.trpc.start_torrent(item['id']) + return self.trpc.start_torrent(release_download['id']) - def removeFailed(self, item): - 
log.info('%s failed downloading, deleting...', item['name']) - return self.trpc.remove_torrent(item['id'], True) + def removeFailed(self, release_download): + log.info('%s failed downloading, deleting...', release_download['name']) + return self.trpc.remove_torrent(release_download['id'], True) - def processComplete(self, item, delete_files = False): - log.debug('Requesting Transmission to remove the torrent %s%s.', (item['name'], ' and cleanup the downloaded files' if delete_files else '')) - return self.trpc.remove_torrent(item['id'], delete_files) + def processComplete(self, release_download, delete_files = False): + log.debug('Requesting Transmission to remove the torrent %s%s.', (release_download['name'], ' and cleanup the downloaded files' if delete_files else '')) + return self.trpc.remove_torrent(release_download['id'], delete_files) class TransmissionRPC(object): diff --git a/couchpotato/core/downloaders/utorrent/__init__.py b/couchpotato/core/downloaders/utorrent/__init__.py index d45e2e6..0c4c323 100644 --- a/couchpotato/core/downloaders/utorrent/__init__.py +++ b/couchpotato/core/downloaders/utorrent/__init__.py @@ -37,6 +37,11 @@ config = [{ 'description': 'Label to add torrent as.', }, { + 'name': 'directory', + 'type': 'directory', + 'description': 'Download to this directory. 
Keep empty for default uTorrent download directory.', + }, + { 'name': 'remove_complete', 'label': 'Remove torrent', 'default': True, diff --git a/couchpotato/core/downloaders/utorrent/main.py b/couchpotato/core/downloaders/utorrent/main.py index d5262e2..e05d104 100644 --- a/couchpotato/core/downloaders/utorrent/main.py +++ b/couchpotato/core/downloaders/utorrent/main.py @@ -1,7 +1,7 @@ from base64 import b16encode, b32decode from bencode import bencode as benc, bdecode -from couchpotato.core.downloaders.base import Downloader, StatusList -from couchpotato.core.helpers.encoding import isInt, ss +from couchpotato.core.downloaders.base import Downloader, ReleaseDownloadList +from couchpotato.core.helpers.encoding import isInt, ss, sp from couchpotato.core.helpers.variable import tryInt, tryFloat from couchpotato.core.logger import CPLog from datetime import timedelta @@ -77,6 +77,7 @@ class uTorrent(Downloader): else: info = bdecode(filedata)["info"] torrent_hash = sha1(benc(info)).hexdigest().upper() + torrent_filename = self.createFileName(data, filedata, movie) if data.get('seed_ratio'): @@ -91,50 +92,23 @@ class uTorrent(Downloader): if len(torrent_hash) == 32: torrent_hash = b16encode(b32decode(torrent_hash)) + # Set download directory + if self.conf('directory'): + directory = self.conf('directory') + else: + directory = False + # Send request to uTorrent if data.get('protocol') == 'torrent_magnet': - self.utorrent_api.add_torrent_uri(torrent_filename, data.get('url')) + self.utorrent_api.add_torrent_uri(torrent_filename, data.get('url'), directory) else: - self.utorrent_api.add_torrent_file(torrent_filename, filedata) + self.utorrent_api.add_torrent_file(torrent_filename, filedata, directory) # Change settings of added torrent self.utorrent_api.set_torrent(torrent_hash, torrent_params) if self.conf('paused', default = 0): self.utorrent_api.pause_torrent(torrent_hash) - count = 0 - while True: - - count += 1 - # Check if torrent is saved in subfolder of 
torrent name - getfiles_data = self.utorrent_api.get_files(torrent_hash) - - torrent_files = json.loads(getfiles_data) - if torrent_files.get('error'): - log.error('Error getting data from uTorrent: %s', torrent_files.get('error')) - return False - - if (torrent_files.get('files') and len(torrent_files['files'][1]) > 0) or count > 60: - break - - time.sleep(1) - - # Torrent has only one file, so uTorrent wont create a folder for it - if len(torrent_files['files'][1]) == 1: - # Remove torrent and try again - self.utorrent_api.remove_torrent(torrent_hash, remove_data = True) - - # Send request to uTorrent - if data.get('protocol') == 'torrent_magnet': - self.utorrent_api.add_torrent_uri(torrent_filename, data.get('url'), add_folder = True) - else: - self.utorrent_api.add_torrent_file(torrent_filename, filedata, add_folder = True) - - # Change settings of added torrent - self.utorrent_api.set_torrent(torrent_hash, torrent_params) - if self.conf('paused', default = 0): - self.utorrent_api.pause_torrent(torrent_hash) - return self.downloadReturnId(torrent_hash) def getAllDownloadStatus(self): @@ -144,7 +118,7 @@ class uTorrent(Downloader): if not self.connect(): return False - statuses = StatusList(self) + release_downloads = ReleaseDownloadList(self) data = self.utorrent_api.get_status() if not data: @@ -161,52 +135,74 @@ class uTorrent(Downloader): return False # Get torrents - for item in queue['torrents']: + for torrent in queue['torrents']: + + #Get files of the torrent + torrent_files = [] + try: + torrent_files = json.loads(self.utorrent_api.get_files(torrent[0])) + torrent_files = [sp(os.path.join(torrent[26], torrent_file[0])) for torrent_file in torrent_files['files'][1]] + except: + log.debug('Failed getting files from torrent: %s', torrent[2]) + + status_flags = { + "STARTED" : 1, + "CHECKING" : 2, + "CHECK-START" : 4, + "CHECKED" : 8, + "ERROR" : 16, + "PAUSED" : 32, + "QUEUED" : 64, + "LOADED" : 128 + } - # item[21] = Paused | Downloading | Seeding | 
Finished status = 'busy' - if 'Finished' in item[21]: - status = 'completed' - self.removeReadOnly(item[26]) - elif 'Seeding' in item[21]: + if (torrent[1] & status_flags["STARTED"] or torrent[1] & status_flags["QUEUED"]) and torrent[4] == 1000: status = 'seeding' - self.removeReadOnly(item[26]) - - statuses.append({ - 'id': item[0], - 'name': item[2], - 'status': status, - 'seed_ratio': float(item[7]) / 1000, - 'original_status': item[1], - 'timeleft': str(timedelta(seconds = item[10])), - 'folder': ss(item[26]), + elif (torrent[1] & status_flags["ERROR"]): + status = 'failed' + elif torrent[4] == 1000: + status = 'completed' + + if not status == 'busy': + self.removeReadOnly(torrent_files) + + release_downloads.append({ + 'id': torrent[0], + 'name': torrent[2], + 'status': status, + 'seed_ratio': float(torrent[7]) / 1000, + 'original_status': torrent[1], + 'timeleft': str(timedelta(seconds = torrent[10])), + 'folder': sp(torrent[26]), + 'files': '|'.join(torrent_files) }) - return statuses + return release_downloads - def pause(self, item, pause = True): + def pause(self, release_download, pause = True): if not self.connect(): return False - return self.utorrent_api.pause_torrent(item['id'], pause) + return self.utorrent_api.pause_torrent(release_download['id'], pause) - def removeFailed(self, item): - log.info('%s failed downloading, deleting...', item['name']) + def removeFailed(self, release_download): + log.info('%s failed downloading, deleting...', release_download['name']) if not self.connect(): return False - return self.utorrent_api.remove_torrent(item['id'], remove_data = True) + return self.utorrent_api.remove_torrent(release_download['id'], remove_data = True) - def processComplete(self, item, delete_files = False): - log.debug('Requesting uTorrent to remove the torrent %s%s.', (item['name'], ' and cleanup the downloaded files' if delete_files else '')) + def processComplete(self, release_download, delete_files = False): + log.debug('Requesting 
uTorrent to remove the torrent %s%s.', (release_download['name'], ' and cleanup the downloaded files' if delete_files else '')) if not self.connect(): return False - return self.utorrent_api.remove_torrent(item['id'], remove_data = delete_files) + return self.utorrent_api.remove_torrent(release_download['id'], remove_data = delete_files) - def removeReadOnly(self, folder): - #Removes all read-only flags in a folder - if folder and os.path.isdir(folder): - for root, folders, filenames in os.walk(folder): - for filename in filenames: - os.chmod(os.path.join(root, filename), stat.S_IWRITE) + def removeReadOnly(self, files): + #Removes all read-only flags for all files + for filepath in files: + if os.path.isfile(filepath): + #Windows only needs S_IWRITE, but we bitwise-or with current perms to preserve other permission bits on Linux + os.chmod(filepath, stat.S_IWRITE | os.stat(filepath).st_mode) class uTorrentAPI(object): @@ -260,13 +256,13 @@ class uTorrentAPI(object): def add_torrent_uri(self, filename, torrent, add_folder = False): action = "action=add-url&s=%s" % urllib.quote(torrent) if add_folder: - action += "&path=%s" % urllib.quote(filename) + action += "&path=%s" % urllib.quote(add_folder) return self._request(action) def add_torrent_file(self, filename, filedata, add_folder = False): action = "action=add-file" if add_folder: - action += "&path=%s" % urllib.quote(filename) + action += "&path=%s" % urllib.quote(add_folder) return self._request(action, {"torrent_file": (ss(filename), filedata)}) def set_torrent(self, hash, params): @@ -304,13 +300,13 @@ class uTorrentAPI(object): utorrent_settings = json.loads(self._request(action)) # Create settings dict - for item in utorrent_settings['settings']: - if item[1] == 0: # int - settings_dict[item[0]] = int(item[2] if not item[2].strip() == '' else '0') - elif item[1] == 1: # bool - settings_dict[item[0]] = True if item[2] == 'true' else False - elif item[1] == 2: # string - settings_dict[item[0]] = item[2]
+ for setting in utorrent_settings['settings']: + if setting[1] == 0: # int + settings_dict[setting[0]] = int(setting[2] if not setting[2].strip() == '' else '0') + elif setting[1] == 1: # bool + settings_dict[setting[0]] = True if setting[2] == 'true' else False + elif setting[1] == 2: # string + settings_dict[setting[0]] = setting[2] #log.debug('uTorrent settings: %s', settings_dict) diff --git a/couchpotato/core/helpers/encoding.py b/couchpotato/core/helpers/encoding.py index 5fa2e2a..e88c6ca 100644 --- a/couchpotato/core/helpers/encoding.py +++ b/couchpotato/core/helpers/encoding.py @@ -1,6 +1,7 @@ from couchpotato.core.logger import CPLog from string import ascii_letters, digits from urllib import quote_plus +import os import re import traceback import unicodedata @@ -47,6 +48,19 @@ def ss(original, *args): log.debug('Failed ss encoding char, force UTF8: %s', e) return u_original.encode('UTF-8') +def sp(path, *args): + + # Standardise encoding, normalise case, path and strip trailing '/' or '\' + if not path or len(path) == 0: + return path + + path = os.path.normcase(os.path.normpath(ss(path, *args))) + + if path != os.path.sep: + path = path.rstrip(os.path.sep) + + return path + def ek(original, *args): if isinstance(original, (str, unicode)): try: diff --git a/couchpotato/core/helpers/variable.py b/couchpotato/core/helpers/variable.py index 6296462..7d35b99 100644 --- a/couchpotato/core/helpers/variable.py +++ b/couchpotato/core/helpers/variable.py @@ -1,6 +1,6 @@ -import collections from couchpotato.core.helpers.encoding import simplifyString, toSafeString, ss from couchpotato.core.logger import CPLog +import collections import hashlib import os.path import platform @@ -137,16 +137,18 @@ def getImdb(txt, check_inside = False, multiple = False): output.close() try: - ids = re.findall('(tt\d{7})', txt) + ids = re.findall('(tt\d{4,7})', txt) + if multiple: - return list(set(ids)) if len(ids) > 0 else [] - return ids[0] + return list(set(['tt%07d' % 
tryInt(x[2:]) for x in ids])) if len(ids) > 0 else [] + + return 'tt%07d' % tryInt(ids[0][2:]) except IndexError: pass return False -def tryInt(s, default=0): +def tryInt(s, default = 0): try: return int(s) except: return default diff --git a/couchpotato/core/media/__init__.py b/couchpotato/core/media/__init__.py index e6a249d..1ba8386 100644 --- a/couchpotato/core/media/__init__.py +++ b/couchpotato/core/media/__init__.py @@ -38,7 +38,7 @@ class MediaBase(Plugin): def notifyFront(): db = get_session() media = db.query(Media).filter_by(id = media_id).first() - fireEvent('notify.frontend', type = '%s.update.%s' % (media.type, media.id), data = media.to_dict(self.default_dict)) + fireEvent('notify.frontend', type = '%s.update' % media.type, data = media.to_dict(self.default_dict)) db.expire_all() return notifyFront diff --git a/couchpotato/core/media/_base/media/main.py b/couchpotato/core/media/_base/media/main.py index 87afb82..68ae531 100644 --- a/couchpotato/core/media/_base/media/main.py +++ b/couchpotato/core/media/_base/media/main.py @@ -34,7 +34,7 @@ class MediaPlugin(MediaBase): for title in media.library.titles: if title.default: default_title = title.title - fireEvent('notify.frontend', type = '%s.busy.%s' % (media.type, x), data = True) + fireEvent('notify.frontend', type = '%s.busy' % media.type, data = {'id': x}) fireEventAsync('library.update.%s' % media.type, identifier = media.library.identifier, default_title = default_title, force = True, on_complete = self.createOnComplete(x)) db.expire_all() diff --git a/couchpotato/core/media/_base/searcher/__init__.py b/couchpotato/core/media/_base/searcher/__init__.py index 0fb6cc0..5e029a2 100644 --- a/couchpotato/core/media/_base/searcher/__init__.py +++ b/couchpotato/core/media/_base/searcher/__init__.py @@ -47,7 +47,7 @@ config = [{ { 'name': 'ignored_words', 'label': 'Ignored', - 'default': 'german, dutch, french, truefrench, danish, swedish, spanish, italian, korean, dubbed, swesub, korsub, dksubs', + 
'default': 'german, dutch, french, truefrench, danish, swedish, spanish, italian, korean, dubbed, swesub, korsub, dksubs, vain', 'description': 'Ignores releases that match any of these sets. (Works like explained above)' }, ], diff --git a/couchpotato/core/media/_base/searcher/main.py b/couchpotato/core/media/_base/searcher/main.py index 0ea8b23..f7df956 100644 --- a/couchpotato/core/media/_base/searcher/main.py +++ b/couchpotato/core/media/_base/searcher/main.py @@ -260,7 +260,7 @@ class Searcher(SearcherBase): except: pass # Match longest name between [] - try: check_names.append(max(check_name.split('['), key = len)) + try: check_names.append(max(re.findall(r'[^[]*\[([^]]*)\]', check_name), key = len).strip()) except: pass for check_name in list(set(check_names)): diff --git a/couchpotato/core/media/movie/_base/main.py b/couchpotato/core/media/movie/_base/main.py index 53a651c..6745c5a 100644 --- a/couchpotato/core/media/movie/_base/main.py +++ b/couchpotato/core/media/movie/_base/main.py @@ -1,14 +1,13 @@ from couchpotato import get_session from couchpotato.api import addApiView from couchpotato.core.event import fireEvent, fireEventAsync, addEvent -from couchpotato.core.helpers.encoding import toUnicode, simplifyString +from couchpotato.core.helpers.encoding import toUnicode from couchpotato.core.helpers.variable import getImdb, splitString, tryInt, \ mergeDicts from couchpotato.core.logger import CPLog from couchpotato.core.media.movie import MovieTypeBase from couchpotato.core.settings.model import Library, LibraryTitle, Media, \ Release -from couchpotato.environment import Env from sqlalchemy.orm import joinedload_all from sqlalchemy.sql.expression import or_, asc, not_, desc from string import ascii_lowercase @@ -54,6 +53,7 @@ class MovieBase(MovieTypeBase): 'params': { 'identifier': {'desc': 'IMDB id of the movie your want to add.'}, 'profile_id': {'desc': 'ID of quality profile you want the add the movie in. 
If empty will use the default profile.'}, + 'category_id': {'desc': 'ID of category you want the add the movie in. If empty will use no category.'}, 'title': {'desc': 'Movie title to use for searches. Has to be one of the titles returned by movie.search.'}, } }) @@ -79,34 +79,6 @@ class MovieBase(MovieTypeBase): addEvent('movie.list', self.list) addEvent('movie.restatus', self.restatus) - # Clean releases that didn't have activity in the last week - addEvent('app.load', self.cleanReleases) - fireEvent('schedule.interval', 'movie.clean_releases', self.cleanReleases, hours = 4) - - def cleanReleases(self): - - log.debug('Removing releases from dashboard') - - now = time.time() - week = 262080 - - done_status, available_status, snatched_status = \ - fireEvent('status.get', ['done', 'available', 'snatched'], single = True) - - db = get_session() - - # get movies last_edit more than a week ago - movies = db.query(Media) \ - .filter(Media.status_id == done_status.get('id'), Media.last_edit < (now - week)) \ - .all() - - for movie in movies: - for rel in movie.releases: - if rel.status_id in [available_status.get('id'), snatched_status.get('id')]: - fireEvent('release.delete', id = rel.id, single = True) - - db.expire_all() - def getView(self, id = None, **kwargs): movie = self.get(id) if id else None @@ -343,25 +315,6 @@ class MovieBase(MovieTypeBase): 'chars': chars, } - def search(self, q = '', **kwargs): - - cache_key = u'%s/%s' % (__name__, simplifyString(q)) - movies = Env.get('cache').get(cache_key) - - if not movies: - - if getImdb(q): - movies = [fireEvent('movie.info', identifier = q, merge = True)] - else: - movies = fireEvent('movie.search', q = q, merge = True) - Env.get('cache').set(cache_key, movies) - - return { - 'success': True, - 'empty': len(movies) == 0 if movies else 0, - 'movies': movies, - } - def add(self, params = None, force_readd = True, search_after = True, update_library = False, status_id = None): if not params: params = {} diff --git 
a/couchpotato/core/media/movie/_base/static/list.js b/couchpotato/core/media/movie/_base/static/list.js index aaa8be1..db598b2 100644 --- a/couchpotato/core/media/movie/_base/static/list.js +++ b/couchpotato/core/media/movie/_base/static/list.js @@ -52,8 +52,8 @@ var MovieList = new Class({ self.getMovies(); - App.addEvent('movie.added', self.movieAdded.bind(self)) - App.addEvent('movie.deleted', self.movieDeleted.bind(self)) + App.on('movie.added', self.movieAdded.bind(self)) + App.on('movie.deleted', self.movieDeleted.bind(self)) }, movieDeleted: function(notification){ @@ -65,6 +65,7 @@ var MovieList = new Class({ movie.destroy(); delete self.movies_added[notification.data.id]; self.setCounter(self.counter_count-1); + self.total_movies--; } }) } @@ -75,6 +76,7 @@ var MovieList = new Class({ movieAdded: function(notification){ var self = this; + self.fireEvent('movieAdded', notification); if(self.options.add_new && !self.movies_added[notification.data.id] && notification.data.status.identifier == self.options.status){ window.scroll(0,0); self.createMovie(notification.data, 'top'); @@ -390,6 +392,7 @@ var MovieList = new Class({ self.movies.erase(movie); movie.destroy(); self.setCounter(self.counter_count-1); + self.total_movies--; }); self.calculateSelected(); diff --git a/couchpotato/core/media/movie/_base/static/movie.actions.js b/couchpotato/core/media/movie/_base/static/movie.actions.js index 0e9e5bf..e3591f3 100644 --- a/couchpotato/core/media/movie/_base/static/movie.actions.js +++ b/couchpotato/core/media/movie/_base/static/movie.actions.js @@ -126,7 +126,9 @@ MA.Release = new Class({ else self.showHelper(); - App.addEvent('movie.searcher.ended.'+self.movie.data.id, function(notification){ + App.on('movie.searcher.ended', function(notification){ + if(self.movie.data.id != notification.data.id) return; + self.releases = null; if(self.options_container){ self.options_container.destroy(); @@ -250,12 +252,14 @@ MA.Release = new Class({ else 
if(!self.next_release && status.identifier == 'available'){ self.next_release = release; } - + var update_handle = function(notification) { - var q = self.movie.quality.getElement('.q_id' + release.quality_id), + if(notification.data.id != release.id) return; + + var q = self.movie.quality.getElement('.q_id' + release.quality_id), status = Status.get(release.status_id), - new_status = Status.get(notification.data); - + new_status = Status.get(notification.data.status_id); + release.status_id = new_status.id release.el.set('class', 'item ' + new_status.identifier); @@ -272,7 +276,7 @@ MA.Release = new Class({ } } - App.addEvent('release.update_status.' + release.id, update_handle); + App.on('release.update_status', update_handle); }); @@ -285,7 +289,7 @@ MA.Release = new Class({ if(self.next_release || (self.last_release && ['ignored', 'failed'].indexOf(self.last_release.status.identifier) === false)){ self.trynext_container = new Element('div.buttons.try_container').inject(self.release_container, 'top'); - + var nr = self.next_release, lr = self.last_release; @@ -381,7 +385,7 @@ MA.Release = new Class({ }, get: function(release, type){ - return release.info[type] || 'n/a' + return release.info[type] !== undefined ? 
release.info[type] : 'n/a' }, download: function(release){ @@ -393,7 +397,7 @@ MA.Release = new Class({ if(icon) icon.addClass('icon spinner').removeClass('download'); - Api.request('release.download', { + Api.request('release.manual_download', { 'data': { 'id': release.id }, diff --git a/couchpotato/core/media/movie/_base/static/movie.css b/couchpotato/core/media/movie/_base/static/movie.css index c013bd8..a88a207 100644 --- a/couchpotato/core/media/movie/_base/static/movie.css +++ b/couchpotato/core/media/movie/_base/static/movie.css @@ -1036,7 +1036,7 @@ text-overflow: ellipsis; overflow: hidden; width: 85%; - direction: rtl; + direction: ltr; vertical-align: middle; } diff --git a/couchpotato/core/media/movie/_base/static/movie.js b/couchpotato/core/media/movie/_base/static/movie.js index a865325..bc25845 100644 --- a/couchpotato/core/media/movie/_base/static/movie.js +++ b/couchpotato/core/media/movie/_base/static/movie.js @@ -23,23 +23,49 @@ var Movie = new Class({ addEvents: function(){ var self = this; - App.addEvent('movie.update.'+self.data.id, function(notification){ + self.global_events = {} + + // Do refresh with new data + self.global_events['movie.update'] = function(notification){ + if(self.data.id != notification.data.id) return; + self.busy(false); self.removeView(); self.update.delay(2000, self, notification); - }); + } + App.on('movie.update', self.global_events['movie.update']); + // Add spinner on load / search ['movie.busy', 'movie.searcher.started'].each(function(listener){ - App.addEvent(listener+'.'+self.data.id, function(notification){ - if(notification.data) + self.global_events[listener] = function(notification){ + if(notification.data && self.data.id == notification.data.id) self.busy(true) - }); + } + App.on(listener, self.global_events[listener]); }) - App.addEvent('movie.searcher.ended.'+self.data.id, function(notification){ - if(notification.data) + // Remove spinner + self.global_events['movie.searcher.ended'] = 
function(notification){ + if(notification.data && self.data.id == notification.data.id) self.busy(false) - }); + } + App.on('movie.searcher.ended', self.global_events['movie.searcher.ended']); + + // Reload when releases have updated + self.global_events['release.update_status'] = function(notification){ + var data = notification.data + if(data && self.data.id == data.movie_id){ + + if(!self.data.releases) + self.data.releases = []; + + self.data.releases.push({'quality_id': data.quality_id, 'status_id': data.status_id}); + self.updateReleases(); + } + } + + App.on('release.update_status', self.global_events['release.update_status']); + }, destroy: function(){ @@ -52,9 +78,8 @@ var Movie = new Class({ self.list.checkIfEmpty(); // Remove events - App.removeEvents('movie.update.'+self.data.id); - ['movie.busy', 'movie.searcher.started'].each(function(listener){ - App.removeEvents(listener+'.'+self.data.id); + self.global_events.each(function(handle, listener){ + App.off(listener, handle); }) }, @@ -179,21 +204,7 @@ var Movie = new Class({ }); // Add releases - if(self.data.releases) - self.data.releases.each(function(release){ - - var q = self.quality.getElement('.q_id'+ release.quality_id), - status = Status.get(release.status_id); - - if(!q && (status.identifier == 'snatched' || status.identifier == 'seeding' || status.identifier == 'done')) - var q = self.addQuality(release.quality_id) - - if (status && q && !q.hasClass(status.identifier)){ - q.addClass(status.identifier); - q.set('title', (q.get('title') ? 
q.get('title') : '') + ' status: '+ status.label) - } - - }); + self.updateReleases(); Object.each(self.options.actions, function(action, key){ self.action[key.toLowerCase()] = action = new self.options.actions[key](self) @@ -203,6 +214,26 @@ var Movie = new Class({ }, + updateReleases: function(){ + var self = this; + if(!self.data.releases || self.data.releases.length == 0) return; + + self.data.releases.each(function(release){ + + var q = self.quality.getElement('.q_id'+ release.quality_id), + status = Status.get(release.status_id); + + if(!q && (status.identifier == 'snatched' || status.identifier == 'seeding' || status.identifier == 'done')) + var q = self.addQuality(release.quality_id) + + if (status && q && !q.hasClass(status.identifier)){ + q.addClass(status.identifier); + q.set('title', (q.get('title') ? q.get('title') : '') + ' status: '+ status.label) + } + + }); + }, + addQuality: function(quality_id){ var self = this; diff --git a/couchpotato/core/media/movie/_base/static/search.js b/couchpotato/core/media/movie/_base/static/search.js index c62cff6..e4f70c0 100644 --- a/couchpotato/core/media/movie/_base/static/search.js +++ b/couchpotato/core/media/movie/_base/static/search.js @@ -107,7 +107,7 @@ Block.Search.MovieItem = new Class({ self.options_el.empty(); self.options_el.adopt( new Element('div.message', { - 'text': json.added ? 'Movie successfully added.' : 'Movie didn\'t add properly. Check logs' + 'text': json.success ? 'Movie successfully added.' : 'Movie didn\'t add properly. 
Check logs' }) ); self.mask.fade('out'); diff --git a/couchpotato/core/media/movie/library/movie/main.py b/couchpotato/core/media/movie/library/movie/main.py index 6975f73..4918754 100644 --- a/couchpotato/core/media/movie/library/movie/main.py +++ b/couchpotato/core/media/movie/library/movie/main.py @@ -154,7 +154,7 @@ class MovieLibraryPlugin(LibraryBase): else: dates = library.info.get('release_date') - if dates and dates.get('expires', 0) < time.time() or not dates: + if dates and (dates.get('expires', 0) < time.time() or dates.get('expires', 0) > time.time() + (604800 * 4)) or not dates: dates = fireEvent('movie.release_date', identifier = identifier, merge = True) library.info.update({'release_date': dates }) db.commit() diff --git a/couchpotato/core/media/movie/searcher/main.py b/couchpotato/core/media/movie/searcher/main.py index 0020c86..b23fcce 100644 --- a/couchpotato/core/media/movie/searcher/main.py +++ b/couchpotato/core/media/movie/searcher/main.py @@ -117,6 +117,10 @@ class MovieSearcher(SearcherBase, MovieTypeBase): def single(self, movie, search_protocols = None, manual = False): + # movies don't contain 'type' yet, so just set to default here + if not movie.has_key('type'): + movie['type'] = 'movie' + # Find out search type try: if not search_protocols: @@ -145,7 +149,7 @@ class MovieSearcher(SearcherBase, MovieTypeBase): fireEvent('movie.delete', movie['id'], single = True) return - fireEvent('notify.frontend', type = 'movie.searcher.started.%s' % movie['id'], data = True, message = 'Searching for "%s"' % default_title) + fireEvent('notify.frontend', type = 'movie.searcher.started', data = {'id': movie['id']}, message = 'Searching for "%s"' % default_title) ret = False @@ -167,7 +171,7 @@ class MovieSearcher(SearcherBase, MovieTypeBase): log.info('Search for %s in %s', (default_title, quality_type['quality']['label'])) quality = fireEvent('quality.single', identifier = quality_type['quality']['identifier'], single = True) - results = 
fireEvent('searcher.search', search_protocols, movie, quality, single = True) + results = fireEvent('searcher.search', search_protocols, movie, quality, single = True) or [] if len(results) == 0: log.debug('Nothing found for %s in %s', (default_title, quality_type['quality']['label'])) @@ -179,7 +183,7 @@ class MovieSearcher(SearcherBase, MovieTypeBase): found_releases += fireEvent('release.create_from_search', results, movie, quality_type, single = True) # Try find a valid result and download it - if fireEvent('searcher.try_download_result', results, movie, quality_type, manual, single = True): + if fireEvent('release.try_download_result', results, movie, quality_type, manual, single = True): ret = True # Remove releases that aren't found anymore @@ -199,7 +203,7 @@ class MovieSearcher(SearcherBase, MovieTypeBase): if len(too_early_to_search) > 0: log.info2('Too early to search for %s, %s', (too_early_to_search, default_title)) - fireEvent('notify.frontend', type = 'movie.searcher.ended.%s' % movie['id'], data = True) + fireEvent('notify.frontend', type = 'movie.searcher.ended', data = {'id': movie['id']}) return ret diff --git a/couchpotato/core/media/movie/suggestion/static/suggest.css b/couchpotato/core/media/movie/suggestion/static/suggest.css index 99d108d..d4ba734 100644 --- a/couchpotato/core/media/movie/suggestion/static/suggest.css +++ b/couchpotato/core/media/movie/suggestion/static/suggest.css @@ -30,10 +30,10 @@ } .suggestions .media_result .data .info { - top: 15px; + top: 10px; left: 15px; right: 15px; - bottom: 15px; + bottom: 10px; overflow: hidden; } @@ -74,9 +74,45 @@ font-size: 11px; font-style: italic; text-align: right; - } + .suggestions .media_result .data .info .plot { + display: block; + font-size: 11px; + overflow: hidden; + text-align: justify; + height: 100%; + z-index: 2; + top: 64px; + position: absolute; + background: #4e5969; + cursor: pointer; + transition: all .4s ease-in-out; + padding: 0 3px 10px 0; + } + .suggestions 
.media_result .data:before { + bottom: 0; + content: ''; + display: block; + height: 10px; + right: 0; + left: 0; + bottom: 10px; + position: absolute; + background: linear-gradient( + 0deg, + rgba(78, 89, 105, 1) 0%, + rgba(78, 89, 105, 0) 100% + ); + z-index: 3; + pointer-events: none; + } + + .suggestions .media_result .data .info .plot.full { + top: 0; + overflow: auto; + } + .suggestions .media_result .data { cursor: default; } @@ -102,7 +138,7 @@ .suggestions .media_result .actions { position: absolute; - bottom: 10px; + top: 10px; right: 10px; display: none; width: 140px; @@ -110,6 +146,9 @@ .suggestions .media_result:hover .actions { display: block; } + .suggestions .media_result:hover h2 .title { + opacity: 0; + } .suggestions .media_result .data.open .actions { display: none; } diff --git a/couchpotato/core/media/movie/suggestion/static/suggest.js b/couchpotato/core/media/movie/suggestion/static/suggest.js index 8664b0b..cb09ef4 100644 --- a/couchpotato/core/media/movie/suggestion/static/suggest.js +++ b/couchpotato/core/media/movie/suggestion/static/suggest.js @@ -95,6 +95,10 @@ var SuggestList = new Class({ ); m.data_container.removeEvents('click'); + var plot = false; + if(m.info.plot && m.info.plot.length > 0) + plot = m.info.plot; + // Add rating m.info_container.adopt( m.rating = m.info.rating && m.info.rating.imdb.length == 2 && parseFloat(m.info.rating.imdb[0]) > 0 ? new Element('span.rating', { @@ -103,6 +107,14 @@ var SuggestList = new Class({ }) : null, m.genre = m.info.genres && m.info.genres.length > 0 ? new Element('span.genres', { 'text': m.info.genres.slice(0, 3).join(', ') + }) : null, + m.plot = plot ? 
new Element('span.plot', { + 'text': plot, + 'events': { + 'click': function(){ + this.toggleClass('full') + } + } }) : null ) diff --git a/couchpotato/core/notifications/base.py b/couchpotato/core/notifications/base.py index 4c0d099..63d2075 100644 --- a/couchpotato/core/notifications/base.py +++ b/couchpotato/core/notifications/base.py @@ -17,7 +17,7 @@ class Notification(Provider): listen_to = [ 'renamer.after', 'movie.snatched', 'updater.available', 'updater.updated', - 'core.message', + 'core.message.important', ] dont_listen_to = [] diff --git a/couchpotato/core/notifications/core/main.py b/couchpotato/core/notifications/core/main.py index 04acf28..cd63c2c 100644 --- a/couchpotato/core/notifications/core/main.py +++ b/couchpotato/core/notifications/core/main.py @@ -21,6 +21,12 @@ class CoreNotifier(Notification): m_lock = None + listen_to = [ + 'renamer.after', 'movie.snatched', + 'updater.available', 'updater.updated', + 'core.message', 'core.message.important', + ] + def __init__(self): super(CoreNotifier, self).__init__() @@ -121,7 +127,10 @@ class CoreNotifier(Notification): for message in messages: if message.get('time') > last_check: - fireEvent('core.message', message = message.get('message'), data = message) + message['sticky'] = True # Always sticky core messages + + message_type = 'core.message.important' if message.get('important') else 'core.message' + fireEvent(message_type, message = message.get('message'), data = message) if last_check < message.get('time'): last_check = message.get('time') diff --git a/couchpotato/core/notifications/core/static/notification.js b/couchpotato/core/notifications/core/static/notification.js index e485976..a0c3b15 100644 --- a/couchpotato/core/notifications/core/static/notification.js +++ b/couchpotato/core/notifications/core/static/notification.js @@ -10,8 +10,8 @@ var NotificationBase = new Class({ // Listener App.addEvent('unload', self.stopPoll.bind(self)); App.addEvent('reload', self.startInterval.bind(self, 
[true])); - App.addEvent('notification', self.notify.bind(self)); - App.addEvent('message', self.showMessage.bind(self)); + App.on('notification', self.notify.bind(self)); + App.on('message', self.showMessage.bind(self)); // Add test buttons to settings page App.addEvent('load', self.addTestButtons.bind(self)); @@ -50,9 +50,9 @@ var NotificationBase = new Class({ , 'top'); self.notifications.include(result); - if(result.data.important !== undefined && !result.read){ + if((result.data.important !== undefined || result.data.sticky !== undefined) && !result.read){ var sticky = true - App.fireEvent('message', [result.message, sticky, result]) + App.trigger('message', [result.message, sticky, result]) } else if(!result.read){ self.setBadge(self.notifications.filter(function(n){ return !n.read}).length) @@ -147,7 +147,7 @@ var NotificationBase = new Class({ // Process data if(json){ Array.each(json.result, function(result){ - App.fireEvent(result.type, result); + App.trigger(result.type, result); if(result.message && result.read === undefined) self.showMessage(result.message); }) diff --git a/couchpotato/core/notifications/email/__init__.py b/couchpotato/core/notifications/email/__init__.py index b41cc8e..33c2f63 100644 --- a/couchpotato/core/notifications/email/__init__.py +++ b/couchpotato/core/notifications/email/__init__.py @@ -28,6 +28,11 @@ config = [{ 'name': 'smtp_server', 'label': 'SMTP server', }, + { 'name': 'smtp_port', + 'label': 'SMTP server port', + 'default': '25', + 'type': 'int', + }, { 'name': 'ssl', 'label': 'Enable SSL', @@ -35,6 +40,12 @@ config = [{ 'type': 'bool', }, { + 'name': 'starttls', + 'label': 'Enable StartTLS', + 'default': 0, + 'type': 'bool', + }, + { 'name': 'smtp_user', 'label': 'SMTP user', }, diff --git a/couchpotato/core/notifications/email/main.py b/couchpotato/core/notifications/email/main.py index 508e082..41a4323 100644 --- a/couchpotato/core/notifications/email/main.py +++ b/couchpotato/core/notifications/email/main.py @@ -4,6 
+4,7 @@ from couchpotato.core.logger import CPLog from couchpotato.core.notifications.base import Notification from couchpotato.environment import Env from email.mime.text import MIMEText +from email.utils import formatdate, make_msgid import smtplib import traceback @@ -22,18 +23,30 @@ class Email(Notification): smtp_server = self.conf('smtp_server') smtp_user = self.conf('smtp_user') smtp_pass = self.conf('smtp_pass') + smtp_port = self.conf('smtp_port') + starttls = self.conf('starttls') # Make the basic message message = MIMEText(toUnicode(message), _charset = Env.get('encoding')) message['Subject'] = self.default_title message['From'] = from_address message['To'] = to_address + message['Date'] = formatdate(localtime = 1) + message['Message-ID'] = make_msgid() try: # Open the SMTP connection, via SSL if requested + log.debug("Connecting to host %s on port %s" % (smtp_server, smtp_port)) log.debug("SMTP over SSL %s", ("enabled" if ssl == 1 else "disabled")) mailserver = smtplib.SMTP_SSL(smtp_server) if ssl == 1 else smtplib.SMTP(smtp_server) + if (starttls): + log.debug("Using StartTLS to initiate the connection with the SMTP server") + mailserver.starttls() + + # Say hello to the server + mailserver.ehlo() + # Check too see if an login attempt should be attempted if len(smtp_user) > 0: log.debug("Logging on to SMTP server using username \'%s\'%s", (smtp_user, " and a password" if len(smtp_pass) > 0 else "")) diff --git a/couchpotato/core/notifications/notifo/main.py b/couchpotato/core/notifications/notifo/main.py deleted file mode 100644 index 2d56ed7..0000000 --- a/couchpotato/core/notifications/notifo/main.py +++ /dev/null @@ -1,39 +0,0 @@ -from couchpotato.core.helpers.encoding import toUnicode -from couchpotato.core.logger import CPLog -from couchpotato.core.notifications.base import Notification -import base64 -import json -import traceback - -log = CPLog(__name__) - - -class Notifo(Notification): - - url = 'https://api.notifo.com/v1/send_notification' - - 
def notify(self, message = '', data = None, listener = None): - if not data: data = {} - - try: - params = { - 'label': self.default_title, - 'msg': toUnicode(message), - } - - headers = { - 'Authorization': "Basic %s" % base64.encodestring('%s:%s' % (self.conf('username'), self.conf('api_key')))[:-1] - } - - handle = self.urlopen(self.url, params = params, headers = headers) - result = json.loads(handle) - - if result['status'] != 'success' or result['response_message'] != 'OK': - raise Exception - - except: - log.error('Notification failed: %s', traceback.format_exc()) - return False - - log.info('Notifo notification successful.') - return True diff --git a/couchpotato/core/notifications/plex/client.py b/couchpotato/core/notifications/plex/client.py new file mode 100644 index 0000000..b873518 --- /dev/null +++ b/couchpotato/core/notifications/plex/client.py @@ -0,0 +1,85 @@ +import json +from couchpotato import CPLog +from couchpotato.core.event import addEvent +from couchpotato.core.helpers.encoding import tryUrlencode +import requests + +log = CPLog(__name__) + + +class PlexClientProtocol(object): + def __init__(self, plex): + self.plex = plex + + addEvent('notify.plex.notifyClient', self.notify) + + def notify(self, client, message): + raise NotImplementedError() + + +class PlexClientHTTP(PlexClientProtocol): + def request(self, command, client): + url = 'http://%s:%s/xbmcCmds/xbmcHttp/?%s' % ( + client['address'], + client['port'], + tryUrlencode(command) + ) + + headers = {} + + try: + self.plex.urlopen(url, headers = headers, timeout = 3, show_error = False) + except Exception, err: + log.error("Couldn't sent command to Plex: %s", err) + return False + + return True + + def notify(self, client, message): + if client.get('protocol') != 'xbmchttp': + return None + + data = { + 'command': 'ExecBuiltIn', + 'parameter': 'Notification(CouchPotato, %s)' % message + } + + return self.request(data, client) + + +class PlexClientJSON(PlexClientProtocol): + def 
request(self, method, params, client): + log.debug('sendJSON("%s", %s, %s)', (method, params, client)) + url = 'http://%s:%s/jsonrpc' % ( + client['address'], + client['port'] + ) + + headers = { + 'Content-Type': 'application/json' + } + + request = { + 'id': 1, + 'jsonrpc': '2.0', + 'method': method, + 'params': params + } + + try: + requests.post(url, headers = headers, timeout = 3, data = json.dumps(request)) + except Exception, err: + log.error("Couldn't sent command to Plex: %s", err) + return False + + return True + + def notify(self, client, message): + if client.get('protocol') not in ['xbmcjson', 'plex']: + return None + + params = { + 'title': 'CouchPotato', + 'message': message + } + return self.request('GUI.ShowNotification', params, client) diff --git a/couchpotato/core/notifications/plex/main.py b/couchpotato/core/notifications/plex/main.py index 19ca670..ce25c8f 100755 --- a/couchpotato/core/notifications/plex/main.py +++ b/couchpotato/core/notifications/plex/main.py @@ -1,183 +1,64 @@ -from couchpotato.core.event import addEvent -from couchpotato.core.helpers.encoding import tryUrlencode -from couchpotato.core.helpers.variable import cleanHost +from couchpotato.core.event import addEvent, fireEvent from couchpotato.core.logger import CPLog from couchpotato.core.notifications.base import Notification -from datetime import datetime -from urlparse import urlparse -from xml.dom import minidom -import json -import requests -import traceback - -try: - import xml.etree.cElementTree as etree -except ImportError: - import xml.etree.ElementTree as etree +from .client import PlexClientHTTP, PlexClientJSON +from .server import PlexServer log = CPLog(__name__) class Plex(Notification): - client_update_time = 5 * 60 http_time_between_calls = 0 def __init__(self): super(Plex, self).__init__() - self.clients = {} - self.clients_updated = None + self.server = PlexServer(self) - addEvent('renamer.after', self.addToLibrary) - - def updateClients(self, force = False): 
- if not self.conf('media_server'): - log.warning("Plex media server hostname is required") - return - - since_update = ((datetime.now() - self.clients_updated).total_seconds())\ - if self.clients_updated is not None else None - - if force or self.clients_updated is None or since_update > self.client_update_time: - self.clients = {} - - data = self.urlopen('%s/clients' % self.createHost(self.conf('media_server'), port = 32400)) - client_result = etree.fromstring(data) - - clients = [x.strip().lower() for x in self.conf('clients').split(',')] - - for server in client_result.findall('Server'): - if server.get('name').lower() in clients: - clients.remove(server.get('name').lower()) - protocol = server.get('protocol', 'xbmchttp') - - if protocol in ['plex', 'xbmcjson', 'xbmchttp']: - self.clients[server.get('name')] = { - 'name': server.get('name'), - 'address': server.get('address'), - 'port': server.get('port'), - 'protocol': protocol - } - - if len(clients) > 0: - log.info2('Unable to find plex clients: %s', ', '.join(clients)) - - log.info2('Found hosts: %s', ', '.join(self.clients.keys())) + self.client_protocols = { + 'http': PlexClientHTTP(self), + 'json': PlexClientJSON(self) + } - self.clients_updated = datetime.now() + addEvent('renamer.after', self.addToLibrary) def addToLibrary(self, message = None, group = {}): if self.isDisabled(): return - log.info('Sending notification to Plex') - - source_type = ['movie'] - base_url = '%s/library/sections' % self.createHost(self.conf('media_server'), port = 32400) - refresh_url = '%s/%%s/refresh' % base_url - - try: - sections_xml = self.urlopen(base_url) - xml_sections = minidom.parseString(sections_xml) - sections = xml_sections.getElementsByTagName('Directory') - - for s in sections: - if s.getAttribute('type') in source_type: - url = refresh_url % s.getAttribute('key') - x = self.urlopen(url) - - except: - log.error('Plex library update failed for %s, Media Server not running: %s', - (self.conf('media_server'), 
traceback.format_exc(1))) - return False - - return True - - def sendHTTP(self, command, client): - url = 'http://%s:%s/xbmcCmds/xbmcHttp/?%s' % ( - client['address'], - client['port'], - tryUrlencode(command) - ) - - headers = {} - - try: - self.urlopen(url, headers = headers, timeout = 3, show_error = False) - except Exception, err: - log.error("Couldn't sent command to Plex: %s", err) - return False - - return True - - def notifyHTTP(self, message = '', data = {}, listener = None): - total = 0 - successful = 0 - - data = { - 'command': 'ExecBuiltIn', - 'parameter': 'Notification(CouchPotato, %s)' % message - } + return self.server.refresh() - for name, client in self.clients.items(): - if client['protocol'] == 'xbmchttp': - total += 1 - if self.sendHTTP(data, client): - successful += 1 + def getClientNames(self): + return [ + x.strip().lower() + for x in self.conf('clients').split(',') + ] - return successful == total + def notifyClients(self, message, client_names): + success = True - def sendJSON(self, method, params, client): - log.debug('sendJSON("%s", %s, %s)', (method, params, client)) - url = 'http://%s:%s/jsonrpc' % ( - client['address'], - client['port'] - ) + for client_name in client_names: - headers = { - 'Content-Type': 'application/json' - } + client_success = False + client = self.server.clients.get(client_name) - request = { - 'id':1, - 'jsonrpc': '2.0', - 'method': method, - 'params': params - } + if client and client['found']: + client_success = fireEvent('notify.plex.notifyClient', client, message, single = True) - try: - requests.post(url, headers = headers, timeout = 3, data = json.dumps(request)) - except Exception, err: - log.error("Couldn't sent command to Plex: %s", err) - return False + if not client_success: + if self.server.staleClients() or not client: + log.info('Failed to send notification to client "%s". 
' + 'Client list is stale, updating the client list and retrying.', client_name) + self.server.updateClients(self.getClientNames()) + else: + log.warning('Failed to send notification to client %s, skipping this time', client_name) + success = False - return True + return success - def notifyJSON(self, message = '', data = {}, listener = None): - total = 0 - successful = 0 - - params = { - 'title': 'CouchPotato', - 'message': message - } - - for name, client in self.clients.items(): - if client['protocol'] in ['xbmcjson', 'plex']: - total += 1 - if self.sendJSON('GUI.ShowNotification', params, client): - successful += 1 - - return successful == total - - def notify(self, message = '', data = {}, listener = None, force = False): - self.updateClients(force) - - http_result = self.notifyHTTP(message, data, listener) - json_result = self.notifyJSON(message, data, listener) - - return http_result and json_result + def notify(self, message = '', data = {}, listener = None): + return self.notifyClients(message, self.getClientNames()) def test(self, **kwargs): @@ -185,24 +66,12 @@ class Plex(Notification): log.info('Sending test to %s', test_type) - success = self.notify( + notify_success = self.notify( message = self.test_message, data = {}, - listener = 'test', - force = True + listener = 'test' ) - success2 = self.addToLibrary() - - return { - 'success': success or success2 - } - - def createHost(self, host, port = None): - h = cleanHost(host) - p = urlparse(h) - h = h.rstrip('/') - if port and not p.port: - h += ':%s' % port + refresh_success = self.addToLibrary() - return h + return {'success': notify_success or refresh_success} diff --git a/couchpotato/core/notifications/plex/server.py b/couchpotato/core/notifications/plex/server.py new file mode 100644 index 0000000..b66db8f --- /dev/null +++ b/couchpotato/core/notifications/plex/server.py @@ -0,0 +1,114 @@ +from datetime import timedelta, datetime +from couchpotato.core.helpers.variable import cleanHost +from 
couchpotato import CPLog +from urlparse import urlparse +import traceback + + +try: + import xml.etree.cElementTree as etree +except ImportError: + import xml.etree.ElementTree as etree + +log = CPLog(__name__) + + +class PlexServer(object): + def __init__(self, plex): + self.plex = plex + + self.clients = {} + self.last_clients_update = None + + def staleClients(self): + if not self.last_clients_update: + return True + + return self.last_clients_update + timedelta(minutes=15) < datetime.now() + + def request(self, path, data_type='xml'): + if not self.plex.conf('media_server'): + log.warning("Plex media server hostname is required") + return None + + if path.startswith('/'): + path = path[1:] + + data = self.plex.urlopen('%s/%s' % ( + self.createHost(self.plex.conf('media_server'), port = 32400), + path + )) + + if data_type == 'xml': + return etree.fromstring(data) + else: + return data + + def updateClients(self, client_names): + log.info('Searching for clients on Plex Media Server') + + self.clients = {} + + result = self.request('clients') + if not result: + return + + found_clients = [ + c for c in result.findall('Server') + if c.get('name') and c.get('name').lower() in client_names + ] + + # Store client details in cache + for client in found_clients: + name = client.get('name').lower() + + self.clients[name] = { + 'name': client.get('name'), + 'found': True, + 'address': client.get('address'), + 'port': client.get('port'), + 'protocol': client.get('protocol', 'xbmchttp') + } + + client_names.remove(name) + + # Store dummy info for missing clients + for client_name in client_names: + self.clients[client_name] = { + 'found': False + } + + if len(client_names) > 0: + log.debug('Unable to find clients: %s', ', '.join(client_names)) + + self.last_clients_update = datetime.now() + + def refresh(self, section_types=None): + if not section_types: + section_types = ['movie'] + + sections = self.request('library/sections') + + try: + for section in 
sections.findall('Directory'): + if section.get('type') not in section_types: + continue + + self.request('library/sections/%s/refresh' % section.get('key'), 'text') + except: + log.error('Plex library update failed for %s, Media Server not running: %s', + (self.plex.conf('media_server'), traceback.format_exc(1))) + return False + + return True + + def createHost(self, host, port = None): + + h = cleanHost(host) + p = urlparse(h) + h = h.rstrip('/') + + if port and not p.port: + h += ':%s' % port + + return h diff --git a/couchpotato/core/notifications/notifo/__init__.py b/couchpotato/core/notifications/pushbullet/__init__.py similarity index 66% rename from couchpotato/core/notifications/notifo/__init__.py rename to couchpotato/core/notifications/pushbullet/__init__.py index 941246c..e61a44e 100644 --- a/couchpotato/core/notifications/notifo/__init__.py +++ b/couchpotato/core/notifications/pushbullet/__init__.py @@ -1,16 +1,15 @@ -from .main import Notifo +from .main import Pushbullet def start(): - return Notifo() + return Pushbullet() config = [{ - 'name': 'notifo', + 'name': 'pushbullet', 'groups': [ { 'tab': 'notifications', 'list': 'notification_providers', - 'name': 'notifo', - 'description': 'Keep in mind that Notifo service will end soon.', + 'name': 'pushbullet', 'options': [ { 'name': 'enabled', @@ -18,10 +17,14 @@ config = [{ 'type': 'enabler', }, { - 'name': 'username', + 'name': 'api_key', + 'label': 'User API Key' }, { - 'name': 'api_key', + 'name': 'devices', + 'default': '', + 'advanced': True, + 'description': 'IDs of devices to send notifications to, empty = all devices' }, { 'name': 'on_snatch', diff --git a/couchpotato/core/notifications/pushbullet/main.py b/couchpotato/core/notifications/pushbullet/main.py new file mode 100644 index 0000000..2e6db29 --- /dev/null +++ b/couchpotato/core/notifications/pushbullet/main.py @@ -0,0 +1,86 @@ +from couchpotato.core.helpers.encoding import toUnicode +from couchpotato.core.helpers.variable import tryInt 
+from couchpotato.core.logger import CPLog +from couchpotato.core.notifications.base import Notification +import base64 +import json + +log = CPLog(__name__) + + +class Pushbullet(Notification): + + url = 'https://api.pushbullet.com/api/%s' + + def notify(self, message = '', data = None, listener = None): + if not data: data = {} + + devices = self.getDevices() + if devices is None: + return False + + # Get all the device IDs linked to this user + if not len(devices): + response = self.request('devices') + if not response: + return False + + devices += [device.get('id') for device in response['devices']] + + successful = 0 + for device in devices: + response = self.request( + 'pushes', + cache = False, + device_id = device, + type = 'note', + title = self.default_title, + body = toUnicode(message) + ) + + if response: + successful += 1 + else: + log.error('Unable to push notification to Pushbullet device with ID %s' % device) + + return successful == len(devices) + + def getDevices(self): + devices = [d.strip() for d in self.conf('devices').split(',')] + + # Remove empty items + devices = [d for d in devices if len(d)] + + # Break on any ids that aren't integers + valid_devices = [] + + for device_id in devices: + d = tryInt(device_id, None) + + if not d: + log.error('Device ID "%s" is not valid', device_id) + return None + + valid_devices.append(d) + + return valid_devices + + def request(self, method, cache = True, **kwargs): + try: + base64string = base64.encodestring('%s:' % self.conf('api_key'))[:-1] + + headers = { + "Authorization": "Basic %s" % base64string + } + + if cache: + return self.getJsonData(self.url % method, headers = headers, params = kwargs) + else: + data = self.urlopen(self.url % method, headers = headers, params = kwargs) + return json.loads(data) + + except Exception, ex: + log.error('Pushbullet request failed') + log.debug(ex) + + return None diff --git a/couchpotato/core/notifications/xmpp/__init__.py 
b/couchpotato/core/notifications/xmpp/__init__.py new file mode 100644 index 0000000..a52242f --- /dev/null +++ b/couchpotato/core/notifications/xmpp/__init__.py @@ -0,0 +1,52 @@ +from .main import Xmpp + +def start(): + return Xmpp() + +config = [{ + 'name': 'xmpp', + 'groups': [ + { + 'tab': 'notifications', + 'list': 'notification_providers', + 'name': 'xmpp', + 'label': 'XMPP', + 'description`': 'for Jabber, Hangouts (Google Talk), AIM...', + 'options': [ + { + 'name': 'enabled', + 'default': 0, + 'type': 'enabler', + }, + { + 'name': 'username', + 'description': 'User sending the message. For Hangouts, e-mail of a single-step authentication Google account.', + }, + { + 'name': 'password', + 'type': 'Password', + }, + { + 'name': 'hostname', + 'default': 'talk.google.com', + }, + { + 'name': 'to', + 'description': 'Username (or e-mail for Hangouts) of the person to send the messages to.', + }, + { + 'name': 'port', + 'type': 'int', + 'default': 5222, + }, + { + 'name': 'on_snatch', + 'default': 0, + 'type': 'bool', + 'advanced': True, + 'description': 'Also send message when movie is snatched.', + }, + ], + } + ], +}] diff --git a/couchpotato/core/notifications/xmpp/main.py b/couchpotato/core/notifications/xmpp/main.py new file mode 100644 index 0000000..0011e41 --- /dev/null +++ b/couchpotato/core/notifications/xmpp/main.py @@ -0,0 +1,43 @@ +from couchpotato.core.logger import CPLog +from couchpotato.core.notifications.base import Notification +from time import sleep +import traceback +import xmpp + +log = CPLog(__name__) + + +class Xmpp(Notification): + + def notify(self, message = '', data = None, listener = None): + if not data: data = {} + + try: + jid = xmpp.protocol.JID(self.conf('username')) + client = xmpp.Client(jid.getDomain(), debug = []) + + # Connect + if not client.connect(server = (self.conf('hostname'), self.conf('port'))): + log.error('XMPP failed: Connection to server failed.') + return False + + # Authenticate + if not 
client.auth(jid.getNode(), self.conf('password'), resource = jid.getResource()): + log.error('XMPP failed: Failed to authenticate.') + return False + + # Send message + client.send(xmpp.protocol.Message(to = self.conf('to'), body = message, typ = 'chat')) + + # Disconnect + # some older servers will not send the message if you disconnect immediately after sending + sleep(1) + client.disconnect() + + log.info('XMPP notifications sent.') + return True + + except: + log.error('XMPP failed: %s', traceback.format_exc()) + + return False diff --git a/couchpotato/core/plugins/base.py b/couchpotato/core/plugins/base.py index c90c48c..649e359 100644 --- a/couchpotato/core/plugins/base.py +++ b/couchpotato/core/plugins/base.py @@ -1,7 +1,7 @@ from StringIO import StringIO from couchpotato.core.event import fireEvent, addEvent from couchpotato.core.helpers.encoding import tryUrlencode, ss, toSafeString, \ - toUnicode + toUnicode, sp from couchpotato.core.helpers.variable import getExt, md5, isLocalIP from couchpotato.core.logger import CPLog from couchpotato.environment import Env @@ -291,10 +291,10 @@ class Plugin(object): def createNzbName(self, data, movie): tag = self.cpTag(movie) - return '%s%s' % (toSafeString(data.get('name')[:127 - len(tag)]), tag) + return '%s%s' % (toSafeString(toUnicode(data.get('name'))[:127 - len(tag)]), tag) def createFileName(self, data, filedata, movie): - name = os.path.join(self.createNzbName(data, movie)) + name = sp(os.path.join(self.createNzbName(data, movie))) if data.get('protocol') == 'nzb' and 'DOCTYPE nzb' not in filedata and '' not in filedata: return '%s.%s' % (name, 'rar') return '%s.%s' % (name, data.get('protocol')) diff --git a/couchpotato/core/plugins/manage/main.py b/couchpotato/core/plugins/manage/main.py index e8ccaf7..8720761 100644 --- a/couchpotato/core/plugins/manage/main.py +++ b/couchpotato/core/plugins/manage/main.py @@ -79,6 +79,7 @@ class Manage(Plugin): try: directories = self.directories() + directories.sort() 
added_identifiers = [] # Add some progress diff --git a/couchpotato/core/plugins/quality/main.py b/couchpotato/core/plugins/quality/main.py index e5dcfb1..560c390 100644 --- a/couchpotato/core/plugins/quality/main.py +++ b/couchpotato/core/plugins/quality/main.py @@ -2,7 +2,7 @@ from couchpotato import get_session from couchpotato.api import addApiView from couchpotato.core.event import addEvent from couchpotato.core.helpers.encoding import toUnicode, ss -from couchpotato.core.helpers.variable import mergeDicts, md5, getExt +from couchpotato.core.helpers.variable import mergeDicts, getExt from couchpotato.core.logger import CPLog from couchpotato.core.plugins.base import Plugin from couchpotato.core.settings.model import Quality, Profile, ProfileType @@ -38,6 +38,9 @@ class QualityPlugin(Plugin): ] pre_releases = ['cam', 'ts', 'tc', 'r5', 'scr'] + cached_qualities = None + cached_order = None + def __init__(self): addEvent('quality.all', self.all) addEvent('quality.single', self.single) @@ -55,6 +58,8 @@ class QualityPlugin(Plugin): addEvent('app.initialize', self.fill, priority = 10) + addEvent('app.test', self.doTest) + def preReleases(self): return self.pre_releases @@ -67,6 +72,9 @@ class QualityPlugin(Plugin): def all(self): + if self.cached_qualities: + return self.cached_qualities + db = get_session() qualities = db.query(Quality).all() @@ -76,6 +84,7 @@ class QualityPlugin(Plugin): q = mergeDicts(self.getQuality(quality.identifier), quality.to_dict()) temp.append(q) + self.cached_qualities = temp return temp def single(self, identifier = ''): @@ -104,6 +113,8 @@ class QualityPlugin(Plugin): setattr(quality, kwargs.get('value_type'), kwargs.get('value')) db.commit() + self.cached_qualities = None + return { 'success': True } @@ -164,77 +175,149 @@ class QualityPlugin(Plugin): if not extra: extra = {} # Create hash for cache - cache_key = md5(str([f.replace('.' + getExt(f), '') for f in files])) + cache_key = str([f.replace('.' 
+ getExt(f), '') if len(getExt(f)) < 4 else f for f in files]) cached = self.getCache(cache_key) - if cached and len(extra) == 0: return cached + if cached and len(extra) == 0: + return cached qualities = self.all() + + # Start with 0 + score = {} + for quality in qualities: + score[quality.get('identifier')] = 0 + for cur_file in files: words = re.split('\W+', cur_file.lower()) - found = {} for quality in qualities: - contains = self.containsTag(quality, words, cur_file) - if contains: - found[quality['identifier']] = True + contains_score = self.containsTagScore(quality, words, cur_file) + self.calcScore(score, quality, contains_score) - for quality in qualities: + # Try again with loose testing + for quality in qualities: + loose_score = self.guessLooseScore(quality, files = files, extra = extra) + self.calcScore(score, quality, loose_score) - # Check identifier - if quality['identifier'] in words: - if len(found) == 0 or len(found) == 1 and found.get(quality['identifier']): - log.debug('Found via identifier "%s" in %s', (quality['identifier'], cur_file)) - return self.setCache(cache_key, quality) - # Check alt and tags - contains = self.containsTag(quality, words, cur_file) - if contains: - return self.setCache(cache_key, quality) + # Return nothing if all scores are 0 + has_non_zero = 0 + for s in score: + if score[s] > 0: + has_non_zero += 1 - # Try again with loose testing - quality = self.guessLoose(cache_key, files = files, extra = extra) - if quality: - return self.setCache(cache_key, quality) + if not has_non_zero: + return None + + heighest_quality = max(score, key = score.get) + if heighest_quality: + for quality in qualities: + if quality.get('identifier') == heighest_quality: + return self.setCache(cache_key, quality) - log.debug('Could not identify quality for: %s', files) return None - def containsTag(self, quality, words, cur_file = ''): + def containsTagScore(self, quality, words, cur_file = ''): cur_file = ss(cur_file) + score = 0 + + points = { 
+ 'identifier': 10, + 'label': 10, + 'alternative': 9, + 'tags': 9, + 'ext': 3, + } # Check alt and tags - for tag_type in ['alternative', 'tags', 'label']: + for tag_type in ['identifier', 'alternative', 'tags', 'label']: qualities = quality.get(tag_type, []) qualities = [qualities] if isinstance(qualities, (str, unicode)) else qualities for alt in qualities: - if (isinstance(alt, tuple) and '.'.join(alt) in '.'.join(words)) or (isinstance(alt, (str, unicode)) and ss(alt.lower()) in cur_file.lower()): + if (isinstance(alt, tuple)): + if len(set(words) & set(alt)) == len(alt): + log.debug('Found %s via %s %s in %s', (quality['identifier'], tag_type, quality.get(tag_type), cur_file)) + score += points.get(tag_type) + + if (isinstance(alt, (str, unicode)) and ss(alt.lower()) in cur_file.lower()): log.debug('Found %s via %s %s in %s', (quality['identifier'], tag_type, quality.get(tag_type), cur_file)) - return True + score += points.get(tag_type) / 2 if list(set(qualities) & set(words)): log.debug('Found %s via %s %s in %s', (quality['identifier'], tag_type, quality.get(tag_type), cur_file)) - return True + score += points.get(tag_type) - return + # Check extention + for ext in quality.get('ext', []): + if ext == words[-1]: + log.debug('Found %s extension in %s', (ext, cur_file)) + score += points['ext'] - def guessLoose(self, cache_key, files = None, extra = None): + return score + + def guessLooseScore(self, quality, files = None, extra = None): + + score = 0 if extra: - for quality in self.all(): - # Check width resolution, range 20 - if quality.get('width') and (quality.get('width') - 20) <= extra.get('resolution_width', 0) <= (quality.get('width') + 20): - log.debug('Found %s via resolution_width: %s == %s', (quality['identifier'], quality.get('width'), extra.get('resolution_width', 0))) - return self.setCache(cache_key, quality) + # Check width resolution, range 20 + if quality.get('width') and (quality.get('width') - 20) <= extra.get('resolution_width', 0) <= 
(quality.get('width') + 20): + log.debug('Found %s via resolution_width: %s == %s', (quality['identifier'], quality.get('width'), extra.get('resolution_width', 0))) + score += 5 - # Check height resolution, range 20 - if quality.get('height') and (quality.get('height') - 20) <= extra.get('resolution_height', 0) <= (quality.get('height') + 20): - log.debug('Found %s via resolution_height: %s == %s', (quality['identifier'], quality.get('height'), extra.get('resolution_height', 0))) - return self.setCache(cache_key, quality) + # Check height resolution, range 20 + if quality.get('height') and (quality.get('height') - 20) <= extra.get('resolution_height', 0) <= (quality.get('height') + 20): + log.debug('Found %s via resolution_height: %s == %s', (quality['identifier'], quality.get('height'), extra.get('resolution_height', 0))) + score += 5 + + if quality.get('identifier') == 'dvdrip' and 480 <= extra.get('resolution_width', 0) <= 720: + log.debug('Add point for correct dvdrip resolutions') + score += 1 + + return score + + def calcScore(self, score, quality, add_score): + + score[quality['identifier']] += add_score + + # Set order for allow calculation (and cache) + if not self.cached_order: + self.cached_order = {} + for q in self.qualities: + self.cached_order[q.get('identifier')] = self.qualities.index(q) + + if add_score != 0: + for allow in quality.get('allow', []): + score[allow] -= 40 if self.cached_order[allow] < self.cached_order[quality['identifier']] else 5 + + def doTest(self): + + tests = { + 'Movie Name (1999)-DVD-Rip.avi': 'dvdrip', + 'Movie Name 1999 720p Bluray.mkv': '720p', + 'Movie Name 1999 BR-Rip 720p.avi': 'brrip', + 'Movie Name 1999 720p Web Rip.avi': 'scr', + 'Movie Name 1999 Web DL.avi': 'brrip', + 'Movie.Name.1999.1080p.WEBRip.H264-Group': 'scr', + 'Movie.Name.1999.DVDRip-Group': 'dvdrip', + 'Movie.Name.1999.DVD-Rip-Group': 'dvdrip', + 'Movie.Name.1999.DVD-R-Group': 'dvdr', + } + + correct = 0 + for name in tests: + success = 
self.guess([name]).get('identifier') == tests[name] + if not success: + log.error('%s failed check, thinks it\'s %s', (name, self.guess([name]).get('identifier'))) + + correct += success + + if correct == len(tests): + log.info('Quality test successful') + return True + else: + log.error('Quality test failed: %s out of %s succeeded', (correct, len(tests))) - if 480 <= extra.get('resolution_width', 0) <= 720: - log.debug('Found as dvdrip') - return self.setCache(cache_key, self.single('dvdrip')) - return None diff --git a/couchpotato/core/plugins/release/main.py b/couchpotato/core/plugins/release/main.py index aa7ed50..046b66c 100644 --- a/couchpotato/core/plugins/release/main.py +++ b/couchpotato/core/plugins/release/main.py @@ -2,10 +2,14 @@ from couchpotato import get_session, md5 from couchpotato.api import addApiView from couchpotato.core.event import fireEvent, addEvent from couchpotato.core.helpers.encoding import ss, toUnicode +from couchpotato.core.helpers.variable import getTitle from couchpotato.core.logger import CPLog from couchpotato.core.plugins.base import Plugin from couchpotato.core.plugins.scanner.main import Scanner -from couchpotato.core.settings.model import File, Release as Relea, Media, ReleaseInfo +from couchpotato.core.settings.model import File, Release as Relea, Media, \ + ReleaseInfo +from couchpotato.environment import Env +from inspect import ismethod, isfunction from sqlalchemy.exc import InterfaceError from sqlalchemy.orm import joinedload_all from sqlalchemy.sql.expression import and_, or_ @@ -21,7 +25,7 @@ class Release(Plugin): def __init__(self): addEvent('release.add', self.add) - addApiView('release.download', self.download, docs = { + addApiView('release.manual_download', self.manualDownload, docs = { 'desc': 'Send a release manually to the downloaders', 'params': { 'id': {'type': 'id', 'desc': 'ID of the release object in release-table'} @@ -46,12 +50,46 @@ class Release(Plugin): } }) + addEvent('release.download', 
self.download) + addEvent('release.try_download_result', self.tryDownloadResult) addEvent('release.create_from_search', self.createFromSearch) addEvent('release.for_movie', self.forMovie) addEvent('release.delete', self.delete) addEvent('release.clean', self.clean) addEvent('release.update_status', self.updateStatus) + # Clean releases that didn't have activity in the last week + addEvent('app.load', self.cleanDone) + fireEvent('schedule.interval', 'movie.clean_releases', self.cleanDone, hours = 4) + + def cleanDone(self): + + log.debug('Removing releases from dashboard') + + now = time.time() + week = 262080 + + done_status, available_status, snatched_status, downloaded_status, ignored_status = \ + fireEvent('status.get', ['done', 'available', 'snatched', 'downloaded', 'ignored'], single = True) + + db = get_session() + + # get movies last_edit more than a week ago + media = db.query(Media) \ + .filter(Media.status_id == done_status.get('id'), Media.last_edit < (now - week)) \ + .all() + + for item in media: + for rel in item.releases: + # Remove all available releases + if rel.status_id in [available_status.get('id')]: + fireEvent('release.delete', id = rel.id, single = True) + # Set all snatched and downloaded releases to ignored to make sure they are ignored when re-adding the move + elif rel.status_id in [snatched_status.get('id'), downloaded_status.get('id')]: + self.updateStatus(id = rel.id, status = ignored_status) + + db.expire_all() + def add(self, group): db = get_session() @@ -108,7 +146,6 @@ class Release(Plugin): return True - def saveFile(self, filepath, type = 'unknown', include_media_info = False): properties = {} @@ -169,19 +206,17 @@ class Release(Plugin): 'success': True } - def download(self, id = None, **kwargs): + def manualDownload(self, id = None, **kwargs): db = get_session() - snatched_status, done_status = fireEvent('status.get', ['snatched', 'done'], single = True) - rel = db.query(Relea).filter_by(id = id).first() if rel: item = {} for 
info in rel.info: item[info.identifier] = info.value - fireEvent('notify.frontend', type = 'release.download', data = True, message = 'Snatching "%s"' % item['name']) + fireEvent('notify.frontend', type = 'release.manual_download', data = True, message = 'Snatching "%s"' % item['name']) # Get matching provider provider = fireEvent('provider.belongs_to', item['url'], provider = item.get('provider'), single = True) @@ -193,18 +228,18 @@ class Release(Plugin): if item.get('protocol') != 'torrent_magnet': item['download'] = provider.loginDownload if provider.urls.get('login') else provider.download - success = fireEvent('searcher.download', data = item, media = rel.media.to_dict({ + success = self.download(data = item, media = rel.movie.to_dict({ 'profile': {'types': {'quality': {}}}, 'releases': {'status': {}, 'quality': {}}, 'library': {'titles': {}, 'files':{}}, 'files': {} - }), manual = True, single = True) + }), manual = True) if success: db.expunge_all() rel = db.query(Relea).filter_by(id = id).first() # Get release again @RuudBurger why do we need to get it again?? 
- fireEvent('notify.frontend', type = 'release.download', data = True, message = 'Successfully snatched "%s"' % item['name']) + fireEvent('notify.frontend', type = 'release.manual_download', data = True, message = 'Successfully snatched "%s"' % item['name']) return { 'success': success } @@ -215,9 +250,108 @@ class Release(Plugin): 'success': False } + def download(self, data, media, manual = False): + + if not data.get('protocol'): + data['protocol'] = data['type'] + data['type'] = 'movie' + + # Test to see if any downloaders are enabled for this type + downloader_enabled = fireEvent('download.enabled', manual, data, single = True) + + if downloader_enabled: + snatched_status, done_status, active_status = fireEvent('status.get', ['snatched', 'done', 'active'], single = True) + + # Download release to temp + filedata = None + if data.get('download') and (ismethod(data.get('download')) or isfunction(data.get('download'))): + filedata = data.get('download')(url = data.get('url'), nzb_id = data.get('id')) + if filedata == 'try_next': + return filedata + + download_result = fireEvent('download', data = data, movie = media, manual = manual, filedata = filedata, single = True) + log.debug('Downloader result: %s', download_result) + + if download_result: + try: + # Mark release as snatched + db = get_session() + rls = db.query(Relea).filter_by(identifier = md5(data['url'])).first() + if rls: + renamer_enabled = Env.setting('enabled', 'renamer') + + # Save download-id info if returned + if isinstance(download_result, dict): + for key in download_result: + rls_info = ReleaseInfo( + identifier = 'download_%s' % key, + value = toUnicode(download_result.get(key)) + ) + rls.info.append(rls_info) + db.commit() + + log_movie = '%s (%s) in %s' % (getTitle(media['library']), media['library']['year'], rls.quality.label) + snatch_message = 'Snatched "%s": %s' % (data.get('name'), log_movie) + log.info(snatch_message) + fireEvent('%s.snatched' % data['type'], message = snatch_message, 
data = rls.to_dict()) + + # If renamer isn't used, mark media done + if not renamer_enabled: + try: + if media['status_id'] == active_status.get('id'): + for profile_type in media['profile']['types']: + if profile_type['quality_id'] == rls.quality.id and profile_type['finish']: + log.info('Renamer disabled, marking media as finished: %s', log_movie) + + # Mark release done + self.updateStatus(rls.id, status = done_status) + + # Mark media done + mdia = db.query(Media).filter_by(id = media['id']).first() + mdia.status_id = done_status.get('id') + mdia.last_edit = int(time.time()) + db.commit() + except: + log.error('Failed marking media finished, renamer disabled: %s', traceback.format_exc()) + else: + self.updateStatus(rls.id, status = snatched_status) + + except: + log.error('Failed marking media finished: %s', traceback.format_exc()) + + return True + + log.info('Tried to download, but none of the "%s" downloaders are enabled or gave an error', (data.get('protocol'))) + + return False + + def tryDownloadResult(self, results, media, quality_type, manual = False): + ignored_status, failed_status = fireEvent('status.get', ['ignored', 'failed'], single = True) + + for rel in results: + if not quality_type.get('finish', False) and quality_type.get('wait_for', 0) > 0 and rel.get('age') <= quality_type.get('wait_for', 0): + log.info('Ignored, waiting %s days: %s', (quality_type.get('wait_for'), rel['name'])) + continue + + if rel['status_id'] in [ignored_status.get('id'), failed_status.get('id')]: + log.info('Ignored: %s', rel['name']) + continue + + if rel['score'] <= 0: + log.info('Ignored, score to low: %s', rel['name']) + continue + + downloaded = fireEvent('release.download', data = rel, media = media, manual = manual, single = True) + if downloaded is True: + return True + elif downloaded != 'try_next': + break + + return False + def createFromSearch(self, search_results, media, quality_type): - available_status, ignored_status, failed_status = 
fireEvent('status.get', ['available', 'ignored', 'failed'], single = True) + available_status = fireEvent('status.get', ['available'], single = True) db = get_session() found_releases = [] @@ -231,7 +365,8 @@ class Release(Plugin): if not rls: rls = Relea( identifier = rel_identifier, - media_id = media.get('id'), + movie_id = media.get('id'), + #media_id = media.get('id'), quality_id = quality_type.get('quality_id'), status_id = available_status.get('id') ) @@ -286,7 +421,7 @@ class Release(Plugin): } def updateStatus(self, id, status = None): - if not status: return + if not status: return False db = get_session() @@ -297,11 +432,20 @@ class Release(Plugin): for info in rel.info: item[info.identifier] = info.value + if rel.files: + for file_item in rel.files: + if file_item.type.identifier == 'movie': + release_name = os.path.basename(file_item.path) + break + else: + release_name = item['name'] #update status in Db - log.debug('Marking release %s as %s', (item['name'], status.get("label"))) + log.debug('Marking release %s as %s', (release_name, status.get("label"))) rel.status_id = status.get('id') rel.last_edit = int(time.time()) db.commit() #Update all movie info as there is no release update function - fireEvent('notify.frontend', type = 'release.update_status.%s' % rel.id, data = status.get('id')) + fireEvent('notify.frontend', type = 'release.update_status', data = rel.to_dict()) + + return True diff --git a/couchpotato/core/plugins/renamer/__init__.py b/couchpotato/core/plugins/renamer/__init__.py index 921b3e1..c8f6b37 100755 --- a/couchpotato/core/plugins/renamer/__init__.py +++ b/couchpotato/core/plugins/renamer/__init__.py @@ -28,6 +28,7 @@ rename_options = { 'cd': 'CD number (cd1)', 'cd_nr': 'Just the cd nr. 
(1)', 'mpaa': 'MPAA Rating', + 'category': 'Category label', }, } diff --git a/couchpotato/core/plugins/renamer/main.py b/couchpotato/core/plugins/renamer/main.py index b532b2a..d95424d 100755 --- a/couchpotato/core/plugins/renamer/main.py +++ b/couchpotato/core/plugins/renamer/main.py @@ -1,9 +1,9 @@ from couchpotato import get_session from couchpotato.api import addApiView from couchpotato.core.event import addEvent, fireEvent, fireEventAsync -from couchpotato.core.helpers.encoding import toUnicode, ss +from couchpotato.core.helpers.encoding import toUnicode, ss, sp from couchpotato.core.helpers.variable import getExt, mergeDicts, getTitle, \ - getImdb, link, symlink, tryInt + getImdb, link, symlink, tryInt, splitString from couchpotato.core.logger import CPLog from couchpotato.core.plugins.base import Plugin from couchpotato.core.settings.model import Library, File, Profile, Release, \ @@ -31,8 +31,10 @@ class Renamer(Plugin): 'params': { 'async': {'desc': 'Optional: Set to 1 if you dont want to fire the renamer.scan asynchronous.'}, 'movie_folder': {'desc': 'Optional: The folder of the movie to scan. Keep empty for default renamer folder.'}, - 'downloader' : {'desc': 'Optional: The downloader this movie has been downloaded with'}, - 'download_id': {'desc': 'Optional: The downloader\'s nzb/torrent ID'}, + 'files': {'desc': 'Optional: Provide the release files if more releases are in the same movie_folder, delimited with a \'|\'. Note that no dedicated release folder is expected for releases with one file.'}, + 'downloader' : {'desc': 'Optional: The downloader the release has been downloaded with. \'download_id\' is required with this option.'}, + 'download_id': {'desc': 'Optional: The nzb/torrent ID of the release in movie_folder. 
\'downloader\' is required with this option.'}, + 'status': {'desc': 'Optional: The status of the release: \'completed\' (default) or \'seeding\''}, }, }) @@ -62,23 +64,26 @@ class Renamer(Plugin): def scanView(self, **kwargs): async = tryInt(kwargs.get('async', 0)) - movie_folder = kwargs.get('movie_folder') + movie_folder = sp(kwargs.get('movie_folder')) downloader = kwargs.get('downloader') download_id = kwargs.get('download_id') + files = '|'.join([sp(filename) for filename in splitString(kwargs.get('files'), '|')]) + status = kwargs.get('status', 'completed') - download_info = {'folder': movie_folder} if movie_folder else None - if download_info: - download_info.update({'id': download_id, 'downloader': downloader} if download_id else {}) + release_download = {'folder': movie_folder} if movie_folder else None + if release_download: + release_download.update({'id': download_id, 'downloader': downloader, 'status': status, 'files': files} if download_id else {}) fire_handle = fireEvent if not async else fireEventAsync - fire_handle('renamer.scan', download_info) + fire_handle('renamer.scan', release_download) return { 'success': True } - def scan(self, download_info = None): + def scan(self, release_download = None): + if not release_download: release_download = {} if self.isDisabled(): return @@ -87,22 +92,66 @@ class Renamer(Plugin): log.info('Renamer is already running, if you see this often, check the logs above for errors.') return - movie_folder = download_info and download_info.get('folder') + from_folder = sp(self.conf('from')) + to_folder = sp(self.conf('to')) - # Check to see if the "to" folder is inside the "from" folder. 
- if movie_folder and not os.path.isdir(movie_folder) or not os.path.isdir(self.conf('from')) or not os.path.isdir(self.conf('to')): - l = log.debug if movie_folder else log.error - l('Both the "To" and "From" have to exist.') - return - elif self.conf('from') in self.conf('to'): - log.error('The "to" can\'t be inside of the "from" folder. You\'ll get an infinite loop.') - return - elif movie_folder and movie_folder in [self.conf('to'), self.conf('from')]: - log.error('The "to" and "from" folders can\'t be inside of or the same as the provided movie folder.') + # Get movie folder to process + movie_folder = release_download.get('folder') + + # Get all folders that should not be processed + no_process = [to_folder] + cat_list = fireEvent('category.all', single = True) or [] + no_process.extend([item['destination'] for item in cat_list]) + try: + if Env.setting('library', section = 'manage').strip(): + no_process.extend([sp(manage_folder) for manage_folder in splitString(Env.setting('library', section = 'manage'), '::')]) + except: + pass + + # Check to see if the no_process folders are inside the "from" folder. + if not os.path.isdir(from_folder) or not os.path.isdir(to_folder): + log.error('Both the "To" and "From" have to exist.') return + else: + for item in no_process: + if from_folder in item: + log.error('To protect your data, the movie libraries can\'t be inside of or the same as the "from" folder.') + return + + # Check to see if the no_process folders are inside the provided movie_folder + if movie_folder and not os.path.isdir(movie_folder): + log.debug('The provided movie folder %s does not exist. 
Trying to find it in the \'from\' folder.', movie_folder) + + # Update to the from folder + if len(splitString(release_download.get('files'), '|')) == 1: + new_movie_folder = from_folder + else: + new_movie_folder = os.path.join(from_folder, os.path.basename(movie_folder)) + + if not os.path.isdir(new_movie_folder): + log.error('The provided movie folder %s does not exist and could also not be found in the \'from\' folder.', movie_folder) + return + + # Update the files + new_files = [os.path.join(new_movie_folder, os.path.relpath(filename, movie_folder)) for filename in splitString(release_download.get('files'), '|')] + if new_files and not os.path.isfile(new_files[0]): + log.error('The provided movie folder %s does not exist and its files could also not be found in the \'from\' folder.', movie_folder) + return + + # Update release_download info to the from folder + log.debug('Release %s found in the \'from\' folder.', movie_folder) + release_download['folder'] = new_movie_folder + release_download['files'] = '|'.join(new_files) + movie_folder = new_movie_folder + + if movie_folder: + for item in no_process: + if movie_folder in item: + log.error('To protect your data, the movie libraries can\'t be inside of or the same as the provided movie folder.') + return # Make sure a checkSnatched marked all downloads/seeds as such - if not download_info and self.conf('run_every') > 0: + if not release_download and self.conf('run_every') > 0: fireEvent('renamer.check_snatched') self.renaming_started = True @@ -112,29 +161,35 @@ class Renamer(Plugin): files = [] if movie_folder: log.info('Scanning movie folder %s...', movie_folder) - movie_folder = movie_folder.rstrip(os.path.sep) folder = os.path.dirname(movie_folder) - # Get all files from the specified folder - try: - for root, folders, names in os.walk(movie_folder): - files.extend([os.path.join(root, name) for name in names]) - except: - log.error('Failed getting files from %s: %s', (movie_folder, 
traceback.format_exc())) + if release_download.get('files', ''): + files = splitString(release_download['files'], '|') + + # If there is only one file in the torrent, the downloader did not create a subfolder + if len(files) == 1: + folder = movie_folder + else: + # Get all files from the specified folder + try: + for root, folders, names in os.walk(movie_folder): + files.extend([sp(os.path.join(root, name)) for name in names]) + except: + log.error('Failed getting files from %s: %s', (movie_folder, traceback.format_exc())) db = get_session() # Extend the download info with info stored in the downloaded release - download_info = self.extendDownloadInfo(download_info) + release_download = self.extendReleaseDownload(release_download) # Unpack any archives extr_files = None if self.conf('unrar'): folder, movie_folder, files, extr_files = self.extractFiles(folder = folder, movie_folder = movie_folder, files = files, - cleanup = self.conf('cleanup') and not self.downloadIsTorrent(download_info)) + cleanup = self.conf('cleanup') and not self.downloadIsTorrent(release_download)) - groups = fireEvent('scanner.scan', folder = folder if folder else self.conf('from'), - files = files, download_info = download_info, return_ignored = False, single = True) + groups = fireEvent('scanner.scan', folder = folder if folder else from_folder, + files = files, release_download = release_download, return_ignored = False, single = True) or [] folder_name = self.conf('folder_name') file_name = self.conf('file_name') @@ -142,9 +197,9 @@ class Renamer(Plugin): nfo_name = self.conf('nfo_name') separator = self.conf('separator') - # Statusses - done_status, active_status, downloaded_status, snatched_status = \ - fireEvent('status.get', ['done', 'active', 'downloaded', 'snatched'], single = True) + # Statuses + done_status, active_status, downloaded_status, snatched_status, seeding_status = \ + fireEvent('status.get', ['done', 'active', 'downloaded', 'snatched', 'seeding'], single = True) for 
group_identifier in groups: @@ -157,7 +212,7 @@ class Renamer(Plugin): # Add _UNKNOWN_ if no library item is connected if not group['library'] or not movie_title: - self.tagDir(group, 'unknown') + self.tagRelease(group = group, tag = 'unknown') continue # Rename the files using the library data else: @@ -172,8 +227,13 @@ class Renamer(Plugin): movie_title = getTitle(library) # Overwrite destination when set in category - destination = self.conf('to') - for movie in library_ent.media: + destination = to_folder + category_label = '' + for movie in library_ent.movies: + + if movie.category and movie.category.label: + category_label = movie.category.label + if movie.category and movie.category.destination and len(movie.category.destination) > 0 and movie.category.destination != 'None': destination = movie.category.destination log.debug('Setting category destination for "%s": %s' % (movie_title, destination)) @@ -190,7 +250,7 @@ class Renamer(Plugin): if extr_files: group['before_rename'].extend(extr_files) - # Remove weird chars from moviename + # Remove weird chars from movie name movie_name = re.sub(r"[\x00\/\\:\*\?\"<>\|]", '', movie_title) # Put 'The' at the end @@ -217,6 +277,7 @@ class Renamer(Plugin): 'cd': '', 'cd_nr': '', 'mpaa': library['info'].get('mpaa', ''), + 'category': category_label, } for file_type in group['files']: @@ -225,7 +286,7 @@ class Renamer(Plugin): if file_type is 'nfo' and not self.conf('rename_nfo'): log.debug('Skipping, renaming of %s disabled', file_type) for current_file in group['files'][file_type]: - if self.conf('cleanup') and (not self.downloadIsTorrent(download_info) or self.fileIsAdded(current_file, group)): + if self.conf('cleanup') and (not self.downloadIsTorrent(release_download) or self.fileIsAdded(current_file, group)): remove_files.append(current_file) continue @@ -385,7 +446,7 @@ class Renamer(Plugin): log.info('Better quality release already exists for %s, with quality %s', (movie.library.titles[0].title, 
release.quality.label)) # Add exists tag to the .ignore file - self.tagDir(group, 'exists') + self.tagRelease(group = group, tag = 'exists') # Notify on rename fail download_message = 'Renaming of %s (%s) cancelled, exists in %s already.' % (movie.library.titles[0].title, group['meta_data']['quality']['label'], release.quality.label) @@ -393,10 +454,20 @@ class Renamer(Plugin): remove_leftovers = False break - elif release.status_id is snatched_status.get('id'): - if release.quality.id is group['meta_data']['quality']['id']: - # Set the release to downloaded - fireEvent('release.update_status', release.id, status = downloaded_status, single = True) + + elif release.status_id in [snatched_status.get('id'), seeding_status.get('id')]: + if release_download and release_download.get('rls_id'): + if release_download['rls_id'] == release.id: + if release_download['status'] == 'completed': + # Set the release to downloaded + fireEvent('release.update_status', release.id, status = downloaded_status, single = True) + elif release_download['status'] == 'seeding': + # Set the release to seeding + fireEvent('release.update_status', release.id, status = seeding_status, single = True) + + elif release.quality.id is group['meta_data']['quality']['id']: + # Set the release to downloaded + fireEvent('release.update_status', release.id, status = downloaded_status, single = True) # Remove leftover files if not remove_leftovers: # Don't remove anything @@ -405,7 +476,7 @@ class Renamer(Plugin): log.debug('Removing leftover files') for current_file in group['files']['leftover']: if self.conf('cleanup') and not self.conf('move_leftover') and \ - (not self.downloadIsTorrent(download_info) or self.fileIsAdded(current_file, group)): + (not self.downloadIsTorrent(release_download) or self.fileIsAdded(current_file, group)): remove_files.append(current_file) # Remove files @@ -421,17 +492,17 @@ class Renamer(Plugin): log.info('Removing "%s"', src) try: - src = ss(src) + src = sp(src) if 
os.path.isfile(src): os.remove(src) - parent_dir = os.path.normpath(os.path.dirname(src)) - if delete_folders.count(parent_dir) == 0 and os.path.isdir(parent_dir) and not parent_dir in [destination, movie_folder] and not self.conf('from') in parent_dir: + parent_dir = os.path.dirname(src) + if delete_folders.count(parent_dir) == 0 and os.path.isdir(parent_dir) and not parent_dir in [destination, movie_folder] and not from_folder in parent_dir: delete_folders.append(parent_dir) except: log.error('Failed removing %s: %s', (src, traceback.format_exc())) - self.tagDir(group, 'failed_remove') + self.tagRelease(group = group, tag = 'failed_remove') # Delete leftover folder from older releases for delete_folder in delete_folders: @@ -451,15 +522,15 @@ class Renamer(Plugin): self.makeDir(os.path.dirname(dst)) try: - self.moveFile(src, dst, forcemove = not self.downloadIsTorrent(download_info) or self.fileIsAdded(src, group)) + self.moveFile(src, dst, forcemove = not self.downloadIsTorrent(release_download) or self.fileIsAdded(src, group)) group['renamed_files'].append(dst) except: log.error('Failed moving the file "%s" : %s', (os.path.basename(src), traceback.format_exc())) - self.tagDir(group, 'failed_rename') + self.tagRelease(group = group, tag = 'failed_rename') # Tag folder if it is in the 'from' folder and it will not be removed because it is a torrent - if self.movieInFromFolder(movie_folder) and self.downloadIsTorrent(download_info): - self.tagDir(group, 'renamed_already') + if self.movieInFromFolder(movie_folder) and self.downloadIsTorrent(release_download): + self.tagRelease(group = group, tag = 'renamed_already') # Remove matching releases for release in remove_releases: @@ -469,13 +540,13 @@ class Renamer(Plugin): except: log.error('Failed removing %s: %s', (release.identifier, traceback.format_exc())) - if group['dirname'] and group['parentdir'] and not self.downloadIsTorrent(download_info): + if group['dirname'] and group['parentdir'] and not 
self.downloadIsTorrent(release_download): if movie_folder: # Delete the movie folder group_folder = movie_folder else: # Delete the first empty subfolder in the tree relative to the 'from' folder - group_folder = os.path.join(self.conf('from'), os.path.relpath(group['parentdir'], self.conf('from')).split(os.path.sep)[0]) + group_folder = sp(os.path.join(from_folder, os.path.relpath(group['parentdir'], from_folder).split(os.path.sep)[0])) try: log.info('Deleting folder: %s', group_folder) @@ -516,18 +587,9 @@ class Renamer(Plugin): return rename_files # This adds a file to ignore / tag a release so it is ignored later - def tagDir(self, group, tag): - - ignore_file = None - if isinstance(group, dict): - for movie_file in sorted(list(group['files']['movie'])): - ignore_file = '%s.%s.ignore' % (os.path.splitext(movie_file)[0], tag) - break - else: - if not os.path.isdir(group) or not tag: - return - ignore_file = os.path.join(group, '%s.ignore' % tag) - + def tagRelease(self, tag, group = None, release_download = None): + if not tag: + return text = """This file is from CouchPotato It has marked this release as "%s" @@ -535,25 +597,88 @@ This file hides the release from the renamer Remove it if you want it to be renamed (again, or at least let it try again) """ % tag - if ignore_file: - self.createFile(ignore_file, text) + tag_files = [] - def untagDir(self, folder, tag = ''): - if not os.path.isdir(folder): + # Tag movie files if they are known + if isinstance(group, dict): + tag_files = [sorted(list(group['files']['movie']))[0]] + + elif isinstance(release_download, dict): + # Tag download_files if they are known + if release_download['files']: + tag_files = splitString(release_download['files'], '|') + + # Tag all files in release folder + else: + for root, folders, names in os.walk(release_download['folder']): + tag_files.extend([os.path.join(root, name) for name in names]) + + for filename in tag_files: + tag_filename = '%s.%s.ignore' % 
(os.path.splitext(filename)[0], tag) + if not os.path.isfile(tag_filename): + self.createFile(tag_filename, text) + + def untagRelease(self, release_download, tag = ''): + if not release_download: return - # Remove any .ignore files + tag_files = [] + + folder = release_download['folder'] + if not os.path.isdir(folder): + return False + + # Untag download_files if they are known + if release_download['files']: + tag_files = splitString(release_download['files'], '|') + + # Untag all files in release folder + else: + for root, folders, names in os.walk(release_download['folder']): + tag_files.extend([sp(os.path.join(root, name)) for name in names if not os.path.splitext(name)[1] == '.ignore']) + + # Find all .ignore files in folder + ignore_files = [] for root, dirnames, filenames in os.walk(folder): - for filename in fnmatch.filter(filenames, '*%s.ignore' % tag): - os.remove((os.path.join(root, filename))) + ignore_files.extend(fnmatch.filter([sp(os.path.join(root, filename)) for filename in filenames], '*%s.ignore' % tag)) - def hastagDir(self, folder, tag = ''): + # Match all found ignore files with the tag_files and delete if found + for tag_file in tag_files: + ignore_file = fnmatch.filter(ignore_files, '%s.%s.ignore' % (re.escape(os.path.splitext(tag_file)[0]), tag if tag else '*')) + for filename in ignore_file: + try: + os.remove(filename) + except: + log.debug('Unable to remove ignore file: %s. Error: %s.' 
% (filename, traceback.format_exc())) + + def hastagRelease(self, release_download, tag = ''): + if not release_download: + return False + + folder = release_download['folder'] if not os.path.isdir(folder): return False - # Find any .ignore files + tag_files = [] + ignore_files = [] + + # Find tag on download_files if they are known + if release_download['files']: + tag_files = splitString(release_download['files'], '|') + + # Find tag on all files in release folder + else: + for root, folders, names in os.walk(release_download['folder']): + tag_files.extend([sp(os.path.join(root, name)) for name in names if not os.path.splitext(name)[1] == '.ignore']) + + # Find all .ignore files in folder for root, dirnames, filenames in os.walk(folder): - if fnmatch.filter(filenames, '*%s.ignore' % tag): + ignore_files.extend(fnmatch.filter([sp(os.path.join(root, filename)) for filename in filenames], '*%s.ignore' % tag)) + + # Match all found ignore files with the tag_files and return True found + for tag_file in tag_files: + ignore_file = fnmatch.filter(ignore_files, '%s.%s.ignore' % (os.path.splitext(tag_file)[0], tag if tag else '*')) + if ignore_file: return True return False @@ -572,7 +697,7 @@ Remove it if you want it to be renamed (again, or at least let it try again) link(old, dest) except: # Try to simlink next - log.debug('Couldn\'t hardlink file "%s" to "%s". Simlinking instead. Error: %s. ', (old, dest, traceback.format_exc())) + log.debug('Couldn\'t hardlink file "%s" to "%s". Simlinking instead. 
Error: %s.', (old, dest, traceback.format_exc())) shutil.copy(old, dest) try: symlink(dest, old + '.link') @@ -616,22 +741,38 @@ Remove it if you want it to be renamed (again, or at least let it try again) replaced = toUnicode(string) for x, r in replacements.iteritems(): + if x in ['thename', 'namethe']: + continue if r is not None: replaced = replaced.replace(u'<%s>' % toUnicode(x), toUnicode(r)) else: #If information is not available, we don't want the tag in the filename replaced = replaced.replace('<' + x + '>', '') + replaced = self.replaceDoubles(replaced.lstrip('. ')) + for x, r in replacements.iteritems(): + if x in ['thename', 'namethe']: + replaced = replaced.replace(u'<%s>' % toUnicode(x), toUnicode(r)) replaced = re.sub(r"[\x00:\*\?\"<>\|]", '', replaced) sep = self.conf('foldersep') if folder else self.conf('separator') - return self.replaceDoubles(replaced.lstrip('. ')).replace(' ', ' ' if not sep else sep) + return replaced.replace(' ', ' ' if not sep else sep) def replaceDoubles(self, string): - return string.replace(' ', ' ').replace(' .', '.') + + replaces = [ + ('\.+', '.'), ('_+', '_'), ('-+', '-'), ('\s+', ' '), + ('(\s\.)+', '.'), ('(-\.)+', '.'), ('(\s-)+', '-'), + ] + + for r in replaces: + reg, replace_with = r + string = re.sub(reg, replace_with, string) + + return string def deleteEmptyFolder(self, folder, show_error = True): - folder = ss(folder) + folder = sp(folder) loge = log.error if show_error else log.debug for root, dirs, files in os.walk(folder): @@ -657,22 +798,22 @@ Remove it if you want it to be renamed (again, or at least let it try again) self.checking_snatched = True - snatched_status, ignored_status, failed_status, done_status, seeding_status, downloaded_status, missing_status = \ - fireEvent('status.get', ['snatched', 'ignored', 'failed', 'done', 'seeding', 'downloaded', 'missing'], single = True) + snatched_status, ignored_status, failed_status, seeding_status, downloaded_status, missing_status = \ + 
fireEvent('status.get', ['snatched', 'ignored', 'failed', 'seeding', 'downloaded', 'missing'], single = True) db = get_session() rels = db.query(Release).filter( Release.status_id.in_([snatched_status.get('id'), seeding_status.get('id'), missing_status.get('id')]) ).all() - scan_items = [] + scan_releases = [] scan_required = False if rels: log.debug('Checking status snatched releases...') - statuses = fireEvent('download.status', merge = True) - if not statuses: + release_downloads = fireEvent('download.status', merge = True) + if not release_downloads: log.debug('Download status functionality is not implemented for active downloaders.') scan_required = True else: @@ -680,91 +821,91 @@ Remove it if you want it to be renamed (again, or at least let it try again) for rel in rels: rel_dict = rel.to_dict({'info': {}}) - movie_dict = fireEvent('movie.get', rel.media_id, single = True) + if not isinstance(rel_dict['info'], (dict)): + log.error('Faulty release found without any info, ignoring.') + fireEvent('release.update_status', rel.id, status = ignored_status, single = True) + continue # check status nzbname = self.createNzbName(rel_dict['info'], movie_dict) found = False - for item in statuses: + for release_download in release_downloads: found_release = False if rel_dict['info'].get('download_id'): - if item['id'] == rel_dict['info']['download_id'] and item['downloader'] == rel_dict['info']['download_downloader']: - log.debug('Found release by id: %s', item['id']) + if release_download['id'] == rel_dict['info']['download_id'] and release_download['downloader'] == rel_dict['info']['download_downloader']: + log.debug('Found release by id: %s', release_download['id']) found_release = True else: - if item['name'] == nzbname or rel_dict['info']['name'] in item['name'] or getImdb(item['name']) == movie_dict['library']['identifier']: + if release_download['name'] == nzbname or rel_dict['info']['name'] in release_download['name'] or getImdb(release_download['name']) == 
movie_dict['library']['identifier']: found_release = True if found_release: - timeleft = 'N/A' if item['timeleft'] == -1 else item['timeleft'] - log.debug('Found %s: %s, time to go: %s', (item['name'], item['status'].upper(), timeleft)) + timeleft = 'N/A' if release_download['timeleft'] == -1 else release_download['timeleft'] + log.debug('Found %s: %s, time to go: %s', (release_download['name'], release_download['status'].upper(), timeleft)) - if item['status'] == 'busy': + if release_download['status'] == 'busy': # Set the release to snatched if it was missing before fireEvent('release.update_status', rel.id, status = snatched_status, single = True) # Tag folder if it is in the 'from' folder and it will not be processed because it is still downloading - if item['folder'] and self.conf('from') in item['folder']: - self.tagDir(item['folder'], 'downloading') - - elif item['status'] == 'seeding': - # Set the release to seeding - fireEvent('release.update_status', rel.id, status = seeding_status, single = True) + if self.movieInFromFolder(release_download['folder']): + self.tagRelease(release_download = release_download, tag = 'downloading') + elif release_download['status'] == 'seeding': #If linking setting is enabled, process release - if self.conf('file_action') != 'move' and not rel.status_id == seeding_status.get('id') and self.statusInfoComplete(item): - log.info('Download of %s completed! It is now being processed while leaving the original files alone for seeding. Current ratio: %s.', (item['name'], item['seed_ratio'])) + if self.conf('file_action') != 'move' and not rel.status_id == seeding_status.get('id') and self.statusInfoComplete(release_download): + log.info('Download of %s completed! It is now being processed while leaving the original files alone for seeding. 
Current ratio: %s.', (release_download['name'], release_download['seed_ratio'])) # Remove the downloading tag - self.untagDir(item['folder'], 'downloading') + self.untagRelease(release_download = release_download, tag = 'downloading') # Scan and set the torrent to paused if required - item.update({'pause': True, 'scan': True, 'process_complete': False}) - scan_items.append(item) + release_download.update({'pause': True, 'scan': True, 'process_complete': False}) + scan_releases.append(release_download) else: #let it seed - log.debug('%s is seeding with ratio: %s', (item['name'], item['seed_ratio'])) + log.debug('%s is seeding with ratio: %s', (release_download['name'], release_download['seed_ratio'])) + + # Set the release to seeding + fireEvent('release.update_status', rel.id, status = seeding_status, single = True) - elif item['status'] == 'failed': + elif release_download['status'] == 'failed': # Set the release to failed fireEvent('release.update_status', rel.id, status = failed_status, single = True) - fireEvent('download.remove_failed', item, single = True) + fireEvent('download.remove_failed', release_download, single = True) if self.conf('next_on_failed'): - fireEvent('movie.searcher.try_next_release', media_id = rel.media_id) - elif item['status'] == 'completed': - log.info('Download of %s completed!', item['name']) - if self.statusInfoComplete(item): + fireEvent('movie.searcher.try_next_release', movie_id = rel.movie_id) + elif release_download['status'] == 'completed': + log.info('Download of %s completed!', release_download['name']) + if self.statusInfoComplete(release_download): # If the release has been seeding, process now the seeding is done if rel.status_id == seeding_status.get('id'): - if rel.movie.status_id == done_status.get('id'): + if self.conf('file_action') != 'move': # Set the release to done as the movie has already been renamed fireEvent('release.update_status', rel.id, status = downloaded_status, single = True) # Allow the downloader to 
clean-up - item.update({'pause': False, 'scan': False, 'process_complete': True}) - scan_items.append(item) + release_download.update({'pause': False, 'scan': False, 'process_complete': True}) + scan_releases.append(release_download) else: - # Set the release to snatched so that the renamer can process the release as if it was never seeding - fireEvent('release.update_status', rel.id, status = snatched_status, single = True) - # Scan and Allow the downloader to clean-up - item.update({'pause': False, 'scan': True, 'process_complete': True}) - scan_items.append(item) + release_download.update({'pause': False, 'scan': True, 'process_complete': True}) + scan_releases.append(release_download) else: # Set the release to snatched if it was missing before fireEvent('release.update_status', rel.id, status = snatched_status, single = True) # Remove the downloading tag - self.untagDir(item['folder'], 'downloading') + self.untagRelease(release_download = release_download, tag = 'downloading') # Scan and Allow the downloader to clean-up - item.update({'pause': False, 'scan': True, 'process_complete': True}) - scan_items.append(item) + release_download.update({'pause': False, 'scan': True, 'process_complete': True}) + scan_releases.append(release_download) else: scan_required = True @@ -786,21 +927,21 @@ Remove it if you want it to be renamed (again, or at least let it try again) log.error('Failed checking for release in downloader: %s', traceback.format_exc()) # The following can either be done here, or inside the scanner if we pass it scan_items in one go - for item in scan_items: + for release_download in scan_releases: # Ask the renamer to scan the item - if item['scan']: - if item['pause'] and self.conf('file_action') == 'link': - fireEvent('download.pause', item = item, pause = True, single = True) - fireEvent('renamer.scan', download_info = item) - if item['pause'] and self.conf('file_action') == 'link': - fireEvent('download.pause', item = item, pause = False, single = 
True) - if item['process_complete']: + if release_download['scan']: + if release_download['pause'] and self.conf('file_action') == 'link': + fireEvent('download.pause', release_download = release_download, pause = True, single = True) + fireEvent('renamer.scan', release_download = release_download) + if release_download['pause'] and self.conf('file_action') == 'link': + fireEvent('download.pause', release_download = release_download, pause = False, single = True) + if release_download['process_complete']: #First make sure the files were succesfully processed - if not self.hastagDir(item['folder'], 'failed_rename'): + if not self.hastagRelease(release_download = release_download, tag = 'failed_rename'): # Remove the seeding tag if it exists - self.untagDir(item['folder'], 'renamed_already') + self.untagRelease(release_download = release_download, tag = 'renamed_already') # Ask the downloader to process the item - fireEvent('download.process_complete', item = item, single = True) + fireEvent('download.process_complete', release_download = release_download, single = True) if scan_required: fireEvent('renamer.scan') @@ -809,16 +950,16 @@ Remove it if you want it to be renamed (again, or at least let it try again) return True - def extendDownloadInfo(self, download_info): + def extendReleaseDownload(self, release_download): rls = None - if download_info and download_info.get('id') and download_info.get('downloader'): + if release_download and release_download.get('id') and release_download.get('downloader'): db = get_session() - rlsnfo_dwnlds = db.query(ReleaseInfo).filter_by(identifier = 'download_downloader', value = download_info.get('downloader')).all() - rlsnfo_ids = db.query(ReleaseInfo).filter_by(identifier = 'download_id', value = download_info.get('id')).all() + rlsnfo_dwnlds = db.query(ReleaseInfo).filter_by(identifier = 'download_downloader', value = release_download.get('downloader')).all() + rlsnfo_ids = db.query(ReleaseInfo).filter_by(identifier = 
'download_id', value = release_download.get('id')).all() for rlsnfo_dwnld in rlsnfo_dwnlds: for rlsnfo_id in rlsnfo_ids: @@ -828,32 +969,33 @@ Remove it if you want it to be renamed (again, or at least let it try again) if rls: break if not rls: - log.error('Download ID %s from downloader %s not found in releases', (download_info.get('id'), download_info.get('downloader'))) + log.error('Download ID %s from downloader %s not found in releases', (release_download.get('id'), release_download.get('downloader'))) if rls: rls_dict = rls.to_dict({'info':{}}) - download_info.update({ + release_download.update({ 'imdb_id': rls.movie.library.identifier, 'quality': rls.quality.identifier, 'protocol': rls_dict.get('info', {}).get('protocol') or rls_dict.get('info', {}).get('type'), + 'rls_id': rls.id, }) - return download_info + return release_download - def downloadIsTorrent(self, download_info): - return download_info and download_info.get('protocol') in ['torrent', 'torrent_magnet'] + def downloadIsTorrent(self, release_download): + return release_download and release_download.get('protocol') in ['torrent', 'torrent_magnet'] def fileIsAdded(self, src, group): if not group or not group.get('before_rename'): return False return src in group['before_rename'] - def statusInfoComplete(self, item): - return item['id'] and item['downloader'] and item['folder'] + def statusInfoComplete(self, release_download): + return release_download['id'] and release_download['downloader'] and release_download['folder'] def movieInFromFolder(self, movie_folder): - return movie_folder and self.conf('from') in movie_folder or not movie_folder + return movie_folder and sp(self.conf('from')) in sp(movie_folder) or not movie_folder def extractFiles(self, folder = None, movie_folder = None, files = None, cleanup = False): if not files: files = [] @@ -863,9 +1005,11 @@ Remove it if you want it to be renamed (again, or at least let it try again) restfile_regex = 
'(^%s\.(?:part(?!0*1\.rar$)\d+\.rar$|[rstuvw]\d+$))' extr_files = [] + from_folder = sp(self.conf('from')) + # Check input variables if not folder: - folder = self.conf('from') + folder = from_folder check_file_date = True if movie_folder: @@ -873,7 +1017,7 @@ Remove it if you want it to be renamed (again, or at least let it try again) if not files: for root, folders, names in os.walk(folder): - files.extend([os.path.join(root, name) for name in names]) + files.extend([sp(os.path.join(root, name)) for name in names]) # Find all archive files archives = [re.search(archive_regex, name).groupdict() for name in files if re.search(archive_regex, name)] @@ -881,7 +1025,7 @@ Remove it if you want it to be renamed (again, or at least let it try again) #Extract all found archives for archive in archives: # Check if it has already been processed by CPS - if self.hastagDir(os.path.dirname(archive['file'])): + if self.hastagRelease(release_download = {'folder': os.path.dirname(archive['file']), 'files': archive['file']}): continue # Find all related archive files @@ -919,13 +1063,13 @@ Remove it if you want it to be renamed (again, or at least let it try again) log.info('Archive %s found. 
Extracting...', os.path.basename(archive['file'])) try: rar_handle = RarFile(archive['file']) - extr_path = os.path.join(self.conf('from'), os.path.relpath(os.path.dirname(archive['file']), folder)) + extr_path = os.path.join(from_folder, os.path.relpath(os.path.dirname(archive['file']), folder)) self.makeDir(extr_path) for packedinfo in rar_handle.infolist(): - if not packedinfo.isdir and not os.path.isfile(os.path.join(extr_path, os.path.basename(packedinfo.filename))): + if not packedinfo.isdir and not os.path.isfile(sp(os.path.join(extr_path, os.path.basename(packedinfo.filename)))): log.debug('Extracting %s...', packedinfo.filename) rar_handle.extract(condition = [packedinfo.index], path = extr_path, withSubpath = False, overwrite = False) - extr_files.append(os.path.join(extr_path, os.path.basename(packedinfo.filename))) + extr_files.append(sp(os.path.join(extr_path, os.path.basename(packedinfo.filename)))) del rar_handle except Exception, e: log.error('Failed to extract %s: %s %s', (archive['file'], e, traceback.format_exc())) @@ -942,9 +1086,9 @@ Remove it if you want it to be renamed (again, or at least let it try again) files.remove(filename) # Move the rest of the files and folders if any files are extracted to the from folder (only if folder was provided) - if extr_files and os.path.normpath(os.path.normcase(folder)) != os.path.normpath(os.path.normcase(self.conf('from'))): + if extr_files and folder != from_folder: for leftoverfile in list(files): - move_to = os.path.join(self.conf('from'), os.path.relpath(leftoverfile, folder)) + move_to = os.path.join(from_folder, os.path.relpath(leftoverfile, folder)) try: self.makeDir(os.path.dirname(move_to)) @@ -967,8 +1111,8 @@ Remove it if you want it to be renamed (again, or at least let it try again) log.debug('Removing old movie folder %s...', movie_folder) self.deleteEmptyFolder(movie_folder) - movie_folder = os.path.join(self.conf('from'), os.path.relpath(movie_folder, folder)) - folder = self.conf('from') 
+ movie_folder = os.path.join(from_folder, os.path.relpath(movie_folder, folder)) + folder = from_folder if extr_files: files.extend(extr_files) diff --git a/couchpotato/core/plugins/scanner/main.py b/couchpotato/core/plugins/scanner/main.py index 1b17c15..eb193ad 100644 --- a/couchpotato/core/plugins/scanner/main.py +++ b/couchpotato/core/plugins/scanner/main.py @@ -1,6 +1,6 @@ from couchpotato import get_session from couchpotato.core.event import fireEvent, addEvent -from couchpotato.core.helpers.encoding import toUnicode, simplifyString, ss +from couchpotato.core.helpers.encoding import toUnicode, simplifyString, ss, sp from couchpotato.core.helpers.variable import getExt, getImdb, tryInt, \ splitString from couchpotato.core.logger import CPLog @@ -21,10 +21,6 @@ log = CPLog(__name__) class Scanner(Plugin): - minimal_filesize = { - 'media': 314572800, # 300MB - 'trailer': 1048576, # 1MB - } ignored_in_path = [os.path.sep + 'extracted' + os.path.sep, 'extracting', '_unpack', '_failed_', '_unknown_', '_exists_', '_failed_remove_', '_failed_rename_', '.appledouble', '.appledb', '.appledesktop', os.path.sep + '._', '.ds_store', 'cp.cpnfo', 'thumbs.db', 'ehthumbs.db', 'desktop.ini'] #unpacking, smb-crap, hidden files @@ -52,6 +48,12 @@ class Scanner(Plugin): 'leftover': ('leftover', 'leftover'), } + file_sizes = { # in MB + 'movie': {'min': 300}, + 'trailer': {'min': 2, 'max': 250}, + 'backdrop': {'min': 0, 'max': 5}, + } + codecs = { 'audio': ['dts', 'ac3', 'ac3d', 'mp3'], 'video': ['x264', 'h264', 'divx', 'xvid'] @@ -104,9 +106,9 @@ class Scanner(Plugin): addEvent('scanner.name_year', self.getReleaseNameYear) addEvent('scanner.partnumber', self.getPartNumber) - def scan(self, folder = None, files = None, download_info = None, simple = False, newer_than = 0, return_ignored = True, on_found = None): + def scan(self, folder = None, files = None, release_download = None, simple = False, newer_than = 0, return_ignored = True, on_found = None): - folder = 
ss(os.path.normpath(folder)) + folder = sp(folder) if not folder or not os.path.isdir(folder): log.error('Folder doesn\'t exists: %s', folder) @@ -122,7 +124,7 @@ class Scanner(Plugin): try: files = [] for root, dirs, walk_files in os.walk(folder): - files.extend(os.path.join(root, filename) for filename in walk_files) + files.extend([sp(os.path.join(root, filename)) for filename in walk_files]) # Break if CP wants to shut down if self.shuttingDown(): @@ -132,7 +134,7 @@ class Scanner(Plugin): log.error('Failed getting files from %s: %s', (folder, traceback.format_exc())) else: check_file_date = False - files = [ss(x) for x in files] + files = [sp(x) for x in files] for file_path in files: @@ -148,7 +150,7 @@ class Scanner(Plugin): continue is_dvd_file = self.isDVDFile(file_path) - if os.path.getsize(file_path) > self.minimal_filesize['media'] or is_dvd_file: # Minimal 300MB files or is DVD file + if self.filesizeBetween(file_path, self.file_sizes['movie']) or is_dvd_file: # Minimal 300MB files or is DVD file # Normal identifier identifier = self.createStringIdentifier(file_path, folder, exclude_filename = is_dvd_file) @@ -182,7 +184,6 @@ class Scanner(Plugin): # files will be grouped first. 
leftovers = set(sorted(leftovers, reverse = True)) - # Group files minus extension ignored_identifiers = [] for identifier, group in movie_files.iteritems(): @@ -191,7 +192,7 @@ class Scanner(Plugin): log.debug('Grouping files: %s', identifier) has_ignored = 0 - for file_path in group['unsorted_files']: + for file_path in list(group['unsorted_files']): ext = getExt(file_path) wo_ext = file_path[:-(len(ext) + 1)] found_files = set([i for i in leftovers if wo_ext in i]) @@ -200,6 +201,11 @@ class Scanner(Plugin): has_ignored += 1 if ext == 'ignore' else 0 + if has_ignored == 0: + for file_path in list(group['unsorted_files']): + ext = getExt(file_path) + has_ignored += 1 if ext == 'ignore' else 0 + if has_ignored > 0: ignored_identifiers.append(identifier) @@ -232,10 +238,6 @@ class Scanner(Plugin): # Remove the found files from the leftover stack leftovers = leftovers - set(found_files) - exts = [getExt(ff) for ff in found_files] - if 'ignore' in exts: - ignored_identifiers.append(identifier) - # Break if CP wants to shut down if self.shuttingDown(): break @@ -262,14 +264,14 @@ class Scanner(Plugin): # Remove the found files from the leftover stack leftovers = leftovers - set([ff]) - ext = getExt(ff) - if ext == 'ignore': - ignored_identifiers.append(new_identifier) - # Break if CP wants to shut down if self.shuttingDown(): break + # leftovers should be empty + if leftovers: + log.debug('Some files are still left over: %s', leftovers) + # Cleaning up used for identifier in delete_identifiers: if path_identifiers.get(identifier): @@ -339,11 +341,11 @@ class Scanner(Plugin): total_found = len(valid_files) # Make sure only one movie was found if a download ID is provided - if download_info and total_found == 0: - log.info('Download ID provided (%s), but no groups found! 
Make sure the download contains valid media files (fully extracted).', download_info.get('imdb_id')) - elif download_info and total_found > 1: - log.info('Download ID provided (%s), but more than one group found (%s). Ignoring Download ID...', (download_info.get('imdb_id'), len(valid_files))) - download_info = None + if release_download and total_found == 0: + log.info('Download ID provided (%s), but no groups found! Make sure the download contains valid media files (fully extracted).', release_download.get('imdb_id')) + elif release_download and total_found > 1: + log.info('Download ID provided (%s), but more than one group found (%s). Ignoring Download ID...', (release_download.get('imdb_id'), len(valid_files))) + release_download = None # Determine file types db = get_session() @@ -379,7 +381,7 @@ class Scanner(Plugin): continue log.debug('Getting metadata for %s', identifier) - group['meta_data'] = self.getMetaData(group, folder = folder, download_info = download_info) + group['meta_data'] = self.getMetaData(group, folder = folder, release_download = release_download) # Subtitle meta group['subtitle_language'] = self.getSubtitleLanguage(group) if not simple else {} @@ -411,7 +413,7 @@ class Scanner(Plugin): del group['unsorted_files'] # Determine movie - group['library'] = self.determineMovie(group, download_info = download_info) + group['library'] = self.determineMovie(group, release_download = release_download) if not group['library']: log.error('Unable to determine movie: %s', group['identifiers']) else: @@ -436,13 +438,13 @@ class Scanner(Plugin): return processed_movies - def getMetaData(self, group, folder = '', download_info = None): + def getMetaData(self, group, folder = '', release_download = None): data = {} files = list(group['files']['movie']) for cur_file in files: - if os.path.getsize(cur_file) < self.minimal_filesize['media']: continue # Ignore smaller files + if not self.filesizeBetween(cur_file, self.file_sizes['movie']): continue # Ignore 
smaller files meta = self.getMeta(cur_file) @@ -461,8 +463,8 @@ class Scanner(Plugin): # Use the quality guess first, if that failes use the quality we wanted to download data['quality'] = None - if download_info and download_info.get('quality'): - data['quality'] = fireEvent('quality.single', download_info.get('quality'), single = True) + if release_download and release_download.get('quality'): + data['quality'] = fireEvent('quality.single', release_download.get('quality'), single = True) if not data['quality']: data['quality'] = fireEvent('quality.guess', files = files, extra = data, single = True) @@ -546,12 +548,12 @@ class Scanner(Plugin): return detected_languages - def determineMovie(self, group, download_info = None): + def determineMovie(self, group, release_download = None): # Get imdb id from downloader - imdb_id = download_info and download_info.get('imdb_id') + imdb_id = release_download and release_download.get('imdb_id') if imdb_id: - log.debug('Found movie via imdb id from it\'s download id: %s', download_info.get('imdb_id')) + log.debug('Found movie via imdb id from it\'s download id: %s', release_download.get('imdb_id')) files = group['files'] @@ -652,7 +654,7 @@ class Scanner(Plugin): def getMediaFiles(self, files): def test(s): - return self.filesizeBetween(s, 300, 100000) and getExt(s.lower()) in self.extensions['movie'] and not self.isSampleFile(s) + return self.filesizeBetween(s, self.file_sizes['movie']) and getExt(s.lower()) in self.extensions['movie'] and not self.isSampleFile(s) return set(filter(test, files)) @@ -677,7 +679,7 @@ class Scanner(Plugin): def getTrailers(self, files): def test(s): - return re.search('(^|[\W_])trailer\d*[\W_]', s.lower()) and self.filesizeBetween(s, 2, 250) + return re.search('(^|[\W_])trailer\d*[\W_]', s.lower()) and self.filesizeBetween(s, self.file_sizes['trailer']) return set(filter(test, files)) @@ -688,7 +690,7 @@ class Scanner(Plugin): files = set(filter(test, files)) images = { - 'backdrop': 
set(filter(lambda s: re.search('(^|[\W_])fanart|backdrop\d*[\W_]', s.lower()) and self.filesizeBetween(s, 0, 5), files)) + 'backdrop': set(filter(lambda s: re.search('(^|[\W_])fanart|backdrop\d*[\W_]', s.lower()) and self.filesizeBetween(s, self.file_sizes['backdrop']), files)) } # Rest @@ -716,16 +718,6 @@ class Scanner(Plugin): log.debug('Ignored "%s" contains "%s".', (filename, i)) return False - # Sample file - if self.isSampleFile(filename): - log.debug('Is sample file "%s".', filename) - return False - - # Minimal size - if self.filesizeBetween(filename, self.minimal_filesize['media']): - log.debug('File to small: %s', filename) - return False - # All is OK return True @@ -734,9 +726,9 @@ class Scanner(Plugin): if is_sample: log.debug('Is sample file: %s', filename) return is_sample - def filesizeBetween(self, file, min = 0, max = 100000): + def filesizeBetween(self, file, file_size = []): try: - return (min * 1048576) < os.path.getsize(file) < (max * 1048576) + return (file_size.get('min', 0) * 1048576) < os.path.getsize(file) < (file_size.get('max', 100000) * 1048576) except: log.error('Couldn\'t get filesize of %s.', file) @@ -830,19 +822,21 @@ class Scanner(Plugin): def findYear(self, text): # Search year inside () or [] first - matches = re.search('(\(|\[)(?P19[0-9]{2}|20[0-9]{2})(\]|\))', text) + matches = re.findall('(\(|\[)(?P19[0-9]{2}|20[0-9]{2})(\]|\))', text) if matches: - return matches.group('year') + return matches[-1][1] # Search normal - matches = re.search('(?P19[0-9]{2}|20[0-9]{2})', text) + matches = re.findall('(?P19[0-9]{2}|20[0-9]{2})', text) if matches: - return matches.group('year') + return matches[-1] return '' def getReleaseNameYear(self, release_name, file_name = None): + release_name = release_name.strip(' .-_') + # Use guessit first guess = {} if file_name: @@ -860,7 +854,7 @@ class Scanner(Plugin): cleaned = ' '.join(re.split('\W+', simplifyString(release_name))) cleaned = re.sub(self.clean, ' ', cleaned) - for year_str in 
[file_name, cleaned]: + for year_str in [file_name, release_name, cleaned]: if not year_str: continue year = self.findYear(year_str) if year: @@ -870,19 +864,21 @@ class Scanner(Plugin): if year: # Split name on year try: - movie_name = cleaned.split(year).pop(0).strip() - cp_guess = { - 'name': movie_name, - 'year': int(year), - } + movie_name = cleaned.rsplit(year, 1).pop(0).strip() + if movie_name: + cp_guess = { + 'name': movie_name, + 'year': int(year), + } except: pass - else: # Split name on multiple spaces + + if not cp_guess: # Split name on multiple spaces try: movie_name = cleaned.split(' ').pop(0).strip() cp_guess = { 'name': movie_name, - 'year': int(year), + 'year': int(year) if movie_name[:4] != year else 0, } except: pass diff --git a/couchpotato/core/plugins/score/main.py b/couchpotato/core/plugins/score/main.py index 5f9da1a..54b6ca3 100644 --- a/couchpotato/core/plugins/score/main.py +++ b/couchpotato/core/plugins/score/main.py @@ -1,11 +1,11 @@ -from couchpotato.core.event import addEvent +from couchpotato.core.event import addEvent, fireEvent from couchpotato.core.helpers.encoding import toUnicode from couchpotato.core.helpers.variable import getTitle, splitString from couchpotato.core.logger import CPLog from couchpotato.core.plugins.base import Plugin from couchpotato.core.plugins.score.scores import nameScore, nameRatioScore, \ sizeScore, providerScore, duplicateScore, partialIgnoredScore, namePositionScore, \ - halfMultipartScore + halfMultipartScore, sceneScore from couchpotato.environment import Env log = CPLog(__name__) @@ -62,4 +62,7 @@ class Score(Plugin): if extra_score: score += extra_score(nzb) + # Scene / Nuke scoring + score += sceneScore(nzb['name']) + return score diff --git a/couchpotato/core/plugins/score/scores.py b/couchpotato/core/plugins/score/scores.py index 6aa0b46..895f5fc 100644 --- a/couchpotato/core/plugins/score/scores.py +++ b/couchpotato/core/plugins/score/scores.py @@ -1,8 +1,13 @@ from couchpotato.core.event 
import fireEvent from couchpotato.core.helpers.encoding import simplifyString from couchpotato.core.helpers.variable import tryInt +from couchpotato.core.logger import CPLog from couchpotato.environment import Env import re +import traceback + +log = CPLog(__name__) + name_scores = [ # Tags @@ -160,3 +165,38 @@ def halfMultipartScore(nzb_name): return -30 return 0 + + +def sceneScore(nzb_name): + + check_names = [nzb_name] + + # Match names between " + try: check_names.append(re.search(r'([\'"])[^\1]*\1', nzb_name).group(0)) + except: pass + + # Match longest name between [] + try: check_names.append(max(re.findall(r'[^[]*\[([^]]*)\]', nzb_name), key = len).strip()) + except: pass + + for name in check_names: + + # Strip twice, remove possible file extensions + name = name.lower().strip(' "\'\.-_\[\]') + name = re.sub('\.([a-z0-9]{0,4})$', '', name) + name = name.strip(' "\'\.-_\[\]') + + # Make sure year and groupname is in there + year = re.findall('(?P19[0-9]{2}|20[0-9]{2})', name) + group = re.findall('\-([a-z0-9]+)$', name) + + if len(year) > 0 and len(group) > 0: + try: + validate = fireEvent('release.validate', name, single = True) + if validate and tryInt(validate.get('score')) != 0: + log.debug('Release "%s" scored %s, reason: %s', (nzb_name, validate['score'], validate['reasons'])) + return tryInt(validate.get('score')) + except: + log.error('Failed scoring scene: %s', traceback.format_exc()) + + return 0 diff --git a/couchpotato/core/plugins/subtitle/main.py b/couchpotato/core/plugins/subtitle/main.py index 0b494c1..ca944b3 100644 --- a/couchpotato/core/plugins/subtitle/main.py +++ b/couchpotato/core/plugins/subtitle/main.py @@ -1,6 +1,6 @@ from couchpotato import get_session from couchpotato.core.event import addEvent, fireEvent -from couchpotato.core.helpers.encoding import toUnicode +from couchpotato.core.helpers.encoding import toUnicode, sp from couchpotato.core.helpers.variable import splitString from couchpotato.core.logger import CPLog from 
couchpotato.core.plugins.base import Plugin @@ -58,9 +58,9 @@ class Subtitle(Plugin): for d_sub in downloaded: log.info('Found subtitle (%s): %s', (d_sub.language.alpha2, files)) - group['files']['subtitle'].append(d_sub.path) - group['before_rename'].append(d_sub.path) - group['subtitle_language'][d_sub.path] = [d_sub.language.alpha2] + group['files']['subtitle'].append(sp(d_sub.path)) + group['before_rename'].append(sp(d_sub.path)) + group['subtitle_language'][sp(d_sub.path)] = [d_sub.language.alpha2] return True diff --git a/couchpotato/core/plugins/userscript/static/userscript.css b/couchpotato/core/plugins/userscript/static/userscript.css index d08953a..d816101 100644 --- a/couchpotato/core/plugins/userscript/static/userscript.css +++ b/couchpotato/core/plugins/userscript/static/userscript.css @@ -14,25 +14,25 @@ padding: 20px; } - .page.userscript .movie_result { + .page.userscript .media_result { height: 140px; } - .page.userscript .movie_result .thumbnail { + .page.userscript .media_result .thumbnail { width: 90px; } - .page.userscript .movie_result .options { + .page.userscript .media_result .options { left: 90px; padding: 54px 15px; } - .page.userscript .movie_result .year { + .page.userscript .media_result .year { display: none; } - .page.userscript .movie_result .options select[name="title"] { + .page.userscript .media_result .options select[name="title"] { width: 190px; } - .page.userscript .movie_result .options select[name="profile"] { + .page.userscript .media_result .options select[name="profile"] { width: 70px; } diff --git a/couchpotato/core/plugins/userscript/static/userscript.js b/couchpotato/core/plugins/userscript/static/userscript.js index 2aeb7b5..11daa06 100644 --- a/couchpotato/core/plugins/userscript/static/userscript.js +++ b/couchpotato/core/plugins/userscript/static/userscript.js @@ -34,7 +34,7 @@ Page.Userscript = new Class({ if(json.error) self.frame.set('html', json.error); else { - var item = new Block.Search.Item(json.movie); + 
var item = new Block.Search.MovieItem(json.movie); self.frame.adopt(item); item.showOptions(); } diff --git a/couchpotato/core/providers/automation/flixster/main.py b/couchpotato/core/providers/automation/flixster/main.py index 46dcfba..7fd2f71 100644 --- a/couchpotato/core/providers/automation/flixster/main.py +++ b/couchpotato/core/providers/automation/flixster/main.py @@ -1,7 +1,6 @@ from couchpotato.core.helpers.variable import tryInt, splitString from couchpotato.core.logger import CPLog from couchpotato.core.providers.automation.base import Automation -import json log = CPLog(__name__) @@ -40,7 +39,7 @@ class Flixster(Automation): if not enablers[index]: continue - data = json.loads(self.getHTMLData(self.url % user_id)) + data = self.getJsonData(self.url % user_id, decode_from = 'iso-8859-1') for movie in data: movies.append({'title': movie['movie']['title'], 'year': movie['movie']['year'] }) diff --git a/couchpotato/core/providers/automation/imdb/__init__.py b/couchpotato/core/providers/automation/imdb/__init__.py index 546cba9..20e4f41 100644 --- a/couchpotato/core/providers/automation/imdb/__init__.py +++ b/couchpotato/core/providers/automation/imdb/__init__.py @@ -55,7 +55,14 @@ config = [{ 'label': 'TOP 250', 'description': 'IMDB TOP 250 chart', 'default': True, - }, + }, + { + 'name': 'automation_charts_boxoffice', + 'type': 'bool', + 'label': 'Box offce TOP 10', + 'description': 'IMDB Box office TOP 10 chart', + 'default': True, + }, ], }, ], diff --git a/couchpotato/core/providers/automation/imdb/main.py b/couchpotato/core/providers/automation/imdb/main.py index e9d14b5..76afb24 100644 --- a/couchpotato/core/providers/automation/imdb/main.py +++ b/couchpotato/core/providers/automation/imdb/main.py @@ -70,8 +70,11 @@ class IMDBAutomation(IMDBBase): chart_urls = { 'theater': 'http://www.imdb.com/movies-in-theaters/', 'top250': 'http://www.imdb.com/chart/top', + 'boxoffice': 'http://www.imdb.com/chart/', } + first_table = ['boxoffice'] + def 
getIMDBids(self): movies = [] @@ -84,6 +87,14 @@ class IMDBAutomation(IMDBBase): try: result_div = html.find('div', attrs = {'id': 'main'}) + + try: + if url in self.first_table: + table = result_div.find('table') + result_div = table if table else result_div + except: + pass + imdb_ids = getImdb(str(result_div), multiple = True) for imdb_id in imdb_ids: diff --git a/couchpotato/core/providers/automation/itunes/main.py b/couchpotato/core/providers/automation/itunes/main.py index 8e35237..eb68e34 100644 --- a/couchpotato/core/providers/automation/itunes/main.py +++ b/couchpotato/core/providers/automation/itunes/main.py @@ -16,9 +16,6 @@ class ITunes(Automation, RSS): def getIMDBids(self): - if self.isDisabled(): - return - movies = [] enablers = [tryInt(x) for x in splitString(self.conf('automation_urls_use'))] diff --git a/couchpotato/core/providers/base.py b/couchpotato/core/providers/base.py index 8760710..1476ef7 100644 --- a/couchpotato/core/providers/base.py +++ b/couchpotato/core/providers/base.py @@ -62,13 +62,17 @@ class Provider(Plugin): return self.is_available.get(host, False) - def getJsonData(self, url, **kwargs): + def getJsonData(self, url, decode_from = None, **kwargs): cache_key = '%s%s' % (md5(url), md5('%s' % kwargs.get('params', {}))) data = self.getCache(cache_key, url, **kwargs) if data: try: + data = data.strip() + if decode_from: + data = data.decode(decode_from) + return json.loads(data) except: log.error('Failed to parsing %s: %s', (self.getName(), traceback.format_exc())) diff --git a/couchpotato/core/providers/info/_modifier/main.py b/couchpotato/core/providers/info/_modifier/main.py index 85cc47b..113ce4c 100644 --- a/couchpotato/core/providers/info/_modifier/main.py +++ b/couchpotato/core/providers/info/_modifier/main.py @@ -49,19 +49,13 @@ class Movie(ModifierBase): def returnByType(self, results): - new_results = {'unknown':[]} + new_results = {} for r in results: - if r.get('type'): - type_name = r.get('type') + 's' - if not 
new_results.has_key(type_name): - new_results[type_name] = [] + type_name = r.get('type', 'movie') + 's' + if not new_results.has_key(type_name): + new_results[type_name] = [] - new_results[type_name].append(r) - else: - new_results['unknown'].append(r) - - if len(new_results['unknown']) == 0: - del new_results['unknown'] + new_results[type_name].append(r) # Combine movies, needs a cleaner way.. if new_results.has_key('movies'): diff --git a/couchpotato/core/providers/info/couchpotatoapi/main.py b/couchpotato/core/providers/info/couchpotatoapi/main.py index ef7db1f..4dd942e 100644 --- a/couchpotato/core/providers/info/couchpotatoapi/main.py +++ b/couchpotato/core/providers/info/couchpotatoapi/main.py @@ -3,6 +3,7 @@ from couchpotato.core.helpers.encoding import tryUrlencode from couchpotato.core.logger import CPLog from couchpotato.core.providers.info.base import MovieProvider from couchpotato.environment import Env +import base64 import time log = CPLog(__name__) @@ -11,6 +12,7 @@ log = CPLog(__name__) class CouchPotatoApi(MovieProvider): urls = { + 'validate': 'https://api.couchpota.to/validate/%s/', 'search': 'https://api.couchpota.to/search/%s/', 'info': 'https://api.couchpota.to/info/%s/', 'is_movie': 'https://api.couchpota.to/ismovie/%s/', @@ -24,11 +26,14 @@ class CouchPotatoApi(MovieProvider): def __init__(self): addEvent('movie.info', self.getInfo, priority = 1) + addEvent('info.search', self.search, priority = 1) addEvent('movie.search', self.search, priority = 1) addEvent('movie.release_date', self.getReleaseDate) addEvent('movie.suggest', self.getSuggestions) addEvent('movie.is_movie', self.isMovie) + addEvent('release.validate', self.validate) + addEvent('cp.source_url', self.getSourceUrl) addEvent('cp.messages', self.getMessages) @@ -50,6 +55,14 @@ class CouchPotatoApi(MovieProvider): def search(self, q, limit = 5): return self.getJsonData(self.urls['search'] % tryUrlencode(q) + ('?limit=%s' % limit), headers = self.getRequestHeaders()) + def 
validate(self, name = None): + + if not name: + return + + name_enc = base64.b64encode(name) + return self.getJsonData(self.urls['validate'] % name_enc, headers = self.getRequestHeaders()) + def isMovie(self, identifier = None): if not identifier: diff --git a/couchpotato/core/providers/info/omdbapi/main.py b/couchpotato/core/providers/info/omdbapi/main.py index f05c7cb..47374f4 100755 --- a/couchpotato/core/providers/info/omdbapi/main.py +++ b/couchpotato/core/providers/info/omdbapi/main.py @@ -20,6 +20,7 @@ class OMDBAPI(MovieProvider): http_time_between_calls = 0 def __init__(self): + addEvent('info.search', self.search) addEvent('movie.search', self.search) addEvent('movie.info', self.getInfo) diff --git a/couchpotato/core/providers/info/themoviedb/main.py b/couchpotato/core/providers/info/themoviedb/main.py index 87579a0..a790135 100644 --- a/couchpotato/core/providers/info/themoviedb/main.py +++ b/couchpotato/core/providers/info/themoviedb/main.py @@ -121,6 +121,7 @@ class TheMovieDb(MovieProvider): 'year': year, 'plot': movie.overview, 'genres': genres, + 'collection': getattr(movie.collection, 'name', None), } movie_data = dict((k, v) for k, v in movie_data.iteritems() if v) diff --git a/couchpotato/core/providers/metadata/xbmc/main.py b/couchpotato/core/providers/metadata/xbmc/main.py index e865e2d..7073363 100644 --- a/couchpotato/core/providers/metadata/xbmc/main.py +++ b/couchpotato/core/providers/metadata/xbmc/main.py @@ -104,6 +104,13 @@ class XBMC(MetaDataBase): writers = SubElement(nfoxml, 'credits') writers.text = toUnicode(writer) + # Sets or collections + collection_name = movie_info.get('collection') + if collection_name: + collection = SubElement(nfoxml, 'set') + collection.text = toUnicode(collection_name) + sorttitle = SubElement(nfoxml, 'sorttitle') + sorttitle.text = '%s %s' % (toUnicode(collection_name), movie_info.get('year')) # Clean up the xml and return it nfoxml = xml.dom.minidom.parseString(tostring(nfoxml)) diff --git 
a/couchpotato/core/providers/nzb/binsearch/main.py b/couchpotato/core/providers/nzb/binsearch/main.py index 770ed50..db0fb5b 100644 --- a/couchpotato/core/providers/nzb/binsearch/main.py +++ b/couchpotato/core/providers/nzb/binsearch/main.py @@ -65,7 +65,7 @@ class BinSearch(NZBProvider): total = tryInt(parts.group('total')) parts = tryInt(parts.group('parts')) - if (total / parts) < 0.95 or ((total / parts) >= 0.95 and not 'par2' in info.text.lower()): + if (total / parts) < 0.95 or ((total / parts) >= 0.95 and not ('par2' in info.text.lower() or 'pa3' in info.text.lower())): log.info2('Wrong: \'%s\', not complete: %s out of %s', (item['name'], parts, total)) return False diff --git a/couchpotato/core/providers/nzb/newznab/main.py b/couchpotato/core/providers/nzb/newznab/main.py index 02ffcfd..bd1b6c3 100644 --- a/couchpotato/core/providers/nzb/newznab/main.py +++ b/couchpotato/core/providers/nzb/newznab/main.py @@ -1,4 +1,4 @@ -from couchpotato.core.helpers.encoding import tryUrlencode +from couchpotato.core.helpers.encoding import tryUrlencode, toUnicode from couchpotato.core.helpers.rss import RSS from couchpotato.core.helpers.variable import cleanHost, splitString, tryInt from couchpotato.core.logger import CPLog @@ -83,7 +83,7 @@ class Newznab(NZBProvider, RSS): results.append({ 'id': nzb_id, 'provider_extra': urlparse(host['host']).hostname or host['host'], - 'name': name, + 'name': toUnicode(name), 'name_extra': name_extra, 'age': self.calculateAge(int(time.mktime(parse(date).timetuple()))), 'size': int(self.getElement(nzb, 'enclosure').attrib['length']) / 1024 / 1024, diff --git a/couchpotato/core/providers/nzb/omgwtfnzbs/main.py b/couchpotato/core/providers/nzb/omgwtfnzbs/main.py index 0a18b8f..8cc4a3e 100644 --- a/couchpotato/core/providers/nzb/omgwtfnzbs/main.py +++ b/couchpotato/core/providers/nzb/omgwtfnzbs/main.py @@ -14,7 +14,8 @@ log = CPLog(__name__) class OMGWTFNZBs(NZBProvider, RSS): urls = { - 'search': 
'http://rss.omgwtfnzbs.org/rss-search.php?%s', + 'search': 'https://rss.omgwtfnzbs.org/rss-search.php?%s', + 'detail_url': 'https://omgwtfnzbs.org/details.php?id=%s', } http_time_between_calls = 1 #seconds @@ -49,13 +50,14 @@ class OMGWTFNZBs(NZBProvider, RSS): for nzb in nzbs: enclosure = self.getElement(nzb, 'enclosure').attrib + nzb_id = parse_qs(urlparse(self.getTextElement(nzb, 'link')).query).get('id')[0] results.append({ - 'id': parse_qs(urlparse(self.getTextElement(nzb, 'link')).query).get('id')[0], + 'id': nzb_id, 'name': toUnicode(self.getTextElement(nzb, 'title')), 'age': self.calculateAge(int(time.mktime(parse(self.getTextElement(nzb, 'pubDate')).timetuple()))), 'size': tryInt(enclosure['length']) / 1024 / 1024, 'url': enclosure['url'], - 'detail_url': self.getTextElement(nzb, 'link'), + 'detail_url': self.urls['detail_url'] % nzb_id, 'description': self.getTextElement(nzb, 'description') }) diff --git a/couchpotato/core/providers/torrent/base.py b/couchpotato/core/providers/torrent/base.py index 3e7ddde..c16e6c5 100644 --- a/couchpotato/core/providers/torrent/base.py +++ b/couchpotato/core/providers/torrent/base.py @@ -1,6 +1,8 @@ -from couchpotato.core.helpers.variable import getImdb, md5 +from couchpotato.core.helpers.variable import getImdb, md5, cleanHost from couchpotato.core.logger import CPLog from couchpotato.core.providers.base import YarrProvider +from couchpotato.environment import Env +import time log = CPLog(__name__) @@ -9,6 +11,9 @@ class TorrentProvider(YarrProvider): protocol = 'torrent' + proxy_domain = None + proxy_list = [] + def imdbMatch(self, url, imdbId): if getImdb(url) == imdbId: return True @@ -25,6 +30,42 @@ class TorrentProvider(YarrProvider): return False + def getDomain(self, url = ''): + + forced_domain = self.conf('domain') + if forced_domain: + return cleanHost(forced_domain).rstrip('/') + url + + if not self.proxy_domain: + for proxy in self.proxy_list: + + prop_name = 'proxy.%s' % proxy + last_check = 
float(Env.prop(prop_name, default = 0)) + if last_check > time.time() - 1209600: + continue + + data = '' + try: + data = self.urlopen(proxy, timeout = 3, show_error = False) + except: + log.debug('Failed %s proxy %s', (self.getName(), proxy)) + + if self.correctProxy(data): + log.debug('Using proxy for %s: %s', (self.getName(), proxy)) + self.proxy_domain = proxy + break + + Env.prop(prop_name, time.time()) + + if not self.proxy_domain: + log.error('No %s proxies left, please add one in settings, or let us know which one to add on the forum.', self.getName()) + return None + + return cleanHost(self.proxy_domain).rstrip('/') + url + + def correctProxy(self): + return True + class TorrentMagnetProvider(TorrentProvider): protocol = 'torrent_magnet' diff --git a/couchpotato/core/providers/torrent/scenehd/__init__.py b/couchpotato/core/providers/torrent/bithdtv/__init__.py similarity index 87% rename from couchpotato/core/providers/torrent/scenehd/__init__.py rename to couchpotato/core/providers/torrent/bithdtv/__init__.py index c0a82ae..8c6f97a 100644 --- a/couchpotato/core/providers/torrent/scenehd/__init__.py +++ b/couchpotato/core/providers/torrent/bithdtv/__init__.py @@ -1,16 +1,16 @@ -from .main import SceneHD +from .main import BiTHDTV def start(): - return SceneHD() + return BiTHDTV() config = [{ - 'name': 'scenehd', + 'name': 'bithdtv', 'groups': [ { 'tab': 'searcher', 'list': 'torrent_providers', - 'name': 'SceneHD', - 'description': 'See SceneHD', + 'name': 'BiT-HDTV', + 'description': 'See BiT-HDTV', 'wizard': True, 'options': [ { @@ -46,7 +46,7 @@ config = [{ 'advanced': True, 'label': 'Extra Score', 'type': 'int', - 'default': 0, + 'default': 20, 'description': 'Starting score for each release found via this provider.', } ], diff --git a/couchpotato/core/providers/torrent/bithdtv/main.py b/couchpotato/core/providers/torrent/bithdtv/main.py new file mode 100644 index 0000000..2cacff3 --- /dev/null +++ b/couchpotato/core/providers/torrent/bithdtv/main.py @@ 
-0,0 +1,88 @@ +from bs4 import BeautifulSoup +from couchpotato.core.helpers.encoding import tryUrlencode, toUnicode +from couchpotato.core.helpers.variable import tryInt +from couchpotato.core.logger import CPLog +from couchpotato.core.providers.torrent.base import TorrentProvider +import traceback + +log = CPLog(__name__) + +class BiTHDTV(TorrentProvider): + + urls = { + 'test' : 'http://www.bit-hdtv.com/', + 'login' : 'http://www.bit-hdtv.com/takelogin.php', + 'login_check': 'http://www.bit-hdtv.com/messages.php', + 'detail' : 'http://www.bit-hdtv.com/details.php?id=%s', + 'search' : 'http://www.bit-hdtv.com/torrents.php?', + } + + # Searches for movies only - BiT-HDTV's subcategory and resolution search filters appear to be broken + cat_id_movies = 7 + + http_time_between_calls = 1 #seconds + + def _searchOnTitle(self, title, movie, quality, results): + + arguments = tryUrlencode({ + 'search': '%s %s' % (title.replace(':', ''), movie['library']['year']), + 'cat': self.cat_id_movies + }) + + url = "%s&%s" % (self.urls['search'], arguments) + + data = self.getHTMLData(url, opener = self.login_opener) + + if data: + # Remove BiT-HDTV's output garbage so outdated BS4 versions successfully parse the HTML + split_data = data.partition('-->') + if '## SELECT COUNT(' in split_data[0]: + data = split_data[2] + + html = BeautifulSoup(data) + + try: + result_table = html.find('table', attrs = {'width' : '750', 'class' : ''}) + if result_table is None: + return + + entries = result_table.find_all('tr') + for result in entries[1:]: + + cells = result.find_all('td') + link = cells[2].find('a') + torrent_id = link['href'].replace('/details.php?id=', '') + + results.append({ + 'id': torrent_id, + 'name': link.contents[0].get_text(), + 'url': cells[0].find('a')['href'], + 'detail_url': self.urls['detail'] % torrent_id, + 'size': self.parseSize(cells[6].get_text()), + 'seeders': tryInt(cells[8].string), + 'leechers': tryInt(cells[9].string), + 'get_more_info': self.getMoreInfo, + 
}) + + except: + log.error('Failed getting results from %s: %s', (self.getName(), traceback.format_exc())) + + def getLoginParams(self): + return tryUrlencode({ + 'username': self.conf('username'), + 'password': self.conf('password'), + }) + + def getMoreInfo(self, item): + full_description = self.getCache('bithdtv.%s' % item['id'], item['detail_url'], cache_timeout = 25920000) + html = BeautifulSoup(full_description) + nfo_pre = html.find('table', attrs = {'class':'detail'}) + description = toUnicode(nfo_pre.text) if nfo_pre else '' + + item['description'] = description + return item + + def loginSuccess(self, output): + return 'logout.php' in output.lower() + + loginCheckSuccess = loginSuccess diff --git a/couchpotato/core/providers/torrent/kickasstorrents/__init__.py b/couchpotato/core/providers/torrent/kickasstorrents/__init__.py index b095a97..0b79c81 100644 --- a/couchpotato/core/providers/torrent/kickasstorrents/__init__.py +++ b/couchpotato/core/providers/torrent/kickasstorrents/__init__.py @@ -19,6 +19,12 @@ config = [{ 'default': True, }, { + 'name': 'domain', + 'advanced': True, + 'label': 'Proxy server', + 'description': 'Domain for requests, keep empty to let CouchPotato pick.', + }, + { 'name': 'seed_ratio', 'label': 'Seed ratio', 'type': 'float', diff --git a/couchpotato/core/providers/torrent/kickasstorrents/main.py b/couchpotato/core/providers/torrent/kickasstorrents/main.py index b85aadc..50f14ce 100644 --- a/couchpotato/core/providers/torrent/kickasstorrents/main.py +++ b/couchpotato/core/providers/torrent/kickasstorrents/main.py @@ -11,9 +11,8 @@ log = CPLog(__name__) class KickAssTorrents(TorrentMagnetProvider): urls = { - 'test': 'https://kickass.to/', - 'detail': 'https://kickass.to/%s', - 'search': 'https://kickass.to/%s-i%s/', + 'detail': '%s/%s', + 'search': '%s/%s-i%s/', } cat_ids = [ @@ -28,9 +27,16 @@ class KickAssTorrents(TorrentMagnetProvider): http_time_between_calls = 1 #seconds cat_backup_id = None + proxy_list = [ + 
'https://kickass.to', + 'http://kickass.pw', + 'http://www.kickassunblock.info', + 'http://www.kickassproxy.info', + ] + def _search(self, movie, quality, results): - data = self.getHTMLData(self.urls['search'] % ('m', movie['library']['identifier'].replace('tt', ''))) + data = self.getHTMLData(self.urls['search'] % (self.getDomain(), 'm', movie['library']['identifier'].replace('tt', ''))) if data: @@ -41,7 +47,7 @@ class KickAssTorrents(TorrentMagnetProvider): html = BeautifulSoup(data) resultdiv = html.find('div', attrs = {'class':'tabs'}) for result in resultdiv.find_all('div', recursive = False): - if result.get('id').lower() not in cat_ids: + if result.get('id').lower().strip('tab-') not in cat_ids: continue try: @@ -56,12 +62,12 @@ class KickAssTorrents(TorrentMagnetProvider): column_name = table_order[nr] if column_name: - if column_name is 'name': + if column_name == 'name': link = td.find('div', {'class': 'torrentname'}).find_all('a')[1] new['id'] = temp.get('id')[-8:] new['name'] = link.text new['url'] = td.find('a', 'imagnet')['href'] - new['detail_url'] = self.urls['detail'] % link['href'][1:] + new['detail_url'] = self.urls['detail'] % (self.getDomain(), link['href'][1:]) new['score'] = 20 if td.find('a', 'iverif') else 0 elif column_name is 'size': new['size'] = self.parseSize(td.text) @@ -100,3 +106,10 @@ class KickAssTorrents(TorrentMagnetProvider): age += tryInt(nr) * mult return tryInt(age) + + + def isEnabled(self): + return super(KickAssTorrents, self).isEnabled() and self.getDomain() + + def correctProxy(self, data): + return 'search query' in data.lower() diff --git a/couchpotato/core/providers/torrent/scenehd/main.py b/couchpotato/core/providers/torrent/scenehd/main.py deleted file mode 100644 index 2b76e43..0000000 --- a/couchpotato/core/providers/torrent/scenehd/main.py +++ /dev/null @@ -1,79 +0,0 @@ -from bs4 import BeautifulSoup -from couchpotato.core.helpers.encoding import simplifyString, tryUrlencode -from 
couchpotato.core.helpers.variable import tryInt -from couchpotato.core.logger import CPLog -from couchpotato.core.providers.torrent.base import TorrentProvider -import traceback - -log = CPLog(__name__) - - -class SceneHD(TorrentProvider): - - urls = { - 'test': 'https://scenehd.org/', - 'login' : 'https://scenehd.org/takelogin.php', - 'login_check': 'https://scenehd.org/my.php', - 'detail': 'https://scenehd.org/details.php?id=%s', - 'search': 'https://scenehd.org/browse.php?ajax', - 'download': 'https://scenehd.org/download.php?id=%s', - } - - http_time_between_calls = 1 #seconds - - def _searchOnTitle(self, title, movie, quality, results): - - q = '"%s %s"' % (simplifyString(title), movie['library']['year']) - arguments = tryUrlencode({ - 'search': q, - }) - url = "%s&%s" % (self.urls['search'], arguments) - - data = self.getHTMLData(url, opener = self.login_opener) - - if data: - html = BeautifulSoup(data) - - try: - resultsTable = html.find_all('table')[6] - entries = resultsTable.find_all('tr') - for result in entries[1:]: - - all_cells = result.find_all('td') - - detail_link = all_cells[2].find('a') - details = detail_link['href'] - torrent_id = details.replace('details.php?id=', '') - - leechers = all_cells[11].find('a') - if leechers: - leechers = leechers.string - else: - leechers = all_cells[11].string - - results.append({ - 'id': torrent_id, - 'name': detail_link['title'], - 'size': self.parseSize(all_cells[7].string), - 'seeders': tryInt(all_cells[10].find('a').string), - 'leechers': tryInt(leechers), - 'url': self.urls['download'] % torrent_id, - 'description': all_cells[1].find('a')['href'], - }) - - except: - log.error('Failed getting results from %s: %s', (self.getName(), traceback.format_exc())) - - - def getLoginParams(self): - return tryUrlencode({ - 'username': self.conf('username'), - 'password': self.conf('password'), - 'ssl': 'yes', - }) - - def loginSuccess(self, output): - return 'logout.php' in output.lower() - - loginCheckSuccess = 
loginSuccess - diff --git a/couchpotato/core/providers/torrent/thepiratebay/main.py b/couchpotato/core/providers/torrent/thepiratebay/main.py index 6aa2216..b967d5f 100644 --- a/couchpotato/core/providers/torrent/thepiratebay/main.py +++ b/couchpotato/core/providers/torrent/thepiratebay/main.py @@ -1,11 +1,9 @@ from bs4 import BeautifulSoup from couchpotato.core.helpers.encoding import toUnicode, tryUrlencode -from couchpotato.core.helpers.variable import tryInt, cleanHost +from couchpotato.core.helpers.variable import tryInt from couchpotato.core.logger import CPLog from couchpotato.core.providers.torrent.base import TorrentMagnetProvider -from couchpotato.environment import Env import re -import time import traceback log = CPLog(__name__) @@ -30,8 +28,8 @@ class ThePirateBay(TorrentMagnetProvider): http_time_between_calls = 0 proxy_list = [ - 'https://thepiratebay.se', 'https://tpb.ipredator.se', + 'https://thepiratebay.se', 'https://depiraatbaai.be', 'https://piratereverse.info', 'https://tpb.pirateparty.org.uk', @@ -43,10 +41,6 @@ class ThePirateBay(TorrentMagnetProvider): 'https://kuiken.co', ] - def __init__(self): - self.domain = self.conf('domain') - super(ThePirateBay, self).__init__() - def _searchOnTitle(self, title, movie, quality, results): page = 0 @@ -108,38 +102,11 @@ class ThePirateBay(TorrentMagnetProvider): except: log.error('Failed getting results from %s: %s', (self.getName(), traceback.format_exc())) - def isEnabled(self): return super(ThePirateBay, self).isEnabled() and self.getDomain() - def getDomain(self, url = ''): - - if not self.domain: - for proxy in self.proxy_list: - - prop_name = 'tpb_proxy.%s' % proxy - last_check = float(Env.prop(prop_name, default = 0)) - if last_check > time.time() - 1209600: - continue - - data = '' - try: - data = self.urlopen(proxy, timeout = 3, show_error = False) - except: - log.debug('Failed tpb proxy %s', proxy) - - if 'title="Pirate Search"' in data: - log.debug('Using proxy: %s', proxy) - self.domain = 
proxy - break - - Env.prop(prop_name, time.time()) - - if not self.domain: - log.error('No TPB proxies left, please add one in settings, or let us know which one to add on the forum.') - return None - - return cleanHost(self.domain).rstrip('/') + url + def correctProxy(self, data): + return 'title="Pirate Search"' in data def getMoreInfo(self, item): full_description = self.getCache('tpb.%s' % item['id'], item['detail_url'], cache_timeout = 25920000) diff --git a/couchpotato/core/providers/torrent/yify/main.py b/couchpotato/core/providers/torrent/yify/main.py index 47fe310..60b2f9b 100644 --- a/couchpotato/core/providers/torrent/yify/main.py +++ b/couchpotato/core/providers/torrent/yify/main.py @@ -23,7 +23,7 @@ class Yify(TorrentProvider): return super(Yify, self).search(movie, quality) - def _searchOnTitle(self, title, movie, quality, results): + def _search(self, movie, quality, results): data = self.getJsonData(self.urls['search'] % (movie['library']['identifier'], quality['identifier'])) diff --git a/couchpotato/core/providers/userscript/flickchart/__init__.py b/couchpotato/core/providers/userscript/flickchart/__init__.py new file mode 100644 index 0000000..89d45d9 --- /dev/null +++ b/couchpotato/core/providers/userscript/flickchart/__init__.py @@ -0,0 +1,6 @@ +from .main import Flickchart + +def start(): + return Flickchart() + +config = [] diff --git a/couchpotato/core/providers/userscript/flickchart/main.py b/couchpotato/core/providers/userscript/flickchart/main.py new file mode 100644 index 0000000..a66bd38 --- /dev/null +++ b/couchpotato/core/providers/userscript/flickchart/main.py @@ -0,0 +1,30 @@ +from couchpotato.core.event import fireEvent +from couchpotato.core.logger import CPLog +from couchpotato.core.providers.userscript.base import UserscriptBase +import traceback + +log = CPLog(__name__) + + +class Flickchart(UserscriptBase): + + includes = ['http://www.flickchart.com/movie/*'] + + def getMovie(self, url): + + try: + data = self.getUrl(url) + 
except: + return + + try: + start = data.find('') + end = data.find('', start) + page_title = data[start + len(''):end].strip().split('-') + + year_name = fireEvent('scanner.name_year', page_title[0], single = True) + + return self.search(**year_name) + except: + log.error('Failed parsing page for title and year: %s', traceback.format_exc()) + diff --git a/couchpotato/static/scripts/couchpotato.js b/couchpotato/static/scripts/couchpotato.js index 77138a7..4a87db4 100644 --- a/couchpotato/static/scripts/couchpotato.js +++ b/couchpotato/static/scripts/couchpotato.js @@ -11,6 +11,12 @@ pages: [], block: [], + initialize: function(){ + var self = this; + + self.global_events = {}; + }, + setup: function(options) { var self = this; self.setOptions(options); @@ -30,9 +36,9 @@ History.addEvent('change', self.openPage.bind(self)); self.c.addEvent('click:relay(a[href^=/]:not([target]))', self.pushState.bind(self)); self.c.addEvent('click:relay(a[href^=http])', self.openDerefered.bind(self)); - + // Check if device is touchenabled - self.touch_device = 'ontouchstart' in document.documentElement; + self.touch_device = 'ontouchstart' in window || navigator.msMaxTouchPoints; if(self.touch_device) self.c.addClass('touch_enabled'); @@ -55,7 +61,7 @@ History.push(url); } }, - + isMac: function(){ return Browser.Platform.mac }, @@ -111,7 +117,7 @@ } }) ]; - + setting_links.each(function(a){ self.block.more.addLink(a) }); @@ -336,6 +342,66 @@ }) ) ); + }, + + /* + * Global events + */ + on: function(name, handle){ + var self = this; + + if(!self.global_events[name]) + self.global_events[name] = []; + + self.global_events[name].push(handle); + + }, + + trigger: function(name, args, on_complete){ + var self = this; + + if(!self.global_events[name]){ return; } + + if(!on_complete && typeOf(args) == 'function'){ + on_complete = args; + args = {}; + } + + // Create parallel callback + var callbacks = []; + self.global_events[name].each(function(handle, nr){ + + 
callbacks.push(function(callback){ + var results = handle(args || {}); + callback(null, results || null); + }); + + }); + + // Fire events + async.parallel(callbacks, function(err, results){ + if(err) p(err); + + if(on_complete) + on_complete(results); + }); + + }, + + off: function(name, handle){ + var self = this; + + if(!self.global_events[name]) return; + + // Remove single + if(handle){ + self.global_events[name] = self.global_events[name].erase(handle); + } + // Reset full event + else { + self.global_events[name] = []; + } + } }); @@ -503,7 +569,7 @@ function randomString(length, extra) { case "string": saveKeyPath(argument.match(/[+-]|[^.]+/g)); break; } }); - return this.sort(comparer); + return this.stableSort(comparer); } }); diff --git a/couchpotato/static/scripts/library/Array.stableSort.js b/couchpotato/static/scripts/library/Array.stableSort.js new file mode 100644 index 0000000..062c756 --- /dev/null +++ b/couchpotato/static/scripts/library/Array.stableSort.js @@ -0,0 +1,56 @@ +/* +--- + +script: Array.stableSort.js + +description: Add a stable sort algorithm for all browsers + +license: MIT-style license. + +authors: + - Yorick Sijsling + +requires: + core/1.3: '*' + +provides: + - [Array.stableSort, Array.mergeSort] + +... +*/ + +(function() { + + var defaultSortFunction = function(a, b) { + return a > b ? 1 : (a < b ? -1 : 0); + } + + Array.implement({ + + stableSort: function(compare) { + // I would love some real feature recognition. Problem is that an unstable algorithm sometimes/often gives the same result as an unstable algorithm. + return (Browser.chrome || Browser.firefox2 || Browser.opera9) ? this.mergeSort(compare) : this.sort(compare); + }, + + mergeSort: function(compare, token) { + compare = compare || defaultSortFunction; + if (this.length > 1) { + // Split and sort both parts + var right = this.splice(Math.floor(this.length / 2)).mergeSort(compare); + var left = this.splice(0).mergeSort(compare); // 'this' is now empty. 
+ + // Merge parts together + while (left.length > 0 || right.length > 0) { + this.push( + right.length === 0 ? left.shift() + : left.length === 0 ? right.shift() + : compare(left[0], right[0]) > 0 ? right.shift() + : left.shift()); + } + } + return this; + } + + }); +})(); + diff --git a/couchpotato/static/scripts/library/async.js b/couchpotato/static/scripts/library/async.js new file mode 100644 index 0000000..cb6320d --- /dev/null +++ b/couchpotato/static/scripts/library/async.js @@ -0,0 +1,955 @@ +/*global setImmediate: false, setTimeout: false, console: false */ +(function () { + + var async = {}; + + // global on the server, window in the browser + var root, previous_async; + + root = this; + if (root != null) { + previous_async = root.async; + } + + async.noConflict = function () { + root.async = previous_async; + return async; + }; + + function only_once(fn) { + var called = false; + return function() { + if (called) throw new Error("Callback was already called."); + called = true; + fn.apply(root, arguments); + } + } + + //// cross-browser compatiblity functions //// + + var _each = function (arr, iterator) { + if (arr.forEach) { + return arr.forEach(iterator); + } + for (var i = 0; i < arr.length; i += 1) { + iterator(arr[i], i, arr); + } + }; + + var _map = function (arr, iterator) { + if (arr.map) { + return arr.map(iterator); + } + var results = []; + _each(arr, function (x, i, a) { + results.push(iterator(x, i, a)); + }); + return results; + }; + + var _reduce = function (arr, iterator, memo) { + if (arr.reduce) { + return arr.reduce(iterator, memo); + } + _each(arr, function (x, i, a) { + memo = iterator(memo, x, i, a); + }); + return memo; + }; + + var _keys = function (obj) { + if (Object.keys) { + return Object.keys(obj); + } + var keys = []; + for (var k in obj) { + if (obj.hasOwnProperty(k)) { + keys.push(k); + } + } + return keys; + }; + + //// exported async module functions //// + + //// nextTick implementation with browser-compatible 
fallback //// + if (typeof process === 'undefined' || !(process.nextTick)) { + if (typeof setImmediate === 'function') { + async.nextTick = function (fn) { + // not a direct alias for IE10 compatibility + setImmediate(fn); + }; + async.setImmediate = async.nextTick; + } + else { + async.nextTick = function (fn) { + setTimeout(fn, 0); + }; + async.setImmediate = async.nextTick; + } + } + else { + async.nextTick = process.nextTick; + if (typeof setImmediate !== 'undefined') { + async.setImmediate = setImmediate; + } + else { + async.setImmediate = async.nextTick; + } + } + + async.each = function (arr, iterator, callback) { + callback = callback || function () {}; + if (!arr.length) { + return callback(); + } + var completed = 0; + _each(arr, function (x) { + iterator(x, only_once(function (err) { + if (err) { + callback(err); + callback = function () {}; + } + else { + completed += 1; + if (completed >= arr.length) { + callback(null); + } + } + })); + }); + }; + async.forEach = async.each; + + async.eachSeries = function (arr, iterator, callback) { + callback = callback || function () {}; + if (!arr.length) { + return callback(); + } + var completed = 0; + var iterate = function () { + iterator(arr[completed], function (err) { + if (err) { + callback(err); + callback = function () {}; + } + else { + completed += 1; + if (completed >= arr.length) { + callback(null); + } + else { + iterate(); + } + } + }); + }; + iterate(); + }; + async.forEachSeries = async.eachSeries; + + async.eachLimit = function (arr, limit, iterator, callback) { + var fn = _eachLimit(limit); + fn.apply(null, [arr, iterator, callback]); + }; + async.forEachLimit = async.eachLimit; + + var _eachLimit = function (limit) { + + return function (arr, iterator, callback) { + callback = callback || function () {}; + if (!arr.length || limit <= 0) { + return callback(); + } + var completed = 0; + var started = 0; + var running = 0; + + (function replenish () { + if (completed >= arr.length) { + return 
callback(); + } + + while (running < limit && started < arr.length) { + started += 1; + running += 1; + iterator(arr[started - 1], function (err) { + if (err) { + callback(err); + callback = function () {}; + } + else { + completed += 1; + running -= 1; + if (completed >= arr.length) { + callback(); + } + else { + replenish(); + } + } + }); + } + })(); + }; + }; + + + var doParallel = function (fn) { + return function () { + var args = Array.prototype.slice.call(arguments); + return fn.apply(null, [async.each].concat(args)); + }; + }; + var doParallelLimit = function(limit, fn) { + return function () { + var args = Array.prototype.slice.call(arguments); + return fn.apply(null, [_eachLimit(limit)].concat(args)); + }; + }; + var doSeries = function (fn) { + return function () { + var args = Array.prototype.slice.call(arguments); + return fn.apply(null, [async.eachSeries].concat(args)); + }; + }; + + + var _asyncMap = function (eachfn, arr, iterator, callback) { + var results = []; + arr = _map(arr, function (x, i) { + return {index: i, value: x}; + }); + eachfn(arr, function (x, callback) { + iterator(x.value, function (err, v) { + results[x.index] = v; + callback(err); + }); + }, function (err) { + callback(err, results); + }); + }; + async.map = doParallel(_asyncMap); + async.mapSeries = doSeries(_asyncMap); + async.mapLimit = function (arr, limit, iterator, callback) { + return _mapLimit(limit)(arr, iterator, callback); + }; + + var _mapLimit = function(limit) { + return doParallelLimit(limit, _asyncMap); + }; + + // reduce only has a series version, as doing reduce in parallel won't + // work in many situations. 
+ async.reduce = function (arr, memo, iterator, callback) { + async.eachSeries(arr, function (x, callback) { + iterator(memo, x, function (err, v) { + memo = v; + callback(err); + }); + }, function (err) { + callback(err, memo); + }); + }; + // inject alias + async.inject = async.reduce; + // foldl alias + async.foldl = async.reduce; + + async.reduceRight = function (arr, memo, iterator, callback) { + var reversed = _map(arr, function (x) { + return x; + }).reverse(); + async.reduce(reversed, memo, iterator, callback); + }; + // foldr alias + async.foldr = async.reduceRight; + + var _filter = function (eachfn, arr, iterator, callback) { + var results = []; + arr = _map(arr, function (x, i) { + return {index: i, value: x}; + }); + eachfn(arr, function (x, callback) { + iterator(x.value, function (v) { + if (v) { + results.push(x); + } + callback(); + }); + }, function (err) { + callback(_map(results.sort(function (a, b) { + return a.index - b.index; + }), function (x) { + return x.value; + })); + }); + }; + async.filter = doParallel(_filter); + async.filterSeries = doSeries(_filter); + // select alias + async.select = async.filter; + async.selectSeries = async.filterSeries; + + var _reject = function (eachfn, arr, iterator, callback) { + var results = []; + arr = _map(arr, function (x, i) { + return {index: i, value: x}; + }); + eachfn(arr, function (x, callback) { + iterator(x.value, function (v) { + if (!v) { + results.push(x); + } + callback(); + }); + }, function (err) { + callback(_map(results.sort(function (a, b) { + return a.index - b.index; + }), function (x) { + return x.value; + })); + }); + }; + async.reject = doParallel(_reject); + async.rejectSeries = doSeries(_reject); + + var _detect = function (eachfn, arr, iterator, main_callback) { + eachfn(arr, function (x, callback) { + iterator(x, function (result) { + if (result) { + main_callback(x); + main_callback = function () {}; + } + else { + callback(); + } + }); + }, function (err) { + main_callback(); 
+ }); + }; + async.detect = doParallel(_detect); + async.detectSeries = doSeries(_detect); + + async.some = function (arr, iterator, main_callback) { + async.each(arr, function (x, callback) { + iterator(x, function (v) { + if (v) { + main_callback(true); + main_callback = function () {}; + } + callback(); + }); + }, function (err) { + main_callback(false); + }); + }; + // any alias + async.any = async.some; + + async.every = function (arr, iterator, main_callback) { + async.each(arr, function (x, callback) { + iterator(x, function (v) { + if (!v) { + main_callback(false); + main_callback = function () {}; + } + callback(); + }); + }, function (err) { + main_callback(true); + }); + }; + // all alias + async.all = async.every; + + async.sortBy = function (arr, iterator, callback) { + async.map(arr, function (x, callback) { + iterator(x, function (err, criteria) { + if (err) { + callback(err); + } + else { + callback(null, {value: x, criteria: criteria}); + } + }); + }, function (err, results) { + if (err) { + return callback(err); + } + else { + var fn = function (left, right) { + var a = left.criteria, b = right.criteria; + return a < b ? -1 : a > b ? 
1 : 0; + }; + callback(null, _map(results.sort(fn), function (x) { + return x.value; + })); + } + }); + }; + + async.auto = function (tasks, callback) { + callback = callback || function () {}; + var keys = _keys(tasks); + if (!keys.length) { + return callback(null); + } + + var results = {}; + + var listeners = []; + var addListener = function (fn) { + listeners.unshift(fn); + }; + var removeListener = function (fn) { + for (var i = 0; i < listeners.length; i += 1) { + if (listeners[i] === fn) { + listeners.splice(i, 1); + return; + } + } + }; + var taskComplete = function () { + _each(listeners.slice(0), function (fn) { + fn(); + }); + }; + + addListener(function () { + if (_keys(results).length === keys.length) { + callback(null, results); + callback = function () {}; + } + }); + + _each(keys, function (k) { + var task = (tasks[k] instanceof Function) ? [tasks[k]]: tasks[k]; + var taskCallback = function (err) { + var args = Array.prototype.slice.call(arguments, 1); + if (args.length <= 1) { + args = args[0]; + } + if (err) { + var safeResults = {}; + _each(_keys(results), function(rkey) { + safeResults[rkey] = results[rkey]; + }); + safeResults[k] = args; + callback(err, safeResults); + // stop subsequent errors hitting callback multiple times + callback = function () {}; + } + else { + results[k] = args; + async.setImmediate(taskComplete); + } + }; + var requires = task.slice(0, Math.abs(task.length - 1)) || []; + var ready = function () { + return _reduce(requires, function (a, x) { + return (a && results.hasOwnProperty(x)); + }, true) && !results.hasOwnProperty(k); + }; + if (ready()) { + task[task.length - 1](taskCallback, results); + } + else { + var listener = function () { + if (ready()) { + removeListener(listener); + task[task.length - 1](taskCallback, results); + } + }; + addListener(listener); + } + }); + }; + + async.waterfall = function (tasks, callback) { + callback = callback || function () {}; + if (tasks.constructor !== Array) { + var err = new 
Error('First argument to waterfall must be an array of functions'); + return callback(err); + } + if (!tasks.length) { + return callback(); + } + var wrapIterator = function (iterator) { + return function (err) { + if (err) { + callback.apply(null, arguments); + callback = function () {}; + } + else { + var args = Array.prototype.slice.call(arguments, 1); + var next = iterator.next(); + if (next) { + args.push(wrapIterator(next)); + } + else { + args.push(callback); + } + async.setImmediate(function () { + iterator.apply(null, args); + }); + } + }; + }; + wrapIterator(async.iterator(tasks))(); + }; + + var _parallel = function(eachfn, tasks, callback) { + callback = callback || function () {}; + if (tasks.constructor === Array) { + eachfn.map(tasks, function (fn, callback) { + if (fn) { + fn(function (err) { + var args = Array.prototype.slice.call(arguments, 1); + if (args.length <= 1) { + args = args[0]; + } + callback.call(null, err, args); + }); + } + }, callback); + } + else { + var results = {}; + eachfn.each(_keys(tasks), function (k, callback) { + tasks[k](function (err) { + var args = Array.prototype.slice.call(arguments, 1); + if (args.length <= 1) { + args = args[0]; + } + results[k] = args; + callback(err); + }); + }, function (err) { + callback(err, results); + }); + } + }; + + async.parallel = function (tasks, callback) { + _parallel({ map: async.map, each: async.each }, tasks, callback); + }; + + async.parallelLimit = function(tasks, limit, callback) { + _parallel({ map: _mapLimit(limit), each: _eachLimit(limit) }, tasks, callback); + }; + + async.series = function (tasks, callback) { + callback = callback || function () {}; + if (tasks.constructor === Array) { + async.mapSeries(tasks, function (fn, callback) { + if (fn) { + fn(function (err) { + var args = Array.prototype.slice.call(arguments, 1); + if (args.length <= 1) { + args = args[0]; + } + callback.call(null, err, args); + }); + } + }, callback); + } + else { + var results = {}; + 
async.eachSeries(_keys(tasks), function (k, callback) { + tasks[k](function (err) { + var args = Array.prototype.slice.call(arguments, 1); + if (args.length <= 1) { + args = args[0]; + } + results[k] = args; + callback(err); + }); + }, function (err) { + callback(err, results); + }); + } + }; + + async.iterator = function (tasks) { + var makeCallback = function (index) { + var fn = function () { + if (tasks.length) { + tasks[index].apply(null, arguments); + } + return fn.next(); + }; + fn.next = function () { + return (index < tasks.length - 1) ? makeCallback(index + 1): null; + }; + return fn; + }; + return makeCallback(0); + }; + + async.apply = function (fn) { + var args = Array.prototype.slice.call(arguments, 1); + return function () { + return fn.apply( + null, args.concat(Array.prototype.slice.call(arguments)) + ); + }; + }; + + var _concat = function (eachfn, arr, fn, callback) { + var r = []; + eachfn(arr, function (x, cb) { + fn(x, function (err, y) { + r = r.concat(y || []); + cb(err); + }); + }, function (err) { + callback(err, r); + }); + }; + async.concat = doParallel(_concat); + async.concatSeries = doSeries(_concat); + + async.whilst = function (test, iterator, callback) { + if (test()) { + iterator(function (err) { + if (err) { + return callback(err); + } + async.whilst(test, iterator, callback); + }); + } + else { + callback(); + } + }; + + async.doWhilst = function (iterator, test, callback) { + iterator(function (err) { + if (err) { + return callback(err); + } + if (test()) { + async.doWhilst(iterator, test, callback); + } + else { + callback(); + } + }); + }; + + async.until = function (test, iterator, callback) { + if (!test()) { + iterator(function (err) { + if (err) { + return callback(err); + } + async.until(test, iterator, callback); + }); + } + else { + callback(); + } + }; + + async.doUntil = function (iterator, test, callback) { + iterator(function (err) { + if (err) { + return callback(err); + } + if (!test()) { + 
async.doUntil(iterator, test, callback); + } + else { + callback(); + } + }); + }; + + async.queue = function (worker, concurrency) { + if (concurrency === undefined) { + concurrency = 1; + } + function _insert(q, data, pos, callback) { + if(data.constructor !== Array) { + data = [data]; + } + _each(data, function(task) { + var item = { + data: task, + callback: typeof callback === 'function' ? callback : null + }; + + if (pos) { + q.tasks.unshift(item); + } else { + q.tasks.push(item); + } + + if (q.saturated && q.tasks.length === concurrency) { + q.saturated(); + } + async.setImmediate(q.process); + }); + } + + var workers = 0; + var q = { + tasks: [], + concurrency: concurrency, + saturated: null, + empty: null, + drain: null, + push: function (data, callback) { + _insert(q, data, false, callback); + }, + unshift: function (data, callback) { + _insert(q, data, true, callback); + }, + process: function () { + if (workers < q.concurrency && q.tasks.length) { + var task = q.tasks.shift(); + if (q.empty && q.tasks.length === 0) { + q.empty(); + } + workers += 1; + var next = function () { + workers -= 1; + if (task.callback) { + task.callback.apply(task, arguments); + } + if (q.drain && q.tasks.length + workers === 0) { + q.drain(); + } + q.process(); + }; + var cb = only_once(next); + worker(task.data, cb); + } + }, + length: function () { + return q.tasks.length; + }, + running: function () { + return workers; + } + }; + return q; + }; + + async.cargo = function (worker, payload) { + var working = false, + tasks = []; + + var cargo = { + tasks: tasks, + payload: payload, + saturated: null, + empty: null, + drain: null, + push: function (data, callback) { + if(data.constructor !== Array) { + data = [data]; + } + _each(data, function(task) { + tasks.push({ + data: task, + callback: typeof callback === 'function' ? 
callback : null + }); + if (cargo.saturated && tasks.length === payload) { + cargo.saturated(); + } + }); + async.setImmediate(cargo.process); + }, + process: function process() { + if (working) return; + if (tasks.length === 0) { + if(cargo.drain) cargo.drain(); + return; + } + + var ts = typeof payload === 'number' + ? tasks.splice(0, payload) + : tasks.splice(0); + + var ds = _map(ts, function (task) { + return task.data; + }); + + if(cargo.empty) cargo.empty(); + working = true; + worker(ds, function () { + working = false; + + var args = arguments; + _each(ts, function (data) { + if (data.callback) { + data.callback.apply(null, args); + } + }); + + process(); + }); + }, + length: function () { + return tasks.length; + }, + running: function () { + return working; + } + }; + return cargo; + }; + + var _console_fn = function (name) { + return function (fn) { + var args = Array.prototype.slice.call(arguments, 1); + fn.apply(null, args.concat([function (err) { + var args = Array.prototype.slice.call(arguments, 1); + if (typeof console !== 'undefined') { + if (err) { + if (console.error) { + console.error(err); + } + } + else if (console[name]) { + _each(args, function (x) { + console[name](x); + }); + } + } + }])); + }; + }; + async.log = _console_fn('log'); + async.dir = _console_fn('dir'); + /*async.info = _console_fn('info'); + async.warn = _console_fn('warn'); + async.error = _console_fn('error');*/ + + async.memoize = function (fn, hasher) { + var memo = {}; + var queues = {}; + hasher = hasher || function (x) { + return x; + }; + var memoized = function () { + var args = Array.prototype.slice.call(arguments); + var callback = args.pop(); + var key = hasher.apply(null, args); + if (key in memo) { + callback.apply(null, memo[key]); + } + else if (key in queues) { + queues[key].push(callback); + } + else { + queues[key] = [callback]; + fn.apply(null, args.concat([function () { + memo[key] = arguments; + var q = queues[key]; + delete queues[key]; + for (var i = 
0, l = q.length; i < l; i++) { + q[i].apply(null, arguments); + } + }])); + } + }; + memoized.memo = memo; + memoized.unmemoized = fn; + return memoized; + }; + + async.unmemoize = function (fn) { + return function () { + return (fn.unmemoized || fn).apply(null, arguments); + }; + }; + + async.times = function (count, iterator, callback) { + var counter = []; + for (var i = 0; i < count; i++) { + counter.push(i); + } + return async.map(counter, iterator, callback); + }; + + async.timesSeries = function (count, iterator, callback) { + var counter = []; + for (var i = 0; i < count; i++) { + counter.push(i); + } + return async.mapSeries(counter, iterator, callback); + }; + + async.compose = function (/* functions... */) { + var fns = Array.prototype.reverse.call(arguments); + return function () { + var that = this; + var args = Array.prototype.slice.call(arguments); + var callback = args.pop(); + async.reduce(fns, args, function (newargs, fn, cb) { + fn.apply(that, newargs.concat([function () { + var err = arguments[0]; + var nextargs = Array.prototype.slice.call(arguments, 1); + cb(err, nextargs); + }])) + }, + function (err, results) { + callback.apply(that, [err].concat(results)); + }); + }; + }; + + var _applyEach = function (eachfn, fns /*args...*/) { + var go = function () { + var that = this; + var args = Array.prototype.slice.call(arguments); + var callback = args.pop(); + return eachfn(fns, function (fn, cb) { + fn.apply(that, args.concat([cb])); + }, + callback); + }; + if (arguments.length > 2) { + var args = Array.prototype.slice.call(arguments, 2); + return go.apply(this, args); + } + else { + return go; + } + }; + async.applyEach = doParallel(_applyEach); + async.applyEachSeries = doSeries(_applyEach); + + async.forever = function (fn, callback) { + function next(err) { + if (err) { + if (callback) { + return callback(err); + } + throw err; + } + fn(next); + } + next(); + }; + + // AMD / RequireJS + if (typeof define !== 'undefined' && define.amd) { + 
define([], function () { + return async; + }); + } + // Node.js + else if (typeof module !== 'undefined' && module.exports) { + module.exports = async; + } + // included directly via <script> tag + else { + root.async = async; + } + +}()); diff --git a/couchpotato/static/scripts/page/about.js b/couchpotato/static/scripts/page/about.js index f931335..b4326ae 100644 --- a/couchpotato/static/scripts/page/about.js +++ b/couchpotato/static/scripts/page/about.js @@ -106,7 +106,7 @@ var AboutSettingTab = new Class({ new Element('div.donate', { 'html': 'Or support me via:' + - '<iframe src="http://couchpota.to/donate.html" style="border:none; height: 200px;" scrolling="no"></iframe>' + '<iframe src="https://couchpota.to/donate.html" style="border:none; height: 200px;" scrolling="no"></iframe>' }) ); diff --git a/couchpotato/static/scripts/page/home.js b/couchpotato/static/scripts/page/home.js index b93db5b..dee43c8 100644 --- a/couchpotato/static/scripts/page/home.js +++ b/couchpotato/static/scripts/page/home.js @@ -52,11 +52,24 @@ Page.Home = new Class({ }) ), 'filter': { - 'release_status': 'snatched,available' + 'release_status': 'snatched,seeding,missing,available,downloaded' }, 'limit': null, 'onLoaded': function(){ self.chain.callChain(); + }, + 'onMovieAdded': function(notification){ + + // Track movie added + var after_search = function(data){ + if(notification.data.id != data.data.id) return; + + // Force update after search + self.available_list.update(); + App.off('movie.searcher.ended', after_search); + } + App.on('movie.searcher.ended', after_search); + } }); diff --git a/couchpotato/static/scripts/page/manage.js b/couchpotato/static/scripts/page/manage.js index 4827f51..eeeef62 100644 --- a/couchpotato/static/scripts/page/manage.js +++ b/couchpotato/static/scripts/page/manage.js @@ -102,6 +102,8 @@ Page.Manage = new Class({ } } else { + // Capture progress so we can use it in our *each* closure + var progress = json.progress // Don't add loader when page is 
loading still if(!self.list.navigation) @@ -112,10 +114,13 @@ Page.Manage = new Class({ self.progress_container.empty(); - Object.each(json.progress, function(progress, folder){ + var sorted_table = self.parseProgress(json.progress) + + sorted_table.each(function(folder){ + var folder_progress = progress[folder] new Element('div').adopt( new Element('span.folder', {'text': folder}), - new Element('span.percentage', {'text': progress.total ? (((progress.total-progress.to_go)/progress.total)*100).round() + '%' : '0%'}) + new Element('span.percentage', {'text': folder_progress.total ? (((folder_progress.total-folder_progress.to_go)/folder_progress.total)*100).round() + '%' : '0%'}) ).inject(self.progress_container) }); @@ -124,7 +129,17 @@ Page.Manage = new Class({ }) }, 1000); + }, + parseProgress: function (progress_object) { + var folder, temp_array = []; + + for (folder in progress_object) { + if (progress_object.hasOwnProperty(folder)) { + temp_array.push(folder) + } + } + return temp_array.stableSort() } }); diff --git a/couchpotato/static/scripts/page/settings.js b/couchpotato/static/scripts/page/settings.js index 68b41d0..213c0d9 100644 --- a/couchpotato/static/scripts/page/settings.js +++ b/couchpotato/static/scripts/page/settings.js @@ -111,6 +111,10 @@ Page.Settings = new Class({ Cookie.write('advanced_toggle_checked', +self.advanced_toggle.checked, {'duration': 365}); }, + sortByOrder: function(a, b){ + return (a.order || 100) - (b.order || 100) + }, + create: function(json){ var self = this; @@ -141,13 +145,11 @@ Page.Settings = new Class({ options.include(section); }); - options.sort(function(a, b){ - return (a.order || 100) - (b.order || 100) - }).each(function(section){ + options.stableSort(self.sortByOrder).each(function(section){ var section_name = section.section_name; // Add groups to content - section.groups.sortBy('order').each(function(group){ + section.groups.stableSort(self.sortByOrder).each(function(group){ if(group.hidden) return; 
if(self.wizard_only && !group.wizard) @@ -184,9 +186,7 @@ Page.Settings = new Class({ } // Add options to group - group.options.sort(function(a, b){ - return (a.order || 100) - (b.order || 100) - }).each(function(option){ + group.options.stableSort(self.sortByOrder).each(function(option){ if(option.hidden) return; var class_name = (option.type || 'string').capitalize(); var input = new Option[class_name](section_name, option.name, self.getValue(section_name, option.name), option); diff --git a/couchpotato/templates/index.html b/couchpotato/templates/index.html index d45dcb9..52a4491 100644 --- a/couchpotato/templates/index.html +++ b/couchpotato/templates/index.html @@ -4,6 +4,8 @@ <head> <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no"/> <meta name="apple-mobile-web-app-capable" content="yes"> + <meta name="mobile-web-app-capable" content="yes"> + {% for url in fireEvent('clientscript.get_styles', as_html = True, location = 'front', single = True) %} <link rel="stylesheet" href="{{ Env.get('web_base') }}{{ url }}" type="text/css">{% end %} diff --git a/init/fedora b/init/fedora index 4735247..ec8a9cc 100644 --- a/init/fedora +++ b/init/fedora @@ -1,3 +1,5 @@ +#!/bin/sh +# ### BEGIN INIT INFO # Provides: CouchPotato application instance # Required-Start: $all diff --git a/init/ubuntu b/init/ubuntu old mode 100644 new mode 100755 index 7f770a6..1d2eb57 --- a/init/ubuntu +++ b/init/ubuntu @@ -20,6 +20,8 @@ else echo "/etc/default/couchpotato not found using default settings."; fi +. 
/lib/lsb/init-functions + # Script name NAME=couchpotato diff --git a/libs/apscheduler/__init__.py b/libs/apscheduler/__init__.py index a55959f..d93e1b3 100644 --- a/libs/apscheduler/__init__.py +++ b/libs/apscheduler/__init__.py @@ -1,3 +1,3 @@ -version_info = (2, 0, 2) +version_info = (2, 1, 1) version = '.'.join(str(n) for n in version_info[:3]) -release = version + ''.join(str(n) for n in version_info[3:]) +release = '.'.join(str(n) for n in version_info) diff --git a/libs/apscheduler/job.py b/libs/apscheduler/job.py index 868e723..cfc09a2 100644 --- a/libs/apscheduler/job.py +++ b/libs/apscheduler/job.py @@ -16,22 +16,25 @@ class MaxInstancesReachedError(Exception): class Job(object): """ Encapsulates the actual Job along with its metadata. Job instances - are created by the scheduler when adding jobs, and it should not be - directly instantiated. - - :param trigger: trigger that determines the execution times - :param func: callable to call when the trigger is triggered - :param args: list of positional arguments to call func with - :param kwargs: dict of keyword arguments to call func with - :param name: name of the job (optional) - :param misfire_grace_time: seconds after the designated run time that + are created by the scheduler when adding jobs, and should not be + directly instantiated. These options can be set when adding jobs + to the scheduler (see :ref:`job_options`). 
+ + :var trigger: trigger that determines the execution times + :var func: callable to call when the trigger is triggered + :var args: list of positional arguments to call func with + :var kwargs: dict of keyword arguments to call func with + :var name: name of the job + :var misfire_grace_time: seconds after the designated run time that the job is still allowed to be run - :param coalesce: run once instead of many times if the scheduler determines + :var coalesce: run once instead of many times if the scheduler determines that the job should be run more than once in succession - :param max_runs: maximum number of times this job is allowed to be + :var max_runs: maximum number of times this job is allowed to be triggered - :param max_instances: maximum number of concurrently running + :var max_instances: maximum number of concurrently running instances allowed for this job + :var runs: number of times this job has been triggered + :var instances: number of concurrently running instances of this job """ id = None next_run_time = None @@ -130,5 +133,5 @@ class Job(object): return '<Job (name=%s, trigger=%s)>' % (self.name, repr(self.trigger)) def __str__(self): - return '%s (trigger: %s, next run at: %s)' % (self.name, - str(self.trigger), str(self.next_run_time)) + return '%s (trigger: %s, next run at: %s)' % ( + self.name, str(self.trigger), str(self.next_run_time)) diff --git a/libs/apscheduler/jobstores/ram_store.py b/libs/apscheduler/jobstores/ram_store.py index 85091fe..60458fb 100644 --- a/libs/apscheduler/jobstores/ram_store.py +++ b/libs/apscheduler/jobstores/ram_store.py @@ -8,7 +8,7 @@ from apscheduler.jobstores.base import JobStore class RAMJobStore(JobStore): def __init__(self): self.jobs = [] - + def add_job(self, job): self.jobs.append(job) diff --git a/libs/apscheduler/jobstores/redis_store.py b/libs/apscheduler/jobstores/redis_store.py new file mode 100644 index 0000000..5eabf4b --- /dev/null +++ b/libs/apscheduler/jobstores/redis_store.py @@ -0,0 
+1,91 @@ +""" +Stores jobs in a Redis database. +""" +from uuid import uuid4 +from datetime import datetime +import logging + +from apscheduler.jobstores.base import JobStore +from apscheduler.job import Job + +try: + import cPickle as pickle +except ImportError: # pragma: nocover + import pickle + +try: + from redis import StrictRedis +except ImportError: # pragma: nocover + raise ImportError('RedisJobStore requires redis installed') + +try: + long = long +except NameError: + long = int + +logger = logging.getLogger(__name__) + + +class RedisJobStore(JobStore): + def __init__(self, db=0, key_prefix='jobs.', + pickle_protocol=pickle.HIGHEST_PROTOCOL, **connect_args): + self.jobs = [] + self.pickle_protocol = pickle_protocol + self.key_prefix = key_prefix + + if db is None: + raise ValueError('The "db" parameter must not be empty') + if not key_prefix: + raise ValueError('The "key_prefix" parameter must not be empty') + + self.redis = StrictRedis(db=db, **connect_args) + + def add_job(self, job): + job.id = str(uuid4()) + job_state = job.__getstate__() + job_dict = { + 'job_state': pickle.dumps(job_state, self.pickle_protocol), + 'runs': '0', + 'next_run_time': job_state.pop('next_run_time').isoformat()} + self.redis.hmset(self.key_prefix + job.id, job_dict) + self.jobs.append(job) + + def remove_job(self, job): + self.redis.delete(self.key_prefix + job.id) + self.jobs.remove(job) + + def load_jobs(self): + jobs = [] + keys = self.redis.keys(self.key_prefix + '*') + pipeline = self.redis.pipeline() + for key in keys: + pipeline.hgetall(key) + results = pipeline.execute() + + for job_dict in results: + job_state = {} + try: + job = Job.__new__(Job) + job_state = pickle.loads(job_dict['job_state'.encode()]) + job_state['runs'] = long(job_dict['runs'.encode()]) + dateval = job_dict['next_run_time'.encode()].decode() + job_state['next_run_time'] = datetime.strptime( + dateval, '%Y-%m-%dT%H:%M:%S') + job.__setstate__(job_state) + jobs.append(job) + except Exception: + 
job_name = job_state.get('name', '(unknown)') + logger.exception('Unable to restore job "%s"', job_name) + self.jobs = jobs + + def update_job(self, job): + attrs = { + 'next_run_time': job.next_run_time.isoformat(), + 'runs': job.runs} + self.redis.hmset(self.key_prefix + job.id, attrs) + + def close(self): + self.redis.connection_pool.disconnect() + + def __repr__(self): + return '<%s>' % self.__class__.__name__ diff --git a/libs/apscheduler/jobstores/shelve_store.py b/libs/apscheduler/jobstores/shelve_store.py index 87c95f8..bd68333 100644 --- a/libs/apscheduler/jobstores/shelve_store.py +++ b/libs/apscheduler/jobstores/shelve_store.py @@ -32,17 +32,20 @@ class ShelveJobStore(JobStore): def add_job(self, job): job.id = self._generate_id() - self.jobs.append(job) self.store[job.id] = job.__getstate__() + self.store.sync() + self.jobs.append(job) def update_job(self, job): job_dict = self.store[job.id] job_dict['next_run_time'] = job.next_run_time job_dict['runs'] = job.runs self.store[job.id] = job_dict + self.store.sync() def remove_job(self, job): del self.store[job.id] + self.store.sync() self.jobs.remove(job) def load_jobs(self): diff --git a/libs/apscheduler/jobstores/sqlalchemy_store.py b/libs/apscheduler/jobstores/sqlalchemy_store.py index 41ed4c7..5b64a35 100644 --- a/libs/apscheduler/jobstores/sqlalchemy_store.py +++ b/libs/apscheduler/jobstores/sqlalchemy_store.py @@ -4,6 +4,8 @@ Stores jobs in a database table using SQLAlchemy. 
import pickle import logging +import sqlalchemy + from apscheduler.jobstores.base import JobStore from apscheduler.job import Job @@ -28,17 +30,19 @@ class SQLAlchemyJobStore(JobStore): else: raise ValueError('Need either "engine" or "url" defined') - self.jobs_t = Table(tablename, metadata or MetaData(), + if sqlalchemy.__version__ < '0.7': + pickle_coltype = PickleType(pickle_protocol, mutable=False) + else: + pickle_coltype = PickleType(pickle_protocol) + self.jobs_t = Table( + tablename, metadata or MetaData(), Column('id', Integer, Sequence(tablename + '_id_seq', optional=True), primary_key=True), - Column('trigger', PickleType(pickle_protocol, mutable=False), - nullable=False), + Column('trigger', pickle_coltype, nullable=False), Column('func_ref', String(1024), nullable=False), - Column('args', PickleType(pickle_protocol, mutable=False), - nullable=False), - Column('kwargs', PickleType(pickle_protocol, mutable=False), - nullable=False), + Column('args', pickle_coltype, nullable=False), + Column('kwargs', pickle_coltype, nullable=False), Column('name', Unicode(1024)), Column('misfire_grace_time', Integer, nullable=False), Column('coalesce', Boolean, nullable=False), diff --git a/libs/apscheduler/scheduler.py b/libs/apscheduler/scheduler.py index 50769e4..d6afcad 100644 --- a/libs/apscheduler/scheduler.py +++ b/libs/apscheduler/scheduler.py @@ -35,7 +35,7 @@ class Scheduler(object): their execution. """ - _stopped = False + _stopped = True _thread = None def __init__(self, gconfig={}, **options): @@ -60,6 +60,7 @@ class Scheduler(object): self.misfire_grace_time = int(config.pop('misfire_grace_time', 1)) self.coalesce = asbool(config.pop('coalesce', True)) self.daemonic = asbool(config.pop('daemonic', True)) + self.standalone = asbool(config.pop('standalone', False)) # Configure the thread pool if 'threadpool' in config: @@ -85,6 +86,12 @@ class Scheduler(object): def start(self): """ Starts the scheduler in a new thread. 
+ + In threaded mode (the default), this method will return immediately + after starting the scheduler thread. + + In standalone mode, this method will block until there are no more + scheduled jobs. """ if self.running: raise SchedulerAlreadyRunningError @@ -99,11 +106,15 @@ class Scheduler(object): del self._pending_jobs[:] self._stopped = False - self._thread = Thread(target=self._main_loop, name='APScheduler') - self._thread.setDaemon(self.daemonic) - self._thread.start() + if self.standalone: + self._main_loop() + else: + self._thread = Thread(target=self._main_loop, name='APScheduler') + self._thread.setDaemon(self.daemonic) + self._thread.start() - def shutdown(self, wait=True, shutdown_threadpool=True): + def shutdown(self, wait=True, shutdown_threadpool=True, + close_jobstores=True): """ Shuts down the scheduler and terminates the thread. Does not interrupt any currently running jobs. @@ -111,6 +122,7 @@ class Scheduler(object): :param wait: ``True`` to wait until all currently executing jobs have finished (if ``shutdown_threadpool`` is also ``True``) :param shutdown_threadpool: ``True`` to shut down the thread pool + :param close_jobstores: ``True`` to close all job stores after shutdown """ if not self.running: return @@ -123,11 +135,19 @@ class Scheduler(object): self._threadpool.shutdown(wait) # Wait until the scheduler thread terminates - self._thread.join() + if self._thread: + self._thread.join() + + # Close all job stores + if close_jobstores: + for jobstore in itervalues(self._jobstores): + jobstore.close() @property def running(self): - return not self._stopped and self._thread and self._thread.isAlive() + thread_alive = self._thread and self._thread.isAlive() + standalone = getattr(self, 'standalone', False) + return not self._stopped and (standalone or thread_alive) def add_jobstore(self, jobstore, alias, quiet=False): """ @@ -156,21 +176,25 @@ class Scheduler(object): if not quiet: self._wakeup.set() - def remove_jobstore(self, alias): + def 
remove_jobstore(self, alias, close=True): """ Removes the job store by the given alias from this scheduler. + :param close: ``True`` to close the job store after removing it :type alias: str """ self._jobstores_lock.acquire() try: - try: - del self._jobstores[alias] - except KeyError: + jobstore = self._jobstores.pop(alias) + if not jobstore: raise KeyError('No such job store: %s' % alias) finally: self._jobstores_lock.release() + # Close the job store if requested + if close: + jobstore.close() + # Notify listeners that a job store has been removed self._notify_listeners(JobStoreEvent(EVENT_JOBSTORE_REMOVED, alias)) @@ -245,8 +269,10 @@ class Scheduler(object): **options): """ Adds the given job to the job list and notifies the scheduler thread. + Any extra keyword arguments are passed along to the constructor of the + :class:`~apscheduler.job.Job` class (see :ref:`job_options`). - :param trigger: alias of the job store to store the job in + :param trigger: trigger that determines when ``func`` is called :param func: callable to run at the given time :param args: list of positional arguments to call func with :param kwargs: dict of keyword arguments to call func with @@ -276,6 +302,8 @@ class Scheduler(object): def add_date_job(self, func, date, args=None, kwargs=None, **options): """ Schedules a job to be completed on a specific date and time. + Any extra keyword arguments are passed along to the constructor of the + :class:`~apscheduler.job.Job` class (see :ref:`job_options`). :param func: callable to run at the given time :param date: the date/time to run the job at @@ -294,6 +322,8 @@ class Scheduler(object): **options): """ Schedules a job to be completed on specified intervals. + Any extra keyword arguments are passed along to the constructor of the + :class:`~apscheduler.job.Job` class (see :ref:`job_options`). 
:param func: callable to run :param weeks: number of weeks to wait @@ -322,6 +352,8 @@ class Scheduler(object): """ Schedules a job to be completed on times that match the given expressions. + Any extra keyword arguments are passed along to the constructor of the + :class:`~apscheduler.job.Job` class (see :ref:`job_options`). :param func: callable to run :param year: year to run on @@ -352,6 +384,8 @@ class Scheduler(object): This decorator does not wrap its host function. Unscheduling decorated functions is possible by passing the ``job`` attribute of the scheduled function to :meth:`unschedule_job`. + Any extra keyword arguments are passed along to the constructor of the + :class:`~apscheduler.job.Job` class (see :ref:`job_options`). """ def inner(func): func.job = self.add_cron_job(func, **options) @@ -364,6 +398,8 @@ class Scheduler(object): This decorator does not wrap its host function. Unscheduling decorated functions is possible by passing the ``job`` attribute of the scheduled function to :meth:`unschedule_job`. + Any extra keyword arguments are passed along to the constructor of the + :class:`~apscheduler.job.Job` class (see :ref:`job_options`). 
""" def inner(func): func.job = self.add_interval_job(func, **options) @@ -517,7 +553,8 @@ class Scheduler(object): job.runs += len(run_times) # Update the job, but don't keep finished jobs around - if job.compute_next_run_time(now + timedelta(microseconds=1)): + if job.compute_next_run_time( + now + timedelta(microseconds=1)): jobstore.update_job(job) else: self._remove_job(job, alias, jobstore) @@ -550,10 +587,15 @@ class Scheduler(object): logger.debug('Next wakeup is due at %s (in %f seconds)', next_wakeup_time, wait_seconds) self._wakeup.wait(wait_seconds) + self._wakeup.clear() + elif self.standalone: + logger.debug('No jobs left; shutting down scheduler') + self.shutdown() + break else: logger.debug('No jobs; waiting until a job is added') self._wakeup.wait() - self._wakeup.clear() + self._wakeup.clear() logger.info('Scheduler has been shut down') self._notify_listeners(SchedulerEvent(EVENT_SCHEDULER_SHUTDOWN)) diff --git a/libs/apscheduler/triggers/cron/__init__.py b/libs/apscheduler/triggers/cron/__init__.py index 763edb1..9e69f72 100644 --- a/libs/apscheduler/triggers/cron/__init__.py +++ b/libs/apscheduler/triggers/cron/__init__.py @@ -21,8 +21,10 @@ class CronTrigger(object): if self.start_date: self.start_date = convert_to_datetime(self.start_date) - # Yank out all None valued fields + # Check field names and yank out all None valued fields for key, value in list(iteritems(values)): + if key not in self.FIELD_NAMES: + raise TypeError('Invalid field name: %s' % key) if value is None: del values[key] @@ -111,17 +113,17 @@ class CronTrigger(object): if next_value is None: # No valid value was found - next_date, fieldnum = self._increment_field_value(next_date, - fieldnum - 1) + next_date, fieldnum = self._increment_field_value( + next_date, fieldnum - 1) elif next_value > curr_value: # A valid, but higher than the starting value, was found if field.REAL: - next_date = self._set_field_value(next_date, fieldnum, - next_value) + next_date = 
self._set_field_value( + next_date, fieldnum, next_value) fieldnum += 1 else: - next_date, fieldnum = self._increment_field_value(next_date, - fieldnum) + next_date, fieldnum = self._increment_field_value( + next_date, fieldnum) else: # A valid value was found, no changes necessary fieldnum += 1 diff --git a/libs/apscheduler/triggers/cron/expressions.py b/libs/apscheduler/triggers/cron/expressions.py index 018c7a3..b5d2919 100644 --- a/libs/apscheduler/triggers/cron/expressions.py +++ b/libs/apscheduler/triggers/cron/expressions.py @@ -8,7 +8,7 @@ import re from apscheduler.util import asint __all__ = ('AllExpression', 'RangeExpression', 'WeekdayRangeExpression', - 'WeekdayPositionExpression') + 'WeekdayPositionExpression', 'LastDayOfMonthExpression') WEEKDAYS = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'] @@ -176,3 +176,19 @@ class WeekdayPositionExpression(AllExpression): return "%s('%s', '%s')" % (self.__class__.__name__, self.options[self.option_num], WEEKDAYS[self.weekday]) + + +class LastDayOfMonthExpression(AllExpression): + value_re = re.compile(r'last', re.IGNORECASE) + + def __init__(self): + pass + + def get_next_value(self, date, field): + return monthrange(date.year, date.month)[1] + + def __str__(self): + return 'last' + + def __repr__(self): + return "%s()" % self.__class__.__name__ diff --git a/libs/apscheduler/triggers/cron/fields.py b/libs/apscheduler/triggers/cron/fields.py index ef970cc..be5e5e3 100644 --- a/libs/apscheduler/triggers/cron/fields.py +++ b/libs/apscheduler/triggers/cron/fields.py @@ -85,7 +85,8 @@ class WeekField(BaseField): class DayOfMonthField(BaseField): - COMPILERS = BaseField.COMPILERS + [WeekdayPositionExpression] + COMPILERS = BaseField.COMPILERS + [WeekdayPositionExpression, + LastDayOfMonthExpression] def get_max(self, dateval): return monthrange(dateval.year, dateval.month)[1] diff --git a/libs/apscheduler/util.py b/libs/apscheduler/util.py index a49aaed..dcede4c 100644 --- a/libs/apscheduler/util.py +++ 
b/libs/apscheduler/util.py @@ -6,7 +6,6 @@ from datetime import date, datetime, timedelta from time import mktime import re import sys -from types import MethodType __all__ = ('asint', 'asbool', 'convert_to_datetime', 'timedelta_seconds', 'time_difference', 'datetime_ceil', 'combine_opts', @@ -64,7 +63,7 @@ def convert_to_datetime(input): return input elif isinstance(input, date): return datetime.fromordinal(input.toordinal()) - elif isinstance(input, str): + elif isinstance(input, basestring): m = _DATE_REGEX.match(input) if not m: raise ValueError('Invalid date string') @@ -109,7 +108,7 @@ def datetime_ceil(dateval): """ if dateval.microsecond > 0: return dateval + timedelta(seconds=1, - microseconds= -dateval.microsecond) + microseconds=-dateval.microsecond) return dateval @@ -143,7 +142,8 @@ def get_callable_name(func): if f_self and hasattr(func, '__name__'): if isinstance(f_self, type): # class method - return '%s.%s' % (f_self.__name__, func.__name__) + clsname = getattr(f_self, '__qualname__', None) or f_self.__name__ + return '%s.%s' % (clsname, func.__name__) # bound method return '%s.%s' % (f_self.__class__.__name__, func.__name__) @@ -169,7 +169,7 @@ def obj_to_ref(obj): raise ValueError except Exception: raise ValueError('Cannot determine the reference to %s' % repr(obj)) - + return ref diff --git a/libs/backports/__init__.py b/libs/backports/__init__.py new file mode 100644 index 0000000..612d328 --- /dev/null +++ b/libs/backports/__init__.py @@ -0,0 +1,3 @@ +# This is a Python "namespace package" http://www.python.org/dev/peps/pep-0382/ +from pkgutil import extend_path +__path__ = extend_path(__path__, __name__) diff --git a/libs/backports/ssl_match_hostname/README.txt b/libs/backports/ssl_match_hostname/README.txt new file mode 100644 index 0000000..f024fd7 --- /dev/null +++ b/libs/backports/ssl_match_hostname/README.txt @@ -0,0 +1,42 @@ + +The ssl.match_hostname() function from Python 3.2 +================================================= + +The 
Secure Sockets layer is only actually *secure* +if you check the hostname in the certificate returned +by the server to which you are connecting, +and verify that it matches to hostname +that you are trying to reach. + +But the matching logic, defined in `RFC2818`_, +can be a bit tricky to implement on your own. +So the ``ssl`` package in the Standard Library of Python 3.2 +now includes a ``match_hostname()`` function +for performing this check instead of requiring every application +to implement the check separately. + +This backport brings ``match_hostname()`` to users +of earlier versions of Python. +Simply make this distribution a dependency of your package, +and then use it like this:: + + from backports.ssl_match_hostname import match_hostname, CertificateError + ... + sslsock = ssl.wrap_socket(sock, ssl_version=ssl.PROTOCOL_SSLv3, + cert_reqs=ssl.CERT_REQUIRED, ca_certs=...) + try: + match_hostname(sslsock.getpeercert(), hostname) + except CertificateError, ce: + ... + +Note that the ``ssl`` module is only included in the Standard Library +for Python 2.6 and later; +users of Python 2.5 or earlier versions +will also need to install the ``ssl`` distribution +from the Python Package Index to use code like that shown above. + +Brandon Craig Rhodes is merely the packager of this distribution; +the actual code inside comes verbatim from Python 3.2. + +.. _RFC2818: http://tools.ietf.org/html/rfc2818.html + diff --git a/libs/backports/ssl_match_hostname/__init__.py b/libs/backports/ssl_match_hostname/__init__.py new file mode 100644 index 0000000..5707649 --- /dev/null +++ b/libs/backports/ssl_match_hostname/__init__.py @@ -0,0 +1,60 @@ +"""The match_hostname() function from Python 3.2, essential when using SSL.""" + +import re + +__version__ = '3.2a3' + +class CertificateError(ValueError): + pass + +def _dnsname_to_pat(dn): + pats = [] + for frag in dn.split(r'.'): + if frag == '*': + # When '*' is a fragment by itself, it matches a non-empty dotless + # fragment. 
+ pats.append('[^.]+') + else: + # Otherwise, '*' matches any dotless fragment. + frag = re.escape(frag) + pats.append(frag.replace(r'\*', '[^.]*')) + return re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) + +def match_hostname(cert, hostname): + """Verify that *cert* (in decoded format as returned by + SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 rules + are mostly followed, but IP addresses are not accepted for *hostname*. + + CertificateError is raised on failure. On success, the function + returns nothing. + """ + if not cert: + raise ValueError("empty or no certificate") + dnsnames = [] + san = cert.get('subjectAltName', ()) + for key, value in san: + if key == 'DNS': + if _dnsname_to_pat(value).match(hostname): + return + dnsnames.append(value) + if not san: + # The subject is only checked when subjectAltName is empty + for sub in cert.get('subject', ()): + for key, value in sub: + # XXX according to RFC 2818, the most specific Common Name + # must be used. + if key == 'commonName': + if _dnsname_to_pat(value).match(hostname): + return + dnsnames.append(value) + if len(dnsnames) > 1: + raise CertificateError("hostname %r " + "doesn't match either of %s" + % (hostname, ', '.join(map(repr, dnsnames)))) + elif len(dnsnames) == 1: + raise CertificateError("hostname %r " + "doesn't match %r" + % (hostname, dnsnames[0])) + else: + raise CertificateError("no appropriate commonName or " + "subjectAltName fields were found") diff --git a/libs/bencode/LICENSE.txt b/libs/bencode/LICENSE.txt deleted file mode 100644 index 4b7a674..0000000 --- a/libs/bencode/LICENSE.txt +++ /dev/null @@ -1,143 +0,0 @@ -BitTorrent Open Source License - -Version 1.1 - -This BitTorrent Open Source License (the "License") applies to the BitTorrent client and related software products as well as any updates or maintenance releases of that software ("BitTorrent Products") that are distributed by BitTorrent, Inc. ("Licensor"). 
Any BitTorrent Product licensed pursuant to this License is a Licensed Product. Licensed Product, in its entirety, is protected by U.S. copyright law. This License identifies the terms under which you may use, copy, distribute or modify Licensed Product. - -Preamble - -This Preamble is intended to describe, in plain English, the nature and scope of this License. However, this Preamble is not a part of this license. The legal effect of this License is dependent only upon the terms of the License and not this Preamble. - -This License complies with the Open Source Definition and is derived from the Jabber Open Source License 1.0 (the "JOSL"), which has been approved by Open Source Initiative. Sections 4(c) and 4(f)(iii) from the JOSL have been deleted. - -This License provides that: - -1. You may use or give away the Licensed Product, alone or as a component of an aggregate software distribution containing programs from several different sources. No royalty or other fee is required. - -2. Both Source Code and executable versions of the Licensed Product, including Modifications made by previous Contributors, are available for your use. (The terms "Licensed Product," "Modifications," "Contributors" and "Source Code" are defined in the License.) - -3. You are allowed to make Modifications to the Licensed Product, and you can create Derivative Works from it. (The term "Derivative Works" is defined in the License.) - -4. By accepting the Licensed Product under the provisions of this License, you agree that any Modifications you make to the Licensed Product and then distribute are governed by the provisions of this License. In particular, you must make the Source Code of your Modifications available to others free of charge and without a royalty. - -5. 
You may sell, accept donations or otherwise receive compensation for executable versions of a Licensed Product, without paying a royalty or other fee to the Licensor or any Contributor, provided that such executable versions contain your or another Contributor?s material Modifications. For the avoidance of doubt, to the extent your executable version of a Licensed Product does not contain your or another Contributor?s material Modifications, you may not sell, accept donations or otherwise receive compensation for such executable. - -You may use the Licensed Product for any purpose, but the Licensor is not providing you any warranty whatsoever, nor is the Licensor accepting any liability in the event that the Licensed Product doesn't work properly or causes you any injury or damages. - -6. If you sublicense the Licensed Product or Derivative Works, you may charge fees for warranty or support, or for accepting indemnity or liability obligations to your customers. You cannot charge for, sell, accept donations or otherwise receive compensation for the Source Code. - -7. If you assert any patent claims against the Licensor relating to the Licensed Product, or if you breach any terms of the License, your rights to the Licensed Product under this License automatically terminate. - -You may use this License to distribute your own Derivative Works, in which case the provisions of this License will apply to your Derivative Works just as they do to the original Licensed Product. - -Alternatively, you may distribute your Derivative Works under any other OSI-approved Open Source license, or under a proprietary license of your choice. If you use any license other than this License, however, you must continue to fulfill the requirements of this License (including the provisions relating to publishing the Source Code) for those portions of your Derivative Works that consist of the Licensed Product, including the files containing Modifications. 
- -New versions of this License may be published from time to time in connection with new versions of a Licensed Product or otherwise. You may choose to continue to use the license terms in this version of the License for the Licensed Product that was originally licensed hereunder, however, the new versions of this License will at all times apply to new versions of the Licensed Product released by Licensor after the release of the new version of this License. Only the Licensor has the right to change the License terms as they apply to the Licensed Product. - -This License relies on precise definitions for certain terms. Those terms are defined when they are first used, and the definitions are repeated for your convenience in a Glossary at the end of the License. - -License Terms - -1. Grant of License From Licensor. Subject to the terms and conditions of this License, Licensor hereby grants you a world-wide, royalty-free, non-exclusive license, subject to third party intellectual property claims, to do the following: - -a. Use, reproduce, modify, display, perform, sublicense and distribute any Modifications created by a Contributor or portions thereof, in both Source Code or as an executable program, either on an unmodified basis or as part of Derivative Works. - -b. Under claims of patents now or hereafter owned or controlled by Contributor, to make, use, sell, offer for sale, have made, and/or otherwise dispose of Modifications or portions thereof, but solely to the extent that any such claim is necessary to enable you to make, use, sell, offer for sale, have made, and/or otherwise dispose of Modifications or portions thereof or Derivative Works thereof. - -2. Grant of License to Modifications From Contributor. "Modifications" means any additions to or deletions from the substance or structure of (i) a file containing a Licensed Product, or (ii) any new file that contains any part of a Licensed Product. 
Hereinafter in this License, the term "Licensed Product" shall include all previous Modifications that you receive from any Contributor. Subject to the terms and conditions of this License, By application of the provisions in Section 4(a) below, each person or entity who created or contributed to the creation of, and distributed, a Modification (a "Contributor") hereby grants you a world-wide, royalty-free, non-exclusive license, subject to third party intellectual property claims, to do the following: - -a. Use, reproduce, modify, display, perform, sublicense and distribute any Modifications created by such Contributor or portions thereof, in both Source Code or as an executable program, either on an unmodified basis or as part of Derivative Works. - -b. Under claims of patents now or hereafter owned or controlled by Contributor, to make, use, sell, offer for sale, have made, and/or otherwise dispose of Modifications or portions thereof, but solely to the extent that any such claim is necessary to enable you to make, use, sell, offer for sale, have made, and/or otherwise dispose of Modifications or portions thereof or Derivative Works thereof. - -3. Exclusions From License Grant. Nothing in this License shall be deemed to grant any rights to trademarks, copyrights, patents, trade secrets or any other intellectual property of Licensor or any Contributor except as expressly stated herein. No patent license is granted separate from the Licensed Product, for code that you delete from the Licensed Product, or for combinations of the Licensed Product with other software or hardware. No right is granted to the trademarks of Licensor or any Contributor even if such marks are included in the Licensed Product. Nothing in this License shall be interpreted to prohibit Licensor from licensing under different terms from this License any code that Licensor otherwise would have a right to license. 
As an express condition for your use of the Licensed Product, you hereby agree that you will not, without the prior written consent of Licensor, use any trademarks, copyrights, patents, trade secrets or any other intellectual property of Licensor or any Contributor except as expressly stated herein. For the avoidance of doubt and without limiting the foregoing, you hereby agree that you will not use or display any trademark of Licensor or any Contributor in any domain name, directory filepath, advertisement, link or other reference to you in any manner or in any media. - -4. Your Obligations Regarding Distribution. - -a. Application of This License to Your Modifications. As an express condition for your use of the Licensed Product, you hereby agree that any Modifications that you create or to which you contribute, and which you distribute, are governed by the terms of this License including, without limitation, Section 2. Any Modifications that you create or to which you contribute may be distributed only under the terms of this License or a future version of this License released under Section 7. You must include a copy of this License with every copy of the Modifications you distribute. You agree not to offer or impose any terms on any Source Code or executable version of the Licensed Product or Modifications that alter or restrict the applicable version of this License or the recipients' rights hereunder. However, you may include an additional document offering the additional rights described in Section 4(d). - -b. Availability of Source Code. You must make available, without charge, under the terms of this License, the Source Code of the Licensed Product and any Modifications that you distribute, either on the same media as you distribute any executable or other form of the Licensed Product, or via a mechanism generally accepted in the software development community for the electronic transfer of data (an "Electronic Distribution Mechanism"). 
The Source Code for any version of Licensed Product or Modifications that you distribute must remain available for as long as any executable or other form of the Licensed Product is distributed by you. You are responsible for ensuring that the Source Code version remains available even if the Electronic Distribution Mechanism is maintained by a third party. - -c. Intellectual Property Matters. - - i. Third Party Claims. If you have knowledge that a license to a third party's intellectual property right is required to exercise the rights granted by this License, you must include a text file with the Source Code distribution titled "LEGAL" that describes the claim and the party making the claim in sufficient detail that a recipient will know whom to contact. If you obtain such knowledge after you make any Modifications available as described in Section 4(b), you shall promptly modify the LEGAL file in all copies you make available thereafter and shall take other steps (such as notifying appropriate mailing lists or newsgroups) reasonably calculated to inform those who received the Licensed Product from you that new knowledge has been obtained. - - ii. Contributor APIs. If your Modifications include an application programming interface ("API") and you have knowledge of patent licenses that are reasonably necessary to implement that API, you must also include this information in the LEGAL file. - - iii. Representations. You represent that, except as disclosed pursuant to 4(c)(i) above, you believe that any Modifications you distribute are your original creations and that you have sufficient rights to grant the rights conveyed by this License. - -d. Required Notices. You must duplicate this License in any documentation you provide along with the Source Code of any Modifications you create or to which you contribute, and which you distribute, wherever you describe recipients' rights relating to Licensed Product. 
You must duplicate the notice contained in Exhibit A (the "Notice") in each file of the Source Code of any copy you distribute of the Licensed Product. If you created a Modification, you may add your name as a Contributor to the Notice. If it is not possible to put the Notice in a particular Source Code file due to its structure, then you must include such Notice in a location (such as a relevant directory file) where a user would be likely to look for such a notice. You may choose to offer, and charge a fee for, warranty, support, indemnity or liability obligations to one or more recipients of Licensed Product. However, you may do so only on your own behalf, and not on behalf of the Licensor or any Contributor. You must make it clear that any such warranty, support, indemnity or liability obligation is offered by you alone, and you hereby agree to indemnify the Licensor and every Contributor for any liability incurred by the Licensor or such Contributor as a result of warranty, support, indemnity or liability terms you offer. - -e. Distribution of Executable Versions. You may distribute Licensed Product as an executable program under a license of your choice that may contain terms different from this License provided (i) you have satisfied the requirements of Sections 4(a) through 4(d) for that distribution, (ii) you include a conspicuous notice in the executable version, related documentation and collateral materials stating that the Source Code version of the -Licensed Product is available under the terms of this License, including a description of how and where you have fulfilled the obligations of Section 4(b), and (iii) you make it clear that any terms that differ from this License are offered by you alone, not by Licensor or any Contributor. You hereby agree to indemnify the Licensor and every Contributor for any liability incurred by Licensor or such Contributor as a result of any terms you offer. - -f. Distribution of Derivative Works. 
You may create Derivative Works (e.g., combinations of some or all of the Licensed Product with other code) and distribute the Derivative Works as products under any other license you select, with the proviso that the requirements of this License are fulfilled for those portions of the Derivative Works that consist of the Licensed Product or any Modifications thereto. - -g. Compensation for Distribution of Executable Versions of Licensed Products, Modifications or Derivative Works. Notwithstanding any provision of this License to the contrary, by distributing, selling, licensing, sublicensing or otherwise making available any Licensed Product, or Modification or Derivative Work thereof, you and Licensor hereby acknowledge and agree that you may sell, license or sublicense for a fee, accept donations or otherwise receive compensation for executable versions of a Licensed Product, without paying a royalty or other fee to the Licensor or any other Contributor, provided that such executable versions (i) contain your or another Contributor?s material Modifications, or (ii) are otherwise material Derivative Works. For purposes of this License, an executable version of the Licensed Product will be deemed to contain a material Modification, or will otherwise be deemed a material Derivative Work, if (a) the Licensed Product is modified with your own or a third party?s software programs or other code, and/or the Licensed Product is combined with a number of your own or a third party?s software programs or code, respectively, and (b) such software programs or code add or contribute material value, functionality or features to the License Product. 
For the avoidance of doubt, to the extent your executable version of a Licensed Product does not contain your or another Contributor?s material Modifications or is otherwise not a material Derivative Work, in each case as contemplated herein, you may not sell, license or sublicense for a fee, accept donations or otherwise receive compensation for such executable. Additionally, without limitation of the foregoing and notwithstanding any provision of this License to the contrary, you cannot charge for, sell, license or sublicense for a fee, accept donations or otherwise receive compensation for the Source Code. - -5. Inability to Comply Due to Statute or Regulation. If it is impossible for you to comply with any of the terms of this License with respect to some or all of the Licensed Product due to statute, judicial order, or regulation, then you must (i) comply with the terms of this License to the maximum extent possible, (ii) cite the statute or regulation that prohibits you from adhering to the License, and (iii) describe the limitations and the code they affect. Such description must be included in the LEGAL file described in Section 4(d), and must be included with all distributions of the Source Code. Except to the extent prohibited by statute or regulation, such description must be sufficiently detailed for a recipient of ordinary skill at computer programming to be able to understand it. - -6. Application of This License. This License applies to code to which Licensor or Contributor has attached the Notice in Exhibit A, which is incorporated herein by this reference. - -7. Versions of This License. - -a. New Versions. Licensor may publish from time to time revised and/or new versions of the License. - -b. Effect of New Versions. 
Once Licensed Product has been published under a particular version of the License, you may always continue to use it under the terms of that version, provided that any such license be in full force and effect at the time, and has not been revoked or otherwise terminated. You may also choose to use such Licensed Product under the terms of any subsequent version (but not any prior version) of the License published by Licensor. No one other than Licensor has the right to modify the terms applicable to Licensed Product created under this License. - -c. Derivative Works of this License. If you create or use a modified version of this License, which you may do only in order to apply it to software that is not already a Licensed Product under this License, you must rename your license so that it is not confusingly similar to this License, and must make it clear that your license contains terms that differ from this License. In so naming your license, you may not use any trademark of Licensor or any Contributor. - -8. Disclaimer of Warranty. LICENSED PRODUCT IS PROVIDED UNDER THIS LICENSE ON AN AS IS BASIS, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE LICENSED PRODUCT IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LICENSED PRODUCT IS WITH YOU. SHOULD LICENSED PRODUCT PROVE DEFECTIVE IN ANY RESPECT, YOU (AND NOT THE LICENSOR OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY SERVICING, REPAIR OR CORRECTION. THIS -DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF LICENSED PRODUCT IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS DISCLAIMER. - -9. Termination. - -a. Automatic Termination Upon Breach. 
This license and the rights granted hereunder will terminate automatically if you fail to comply with the terms herein and fail to cure such breach within ten (10) days of being notified of the breach by the Licensor. For purposes of this provision, proof of delivery via email to the address listed in the ?WHOIS? database of the registrar for any website through which you distribute or market any Licensed Product, or to any alternate email address which you designate in writing to the Licensor, shall constitute sufficient notification. All sublicenses to the Licensed Product that are properly granted shall survive any termination of this license so long as they continue to complye with the terms of this License. Provisions that, by their nature, must remain in effect beyond the termination of this License, shall survive. - -b. Termination Upon Assertion of Patent Infringement. If you initiate litigation by asserting a patent infringement claim (excluding declaratory judgment actions) against Licensor or a Contributor (Licensor or Contributor against whom you file such an action is referred to herein as Respondent) alleging that Licensed Product directly or indirectly infringes any patent, then any and all rights granted by such Respondent to you under Sections 1 or 2 of this License shall terminate prospectively upon sixty (60) days notice from Respondent (the "Notice Period") unless within that Notice Period you either agree in writing (i) to pay Respondent a mutually agreeable reasonably royalty for your past or future use of Licensed Product made by such Respondent, or (ii) withdraw your litigation claim with respect to Licensed Product against such Respondent. If within said Notice Period a reasonable royalty and payment arrangement are not mutually agreed upon in writing by the parties or the litigation claim is not withdrawn, the rights granted by Licensor to you under Sections 1 and 2 automatically terminate at the expiration of said Notice Period. - -c. 
Reasonable Value of This License. If you assert a patent infringement claim against Respondent alleging that Licensed Product directly or indirectly infringes any patent where such claim is resolved (such as by license or settlement) prior to the initiation of patent infringement litigation, then the reasonable value of the licenses granted by said Respondent under Sections 1 and 2 shall be taken into account in determining the amount or value of any payment or license. - -d. No Retroactive Effect of Termination. In the event of termination under Sections 9(a) or 9(b) above, all end user license agreements (excluding licenses to distributors and resellers) that have been validly granted by you or any distributor hereunder prior to termination shall survive termination. - -10. Limitation of Liability. UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL THE LICENSOR, ANY CONTRIBUTOR, OR ANY DISTRIBUTOR OF LICENSED PRODUCT, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE LIABLE TO ANY PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT LIMITATION, DAMAGES FOR LOSS OF GOODWILL, WORK STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL INJURY RESULTING FROM SUCH PARTYS NEGLIGENCE TO THE EXTENT APPLICABLE LAW PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OR LIMITATION OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THIS EXCLUSION AND LIMITATION MAY NOT APPLY TO YOU. - -11. Responsibility for Claims. As between Licensor and Contributors, each party is responsible for claims and damages arising, directly or indirectly, out of its utilization of rights under this License. 
You agree to work with Licensor and Contributors to distribute such responsibility on an equitable basis. Nothing herein is intended or shall be deemed to constitute any admission of liability. - -12. U.S. Government End Users. The Licensed Product is a commercial item, as that term is defined in 48 C.F.R. 2.101 (Oct. 1995), consisting of commercial computer software and commercial computer software documentation, as such terms are used in 48 C.F.R. 12.212 (Sept. 1995). Consistent with 48 C.F.R. 12.212 and 48 C.F.R. 227.7202-1 through 227.7202-4 (June 1995), all U.S. Government End Users acquire Licensed Product with only those rights set forth herein. - -13. Miscellaneous. This License represents the complete agreement concerning the subject matter hereof. If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable. This License shall be governed by California law provisions (except to the extent applicable law, if any, provides otherwise), excluding its conflict-of-law provisions. You expressly agree that in any litigation relating to this license the losing party shall be responsible for costs including, without limitation, court costs and reasonable attorneys fees and expenses. The application of the United Nations Convention on Contracts for the International Sale of Goods is expressly excluded. Any law or regulation that provides that the language of a contract shall be construed against the drafter shall not apply to this License. - -14. Definition of You in This License. You throughout this License, whether in upper or lower case, means an individual or a legal entity exercising rights under, and complying with all of the terms of, this License or a future version of this License issued under Section 7. For legal entities, you includes any entity that controls, is controlled by, is under common control with, or affiliated with, you. 
For purposes of this definition, control means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. You are responsible for advising any affiliated entity of the terms of this License, and that any rights or privileges derived from or obtained by way of this License are subject to the restrictions outlined herein. - -15. Glossary. All defined terms in this License that are used in more than one Section of this License are repeated here, in alphabetical order, for the convenience of the reader. The Section of this License in which each defined term is first used is shown in parentheses. - -Contributor: Each person or entity who created or contributed to the creation of, and distributed, a Modification. (See Section 2) - -Derivative Works: That term as used in this License is defined under U.S. copyright law. (See Section 1(b)) - -License: This BitTorrent Open Source License. (See first paragraph of License) - -Licensed Product: Any BitTorrent Product licensed pursuant to this License. The term "Licensed Product" includes all previous Modifications from any Contributor that you receive. (See first paragraph of License and Section 2) - -Licensor: BitTorrent, Inc. (See first paragraph of License) - -Modifications: Any additions to or deletions from the substance or structure of (i) a file containing Licensed Product, or (ii) any new file that contains any part of Licensed Product. (See Section 2) - -Notice: The notice contained in Exhibit A. 
(See Section 4(e)) - -Source Code: The preferred form for making modifications to the Licensed Product, including all modules contained therein, plus any associated interface definition files, scripts used to control compilation and installation of an executable program, or a list of differential comparisons against the Source Code of the Licensed Product. (See Section 1(a)) - -You: This term is defined in Section 14 of this License. - - -EXHIBIT A - -The Notice below must appear in each file of the Source Code of any copy you distribute of the Licensed Product or any hereto. Contributors to any Modifications may add their own copyright notices to identify their own contributions. - -License: - -The contents of this file are subject to the BitTorrent Open Source License Version 1.0 (the License). You may not copy or use this file, in either source code or executable form, except in compliance with the License. You may obtain a copy of the License at http://www.bittorrent.com/license/. - -Software distributed under the License is distributed on an AS IS basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the specific language governing rights and limitations under the License. - diff --git a/libs/bencode/__init__.py b/libs/bencode/__init__.py index 4424fc7..7a2af17 100644 --- a/libs/bencode/__init__.py +++ b/libs/bencode/__init__.py @@ -1 +1,131 @@ -from bencode import * \ No newline at end of file +# The contents of this file are subject to the BitTorrent Open Source License +# Version 1.1 (the License). You may not copy or use this file, in either +# source code or executable form, except in compliance with the License. You +# may obtain a copy of the License at http://www.bittorrent.com/license/. +# +# Software distributed under the License is distributed on an AS IS basis, +# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License +# for the specific language governing rights and limitations under the +# License. 
+ +# Written by Petru Paler + +from BTL import BTFailure + + +def decode_int(x, f): + f += 1 + newf = x.index('e', f) + n = int(x[f:newf]) + if x[f] == '-': + if x[f + 1] == '0': + raise ValueError + elif x[f] == '0' and newf != f+1: + raise ValueError + return (n, newf+1) + +def decode_string(x, f): + colon = x.index(':', f) + n = int(x[f:colon]) + if x[f] == '0' and colon != f+1: + raise ValueError + colon += 1 + return (x[colon:colon+n], colon+n) + +def decode_list(x, f): + r, f = [], f+1 + while x[f] != 'e': + v, f = decode_func[x[f]](x, f) + r.append(v) + return (r, f + 1) + +def decode_dict(x, f): + r, f = {}, f+1 + while x[f] != 'e': + k, f = decode_string(x, f) + r[k], f = decode_func[x[f]](x, f) + return (r, f + 1) + +decode_func = {} +decode_func['l'] = decode_list +decode_func['d'] = decode_dict +decode_func['i'] = decode_int +decode_func['0'] = decode_string +decode_func['1'] = decode_string +decode_func['2'] = decode_string +decode_func['3'] = decode_string +decode_func['4'] = decode_string +decode_func['5'] = decode_string +decode_func['6'] = decode_string +decode_func['7'] = decode_string +decode_func['8'] = decode_string +decode_func['9'] = decode_string + +def bdecode(x): + try: + r, l = decode_func[x[0]](x, 0) + except (IndexError, KeyError, ValueError): + raise BTFailure("not a valid bencoded string") + if l != len(x): + raise BTFailure("invalid bencoded value (data after valid prefix)") + return r + +from types import StringType, IntType, LongType, DictType, ListType, TupleType + + +class Bencached(object): + + __slots__ = ['bencoded'] + + def __init__(self, s): + self.bencoded = s + +def encode_bencached(x,r): + r.append(x.bencoded) + +def encode_int(x, r): + r.extend(('i', str(x), 'e')) + +def encode_bool(x, r): + if x: + encode_int(1, r) + else: + encode_int(0, r) + +def encode_string(x, r): + r.extend((str(len(x)), ':', x)) + +def encode_list(x, r): + r.append('l') + for i in x: + encode_func[type(i)](i, r) + r.append('e') + +def 
encode_dict(x,r): + r.append('d') + ilist = x.items() + ilist.sort() + for k, v in ilist: + r.extend((str(len(k)), ':', k)) + encode_func[type(v)](v, r) + r.append('e') + +encode_func = {} +encode_func[Bencached] = encode_bencached +encode_func[IntType] = encode_int +encode_func[LongType] = encode_int +encode_func[StringType] = encode_string +encode_func[ListType] = encode_list +encode_func[TupleType] = encode_list +encode_func[DictType] = encode_dict + +try: + from types import BooleanType + encode_func[BooleanType] = encode_bool +except ImportError: + pass + +def bencode(x): + r = [] + encode_func[type(x)](x, r) + return ''.join(r) diff --git a/libs/bencode/bencode.py b/libs/bencode/bencode.py deleted file mode 100644 index 7a2af17..0000000 --- a/libs/bencode/bencode.py +++ /dev/null @@ -1,131 +0,0 @@ -# The contents of this file are subject to the BitTorrent Open Source License -# Version 1.1 (the License). You may not copy or use this file, in either -# source code or executable form, except in compliance with the License. You -# may obtain a copy of the License at http://www.bittorrent.com/license/. -# -# Software distributed under the License is distributed on an AS IS basis, -# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License -# for the specific language governing rights and limitations under the -# License. 
- -# Written by Petru Paler - -from BTL import BTFailure - - -def decode_int(x, f): - f += 1 - newf = x.index('e', f) - n = int(x[f:newf]) - if x[f] == '-': - if x[f + 1] == '0': - raise ValueError - elif x[f] == '0' and newf != f+1: - raise ValueError - return (n, newf+1) - -def decode_string(x, f): - colon = x.index(':', f) - n = int(x[f:colon]) - if x[f] == '0' and colon != f+1: - raise ValueError - colon += 1 - return (x[colon:colon+n], colon+n) - -def decode_list(x, f): - r, f = [], f+1 - while x[f] != 'e': - v, f = decode_func[x[f]](x, f) - r.append(v) - return (r, f + 1) - -def decode_dict(x, f): - r, f = {}, f+1 - while x[f] != 'e': - k, f = decode_string(x, f) - r[k], f = decode_func[x[f]](x, f) - return (r, f + 1) - -decode_func = {} -decode_func['l'] = decode_list -decode_func['d'] = decode_dict -decode_func['i'] = decode_int -decode_func['0'] = decode_string -decode_func['1'] = decode_string -decode_func['2'] = decode_string -decode_func['3'] = decode_string -decode_func['4'] = decode_string -decode_func['5'] = decode_string -decode_func['6'] = decode_string -decode_func['7'] = decode_string -decode_func['8'] = decode_string -decode_func['9'] = decode_string - -def bdecode(x): - try: - r, l = decode_func[x[0]](x, 0) - except (IndexError, KeyError, ValueError): - raise BTFailure("not a valid bencoded string") - if l != len(x): - raise BTFailure("invalid bencoded value (data after valid prefix)") - return r - -from types import StringType, IntType, LongType, DictType, ListType, TupleType - - -class Bencached(object): - - __slots__ = ['bencoded'] - - def __init__(self, s): - self.bencoded = s - -def encode_bencached(x,r): - r.append(x.bencoded) - -def encode_int(x, r): - r.extend(('i', str(x), 'e')) - -def encode_bool(x, r): - if x: - encode_int(1, r) - else: - encode_int(0, r) - -def encode_string(x, r): - r.extend((str(len(x)), ':', x)) - -def encode_list(x, r): - r.append('l') - for i in x: - encode_func[type(i)](i, r) - r.append('e') - -def 
encode_dict(x,r): - r.append('d') - ilist = x.items() - ilist.sort() - for k, v in ilist: - r.extend((str(len(k)), ':', k)) - encode_func[type(v)](v, r) - r.append('e') - -encode_func = {} -encode_func[Bencached] = encode_bencached -encode_func[IntType] = encode_int -encode_func[LongType] = encode_int -encode_func[StringType] = encode_string -encode_func[ListType] = encode_list -encode_func[TupleType] = encode_list -encode_func[DictType] = encode_dict - -try: - from types import BooleanType - encode_func[BooleanType] = encode_bool -except ImportError: - pass - -def bencode(x): - r = [] - encode_func[type(x)](x, r) - return ''.join(r) diff --git a/libs/bs4/__init__.py b/libs/bs4/__init__.py index af8c718..7ba3426 100644 --- a/libs/bs4/__init__.py +++ b/libs/bs4/__init__.py @@ -17,16 +17,17 @@ http://www.crummy.com/software/BeautifulSoup/bs4/doc/ """ __author__ = "Leonard Richardson (leonardr@segfault.org)" -__version__ = "4.1.0" -__copyright__ = "Copyright (c) 2004-2012 Leonard Richardson" +__version__ = "4.3.2" +__copyright__ = "Copyright (c) 2004-2013 Leonard Richardson" __license__ = "MIT" __all__ = ['BeautifulSoup'] +import os import re import warnings -from .builder import builder_registry +from .builder import builder_registry, ParserRejectedMarkup from .dammit import UnicodeDammit from .element import ( CData, @@ -74,11 +75,7 @@ class BeautifulSoup(Tag): # want, look for one with these features. DEFAULT_BUILDER_FEATURES = ['html', 'fast'] - # Used when determining whether a text node is all whitespace and - # can be replaced with a single space. A text node that contains - # fancy Unicode spaces (usually non-breaking) should be left - # alone. 
- STRIP_ASCII_SPACES = {9: None, 10: None, 12: None, 13: None, 32: None, } + ASCII_SPACES = '\x20\x0a\x09\x0c\x0d' def __init__(self, markup="", features=None, builder=None, parse_only=None, from_encoding=None, **kwargs): @@ -149,7 +146,7 @@ class BeautifulSoup(Tag): features = self.DEFAULT_BUILDER_FEATURES builder_class = builder_registry.lookup(*features) if builder_class is None: - raise ValueError( + raise FeatureNotFound( "Couldn't find a tree builder with the features you " "requested: %s. Do you need to install a parser library?" % ",".join(features)) @@ -160,18 +157,46 @@ class BeautifulSoup(Tag): self.parse_only = parse_only - self.reset() - if hasattr(markup, 'read'): # It's a file-type object. markup = markup.read() - (self.markup, self.original_encoding, self.declared_html_encoding, - self.contains_replacement_characters) = ( - self.builder.prepare_markup(markup, from_encoding)) - - try: - self._feed() - except StopParsing: - pass + elif len(markup) <= 256: + # Print out warnings for a couple beginner problems + # involving passing non-markup to Beautiful Soup. + # Beautiful Soup will still parse the input as markup, + # just in case that's what the user really wants. + if (isinstance(markup, unicode) + and not os.path.supports_unicode_filenames): + possible_filename = markup.encode("utf8") + else: + possible_filename = markup + is_file = False + try: + is_file = os.path.exists(possible_filename) + except Exception, e: + # This is almost certainly a problem involving + # characters not valid in filenames on this + # system. Just let it go. + pass + if is_file: + warnings.warn( + '"%s" looks like a filename, not markup. You should probably open this file and pass the filehandle into Beautiful Soup.' % markup) + if markup[:5] == "http:" or markup[:6] == "https:": + # TODO: This is ugly but I couldn't get it to work in + # Python 3 otherwise. 
+ if ((isinstance(markup, bytes) and not b' ' in markup) + or (isinstance(markup, unicode) and not u' ' in markup)): + warnings.warn( + '"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.' % markup) + + for (self.markup, self.original_encoding, self.declared_html_encoding, + self.contains_replacement_characters) in ( + self.builder.prepare_markup(markup, from_encoding)): + self.reset() + try: + self._feed() + break + except ParserRejectedMarkup: + pass # Clear out the markup and remove the builder's circular # reference to this object. @@ -192,29 +217,32 @@ class BeautifulSoup(Tag): Tag.__init__(self, self, self.builder, self.ROOT_TAG_NAME) self.hidden = 1 self.builder.reset() - self.currentData = [] + self.current_data = [] self.currentTag = None self.tagStack = [] + self.preserve_whitespace_tag_stack = [] self.pushTag(self) def new_tag(self, name, namespace=None, nsprefix=None, **attrs): """Create a new tag associated with this soup.""" return Tag(None, self.builder, name, namespace, nsprefix, attrs) - def new_string(self, s): + def new_string(self, s, subclass=NavigableString): """Create a new NavigableString associated with this soup.""" - navigable = NavigableString(s) + navigable = subclass(s) navigable.setup() return navigable def insert_before(self, successor): - raise ValueError("BeautifulSoup objects don't support insert_before().") + raise NotImplementedError("BeautifulSoup objects don't support insert_before().") def insert_after(self, successor): - raise ValueError("BeautifulSoup objects don't support insert_after().") + raise NotImplementedError("BeautifulSoup objects don't support insert_after().") def popTag(self): tag = self.tagStack.pop() + if self.preserve_whitespace_tag_stack and tag == self.preserve_whitespace_tag_stack[-1]: + self.preserve_whitespace_tag_stack.pop() #print "Pop", tag.name if self.tagStack: 
self.currentTag = self.tagStack[-1] @@ -226,32 +254,49 @@ class BeautifulSoup(Tag): self.currentTag.contents.append(tag) self.tagStack.append(tag) self.currentTag = self.tagStack[-1] + if tag.name in self.builder.preserve_whitespace_tags: + self.preserve_whitespace_tag_stack.append(tag) def endData(self, containerClass=NavigableString): - if self.currentData: - currentData = u''.join(self.currentData) - if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and - not set([tag.name for tag in self.tagStack]).intersection( - self.builder.preserve_whitespace_tags)): - if '\n' in currentData: - currentData = '\n' - else: - currentData = ' ' - self.currentData = [] + if self.current_data: + current_data = u''.join(self.current_data) + # If whitespace is not preserved, and this string contains + # nothing but ASCII spaces, replace it with a single space + # or newline. + if not self.preserve_whitespace_tag_stack: + strippable = True + for i in current_data: + if i not in self.ASCII_SPACES: + strippable = False + break + if strippable: + if '\n' in current_data: + current_data = '\n' + else: + current_data = ' ' + + # Reset the data collector. + self.current_data = [] + + # Should we add this string to the tree at all? 
if self.parse_only and len(self.tagStack) <= 1 and \ (not self.parse_only.text or \ - not self.parse_only.search(currentData)): + not self.parse_only.search(current_data)): return - o = containerClass(currentData) + + o = containerClass(current_data) self.object_was_parsed(o) - def object_was_parsed(self, o): + def object_was_parsed(self, o, parent=None, most_recent_element=None): """Add an object to the parse tree.""" - o.setup(self.currentTag, self.previous_element) - if self.previous_element: - self.previous_element.next_element = o - self.previous_element = o - self.currentTag.contents.append(o) + parent = parent or self.currentTag + most_recent_element = most_recent_element or self._most_recent_element + o.setup(parent, most_recent_element) + + if most_recent_element is not None: + most_recent_element.next_element = o + self._most_recent_element = o + parent.contents.append(o) def _popToTag(self, name, nsprefix=None, inclusivePop=True): """Pops the tag stack up to and including the most recent @@ -260,22 +305,21 @@ class BeautifulSoup(Tag): the given tag.""" #print "Popping to %s" % name if name == self.ROOT_TAG_NAME: + # The BeautifulSoup object itself can never be popped. return - numPops = 0 - mostRecentTag = None + most_recently_popped = None - for i in range(len(self.tagStack) - 1, 0, -1): - if (name == self.tagStack[i].name - and nsprefix == self.tagStack[i].nsprefix == nsprefix): - numPops = len(self.tagStack) - i + stack_size = len(self.tagStack) + for i in range(stack_size - 1, 0, -1): + t = self.tagStack[i] + if (name == t.name and nsprefix == t.prefix): + if inclusivePop: + most_recently_popped = self.popTag() break - if not inclusivePop: - numPops = numPops - 1 + most_recently_popped = self.popTag() - for i in range(0, numPops): - mostRecentTag = self.popTag() - return mostRecentTag + return most_recently_popped def handle_starttag(self, name, namespace, nsprefix, attrs): """Push a start tag on to the stack. 
@@ -295,12 +339,12 @@ class BeautifulSoup(Tag): return None tag = Tag(self, self.builder, name, namespace, nsprefix, attrs, - self.currentTag, self.previous_element) + self.currentTag, self._most_recent_element) if tag is None: return tag - if self.previous_element: - self.previous_element.next_element = tag - self.previous_element = tag + if self._most_recent_element: + self._most_recent_element.next_element = tag + self._most_recent_element = tag self.pushTag(tag) return tag @@ -310,7 +354,7 @@ class BeautifulSoup(Tag): self._popToTag(name, nsprefix) def handle_data(self, data): - self.currentData.append(data) + self.current_data.append(data) def decode(self, pretty_print=False, eventual_encoding=DEFAULT_OUTPUT_ENCODING, @@ -333,6 +377,10 @@ class BeautifulSoup(Tag): return prefix + super(BeautifulSoup, self).decode( indent_level, eventual_encoding, formatter) +# Alias to make it easier to type import: 'from bs4 import _soup' +_s = BeautifulSoup +_soup = BeautifulSoup + class BeautifulStoneSoup(BeautifulSoup): """Deprecated interface to an XML parser.""" @@ -347,6 +395,9 @@ class BeautifulStoneSoup(BeautifulSoup): class StopParsing(Exception): pass +class FeatureNotFound(ValueError): + pass + #By default, act as an HTML pretty-printer. if __name__ == '__main__': diff --git a/libs/bs4/builder/__init__.py b/libs/bs4/builder/__init__.py index 4c22b86..740f5f2 100644 --- a/libs/bs4/builder/__init__.py +++ b/libs/bs4/builder/__init__.py @@ -147,18 +147,29 @@ class TreeBuilder(object): Modifies its input in place. """ + if not attrs: + return attrs if self.cdata_list_attributes: universal = self.cdata_list_attributes.get('*', []) tag_specific = self.cdata_list_attributes.get( - tag_name.lower(), []) - for cdata_list_attr in itertools.chain(universal, tag_specific): - if cdata_list_attr in dict(attrs): - # Basically, we have a "class" attribute whose - # value is a whitespace-separated list of CSS - # classes. Split it into a list. 
- value = attrs[cdata_list_attr] - values = whitespace_re.split(value) - attrs[cdata_list_attr] = values + tag_name.lower(), None) + for attr in attrs.keys(): + if attr in universal or (tag_specific and attr in tag_specific): + # We have a "class"-type attribute whose string + # value is a whitespace-separated list of + # values. Split it into a list. + value = attrs[attr] + if isinstance(value, basestring): + values = whitespace_re.split(value) + else: + # html5lib sometimes calls setAttributes twice + # for the same tag when rearranging the parse + # tree. On the second call the attribute value + # here is already a list. If this happens, + # leave the value alone rather than trying to + # split it again. + values = value + attrs[attr] = values return attrs class SAXTreeBuilder(TreeBuilder): @@ -287,6 +298,9 @@ def register_treebuilders_from(module): # Register the builder while we're at it. this_module.builder_registry.register(obj) +class ParserRejectedMarkup(Exception): + pass + # Builders are registered in reverse order of priority, so that custom # builder registrations will take precedence. In general, we want lxml # to take precedence over html5lib, because it's faster. And we only diff --git a/libs/bs4/builder/_html5lib.py b/libs/bs4/builder/_html5lib.py index 6001e38..7de36ae 100644 --- a/libs/bs4/builder/_html5lib.py +++ b/libs/bs4/builder/_html5lib.py @@ -27,7 +27,7 @@ class HTML5TreeBuilder(HTMLTreeBuilder): def prepare_markup(self, markup, user_specified_encoding): # Store the user-specified encoding for use later on. self.user_specified_encoding = user_specified_encoding - return markup, None, None, False + yield (markup, None, None, False) # These methods are defined by Beautiful Soup. 
def feed(self, markup): @@ -123,17 +123,50 @@ class Element(html5lib.treebuilders._base.Node): self.namespace = namespace def appendChild(self, node): - if (node.element.__class__ == NavigableString and self.element.contents + string_child = child = None + if isinstance(node, basestring): + # Some other piece of code decided to pass in a string + # instead of creating a TextElement object to contain the + # string. + string_child = child = node + elif isinstance(node, Tag): + # Some other piece of code decided to pass in a Tag + # instead of creating an Element object to contain the + # Tag. + child = node + elif node.element.__class__ == NavigableString: + string_child = child = node.element + else: + child = node.element + + if not isinstance(child, basestring) and child.parent is not None: + node.element.extract() + + if (string_child and self.element.contents and self.element.contents[-1].__class__ == NavigableString): - # Concatenate new text onto old text node - # XXX This has O(n^2) performance, for input like + # We are appending a string onto another string. + # TODO This has O(n^2) performance, for input like # "a</a>a</a>a</a>..." old_element = self.element.contents[-1] - new_element = self.soup.new_string(old_element + node.element) + new_element = self.soup.new_string(old_element + string_child) old_element.replace_with(new_element) + self.soup._most_recent_element = new_element else: - self.element.append(node.element) - node.parent = self + if isinstance(node, basestring): + # Create a brand new NavigableString from this string. + child = self.soup.new_string(node) + + # Tell Beautiful Soup to act as if it parsed this element + # immediately after the parent's last descendant. (Or + # immediately after the parent, if it has no children.) 
+ if self.element.contents: + most_recent_element = self.element._last_descendant(False) + else: + most_recent_element = self.element + + self.soup.object_was_parsed( + child, parent=self.element, + most_recent_element=most_recent_element) def getAttributes(self): return AttrList(self.element) @@ -162,11 +195,11 @@ class Element(html5lib.treebuilders._base.Node): attributes = property(getAttributes, setAttributes) def insertText(self, data, insertBefore=None): - text = TextNode(self.soup.new_string(data), self.soup) if insertBefore: - self.insertBefore(text, insertBefore) + text = TextNode(self.soup.new_string(data), self.soup) + self.insertBefore(data, insertBefore) else: - self.appendChild(text) + self.appendChild(data) def insertBefore(self, node, refNode): index = self.element.index(refNode.element) @@ -183,16 +216,46 @@ class Element(html5lib.treebuilders._base.Node): def removeChild(self, node): node.element.extract() - def reparentChildren(self, newParent): - while self.element.contents: - child = self.element.contents[0] - child.extract() - if isinstance(child, Tag): - newParent.appendChild( - Element(child, self.soup, namespaces["html"])) - else: - newParent.appendChild( - TextNode(child, self.soup)) + def reparentChildren(self, new_parent): + """Move all of this tag's children into another tag.""" + element = self.element + new_parent_element = new_parent.element + # Determine what this tag's next_element will be once all the children + # are removed. + final_next_element = element.next_sibling + + new_parents_last_descendant = new_parent_element._last_descendant(False, False) + if len(new_parent_element.contents) > 0: + # The new parent already contains children. We will be + # appending this tag's children to the end. + new_parents_last_child = new_parent_element.contents[-1] + new_parents_last_descendant_next_element = new_parents_last_descendant.next_element + else: + # The new parent contains no children. 
+ new_parents_last_child = None + new_parents_last_descendant_next_element = new_parent_element.next_element + + to_append = element.contents + append_after = new_parent.element.contents + if len(to_append) > 0: + # Set the first child's previous_element and previous_sibling + # to elements within the new parent + first_child = to_append[0] + first_child.previous_element = new_parents_last_descendant + first_child.previous_sibling = new_parents_last_child + + # Fix the last child's next_element and next_sibling + last_child = to_append[-1] + last_child.next_element = new_parents_last_descendant_next_element + last_child.next_sibling = None + + for child in to_append: + child.parent = new_parent_element + new_parent_element.contents.append(child) + + # Now that this element has no children, change its .next_element. + element.contents = [] + element.next_element = final_next_element def cloneNode(self): tag = self.soup.new_tag(self.element.name, self.namespace) diff --git a/libs/bs4/builder/_htmlparser.py b/libs/bs4/builder/_htmlparser.py index ede5cec..ca8d8b8 100644 --- a/libs/bs4/builder/_htmlparser.py +++ b/libs/bs4/builder/_htmlparser.py @@ -45,7 +45,15 @@ HTMLPARSER = 'html.parser' class BeautifulSoupHTMLParser(HTMLParser): def handle_starttag(self, name, attrs): # XXX namespace - self.soup.handle_starttag(name, None, None, dict(attrs)) + attr_dict = {} + for key, value in attrs: + # Change None attribute values to the empty string + # for consistency with the other tree builders. + if value is None: + value = '' + attr_dict[key] = value + attrvalue = '""' + self.soup.handle_starttag(name, None, None, attr_dict) def handle_endtag(self, name): self.soup.handle_endtag(name) @@ -58,6 +66,8 @@ class BeautifulSoupHTMLParser(HTMLParser): # it's fixed. 
if name.startswith('x'): real_name = int(name.lstrip('x'), 16) + elif name.startswith('X'): + real_name = int(name.lstrip('X'), 16) else: real_name = int(name) @@ -85,6 +95,9 @@ class BeautifulSoupHTMLParser(HTMLParser): self.soup.endData() if data.startswith("DOCTYPE "): data = data[len("DOCTYPE "):] + elif data == 'DOCTYPE': + # i.e. "<!DOCTYPE>" + data = '' self.soup.handle_data(data) self.soup.endData(Doctype) @@ -130,13 +143,14 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder): replaced with REPLACEMENT CHARACTER). """ if isinstance(markup, unicode): - return markup, None, None, False + yield (markup, None, None, False) + return try_encodings = [user_specified_encoding, document_declared_encoding] dammit = UnicodeDammit(markup, try_encodings, is_html=True) - return (dammit.markup, dammit.original_encoding, - dammit.declared_html_encoding, - dammit.contains_replacement_characters) + yield (dammit.markup, dammit.original_encoding, + dammit.declared_html_encoding, + dammit.contains_replacement_characters) def feed(self, markup): args, kwargs = self.parser_args diff --git a/libs/bs4/builder/_lxml.py b/libs/bs4/builder/_lxml.py index c78fdff..fa5d498 100644 --- a/libs/bs4/builder/_lxml.py +++ b/libs/bs4/builder/_lxml.py @@ -3,6 +3,7 @@ __all__ = [ 'LXMLTreeBuilder', ] +from io import BytesIO from StringIO import StringIO import collections from lxml import etree @@ -12,9 +13,10 @@ from bs4.builder import ( HTML, HTMLTreeBuilder, PERMISSIVE, + ParserRejectedMarkup, TreeBuilder, XML) -from bs4.dammit import UnicodeDammit +from bs4.dammit import EncodingDetector LXML = 'lxml' @@ -28,24 +30,36 @@ class LXMLTreeBuilderForXML(TreeBuilder): CHUNK_SIZE = 512 - @property - def default_parser(self): + # This namespace mapping is specified in the XML Namespace + # standard. 
+ DEFAULT_NSMAPS = {'http://www.w3.org/XML/1998/namespace' : "xml"} + + def default_parser(self, encoding): # This can either return a parser object or a class, which # will be instantiated with default arguments. - return etree.XMLParser(target=self, strip_cdata=False, recover=True) + if self._default_parser is not None: + return self._default_parser + return etree.XMLParser( + target=self, strip_cdata=False, recover=True, encoding=encoding) + + def parser_for(self, encoding): + # Use the default parser. + parser = self.default_parser(encoding) + + if isinstance(parser, collections.Callable): + # Instantiate the parser with default arguments + parser = parser(target=self, strip_cdata=False, encoding=encoding) + return parser def __init__(self, parser=None, empty_element_tags=None): + # TODO: Issue a warning if parser is present but not a + # callable, since that means there's no way to create new + # parsers for different encodings. + self._default_parser = parser if empty_element_tags is not None: self.empty_element_tags = set(empty_element_tags) - if parser is None: - # Use the default parser. - parser = self.default_parser - if isinstance(parser, collections.Callable): - # Instantiate the parser with default arguments - parser = parser(target=self, strip_cdata=False) - self.parser = parser self.soup = None - self.nsmaps = None + self.nsmaps = [self.DEFAULT_NSMAPS] def _getNsTag(self, tag): # Split the namespace URL out of a fully-qualified lxml tag @@ -58,50 +72,69 @@ class LXMLTreeBuilderForXML(TreeBuilder): def prepare_markup(self, markup, user_specified_encoding=None, document_declared_encoding=None): """ - :return: A 3-tuple (markup, original encoding, encoding - declared within markup). + :yield: A series of 4-tuples. + (markup, encoding, declared encoding, + has undergone character replacement) + + Each 4-tuple represents a strategy for parsing the document. """ if isinstance(markup, unicode): - return markup, None, None, False + # We were given Unicode. 
Maybe lxml can parse Unicode on + # this system? + yield markup, None, document_declared_encoding, False + if isinstance(markup, unicode): + # No, apparently not. Convert the Unicode to UTF-8 and + # tell lxml to parse it as UTF-8. + yield (markup.encode("utf8"), "utf8", + document_declared_encoding, False) + + # Instead of using UnicodeDammit to convert the bytestring to + # Unicode using different encodings, use EncodingDetector to + # iterate over the encodings, and tell lxml to try to parse + # the document as each one in turn. + is_html = not self.is_xml try_encodings = [user_specified_encoding, document_declared_encoding] - dammit = UnicodeDammit(markup, try_encodings, is_html=True) - return (dammit.markup, dammit.original_encoding, - dammit.declared_html_encoding, - dammit.contains_replacement_characters) + detector = EncodingDetector(markup, try_encodings, is_html) + for encoding in detector.encodings: + yield (detector.markup, encoding, document_declared_encoding, False) def feed(self, markup): - if isinstance(markup, basestring): + if isinstance(markup, bytes): + markup = BytesIO(markup) + elif isinstance(markup, unicode): markup = StringIO(markup) + # Call feed() at least once, even if the markup is empty, # or the parser won't be initialized. data = markup.read(self.CHUNK_SIZE) - self.parser.feed(data) - while data != '': - # Now call feed() on the rest of the data, chunk by chunk. - data = markup.read(self.CHUNK_SIZE) - if data != '': - self.parser.feed(data) - self.parser.close() + try: + self.parser = self.parser_for(self.soup.original_encoding) + self.parser.feed(data) + while len(data) != 0: + # Now call feed() on the rest of the data, chunk by chunk. 
+ data = markup.read(self.CHUNK_SIZE) + if len(data) != 0: + self.parser.feed(data) + self.parser.close() + except (UnicodeDecodeError, LookupError, etree.ParserError), e: + raise ParserRejectedMarkup(str(e)) def close(self): - self.nsmaps = None + self.nsmaps = [self.DEFAULT_NSMAPS] def start(self, name, attrs, nsmap={}): # Make sure attrs is a mutable dict--lxml may send an immutable dictproxy. attrs = dict(attrs) - nsprefix = None # Invert each namespace map as it comes in. - if len(nsmap) == 0 and self.nsmaps != None: - # There are no new namespaces for this tag, but namespaces - # are in play, so we need a separate tag stack to know - # when they end. + if len(self.nsmaps) > 1: + # There are no new namespaces for this tag, but + # non-default namespaces are in play, so we need a + # separate tag stack to know when they end. self.nsmaps.append(None) elif len(nsmap) > 0: # A new namespace mapping has come into play. - if self.nsmaps is None: - self.nsmaps = [] inverted_nsmap = dict((value, key) for key, value in nsmap.items()) self.nsmaps.append(inverted_nsmap) # Also treat the namespace mapping as a set of attributes on the @@ -111,14 +144,34 @@ class LXMLTreeBuilderForXML(TreeBuilder): attribute = NamespacedAttribute( "xmlns", prefix, "http://www.w3.org/2000/xmlns/") attrs[attribute] = namespace + + # Namespaces are in play. Find any attributes that came in + # from lxml with namespaces attached to their names, and + # turn then into NamespacedAttribute objects. 
+ new_attrs = {} + for attr, value in attrs.items(): + namespace, attr = self._getNsTag(attr) + if namespace is None: + new_attrs[attr] = value + else: + nsprefix = self._prefix_for_namespace(namespace) + attr = NamespacedAttribute(nsprefix, attr, namespace) + new_attrs[attr] = value + attrs = new_attrs + namespace, name = self._getNsTag(name) - if namespace is not None: - for inverted_nsmap in reversed(self.nsmaps): - if inverted_nsmap is not None and namespace in inverted_nsmap: - nsprefix = inverted_nsmap[namespace] - break + nsprefix = self._prefix_for_namespace(namespace) self.soup.handle_starttag(name, namespace, nsprefix, attrs) + def _prefix_for_namespace(self, namespace): + """Find the currently active prefix for the given namespace.""" + if namespace is None: + return None + for inverted_nsmap in reversed(self.nsmaps): + if inverted_nsmap is not None and namespace in inverted_nsmap: + return inverted_nsmap[namespace] + return None + def end(self, name): self.soup.endData() completed_tag = self.soup.tagStack[-1] @@ -130,14 +183,10 @@ class LXMLTreeBuilderForXML(TreeBuilder): nsprefix = inverted_nsmap[namespace] break self.soup.handle_endtag(name, nsprefix) - if self.nsmaps != None: + if len(self.nsmaps) > 1: # This tag, or one of its parents, introduced a namespace # mapping, so pop it off the stack. self.nsmaps.pop() - if len(self.nsmaps) == 0: - # Namespaces are no longer in play, so don't bother keeping - # track of the namespace stack. 
- self.nsmaps = None def pi(self, target, data): pass @@ -166,13 +215,18 @@ class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML): features = [LXML, HTML, FAST, PERMISSIVE] is_xml = False - @property - def default_parser(self): + def default_parser(self, encoding): return etree.HTMLParser def feed(self, markup): - self.parser.feed(markup) - self.parser.close() + encoding = self.soup.original_encoding + try: + self.parser = self.parser_for(encoding) + self.parser.feed(markup) + self.parser.close() + except (UnicodeDecodeError, LookupError, etree.ParserError), e: + raise ParserRejectedMarkup(str(e)) + def test_fragment_to_document(self, fragment): """See `TreeBuilder`.""" diff --git a/libs/bs4/dammit.py b/libs/bs4/dammit.py index 58cad9b..59640b7 100644 --- a/libs/bs4/dammit.py +++ b/libs/bs4/dammit.py @@ -1,27 +1,40 @@ # -*- coding: utf-8 -*- """Beautiful Soup bonus library: Unicode, Dammit -This class forces XML data into a standard format (usually to UTF-8 or -Unicode). It is heavily based on code from Mark Pilgrim's Universal -Feed Parser. It does not rewrite the XML or HTML to reflect a new -encoding; that's the tree builder's job. +This library converts a bytestream to Unicode through any means +necessary. It is heavily based on code from Mark Pilgrim's Universal +Feed Parser. It works best on XML and XML, but it does not rewrite the +XML or HTML to reflect a new encoding; that's the tree builder's job. """ import codecs from htmlentitydefs import codepoint2name import re -import warnings +import logging +import string -# Autodetects character encodings. Very useful. -# Download from http://chardet.feedparser.org/ -# or 'apt-get install python-chardet' -# or 'easy_install chardet' +# Import a library to autodetect character encodings. +chardet_type = None try: - import chardet - #import chardet.constants - #chardet.constants._debug = 1 + # First try the fast C implementation. 
+ # PyPI package: cchardet + import cchardet + def chardet_dammit(s): + return cchardet.detect(s)['encoding'] except ImportError: - chardet = None + try: + # Fall back to the pure Python implementation + # Debian package: python-chardet + # PyPI package: chardet + import chardet + def chardet_dammit(s): + return chardet.detect(s)['encoding'] + #import chardet.constants + #chardet.constants._debug = 1 + except ImportError: + # No chardet available. + def chardet_dammit(s): + return None # Available from http://cjkpython.i18n.org/. try: @@ -69,6 +82,8 @@ class EntitySubstitution(object): "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)" ")") + AMPERSAND_OR_BRACKET = re.compile("([<>&])") + @classmethod def _substitute_html_entity(cls, matchobj): entity = cls.CHARACTER_TO_HTML_ENTITY.get(matchobj.group(0)) @@ -122,6 +137,28 @@ class EntitySubstitution(object): def substitute_xml(cls, value, make_quoted_attribute=False): """Substitute XML entities for special XML characters. + :param value: A string to be substituted. The less-than sign + will become <, the greater-than sign will become >, + and any ampersands will become &. If you want ampersands + that appear to be part of an entity definition to be left + alone, use substitute_xml_containing_entities() instead. + + :param make_quoted_attribute: If True, then the string will be + quoted, as befits an attribute value. + """ + # Escape angle brackets and ampersands. + value = cls.AMPERSAND_OR_BRACKET.sub( + cls._substitute_xml_entity, value) + + if make_quoted_attribute: + value = cls.quoted_attribute_value(value) + return value + + @classmethod + def substitute_xml_containing_entities( + cls, value, make_quoted_attribute=False): + """Substitute XML entities for special XML characters. + :param value: A string to be substituted. 
The less-than sign will become <, the greater-than sign will become >, and any ampersands that are not part of an entity defition will @@ -155,6 +192,125 @@ class EntitySubstitution(object): cls._substitute_html_entity, s) +class EncodingDetector: + """Suggests a number of possible encodings for a bytestring. + + Order of precedence: + + 1. Encodings you specifically tell EncodingDetector to try first + (the override_encodings argument to the constructor). + + 2. An encoding declared within the bytestring itself, either in an + XML declaration (if the bytestring is to be interpreted as an XML + document), or in a <meta> tag (if the bytestring is to be + interpreted as an HTML document.) + + 3. An encoding detected through textual analysis by chardet, + cchardet, or a similar external library. + + 4. UTF-8. + + 5. Windows-1252. + """ + def __init__(self, markup, override_encodings=None, is_html=False): + self.override_encodings = override_encodings or [] + self.chardet_encoding = None + self.is_html = is_html + self.declared_encoding = None + + # First order of business: strip a byte-order mark. + self.markup, self.sniffed_encoding = self.strip_byte_order_mark(markup) + + def _usable(self, encoding, tried): + if encoding is not None: + encoding = encoding.lower() + if encoding not in tried: + tried.add(encoding) + return True + return False + + @property + def encodings(self): + """Yield a number of encodings that might work for this markup.""" + tried = set() + for e in self.override_encodings: + if self._usable(e, tried): + yield e + + # Did the document originally start with a byte-order mark + # that indicated its encoding? + if self._usable(self.sniffed_encoding, tried): + yield self.sniffed_encoding + + # Look within the document for an XML or HTML encoding + # declaration. 
+ if self.declared_encoding is None: + self.declared_encoding = self.find_declared_encoding( + self.markup, self.is_html) + if self._usable(self.declared_encoding, tried): + yield self.declared_encoding + + # Use third-party character set detection to guess at the + # encoding. + if self.chardet_encoding is None: + self.chardet_encoding = chardet_dammit(self.markup) + if self._usable(self.chardet_encoding, tried): + yield self.chardet_encoding + + # As a last-ditch effort, try utf-8 and windows-1252. + for e in ('utf-8', 'windows-1252'): + if self._usable(e, tried): + yield e + + @classmethod + def strip_byte_order_mark(cls, data): + """If a byte-order mark is present, strip it and return the encoding it implies.""" + encoding = None + if (len(data) >= 4) and (data[:2] == b'\xfe\xff') \ + and (data[2:4] != '\x00\x00'): + encoding = 'utf-16be' + data = data[2:] + elif (len(data) >= 4) and (data[:2] == b'\xff\xfe') \ + and (data[2:4] != '\x00\x00'): + encoding = 'utf-16le' + data = data[2:] + elif data[:3] == b'\xef\xbb\xbf': + encoding = 'utf-8' + data = data[3:] + elif data[:4] == b'\x00\x00\xfe\xff': + encoding = 'utf-32be' + data = data[4:] + elif data[:4] == b'\xff\xfe\x00\x00': + encoding = 'utf-32le' + data = data[4:] + return data, encoding + + @classmethod + def find_declared_encoding(cls, markup, is_html=False, search_entire_document=False): + """Given a document, tries to find its declared encoding. + + An XML encoding is declared at the beginning of the document. + + An HTML encoding is declared in a <meta> tag, hopefully near the + beginning of the document. 
+ """ + if search_entire_document: + xml_endpos = html_endpos = len(markup) + else: + xml_endpos = 1024 + html_endpos = max(2048, int(len(markup) * 0.05)) + + declared_encoding = None + declared_encoding_match = xml_encoding_re.search(markup, endpos=xml_endpos) + if not declared_encoding_match and is_html: + declared_encoding_match = html_meta_re.search(markup, endpos=html_endpos) + if declared_encoding_match is not None: + declared_encoding = declared_encoding_match.groups()[0].decode( + 'ascii') + if declared_encoding: + return declared_encoding.lower() + return None + class UnicodeDammit: """A class for detecting the encoding of a *ML document and converting it to a Unicode string. If the source encoding is @@ -176,65 +332,48 @@ class UnicodeDammit: def __init__(self, markup, override_encodings=[], smart_quotes_to=None, is_html=False): - self.declared_html_encoding = None self.smart_quotes_to = smart_quotes_to self.tried_encodings = [] self.contains_replacement_characters = False + self.is_html = is_html - if markup == '' or isinstance(markup, unicode): + self.detector = EncodingDetector(markup, override_encodings, is_html) + + # Short-circuit if the data is in Unicode to begin with. + if isinstance(markup, unicode) or markup == '': self.markup = markup self.unicode_markup = unicode(markup) self.original_encoding = None return - new_markup, document_encoding, sniffed_encoding = \ - self._detectEncoding(markup, is_html) - self.markup = new_markup + # The encoding detector may have stripped a byte-order mark. + # Use the stripped markup from this point on. + self.markup = self.detector.markup u = None - if new_markup != markup: - # _detectEncoding modified the markup, then converted it to - # Unicode and then to UTF-8. So convert it from UTF-8. 
- u = self._convert_from("utf8") - self.original_encoding = sniffed_encoding + for encoding in self.detector.encodings: + markup = self.detector.markup + u = self._convert_from(encoding) + if u is not None: + break if not u: - for proposed_encoding in ( - override_encodings + [document_encoding, sniffed_encoding]): - if proposed_encoding is not None: - u = self._convert_from(proposed_encoding) - if u: - break + # None of the encodings worked. As an absolute last resort, + # try them again with character replacement. - # If no luck and we have auto-detection library, try that: - if not u and chardet and not isinstance(self.markup, unicode): - u = self._convert_from(chardet.detect(self.markup)['encoding']) - - # As a last resort, try utf-8 and windows-1252: - if not u: - for proposed_encoding in ("utf-8", "windows-1252"): - u = self._convert_from(proposed_encoding) - if u: - break - - # As an absolute last resort, try the encodings again with - # character replacement. - if not u: - for proposed_encoding in ( - override_encodings + [ - document_encoding, sniffed_encoding, "utf-8", "windows-1252"]): - if proposed_encoding != "ascii": - u = self._convert_from(proposed_encoding, "replace") + for encoding in self.detector.encodings: + if encoding != "ascii": + u = self._convert_from(encoding, "replace") if u is not None: - warnings.warn( - UnicodeWarning( + logging.warning( "Some characters could not be decoded, and were " - "replaced with REPLACEMENT CHARACTER.")) + "replaced with REPLACEMENT CHARACTER.") self.contains_replacement_characters = True break - # We could at this point force it to ASCII, but that would - # destroy so much data that I think giving up is better + # If none of that worked, we could at this point force it to + # ASCII, but that would destroy so much data that I think + # giving up is better. 
self.unicode_markup = u if not u: self.original_encoding = None @@ -262,11 +401,10 @@ class UnicodeDammit: return None self.tried_encodings.append((proposed, errors)) markup = self.markup - # Convert smart quotes to HTML if coming from an encoding # that might have them. if (self.smart_quotes_to is not None - and proposed.lower() in self.ENCODINGS_WITH_SMART_QUOTES): + and proposed in self.ENCODINGS_WITH_SMART_QUOTES): smart_quotes_re = b"([\x80-\x9f])" smart_quotes_compiled = re.compile(smart_quotes_re) markup = smart_quotes_compiled.sub(self._sub_ms_char, markup) @@ -287,99 +425,24 @@ class UnicodeDammit: def _to_unicode(self, data, encoding, errors="strict"): '''Given a string and its encoding, decodes the string into Unicode. %encoding is a string recognized by encodings.aliases''' + return unicode(data, encoding, errors) - # strip Byte Order Mark (if present) - if (len(data) >= 4) and (data[:2] == '\xfe\xff') \ - and (data[2:4] != '\x00\x00'): - encoding = 'utf-16be' - data = data[2:] - elif (len(data) >= 4) and (data[:2] == '\xff\xfe') \ - and (data[2:4] != '\x00\x00'): - encoding = 'utf-16le' - data = data[2:] - elif data[:3] == '\xef\xbb\xbf': - encoding = 'utf-8' - data = data[3:] - elif data[:4] == '\x00\x00\xfe\xff': - encoding = 'utf-32be' - data = data[4:] - elif data[:4] == '\xff\xfe\x00\x00': - encoding = 'utf-32le' - data = data[4:] - newdata = unicode(data, encoding, errors) - return newdata - - def _detectEncoding(self, xml_data, is_html=False): - """Given a document, tries to detect its XML encoding.""" - xml_encoding = sniffed_xml_encoding = None - try: - if xml_data[:4] == b'\x4c\x6f\xa7\x94': - # EBCDIC - xml_data = self._ebcdic_to_ascii(xml_data) - elif xml_data[:4] == b'\x00\x3c\x00\x3f': - # UTF-16BE - sniffed_xml_encoding = 'utf-16be' - xml_data = unicode(xml_data, 'utf-16be').encode('utf-8') - elif (len(xml_data) >= 4) and (xml_data[:2] == b'\xfe\xff') \ - and (xml_data[2:4] != b'\x00\x00'): - # UTF-16BE with BOM - sniffed_xml_encoding = 
'utf-16be' - xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8') - elif xml_data[:4] == b'\x3c\x00\x3f\x00': - # UTF-16LE - sniffed_xml_encoding = 'utf-16le' - xml_data = unicode(xml_data, 'utf-16le').encode('utf-8') - elif (len(xml_data) >= 4) and (xml_data[:2] == b'\xff\xfe') and \ - (xml_data[2:4] != b'\x00\x00'): - # UTF-16LE with BOM - sniffed_xml_encoding = 'utf-16le' - xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8') - elif xml_data[:4] == b'\x00\x00\x00\x3c': - # UTF-32BE - sniffed_xml_encoding = 'utf-32be' - xml_data = unicode(xml_data, 'utf-32be').encode('utf-8') - elif xml_data[:4] == b'\x3c\x00\x00\x00': - # UTF-32LE - sniffed_xml_encoding = 'utf-32le' - xml_data = unicode(xml_data, 'utf-32le').encode('utf-8') - elif xml_data[:4] == b'\x00\x00\xfe\xff': - # UTF-32BE with BOM - sniffed_xml_encoding = 'utf-32be' - xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8') - elif xml_data[:4] == b'\xff\xfe\x00\x00': - # UTF-32LE with BOM - sniffed_xml_encoding = 'utf-32le' - xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8') - elif xml_data[:3] == b'\xef\xbb\xbf': - # UTF-8 with BOM - sniffed_xml_encoding = 'utf-8' - xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8') - else: - sniffed_xml_encoding = 'ascii' - pass - except: - xml_encoding_match = None - xml_encoding_match = xml_encoding_re.match(xml_data) - if not xml_encoding_match and is_html: - xml_encoding_match = html_meta_re.search(xml_data) - if xml_encoding_match is not None: - xml_encoding = xml_encoding_match.groups()[0].decode( - 'ascii').lower() - if is_html: - self.declared_html_encoding = xml_encoding - if sniffed_xml_encoding and \ - (xml_encoding in ('iso-10646-ucs-2', 'ucs-2', 'csunicode', - 'iso-10646-ucs-4', 'ucs-4', 'csucs4', - 'utf-16', 'utf-32', 'utf_16', 'utf_32', - 'utf16', 'u16')): - xml_encoding = sniffed_xml_encoding - return xml_data, xml_encoding, sniffed_xml_encoding + @property + def declared_html_encoding(self): + if not self.is_html: 
+ return None + return self.detector.declared_encoding def find_codec(self, charset): - return self._codec(self.CHARSET_ALIASES.get(charset, charset)) \ - or (charset and self._codec(charset.replace("-", ""))) \ - or (charset and self._codec(charset.replace("-", "_"))) \ + value = (self._codec(self.CHARSET_ALIASES.get(charset, charset)) + or (charset and self._codec(charset.replace("-", ""))) + or (charset and self._codec(charset.replace("-", "_"))) + or (charset and charset.lower()) or charset + ) + if value: + return value.lower() + return None def _codec(self, charset): if not charset: @@ -392,32 +455,6 @@ class UnicodeDammit: pass return codec - EBCDIC_TO_ASCII_MAP = None - - def _ebcdic_to_ascii(self, s): - c = self.__class__ - if not c.EBCDIC_TO_ASCII_MAP: - emap = (0,1,2,3,156,9,134,127,151,141,142,11,12,13,14,15, - 16,17,18,19,157,133,8,135,24,25,146,143,28,29,30,31, - 128,129,130,131,132,10,23,27,136,137,138,139,140,5,6,7, - 144,145,22,147,148,149,150,4,152,153,154,155,20,21,158,26, - 32,160,161,162,163,164,165,166,167,168,91,46,60,40,43,33, - 38,169,170,171,172,173,174,175,176,177,93,36,42,41,59,94, - 45,47,178,179,180,181,182,183,184,185,124,44,37,95,62,63, - 186,187,188,189,190,191,192,193,194,96,58,35,64,39,61,34, - 195,97,98,99,100,101,102,103,104,105,196,197,198,199,200, - 201,202,106,107,108,109,110,111,112,113,114,203,204,205, - 206,207,208,209,126,115,116,117,118,119,120,121,122,210, - 211,212,213,214,215,216,217,218,219,220,221,222,223,224, - 225,226,227,228,229,230,231,123,65,66,67,68,69,70,71,72, - 73,232,233,234,235,236,237,125,74,75,76,77,78,79,80,81, - 82,238,239,240,241,242,243,92,159,83,84,85,86,87,88,89, - 90,244,245,246,247,248,249,48,49,50,51,52,53,54,55,56,57, - 250,251,252,253,254,255) - import string - c.EBCDIC_TO_ASCII_MAP = string.maketrans( - ''.join(map(chr, list(range(256)))), ''.join(map(chr, emap))) - return s.translate(c.EBCDIC_TO_ASCII_MAP) # A partial mapping of ISO-Latin-1 to HTML entities/XML numeric entities. 
MS_CHARS = {b'\x80': ('euro', '20AC'), diff --git a/libs/bs4/diagnose.py b/libs/bs4/diagnose.py new file mode 100644 index 0000000..4d0b00a --- /dev/null +++ b/libs/bs4/diagnose.py @@ -0,0 +1,204 @@ +"""Diagnostic functions, mainly for use when doing tech support.""" +import cProfile +from StringIO import StringIO +from HTMLParser import HTMLParser +import bs4 +from bs4 import BeautifulSoup, __version__ +from bs4.builder import builder_registry + +import os +import pstats +import random +import tempfile +import time +import traceback +import sys +import cProfile + +def diagnose(data): + """Diagnostic suite for isolating common problems.""" + print "Diagnostic running on Beautiful Soup %s" % __version__ + print "Python version %s" % sys.version + + basic_parsers = ["html.parser", "html5lib", "lxml"] + for name in basic_parsers: + for builder in builder_registry.builders: + if name in builder.features: + break + else: + basic_parsers.remove(name) + print ( + "I noticed that %s is not installed. Installing it may help." % + name) + + if 'lxml' in basic_parsers: + basic_parsers.append(["lxml", "xml"]) + from lxml import etree + print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION)) + + if 'html5lib' in basic_parsers: + import html5lib + print "Found html5lib version %s" % html5lib.__version__ + + if hasattr(data, 'read'): + data = data.read() + elif os.path.exists(data): + print '"%s" looks like a filename. Reading data from the file.' % data + data = open(data).read() + elif data.startswith("http:") or data.startswith("https:"): + print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data + print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup." 
+ return + print + + for parser in basic_parsers: + print "Trying to parse your markup with %s" % parser + success = False + try: + soup = BeautifulSoup(data, parser) + success = True + except Exception, e: + print "%s could not parse the markup." % parser + traceback.print_exc() + if success: + print "Here's what %s did with the markup:" % parser + print soup.prettify() + + print "-" * 80 + +def lxml_trace(data, html=True, **kwargs): + """Print out the lxml events that occur during parsing. + + This lets you see how lxml parses a document when no Beautiful + Soup code is running. + """ + from lxml import etree + for event, element in etree.iterparse(StringIO(data), html=html, **kwargs): + print("%s, %4s, %s" % (event, element.tag, element.text)) + +class AnnouncingParser(HTMLParser): + """Announces HTMLParser parse events, without doing anything else.""" + + def _p(self, s): + print(s) + + def handle_starttag(self, name, attrs): + self._p("%s START" % name) + + def handle_endtag(self, name): + self._p("%s END" % name) + + def handle_data(self, data): + self._p("%s DATA" % data) + + def handle_charref(self, name): + self._p("%s CHARREF" % name) + + def handle_entityref(self, name): + self._p("%s ENTITYREF" % name) + + def handle_comment(self, data): + self._p("%s COMMENT" % data) + + def handle_decl(self, data): + self._p("%s DECL" % data) + + def unknown_decl(self, data): + self._p("%s UNKNOWN-DECL" % data) + + def handle_pi(self, data): + self._p("%s PI" % data) + +def htmlparser_trace(data): + """Print out the HTMLParser events that occur during parsing. + + This lets you see how HTMLParser parses a document when no + Beautiful Soup code is running. + """ + parser = AnnouncingParser() + parser.feed(data) + +_vowels = "aeiou" +_consonants = "bcdfghjklmnpqrstvwxyz" + +def rword(length=5): + "Generate a random word-like string." 
+ s = '' + for i in range(length): + if i % 2 == 0: + t = _consonants + else: + t = _vowels + s += random.choice(t) + return s + +def rsentence(length=4): + "Generate a random sentence-like string." + return " ".join(rword(random.randint(4,9)) for i in range(length)) + +def rdoc(num_elements=1000): + """Randomly generate an invalid HTML document.""" + tag_names = ['p', 'div', 'span', 'i', 'b', 'script', 'table'] + elements = [] + for i in range(num_elements): + choice = random.randint(0,3) + if choice == 0: + # New tag. + tag_name = random.choice(tag_names) + elements.append("<%s>" % tag_name) + elif choice == 1: + elements.append(rsentence(random.randint(1,4))) + elif choice == 2: + # Close a tag. + tag_name = random.choice(tag_names) + elements.append("</%s>" % tag_name) + return "<html>" + "\n".join(elements) + "</html>" + +def benchmark_parsers(num_elements=100000): + """Very basic head-to-head performance benchmark.""" + print "Comparative parser benchmark on Beautiful Soup %s" % __version__ + data = rdoc(num_elements) + print "Generated a large invalid HTML document (%d bytes)." % len(data) + + for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]: + success = False + try: + a = time.time() + soup = BeautifulSoup(data, parser) + b = time.time() + success = True + except Exception, e: + print "%s could not parse the markup." % parser + traceback.print_exc() + if success: + print "BS4+%s parsed the markup in %.2fs." % (parser, b-a) + + from lxml import etree + a = time.time() + etree.HTML(data) + b = time.time() + print "Raw lxml parsed the markup in %.2fs." % (b-a) + + import html5lib + parser = html5lib.HTMLParser() + a = time.time() + parser.parse(data) + b = time.time() + print "Raw html5lib parsed the markup in %.2fs." 
% (b-a) + +def profile(num_elements=100000, parser="lxml"): + + filehandle = tempfile.NamedTemporaryFile() + filename = filehandle.name + + data = rdoc(num_elements) + vars = dict(bs4=bs4, data=data, parser=parser) + cProfile.runctx('bs4.BeautifulSoup(data, parser)' , vars, vars, filename) + + stats = pstats.Stats(filename) + # stats.strip_dirs() + stats.sort_stats("cumulative") + stats.print_stats('_html5lib|bs4', 50) + +if __name__ == '__main__': + diagnose(sys.stdin.read()) diff --git a/libs/bs4/element.py b/libs/bs4/element.py index 91a4007..da9afdf 100644 --- a/libs/bs4/element.py +++ b/libs/bs4/element.py @@ -26,6 +26,9 @@ class NamespacedAttribute(unicode): def __new__(cls, prefix, name, namespace=None): if name is None: obj = unicode.__new__(cls, prefix) + elif prefix is None: + # Not really namespaced. + obj = unicode.__new__(cls, name) else: obj = unicode.__new__(cls, prefix + ":" + name) obj.prefix = prefix @@ -78,6 +81,40 @@ class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution): return match.group(1) + encoding return self.CHARSET_RE.sub(rewrite, self.original_value) +class HTMLAwareEntitySubstitution(EntitySubstitution): + + """Entity substitution rules that are aware of some HTML quirks. + + Specifically, the contents of <script> and <style> tags should not + undergo entity substitution. + + Incoming NavigableString objects are checked to see if they're the + direct children of a <script> or <style> tag. + """ + + cdata_containing_tags = set(["script", "style"]) + + preformatted_tags = set(["pre"]) + + @classmethod + def _substitute_if_appropriate(cls, ns, f): + if (isinstance(ns, NavigableString) + and ns.parent is not None + and ns.parent.name in cls.cdata_containing_tags): + # Do nothing. + return ns + # Substitute. 
+ return f(ns) + + @classmethod + def substitute_html(cls, ns): + return cls._substitute_if_appropriate( + ns, EntitySubstitution.substitute_html) + + @classmethod + def substitute_xml(cls, ns): + return cls._substitute_if_appropriate( + ns, EntitySubstitution.substitute_xml) class PageElement(object): """Contains the navigational information for some part of the page @@ -94,25 +131,60 @@ class PageElement(object): # converted to entities. This is not recommended, but it's # faster than "minimal". # A function - This function will be called on every string that - # needs to undergo entity substition - FORMATTERS = { + # needs to undergo entity substitution. + # + + # In an HTML document, the default "html" and "minimal" functions + # will leave the contents of <script> and <style> tags alone. For + # an XML document, all tags will be given the same treatment. + + HTML_FORMATTERS = { + "html" : HTMLAwareEntitySubstitution.substitute_html, + "minimal" : HTMLAwareEntitySubstitution.substitute_xml, + None : None + } + + XML_FORMATTERS = { "html" : EntitySubstitution.substitute_html, "minimal" : EntitySubstitution.substitute_xml, None : None } - @classmethod def format_string(self, s, formatter='minimal'): """Format the given string using the given formatter.""" if not callable(formatter): - formatter = self.FORMATTERS.get( - formatter, EntitySubstitution.substitute_xml) + formatter = self._formatter_for_name(formatter) if formatter is None: output = s else: output = formatter(s) return output + @property + def _is_xml(self): + """Is this element part of an XML tree or an HTML tree? + + This is used when mapping a formatter name ("minimal") to an + appropriate function (one that performs entity-substitution on + the contents of <script> and <style> tags, or not). It's + inefficient, but it should be called very rarely. + """ + if self.parent is None: + # This is the top-level object. It should have .is_xml set + # from tree creation. 
If not, take a guess--BS is usually + # used on HTML markup. + return getattr(self, 'is_xml', False) + return self.parent._is_xml + + def _formatter_for_name(self, name): + "Look up a formatter function based on its name and the tree." + if self._is_xml: + return self.XML_FORMATTERS.get( + name, EntitySubstitution.substitute_xml) + else: + return self.HTML_FORMATTERS.get( + name, HTMLAwareEntitySubstitution.substitute_xml) + def setup(self, parent=None, previous_element=None): """Sets up the initial relations between this element and other elements.""" @@ -183,11 +255,16 @@ class PageElement(object): self.previous_sibling = self.next_sibling = None return self - def _last_descendant(self): + def _last_descendant(self, is_initialized=True, accept_self=True): "Finds the last element beneath this object to be parsed." - last_child = self - while hasattr(last_child, 'contents') and last_child.contents: - last_child = last_child.contents[-1] + if is_initialized and self.next_sibling: + last_child = self.next_sibling.previous_element + else: + last_child = self + while isinstance(last_child, Tag) and last_child.contents: + last_child = last_child.contents[-1] + if not accept_self and last_child == self: + last_child = None return last_child # BS3: Not part of the API! 
_lastRecursiveChild = _last_descendant @@ -222,11 +299,11 @@ class PageElement(object): previous_child = self.contents[position - 1] new_child.previous_sibling = previous_child new_child.previous_sibling.next_sibling = new_child - new_child.previous_element = previous_child._last_descendant() + new_child.previous_element = previous_child._last_descendant(False) if new_child.previous_element is not None: new_child.previous_element.next_element = new_child - new_childs_last_element = new_child._last_descendant() + new_childs_last_element = new_child._last_descendant(False) if position >= len(self.contents): new_child.next_sibling = None @@ -366,7 +443,7 @@ class PageElement(object): # NOTE: We can't use _find_one because findParents takes a different # set of arguments. r = None - l = self.find_parents(name, attrs, 1) + l = self.find_parents(name, attrs, 1, **kwargs) if l: r = l[0] return r @@ -403,20 +480,21 @@ class PageElement(object): if isinstance(name, SoupStrainer): strainer = name - elif text is None and not limit and not attrs and not kwargs: - # Optimization to find all tags. - if name is True or name is None: - return [element for element in generator - if isinstance(element, Tag)] - # Optimization to find all tags with a given name. - elif isinstance(name, basestring): - return [element for element in generator - if isinstance(element, Tag) and element.name == name] - else: - strainer = SoupStrainer(name, attrs, text, **kwargs) else: - # Build a SoupStrainer strainer = SoupStrainer(name, attrs, text, **kwargs) + + if text is None and not limit and not attrs and not kwargs: + if name is True or name is None: + # Optimization to find all tags. + result = (element for element in generator + if isinstance(element, Tag)) + return ResultSet(strainer, result) + elif isinstance(name, basestring): + # Optimization to find all tags with a given name. 
+ result = (element for element in generator + if isinstance(element, Tag) + and element.name == name) + return ResultSet(strainer, result) results = ResultSet(strainer) while True: try: @@ -495,6 +573,14 @@ class PageElement(object): value =" ".join(value) return value + def _tag_name_matches_and(self, function, tag_name): + if not tag_name: + return function + else: + def _match(tag): + return tag.name == tag_name and function(tag) + return _match + def _attribute_checker(self, operator, attribute, value=''): """Create a function that performs a CSS selector operation. @@ -536,87 +622,6 @@ class PageElement(object): else: return lambda el: el.has_attr(attribute) - def select(self, selector): - """Perform a CSS selection operation on the current element.""" - tokens = selector.split() - current_context = [self] - for index, token in enumerate(tokens): - if tokens[index - 1] == '>': - # already found direct descendants in last step. skip this - # step. - continue - m = self.attribselect_re.match(token) - if m is not None: - # Attribute selector - tag, attribute, operator, value = m.groups() - if not tag: - tag = True - checker = self._attribute_checker(operator, attribute, value) - found = [] - for context in current_context: - found.extend( - [el for el in context.find_all(tag) if checker(el)]) - current_context = found - continue - - if '#' in token: - # ID selector - tag, id = token.split('#', 1) - if tag == "": - tag = True - el = current_context[0].find(tag, {'id': id}) - if el is None: - return [] # No match - current_context = [el] - continue - - if '.' 
in token: - # Class selector - tag_name, klass = token.split('.', 1) - if not tag_name: - tag_name = True - classes = set(klass.split('.')) - found = [] - def classes_match(tag): - if tag_name is not True and tag.name != tag_name: - return False - if not tag.has_attr('class'): - return False - return classes.issubset(tag['class']) - for context in current_context: - found.extend(context.find_all(classes_match)) - current_context = found - continue - - if token == '*': - # Star selector - found = [] - for context in current_context: - found.extend(context.findAll(True)) - current_context = found - continue - - if token == '>': - # Child selector - tag = tokens[index + 1] - if not tag: - tag = True - - found = [] - for context in current_context: - found.extend(context.find_all(tag, recursive=False)) - current_context = found - continue - - # Here we should just have a regular tag - if not self.tag_name_re.match(token): - return [] - found = [] - for context in current_context: - found.extend(context.findAll(token)) - current_context = found - return current_context - # Old non-property versions of the generators, for backwards # compatibility with BS3. def nextGenerator(self): @@ -652,6 +657,9 @@ class NavigableString(unicode, PageElement): return unicode.__new__(cls, value) return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) + def __copy__(self): + return self + def __getnewargs__(self): return (unicode(self),) @@ -670,6 +678,13 @@ class NavigableString(unicode, PageElement): output = self.format_string(self, formatter) return self.PREFIX + output + self.SUFFIX + @property + def name(self): + return None + + @name.setter + def name(self, name): + raise AttributeError("A NavigableString cannot be given a name.") class PreformattedString(NavigableString): """A NavigableString not subject to the normal formatting rules. 
@@ -709,7 +724,7 @@ class Doctype(PreformattedString): @classmethod def for_name_and_ids(cls, name, pub_id, system_id): - value = name + value = name or '' if pub_id is not None: value += ' PUBLIC "%s"' % pub_id if system_id is not None: @@ -744,7 +759,7 @@ class Tag(PageElement): self.prefix = prefix if attrs is None: attrs = {} - elif builder.cdata_list_attributes: + elif attrs and builder.cdata_list_attributes: attrs = builder._replace_cdata_list_attribute_values( self.name, attrs) else: @@ -803,16 +818,24 @@ class Tag(PageElement): self.clear() self.append(string.__class__(string)) - def _all_strings(self, strip=False): - """Yield all child strings, possibly stripping them.""" + def _all_strings(self, strip=False, types=(NavigableString, CData)): + """Yield all strings of certain classes, possibly stripping them. + + By default, yields only NavigableString and CData objects. So + no comments, processing instructions, etc. + """ for descendant in self.descendants: - if not isinstance(descendant, NavigableString): + if ( + (types is None and not isinstance(descendant, NavigableString)) + or + (types is not None and type(descendant) not in types)): continue if strip: descendant = descendant.strip() if len(descendant) == 0: continue yield descendant + strings = property(_all_strings) @property @@ -820,11 +843,13 @@ class Tag(PageElement): for string in self._all_strings(True): yield string - def get_text(self, separator="", strip=False): + def get_text(self, separator=u"", strip=False, + types=(NavigableString, CData)): """ Get all child strings, concatenated using the given separator. 
""" - return separator.join([s for s in self._all_strings(strip)]) + return separator.join([s for s in self._all_strings( + strip, types=types)]) getText = get_text text = property(get_text) @@ -835,6 +860,7 @@ class Tag(PageElement): while i is not None: next = i.next_element i.__dict__.clear() + i.contents = [] i = next def clear(self, decompose=False): @@ -966,6 +992,13 @@ class Tag(PageElement): u = self.decode(indent_level, encoding, formatter) return u.encode(encoding, errors) + def _should_pretty_print(self, indent_level): + """Should this tag be pretty-printed?""" + return ( + indent_level is not None and + (self.name not in HTMLAwareEntitySubstitution.preformatted_tags + or self._is_xml)) + def decode(self, indent_level=None, eventual_encoding=DEFAULT_OUTPUT_ENCODING, formatter="minimal"): @@ -978,6 +1011,12 @@ class Tag(PageElement): document contains a <META> tag that mentions the document's encoding. """ + + # First off, turn a string formatter into a function. This + # will stop the lookup from happening over and over again. 
+ if not callable(formatter): + formatter = self._formatter_for_name(formatter) + attrs = [] if self.attrs: for key, val in sorted(self.attrs.items()): @@ -987,7 +1026,7 @@ class Tag(PageElement): if isinstance(val, list) or isinstance(val, tuple): val = ' '.join(val) elif not isinstance(val, basestring): - val = str(val) + val = unicode(val) elif ( isinstance(val, AttributeValueWithCharsetSubstitution) and eventual_encoding is not None): @@ -995,26 +1034,30 @@ class Tag(PageElement): text = self.format_string(val, formatter) decoded = ( - str(key) + '=' + unicode(key) + '=' + EntitySubstitution.quoted_attribute_value(text)) attrs.append(decoded) close = '' closeTag = '' - if self.is_empty_element: - close = '/' - else: - closeTag = '</%s>' % self.name prefix = '' if self.prefix: prefix = self.prefix + ":" - pretty_print = (indent_level is not None) + if self.is_empty_element: + close = '/' + else: + closeTag = '</%s%s>' % (prefix, self.name) + + pretty_print = self._should_pretty_print(indent_level) + space = '' + indent_space = '' + if indent_level is not None: + indent_space = (' ' * (indent_level - 1)) if pretty_print: - space = (' ' * (indent_level - 1)) + space = indent_space indent_contents = indent_level + 1 else: - space = '' indent_contents = None contents = self.decode_contents( indent_contents, eventual_encoding, formatter) @@ -1027,8 +1070,10 @@ class Tag(PageElement): attribute_string = '' if attrs: attribute_string = ' ' + ' '.join(attrs) - if pretty_print: - s.append(space) + if indent_level is not None: + # Even if this particular tag is not pretty-printed, + # we should indent up to the start of the tag. 
+ s.append(indent_space) s.append('<%s%s%s%s>' % ( prefix, self.name, attribute_string, close)) if pretty_print: @@ -1039,7 +1084,10 @@ class Tag(PageElement): if pretty_print and closeTag: s.append(space) s.append(closeTag) - if pretty_print and closeTag and self.next_sibling: + if indent_level is not None and closeTag and self.next_sibling: + # Even if this particular tag is not pretty-printed, + # we're now done with the tag, and we should add a + # newline if appropriate. s.append("\n") s = ''.join(s) return s @@ -1062,6 +1110,11 @@ class Tag(PageElement): document contains a <META> tag that mentions the document's encoding. """ + # First off, turn a string formatter into a function. This + # will stop the lookup from happening over and over again. + if not callable(formatter): + formatter = self._formatter_for_name(formatter) + pretty_print = (indent_level is not None) s = [] for c in self: @@ -1071,13 +1124,13 @@ class Tag(PageElement): elif isinstance(c, Tag): s.append(c.decode(indent_level, eventual_encoding, formatter)) - if text and indent_level: + if text and indent_level and not self.name == 'pre': text = text.strip() if text: - if pretty_print: + if pretty_print and not self.name == 'pre': s.append(" " * (indent_level - 1)) s.append(text) - if pretty_print: + if pretty_print and not self.name == 'pre': s.append("\n") return ''.join(s) @@ -1120,6 +1173,7 @@ class Tag(PageElement): callable that takes a string and returns whether or not the string matches for some custom definition of 'matches'. 
The same is true of the tag name.""" + generator = self.descendants if not recursive: generator = self.children @@ -1143,6 +1197,207 @@ class Tag(PageElement): yield current current = current.next_element + # CSS selector code + + _selector_combinators = ['>', '+', '~'] + _select_debug = False + def select(self, selector, _candidate_generator=None): + """Perform a CSS selection operation on the current element.""" + tokens = selector.split() + current_context = [self] + + if tokens[-1] in self._selector_combinators: + raise ValueError( + 'Final combinator "%s" is missing an argument.' % tokens[-1]) + if self._select_debug: + print 'Running CSS selector "%s"' % selector + for index, token in enumerate(tokens): + if self._select_debug: + print ' Considering token "%s"' % token + recursive_candidate_generator = None + tag_name = None + if tokens[index-1] in self._selector_combinators: + # This token was consumed by the previous combinator. Skip it. + if self._select_debug: + print ' Token was consumed by the previous combinator.' + continue + # Each operation corresponds to a checker function, a rule + # for determining whether a candidate matches the + # selector. Candidates are generated by the active + # iterator. + checker = None + + m = self.attribselect_re.match(token) + if m is not None: + # Attribute selector + tag_name, attribute, operator, value = m.groups() + checker = self._attribute_checker(operator, attribute, value) + + elif '#' in token: + # ID selector + tag_name, tag_id = token.split('#', 1) + def id_matches(tag): + return tag.get('id', None) == tag_id + checker = id_matches + + elif '.' 
in token: + # Class selector + tag_name, klass = token.split('.', 1) + classes = set(klass.split('.')) + def classes_match(candidate): + return classes.issubset(candidate.get('class', [])) + checker = classes_match + + elif ':' in token: + # Pseudo-class + tag_name, pseudo = token.split(':', 1) + if tag_name == '': + raise ValueError( + "A pseudo-class must be prefixed with a tag name.") + pseudo_attributes = re.match('([a-zA-Z\d-]+)\(([a-zA-Z\d]+)\)', pseudo) + found = [] + if pseudo_attributes is not None: + pseudo_type, pseudo_value = pseudo_attributes.groups() + if pseudo_type == 'nth-of-type': + try: + pseudo_value = int(pseudo_value) + except: + raise NotImplementedError( + 'Only numeric values are currently supported for the nth-of-type pseudo-class.') + if pseudo_value < 1: + raise ValueError( + 'nth-of-type pseudo-class value must be at least 1.') + class Counter(object): + def __init__(self, destination): + self.count = 0 + self.destination = destination + + def nth_child_of_type(self, tag): + self.count += 1 + if self.count == self.destination: + return True + if self.count > self.destination: + # Stop the generator that's sending us + # these things. + raise StopIteration() + return False + checker = Counter(pseudo_value).nth_child_of_type + else: + raise NotImplementedError( + 'Only the following pseudo-classes are implemented: nth-of-type.') + + elif token == '*': + # Star selector -- matches everything + pass + elif token == '>': + # Run the next token as a CSS selector against the + # direct children of each tag in the current context. + recursive_candidate_generator = lambda tag: tag.children + elif token == '~': + # Run the next token as a CSS selector against the + # siblings of each tag in the current context. + recursive_candidate_generator = lambda tag: tag.next_siblings + elif token == '+': + # For each tag in the current context, run the next + # token as a CSS selector against the tag's next + # sibling that's a tag. 
+ def next_tag_sibling(tag): + yield tag.find_next_sibling(True) + recursive_candidate_generator = next_tag_sibling + + elif self.tag_name_re.match(token): + # Just a tag name. + tag_name = token + else: + raise ValueError( + 'Unsupported or invalid CSS selector: "%s"' % token) + + if recursive_candidate_generator: + # This happens when the selector looks like "> foo". + # + # The generator calls select() recursively on every + # member of the current context, passing in a different + # candidate generator and a different selector. + # + # In the case of "> foo", the candidate generator is + # one that yields a tag's direct children (">"), and + # the selector is "foo". + next_token = tokens[index+1] + def recursive_select(tag): + if self._select_debug: + print ' Calling select("%s") recursively on %s %s' % (next_token, tag.name, tag.attrs) + print '-' * 40 + for i in tag.select(next_token, recursive_candidate_generator): + if self._select_debug: + print '(Recursive select picked up candidate %s %s)' % (i.name, i.attrs) + yield i + if self._select_debug: + print '-' * 40 + _use_candidate_generator = recursive_select + elif _candidate_generator is None: + # By default, a tag's candidates are all of its + # children. If tag_name is defined, only yield tags + # with that name. + if self._select_debug: + if tag_name: + check = "[any]" + else: + check = tag_name + print ' Default candidate generator, tag name="%s"' % check + if self._select_debug: + # This is redundant with later code, but it stops + # a bunch of bogus tags from cluttering up the + # debug log. 
+ def default_candidate_generator(tag): + for child in tag.descendants: + if not isinstance(child, Tag): + continue + if tag_name and not child.name == tag_name: + continue + yield child + _use_candidate_generator = default_candidate_generator + else: + _use_candidate_generator = lambda tag: tag.descendants + else: + _use_candidate_generator = _candidate_generator + + new_context = [] + new_context_ids = set([]) + for tag in current_context: + if self._select_debug: + print " Running candidate generator on %s %s" % ( + tag.name, repr(tag.attrs)) + for candidate in _use_candidate_generator(tag): + if not isinstance(candidate, Tag): + continue + if tag_name and candidate.name != tag_name: + continue + if checker is not None: + try: + result = checker(candidate) + except StopIteration: + # The checker has decided we should no longer + # run the generator. + break + if checker is None or result: + if self._select_debug: + print " SUCCESS %s %s" % (candidate.name, repr(candidate.attrs)) + if id(candidate) not in new_context_ids: + # If a tag matches a selector more than once, + # don't include it in the context more than once. + new_context.append(candidate) + new_context_ids.add(id(candidate)) + elif self._select_debug: + print " FAILURE %s %s" % (candidate.name, repr(candidate.attrs)) + + current_context = new_context + + if self._select_debug: + print "Final verdict:" + for i in current_context: + print " %s %s" % (i.name, i.attrs) + return current_context + # Old names for backwards compatibility def childGenerator(self): return self.children @@ -1150,10 +1405,13 @@ class Tag(PageElement): def recursiveChildGenerator(self): return self.descendants - # This was kind of misleading because has_key() (attributes) was - # different from __in__ (contents). has_key() is gone in Python 3, - # anyway. - has_key = has_attr + def has_key(self, key): + """This was kind of misleading because has_key() (attributes) + was different from __in__ (contents). 
has_key() is gone in + Python 3, anyway.""" + warnings.warn('has_key is deprecated. Use has_attr("%s") instead.' % ( + key)) + return self.has_attr(key) # Next, a couple classes to represent queries and their results. class SoupStrainer(object): @@ -1168,6 +1426,12 @@ class SoupStrainer(object): kwargs['class'] = attrs attrs = None + if 'class_' in kwargs: + # Treat class_="foo" as a search for the 'class' + # attribute, overriding any non-dict value for attrs. + kwargs['class'] = kwargs['class_'] + del kwargs['class_'] + if kwargs: if attrs: attrs = attrs.copy() @@ -1342,6 +1606,6 @@ class SoupStrainer(object): class ResultSet(list): """A ResultSet is just a list that keeps track of the SoupStrainer that created it.""" - def __init__(self, source): - list.__init__([]) + def __init__(self, source, result=()): + super(ResultSet, self).__init__(result) self.source = source diff --git a/libs/bs4/testing.py b/libs/bs4/testing.py index 5a84b0b..fd4495a 100644 --- a/libs/bs4/testing.py +++ b/libs/bs4/testing.py @@ -81,6 +81,11 @@ class HTMLTreeBuilderSmokeTest(object): self.assertDoctypeHandled( 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"') + def test_empty_doctype(self): + soup = self.soup("<!DOCTYPE>") + doctype = soup.contents[0] + self.assertEqual("", doctype.strip()) + def test_public_doctype_with_url(self): doctype = 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"' self.assertDoctypeHandled(doctype) @@ -159,6 +164,12 @@ class HTMLTreeBuilderSmokeTest(object): comment = soup.find(text="foobar") self.assertEqual(comment.__class__, Comment) + # The comment is properly integrated into the tree. 
+ foo = soup.find(text="foo") + self.assertEqual(comment, foo.next_element) + baz = soup.find(text="baz") + self.assertEqual(comment, baz.previous_element) + def test_preserved_whitespace_in_pre_and_textarea(self): """Whitespace must be preserved in <pre> and <textarea> tags.""" self.assertSoupEquals("<pre> </pre>") @@ -202,6 +213,14 @@ class HTMLTreeBuilderSmokeTest(object): "<tbody><tr><td>Bar</td></tr></tbody>" "<tfoot><tr><td>Baz</td></tr></tfoot></table>") + def test_deeply_nested_multivalued_attribute(self): + # html5lib can set the attributes of the same tag many times + # as it rearranges the tree. This has caused problems with + # multivalued attributes. + markup = '<table><div><div class="css"></div></div></table>' + soup = self.soup(markup) + self.assertEqual(["css"], soup.div.div['class']) + def test_angle_brackets_in_attribute_values_are_escaped(self): self.assertSoupEquals('<a b="<a>"></a>', '<a b="<a>"></a>') @@ -209,12 +228,14 @@ class HTMLTreeBuilderSmokeTest(object): expect = u'<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>' self.assertSoupEquals('<p id="piñata"></p>', expect) self.assertSoupEquals('<p id="piñata"></p>', expect) + self.assertSoupEquals('<p id="piñata"></p>', expect) self.assertSoupEquals('<p id="piñata"></p>', expect) def test_entities_in_text_converted_to_unicode(self): expect = u'<p>pi\N{LATIN SMALL LETTER N WITH TILDE}ata</p>' self.assertSoupEquals("<p>piñata</p>", expect) self.assertSoupEquals("<p>piñata</p>", expect) + self.assertSoupEquals("<p>piñata</p>", expect) self.assertSoupEquals("<p>piñata</p>", expect) def test_quot_entity_converted_to_quotation_mark(self): @@ -227,6 +248,12 @@ class HTMLTreeBuilderSmokeTest(object): self.assertSoupEquals("�", expect) self.assertSoupEquals("�", expect) + def test_multipart_strings(self): + "Mostly to prevent a recurrence of a bug in the html5lib treebuilder." 
+ soup = self.soup("<html><h2>\nfoo</h2><p></p></html>") + self.assertEqual("p", soup.h2.string.next_element.name) + self.assertEqual("p", soup.p.name) + def test_basic_namespaces(self): """Parsers don't need to *understand* namespaces, but at the very least they should not choke on namespaces or lose @@ -254,6 +281,14 @@ class HTMLTreeBuilderSmokeTest(object): # to detect any differences between them. # + def test_can_parse_unicode_document(self): + # A seemingly innocuous document... but it's in Unicode! And + # it contains characters that can't be represented in the + # encoding found in the declaration! The horror! + markup = u'<html><head><meta encoding="euc-jp"></head><body>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</body>' + soup = self.soup(markup) + self.assertEqual(u'Sacr\xe9 bleu!', soup.body.string) + def test_soupstrainer(self): """Parsers should be able to work with SoupStrainers.""" strainer = SoupStrainer("b") @@ -445,6 +480,28 @@ class XMLTreeBuilderSmokeTest(object): self.assertEqual( soup.encode("utf-8"), markup) + def test_formatter_processes_script_tag_for_xml_documents(self): + doc = """ + <script type="text/javascript"> + </script> +""" + soup = BeautifulSoup(doc, "xml") + # lxml would have stripped this while parsing, but we can add + # it later. 
+ soup.script.string = 'console.log("< < hey > > ");' + encoded = soup.encode() + self.assertTrue(b"< < hey > >" in encoded) + + def test_can_parse_unicode_document(self): + markup = u'<?xml version="1.0" encoding="euc-jp"><root>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</root>' + soup = self.soup(markup) + self.assertEqual(u'Sacr\xe9 bleu!', soup.root.string) + + def test_popping_namespaced_tag(self): + markup = '<rss xmlns:dc="foo"><dc:creator>b</dc:creator><dc:date>2012-07-02T20:33:42Z</dc:date><dc:rights>c</dc:rights><image>d</image></rss>' + soup = self.soup(markup) + self.assertEqual( + unicode(soup.rss), markup) def test_docstring_includes_correct_encoding(self): soup = self.soup("<root/>") @@ -472,6 +529,20 @@ class XMLTreeBuilderSmokeTest(object): self.assertEqual("http://example.com/", root['xmlns:a']) self.assertEqual("http://example.net/", root['xmlns:b']) + def test_closing_namespaced_tag(self): + markup = '<p xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:date>20010504</dc:date></p>' + soup = self.soup(markup) + self.assertEqual(unicode(soup.p), markup) + + def test_namespaced_attributes(self): + markup = '<foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><bar xsi:schemaLocation="http://www.example.com"/></foo>' + soup = self.soup(markup) + self.assertEqual(unicode(soup.foo), markup) + + def test_namespaced_attributes_xml_namespace(self): + markup = '<foo xml:lang="fr">bar</foo>' + soup = self.soup(markup) + self.assertEqual(unicode(soup.foo), markup) class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest): """Smoke test for a tree builder that supports HTML5.""" @@ -501,6 +572,12 @@ class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest): self.assertEqual(namespace, soup.math.namespace) self.assertEqual(namespace, soup.msqrt.namespace) + def test_xml_declaration_becomes_comment(self): + markup = '<?xml version="1.0" encoding="utf-8"?><html></html>' + soup = self.soup(markup) + self.assertTrue(isinstance(soup.contents[0], Comment)) + 
self.assertEqual(soup.contents[0], '?xml version="1.0" encoding="utf-8"?') + self.assertEqual("html", soup.contents[0].next_element.name) def skipIf(condition, reason): def nothing(test, *args, **kwargs): diff --git a/libs/gntp/__init__.py b/libs/gntp/__init__.py index eabbfa4..e69de29 100755 --- a/libs/gntp/__init__.py +++ b/libs/gntp/__init__.py @@ -1,509 +0,0 @@ -import re -import hashlib -import time -import StringIO - -__version__ = '0.8' - -#GNTP/<version> <messagetype> <encryptionAlgorithmID>[:<ivValue>][ <keyHashAlgorithmID>:<keyHash>.<salt>] -GNTP_INFO_LINE = re.compile( - 'GNTP/(?P<version>\d+\.\d+) (?P<messagetype>REGISTER|NOTIFY|SUBSCRIBE|\-OK|\-ERROR)' + - ' (?P<encryptionAlgorithmID>[A-Z0-9]+(:(?P<ivValue>[A-F0-9]+))?) ?' + - '((?P<keyHashAlgorithmID>[A-Z0-9]+):(?P<keyHash>[A-F0-9]+).(?P<salt>[A-F0-9]+))?\r\n', - re.IGNORECASE -) - -GNTP_INFO_LINE_SHORT = re.compile( - 'GNTP/(?P<version>\d+\.\d+) (?P<messagetype>REGISTER|NOTIFY|SUBSCRIBE|\-OK|\-ERROR)', - re.IGNORECASE -) - -GNTP_HEADER = re.compile('([\w-]+):(.+)') - -GNTP_EOL = '\r\n' - - -class BaseError(Exception): - def gntp_error(self): - error = GNTPError(self.errorcode, self.errordesc) - return error.encode() - - -class ParseError(BaseError): - errorcode = 500 - errordesc = 'Error parsing the message' - - -class AuthError(BaseError): - errorcode = 400 - errordesc = 'Error with authorization' - - -class UnsupportedError(BaseError): - errorcode = 500 - errordesc = 'Currently unsupported by gntp.py' - - -class _GNTPBuffer(StringIO.StringIO): - """GNTP Buffer class""" - def writefmt(self, message = "", *args): - """Shortcut function for writing GNTP Headers""" - self.write((message % args).encode('utf8', 'replace')) - self.write(GNTP_EOL) - - -class _GNTPBase(object): - """Base initilization - - :param string messagetype: GNTP Message type - :param string version: GNTP Protocol version - :param string encription: Encryption protocol - """ - def __init__(self, messagetype = None, version = '1.0', 
encryption = None): - self.info = { - 'version': version, - 'messagetype': messagetype, - 'encryptionAlgorithmID': encryption - } - self.headers = {} - self.resources = {} - - def __str__(self): - return self.encode() - - def _parse_info(self, data): - """Parse the first line of a GNTP message to get security and other info values - - :param string data: GNTP Message - :return dict: Parsed GNTP Info line - """ - - match = GNTP_INFO_LINE.match(data) - - if not match: - raise ParseError('ERROR_PARSING_INFO_LINE') - - info = match.groupdict() - if info['encryptionAlgorithmID'] == 'NONE': - info['encryptionAlgorithmID'] = None - - return info - - def set_password(self, password, encryptAlgo = 'MD5'): - """Set a password for a GNTP Message - - :param string password: Null to clear password - :param string encryptAlgo: Supports MD5, SHA1, SHA256, SHA512 - """ - hash = { - 'MD5': hashlib.md5, - 'SHA1': hashlib.sha1, - 'SHA256': hashlib.sha256, - 'SHA512': hashlib.sha512, - } - - self.password = password - self.encryptAlgo = encryptAlgo.upper() - if not password: - self.info['encryptionAlgorithmID'] = None - self.info['keyHashAlgorithm'] = None - return - if not self.encryptAlgo in hash.keys(): - raise UnsupportedError('INVALID HASH "%s"' % self.encryptAlgo) - - hashfunction = hash.get(self.encryptAlgo) - - password = password.encode('utf8') - seed = time.ctime() - salt = hashfunction(seed).hexdigest() - saltHash = hashfunction(seed).digest() - keyBasis = password + saltHash - key = hashfunction(keyBasis).digest() - keyHash = hashfunction(key).hexdigest() - - self.info['keyHashAlgorithmID'] = self.encryptAlgo - self.info['keyHash'] = keyHash.upper() - self.info['salt'] = salt.upper() - - def _decode_hex(self, value): - """Helper function to decode hex string to `proper` hex string - - :param string value: Human readable hex string - :return string: Hex string - """ - result = '' - for i in range(0, len(value), 2): - tmp = int(value[i:i + 2], 16) - result += chr(tmp) - 
return result - - def _decode_binary(self, rawIdentifier, identifier): - rawIdentifier += '\r\n\r\n' - dataLength = int(identifier['Length']) - pointerStart = self.raw.find(rawIdentifier) + len(rawIdentifier) - pointerEnd = pointerStart + dataLength - data = self.raw[pointerStart:pointerEnd] - if not len(data) == dataLength: - raise ParseError('INVALID_DATA_LENGTH Expected: %s Recieved %s' % (dataLength, len(data))) - return data - - def _validate_password(self, password): - """Validate GNTP Message against stored password""" - self.password = password - if password == None: - raise AuthError('Missing password') - keyHash = self.info.get('keyHash', None) - if keyHash is None and self.password is None: - return True - if keyHash is None: - raise AuthError('Invalid keyHash') - if self.password is None: - raise AuthError('Missing password') - - password = self.password.encode('utf8') - saltHash = self._decode_hex(self.info['salt']) - - keyBasis = password + saltHash - key = hashlib.md5(keyBasis).digest() - keyHash = hashlib.md5(key).hexdigest() - - if not keyHash.upper() == self.info['keyHash'].upper(): - raise AuthError('Invalid Hash') - return True - - def validate(self): - """Verify required headers""" - for header in self._requiredHeaders: - if not self.headers.get(header, False): - raise ParseError('Missing Notification Header: ' + header) - - def _format_info(self): - """Generate info line for GNTP Message - - :return string: - """ - info = u'GNTP/%s %s' % ( - self.info.get('version'), - self.info.get('messagetype'), - ) - if self.info.get('encryptionAlgorithmID', None): - info += ' %s:%s' % ( - self.info.get('encryptionAlgorithmID'), - self.info.get('ivValue'), - ) - else: - info += ' NONE' - - if self.info.get('keyHashAlgorithmID', None): - info += ' %s:%s.%s' % ( - self.info.get('keyHashAlgorithmID'), - self.info.get('keyHash'), - self.info.get('salt') - ) - - return info - - def _parse_dict(self, data): - """Helper function to parse blocks of GNTP headers 
into a dictionary - - :param string data: - :return dict: - """ - dict = {} - for line in data.split('\r\n'): - match = GNTP_HEADER.match(line) - if not match: - continue - - key = unicode(match.group(1).strip(), 'utf8', 'replace') - val = unicode(match.group(2).strip(), 'utf8', 'replace') - dict[key] = val - return dict - - def add_header(self, key, value): - if isinstance(value, unicode): - self.headers[key] = value - else: - self.headers[key] = unicode('%s' % value, 'utf8', 'replace') - - def add_resource(self, data): - """Add binary resource - - :param string data: Binary Data - """ - identifier = hashlib.md5(data).hexdigest() - self.resources[identifier] = data - return 'x-growl-resource://%s' % identifier - - def decode(self, data, password = None): - """Decode GNTP Message - - :param string data: - """ - self.password = password - self.raw = data - parts = self.raw.split('\r\n\r\n') - self.info = self._parse_info(data) - self.headers = self._parse_dict(parts[0]) - - def encode(self): - """Encode a generic GNTP Message - - :return string: GNTP Message ready to be sent - """ - - buffer = _GNTPBuffer() - - buffer.writefmt(self._format_info()) - - #Headers - for k, v in self.headers.iteritems(): - buffer.writefmt('%s: %s', k, v) - buffer.writefmt() - - #Resources - for resource, data in self.resources.iteritems(): - buffer.writefmt('Identifier: %s', resource) - buffer.writefmt('Length: %d', len(data)) - buffer.writefmt() - buffer.write(data) - buffer.writefmt() - buffer.writefmt() - - return buffer.getvalue() - - -class GNTPRegister(_GNTPBase): - """Represents a GNTP Registration Command - - :param string data: (Optional) See decode() - :param string password: (Optional) Password to use while encoding/decoding messages - """ - _requiredHeaders = [ - 'Application-Name', - 'Notifications-Count' - ] - _requiredNotificationHeaders = ['Notification-Name'] - - def __init__(self, data = None, password = None): - _GNTPBase.__init__(self, 'REGISTER') - self.notifications 
= [] - - if data: - self.decode(data, password) - else: - self.set_password(password) - self.add_header('Application-Name', 'pygntp') - self.add_header('Notifications-Count', 0) - - def validate(self): - '''Validate required headers and validate notification headers''' - for header in self._requiredHeaders: - if not self.headers.get(header, False): - raise ParseError('Missing Registration Header: ' + header) - for notice in self.notifications: - for header in self._requiredNotificationHeaders: - if not notice.get(header, False): - raise ParseError('Missing Notification Header: ' + header) - - def decode(self, data, password): - """Decode existing GNTP Registration message - - :param string data: Message to decode - """ - self.raw = data - parts = self.raw.split('\r\n\r\n') - self.info = self._parse_info(data) - self._validate_password(password) - self.headers = self._parse_dict(parts[0]) - - for i, part in enumerate(parts): - if i == 0: - continue # Skip Header - if part.strip() == '': - continue - notice = self._parse_dict(part) - if notice.get('Notification-Name', False): - self.notifications.append(notice) - elif notice.get('Identifier', False): - notice['Data'] = self._decode_binary(part, notice) - #open('register.png','wblol').write(notice['Data']) - self.resources[notice.get('Identifier')] = notice - - def add_notification(self, name, enabled = True): - """Add new Notification to Registration message - - :param string name: Notification Name - :param boolean enabled: Enable this notification by default - """ - notice = {} - notice['Notification-Name'] = u'%s' % name - notice['Notification-Enabled'] = u'%s' % enabled - - self.notifications.append(notice) - self.add_header('Notifications-Count', len(self.notifications)) - - def encode(self): - """Encode a GNTP Registration Message - - :return string: Encoded GNTP Registration message - """ - - buffer = _GNTPBuffer() - - buffer.writefmt(self._format_info()) - - #Headers - for k, v in self.headers.iteritems(): - 
buffer.writefmt('%s: %s', k, v) - buffer.writefmt() - - #Notifications - if len(self.notifications) > 0: - for notice in self.notifications: - for k, v in notice.iteritems(): - buffer.writefmt('%s: %s', k, v) - buffer.writefmt() - - #Resources - for resource, data in self.resources.iteritems(): - buffer.writefmt('Identifier: %s', resource) - buffer.writefmt('Length: %d', len(data)) - buffer.writefmt() - buffer.write(data) - buffer.writefmt() - buffer.writefmt() - - return buffer.getvalue() - - -class GNTPNotice(_GNTPBase): - """Represents a GNTP Notification Command - - :param string data: (Optional) See decode() - :param string app: (Optional) Set Application-Name - :param string name: (Optional) Set Notification-Name - :param string title: (Optional) Set Notification Title - :param string password: (Optional) Password to use while encoding/decoding messages - """ - _requiredHeaders = [ - 'Application-Name', - 'Notification-Name', - 'Notification-Title' - ] - - def __init__(self, data = None, app = None, name = None, title = None, password = None): - _GNTPBase.__init__(self, 'NOTIFY') - - if data: - self.decode(data, password) - else: - self.set_password(password) - if app: - self.add_header('Application-Name', app) - if name: - self.add_header('Notification-Name', name) - if title: - self.add_header('Notification-Title', title) - - def decode(self, data, password): - """Decode existing GNTP Notification message - - :param string data: Message to decode. 
- """ - self.raw = data - parts = self.raw.split('\r\n\r\n') - self.info = self._parse_info(data) - self._validate_password(password) - self.headers = self._parse_dict(parts[0]) - - for i, part in enumerate(parts): - if i == 0: - continue # Skip Header - if part.strip() == '': - continue - notice = self._parse_dict(part) - if notice.get('Identifier', False): - notice['Data'] = self._decode_binary(part, notice) - #open('notice.png','wblol').write(notice['Data']) - self.resources[notice.get('Identifier')] = notice - - -class GNTPSubscribe(_GNTPBase): - """Represents a GNTP Subscribe Command - - :param string data: (Optional) See decode() - :param string password: (Optional) Password to use while encoding/decoding messages - """ - _requiredHeaders = [ - 'Subscriber-ID', - 'Subscriber-Name', - ] - - def __init__(self, data = None, password = None): - _GNTPBase.__init__(self, 'SUBSCRIBE') - if data: - self.decode(data, password) - else: - self.set_password(password) - - -class GNTPOK(_GNTPBase): - """Represents a GNTP OK Response - - :param string data: (Optional) See _GNTPResponse.decode() - :param string action: (Optional) Set type of action the OK Response is for - """ - _requiredHeaders = ['Response-Action'] - - def __init__(self, data = None, action = None): - _GNTPBase.__init__(self, '-OK') - if data: - self.decode(data) - if action: - self.add_header('Response-Action', action) - - -class GNTPError(_GNTPBase): - """Represents a GNTP Error response - - :param string data: (Optional) See _GNTPResponse.decode() - :param string errorcode: (Optional) Error code - :param string errordesc: (Optional) Error Description - """ - _requiredHeaders = ['Error-Code', 'Error-Description'] - - def __init__(self, data = None, errorcode = None, errordesc = None): - _GNTPBase.__init__(self, '-ERROR') - if data: - self.decode(data) - if errorcode: - self.add_header('Error-Code', errorcode) - self.add_header('Error-Description', errordesc) - - def error(self): - return 
(self.headers.get('Error-Code', None), - self.headers.get('Error-Description', None)) - - -def parse_gntp(data, password = None): - """Attempt to parse a message as a GNTP message - - :param string data: Message to be parsed - :param string password: Optional password to be used to verify the message - """ - match = GNTP_INFO_LINE_SHORT.match(data) - if not match: - raise ParseError('INVALID_GNTP_INFO') - info = match.groupdict() - if info['messagetype'] == 'REGISTER': - return GNTPRegister(data, password = password) - elif info['messagetype'] == 'NOTIFY': - return GNTPNotice(data, password = password) - elif info['messagetype'] == 'SUBSCRIBE': - return GNTPSubscribe(data, password = password) - elif info['messagetype'] == '-OK': - return GNTPOK(data) - elif info['messagetype'] == '-ERROR': - return GNTPError(data) - raise ParseError('INVALID_GNTP_MESSAGE') diff --git a/libs/gntp/cli.py b/libs/gntp/cli.py new file mode 100644 index 0000000..bc08306 --- /dev/null +++ b/libs/gntp/cli.py @@ -0,0 +1,141 @@ +# Copyright: 2013 Paul Traylor +# These sources are released under the terms of the MIT license: see LICENSE + +import logging +import os +import sys +from optparse import OptionParser, OptionGroup + +from gntp.notifier import GrowlNotifier +from gntp.shim import RawConfigParser +from gntp.version import __version__ + +DEFAULT_CONFIG = os.path.expanduser('~/.gntp') + +config = RawConfigParser({ + 'hostname': 'localhost', + 'password': None, + 'port': 23053, +}) +config.read([DEFAULT_CONFIG]) +if not config.has_section('gntp'): + config.add_section('gntp') + + +class ClientParser(OptionParser): + def __init__(self): + OptionParser.__init__(self, version="%%prog %s" % __version__) + + group = OptionGroup(self, "Network Options") + group.add_option("-H", "--host", + dest="host", default=config.get('gntp', 'hostname'), + help="Specify a hostname to which to send a remote notification. 
[%default]") + group.add_option("--port", + dest="port", default=config.getint('gntp', 'port'), type="int", + help="port to listen on [%default]") + group.add_option("-P", "--password", + dest='password', default=config.get('gntp', 'password'), + help="Network password") + self.add_option_group(group) + + group = OptionGroup(self, "Notification Options") + group.add_option("-n", "--name", + dest="app", default='Python GNTP Test Client', + help="Set the name of the application [%default]") + group.add_option("-s", "--sticky", + dest='sticky', default=False, action="store_true", + help="Make the notification sticky [%default]") + group.add_option("--image", + dest="icon", default=None, + help="Icon for notification (URL or /path/to/file)") + group.add_option("-m", "--message", + dest="message", default=None, + help="Sets the message instead of using stdin") + group.add_option("-p", "--priority", + dest="priority", default=0, type="int", + help="-2 to 2 [%default]") + group.add_option("-d", "--identifier", + dest="identifier", + help="Identifier for coalescing") + group.add_option("-t", "--title", + dest="title", default=None, + help="Set the title of the notification [%default]") + group.add_option("-N", "--notification", + dest="name", default='Notification', + help="Set the notification name [%default]") + group.add_option("--callback", + dest="callback", + help="URL callback") + self.add_option_group(group) + + # Extra Options + self.add_option('-v', '--verbose', + dest='verbose', default=0, action='count', + help="Verbosity levels") + + def parse_args(self, args=None, values=None): + values, args = OptionParser.parse_args(self, args, values) + + if values.message is None: + print('Enter a message followed by Ctrl-D') + try: + message = sys.stdin.read() + except KeyboardInterrupt: + exit() + else: + message = values.message + + if values.title is None: + values.title = ' '.join(args) + + # If we still have an empty title, use the + # first bit of the message as 
the title + if values.title == '': + values.title = message[:20] + + values.verbose = logging.WARNING - values.verbose * 10 + + return values, message + + +def main(): + (options, message) = ClientParser().parse_args() + logging.basicConfig(level=options.verbose) + if not os.path.exists(DEFAULT_CONFIG): + logging.info('No config read found at %s', DEFAULT_CONFIG) + + growl = GrowlNotifier( + applicationName=options.app, + notifications=[options.name], + defaultNotifications=[options.name], + hostname=options.host, + password=options.password, + port=options.port, + ) + result = growl.register() + if result is not True: + exit(result) + + # This would likely be better placed within the growl notifier + # class but until I make _checkIcon smarter this is "easier" + if options.icon is not None and not options.icon.startswith('http'): + logging.info('Loading image %s', options.icon) + f = open(options.icon) + options.icon = f.read() + f.close() + + result = growl.notify( + noteType=options.name, + title=options.title, + description=message, + icon=options.icon, + sticky=options.sticky, + priority=options.priority, + callback=options.callback, + identifier=options.identifier, + ) + if result is not True: + exit(result) + +if __name__ == "__main__": + main() diff --git a/libs/gntp/config.py b/libs/gntp/config.py new file mode 100644 index 0000000..7536bd1 --- /dev/null +++ b/libs/gntp/config.py @@ -0,0 +1,77 @@ +# Copyright: 2013 Paul Traylor +# These sources are released under the terms of the MIT license: see LICENSE + +""" +The gntp.config module is provided as an extended GrowlNotifier object that takes +advantage of the ConfigParser module to allow us to setup some default values +(such as hostname, password, and port) in a more global way to be shared among +programs using gntp +""" +import logging +import os + +import gntp.notifier +import gntp.shim + +__all__ = [ + 'mini', + 'GrowlNotifier' +] + +logger = logging.getLogger(__name__) + + +class 
GrowlNotifier(gntp.notifier.GrowlNotifier): + """ + ConfigParser enhanced GrowlNotifier object + + For right now, we are only interested in letting users overide certain + values from ~/.gntp + + :: + + [gntp] + hostname = ? + password = ? + port = ? + """ + def __init__(self, *args, **kwargs): + config = gntp.shim.RawConfigParser({ + 'hostname': kwargs.get('hostname', 'localhost'), + 'password': kwargs.get('password'), + 'port': kwargs.get('port', 23053), + }) + + config.read([os.path.expanduser('~/.gntp')]) + + # If the file does not exist, then there will be no gntp section defined + # and the config.get() lines below will get confused. Since we are not + # saving the config, it should be safe to just add it here so the + # code below doesn't complain + if not config.has_section('gntp'): + logger.info('Error reading ~/.gntp config file') + config.add_section('gntp') + + kwargs['password'] = config.get('gntp', 'password') + kwargs['hostname'] = config.get('gntp', 'hostname') + kwargs['port'] = config.getint('gntp', 'port') + + super(GrowlNotifier, self).__init__(*args, **kwargs) + + +def mini(description, **kwargs): + """Single notification function + + Simple notification function in one line. 
Has only one required parameter + and attempts to use reasonable defaults for everything else + :param string description: Notification message + """ + kwargs['notifierFactory'] = GrowlNotifier + gntp.notifier.mini(description, **kwargs) + + +if __name__ == '__main__': + # If we're running this module directly we're likely running it as a test + # so extra debugging is useful + logging.basicConfig(level=logging.INFO) + mini('Testing mini notification') diff --git a/libs/gntp/core.py b/libs/gntp/core.py new file mode 100644 index 0000000..ee544d3 --- /dev/null +++ b/libs/gntp/core.py @@ -0,0 +1,511 @@ +# Copyright: 2013 Paul Traylor +# These sources are released under the terms of the MIT license: see LICENSE + +import hashlib +import re +import time + +import gntp.shim +import gntp.errors as errors + +__all__ = [ + 'GNTPRegister', + 'GNTPNotice', + 'GNTPSubscribe', + 'GNTPOK', + 'GNTPError', + 'parse_gntp', +] + +#GNTP/<version> <messagetype> <encryptionAlgorithmID>[:<ivValue>][ <keyHashAlgorithmID>:<keyHash>.<salt>] +GNTP_INFO_LINE = re.compile( + 'GNTP/(?P<version>\d+\.\d+) (?P<messagetype>REGISTER|NOTIFY|SUBSCRIBE|\-OK|\-ERROR)' + + ' (?P<encryptionAlgorithmID>[A-Z0-9]+(:(?P<ivValue>[A-F0-9]+))?) ?' 
+ + '((?P<keyHashAlgorithmID>[A-Z0-9]+):(?P<keyHash>[A-F0-9]+).(?P<salt>[A-F0-9]+))?\r\n', + re.IGNORECASE +) + +GNTP_INFO_LINE_SHORT = re.compile( + 'GNTP/(?P<version>\d+\.\d+) (?P<messagetype>REGISTER|NOTIFY|SUBSCRIBE|\-OK|\-ERROR)', + re.IGNORECASE +) + +GNTP_HEADER = re.compile('([\w-]+):(.+)') + +GNTP_EOL = gntp.shim.b('\r\n') +GNTP_SEP = gntp.shim.b(': ') + + +class _GNTPBuffer(gntp.shim.StringIO): + """GNTP Buffer class""" + def writeln(self, value=None): + if value: + self.write(gntp.shim.b(value)) + self.write(GNTP_EOL) + + def writeheader(self, key, value): + if not isinstance(value, str): + value = str(value) + self.write(gntp.shim.b(key)) + self.write(GNTP_SEP) + self.write(gntp.shim.b(value)) + self.write(GNTP_EOL) + + +class _GNTPBase(object): + """Base initilization + + :param string messagetype: GNTP Message type + :param string version: GNTP Protocol version + :param string encription: Encryption protocol + """ + def __init__(self, messagetype=None, version='1.0', encryption=None): + self.info = { + 'version': version, + 'messagetype': messagetype, + 'encryptionAlgorithmID': encryption + } + self.hash_algo = { + 'MD5': hashlib.md5, + 'SHA1': hashlib.sha1, + 'SHA256': hashlib.sha256, + 'SHA512': hashlib.sha512, + } + self.headers = {} + self.resources = {} + + def __str__(self): + return self.encode() + + def _parse_info(self, data): + """Parse the first line of a GNTP message to get security and other info values + + :param string data: GNTP Message + :return dict: Parsed GNTP Info line + """ + + match = GNTP_INFO_LINE.match(data) + + if not match: + raise errors.ParseError('ERROR_PARSING_INFO_LINE') + + info = match.groupdict() + if info['encryptionAlgorithmID'] == 'NONE': + info['encryptionAlgorithmID'] = None + + return info + + def set_password(self, password, encryptAlgo='MD5'): + """Set a password for a GNTP Message + + :param string password: Null to clear password + :param string encryptAlgo: Supports MD5, SHA1, SHA256, SHA512 + """ + if 
not password: + self.info['encryptionAlgorithmID'] = None + self.info['keyHashAlgorithm'] = None + return + + self.password = gntp.shim.b(password) + self.encryptAlgo = encryptAlgo.upper() + + if not self.encryptAlgo in self.hash_algo: + raise errors.UnsupportedError('INVALID HASH "%s"' % self.encryptAlgo) + + hashfunction = self.hash_algo.get(self.encryptAlgo) + + password = password.encode('utf8') + seed = time.ctime().encode('utf8') + salt = hashfunction(seed).hexdigest() + saltHash = hashfunction(seed).digest() + keyBasis = password + saltHash + key = hashfunction(keyBasis).digest() + keyHash = hashfunction(key).hexdigest() + + self.info['keyHashAlgorithmID'] = self.encryptAlgo + self.info['keyHash'] = keyHash.upper() + self.info['salt'] = salt.upper() + + def _decode_hex(self, value): + """Helper function to decode hex string to `proper` hex string + + :param string value: Human readable hex string + :return string: Hex string + """ + result = '' + for i in range(0, len(value), 2): + tmp = int(value[i:i + 2], 16) + result += chr(tmp) + return result + + def _decode_binary(self, rawIdentifier, identifier): + rawIdentifier += '\r\n\r\n' + dataLength = int(identifier['Length']) + pointerStart = self.raw.find(rawIdentifier) + len(rawIdentifier) + pointerEnd = pointerStart + dataLength + data = self.raw[pointerStart:pointerEnd] + if not len(data) == dataLength: + raise errors.ParseError('INVALID_DATA_LENGTH Expected: %s Recieved %s' % (dataLength, len(data))) + return data + + def _validate_password(self, password): + """Validate GNTP Message against stored password""" + self.password = password + if password is None: + raise errors.AuthError('Missing password') + keyHash = self.info.get('keyHash', None) + if keyHash is None and self.password is None: + return True + if keyHash is None: + raise errors.AuthError('Invalid keyHash') + if self.password is None: + raise errors.AuthError('Missing password') + + keyHashAlgorithmID = 
self.info.get('keyHashAlgorithmID','MD5') + + password = self.password.encode('utf8') + saltHash = self._decode_hex(self.info['salt']) + + keyBasis = password + saltHash + self.key = self.hash_algo[keyHashAlgorithmID](keyBasis).digest() + keyHash = self.hash_algo[keyHashAlgorithmID](self.key).hexdigest() + + if not keyHash.upper() == self.info['keyHash'].upper(): + raise errors.AuthError('Invalid Hash') + return True + + def validate(self): + """Verify required headers""" + for header in self._requiredHeaders: + if not self.headers.get(header, False): + raise errors.ParseError('Missing Notification Header: ' + header) + + def _format_info(self): + """Generate info line for GNTP Message + + :return string: + """ + info = 'GNTP/%s %s' % ( + self.info.get('version'), + self.info.get('messagetype'), + ) + if self.info.get('encryptionAlgorithmID', None): + info += ' %s:%s' % ( + self.info.get('encryptionAlgorithmID'), + self.info.get('ivValue'), + ) + else: + info += ' NONE' + + if self.info.get('keyHashAlgorithmID', None): + info += ' %s:%s.%s' % ( + self.info.get('keyHashAlgorithmID'), + self.info.get('keyHash'), + self.info.get('salt') + ) + + return info + + def _parse_dict(self, data): + """Helper function to parse blocks of GNTP headers into a dictionary + + :param string data: + :return dict: Dictionary of parsed GNTP Headers + """ + d = {} + for line in data.split('\r\n'): + match = GNTP_HEADER.match(line) + if not match: + continue + + key = match.group(1).strip() + val = match.group(2).strip() + d[key] = val + return d + + def add_header(self, key, value): + self.headers[key] = value + + def add_resource(self, data): + """Add binary resource + + :param string data: Binary Data + """ + data = gntp.shim.b(data) + identifier = hashlib.md5(data).hexdigest() + self.resources[identifier] = data + return 'x-growl-resource://%s' % identifier + + def decode(self, data, password=None): + """Decode GNTP Message + + :param string data: + """ + self.password = password + 
self.raw = gntp.shim.u(data) + parts = self.raw.split('\r\n\r\n') + self.info = self._parse_info(self.raw) + self.headers = self._parse_dict(parts[0]) + + def encode(self): + """Encode a generic GNTP Message + + :return string: GNTP Message ready to be sent. Returned as a byte string + """ + + buff = _GNTPBuffer() + + buff.writeln(self._format_info()) + + #Headers + for k, v in self.headers.items(): + buff.writeheader(k, v) + buff.writeln() + + #Resources + for resource, data in self.resources.items(): + buff.writeheader('Identifier', resource) + buff.writeheader('Length', len(data)) + buff.writeln() + buff.write(data) + buff.writeln() + buff.writeln() + + return buff.getvalue() + + +class GNTPRegister(_GNTPBase): + """Represents a GNTP Registration Command + + :param string data: (Optional) See decode() + :param string password: (Optional) Password to use while encoding/decoding messages + """ + _requiredHeaders = [ + 'Application-Name', + 'Notifications-Count' + ] + _requiredNotificationHeaders = ['Notification-Name'] + + def __init__(self, data=None, password=None): + _GNTPBase.__init__(self, 'REGISTER') + self.notifications = [] + + if data: + self.decode(data, password) + else: + self.set_password(password) + self.add_header('Application-Name', 'pygntp') + self.add_header('Notifications-Count', 0) + + def validate(self): + '''Validate required headers and validate notification headers''' + for header in self._requiredHeaders: + if not self.headers.get(header, False): + raise errors.ParseError('Missing Registration Header: ' + header) + for notice in self.notifications: + for header in self._requiredNotificationHeaders: + if not notice.get(header, False): + raise errors.ParseError('Missing Notification Header: ' + header) + + def decode(self, data, password): + """Decode existing GNTP Registration message + + :param string data: Message to decode + """ + self.raw = gntp.shim.u(data) + parts = self.raw.split('\r\n\r\n') + self.info = self._parse_info(self.raw) + 
self._validate_password(password) + self.headers = self._parse_dict(parts[0]) + + for i, part in enumerate(parts): + if i == 0: + continue # Skip Header + if part.strip() == '': + continue + notice = self._parse_dict(part) + if notice.get('Notification-Name', False): + self.notifications.append(notice) + elif notice.get('Identifier', False): + notice['Data'] = self._decode_binary(part, notice) + #open('register.png','wblol').write(notice['Data']) + self.resources[notice.get('Identifier')] = notice + + def add_notification(self, name, enabled=True): + """Add new Notification to Registration message + + :param string name: Notification Name + :param boolean enabled: Enable this notification by default + """ + notice = {} + notice['Notification-Name'] = name + notice['Notification-Enabled'] = enabled + + self.notifications.append(notice) + self.add_header('Notifications-Count', len(self.notifications)) + + def encode(self): + """Encode a GNTP Registration Message + + :return string: Encoded GNTP Registration message. 
Returned as a byte string + """ + + buff = _GNTPBuffer() + + buff.writeln(self._format_info()) + + #Headers + for k, v in self.headers.items(): + buff.writeheader(k, v) + buff.writeln() + + #Notifications + if len(self.notifications) > 0: + for notice in self.notifications: + for k, v in notice.items(): + buff.writeheader(k, v) + buff.writeln() + + #Resources + for resource, data in self.resources.items(): + buff.writeheader('Identifier', resource) + buff.writeheader('Length', len(data)) + buff.writeln() + buff.write(data) + buff.writeln() + buff.writeln() + + return buff.getvalue() + + +class GNTPNotice(_GNTPBase): + """Represents a GNTP Notification Command + + :param string data: (Optional) See decode() + :param string app: (Optional) Set Application-Name + :param string name: (Optional) Set Notification-Name + :param string title: (Optional) Set Notification Title + :param string password: (Optional) Password to use while encoding/decoding messages + """ + _requiredHeaders = [ + 'Application-Name', + 'Notification-Name', + 'Notification-Title' + ] + + def __init__(self, data=None, app=None, name=None, title=None, password=None): + _GNTPBase.__init__(self, 'NOTIFY') + + if data: + self.decode(data, password) + else: + self.set_password(password) + if app: + self.add_header('Application-Name', app) + if name: + self.add_header('Notification-Name', name) + if title: + self.add_header('Notification-Title', title) + + def decode(self, data, password): + """Decode existing GNTP Notification message + + :param string data: Message to decode. 
+ """ + self.raw = gntp.shim.u(data) + parts = self.raw.split('\r\n\r\n') + self.info = self._parse_info(self.raw) + self._validate_password(password) + self.headers = self._parse_dict(parts[0]) + + for i, part in enumerate(parts): + if i == 0: + continue # Skip Header + if part.strip() == '': + continue + notice = self._parse_dict(part) + if notice.get('Identifier', False): + notice['Data'] = self._decode_binary(part, notice) + #open('notice.png','wblol').write(notice['Data']) + self.resources[notice.get('Identifier')] = notice + + +class GNTPSubscribe(_GNTPBase): + """Represents a GNTP Subscribe Command + + :param string data: (Optional) See decode() + :param string password: (Optional) Password to use while encoding/decoding messages + """ + _requiredHeaders = [ + 'Subscriber-ID', + 'Subscriber-Name', + ] + + def __init__(self, data=None, password=None): + _GNTPBase.__init__(self, 'SUBSCRIBE') + if data: + self.decode(data, password) + else: + self.set_password(password) + + +class GNTPOK(_GNTPBase): + """Represents a GNTP OK Response + + :param string data: (Optional) See _GNTPResponse.decode() + :param string action: (Optional) Set type of action the OK Response is for + """ + _requiredHeaders = ['Response-Action'] + + def __init__(self, data=None, action=None): + _GNTPBase.__init__(self, '-OK') + if data: + self.decode(data) + if action: + self.add_header('Response-Action', action) + + +class GNTPError(_GNTPBase): + """Represents a GNTP Error response + + :param string data: (Optional) See _GNTPResponse.decode() + :param string errorcode: (Optional) Error code + :param string errordesc: (Optional) Error Description + """ + _requiredHeaders = ['Error-Code', 'Error-Description'] + + def __init__(self, data=None, errorcode=None, errordesc=None): + _GNTPBase.__init__(self, '-ERROR') + if data: + self.decode(data) + if errorcode: + self.add_header('Error-Code', errorcode) + self.add_header('Error-Description', errordesc) + + def error(self): + return 
(self.headers.get('Error-Code', None), + self.headers.get('Error-Description', None)) + + +def parse_gntp(data, password=None): + """Attempt to parse a message as a GNTP message + + :param string data: Message to be parsed + :param string password: Optional password to be used to verify the message + """ + data = gntp.shim.u(data) + match = GNTP_INFO_LINE_SHORT.match(data) + if not match: + raise errors.ParseError('INVALID_GNTP_INFO') + info = match.groupdict() + if info['messagetype'] == 'REGISTER': + return GNTPRegister(data, password=password) + elif info['messagetype'] == 'NOTIFY': + return GNTPNotice(data, password=password) + elif info['messagetype'] == 'SUBSCRIBE': + return GNTPSubscribe(data, password=password) + elif info['messagetype'] == '-OK': + return GNTPOK(data) + elif info['messagetype'] == '-ERROR': + return GNTPError(data) + raise errors.ParseError('INVALID_GNTP_MESSAGE') diff --git a/libs/gntp/errors.py b/libs/gntp/errors.py new file mode 100644 index 0000000..c006fd6 --- /dev/null +++ b/libs/gntp/errors.py @@ -0,0 +1,25 @@ +# Copyright: 2013 Paul Traylor +# These sources are released under the terms of the MIT license: see LICENSE + +class BaseError(Exception): + pass + + +class ParseError(BaseError): + errorcode = 500 + errordesc = 'Error parsing the message' + + +class AuthError(BaseError): + errorcode = 400 + errordesc = 'Error with authorization' + + +class UnsupportedError(BaseError): + errorcode = 500 + errordesc = 'Currently unsupported by gntp.py' + + +class NetworkError(BaseError): + errorcode = 500 + errordesc = "Error connecting to growl server" diff --git a/libs/gntp/notifier.py b/libs/gntp/notifier.py index 539dae2..1719ecd 100755 --- a/libs/gntp/notifier.py +++ b/libs/gntp/notifier.py @@ -1,3 +1,6 @@ +# Copyright: 2013 Paul Traylor +# These sources are released under the terms of the MIT license: see LICENSE + """ The gntp.notifier module is provided as a simple way to send notifications using GNTP @@ -9,10 +12,15 @@ using GNTP 
`Original Python bindings <http://code.google.com/p/growl/source/browse/Bindings/python/Growl.py>`_ """ -import gntp -import socket import logging import platform +import socket +import sys + +from gntp.version import __version__ +import gntp.core +import gntp.errors as errors +import gntp.shim __all__ = [ 'mini', @@ -37,9 +45,9 @@ class GrowlNotifier(object): passwordHash = 'MD5' socketTimeout = 3 - def __init__(self, applicationName = 'Python GNTP', notifications = [], - defaultNotifications = None, applicationIcon = None, hostname = 'localhost', - password = None, port = 23053): + def __init__(self, applicationName='Python GNTP', notifications=[], + defaultNotifications=None, applicationIcon=None, hostname='localhost', + password=None, port=23053): self.applicationName = applicationName self.notifications = list(notifications) @@ -61,7 +69,7 @@ class GrowlNotifier(object): then we return False ''' logger.info('Checking icon') - return data.startswith('http') + return gntp.shim.u(data).startswith('http') def register(self): """Send GNTP Registration @@ -71,7 +79,7 @@ class GrowlNotifier(object): sent a registration message at least once """ logger.info('Sending registration to %s:%s', self.hostname, self.port) - register = gntp.GNTPRegister() + register = gntp.core.GNTPRegister() register.add_header('Application-Name', self.applicationName) for notification in self.notifications: enabled = notification in self.defaultNotifications @@ -80,16 +88,16 @@ class GrowlNotifier(object): if self._checkIcon(self.applicationIcon): register.add_header('Application-Icon', self.applicationIcon) else: - id = register.add_resource(self.applicationIcon) - register.add_header('Application-Icon', id) + resource = register.add_resource(self.applicationIcon) + register.add_header('Application-Icon', resource) if self.password: register.set_password(self.password, self.passwordHash) self.add_origin_info(register) self.register_hook(register) return self._send('register', register) - 
def notify(self, noteType, title, description, icon = None, sticky = False, - priority = None, callback = None, identifier = None): + def notify(self, noteType, title, description, icon=None, sticky=False, + priority=None, callback=None, identifier=None, custom={}): """Send a GNTP notifications .. warning:: @@ -102,6 +110,8 @@ class GrowlNotifier(object): :param boolean sticky: Sticky notification :param integer priority: Message priority level from -2 to 2 :param string callback: URL callback + :param dict custom: Custom attributes. Key names should be prefixed with X- + according to the spec but this is not enforced by this class .. warning:: For now, only URL callbacks are supported. In the future, the @@ -109,7 +119,7 @@ class GrowlNotifier(object): """ logger.info('Sending notification [%s] to %s:%s', noteType, self.hostname, self.port) assert noteType in self.notifications - notice = gntp.GNTPNotice() + notice = gntp.core.GNTPNotice() notice.add_header('Application-Name', self.applicationName) notice.add_header('Notification-Name', noteType) notice.add_header('Notification-Title', title) @@ -123,8 +133,8 @@ class GrowlNotifier(object): if self._checkIcon(icon): notice.add_header('Notification-Icon', icon) else: - id = notice.add_resource(icon) - notice.add_header('Notification-Icon', id) + resource = notice.add_resource(icon) + notice.add_header('Notification-Icon', resource) if description: notice.add_header('Notification-Text', description) @@ -133,6 +143,9 @@ class GrowlNotifier(object): if identifier: notice.add_header('Notification-Coalescing-ID', identifier) + for key in custom: + notice.add_header(key, custom[key]) + self.add_origin_info(notice) self.notify_hook(notice) @@ -140,7 +153,7 @@ class GrowlNotifier(object): def subscribe(self, id, name, port): """Send a Subscribe request to a remote machine""" - sub = gntp.GNTPSubscribe() + sub = gntp.core.GNTPSubscribe() sub.add_header('Subscriber-ID', id) sub.add_header('Subscriber-Name', name) 
sub.add_header('Subscriber-Port', port) @@ -156,7 +169,7 @@ class GrowlNotifier(object): """Add optional Origin headers to message""" packet.add_header('Origin-Machine-Name', platform.node()) packet.add_header('Origin-Software-Name', 'gntp.py') - packet.add_header('Origin-Software-Version', gntp.__version__) + packet.add_header('Origin-Software-Version', __version__) packet.add_header('Origin-Platform-Name', platform.system()) packet.add_header('Origin-Platform-Version', platform.platform()) @@ -179,27 +192,33 @@ class GrowlNotifier(object): s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.settimeout(self.socketTimeout) - s.connect((self.hostname, self.port)) - s.send(data) - recv_data = s.recv(1024) - while not recv_data.endswith("\r\n\r\n"): - recv_data += s.recv(1024) - response = gntp.parse_gntp(recv_data) + try: + s.connect((self.hostname, self.port)) + s.send(data) + recv_data = s.recv(1024) + while not recv_data.endswith(gntp.shim.b("\r\n\r\n")): + recv_data += s.recv(1024) + except socket.error: + # Python2.5 and Python3 compatibile exception + exc = sys.exc_info()[1] + raise errors.NetworkError(exc) + + response = gntp.core.parse_gntp(recv_data) s.close() logger.debug('From : %s:%s <%s>\n%s', self.hostname, self.port, response.__class__, response) - if type(response) == gntp.GNTPOK: + if type(response) == gntp.core.GNTPOK: return True logger.error('Invalid response: %s', response.error()) return response.error() -def mini(description, applicationName = 'PythonMini', noteType = "Message", - title = "Mini Message", applicationIcon = None, hostname = 'localhost', - password = None, port = 23053, sticky = False, priority = None, - callback = None, notificationIcon = None, identifier = None, - notifierFactory = GrowlNotifier): +def mini(description, applicationName='PythonMini', noteType="Message", + title="Mini Message", applicationIcon=None, hostname='localhost', + password=None, port=23053, sticky=False, priority=None, + callback=None, 
notificationIcon=None, identifier=None, + notifierFactory=GrowlNotifier): """Single notification function Simple notification function in one line. Has only one required parameter @@ -210,32 +229,37 @@ def mini(description, applicationName = 'PythonMini', noteType = "Message", For now, only URL callbacks are supported. In the future, the callback argument will also support a function """ - growl = notifierFactory( - applicationName = applicationName, - notifications = [noteType], - defaultNotifications = [noteType], - applicationIcon = applicationIcon, - hostname = hostname, - password = password, - port = port, - ) - result = growl.register() - if result is not True: - return result - - return growl.notify( - noteType = noteType, - title = title, - description = description, - icon = notificationIcon, - sticky = sticky, - priority = priority, - callback = callback, - identifier = identifier, - ) + try: + growl = notifierFactory( + applicationName=applicationName, + notifications=[noteType], + defaultNotifications=[noteType], + applicationIcon=applicationIcon, + hostname=hostname, + password=password, + port=port, + ) + result = growl.register() + if result is not True: + return result + + return growl.notify( + noteType=noteType, + title=title, + description=description, + icon=notificationIcon, + sticky=sticky, + priority=priority, + callback=callback, + identifier=identifier, + ) + except Exception: + # We want the "mini" function to be simple and swallow Exceptions + # in order to be less invasive + logger.exception("Growl error") if __name__ == '__main__': # If we're running this module directly we're likely running it as a test # so extra debugging is useful - logging.basicConfig(level = logging.INFO) + logging.basicConfig(level=logging.INFO) mini('Testing mini notification') diff --git a/libs/gntp/shim.py b/libs/gntp/shim.py new file mode 100644 index 0000000..3a38782 --- /dev/null +++ b/libs/gntp/shim.py @@ -0,0 +1,45 @@ +# Copyright: 2013 Paul Traylor +# 
These sources are released under the terms of the MIT license: see LICENSE + +""" +Python2.5 and Python3.3 compatibility shim + +Heavily inspirted by the "six" library. +https://pypi.python.org/pypi/six +""" + +import sys + +PY3 = sys.version_info[0] == 3 + +if PY3: + def b(s): + if isinstance(s, bytes): + return s + return s.encode('utf8', 'replace') + + def u(s): + if isinstance(s, bytes): + return s.decode('utf8', 'replace') + return s + + from io import BytesIO as StringIO + from configparser import RawConfigParser +else: + def b(s): + if isinstance(s, unicode): + return s.encode('utf8', 'replace') + return s + + def u(s): + if isinstance(s, unicode): + return s + if isinstance(s, int): + s = str(s) + return unicode(s, "utf8", "replace") + + from StringIO import StringIO + from ConfigParser import RawConfigParser + +b.__doc__ = "Ensure we have a byte string" +u.__doc__ = "Ensure we have a unicode string" diff --git a/libs/gntp/version.py b/libs/gntp/version.py new file mode 100644 index 0000000..2166aac --- /dev/null +++ b/libs/gntp/version.py @@ -0,0 +1,4 @@ +# Copyright: 2013 Paul Traylor +# These sources are released under the terms of the MIT license: see LICENSE + +__version__ = '1.0.2' diff --git a/libs/guessit/__init__.py b/libs/guessit/__init__.py index ce14024..e6cfa27 100755 --- a/libs/guessit/__init__.py +++ b/libs/guessit/__init__.py @@ -20,7 +20,7 @@ from __future__ import unicode_literals -__version__ = '0.7-dev' +__version__ = '0.6.2' __all__ = ['Guess', 'Language', 'guess_file_info', 'guess_video_info', 'guess_movie_info', 'guess_episode_info'] @@ -76,6 +76,7 @@ from guessit.language import Language from guessit.matcher import IterativeMatcher from guessit.textutils import clean_string import logging +import json log = logging.getLogger(__name__) @@ -105,17 +106,74 @@ def _guess_filename(filename, filetype): mtree = IterativeMatcher(filename, filetype=filetype) + m = mtree.matched() + + second_pass_opts = [] + second_pass_transfo_opts = {} + # 
if there are multiple possible years found, we assume the first one is # part of the title, reparse the tree taking this into account years = set(n.value for n in find_nodes(mtree.match_tree, 'year')) if len(years) >= 2: + second_pass_opts.append('skip_first_year') + + to_skip_language_nodes = [] + + title_nodes = set(n for n in find_nodes(mtree.match_tree, ['title', 'series'])) + title_spans = {} + for title_node in title_nodes: + title_spans[title_node.span[0]] = title_node + title_spans[title_node.span[1]] = title_node + + for lang_key in ('language', 'subtitleLanguage'): + langs = {} + lang_nodes = set(n for n in find_nodes(mtree.match_tree, lang_key)) + + for lang_node in lang_nodes: + lang = lang_node.guess.get(lang_key, None) + if len(lang_node.value) > 3 and (lang_node.span[0] in title_spans.keys() or lang_node.span[1] in title_spans.keys()): + # Language is next or before title, and is not a language code. Add to skip for 2nd pass. + + # if filetype is subtitle and the language appears last, just before + # the extension, then it is likely a subtitle language + parts = clean_string(lang_node.root.value).split() + if m['type'] in ['moviesubtitle', 'episodesubtitle'] and (parts.index(lang_node.value) == len(parts) - 2): + continue + + to_skip_language_nodes.append(lang_node) + elif not lang in langs: + langs[lang] = lang_node + else: + # The same language was found. Keep the more confident one, and add others to skip for 2nd pass. 
+ existing_lang_node = langs[lang] + to_skip = None + if existing_lang_node.guess.confidence('language') >= lang_node.guess.confidence('language'): + # lang_node is to remove + to_skip = lang_node + else: + # existing_lang_node is to remove + langs[lang] = lang_node + to_skip = existing_lang_node + to_skip_language_nodes.append(to_skip) + + + if to_skip_language_nodes: + second_pass_transfo_opts['guess_language'] = ( + ((), { 'skip': [ { 'node_idx': node.parent.node_idx, + 'span': node.span } + for node in to_skip_language_nodes ] })) + + if second_pass_opts or second_pass_transfo_opts: + # 2nd pass is needed + log.info("Running 2nd pass with options: %s" % second_pass_opts) + log.info("Transfo options: %s" % second_pass_transfo_opts) mtree = IterativeMatcher(filename, filetype=filetype, - opts=['skip_first_year']) - + opts=second_pass_opts, + transfo_opts=second_pass_transfo_opts) m = mtree.matched() - if 'language' not in m and 'subtitleLanguage' not in m: + if 'language' not in m and 'subtitleLanguage' not in m or 'title' not in m: return m # if we found some language, make sure we didn't cut a title or sth... 
@@ -123,51 +181,10 @@ def _guess_filename(filename, filetype): opts=['nolanguage', 'nocountry']) m2 = mtree2.matched() - - if m.get('title') is None: - return m - if m.get('title') != m2.get('title'): title = next(find_nodes(mtree.match_tree, 'title')) title2 = next(find_nodes(mtree2.match_tree, 'title')) - langs = list(find_nodes(mtree.match_tree, ['language', 'subtitleLanguage'])) - if not langs: - return warning('A weird error happened with language detection') - - # find the language that is likely more relevant - for lng in langs: - if lng.value in title2.value: - # if the language was detected as part of a potential title, - # look at this one in particular - lang = lng - break - else: - # pick the first one if we don't have a better choice - lang = langs[0] - - - # language code are rarely part of a title, and those - # should be handled by the Language exceptions anyway - if len(lang.value) <= 3: - return m - - - # if filetype is subtitle and the language appears last, just before - # the extension, then it is likely a subtitle language - parts = clean_string(title.root.value).split() - if (m['type'] in ['moviesubtitle', 'episodesubtitle'] and - parts.index(lang.value) == len(parts) - 2): - return m - - # if the language was in the middle of the other potential title, - # keep the other title (eg: The Italian Job), except if it is at the - # very beginning, in which case we consider it an error - if m2['title'].startswith(lang.value): - return m - elif lang.value in title2.value: - return m2 - # if a node is in an explicit group, then the correct title is probably # the other one if title.root.node_at(title.node_idx[:2]).is_explicit(): @@ -175,9 +192,6 @@ def _guess_filename(filename, filetype): elif title2.root.node_at(title2.node_idx[:2]).is_explicit(): return m - return warning('Not sure of the title because of the language position') - - return m diff --git a/libs/guessit/__main__.py b/libs/guessit/__main__.py index 957ec9d..ccfa3af 100755 --- 
a/libs/guessit/__main__.py +++ b/libs/guessit/__main__.py @@ -24,16 +24,19 @@ from guessit import u from guessit import slogging, guess_file_info from optparse import OptionParser import logging +import sys +import os +import locale -def detect_filename(filename, filetype, info=['filename']): +def detect_filename(filename, filetype, info=['filename'], advanced = False): filename = u(filename) print('For:', filename) - print('GuessIt found:', guess_file_info(filename, filetype, info).nice_string()) + print('GuessIt found:', guess_file_info(filename, filetype, info).nice_string(advanced)) -def run_demo(episodes=True, movies=True): +def run_demo(episodes=True, movies=True, advanced=False): # NOTE: tests should not be added here but rather in the tests/ folder # this is just intended as a quick example if episodes: @@ -50,7 +53,7 @@ def run_demo(episodes=True, movies=True): for f in testeps: print('-'*80) - detect_filename(f, filetype='episode') + detect_filename(f, filetype='episode', advanced=advanced) if movies: @@ -77,12 +80,17 @@ def run_demo(episodes=True, movies=True): for f in testmovies: print('-'*80) - detect_filename(f, filetype = 'movie') + detect_filename(f, filetype = 'movie', advanced = advanced) def main(): slogging.setupLogging() + # see http://bugs.python.org/issue2128 + if sys.version_info.major < 3 and os.name == 'nt': + for i, a in enumerate(sys.argv): + sys.argv[i] = a.decode(locale.getpreferredencoding()) + parser = OptionParser(usage = 'usage: %prog [options] file1 [file2...]') parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, help = 'display debug output') @@ -92,6 +100,8 @@ def main(): 'them, comma-separated') parser.add_option('-t', '--type', dest = 'filetype', default = 'autodetect', help = 'the suggested file type: movie, episode or autodetect') + parser.add_option('-a', '--advanced', dest = 'advanced', action='store_true', default = False, + help = 'display advanced information for filename guesses, 
as json output') parser.add_option('-d', '--demo', action='store_true', dest='demo', default=False, help = 'run a few builtin tests instead of analyzing a file') @@ -100,13 +110,14 @@ def main(): logging.getLogger('guessit').setLevel(logging.DEBUG) if options.demo: - run_demo(episodes=True, movies=True) + run_demo(episodes=True, movies=True, advanced=options.advanced) else: if args: for filename in args: detect_filename(filename, filetype = options.filetype, - info = options.info.split(',')) + info = options.info.split(','), + advanced = options.advanced) else: parser.print_help() diff --git a/libs/guessit/fileutils.py b/libs/guessit/fileutils.py index dc077e6..9531f82 100755 --- a/libs/guessit/fileutils.py +++ b/libs/guessit/fileutils.py @@ -44,13 +44,14 @@ def split_path(path): result = [] while True: head, tail = os.path.split(path) + headlen = len(head) # on Unix systems, the root folder is '/' - if head == '/' and tail == '': + if head and head == '/'*headlen and tail == '': return ['/'] + result # on Windows, the root folder is a drive letter (eg: 'C:\') or for shares \\ - if ((len(head) == 3 and head[1:] == ':\\') or (len(head) == 2 and head == '\\\\')) and tail == '': + if ((headlen == 3 and head[1:] == ':\\') or (headlen == 2 and head == '\\\\')) and tail == '': return [head] + result if head == '' and tail == '': @@ -61,6 +62,7 @@ def split_path(path): path = head continue + # otherwise, add the last path fragment and keep splitting result = [tail] + result path = head diff --git a/libs/guessit/guess.py b/libs/guessit/guess.py index 33d3651..73babce 100755 --- a/libs/guessit/guess.py +++ b/libs/guessit/guess.py @@ -41,15 +41,21 @@ class Guess(UnicodeMixin, dict): confidence = kwargs.pop('confidence') except KeyError: confidence = 0 + + try: + raw = kwargs.pop('raw') + except KeyError: + raw = None dict.__init__(self, *args, **kwargs) self._confidence = {} + self._raw = {} for prop in self: self._confidence[prop] = confidence - - - def to_dict(self): + 
self._raw[prop] = raw + + def to_dict(self, advanced=False): data = dict(self) for prop, value in data.items(): if isinstance(value, datetime.date): @@ -58,46 +64,65 @@ class Guess(UnicodeMixin, dict): data[prop] = u(value) elif isinstance(value, list): data[prop] = [u(x) for x in value] + if advanced: + data[prop] = {"value": data[prop], "raw": self.raw(prop), "confidence": self.confidence(prop)} return data - def nice_string(self): - data = self.to_dict() - - parts = json.dumps(data, indent=4).split('\n') - for i, p in enumerate(parts): - if p[:5] != ' "': - continue - - prop = p.split('"')[1] - parts[i] = (' [%.2f] "' % self.confidence(prop)) + p[5:] - - return '\n'.join(parts) + def nice_string(self, advanced=False): + if advanced: + data = self.to_dict(advanced) + return json.dumps(data, indent=4) + else: + data = self.to_dict() + + parts = json.dumps(data, indent=4).split('\n') + for i, p in enumerate(parts): + if p[:5] != ' "': + continue + + prop = p.split('"')[1] + parts[i] = (' [%.2f] "' % self.confidence(prop)) + p[5:] + + return '\n'.join(parts) def __unicode__(self): return u(self.to_dict()) def confidence(self, prop): return self._confidence.get(prop, -1) + + def raw(self, prop): + return self._raw.get(prop, None) - def set(self, prop, value, confidence=None): + def set(self, prop, value, confidence=None, raw=None): self[prop] = value if confidence is not None: self._confidence[prop] = confidence + if raw is not None: + self._raw[prop] = raw def set_confidence(self, prop, value): self._confidence[prop] = value + + def set_raw(self, prop, value): + self._raw[prop] = value - def update(self, other, confidence=None): + def update(self, other, confidence=None, raw=None): dict.update(self, other) if isinstance(other, Guess): for prop in other: self._confidence[prop] = other.confidence(prop) + self._raw[prop] = other.raw(prop) if confidence is not None: for prop in other: self._confidence[prop] = confidence + if raw is not None: + for prop in other: + 
self._raw[prop] = raw + def update_highest_confidence(self, other): """Update this guess with the values from the given one. In case there is property present in both, only the one with the highest one @@ -110,6 +135,7 @@ class Guess(UnicodeMixin, dict): continue self[prop] = other[prop] self._confidence[prop] = other.confidence(prop) + self._raw[prop] = other.raw(prop) def choose_int(g1, g2): @@ -181,7 +207,7 @@ def choose_string(g1, g2): elif v1l in v2l: return (v1, combined_prob) - # in case of conflict, return the one with highest priority + # in case of conflict, return the one with highest confidence else: if c1 > c2: return (v1, c1 - c2) @@ -288,7 +314,8 @@ def merge_all(guesses, append=None): result.set(prop, result.get(prop, []) + [g[prop]], # TODO: what to do with confidence here? maybe an # arithmetic mean... - confidence=g.confidence(prop)) + confidence=g.confidence(prop), + raw=g.raw(prop)) del g[prop] diff --git a/libs/guessit/language.py b/libs/guessit/language.py index 2714c6e..4d22cf0 100755 --- a/libs/guessit/language.py +++ b/libs/guessit/language.py @@ -296,7 +296,7 @@ UNDETERMINED = Language('und') ALL_LANGUAGES = frozenset(Language(lng) for lng in lng_all_names) - frozenset([UNDETERMINED]) ALL_LANGUAGES_NAMES = lng_all_names -def search_language(string, lang_filter=None): +def search_language(string, lang_filter=None, skip=None): """Looks for language patterns, and if found return the language object, its group span and an associated confidence. 
@@ -345,6 +345,16 @@ def search_language(string, lang_filter=None): if pos != -1: end = pos + len(lang) + + # skip if span in in skip list + while skip and (pos - 1, end - 1) in skip: + pos = slow.find(lang, end) + if pos == -1: + continue + end = pos + len(lang) + if pos == -1: + continue + # make sure our word is always surrounded by separators if slow[pos - 1] not in sep or slow[end] not in sep: continue diff --git a/libs/guessit/matcher.py b/libs/guessit/matcher.py index 4337819..1984c01 100755 --- a/libs/guessit/matcher.py +++ b/libs/guessit/matcher.py @@ -21,14 +21,14 @@ from __future__ import unicode_literals from guessit import PY3, u, base_text_type from guessit.matchtree import MatchTree -from guessit.textutils import normalize_unicode +from guessit.textutils import normalize_unicode, clean_string import logging log = logging.getLogger(__name__) class IterativeMatcher(object): - def __init__(self, filename, filetype='autodetect', opts=None): + def __init__(self, filename, filetype='autodetect', opts=None, transfo_opts=None): """An iterative matcher tries to match different patterns that appear in the filename. @@ -38,7 +38,8 @@ class IterativeMatcher(object): a movie. The recognized 'filetype' values are: - [ autodetect, subtitle, movie, moviesubtitle, episode, episodesubtitle ] + [ autodetect, subtitle, info, movie, moviesubtitle, movieinfo, episode, + episodesubtitle, episodeinfo ] The IterativeMatcher works mainly in 2 steps: @@ -61,15 +62,20 @@ class IterativeMatcher(object): it corresponds to a video codec, denoted by the letter'v' in the 4th line. (for more info, see guess.matchtree.to_string) + Second, it tries to merge all this information into a single object + containing all the found properties, and does some (basic) conflict + resolution when they arise. - Second, it tries to merge all this information into a single object - containing all the found properties, and does some (basic) conflict - resolution when they arise. 
+ + When you create the Matcher, you can pass it: + - a list 'opts' of option names, that act as global flags + - a dict 'transfo_opts' of { transfo_name: (transfo_args, transfo_kwargs) } + with which to call the transfo.process() function. """ - valid_filetypes = ('autodetect', 'subtitle', 'video', - 'movie', 'moviesubtitle', - 'episode', 'episodesubtitle') + valid_filetypes = ('autodetect', 'subtitle', 'info', 'video', + 'movie', 'moviesubtitle', 'movieinfo', + 'episode', 'episodesubtitle', 'episodeinfo') if filetype not in valid_filetypes: raise ValueError("filetype needs to be one of %s" % valid_filetypes) if not PY3 and not isinstance(filename, unicode): @@ -80,10 +86,22 @@ class IterativeMatcher(object): if opts is None: opts = [] - elif isinstance(opts, base_text_type): - opts = opts.split() + if not isinstance(opts, list): + raise ValueError('opts must be a list of option names! Received: type=%s val=%s', + type(opts), opts) + + if transfo_opts is None: + transfo_opts = {} + if not isinstance(transfo_opts, dict): + raise ValueError('transfo_opts must be a dict of { transfo_name: (args, kwargs) }. '+ + 'Received: type=%s val=%s', type(transfo_opts), transfo_opts) self.match_tree = MatchTree(filename) + + # sanity check: make sure we don't process a (mostly) empty string + if clean_string(filename) == '': + return + mtree = self.match_tree mtree.guess.set('type', filetype, confidence=1.0) @@ -91,7 +109,11 @@ class IterativeMatcher(object): transfo = __import__('guessit.transfo.' 
+ transfo_name, globals=globals(), locals=locals(), fromlist=['process'], level=0) - transfo.process(mtree, *args, **kwargs) + default_args, default_kwargs = transfo_opts.get(transfo_name, ((), {})) + all_args = args or default_args + all_kwargs = dict(default_kwargs) + all_kwargs.update(kwargs) # keep all kwargs merged together + transfo.process(mtree, *all_args, **all_kwargs) # 1- first split our path into dirs + basename + ext apply_transfo('split_path_components') @@ -111,7 +133,7 @@ class IterativeMatcher(object): # - language before episodes_rexps # - properties before language (eg: he-aac vs hebrew) # - release_group before properties (eg: XviD-?? vs xvid) - if mtree.guess['type'] in ('episode', 'episodesubtitle'): + if mtree.guess['type'] in ('episode', 'episodesubtitle', 'episodeinfo'): strategy = [ 'guess_date', 'guess_website', 'guess_release_group', 'guess_properties', 'guess_language', 'guess_video_rexps', @@ -124,6 +146,7 @@ class IterativeMatcher(object): if 'nolanguage' in opts: strategy.remove('guess_language') + for name in strategy: apply_transfo(name) @@ -143,7 +166,7 @@ class IterativeMatcher(object): # 5- try to identify the remaining unknown groups by looking at their # position relative to other known elements - if mtree.guess['type'] in ('episode', 'episodesubtitle'): + if mtree.guess['type'] in ('episode', 'episodesubtitle', 'episodeinfo'): apply_transfo('guess_episode_info_from_position') else: apply_transfo('guess_movie_title_from_position') diff --git a/libs/guessit/patterns.py b/libs/guessit/patterns.py index ed3982b..f803a11 100755 --- a/libs/guessit/patterns.py +++ b/libs/guessit/patterns.py @@ -25,6 +25,8 @@ import re subtitle_exts = [ 'srt', 'idx', 'sub', 'ssa' ] +info_exts = [ 'nfo' ] + video_exts = ['3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'm4v', 'mk2', 'mka', 'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg', 'ogg', 'ogm', 'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv'] @@ -32,7 +34,7 @@ video_exts = 
['3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'm4v', 'mk2', group_delimiters = [ '()', '[]', '{}' ] # separator character regexp -sep = r'[][)(}{+ /\._-]' # regexp art, hehe :D +sep = r'[][,)(}{+ /\._-]' # regexp art, hehe :D # character used to represent a deleted char (when matching groups) deleted = '_' @@ -49,7 +51,7 @@ episode_rexps = [ # ... Season 2 ... #(r'[Ss](?P<season>[0-9]{1,3})[^0-9]?(?P<bonusNumber>(?:-?[xX-][0-9]{1,3})+)[^0-9]', 1.0, (0, -1)), # ... 2x13 ... - (r'[^0-9](?P<season>[0-9]{1,2})[^0-9]?(?P<episodeNumber>(?:-?[xX][0-9]{1,3})+)[^0-9]', 1.0, (1, -1)), + (r'[^0-9](?P<season>[0-9]{1,2})[^0-9 .-]?(?P<episodeNumber>(?:-?[xX][0-9]{1,3})+)[^0-9]', 1.0, (1, -1)), # ... s02 ... #(sep + r's(?P<season>[0-9]{1,2})' + sep, 0.6, (1, -1)), @@ -122,9 +124,12 @@ prop_multi = { 'format': { 'DVD': [ 'DVD', 'DVD-Rip', 'VIDEO-TS', 'DVDivX' ], 'VHS': [ 'VHS' ], 'WEB-DL': [ 'WEB-DL' ] }, + 'is3D': { True: [ '3D' ] }, + 'screenSize': { '480p': [ '480[pi]?' ], '720p': [ '720[pi]?' ], - '1080p': [ '1080[pi]?' ] }, + '1080i': [ '1080i' ], + '1080p': [ '1080p', '1080[^i]' ] }, 'videoCodec': { 'XviD': [ 'Xvid' ], 'DivX': [ 'DVDivX', 'DivX' ], @@ -140,7 +145,7 @@ prop_multi = { 'format': { 'DVD': [ 'DVD', 'DVD-Rip', 'VIDEO-TS', 'DVDivX' ], 'DTS': [ 'DTS' ], 'AAC': [ 'He-AAC', 'AAC-He', 'AAC' ] }, - 'audioChannels': { '5.1': [ r'5\.1', 'DD5[\._ ]1', '5ch' ] }, + 'audioChannels': { '5.1': [ r'5\.1', 'DD5[._ ]1', '5ch' ] }, 'episodeFormat': { 'Minisode': [ 'Minisodes?' ] } @@ -170,7 +175,7 @@ prop_single = { 'releaseGroup': [ 'ESiR', 'WAF', 'SEPTiC', r'\[XCT\]', 'iNT', 'P } _dash = '-' -_psep = '[-\. _]?' +_psep = '[-. _]?' 
def _to_rexp(prop): return re.compile(prop.replace(_dash, _psep), re.IGNORECASE) @@ -237,8 +242,9 @@ def canonical_form(string): def compute_canonical_form(property_name, value): """Return the canonical form of a property given its type if it is a valid one, None otherwise.""" - for canonical_form, rexps in properties_rexps[property_name].items(): - for rexp in rexps: - if rexp.match(value): - return canonical_form + if isinstance(value, basestring): + for canonical_form, rexps in properties_rexps[property_name].items(): + for rexp in rexps: + if rexp.match(value): + return canonical_form return None diff --git a/libs/guessit/slogging.py b/libs/guessit/slogging.py index 75e261c..39591a2 100755 --- a/libs/guessit/slogging.py +++ b/libs/guessit/slogging.py @@ -31,14 +31,15 @@ RED_FONT = "\x1B[0;31m" RESET_FONT = "\x1B[0m" -def setupLogging(colored=True, with_time=False, with_thread=False, filename=None): +def setupLogging(colored=True, with_time=False, with_thread=False, filename=None, with_lineno=False): """Set up a nice colored logger as the main application logger.""" class SimpleFormatter(logging.Formatter): def __init__(self, with_time, with_thread): self.fmt = (('%(asctime)s ' if with_time else '') + '%(levelname)-8s ' + - '[%(name)s:%(funcName)s]' + + '[%(name)s:%(funcName)s' + + (':%(lineno)s' if with_lineno else '') + ']' + ('[%(threadName)s]' if with_thread else '') + ' -- %(message)s') logging.Formatter.__init__(self, self.fmt) @@ -47,7 +48,8 @@ def setupLogging(colored=True, with_time=False, with_thread=False, filename=None def __init__(self, with_time, with_thread): self.fmt = (('%(asctime)s ' if with_time else '') + '-CC-%(levelname)-8s ' + - BLUE_FONT + '[%(name)s:%(funcName)s]' + + BLUE_FONT + '[%(name)s:%(funcName)s' + + (':%(lineno)s' if with_lineno else '') + ']' + RESET_FONT + ('[%(threadName)s]' if with_thread else '') + ' -- %(message)s') diff --git a/libs/guessit/textutils.py b/libs/guessit/textutils.py index f195e2b..ae9d28c 100755 --- 
a/libs/guessit/textutils.py +++ b/libs/guessit/textutils.py @@ -43,10 +43,13 @@ def strip_brackets(s): return s -def clean_string(s): - for c in sep[:-2]: # do not remove dashes ('-') - s = s.replace(c, ' ') - parts = s.split() +def clean_string(st): + for c in sep: + # do not remove certain chars + if c in ['-', ',']: + continue + st = st.replace(c, ' ') + parts = st.split() result = ' '.join(p for p in parts if p != '') # now also remove dashes on the outer part of the string diff --git a/libs/guessit/transfo/__init__.py b/libs/guessit/transfo/__init__.py index 820690a..a28aa98 100755 --- a/libs/guessit/transfo/__init__.py +++ b/libs/guessit/transfo/__init__.py @@ -28,7 +28,7 @@ log = logging.getLogger(__name__) def found_property(node, name, confidence): - node.guess = Guess({name: node.clean_value}, confidence=confidence) + node.guess = Guess({name: node.clean_value}, confidence=confidence, raw=node.value) log.debug('Found with confidence %.2f: %s' % (confidence, node.guess)) @@ -52,11 +52,17 @@ def format_guess(guess): def find_and_split_node(node, strategy, logger): string = ' %s ' % node.value # add sentinels - for matcher, confidence in strategy: + for matcher, confidence, args, kwargs in strategy: + all_args = [string] if getattr(matcher, 'use_node', False): - result, span = matcher(string, node) + all_args.append(node) + if args: + all_args.append(args) + + if kwargs: + result, span = matcher(*all_args, **kwargs) else: - result, span = matcher(string) + result, span = matcher(*all_args) if result: # readjust span to compensate for sentinels @@ -69,7 +75,7 @@ def find_and_split_node(node, strategy, logger): if confidence is None: confidence = 1.0 - guess = format_guess(Guess(result, confidence=confidence)) + guess = format_guess(Guess(result, confidence=confidence, raw=string[span[0] + 1:span[1] + 1])) msg = 'Found with confidence %.2f: %s' % (confidence, guess) (logger or log).debug(msg) @@ -84,10 +90,12 @@ def find_and_split_node(node, strategy, logger): 
class SingleNodeGuesser(object): - def __init__(self, guess_func, confidence, logger=None): + def __init__(self, guess_func, confidence, logger, *args, **kwargs): self.guess_func = guess_func self.confidence = confidence self.logger = logger + self.args = args + self.kwargs = kwargs def process(self, mtree): # strategy is a list of pairs (guesser, confidence) @@ -95,7 +103,7 @@ class SingleNodeGuesser(object): # it will override it, otherwise it will leave the guess confidence # - if the guesser returns a simple dict as a guess and confidence is # specified, it will use it, or 1.0 otherwise - strategy = [ (self.guess_func, self.confidence) ] + strategy = [ (self.guess_func, self.confidence, self.args, self.kwargs) ] for node in mtree.unidentified_leaves(): find_and_split_node(node, strategy, self.logger) diff --git a/libs/guessit/transfo/guess_country.py b/libs/guessit/transfo/guess_country.py index 1d69069..aadb84f 100755 --- a/libs/guessit/transfo/guess_country.py +++ b/libs/guessit/transfo/guess_country.py @@ -45,4 +45,4 @@ def process(mtree): except ValueError: continue - node.guess = Guess(country=country, confidence=1.0) + node.guess = Guess(country=country, confidence=1.0, raw=c) diff --git a/libs/guessit/transfo/guess_episodes_rexps.py b/libs/guessit/transfo/guess_episodes_rexps.py index 29562be..30c2ca2 100755 --- a/libs/guessit/transfo/guess_episodes_rexps.py +++ b/libs/guessit/transfo/guess_episodes_rexps.py @@ -40,27 +40,22 @@ def guess_episodes_rexps(string): for rexp, confidence, span_adjust in episode_rexps: match = re.search(rexp, string, re.IGNORECASE) if match: - guess = Guess(match.groupdict(), confidence=confidence) - span = (match.start() + span_adjust[0], + span = (match.start() + span_adjust[0], match.end() + span_adjust[1]) - - # episodes which have a season > 30 are most likely errors - # (Simpsons is at 24!) 
- if int(guess.get('season', 0)) > 30: - continue + guess = Guess(match.groupdict(), confidence=confidence, raw=string[span[0]:span[1]]) # decide whether we have only a single episode number or an # episode list if guess.get('episodeNumber'): eplist = number_list(guess['episodeNumber']) - guess.set('episodeNumber', eplist[0], confidence=confidence) + guess.set('episodeNumber', eplist[0], confidence=confidence, raw=string[span[0]:span[1]]) if len(eplist) > 1: - guess.set('episodeList', eplist, confidence=confidence) + guess.set('episodeList', eplist, confidence=confidence, raw=string[span[0]:span[1]]) if guess.get('bonusNumber'): eplist = number_list(guess['bonusNumber']) - guess.set('bonusNumber', eplist[0], confidence=confidence) + guess.set('bonusNumber', eplist[0], confidence=confidence, raw=string[span[0]:span[1]]) return guess, span diff --git a/libs/guessit/transfo/guess_filetype.py b/libs/guessit/transfo/guess_filetype.py index 4d98d01..4279c0b 100755 --- a/libs/guessit/transfo/guess_filetype.py +++ b/libs/guessit/transfo/guess_filetype.py @@ -20,7 +20,7 @@ from __future__ import unicode_literals from guessit import Guess -from guessit.patterns import (subtitle_exts, video_exts, episode_rexps, +from guessit.patterns import (subtitle_exts, info_exts, video_exts, episode_rexps, find_properties, compute_canonical_form) from guessit.date import valid_year from guessit.textutils import clean_string @@ -53,12 +53,16 @@ def guess_filetype(mtree, filetype): filetype_container[0] = 'episode' elif filetype_container[0] == 'subtitle': filetype_container[0] = 'episodesubtitle' + elif filetype_container[0] == 'info': + filetype_container[0] = 'episodeinfo' def upgrade_movie(): if filetype_container[0] == 'video': filetype_container[0] = 'movie' elif filetype_container[0] == 'subtitle': filetype_container[0] = 'moviesubtitle' + elif filetype_container[0] == 'info': + filetype_container[0] = 'movieinfo' def upgrade_subtitle(): if 'movie' in filetype_container[0]: @@ -68,6 
+72,14 @@ def guess_filetype(mtree, filetype): else: filetype_container[0] = 'subtitle' + def upgrade_info(): + if 'movie' in filetype_container[0]: + filetype_container[0] = 'movieinfo' + elif 'episode' in filetype_container[0]: + filetype_container[0] = 'episodeinfo' + else: + filetype_container[0] = 'info' + def upgrade(type='unknown'): if filetype_container[0] == 'autodetect': filetype_container[0] = type @@ -78,6 +90,9 @@ def guess_filetype(mtree, filetype): if fileext in subtitle_exts: upgrade_subtitle() other = { 'container': fileext } + elif fileext in info_exts: + upgrade_info() + other = { 'container': fileext } elif fileext in video_exts: upgrade(type='video') other = { 'container': fileext } @@ -104,17 +119,20 @@ def guess_filetype(mtree, filetype): fname = clean_string(filename).lower() for m in MOVIES: if m in fname: + log.debug('Found in exception list of movies -> type = movie') upgrade_movie() for s in SERIES: if s in fname: + log.debug('Found in exception list of series -> type = episode') upgrade_episode() # now look whether there are some specific hints for episode vs movie - if filetype_container[0] in ('video', 'subtitle'): + if filetype_container[0] in ('video', 'subtitle', 'info'): # if we have an episode_rexp (eg: s02e13), it is an episode for rexp, _, _ in episode_rexps: match = re.search(rexp, filename, re.IGNORECASE) if match: + log.debug('Found matching regexp: "%s" (string = "%s") -> type = episode', rexp, match.group()) upgrade_episode() break @@ -133,24 +151,29 @@ def guess_filetype(mtree, filetype): possible = False if possible: + log.debug('Found possible episode number: %s (from string "%s") -> type = episode', epnumber, match.group()) upgrade_episode() # if we have certain properties characteristic of episodes, it is an ep for prop, value, _, _ in find_properties(filename): log.debug('prop: %s = %s' % (prop, value)) if prop == 'episodeFormat': + log.debug('Found characteristic property of episodes: %s = "%s"', prop, value) 
upgrade_episode() break elif compute_canonical_form('format', value) == 'DVB': + log.debug('Found characteristic property of episodes: %s = "%s"', prop, value) upgrade_episode() break # origin-specific type if 'tvu.org.ru' in filename: + log.debug('Found characteristic property of episodes: %s = "%s"', prop, value) upgrade_episode() # if no episode info found, assume it's a movie + log.debug('Nothing characteristic found, assuming type = movie') upgrade_movie() filetype = filetype_container[0] diff --git a/libs/guessit/transfo/guess_language.py b/libs/guessit/transfo/guess_language.py index 86c1cf5..648a06b 100755 --- a/libs/guessit/transfo/guess_language.py +++ b/libs/guessit/transfo/guess_language.py @@ -22,22 +22,34 @@ from __future__ import unicode_literals from guessit import Guess from guessit.transfo import SingleNodeGuesser from guessit.language import search_language -from guessit.textutils import clean_string, find_words import logging log = logging.getLogger(__name__) -def guess_language(string): - language, span, confidence = search_language(string) +def guess_language(string, node, skip=None): + if skip: + relative_skip = [] + for entry in skip: + node_idx = entry['node_idx'] + span = entry['span'] + if node_idx == node.node_idx[:len(node_idx)]: + relative_span = (span[0] - node.offset + 1, span[1] - node.offset + 1) + relative_skip.append(relative_span) + skip = relative_skip + + language, span, confidence = search_language(string, skip=skip) if language: return (Guess({'language': language}, - confidence=confidence), + confidence=confidence, + raw= string[span[0]:span[1]]), span) return None, None +guess_language.use_node = True + -def process(mtree): - SingleNodeGuesser(guess_language, None, log).process(mtree) +def process(mtree, *args, **kwargs): + SingleNodeGuesser(guess_language, None, log, *args, **kwargs).process(mtree) # Note: 'language' is promoted to 'subtitleLanguage' in the post_process transfo diff --git 
a/libs/guessit/transfo/guess_movie_title_from_position.py b/libs/guessit/transfo/guess_movie_title_from_position.py index d2e2deb..bcb42b4 100755 --- a/libs/guessit/transfo/guess_movie_title_from_position.py +++ b/libs/guessit/transfo/guess_movie_title_from_position.py @@ -29,7 +29,8 @@ log = logging.getLogger(__name__) def process(mtree): def found_property(node, name, value, confidence): node.guess = Guess({ name: value }, - confidence=confidence) + confidence=confidence, + raw=value) log.debug('Found with confidence %.2f: %s' % (confidence, node.guess)) def found_title(node, confidence): diff --git a/libs/guessit/transfo/guess_video_rexps.py b/libs/guessit/transfo/guess_video_rexps.py index 8ae9e6c..1b511f1 100755 --- a/libs/guessit/transfo/guess_video_rexps.py +++ b/libs/guessit/transfo/guess_video_rexps.py @@ -38,9 +38,10 @@ def guess_video_rexps(string): # the soonest that we can catch it) if metadata.get('cdNumberTotal', -1) is None: del metadata['cdNumberTotal'] - return (Guess(metadata, confidence=confidence), - (match.start() + span_adjust[0], - match.end() + span_adjust[1] - 2)) + span = (match.start() + span_adjust[0], + match.end() + span_adjust[1] - 2) + return (Guess(metadata, confidence=confidence, raw=string[span[0]:span[1]]), + span) return None, None diff --git a/libs/guessit/transfo/guess_weak_episodes_rexps.py b/libs/guessit/transfo/guess_weak_episodes_rexps.py index 8436ade..18306b4 100755 --- a/libs/guessit/transfo/guess_weak_episodes_rexps.py +++ b/libs/guessit/transfo/guess_weak_episodes_rexps.py @@ -48,9 +48,9 @@ def guess_weak_episodes_rexps(string, node): continue return Guess({ 'season': season, 'episodeNumber': epnum }, - confidence=0.6), span + confidence=0.6, raw=string[span[0]:span[1]]), span else: - return Guess(metadata, confidence=0.3), span + return Guess(metadata, confidence=0.3, raw=string[span[0]:span[1]]), span return None, None diff --git a/libs/html5lib/__init__.py b/libs/html5lib/__init__.py index 16537aa..66c1a8e 100644 
--- a/libs/html5lib/__init__.py +++ b/libs/html5lib/__init__.py @@ -1,4 +1,4 @@ -""" +""" HTML parsing library based on the WHATWG "HTML5" specification. The parser is designed to be compatible with existing HTML found in the wild and implements well-defined error recovery that @@ -8,10 +8,16 @@ Example usage: import html5lib f = open("my_document.html") -tree = html5lib.parse(f) +tree = html5lib.parse(f) """ -__version__ = "0.95-dev" -from html5parser import HTMLParser, parse, parseFragment -from treebuilders import getTreeBuilder -from treewalkers import getTreeWalker -from serializer import serialize + +from __future__ import absolute_import, division, unicode_literals + +from .html5parser import HTMLParser, parse, parseFragment +from .treebuilders import getTreeBuilder +from .treewalkers import getTreeWalker +from .serializer import serialize + +__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder", + "getTreeWalker", "serialize"] +__version__ = "0.99" diff --git a/libs/html5lib/constants.py b/libs/html5lib/constants.py index b533018..e708984 100644 --- a/libs/html5lib/constants.py +++ b/libs/html5lib/constants.py @@ -1,302 +1,301 @@ -import string, gettext -_ = gettext.gettext +from __future__ import absolute_import, division, unicode_literals -try: - frozenset -except NameError: - # Import from the sets module for python 2.3 - from sets import Set as set - from sets import ImmutableSet as frozenset +import string +import gettext +_ = gettext.gettext EOF = None E = { - "null-character": - _(u"Null character in input stream, replaced with U+FFFD."), - "invalid-codepoint": - _(u"Invalid codepoint in stream."), + "null-character": + _("Null character in input stream, replaced with U+FFFD."), + "invalid-codepoint": + _("Invalid codepoint in stream."), "incorrectly-placed-solidus": - _(u"Solidus (/) incorrectly placed in tag."), + _("Solidus (/) incorrectly placed in tag."), "incorrect-cr-newline-entity": - _(u"Incorrect CR newline entity, replaced 
with LF."), + _("Incorrect CR newline entity, replaced with LF."), "illegal-windows-1252-entity": - _(u"Entity used with illegal number (windows-1252 reference)."), + _("Entity used with illegal number (windows-1252 reference)."), "cant-convert-numeric-entity": - _(u"Numeric entity couldn't be converted to character " - u"(codepoint U+%(charAsInt)08x)."), + _("Numeric entity couldn't be converted to character " + "(codepoint U+%(charAsInt)08x)."), "illegal-codepoint-for-numeric-entity": - _(u"Numeric entity represents an illegal codepoint: " - u"U+%(charAsInt)08x."), + _("Numeric entity represents an illegal codepoint: " + "U+%(charAsInt)08x."), "numeric-entity-without-semicolon": - _(u"Numeric entity didn't end with ';'."), + _("Numeric entity didn't end with ';'."), "expected-numeric-entity-but-got-eof": - _(u"Numeric entity expected. Got end of file instead."), + _("Numeric entity expected. Got end of file instead."), "expected-numeric-entity": - _(u"Numeric entity expected but none found."), + _("Numeric entity expected but none found."), "named-entity-without-semicolon": - _(u"Named entity didn't end with ';'."), + _("Named entity didn't end with ';'."), "expected-named-entity": - _(u"Named entity expected. Got none."), + _("Named entity expected. Got none."), "attributes-in-end-tag": - _(u"End tag contains unexpected attributes."), + _("End tag contains unexpected attributes."), 'self-closing-flag-on-end-tag': - _(u"End tag contains unexpected self-closing flag."), + _("End tag contains unexpected self-closing flag."), "expected-tag-name-but-got-right-bracket": - _(u"Expected tag name. Got '>' instead."), + _("Expected tag name. Got '>' instead."), "expected-tag-name-but-got-question-mark": - _(u"Expected tag name. Got '?' instead. (HTML doesn't " - u"support processing instructions.)"), + _("Expected tag name. Got '?' instead. (HTML doesn't " + "support processing instructions.)"), "expected-tag-name": - _(u"Expected tag name. 
Got something else instead"), + _("Expected tag name. Got something else instead"), "expected-closing-tag-but-got-right-bracket": - _(u"Expected closing tag. Got '>' instead. Ignoring '</>'."), + _("Expected closing tag. Got '>' instead. Ignoring '</>'."), "expected-closing-tag-but-got-eof": - _(u"Expected closing tag. Unexpected end of file."), + _("Expected closing tag. Unexpected end of file."), "expected-closing-tag-but-got-char": - _(u"Expected closing tag. Unexpected character '%(data)s' found."), + _("Expected closing tag. Unexpected character '%(data)s' found."), "eof-in-tag-name": - _(u"Unexpected end of file in the tag name."), + _("Unexpected end of file in the tag name."), "expected-attribute-name-but-got-eof": - _(u"Unexpected end of file. Expected attribute name instead."), + _("Unexpected end of file. Expected attribute name instead."), "eof-in-attribute-name": - _(u"Unexpected end of file in attribute name."), + _("Unexpected end of file in attribute name."), "invalid-character-in-attribute-name": - _(u"Invalid chracter in attribute name"), + _("Invalid character in attribute name"), "duplicate-attribute": - _(u"Dropped duplicate attribute on tag."), + _("Dropped duplicate attribute on tag."), "expected-end-of-tag-name-but-got-eof": - _(u"Unexpected end of file. Expected = or end of tag."), + _("Unexpected end of file. Expected = or end of tag."), "expected-attribute-value-but-got-eof": - _(u"Unexpected end of file. Expected attribute value."), + _("Unexpected end of file. Expected attribute value."), "expected-attribute-value-but-got-right-bracket": - _(u"Expected attribute value. Got '>' instead."), + _("Expected attribute value. 
Got '>' instead."), 'equals-in-unquoted-attribute-value': - _(u"Unexpected = in unquoted attribute"), + _("Unexpected = in unquoted attribute"), 'unexpected-character-in-unquoted-attribute-value': - _(u"Unexpected character in unquoted attribute"), + _("Unexpected character in unquoted attribute"), "invalid-character-after-attribute-name": - _(u"Unexpected character after attribute name."), + _("Unexpected character after attribute name."), "unexpected-character-after-attribute-value": - _(u"Unexpected character after attribute value."), + _("Unexpected character after attribute value."), "eof-in-attribute-value-double-quote": - _(u"Unexpected end of file in attribute value (\")."), + _("Unexpected end of file in attribute value (\")."), "eof-in-attribute-value-single-quote": - _(u"Unexpected end of file in attribute value (')."), + _("Unexpected end of file in attribute value (')."), "eof-in-attribute-value-no-quotes": - _(u"Unexpected end of file in attribute value."), + _("Unexpected end of file in attribute value."), "unexpected-EOF-after-solidus-in-tag": - _(u"Unexpected end of file in tag. Expected >"), - "unexpected-character-after-soldius-in-tag": - _(u"Unexpected character after / in tag. Expected >"), + _("Unexpected end of file in tag. Expected >"), + "unexpected-character-after-solidus-in-tag": + _("Unexpected character after / in tag. Expected >"), "expected-dashes-or-doctype": - _(u"Expected '--' or 'DOCTYPE'. Not found."), + _("Expected '--' or 'DOCTYPE'. Not found."), "unexpected-bang-after-double-dash-in-comment": - _(u"Unexpected ! after -- in comment"), + _("Unexpected ! 
after -- in comment"), "unexpected-space-after-double-dash-in-comment": - _(u"Unexpected space after -- in comment"), + _("Unexpected space after -- in comment"), "incorrect-comment": - _(u"Incorrect comment."), + _("Incorrect comment."), "eof-in-comment": - _(u"Unexpected end of file in comment."), + _("Unexpected end of file in comment."), "eof-in-comment-end-dash": - _(u"Unexpected end of file in comment (-)"), + _("Unexpected end of file in comment (-)"), "unexpected-dash-after-double-dash-in-comment": - _(u"Unexpected '-' after '--' found in comment."), + _("Unexpected '-' after '--' found in comment."), "eof-in-comment-double-dash": - _(u"Unexpected end of file in comment (--)."), + _("Unexpected end of file in comment (--)."), "eof-in-comment-end-space-state": - _(u"Unexpected end of file in comment."), + _("Unexpected end of file in comment."), "eof-in-comment-end-bang-state": - _(u"Unexpected end of file in comment."), + _("Unexpected end of file in comment."), "unexpected-char-in-comment": - _(u"Unexpected character in comment found."), + _("Unexpected character in comment found."), "need-space-after-doctype": - _(u"No space after literal string 'DOCTYPE'."), + _("No space after literal string 'DOCTYPE'."), "expected-doctype-name-but-got-right-bracket": - _(u"Unexpected > character. Expected DOCTYPE name."), + _("Unexpected > character. Expected DOCTYPE name."), "expected-doctype-name-but-got-eof": - _(u"Unexpected end of file. Expected DOCTYPE name."), + _("Unexpected end of file. Expected DOCTYPE name."), "eof-in-doctype-name": - _(u"Unexpected end of file in DOCTYPE name."), + _("Unexpected end of file in DOCTYPE name."), "eof-in-doctype": - _(u"Unexpected end of file in DOCTYPE."), + _("Unexpected end of file in DOCTYPE."), "expected-space-or-right-bracket-in-doctype": - _(u"Expected space or '>'. Got '%(data)s'"), + _("Expected space or '>'. 
Got '%(data)s'"), "unexpected-end-of-doctype": - _(u"Unexpected end of DOCTYPE."), + _("Unexpected end of DOCTYPE."), "unexpected-char-in-doctype": - _(u"Unexpected character in DOCTYPE."), + _("Unexpected character in DOCTYPE."), "eof-in-innerhtml": - _(u"XXX innerHTML EOF"), + _("XXX innerHTML EOF"), "unexpected-doctype": - _(u"Unexpected DOCTYPE. Ignored."), + _("Unexpected DOCTYPE. Ignored."), "non-html-root": - _(u"html needs to be the first start tag."), + _("html needs to be the first start tag."), "expected-doctype-but-got-eof": - _(u"Unexpected End of file. Expected DOCTYPE."), + _("Unexpected End of file. Expected DOCTYPE."), "unknown-doctype": - _(u"Erroneous DOCTYPE."), + _("Erroneous DOCTYPE."), "expected-doctype-but-got-chars": - _(u"Unexpected non-space characters. Expected DOCTYPE."), + _("Unexpected non-space characters. Expected DOCTYPE."), "expected-doctype-but-got-start-tag": - _(u"Unexpected start tag (%(name)s). Expected DOCTYPE."), + _("Unexpected start tag (%(name)s). Expected DOCTYPE."), "expected-doctype-but-got-end-tag": - _(u"Unexpected end tag (%(name)s). Expected DOCTYPE."), + _("Unexpected end tag (%(name)s). Expected DOCTYPE."), "end-tag-after-implied-root": - _(u"Unexpected end tag (%(name)s) after the (implied) root element."), + _("Unexpected end tag (%(name)s) after the (implied) root element."), "expected-named-closing-tag-but-got-eof": - _(u"Unexpected end of file. Expected end tag (%(name)s)."), + _("Unexpected end of file. Expected end tag (%(name)s)."), "two-heads-are-not-better-than-one": - _(u"Unexpected start tag head in existing head. Ignored."), + _("Unexpected start tag head in existing head. Ignored."), "unexpected-end-tag": - _(u"Unexpected end tag (%(name)s). Ignored."), + _("Unexpected end tag (%(name)s). Ignored."), "unexpected-start-tag-out-of-my-head": - _(u"Unexpected start tag (%(name)s) that can be in head. Moved."), + _("Unexpected start tag (%(name)s) that can be in head. 
Moved."), "unexpected-start-tag": - _(u"Unexpected start tag (%(name)s)."), + _("Unexpected start tag (%(name)s)."), "missing-end-tag": - _(u"Missing end tag (%(name)s)."), + _("Missing end tag (%(name)s)."), "missing-end-tags": - _(u"Missing end tags (%(name)s)."), + _("Missing end tags (%(name)s)."), "unexpected-start-tag-implies-end-tag": - _(u"Unexpected start tag (%(startName)s) " - u"implies end tag (%(endName)s)."), + _("Unexpected start tag (%(startName)s) " + "implies end tag (%(endName)s)."), "unexpected-start-tag-treated-as": - _(u"Unexpected start tag (%(originalName)s). Treated as %(newName)s."), + _("Unexpected start tag (%(originalName)s). Treated as %(newName)s."), "deprecated-tag": - _(u"Unexpected start tag %(name)s. Don't use it!"), + _("Unexpected start tag %(name)s. Don't use it!"), "unexpected-start-tag-ignored": - _(u"Unexpected start tag %(name)s. Ignored."), + _("Unexpected start tag %(name)s. Ignored."), "expected-one-end-tag-but-got-another": - _(u"Unexpected end tag (%(gotName)s). " - u"Missing end tag (%(expectedName)s)."), + _("Unexpected end tag (%(gotName)s). " + "Missing end tag (%(expectedName)s)."), "end-tag-too-early": - _(u"End tag (%(name)s) seen too early. Expected other end tag."), + _("End tag (%(name)s) seen too early. Expected other end tag."), "end-tag-too-early-named": - _(u"Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s)."), + _("Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s)."), "end-tag-too-early-ignored": - _(u"End tag (%(name)s) seen too early. Ignored."), + _("End tag (%(name)s) seen too early. 
Ignored."), "adoption-agency-1.1": - _(u"End tag (%(name)s) violates step 1, " - u"paragraph 1 of the adoption agency algorithm."), + _("End tag (%(name)s) violates step 1, " + "paragraph 1 of the adoption agency algorithm."), "adoption-agency-1.2": - _(u"End tag (%(name)s) violates step 1, " - u"paragraph 2 of the adoption agency algorithm."), + _("End tag (%(name)s) violates step 1, " + "paragraph 2 of the adoption agency algorithm."), "adoption-agency-1.3": - _(u"End tag (%(name)s) violates step 1, " - u"paragraph 3 of the adoption agency algorithm."), + _("End tag (%(name)s) violates step 1, " + "paragraph 3 of the adoption agency algorithm."), + "adoption-agency-4.4": + _("End tag (%(name)s) violates step 4, " + "paragraph 4 of the adoption agency algorithm."), "unexpected-end-tag-treated-as": - _(u"Unexpected end tag (%(originalName)s). Treated as %(newName)s."), + _("Unexpected end tag (%(originalName)s). Treated as %(newName)s."), "no-end-tag": - _(u"This element (%(name)s) has no end tag."), + _("This element (%(name)s) has no end tag."), "unexpected-implied-end-tag-in-table": - _(u"Unexpected implied end tag (%(name)s) in the table phase."), + _("Unexpected implied end tag (%(name)s) in the table phase."), "unexpected-implied-end-tag-in-table-body": - _(u"Unexpected implied end tag (%(name)s) in the table body phase."), + _("Unexpected implied end tag (%(name)s) in the table body phase."), "unexpected-char-implies-table-voodoo": - _(u"Unexpected non-space characters in " - u"table context caused voodoo mode."), + _("Unexpected non-space characters in " + "table context caused voodoo mode."), "unexpected-hidden-input-in-table": - _(u"Unexpected input with type hidden in table context."), + _("Unexpected input with type hidden in table context."), "unexpected-form-in-table": - _(u"Unexpected form in table context."), + _("Unexpected form in table context."), "unexpected-start-tag-implies-table-voodoo": - _(u"Unexpected start tag (%(name)s) in " - u"table 
context caused voodoo mode."), + _("Unexpected start tag (%(name)s) in " + "table context caused voodoo mode."), "unexpected-end-tag-implies-table-voodoo": - _(u"Unexpected end tag (%(name)s) in " - u"table context caused voodoo mode."), + _("Unexpected end tag (%(name)s) in " + "table context caused voodoo mode."), "unexpected-cell-in-table-body": - _(u"Unexpected table cell start tag (%(name)s) " - u"in the table body phase."), + _("Unexpected table cell start tag (%(name)s) " + "in the table body phase."), "unexpected-cell-end-tag": - _(u"Got table cell end tag (%(name)s) " - u"while required end tags are missing."), + _("Got table cell end tag (%(name)s) " + "while required end tags are missing."), "unexpected-end-tag-in-table-body": - _(u"Unexpected end tag (%(name)s) in the table body phase. Ignored."), + _("Unexpected end tag (%(name)s) in the table body phase. Ignored."), "unexpected-implied-end-tag-in-table-row": - _(u"Unexpected implied end tag (%(name)s) in the table row phase."), + _("Unexpected implied end tag (%(name)s) in the table row phase."), "unexpected-end-tag-in-table-row": - _(u"Unexpected end tag (%(name)s) in the table row phase. Ignored."), + _("Unexpected end tag (%(name)s) in the table row phase. Ignored."), "unexpected-select-in-select": - _(u"Unexpected select start tag in the select phase " - u"treated as select end tag."), + _("Unexpected select start tag in the select phase " + "treated as select end tag."), "unexpected-input-in-select": - _(u"Unexpected input start tag in the select phase."), + _("Unexpected input start tag in the select phase."), "unexpected-start-tag-in-select": - _(u"Unexpected start tag token (%(name)s in the select phase. " - u"Ignored."), + _("Unexpected start tag token (%(name)s in the select phase. " + "Ignored."), "unexpected-end-tag-in-select": - _(u"Unexpected end tag (%(name)s) in the select phase. Ignored."), + _("Unexpected end tag (%(name)s) in the select phase. 
Ignored."), "unexpected-table-element-start-tag-in-select-in-table": - _(u"Unexpected table element start tag (%(name)s) in the select in table phase."), + _("Unexpected table element start tag (%(name)s) in the select in table phase."), "unexpected-table-element-end-tag-in-select-in-table": - _(u"Unexpected table element end tag (%(name)s) in the select in table phase."), + _("Unexpected table element end tag (%(name)s) in the select in table phase."), "unexpected-char-after-body": - _(u"Unexpected non-space characters in the after body phase."), + _("Unexpected non-space characters in the after body phase."), "unexpected-start-tag-after-body": - _(u"Unexpected start tag token (%(name)s)" - u" in the after body phase."), + _("Unexpected start tag token (%(name)s)" + " in the after body phase."), "unexpected-end-tag-after-body": - _(u"Unexpected end tag token (%(name)s)" - u" in the after body phase."), + _("Unexpected end tag token (%(name)s)" + " in the after body phase."), "unexpected-char-in-frameset": - _(u"Unepxected characters in the frameset phase. Characters ignored."), + _("Unexpected characters in the frameset phase. Characters ignored."), "unexpected-start-tag-in-frameset": - _(u"Unexpected start tag token (%(name)s)" - u" in the frameset phase. Ignored."), + _("Unexpected start tag token (%(name)s)" + " in the frameset phase. Ignored."), "unexpected-frameset-in-frameset-innerhtml": - _(u"Unexpected end tag token (frameset) " - u"in the frameset phase (innerHTML)."), + _("Unexpected end tag token (frameset) " + "in the frameset phase (innerHTML)."), "unexpected-end-tag-in-frameset": - _(u"Unexpected end tag token (%(name)s)" - u" in the frameset phase. Ignored."), + _("Unexpected end tag token (%(name)s)" + " in the frameset phase. Ignored."), "unexpected-char-after-frameset": - _(u"Unexpected non-space characters in the " - u"after frameset phase. Ignored."), + _("Unexpected non-space characters in the " + "after frameset phase. 
Ignored."), "unexpected-start-tag-after-frameset": - _(u"Unexpected start tag (%(name)s)" - u" in the after frameset phase. Ignored."), + _("Unexpected start tag (%(name)s)" + " in the after frameset phase. Ignored."), "unexpected-end-tag-after-frameset": - _(u"Unexpected end tag (%(name)s)" - u" in the after frameset phase. Ignored."), + _("Unexpected end tag (%(name)s)" + " in the after frameset phase. Ignored."), "unexpected-end-tag-after-body-innerhtml": - _(u"Unexpected end tag after body(innerHtml)"), + _("Unexpected end tag after body(innerHtml)"), "expected-eof-but-got-char": - _(u"Unexpected non-space characters. Expected end of file."), + _("Unexpected non-space characters. Expected end of file."), "expected-eof-but-got-start-tag": - _(u"Unexpected start tag (%(name)s)" - u". Expected end of file."), + _("Unexpected start tag (%(name)s)" + ". Expected end of file."), "expected-eof-but-got-end-tag": - _(u"Unexpected end tag (%(name)s)" - u". Expected end of file."), + _("Unexpected end tag (%(name)s)" + ". Expected end of file."), "eof-in-table": - _(u"Unexpected end of file. Expected table content."), + _("Unexpected end of file. Expected table content."), "eof-in-select": - _(u"Unexpected end of file. Expected select content."), + _("Unexpected end of file. Expected select content."), "eof-in-frameset": - _(u"Unexpected end of file. Expected frameset content."), + _("Unexpected end of file. Expected frameset content."), "eof-in-script-in-script": - _(u"Unexpected end of file. Expected script content."), + _("Unexpected end of file. Expected script content."), "eof-in-foreign-lands": - _(u"Unexpected end of file. Expected foreign content"), + _("Unexpected end of file. 
Expected foreign content"), "non-void-element-with-trailing-solidus": - _(u"Trailing solidus not allowed on element %(name)s"), + _("Trailing solidus not allowed on element %(name)s"), "unexpected-html-element-in-foreign-content": - _(u"Element %(name)s not allowed in a non-html context"), + _("Element %(name)s not allowed in a non-html context"), "unexpected-end-tag-before-html": - _(u"Unexpected end tag (%(name)s) before html."), + _("Unexpected end tag (%(name)s) before html."), "XXX-undefined-error": - (u"Undefined error (this sucks and should be fixed)"), + _("Undefined error (this sucks and should be fixed)"), } namespaces = { - "html":"http://www.w3.org/1999/xhtml", - "mathml":"http://www.w3.org/1998/Math/MathML", - "svg":"http://www.w3.org/2000/svg", - "xlink":"http://www.w3.org/1999/xlink", - "xml":"http://www.w3.org/XML/1998/namespace", - "xmlns":"http://www.w3.org/2000/xmlns/" + "html": "http://www.w3.org/1999/xhtml", + "mathml": "http://www.w3.org/1998/Math/MathML", + "svg": "http://www.w3.org/2000/svg", + "xlink": "http://www.w3.org/1999/xlink", + "xml": "http://www.w3.org/XML/1998/namespace", + "xmlns": "http://www.w3.org/2000/xmlns/" } scopingElements = frozenset(( @@ -380,7 +379,7 @@ specialElements = frozenset(( (namespaces["html"], "iframe"), # Note that image is commented out in the spec as "this isn't an # element that can end up on the stack, so it doesn't matter," - (namespaces["html"], "image"), + (namespaces["html"], "image"), (namespaces["html"], "img"), (namespaces["html"], "input"), (namespaces["html"], "isindex"), @@ -434,12 +433,30 @@ mathmlTextIntegrationPointElements = frozenset(( (namespaces["mathml"], "mtext") )) +adjustForeignAttributes = { + "xlink:actuate": ("xlink", "actuate", namespaces["xlink"]), + "xlink:arcrole": ("xlink", "arcrole", namespaces["xlink"]), + "xlink:href": ("xlink", "href", namespaces["xlink"]), + "xlink:role": ("xlink", "role", namespaces["xlink"]), + "xlink:show": ("xlink", "show", namespaces["xlink"]), + 
"xlink:title": ("xlink", "title", namespaces["xlink"]), + "xlink:type": ("xlink", "type", namespaces["xlink"]), + "xml:base": ("xml", "base", namespaces["xml"]), + "xml:lang": ("xml", "lang", namespaces["xml"]), + "xml:space": ("xml", "space", namespaces["xml"]), + "xmlns": (None, "xmlns", namespaces["xmlns"]), + "xmlns:xlink": ("xmlns", "xlink", namespaces["xmlns"]) +} + +unadjustForeignAttributes = dict([((ns, local), qname) for qname, (prefix, local, ns) in + adjustForeignAttributes.items()]) + spaceCharacters = frozenset(( - u"\t", - u"\n", - u"\u000C", - u" ", - u"\r" + "\t", + "\n", + "\u000C", + " ", + "\r" )) tableInsertModeElements = frozenset(( @@ -456,8 +473,8 @@ asciiLetters = frozenset(string.ascii_letters) digits = frozenset(string.digits) hexDigits = frozenset(string.hexdigits) -asciiUpper2Lower = dict([(ord(c),ord(c.lower())) - for c in string.ascii_uppercase]) +asciiUpper2Lower = dict([(ord(c), ord(c.lower())) + for c in string.ascii_uppercase]) # Heading elements need to be ordered headingElements = ( @@ -503,8 +520,8 @@ booleanAttributes = { "": frozenset(("irrelevant",)), "style": frozenset(("scoped",)), "img": frozenset(("ismap",)), - "audio": frozenset(("autoplay","controls")), - "video": frozenset(("autoplay","controls")), + "audio": frozenset(("autoplay", "controls")), + "video": frozenset(("autoplay", "controls")), "script": frozenset(("defer", "async")), "details": frozenset(("open",)), "datagrid": frozenset(("multiple", "disabled")), @@ -523,2312 +540,2312 @@ booleanAttributes = { # entitiesWindows1252 has to be _ordered_ and needs to have an index. It # therefore can't be a frozenset. 
entitiesWindows1252 = ( - 8364, # 0x80 0x20AC EURO SIGN - 65533, # 0x81 UNDEFINED - 8218, # 0x82 0x201A SINGLE LOW-9 QUOTATION MARK - 402, # 0x83 0x0192 LATIN SMALL LETTER F WITH HOOK - 8222, # 0x84 0x201E DOUBLE LOW-9 QUOTATION MARK - 8230, # 0x85 0x2026 HORIZONTAL ELLIPSIS - 8224, # 0x86 0x2020 DAGGER - 8225, # 0x87 0x2021 DOUBLE DAGGER - 710, # 0x88 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT - 8240, # 0x89 0x2030 PER MILLE SIGN - 352, # 0x8A 0x0160 LATIN CAPITAL LETTER S WITH CARON - 8249, # 0x8B 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 338, # 0x8C 0x0152 LATIN CAPITAL LIGATURE OE - 65533, # 0x8D UNDEFINED - 381, # 0x8E 0x017D LATIN CAPITAL LETTER Z WITH CARON - 65533, # 0x8F UNDEFINED - 65533, # 0x90 UNDEFINED - 8216, # 0x91 0x2018 LEFT SINGLE QUOTATION MARK - 8217, # 0x92 0x2019 RIGHT SINGLE QUOTATION MARK - 8220, # 0x93 0x201C LEFT DOUBLE QUOTATION MARK - 8221, # 0x94 0x201D RIGHT DOUBLE QUOTATION MARK - 8226, # 0x95 0x2022 BULLET - 8211, # 0x96 0x2013 EN DASH - 8212, # 0x97 0x2014 EM DASH - 732, # 0x98 0x02DC SMALL TILDE - 8482, # 0x99 0x2122 TRADE MARK SIGN - 353, # 0x9A 0x0161 LATIN SMALL LETTER S WITH CARON - 8250, # 0x9B 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 339, # 0x9C 0x0153 LATIN SMALL LIGATURE OE - 65533, # 0x9D UNDEFINED - 382, # 0x9E 0x017E LATIN SMALL LETTER Z WITH CARON - 376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS + 8364, # 0x80 0x20AC EURO SIGN + 65533, # 0x81 UNDEFINED + 8218, # 0x82 0x201A SINGLE LOW-9 QUOTATION MARK + 402, # 0x83 0x0192 LATIN SMALL LETTER F WITH HOOK + 8222, # 0x84 0x201E DOUBLE LOW-9 QUOTATION MARK + 8230, # 0x85 0x2026 HORIZONTAL ELLIPSIS + 8224, # 0x86 0x2020 DAGGER + 8225, # 0x87 0x2021 DOUBLE DAGGER + 710, # 0x88 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT + 8240, # 0x89 0x2030 PER MILLE SIGN + 352, # 0x8A 0x0160 LATIN CAPITAL LETTER S WITH CARON + 8249, # 0x8B 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 338, # 0x8C 0x0152 LATIN CAPITAL LIGATURE OE + 65533, # 0x8D UNDEFINED + 381, # 
0x8E 0x017D LATIN CAPITAL LETTER Z WITH CARON + 65533, # 0x8F UNDEFINED + 65533, # 0x90 UNDEFINED + 8216, # 0x91 0x2018 LEFT SINGLE QUOTATION MARK + 8217, # 0x92 0x2019 RIGHT SINGLE QUOTATION MARK + 8220, # 0x93 0x201C LEFT DOUBLE QUOTATION MARK + 8221, # 0x94 0x201D RIGHT DOUBLE QUOTATION MARK + 8226, # 0x95 0x2022 BULLET + 8211, # 0x96 0x2013 EN DASH + 8212, # 0x97 0x2014 EM DASH + 732, # 0x98 0x02DC SMALL TILDE + 8482, # 0x99 0x2122 TRADE MARK SIGN + 353, # 0x9A 0x0161 LATIN SMALL LETTER S WITH CARON + 8250, # 0x9B 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 339, # 0x9C 0x0153 LATIN SMALL LIGATURE OE + 65533, # 0x9D UNDEFINED + 382, # 0x9E 0x017E LATIN SMALL LETTER Z WITH CARON + 376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS ) xmlEntities = frozenset(('lt;', 'gt;', 'amp;', 'apos;', 'quot;')) entities = { - "AElig": u"\xc6", - "AElig;": u"\xc6", - "AMP": u"&", - "AMP;": u"&", - "Aacute": u"\xc1", - "Aacute;": u"\xc1", - "Abreve;": u"\u0102", - "Acirc": u"\xc2", - "Acirc;": u"\xc2", - "Acy;": u"\u0410", - "Afr;": u"\U0001d504", - "Agrave": u"\xc0", - "Agrave;": u"\xc0", - "Alpha;": u"\u0391", - "Amacr;": u"\u0100", - "And;": u"\u2a53", - "Aogon;": u"\u0104", - "Aopf;": u"\U0001d538", - "ApplyFunction;": u"\u2061", - "Aring": u"\xc5", - "Aring;": u"\xc5", - "Ascr;": u"\U0001d49c", - "Assign;": u"\u2254", - "Atilde": u"\xc3", - "Atilde;": u"\xc3", - "Auml": u"\xc4", - "Auml;": u"\xc4", - "Backslash;": u"\u2216", - "Barv;": u"\u2ae7", - "Barwed;": u"\u2306", - "Bcy;": u"\u0411", - "Because;": u"\u2235", - "Bernoullis;": u"\u212c", - "Beta;": u"\u0392", - "Bfr;": u"\U0001d505", - "Bopf;": u"\U0001d539", - "Breve;": u"\u02d8", - "Bscr;": u"\u212c", - "Bumpeq;": u"\u224e", - "CHcy;": u"\u0427", - "COPY": u"\xa9", - "COPY;": u"\xa9", - "Cacute;": u"\u0106", - "Cap;": u"\u22d2", - "CapitalDifferentialD;": u"\u2145", - "Cayleys;": u"\u212d", - "Ccaron;": u"\u010c", - "Ccedil": u"\xc7", - "Ccedil;": u"\xc7", - "Ccirc;": u"\u0108", - "Cconint;": u"\u2230", 
- "Cdot;": u"\u010a", - "Cedilla;": u"\xb8", - "CenterDot;": u"\xb7", - "Cfr;": u"\u212d", - "Chi;": u"\u03a7", - "CircleDot;": u"\u2299", - "CircleMinus;": u"\u2296", - "CirclePlus;": u"\u2295", - "CircleTimes;": u"\u2297", - "ClockwiseContourIntegral;": u"\u2232", - "CloseCurlyDoubleQuote;": u"\u201d", - "CloseCurlyQuote;": u"\u2019", - "Colon;": u"\u2237", - "Colone;": u"\u2a74", - "Congruent;": u"\u2261", - "Conint;": u"\u222f", - "ContourIntegral;": u"\u222e", - "Copf;": u"\u2102", - "Coproduct;": u"\u2210", - "CounterClockwiseContourIntegral;": u"\u2233", - "Cross;": u"\u2a2f", - "Cscr;": u"\U0001d49e", - "Cup;": u"\u22d3", - "CupCap;": u"\u224d", - "DD;": u"\u2145", - "DDotrahd;": u"\u2911", - "DJcy;": u"\u0402", - "DScy;": u"\u0405", - "DZcy;": u"\u040f", - "Dagger;": u"\u2021", - "Darr;": u"\u21a1", - "Dashv;": u"\u2ae4", - "Dcaron;": u"\u010e", - "Dcy;": u"\u0414", - "Del;": u"\u2207", - "Delta;": u"\u0394", - "Dfr;": u"\U0001d507", - "DiacriticalAcute;": u"\xb4", - "DiacriticalDot;": u"\u02d9", - "DiacriticalDoubleAcute;": u"\u02dd", - "DiacriticalGrave;": u"`", - "DiacriticalTilde;": u"\u02dc", - "Diamond;": u"\u22c4", - "DifferentialD;": u"\u2146", - "Dopf;": u"\U0001d53b", - "Dot;": u"\xa8", - "DotDot;": u"\u20dc", - "DotEqual;": u"\u2250", - "DoubleContourIntegral;": u"\u222f", - "DoubleDot;": u"\xa8", - "DoubleDownArrow;": u"\u21d3", - "DoubleLeftArrow;": u"\u21d0", - "DoubleLeftRightArrow;": u"\u21d4", - "DoubleLeftTee;": u"\u2ae4", - "DoubleLongLeftArrow;": u"\u27f8", - "DoubleLongLeftRightArrow;": u"\u27fa", - "DoubleLongRightArrow;": u"\u27f9", - "DoubleRightArrow;": u"\u21d2", - "DoubleRightTee;": u"\u22a8", - "DoubleUpArrow;": u"\u21d1", - "DoubleUpDownArrow;": u"\u21d5", - "DoubleVerticalBar;": u"\u2225", - "DownArrow;": u"\u2193", - "DownArrowBar;": u"\u2913", - "DownArrowUpArrow;": u"\u21f5", - "DownBreve;": u"\u0311", - "DownLeftRightVector;": u"\u2950", - "DownLeftTeeVector;": u"\u295e", - "DownLeftVector;": u"\u21bd", - 
"DownLeftVectorBar;": u"\u2956", - "DownRightTeeVector;": u"\u295f", - "DownRightVector;": u"\u21c1", - "DownRightVectorBar;": u"\u2957", - "DownTee;": u"\u22a4", - "DownTeeArrow;": u"\u21a7", - "Downarrow;": u"\u21d3", - "Dscr;": u"\U0001d49f", - "Dstrok;": u"\u0110", - "ENG;": u"\u014a", - "ETH": u"\xd0", - "ETH;": u"\xd0", - "Eacute": u"\xc9", - "Eacute;": u"\xc9", - "Ecaron;": u"\u011a", - "Ecirc": u"\xca", - "Ecirc;": u"\xca", - "Ecy;": u"\u042d", - "Edot;": u"\u0116", - "Efr;": u"\U0001d508", - "Egrave": u"\xc8", - "Egrave;": u"\xc8", - "Element;": u"\u2208", - "Emacr;": u"\u0112", - "EmptySmallSquare;": u"\u25fb", - "EmptyVerySmallSquare;": u"\u25ab", - "Eogon;": u"\u0118", - "Eopf;": u"\U0001d53c", - "Epsilon;": u"\u0395", - "Equal;": u"\u2a75", - "EqualTilde;": u"\u2242", - "Equilibrium;": u"\u21cc", - "Escr;": u"\u2130", - "Esim;": u"\u2a73", - "Eta;": u"\u0397", - "Euml": u"\xcb", - "Euml;": u"\xcb", - "Exists;": u"\u2203", - "ExponentialE;": u"\u2147", - "Fcy;": u"\u0424", - "Ffr;": u"\U0001d509", - "FilledSmallSquare;": u"\u25fc", - "FilledVerySmallSquare;": u"\u25aa", - "Fopf;": u"\U0001d53d", - "ForAll;": u"\u2200", - "Fouriertrf;": u"\u2131", - "Fscr;": u"\u2131", - "GJcy;": u"\u0403", - "GT": u">", - "GT;": u">", - "Gamma;": u"\u0393", - "Gammad;": u"\u03dc", - "Gbreve;": u"\u011e", - "Gcedil;": u"\u0122", - "Gcirc;": u"\u011c", - "Gcy;": u"\u0413", - "Gdot;": u"\u0120", - "Gfr;": u"\U0001d50a", - "Gg;": u"\u22d9", - "Gopf;": u"\U0001d53e", - "GreaterEqual;": u"\u2265", - "GreaterEqualLess;": u"\u22db", - "GreaterFullEqual;": u"\u2267", - "GreaterGreater;": u"\u2aa2", - "GreaterLess;": u"\u2277", - "GreaterSlantEqual;": u"\u2a7e", - "GreaterTilde;": u"\u2273", - "Gscr;": u"\U0001d4a2", - "Gt;": u"\u226b", - "HARDcy;": u"\u042a", - "Hacek;": u"\u02c7", - "Hat;": u"^", - "Hcirc;": u"\u0124", - "Hfr;": u"\u210c", - "HilbertSpace;": u"\u210b", - "Hopf;": u"\u210d", - "HorizontalLine;": u"\u2500", - "Hscr;": u"\u210b", - "Hstrok;": u"\u0126", - 
"HumpDownHump;": u"\u224e", - "HumpEqual;": u"\u224f", - "IEcy;": u"\u0415", - "IJlig;": u"\u0132", - "IOcy;": u"\u0401", - "Iacute": u"\xcd", - "Iacute;": u"\xcd", - "Icirc": u"\xce", - "Icirc;": u"\xce", - "Icy;": u"\u0418", - "Idot;": u"\u0130", - "Ifr;": u"\u2111", - "Igrave": u"\xcc", - "Igrave;": u"\xcc", - "Im;": u"\u2111", - "Imacr;": u"\u012a", - "ImaginaryI;": u"\u2148", - "Implies;": u"\u21d2", - "Int;": u"\u222c", - "Integral;": u"\u222b", - "Intersection;": u"\u22c2", - "InvisibleComma;": u"\u2063", - "InvisibleTimes;": u"\u2062", - "Iogon;": u"\u012e", - "Iopf;": u"\U0001d540", - "Iota;": u"\u0399", - "Iscr;": u"\u2110", - "Itilde;": u"\u0128", - "Iukcy;": u"\u0406", - "Iuml": u"\xcf", - "Iuml;": u"\xcf", - "Jcirc;": u"\u0134", - "Jcy;": u"\u0419", - "Jfr;": u"\U0001d50d", - "Jopf;": u"\U0001d541", - "Jscr;": u"\U0001d4a5", - "Jsercy;": u"\u0408", - "Jukcy;": u"\u0404", - "KHcy;": u"\u0425", - "KJcy;": u"\u040c", - "Kappa;": u"\u039a", - "Kcedil;": u"\u0136", - "Kcy;": u"\u041a", - "Kfr;": u"\U0001d50e", - "Kopf;": u"\U0001d542", - "Kscr;": u"\U0001d4a6", - "LJcy;": u"\u0409", - "LT": u"<", - "LT;": u"<", - "Lacute;": u"\u0139", - "Lambda;": u"\u039b", - "Lang;": u"\u27ea", - "Laplacetrf;": u"\u2112", - "Larr;": u"\u219e", - "Lcaron;": u"\u013d", - "Lcedil;": u"\u013b", - "Lcy;": u"\u041b", - "LeftAngleBracket;": u"\u27e8", - "LeftArrow;": u"\u2190", - "LeftArrowBar;": u"\u21e4", - "LeftArrowRightArrow;": u"\u21c6", - "LeftCeiling;": u"\u2308", - "LeftDoubleBracket;": u"\u27e6", - "LeftDownTeeVector;": u"\u2961", - "LeftDownVector;": u"\u21c3", - "LeftDownVectorBar;": u"\u2959", - "LeftFloor;": u"\u230a", - "LeftRightArrow;": u"\u2194", - "LeftRightVector;": u"\u294e", - "LeftTee;": u"\u22a3", - "LeftTeeArrow;": u"\u21a4", - "LeftTeeVector;": u"\u295a", - "LeftTriangle;": u"\u22b2", - "LeftTriangleBar;": u"\u29cf", - "LeftTriangleEqual;": u"\u22b4", - "LeftUpDownVector;": u"\u2951", - "LeftUpTeeVector;": u"\u2960", - "LeftUpVector;": u"\u21bf", - 
"LeftUpVectorBar;": u"\u2958", - "LeftVector;": u"\u21bc", - "LeftVectorBar;": u"\u2952", - "Leftarrow;": u"\u21d0", - "Leftrightarrow;": u"\u21d4", - "LessEqualGreater;": u"\u22da", - "LessFullEqual;": u"\u2266", - "LessGreater;": u"\u2276", - "LessLess;": u"\u2aa1", - "LessSlantEqual;": u"\u2a7d", - "LessTilde;": u"\u2272", - "Lfr;": u"\U0001d50f", - "Ll;": u"\u22d8", - "Lleftarrow;": u"\u21da", - "Lmidot;": u"\u013f", - "LongLeftArrow;": u"\u27f5", - "LongLeftRightArrow;": u"\u27f7", - "LongRightArrow;": u"\u27f6", - "Longleftarrow;": u"\u27f8", - "Longleftrightarrow;": u"\u27fa", - "Longrightarrow;": u"\u27f9", - "Lopf;": u"\U0001d543", - "LowerLeftArrow;": u"\u2199", - "LowerRightArrow;": u"\u2198", - "Lscr;": u"\u2112", - "Lsh;": u"\u21b0", - "Lstrok;": u"\u0141", - "Lt;": u"\u226a", - "Map;": u"\u2905", - "Mcy;": u"\u041c", - "MediumSpace;": u"\u205f", - "Mellintrf;": u"\u2133", - "Mfr;": u"\U0001d510", - "MinusPlus;": u"\u2213", - "Mopf;": u"\U0001d544", - "Mscr;": u"\u2133", - "Mu;": u"\u039c", - "NJcy;": u"\u040a", - "Nacute;": u"\u0143", - "Ncaron;": u"\u0147", - "Ncedil;": u"\u0145", - "Ncy;": u"\u041d", - "NegativeMediumSpace;": u"\u200b", - "NegativeThickSpace;": u"\u200b", - "NegativeThinSpace;": u"\u200b", - "NegativeVeryThinSpace;": u"\u200b", - "NestedGreaterGreater;": u"\u226b", - "NestedLessLess;": u"\u226a", - "NewLine;": u"\n", - "Nfr;": u"\U0001d511", - "NoBreak;": u"\u2060", - "NonBreakingSpace;": u"\xa0", - "Nopf;": u"\u2115", - "Not;": u"\u2aec", - "NotCongruent;": u"\u2262", - "NotCupCap;": u"\u226d", - "NotDoubleVerticalBar;": u"\u2226", - "NotElement;": u"\u2209", - "NotEqual;": u"\u2260", - "NotEqualTilde;": u"\u2242\u0338", - "NotExists;": u"\u2204", - "NotGreater;": u"\u226f", - "NotGreaterEqual;": u"\u2271", - "NotGreaterFullEqual;": u"\u2267\u0338", - "NotGreaterGreater;": u"\u226b\u0338", - "NotGreaterLess;": u"\u2279", - "NotGreaterSlantEqual;": u"\u2a7e\u0338", - "NotGreaterTilde;": u"\u2275", - "NotHumpDownHump;": 
u"\u224e\u0338", - "NotHumpEqual;": u"\u224f\u0338", - "NotLeftTriangle;": u"\u22ea", - "NotLeftTriangleBar;": u"\u29cf\u0338", - "NotLeftTriangleEqual;": u"\u22ec", - "NotLess;": u"\u226e", - "NotLessEqual;": u"\u2270", - "NotLessGreater;": u"\u2278", - "NotLessLess;": u"\u226a\u0338", - "NotLessSlantEqual;": u"\u2a7d\u0338", - "NotLessTilde;": u"\u2274", - "NotNestedGreaterGreater;": u"\u2aa2\u0338", - "NotNestedLessLess;": u"\u2aa1\u0338", - "NotPrecedes;": u"\u2280", - "NotPrecedesEqual;": u"\u2aaf\u0338", - "NotPrecedesSlantEqual;": u"\u22e0", - "NotReverseElement;": u"\u220c", - "NotRightTriangle;": u"\u22eb", - "NotRightTriangleBar;": u"\u29d0\u0338", - "NotRightTriangleEqual;": u"\u22ed", - "NotSquareSubset;": u"\u228f\u0338", - "NotSquareSubsetEqual;": u"\u22e2", - "NotSquareSuperset;": u"\u2290\u0338", - "NotSquareSupersetEqual;": u"\u22e3", - "NotSubset;": u"\u2282\u20d2", - "NotSubsetEqual;": u"\u2288", - "NotSucceeds;": u"\u2281", - "NotSucceedsEqual;": u"\u2ab0\u0338", - "NotSucceedsSlantEqual;": u"\u22e1", - "NotSucceedsTilde;": u"\u227f\u0338", - "NotSuperset;": u"\u2283\u20d2", - "NotSupersetEqual;": u"\u2289", - "NotTilde;": u"\u2241", - "NotTildeEqual;": u"\u2244", - "NotTildeFullEqual;": u"\u2247", - "NotTildeTilde;": u"\u2249", - "NotVerticalBar;": u"\u2224", - "Nscr;": u"\U0001d4a9", - "Ntilde": u"\xd1", - "Ntilde;": u"\xd1", - "Nu;": u"\u039d", - "OElig;": u"\u0152", - "Oacute": u"\xd3", - "Oacute;": u"\xd3", - "Ocirc": u"\xd4", - "Ocirc;": u"\xd4", - "Ocy;": u"\u041e", - "Odblac;": u"\u0150", - "Ofr;": u"\U0001d512", - "Ograve": u"\xd2", - "Ograve;": u"\xd2", - "Omacr;": u"\u014c", - "Omega;": u"\u03a9", - "Omicron;": u"\u039f", - "Oopf;": u"\U0001d546", - "OpenCurlyDoubleQuote;": u"\u201c", - "OpenCurlyQuote;": u"\u2018", - "Or;": u"\u2a54", - "Oscr;": u"\U0001d4aa", - "Oslash": u"\xd8", - "Oslash;": u"\xd8", - "Otilde": u"\xd5", - "Otilde;": u"\xd5", - "Otimes;": u"\u2a37", - "Ouml": u"\xd6", - "Ouml;": u"\xd6", - "OverBar;": u"\u203e", - 
"OverBrace;": u"\u23de", - "OverBracket;": u"\u23b4", - "OverParenthesis;": u"\u23dc", - "PartialD;": u"\u2202", - "Pcy;": u"\u041f", - "Pfr;": u"\U0001d513", - "Phi;": u"\u03a6", - "Pi;": u"\u03a0", - "PlusMinus;": u"\xb1", - "Poincareplane;": u"\u210c", - "Popf;": u"\u2119", - "Pr;": u"\u2abb", - "Precedes;": u"\u227a", - "PrecedesEqual;": u"\u2aaf", - "PrecedesSlantEqual;": u"\u227c", - "PrecedesTilde;": u"\u227e", - "Prime;": u"\u2033", - "Product;": u"\u220f", - "Proportion;": u"\u2237", - "Proportional;": u"\u221d", - "Pscr;": u"\U0001d4ab", - "Psi;": u"\u03a8", - "QUOT": u"\"", - "QUOT;": u"\"", - "Qfr;": u"\U0001d514", - "Qopf;": u"\u211a", - "Qscr;": u"\U0001d4ac", - "RBarr;": u"\u2910", - "REG": u"\xae", - "REG;": u"\xae", - "Racute;": u"\u0154", - "Rang;": u"\u27eb", - "Rarr;": u"\u21a0", - "Rarrtl;": u"\u2916", - "Rcaron;": u"\u0158", - "Rcedil;": u"\u0156", - "Rcy;": u"\u0420", - "Re;": u"\u211c", - "ReverseElement;": u"\u220b", - "ReverseEquilibrium;": u"\u21cb", - "ReverseUpEquilibrium;": u"\u296f", - "Rfr;": u"\u211c", - "Rho;": u"\u03a1", - "RightAngleBracket;": u"\u27e9", - "RightArrow;": u"\u2192", - "RightArrowBar;": u"\u21e5", - "RightArrowLeftArrow;": u"\u21c4", - "RightCeiling;": u"\u2309", - "RightDoubleBracket;": u"\u27e7", - "RightDownTeeVector;": u"\u295d", - "RightDownVector;": u"\u21c2", - "RightDownVectorBar;": u"\u2955", - "RightFloor;": u"\u230b", - "RightTee;": u"\u22a2", - "RightTeeArrow;": u"\u21a6", - "RightTeeVector;": u"\u295b", - "RightTriangle;": u"\u22b3", - "RightTriangleBar;": u"\u29d0", - "RightTriangleEqual;": u"\u22b5", - "RightUpDownVector;": u"\u294f", - "RightUpTeeVector;": u"\u295c", - "RightUpVector;": u"\u21be", - "RightUpVectorBar;": u"\u2954", - "RightVector;": u"\u21c0", - "RightVectorBar;": u"\u2953", - "Rightarrow;": u"\u21d2", - "Ropf;": u"\u211d", - "RoundImplies;": u"\u2970", - "Rrightarrow;": u"\u21db", - "Rscr;": u"\u211b", - "Rsh;": u"\u21b1", - "RuleDelayed;": u"\u29f4", - "SHCHcy;": u"\u0429", - 
"SHcy;": u"\u0428", - "SOFTcy;": u"\u042c", - "Sacute;": u"\u015a", - "Sc;": u"\u2abc", - "Scaron;": u"\u0160", - "Scedil;": u"\u015e", - "Scirc;": u"\u015c", - "Scy;": u"\u0421", - "Sfr;": u"\U0001d516", - "ShortDownArrow;": u"\u2193", - "ShortLeftArrow;": u"\u2190", - "ShortRightArrow;": u"\u2192", - "ShortUpArrow;": u"\u2191", - "Sigma;": u"\u03a3", - "SmallCircle;": u"\u2218", - "Sopf;": u"\U0001d54a", - "Sqrt;": u"\u221a", - "Square;": u"\u25a1", - "SquareIntersection;": u"\u2293", - "SquareSubset;": u"\u228f", - "SquareSubsetEqual;": u"\u2291", - "SquareSuperset;": u"\u2290", - "SquareSupersetEqual;": u"\u2292", - "SquareUnion;": u"\u2294", - "Sscr;": u"\U0001d4ae", - "Star;": u"\u22c6", - "Sub;": u"\u22d0", - "Subset;": u"\u22d0", - "SubsetEqual;": u"\u2286", - "Succeeds;": u"\u227b", - "SucceedsEqual;": u"\u2ab0", - "SucceedsSlantEqual;": u"\u227d", - "SucceedsTilde;": u"\u227f", - "SuchThat;": u"\u220b", - "Sum;": u"\u2211", - "Sup;": u"\u22d1", - "Superset;": u"\u2283", - "SupersetEqual;": u"\u2287", - "Supset;": u"\u22d1", - "THORN": u"\xde", - "THORN;": u"\xde", - "TRADE;": u"\u2122", - "TSHcy;": u"\u040b", - "TScy;": u"\u0426", - "Tab;": u"\t", - "Tau;": u"\u03a4", - "Tcaron;": u"\u0164", - "Tcedil;": u"\u0162", - "Tcy;": u"\u0422", - "Tfr;": u"\U0001d517", - "Therefore;": u"\u2234", - "Theta;": u"\u0398", - "ThickSpace;": u"\u205f\u200a", - "ThinSpace;": u"\u2009", - "Tilde;": u"\u223c", - "TildeEqual;": u"\u2243", - "TildeFullEqual;": u"\u2245", - "TildeTilde;": u"\u2248", - "Topf;": u"\U0001d54b", - "TripleDot;": u"\u20db", - "Tscr;": u"\U0001d4af", - "Tstrok;": u"\u0166", - "Uacute": u"\xda", - "Uacute;": u"\xda", - "Uarr;": u"\u219f", - "Uarrocir;": u"\u2949", - "Ubrcy;": u"\u040e", - "Ubreve;": u"\u016c", - "Ucirc": u"\xdb", - "Ucirc;": u"\xdb", - "Ucy;": u"\u0423", - "Udblac;": u"\u0170", - "Ufr;": u"\U0001d518", - "Ugrave": u"\xd9", - "Ugrave;": u"\xd9", - "Umacr;": u"\u016a", - "UnderBar;": u"_", - "UnderBrace;": u"\u23df", - "UnderBracket;": 
u"\u23b5", - "UnderParenthesis;": u"\u23dd", - "Union;": u"\u22c3", - "UnionPlus;": u"\u228e", - "Uogon;": u"\u0172", - "Uopf;": u"\U0001d54c", - "UpArrow;": u"\u2191", - "UpArrowBar;": u"\u2912", - "UpArrowDownArrow;": u"\u21c5", - "UpDownArrow;": u"\u2195", - "UpEquilibrium;": u"\u296e", - "UpTee;": u"\u22a5", - "UpTeeArrow;": u"\u21a5", - "Uparrow;": u"\u21d1", - "Updownarrow;": u"\u21d5", - "UpperLeftArrow;": u"\u2196", - "UpperRightArrow;": u"\u2197", - "Upsi;": u"\u03d2", - "Upsilon;": u"\u03a5", - "Uring;": u"\u016e", - "Uscr;": u"\U0001d4b0", - "Utilde;": u"\u0168", - "Uuml": u"\xdc", - "Uuml;": u"\xdc", - "VDash;": u"\u22ab", - "Vbar;": u"\u2aeb", - "Vcy;": u"\u0412", - "Vdash;": u"\u22a9", - "Vdashl;": u"\u2ae6", - "Vee;": u"\u22c1", - "Verbar;": u"\u2016", - "Vert;": u"\u2016", - "VerticalBar;": u"\u2223", - "VerticalLine;": u"|", - "VerticalSeparator;": u"\u2758", - "VerticalTilde;": u"\u2240", - "VeryThinSpace;": u"\u200a", - "Vfr;": u"\U0001d519", - "Vopf;": u"\U0001d54d", - "Vscr;": u"\U0001d4b1", - "Vvdash;": u"\u22aa", - "Wcirc;": u"\u0174", - "Wedge;": u"\u22c0", - "Wfr;": u"\U0001d51a", - "Wopf;": u"\U0001d54e", - "Wscr;": u"\U0001d4b2", - "Xfr;": u"\U0001d51b", - "Xi;": u"\u039e", - "Xopf;": u"\U0001d54f", - "Xscr;": u"\U0001d4b3", - "YAcy;": u"\u042f", - "YIcy;": u"\u0407", - "YUcy;": u"\u042e", - "Yacute": u"\xdd", - "Yacute;": u"\xdd", - "Ycirc;": u"\u0176", - "Ycy;": u"\u042b", - "Yfr;": u"\U0001d51c", - "Yopf;": u"\U0001d550", - "Yscr;": u"\U0001d4b4", - "Yuml;": u"\u0178", - "ZHcy;": u"\u0416", - "Zacute;": u"\u0179", - "Zcaron;": u"\u017d", - "Zcy;": u"\u0417", - "Zdot;": u"\u017b", - "ZeroWidthSpace;": u"\u200b", - "Zeta;": u"\u0396", - "Zfr;": u"\u2128", - "Zopf;": u"\u2124", - "Zscr;": u"\U0001d4b5", - "aacute": u"\xe1", - "aacute;": u"\xe1", - "abreve;": u"\u0103", - "ac;": u"\u223e", - "acE;": u"\u223e\u0333", - "acd;": u"\u223f", - "acirc": u"\xe2", - "acirc;": u"\xe2", - "acute": u"\xb4", - "acute;": u"\xb4", - "acy;": u"\u0430", - 
"aelig": u"\xe6", - "aelig;": u"\xe6", - "af;": u"\u2061", - "afr;": u"\U0001d51e", - "agrave": u"\xe0", - "agrave;": u"\xe0", - "alefsym;": u"\u2135", - "aleph;": u"\u2135", - "alpha;": u"\u03b1", - "amacr;": u"\u0101", - "amalg;": u"\u2a3f", - "amp": u"&", - "amp;": u"&", - "and;": u"\u2227", - "andand;": u"\u2a55", - "andd;": u"\u2a5c", - "andslope;": u"\u2a58", - "andv;": u"\u2a5a", - "ang;": u"\u2220", - "ange;": u"\u29a4", - "angle;": u"\u2220", - "angmsd;": u"\u2221", - "angmsdaa;": u"\u29a8", - "angmsdab;": u"\u29a9", - "angmsdac;": u"\u29aa", - "angmsdad;": u"\u29ab", - "angmsdae;": u"\u29ac", - "angmsdaf;": u"\u29ad", - "angmsdag;": u"\u29ae", - "angmsdah;": u"\u29af", - "angrt;": u"\u221f", - "angrtvb;": u"\u22be", - "angrtvbd;": u"\u299d", - "angsph;": u"\u2222", - "angst;": u"\xc5", - "angzarr;": u"\u237c", - "aogon;": u"\u0105", - "aopf;": u"\U0001d552", - "ap;": u"\u2248", - "apE;": u"\u2a70", - "apacir;": u"\u2a6f", - "ape;": u"\u224a", - "apid;": u"\u224b", - "apos;": u"'", - "approx;": u"\u2248", - "approxeq;": u"\u224a", - "aring": u"\xe5", - "aring;": u"\xe5", - "ascr;": u"\U0001d4b6", - "ast;": u"*", - "asymp;": u"\u2248", - "asympeq;": u"\u224d", - "atilde": u"\xe3", - "atilde;": u"\xe3", - "auml": u"\xe4", - "auml;": u"\xe4", - "awconint;": u"\u2233", - "awint;": u"\u2a11", - "bNot;": u"\u2aed", - "backcong;": u"\u224c", - "backepsilon;": u"\u03f6", - "backprime;": u"\u2035", - "backsim;": u"\u223d", - "backsimeq;": u"\u22cd", - "barvee;": u"\u22bd", - "barwed;": u"\u2305", - "barwedge;": u"\u2305", - "bbrk;": u"\u23b5", - "bbrktbrk;": u"\u23b6", - "bcong;": u"\u224c", - "bcy;": u"\u0431", - "bdquo;": u"\u201e", - "becaus;": u"\u2235", - "because;": u"\u2235", - "bemptyv;": u"\u29b0", - "bepsi;": u"\u03f6", - "bernou;": u"\u212c", - "beta;": u"\u03b2", - "beth;": u"\u2136", - "between;": u"\u226c", - "bfr;": u"\U0001d51f", - "bigcap;": u"\u22c2", - "bigcirc;": u"\u25ef", - "bigcup;": u"\u22c3", - "bigodot;": u"\u2a00", - "bigoplus;": 
u"\u2a01", - "bigotimes;": u"\u2a02", - "bigsqcup;": u"\u2a06", - "bigstar;": u"\u2605", - "bigtriangledown;": u"\u25bd", - "bigtriangleup;": u"\u25b3", - "biguplus;": u"\u2a04", - "bigvee;": u"\u22c1", - "bigwedge;": u"\u22c0", - "bkarow;": u"\u290d", - "blacklozenge;": u"\u29eb", - "blacksquare;": u"\u25aa", - "blacktriangle;": u"\u25b4", - "blacktriangledown;": u"\u25be", - "blacktriangleleft;": u"\u25c2", - "blacktriangleright;": u"\u25b8", - "blank;": u"\u2423", - "blk12;": u"\u2592", - "blk14;": u"\u2591", - "blk34;": u"\u2593", - "block;": u"\u2588", - "bne;": u"=\u20e5", - "bnequiv;": u"\u2261\u20e5", - "bnot;": u"\u2310", - "bopf;": u"\U0001d553", - "bot;": u"\u22a5", - "bottom;": u"\u22a5", - "bowtie;": u"\u22c8", - "boxDL;": u"\u2557", - "boxDR;": u"\u2554", - "boxDl;": u"\u2556", - "boxDr;": u"\u2553", - "boxH;": u"\u2550", - "boxHD;": u"\u2566", - "boxHU;": u"\u2569", - "boxHd;": u"\u2564", - "boxHu;": u"\u2567", - "boxUL;": u"\u255d", - "boxUR;": u"\u255a", - "boxUl;": u"\u255c", - "boxUr;": u"\u2559", - "boxV;": u"\u2551", - "boxVH;": u"\u256c", - "boxVL;": u"\u2563", - "boxVR;": u"\u2560", - "boxVh;": u"\u256b", - "boxVl;": u"\u2562", - "boxVr;": u"\u255f", - "boxbox;": u"\u29c9", - "boxdL;": u"\u2555", - "boxdR;": u"\u2552", - "boxdl;": u"\u2510", - "boxdr;": u"\u250c", - "boxh;": u"\u2500", - "boxhD;": u"\u2565", - "boxhU;": u"\u2568", - "boxhd;": u"\u252c", - "boxhu;": u"\u2534", - "boxminus;": u"\u229f", - "boxplus;": u"\u229e", - "boxtimes;": u"\u22a0", - "boxuL;": u"\u255b", - "boxuR;": u"\u2558", - "boxul;": u"\u2518", - "boxur;": u"\u2514", - "boxv;": u"\u2502", - "boxvH;": u"\u256a", - "boxvL;": u"\u2561", - "boxvR;": u"\u255e", - "boxvh;": u"\u253c", - "boxvl;": u"\u2524", - "boxvr;": u"\u251c", - "bprime;": u"\u2035", - "breve;": u"\u02d8", - "brvbar": u"\xa6", - "brvbar;": u"\xa6", - "bscr;": u"\U0001d4b7", - "bsemi;": u"\u204f", - "bsim;": u"\u223d", - "bsime;": u"\u22cd", - "bsol;": u"\\", - "bsolb;": u"\u29c5", - "bsolhsub;": 
u"\u27c8", - "bull;": u"\u2022", - "bullet;": u"\u2022", - "bump;": u"\u224e", - "bumpE;": u"\u2aae", - "bumpe;": u"\u224f", - "bumpeq;": u"\u224f", - "cacute;": u"\u0107", - "cap;": u"\u2229", - "capand;": u"\u2a44", - "capbrcup;": u"\u2a49", - "capcap;": u"\u2a4b", - "capcup;": u"\u2a47", - "capdot;": u"\u2a40", - "caps;": u"\u2229\ufe00", - "caret;": u"\u2041", - "caron;": u"\u02c7", - "ccaps;": u"\u2a4d", - "ccaron;": u"\u010d", - "ccedil": u"\xe7", - "ccedil;": u"\xe7", - "ccirc;": u"\u0109", - "ccups;": u"\u2a4c", - "ccupssm;": u"\u2a50", - "cdot;": u"\u010b", - "cedil": u"\xb8", - "cedil;": u"\xb8", - "cemptyv;": u"\u29b2", - "cent": u"\xa2", - "cent;": u"\xa2", - "centerdot;": u"\xb7", - "cfr;": u"\U0001d520", - "chcy;": u"\u0447", - "check;": u"\u2713", - "checkmark;": u"\u2713", - "chi;": u"\u03c7", - "cir;": u"\u25cb", - "cirE;": u"\u29c3", - "circ;": u"\u02c6", - "circeq;": u"\u2257", - "circlearrowleft;": u"\u21ba", - "circlearrowright;": u"\u21bb", - "circledR;": u"\xae", - "circledS;": u"\u24c8", - "circledast;": u"\u229b", - "circledcirc;": u"\u229a", - "circleddash;": u"\u229d", - "cire;": u"\u2257", - "cirfnint;": u"\u2a10", - "cirmid;": u"\u2aef", - "cirscir;": u"\u29c2", - "clubs;": u"\u2663", - "clubsuit;": u"\u2663", - "colon;": u":", - "colone;": u"\u2254", - "coloneq;": u"\u2254", - "comma;": u",", - "commat;": u"@", - "comp;": u"\u2201", - "compfn;": u"\u2218", - "complement;": u"\u2201", - "complexes;": u"\u2102", - "cong;": u"\u2245", - "congdot;": u"\u2a6d", - "conint;": u"\u222e", - "copf;": u"\U0001d554", - "coprod;": u"\u2210", - "copy": u"\xa9", - "copy;": u"\xa9", - "copysr;": u"\u2117", - "crarr;": u"\u21b5", - "cross;": u"\u2717", - "cscr;": u"\U0001d4b8", - "csub;": u"\u2acf", - "csube;": u"\u2ad1", - "csup;": u"\u2ad0", - "csupe;": u"\u2ad2", - "ctdot;": u"\u22ef", - "cudarrl;": u"\u2938", - "cudarrr;": u"\u2935", - "cuepr;": u"\u22de", - "cuesc;": u"\u22df", - "cularr;": u"\u21b6", - "cularrp;": u"\u293d", - "cup;": u"\u222a", 
- "cupbrcap;": u"\u2a48", - "cupcap;": u"\u2a46", - "cupcup;": u"\u2a4a", - "cupdot;": u"\u228d", - "cupor;": u"\u2a45", - "cups;": u"\u222a\ufe00", - "curarr;": u"\u21b7", - "curarrm;": u"\u293c", - "curlyeqprec;": u"\u22de", - "curlyeqsucc;": u"\u22df", - "curlyvee;": u"\u22ce", - "curlywedge;": u"\u22cf", - "curren": u"\xa4", - "curren;": u"\xa4", - "curvearrowleft;": u"\u21b6", - "curvearrowright;": u"\u21b7", - "cuvee;": u"\u22ce", - "cuwed;": u"\u22cf", - "cwconint;": u"\u2232", - "cwint;": u"\u2231", - "cylcty;": u"\u232d", - "dArr;": u"\u21d3", - "dHar;": u"\u2965", - "dagger;": u"\u2020", - "daleth;": u"\u2138", - "darr;": u"\u2193", - "dash;": u"\u2010", - "dashv;": u"\u22a3", - "dbkarow;": u"\u290f", - "dblac;": u"\u02dd", - "dcaron;": u"\u010f", - "dcy;": u"\u0434", - "dd;": u"\u2146", - "ddagger;": u"\u2021", - "ddarr;": u"\u21ca", - "ddotseq;": u"\u2a77", - "deg": u"\xb0", - "deg;": u"\xb0", - "delta;": u"\u03b4", - "demptyv;": u"\u29b1", - "dfisht;": u"\u297f", - "dfr;": u"\U0001d521", - "dharl;": u"\u21c3", - "dharr;": u"\u21c2", - "diam;": u"\u22c4", - "diamond;": u"\u22c4", - "diamondsuit;": u"\u2666", - "diams;": u"\u2666", - "die;": u"\xa8", - "digamma;": u"\u03dd", - "disin;": u"\u22f2", - "div;": u"\xf7", - "divide": u"\xf7", - "divide;": u"\xf7", - "divideontimes;": u"\u22c7", - "divonx;": u"\u22c7", - "djcy;": u"\u0452", - "dlcorn;": u"\u231e", - "dlcrop;": u"\u230d", - "dollar;": u"$", - "dopf;": u"\U0001d555", - "dot;": u"\u02d9", - "doteq;": u"\u2250", - "doteqdot;": u"\u2251", - "dotminus;": u"\u2238", - "dotplus;": u"\u2214", - "dotsquare;": u"\u22a1", - "doublebarwedge;": u"\u2306", - "downarrow;": u"\u2193", - "downdownarrows;": u"\u21ca", - "downharpoonleft;": u"\u21c3", - "downharpoonright;": u"\u21c2", - "drbkarow;": u"\u2910", - "drcorn;": u"\u231f", - "drcrop;": u"\u230c", - "dscr;": u"\U0001d4b9", - "dscy;": u"\u0455", - "dsol;": u"\u29f6", - "dstrok;": u"\u0111", - "dtdot;": u"\u22f1", - "dtri;": u"\u25bf", - "dtrif;": 
u"\u25be", - "duarr;": u"\u21f5", - "duhar;": u"\u296f", - "dwangle;": u"\u29a6", - "dzcy;": u"\u045f", - "dzigrarr;": u"\u27ff", - "eDDot;": u"\u2a77", - "eDot;": u"\u2251", - "eacute": u"\xe9", - "eacute;": u"\xe9", - "easter;": u"\u2a6e", - "ecaron;": u"\u011b", - "ecir;": u"\u2256", - "ecirc": u"\xea", - "ecirc;": u"\xea", - "ecolon;": u"\u2255", - "ecy;": u"\u044d", - "edot;": u"\u0117", - "ee;": u"\u2147", - "efDot;": u"\u2252", - "efr;": u"\U0001d522", - "eg;": u"\u2a9a", - "egrave": u"\xe8", - "egrave;": u"\xe8", - "egs;": u"\u2a96", - "egsdot;": u"\u2a98", - "el;": u"\u2a99", - "elinters;": u"\u23e7", - "ell;": u"\u2113", - "els;": u"\u2a95", - "elsdot;": u"\u2a97", - "emacr;": u"\u0113", - "empty;": u"\u2205", - "emptyset;": u"\u2205", - "emptyv;": u"\u2205", - "emsp13;": u"\u2004", - "emsp14;": u"\u2005", - "emsp;": u"\u2003", - "eng;": u"\u014b", - "ensp;": u"\u2002", - "eogon;": u"\u0119", - "eopf;": u"\U0001d556", - "epar;": u"\u22d5", - "eparsl;": u"\u29e3", - "eplus;": u"\u2a71", - "epsi;": u"\u03b5", - "epsilon;": u"\u03b5", - "epsiv;": u"\u03f5", - "eqcirc;": u"\u2256", - "eqcolon;": u"\u2255", - "eqsim;": u"\u2242", - "eqslantgtr;": u"\u2a96", - "eqslantless;": u"\u2a95", - "equals;": u"=", - "equest;": u"\u225f", - "equiv;": u"\u2261", - "equivDD;": u"\u2a78", - "eqvparsl;": u"\u29e5", - "erDot;": u"\u2253", - "erarr;": u"\u2971", - "escr;": u"\u212f", - "esdot;": u"\u2250", - "esim;": u"\u2242", - "eta;": u"\u03b7", - "eth": u"\xf0", - "eth;": u"\xf0", - "euml": u"\xeb", - "euml;": u"\xeb", - "euro;": u"\u20ac", - "excl;": u"!", - "exist;": u"\u2203", - "expectation;": u"\u2130", - "exponentiale;": u"\u2147", - "fallingdotseq;": u"\u2252", - "fcy;": u"\u0444", - "female;": u"\u2640", - "ffilig;": u"\ufb03", - "fflig;": u"\ufb00", - "ffllig;": u"\ufb04", - "ffr;": u"\U0001d523", - "filig;": u"\ufb01", - "fjlig;": u"fj", - "flat;": u"\u266d", - "fllig;": u"\ufb02", - "fltns;": u"\u25b1", - "fnof;": u"\u0192", - "fopf;": u"\U0001d557", - 
"forall;": u"\u2200", - "fork;": u"\u22d4", - "forkv;": u"\u2ad9", - "fpartint;": u"\u2a0d", - "frac12": u"\xbd", - "frac12;": u"\xbd", - "frac13;": u"\u2153", - "frac14": u"\xbc", - "frac14;": u"\xbc", - "frac15;": u"\u2155", - "frac16;": u"\u2159", - "frac18;": u"\u215b", - "frac23;": u"\u2154", - "frac25;": u"\u2156", - "frac34": u"\xbe", - "frac34;": u"\xbe", - "frac35;": u"\u2157", - "frac38;": u"\u215c", - "frac45;": u"\u2158", - "frac56;": u"\u215a", - "frac58;": u"\u215d", - "frac78;": u"\u215e", - "frasl;": u"\u2044", - "frown;": u"\u2322", - "fscr;": u"\U0001d4bb", - "gE;": u"\u2267", - "gEl;": u"\u2a8c", - "gacute;": u"\u01f5", - "gamma;": u"\u03b3", - "gammad;": u"\u03dd", - "gap;": u"\u2a86", - "gbreve;": u"\u011f", - "gcirc;": u"\u011d", - "gcy;": u"\u0433", - "gdot;": u"\u0121", - "ge;": u"\u2265", - "gel;": u"\u22db", - "geq;": u"\u2265", - "geqq;": u"\u2267", - "geqslant;": u"\u2a7e", - "ges;": u"\u2a7e", - "gescc;": u"\u2aa9", - "gesdot;": u"\u2a80", - "gesdoto;": u"\u2a82", - "gesdotol;": u"\u2a84", - "gesl;": u"\u22db\ufe00", - "gesles;": u"\u2a94", - "gfr;": u"\U0001d524", - "gg;": u"\u226b", - "ggg;": u"\u22d9", - "gimel;": u"\u2137", - "gjcy;": u"\u0453", - "gl;": u"\u2277", - "glE;": u"\u2a92", - "gla;": u"\u2aa5", - "glj;": u"\u2aa4", - "gnE;": u"\u2269", - "gnap;": u"\u2a8a", - "gnapprox;": u"\u2a8a", - "gne;": u"\u2a88", - "gneq;": u"\u2a88", - "gneqq;": u"\u2269", - "gnsim;": u"\u22e7", - "gopf;": u"\U0001d558", - "grave;": u"`", - "gscr;": u"\u210a", - "gsim;": u"\u2273", - "gsime;": u"\u2a8e", - "gsiml;": u"\u2a90", - "gt": u">", - "gt;": u">", - "gtcc;": u"\u2aa7", - "gtcir;": u"\u2a7a", - "gtdot;": u"\u22d7", - "gtlPar;": u"\u2995", - "gtquest;": u"\u2a7c", - "gtrapprox;": u"\u2a86", - "gtrarr;": u"\u2978", - "gtrdot;": u"\u22d7", - "gtreqless;": u"\u22db", - "gtreqqless;": u"\u2a8c", - "gtrless;": u"\u2277", - "gtrsim;": u"\u2273", - "gvertneqq;": u"\u2269\ufe00", - "gvnE;": u"\u2269\ufe00", - "hArr;": u"\u21d4", - "hairsp;": 
u"\u200a", - "half;": u"\xbd", - "hamilt;": u"\u210b", - "hardcy;": u"\u044a", - "harr;": u"\u2194", - "harrcir;": u"\u2948", - "harrw;": u"\u21ad", - "hbar;": u"\u210f", - "hcirc;": u"\u0125", - "hearts;": u"\u2665", - "heartsuit;": u"\u2665", - "hellip;": u"\u2026", - "hercon;": u"\u22b9", - "hfr;": u"\U0001d525", - "hksearow;": u"\u2925", - "hkswarow;": u"\u2926", - "hoarr;": u"\u21ff", - "homtht;": u"\u223b", - "hookleftarrow;": u"\u21a9", - "hookrightarrow;": u"\u21aa", - "hopf;": u"\U0001d559", - "horbar;": u"\u2015", - "hscr;": u"\U0001d4bd", - "hslash;": u"\u210f", - "hstrok;": u"\u0127", - "hybull;": u"\u2043", - "hyphen;": u"\u2010", - "iacute": u"\xed", - "iacute;": u"\xed", - "ic;": u"\u2063", - "icirc": u"\xee", - "icirc;": u"\xee", - "icy;": u"\u0438", - "iecy;": u"\u0435", - "iexcl": u"\xa1", - "iexcl;": u"\xa1", - "iff;": u"\u21d4", - "ifr;": u"\U0001d526", - "igrave": u"\xec", - "igrave;": u"\xec", - "ii;": u"\u2148", - "iiiint;": u"\u2a0c", - "iiint;": u"\u222d", - "iinfin;": u"\u29dc", - "iiota;": u"\u2129", - "ijlig;": u"\u0133", - "imacr;": u"\u012b", - "image;": u"\u2111", - "imagline;": u"\u2110", - "imagpart;": u"\u2111", - "imath;": u"\u0131", - "imof;": u"\u22b7", - "imped;": u"\u01b5", - "in;": u"\u2208", - "incare;": u"\u2105", - "infin;": u"\u221e", - "infintie;": u"\u29dd", - "inodot;": u"\u0131", - "int;": u"\u222b", - "intcal;": u"\u22ba", - "integers;": u"\u2124", - "intercal;": u"\u22ba", - "intlarhk;": u"\u2a17", - "intprod;": u"\u2a3c", - "iocy;": u"\u0451", - "iogon;": u"\u012f", - "iopf;": u"\U0001d55a", - "iota;": u"\u03b9", - "iprod;": u"\u2a3c", - "iquest": u"\xbf", - "iquest;": u"\xbf", - "iscr;": u"\U0001d4be", - "isin;": u"\u2208", - "isinE;": u"\u22f9", - "isindot;": u"\u22f5", - "isins;": u"\u22f4", - "isinsv;": u"\u22f3", - "isinv;": u"\u2208", - "it;": u"\u2062", - "itilde;": u"\u0129", - "iukcy;": u"\u0456", - "iuml": u"\xef", - "iuml;": u"\xef", - "jcirc;": u"\u0135", - "jcy;": u"\u0439", - "jfr;": u"\U0001d527", - 
"jmath;": u"\u0237", - "jopf;": u"\U0001d55b", - "jscr;": u"\U0001d4bf", - "jsercy;": u"\u0458", - "jukcy;": u"\u0454", - "kappa;": u"\u03ba", - "kappav;": u"\u03f0", - "kcedil;": u"\u0137", - "kcy;": u"\u043a", - "kfr;": u"\U0001d528", - "kgreen;": u"\u0138", - "khcy;": u"\u0445", - "kjcy;": u"\u045c", - "kopf;": u"\U0001d55c", - "kscr;": u"\U0001d4c0", - "lAarr;": u"\u21da", - "lArr;": u"\u21d0", - "lAtail;": u"\u291b", - "lBarr;": u"\u290e", - "lE;": u"\u2266", - "lEg;": u"\u2a8b", - "lHar;": u"\u2962", - "lacute;": u"\u013a", - "laemptyv;": u"\u29b4", - "lagran;": u"\u2112", - "lambda;": u"\u03bb", - "lang;": u"\u27e8", - "langd;": u"\u2991", - "langle;": u"\u27e8", - "lap;": u"\u2a85", - "laquo": u"\xab", - "laquo;": u"\xab", - "larr;": u"\u2190", - "larrb;": u"\u21e4", - "larrbfs;": u"\u291f", - "larrfs;": u"\u291d", - "larrhk;": u"\u21a9", - "larrlp;": u"\u21ab", - "larrpl;": u"\u2939", - "larrsim;": u"\u2973", - "larrtl;": u"\u21a2", - "lat;": u"\u2aab", - "latail;": u"\u2919", - "late;": u"\u2aad", - "lates;": u"\u2aad\ufe00", - "lbarr;": u"\u290c", - "lbbrk;": u"\u2772", - "lbrace;": u"{", - "lbrack;": u"[", - "lbrke;": u"\u298b", - "lbrksld;": u"\u298f", - "lbrkslu;": u"\u298d", - "lcaron;": u"\u013e", - "lcedil;": u"\u013c", - "lceil;": u"\u2308", - "lcub;": u"{", - "lcy;": u"\u043b", - "ldca;": u"\u2936", - "ldquo;": u"\u201c", - "ldquor;": u"\u201e", - "ldrdhar;": u"\u2967", - "ldrushar;": u"\u294b", - "ldsh;": u"\u21b2", - "le;": u"\u2264", - "leftarrow;": u"\u2190", - "leftarrowtail;": u"\u21a2", - "leftharpoondown;": u"\u21bd", - "leftharpoonup;": u"\u21bc", - "leftleftarrows;": u"\u21c7", - "leftrightarrow;": u"\u2194", - "leftrightarrows;": u"\u21c6", - "leftrightharpoons;": u"\u21cb", - "leftrightsquigarrow;": u"\u21ad", - "leftthreetimes;": u"\u22cb", - "leg;": u"\u22da", - "leq;": u"\u2264", - "leqq;": u"\u2266", - "leqslant;": u"\u2a7d", - "les;": u"\u2a7d", - "lescc;": u"\u2aa8", - "lesdot;": u"\u2a7f", - "lesdoto;": u"\u2a81", - 
"lesdotor;": u"\u2a83", - "lesg;": u"\u22da\ufe00", - "lesges;": u"\u2a93", - "lessapprox;": u"\u2a85", - "lessdot;": u"\u22d6", - "lesseqgtr;": u"\u22da", - "lesseqqgtr;": u"\u2a8b", - "lessgtr;": u"\u2276", - "lesssim;": u"\u2272", - "lfisht;": u"\u297c", - "lfloor;": u"\u230a", - "lfr;": u"\U0001d529", - "lg;": u"\u2276", - "lgE;": u"\u2a91", - "lhard;": u"\u21bd", - "lharu;": u"\u21bc", - "lharul;": u"\u296a", - "lhblk;": u"\u2584", - "ljcy;": u"\u0459", - "ll;": u"\u226a", - "llarr;": u"\u21c7", - "llcorner;": u"\u231e", - "llhard;": u"\u296b", - "lltri;": u"\u25fa", - "lmidot;": u"\u0140", - "lmoust;": u"\u23b0", - "lmoustache;": u"\u23b0", - "lnE;": u"\u2268", - "lnap;": u"\u2a89", - "lnapprox;": u"\u2a89", - "lne;": u"\u2a87", - "lneq;": u"\u2a87", - "lneqq;": u"\u2268", - "lnsim;": u"\u22e6", - "loang;": u"\u27ec", - "loarr;": u"\u21fd", - "lobrk;": u"\u27e6", - "longleftarrow;": u"\u27f5", - "longleftrightarrow;": u"\u27f7", - "longmapsto;": u"\u27fc", - "longrightarrow;": u"\u27f6", - "looparrowleft;": u"\u21ab", - "looparrowright;": u"\u21ac", - "lopar;": u"\u2985", - "lopf;": u"\U0001d55d", - "loplus;": u"\u2a2d", - "lotimes;": u"\u2a34", - "lowast;": u"\u2217", - "lowbar;": u"_", - "loz;": u"\u25ca", - "lozenge;": u"\u25ca", - "lozf;": u"\u29eb", - "lpar;": u"(", - "lparlt;": u"\u2993", - "lrarr;": u"\u21c6", - "lrcorner;": u"\u231f", - "lrhar;": u"\u21cb", - "lrhard;": u"\u296d", - "lrm;": u"\u200e", - "lrtri;": u"\u22bf", - "lsaquo;": u"\u2039", - "lscr;": u"\U0001d4c1", - "lsh;": u"\u21b0", - "lsim;": u"\u2272", - "lsime;": u"\u2a8d", - "lsimg;": u"\u2a8f", - "lsqb;": u"[", - "lsquo;": u"\u2018", - "lsquor;": u"\u201a", - "lstrok;": u"\u0142", - "lt": u"<", - "lt;": u"<", - "ltcc;": u"\u2aa6", - "ltcir;": u"\u2a79", - "ltdot;": u"\u22d6", - "lthree;": u"\u22cb", - "ltimes;": u"\u22c9", - "ltlarr;": u"\u2976", - "ltquest;": u"\u2a7b", - "ltrPar;": u"\u2996", - "ltri;": u"\u25c3", - "ltrie;": u"\u22b4", - "ltrif;": u"\u25c2", - "lurdshar;": 
u"\u294a", - "luruhar;": u"\u2966", - "lvertneqq;": u"\u2268\ufe00", - "lvnE;": u"\u2268\ufe00", - "mDDot;": u"\u223a", - "macr": u"\xaf", - "macr;": u"\xaf", - "male;": u"\u2642", - "malt;": u"\u2720", - "maltese;": u"\u2720", - "map;": u"\u21a6", - "mapsto;": u"\u21a6", - "mapstodown;": u"\u21a7", - "mapstoleft;": u"\u21a4", - "mapstoup;": u"\u21a5", - "marker;": u"\u25ae", - "mcomma;": u"\u2a29", - "mcy;": u"\u043c", - "mdash;": u"\u2014", - "measuredangle;": u"\u2221", - "mfr;": u"\U0001d52a", - "mho;": u"\u2127", - "micro": u"\xb5", - "micro;": u"\xb5", - "mid;": u"\u2223", - "midast;": u"*", - "midcir;": u"\u2af0", - "middot": u"\xb7", - "middot;": u"\xb7", - "minus;": u"\u2212", - "minusb;": u"\u229f", - "minusd;": u"\u2238", - "minusdu;": u"\u2a2a", - "mlcp;": u"\u2adb", - "mldr;": u"\u2026", - "mnplus;": u"\u2213", - "models;": u"\u22a7", - "mopf;": u"\U0001d55e", - "mp;": u"\u2213", - "mscr;": u"\U0001d4c2", - "mstpos;": u"\u223e", - "mu;": u"\u03bc", - "multimap;": u"\u22b8", - "mumap;": u"\u22b8", - "nGg;": u"\u22d9\u0338", - "nGt;": u"\u226b\u20d2", - "nGtv;": u"\u226b\u0338", - "nLeftarrow;": u"\u21cd", - "nLeftrightarrow;": u"\u21ce", - "nLl;": u"\u22d8\u0338", - "nLt;": u"\u226a\u20d2", - "nLtv;": u"\u226a\u0338", - "nRightarrow;": u"\u21cf", - "nVDash;": u"\u22af", - "nVdash;": u"\u22ae", - "nabla;": u"\u2207", - "nacute;": u"\u0144", - "nang;": u"\u2220\u20d2", - "nap;": u"\u2249", - "napE;": u"\u2a70\u0338", - "napid;": u"\u224b\u0338", - "napos;": u"\u0149", - "napprox;": u"\u2249", - "natur;": u"\u266e", - "natural;": u"\u266e", - "naturals;": u"\u2115", - "nbsp": u"\xa0", - "nbsp;": u"\xa0", - "nbump;": u"\u224e\u0338", - "nbumpe;": u"\u224f\u0338", - "ncap;": u"\u2a43", - "ncaron;": u"\u0148", - "ncedil;": u"\u0146", - "ncong;": u"\u2247", - "ncongdot;": u"\u2a6d\u0338", - "ncup;": u"\u2a42", - "ncy;": u"\u043d", - "ndash;": u"\u2013", - "ne;": u"\u2260", - "neArr;": u"\u21d7", - "nearhk;": u"\u2924", - "nearr;": u"\u2197", - "nearrow;": 
u"\u2197", - "nedot;": u"\u2250\u0338", - "nequiv;": u"\u2262", - "nesear;": u"\u2928", - "nesim;": u"\u2242\u0338", - "nexist;": u"\u2204", - "nexists;": u"\u2204", - "nfr;": u"\U0001d52b", - "ngE;": u"\u2267\u0338", - "nge;": u"\u2271", - "ngeq;": u"\u2271", - "ngeqq;": u"\u2267\u0338", - "ngeqslant;": u"\u2a7e\u0338", - "nges;": u"\u2a7e\u0338", - "ngsim;": u"\u2275", - "ngt;": u"\u226f", - "ngtr;": u"\u226f", - "nhArr;": u"\u21ce", - "nharr;": u"\u21ae", - "nhpar;": u"\u2af2", - "ni;": u"\u220b", - "nis;": u"\u22fc", - "nisd;": u"\u22fa", - "niv;": u"\u220b", - "njcy;": u"\u045a", - "nlArr;": u"\u21cd", - "nlE;": u"\u2266\u0338", - "nlarr;": u"\u219a", - "nldr;": u"\u2025", - "nle;": u"\u2270", - "nleftarrow;": u"\u219a", - "nleftrightarrow;": u"\u21ae", - "nleq;": u"\u2270", - "nleqq;": u"\u2266\u0338", - "nleqslant;": u"\u2a7d\u0338", - "nles;": u"\u2a7d\u0338", - "nless;": u"\u226e", - "nlsim;": u"\u2274", - "nlt;": u"\u226e", - "nltri;": u"\u22ea", - "nltrie;": u"\u22ec", - "nmid;": u"\u2224", - "nopf;": u"\U0001d55f", - "not": u"\xac", - "not;": u"\xac", - "notin;": u"\u2209", - "notinE;": u"\u22f9\u0338", - "notindot;": u"\u22f5\u0338", - "notinva;": u"\u2209", - "notinvb;": u"\u22f7", - "notinvc;": u"\u22f6", - "notni;": u"\u220c", - "notniva;": u"\u220c", - "notnivb;": u"\u22fe", - "notnivc;": u"\u22fd", - "npar;": u"\u2226", - "nparallel;": u"\u2226", - "nparsl;": u"\u2afd\u20e5", - "npart;": u"\u2202\u0338", - "npolint;": u"\u2a14", - "npr;": u"\u2280", - "nprcue;": u"\u22e0", - "npre;": u"\u2aaf\u0338", - "nprec;": u"\u2280", - "npreceq;": u"\u2aaf\u0338", - "nrArr;": u"\u21cf", - "nrarr;": u"\u219b", - "nrarrc;": u"\u2933\u0338", - "nrarrw;": u"\u219d\u0338", - "nrightarrow;": u"\u219b", - "nrtri;": u"\u22eb", - "nrtrie;": u"\u22ed", - "nsc;": u"\u2281", - "nsccue;": u"\u22e1", - "nsce;": u"\u2ab0\u0338", - "nscr;": u"\U0001d4c3", - "nshortmid;": u"\u2224", - "nshortparallel;": u"\u2226", - "nsim;": u"\u2241", - "nsime;": u"\u2244", - "nsimeq;": 
u"\u2244", - "nsmid;": u"\u2224", - "nspar;": u"\u2226", - "nsqsube;": u"\u22e2", - "nsqsupe;": u"\u22e3", - "nsub;": u"\u2284", - "nsubE;": u"\u2ac5\u0338", - "nsube;": u"\u2288", - "nsubset;": u"\u2282\u20d2", - "nsubseteq;": u"\u2288", - "nsubseteqq;": u"\u2ac5\u0338", - "nsucc;": u"\u2281", - "nsucceq;": u"\u2ab0\u0338", - "nsup;": u"\u2285", - "nsupE;": u"\u2ac6\u0338", - "nsupe;": u"\u2289", - "nsupset;": u"\u2283\u20d2", - "nsupseteq;": u"\u2289", - "nsupseteqq;": u"\u2ac6\u0338", - "ntgl;": u"\u2279", - "ntilde": u"\xf1", - "ntilde;": u"\xf1", - "ntlg;": u"\u2278", - "ntriangleleft;": u"\u22ea", - "ntrianglelefteq;": u"\u22ec", - "ntriangleright;": u"\u22eb", - "ntrianglerighteq;": u"\u22ed", - "nu;": u"\u03bd", - "num;": u"#", - "numero;": u"\u2116", - "numsp;": u"\u2007", - "nvDash;": u"\u22ad", - "nvHarr;": u"\u2904", - "nvap;": u"\u224d\u20d2", - "nvdash;": u"\u22ac", - "nvge;": u"\u2265\u20d2", - "nvgt;": u">\u20d2", - "nvinfin;": u"\u29de", - "nvlArr;": u"\u2902", - "nvle;": u"\u2264\u20d2", - "nvlt;": u"<\u20d2", - "nvltrie;": u"\u22b4\u20d2", - "nvrArr;": u"\u2903", - "nvrtrie;": u"\u22b5\u20d2", - "nvsim;": u"\u223c\u20d2", - "nwArr;": u"\u21d6", - "nwarhk;": u"\u2923", - "nwarr;": u"\u2196", - "nwarrow;": u"\u2196", - "nwnear;": u"\u2927", - "oS;": u"\u24c8", - "oacute": u"\xf3", - "oacute;": u"\xf3", - "oast;": u"\u229b", - "ocir;": u"\u229a", - "ocirc": u"\xf4", - "ocirc;": u"\xf4", - "ocy;": u"\u043e", - "odash;": u"\u229d", - "odblac;": u"\u0151", - "odiv;": u"\u2a38", - "odot;": u"\u2299", - "odsold;": u"\u29bc", - "oelig;": u"\u0153", - "ofcir;": u"\u29bf", - "ofr;": u"\U0001d52c", - "ogon;": u"\u02db", - "ograve": u"\xf2", - "ograve;": u"\xf2", - "ogt;": u"\u29c1", - "ohbar;": u"\u29b5", - "ohm;": u"\u03a9", - "oint;": u"\u222e", - "olarr;": u"\u21ba", - "olcir;": u"\u29be", - "olcross;": u"\u29bb", - "oline;": u"\u203e", - "olt;": u"\u29c0", - "omacr;": u"\u014d", - "omega;": u"\u03c9", - "omicron;": u"\u03bf", - "omid;": u"\u29b6", - 
"ominus;": u"\u2296", - "oopf;": u"\U0001d560", - "opar;": u"\u29b7", - "operp;": u"\u29b9", - "oplus;": u"\u2295", - "or;": u"\u2228", - "orarr;": u"\u21bb", - "ord;": u"\u2a5d", - "order;": u"\u2134", - "orderof;": u"\u2134", - "ordf": u"\xaa", - "ordf;": u"\xaa", - "ordm": u"\xba", - "ordm;": u"\xba", - "origof;": u"\u22b6", - "oror;": u"\u2a56", - "orslope;": u"\u2a57", - "orv;": u"\u2a5b", - "oscr;": u"\u2134", - "oslash": u"\xf8", - "oslash;": u"\xf8", - "osol;": u"\u2298", - "otilde": u"\xf5", - "otilde;": u"\xf5", - "otimes;": u"\u2297", - "otimesas;": u"\u2a36", - "ouml": u"\xf6", - "ouml;": u"\xf6", - "ovbar;": u"\u233d", - "par;": u"\u2225", - "para": u"\xb6", - "para;": u"\xb6", - "parallel;": u"\u2225", - "parsim;": u"\u2af3", - "parsl;": u"\u2afd", - "part;": u"\u2202", - "pcy;": u"\u043f", - "percnt;": u"%", - "period;": u".", - "permil;": u"\u2030", - "perp;": u"\u22a5", - "pertenk;": u"\u2031", - "pfr;": u"\U0001d52d", - "phi;": u"\u03c6", - "phiv;": u"\u03d5", - "phmmat;": u"\u2133", - "phone;": u"\u260e", - "pi;": u"\u03c0", - "pitchfork;": u"\u22d4", - "piv;": u"\u03d6", - "planck;": u"\u210f", - "planckh;": u"\u210e", - "plankv;": u"\u210f", - "plus;": u"+", - "plusacir;": u"\u2a23", - "plusb;": u"\u229e", - "pluscir;": u"\u2a22", - "plusdo;": u"\u2214", - "plusdu;": u"\u2a25", - "pluse;": u"\u2a72", - "plusmn": u"\xb1", - "plusmn;": u"\xb1", - "plussim;": u"\u2a26", - "plustwo;": u"\u2a27", - "pm;": u"\xb1", - "pointint;": u"\u2a15", - "popf;": u"\U0001d561", - "pound": u"\xa3", - "pound;": u"\xa3", - "pr;": u"\u227a", - "prE;": u"\u2ab3", - "prap;": u"\u2ab7", - "prcue;": u"\u227c", - "pre;": u"\u2aaf", - "prec;": u"\u227a", - "precapprox;": u"\u2ab7", - "preccurlyeq;": u"\u227c", - "preceq;": u"\u2aaf", - "precnapprox;": u"\u2ab9", - "precneqq;": u"\u2ab5", - "precnsim;": u"\u22e8", - "precsim;": u"\u227e", - "prime;": u"\u2032", - "primes;": u"\u2119", - "prnE;": u"\u2ab5", - "prnap;": u"\u2ab9", - "prnsim;": u"\u22e8", - "prod;": 
u"\u220f", - "profalar;": u"\u232e", - "profline;": u"\u2312", - "profsurf;": u"\u2313", - "prop;": u"\u221d", - "propto;": u"\u221d", - "prsim;": u"\u227e", - "prurel;": u"\u22b0", - "pscr;": u"\U0001d4c5", - "psi;": u"\u03c8", - "puncsp;": u"\u2008", - "qfr;": u"\U0001d52e", - "qint;": u"\u2a0c", - "qopf;": u"\U0001d562", - "qprime;": u"\u2057", - "qscr;": u"\U0001d4c6", - "quaternions;": u"\u210d", - "quatint;": u"\u2a16", - "quest;": u"?", - "questeq;": u"\u225f", - "quot": u"\"", - "quot;": u"\"", - "rAarr;": u"\u21db", - "rArr;": u"\u21d2", - "rAtail;": u"\u291c", - "rBarr;": u"\u290f", - "rHar;": u"\u2964", - "race;": u"\u223d\u0331", - "racute;": u"\u0155", - "radic;": u"\u221a", - "raemptyv;": u"\u29b3", - "rang;": u"\u27e9", - "rangd;": u"\u2992", - "range;": u"\u29a5", - "rangle;": u"\u27e9", - "raquo": u"\xbb", - "raquo;": u"\xbb", - "rarr;": u"\u2192", - "rarrap;": u"\u2975", - "rarrb;": u"\u21e5", - "rarrbfs;": u"\u2920", - "rarrc;": u"\u2933", - "rarrfs;": u"\u291e", - "rarrhk;": u"\u21aa", - "rarrlp;": u"\u21ac", - "rarrpl;": u"\u2945", - "rarrsim;": u"\u2974", - "rarrtl;": u"\u21a3", - "rarrw;": u"\u219d", - "ratail;": u"\u291a", - "ratio;": u"\u2236", - "rationals;": u"\u211a", - "rbarr;": u"\u290d", - "rbbrk;": u"\u2773", - "rbrace;": u"}", - "rbrack;": u"]", - "rbrke;": u"\u298c", - "rbrksld;": u"\u298e", - "rbrkslu;": u"\u2990", - "rcaron;": u"\u0159", - "rcedil;": u"\u0157", - "rceil;": u"\u2309", - "rcub;": u"}", - "rcy;": u"\u0440", - "rdca;": u"\u2937", - "rdldhar;": u"\u2969", - "rdquo;": u"\u201d", - "rdquor;": u"\u201d", - "rdsh;": u"\u21b3", - "real;": u"\u211c", - "realine;": u"\u211b", - "realpart;": u"\u211c", - "reals;": u"\u211d", - "rect;": u"\u25ad", - "reg": u"\xae", - "reg;": u"\xae", - "rfisht;": u"\u297d", - "rfloor;": u"\u230b", - "rfr;": u"\U0001d52f", - "rhard;": u"\u21c1", - "rharu;": u"\u21c0", - "rharul;": u"\u296c", - "rho;": u"\u03c1", - "rhov;": u"\u03f1", - "rightarrow;": u"\u2192", - "rightarrowtail;": u"\u21a3", - 
"rightharpoondown;": u"\u21c1", - "rightharpoonup;": u"\u21c0", - "rightleftarrows;": u"\u21c4", - "rightleftharpoons;": u"\u21cc", - "rightrightarrows;": u"\u21c9", - "rightsquigarrow;": u"\u219d", - "rightthreetimes;": u"\u22cc", - "ring;": u"\u02da", - "risingdotseq;": u"\u2253", - "rlarr;": u"\u21c4", - "rlhar;": u"\u21cc", - "rlm;": u"\u200f", - "rmoust;": u"\u23b1", - "rmoustache;": u"\u23b1", - "rnmid;": u"\u2aee", - "roang;": u"\u27ed", - "roarr;": u"\u21fe", - "robrk;": u"\u27e7", - "ropar;": u"\u2986", - "ropf;": u"\U0001d563", - "roplus;": u"\u2a2e", - "rotimes;": u"\u2a35", - "rpar;": u")", - "rpargt;": u"\u2994", - "rppolint;": u"\u2a12", - "rrarr;": u"\u21c9", - "rsaquo;": u"\u203a", - "rscr;": u"\U0001d4c7", - "rsh;": u"\u21b1", - "rsqb;": u"]", - "rsquo;": u"\u2019", - "rsquor;": u"\u2019", - "rthree;": u"\u22cc", - "rtimes;": u"\u22ca", - "rtri;": u"\u25b9", - "rtrie;": u"\u22b5", - "rtrif;": u"\u25b8", - "rtriltri;": u"\u29ce", - "ruluhar;": u"\u2968", - "rx;": u"\u211e", - "sacute;": u"\u015b", - "sbquo;": u"\u201a", - "sc;": u"\u227b", - "scE;": u"\u2ab4", - "scap;": u"\u2ab8", - "scaron;": u"\u0161", - "sccue;": u"\u227d", - "sce;": u"\u2ab0", - "scedil;": u"\u015f", - "scirc;": u"\u015d", - "scnE;": u"\u2ab6", - "scnap;": u"\u2aba", - "scnsim;": u"\u22e9", - "scpolint;": u"\u2a13", - "scsim;": u"\u227f", - "scy;": u"\u0441", - "sdot;": u"\u22c5", - "sdotb;": u"\u22a1", - "sdote;": u"\u2a66", - "seArr;": u"\u21d8", - "searhk;": u"\u2925", - "searr;": u"\u2198", - "searrow;": u"\u2198", - "sect": u"\xa7", - "sect;": u"\xa7", - "semi;": u";", - "seswar;": u"\u2929", - "setminus;": u"\u2216", - "setmn;": u"\u2216", - "sext;": u"\u2736", - "sfr;": u"\U0001d530", - "sfrown;": u"\u2322", - "sharp;": u"\u266f", - "shchcy;": u"\u0449", - "shcy;": u"\u0448", - "shortmid;": u"\u2223", - "shortparallel;": u"\u2225", - "shy": u"\xad", - "shy;": u"\xad", - "sigma;": u"\u03c3", - "sigmaf;": u"\u03c2", - "sigmav;": u"\u03c2", - "sim;": u"\u223c", - "simdot;": 
u"\u2a6a", - "sime;": u"\u2243", - "simeq;": u"\u2243", - "simg;": u"\u2a9e", - "simgE;": u"\u2aa0", - "siml;": u"\u2a9d", - "simlE;": u"\u2a9f", - "simne;": u"\u2246", - "simplus;": u"\u2a24", - "simrarr;": u"\u2972", - "slarr;": u"\u2190", - "smallsetminus;": u"\u2216", - "smashp;": u"\u2a33", - "smeparsl;": u"\u29e4", - "smid;": u"\u2223", - "smile;": u"\u2323", - "smt;": u"\u2aaa", - "smte;": u"\u2aac", - "smtes;": u"\u2aac\ufe00", - "softcy;": u"\u044c", - "sol;": u"/", - "solb;": u"\u29c4", - "solbar;": u"\u233f", - "sopf;": u"\U0001d564", - "spades;": u"\u2660", - "spadesuit;": u"\u2660", - "spar;": u"\u2225", - "sqcap;": u"\u2293", - "sqcaps;": u"\u2293\ufe00", - "sqcup;": u"\u2294", - "sqcups;": u"\u2294\ufe00", - "sqsub;": u"\u228f", - "sqsube;": u"\u2291", - "sqsubset;": u"\u228f", - "sqsubseteq;": u"\u2291", - "sqsup;": u"\u2290", - "sqsupe;": u"\u2292", - "sqsupset;": u"\u2290", - "sqsupseteq;": u"\u2292", - "squ;": u"\u25a1", - "square;": u"\u25a1", - "squarf;": u"\u25aa", - "squf;": u"\u25aa", - "srarr;": u"\u2192", - "sscr;": u"\U0001d4c8", - "ssetmn;": u"\u2216", - "ssmile;": u"\u2323", - "sstarf;": u"\u22c6", - "star;": u"\u2606", - "starf;": u"\u2605", - "straightepsilon;": u"\u03f5", - "straightphi;": u"\u03d5", - "strns;": u"\xaf", - "sub;": u"\u2282", - "subE;": u"\u2ac5", - "subdot;": u"\u2abd", - "sube;": u"\u2286", - "subedot;": u"\u2ac3", - "submult;": u"\u2ac1", - "subnE;": u"\u2acb", - "subne;": u"\u228a", - "subplus;": u"\u2abf", - "subrarr;": u"\u2979", - "subset;": u"\u2282", - "subseteq;": u"\u2286", - "subseteqq;": u"\u2ac5", - "subsetneq;": u"\u228a", - "subsetneqq;": u"\u2acb", - "subsim;": u"\u2ac7", - "subsub;": u"\u2ad5", - "subsup;": u"\u2ad3", - "succ;": u"\u227b", - "succapprox;": u"\u2ab8", - "succcurlyeq;": u"\u227d", - "succeq;": u"\u2ab0", - "succnapprox;": u"\u2aba", - "succneqq;": u"\u2ab6", - "succnsim;": u"\u22e9", - "succsim;": u"\u227f", - "sum;": u"\u2211", - "sung;": u"\u266a", - "sup1": u"\xb9", - "sup1;": 
u"\xb9", - "sup2": u"\xb2", - "sup2;": u"\xb2", - "sup3": u"\xb3", - "sup3;": u"\xb3", - "sup;": u"\u2283", - "supE;": u"\u2ac6", - "supdot;": u"\u2abe", - "supdsub;": u"\u2ad8", - "supe;": u"\u2287", - "supedot;": u"\u2ac4", - "suphsol;": u"\u27c9", - "suphsub;": u"\u2ad7", - "suplarr;": u"\u297b", - "supmult;": u"\u2ac2", - "supnE;": u"\u2acc", - "supne;": u"\u228b", - "supplus;": u"\u2ac0", - "supset;": u"\u2283", - "supseteq;": u"\u2287", - "supseteqq;": u"\u2ac6", - "supsetneq;": u"\u228b", - "supsetneqq;": u"\u2acc", - "supsim;": u"\u2ac8", - "supsub;": u"\u2ad4", - "supsup;": u"\u2ad6", - "swArr;": u"\u21d9", - "swarhk;": u"\u2926", - "swarr;": u"\u2199", - "swarrow;": u"\u2199", - "swnwar;": u"\u292a", - "szlig": u"\xdf", - "szlig;": u"\xdf", - "target;": u"\u2316", - "tau;": u"\u03c4", - "tbrk;": u"\u23b4", - "tcaron;": u"\u0165", - "tcedil;": u"\u0163", - "tcy;": u"\u0442", - "tdot;": u"\u20db", - "telrec;": u"\u2315", - "tfr;": u"\U0001d531", - "there4;": u"\u2234", - "therefore;": u"\u2234", - "theta;": u"\u03b8", - "thetasym;": u"\u03d1", - "thetav;": u"\u03d1", - "thickapprox;": u"\u2248", - "thicksim;": u"\u223c", - "thinsp;": u"\u2009", - "thkap;": u"\u2248", - "thksim;": u"\u223c", - "thorn": u"\xfe", - "thorn;": u"\xfe", - "tilde;": u"\u02dc", - "times": u"\xd7", - "times;": u"\xd7", - "timesb;": u"\u22a0", - "timesbar;": u"\u2a31", - "timesd;": u"\u2a30", - "tint;": u"\u222d", - "toea;": u"\u2928", - "top;": u"\u22a4", - "topbot;": u"\u2336", - "topcir;": u"\u2af1", - "topf;": u"\U0001d565", - "topfork;": u"\u2ada", - "tosa;": u"\u2929", - "tprime;": u"\u2034", - "trade;": u"\u2122", - "triangle;": u"\u25b5", - "triangledown;": u"\u25bf", - "triangleleft;": u"\u25c3", - "trianglelefteq;": u"\u22b4", - "triangleq;": u"\u225c", - "triangleright;": u"\u25b9", - "trianglerighteq;": u"\u22b5", - "tridot;": u"\u25ec", - "trie;": u"\u225c", - "triminus;": u"\u2a3a", - "triplus;": u"\u2a39", - "trisb;": u"\u29cd", - "tritime;": u"\u2a3b", - "trpezium;": 
u"\u23e2", - "tscr;": u"\U0001d4c9", - "tscy;": u"\u0446", - "tshcy;": u"\u045b", - "tstrok;": u"\u0167", - "twixt;": u"\u226c", - "twoheadleftarrow;": u"\u219e", - "twoheadrightarrow;": u"\u21a0", - "uArr;": u"\u21d1", - "uHar;": u"\u2963", - "uacute": u"\xfa", - "uacute;": u"\xfa", - "uarr;": u"\u2191", - "ubrcy;": u"\u045e", - "ubreve;": u"\u016d", - "ucirc": u"\xfb", - "ucirc;": u"\xfb", - "ucy;": u"\u0443", - "udarr;": u"\u21c5", - "udblac;": u"\u0171", - "udhar;": u"\u296e", - "ufisht;": u"\u297e", - "ufr;": u"\U0001d532", - "ugrave": u"\xf9", - "ugrave;": u"\xf9", - "uharl;": u"\u21bf", - "uharr;": u"\u21be", - "uhblk;": u"\u2580", - "ulcorn;": u"\u231c", - "ulcorner;": u"\u231c", - "ulcrop;": u"\u230f", - "ultri;": u"\u25f8", - "umacr;": u"\u016b", - "uml": u"\xa8", - "uml;": u"\xa8", - "uogon;": u"\u0173", - "uopf;": u"\U0001d566", - "uparrow;": u"\u2191", - "updownarrow;": u"\u2195", - "upharpoonleft;": u"\u21bf", - "upharpoonright;": u"\u21be", - "uplus;": u"\u228e", - "upsi;": u"\u03c5", - "upsih;": u"\u03d2", - "upsilon;": u"\u03c5", - "upuparrows;": u"\u21c8", - "urcorn;": u"\u231d", - "urcorner;": u"\u231d", - "urcrop;": u"\u230e", - "uring;": u"\u016f", - "urtri;": u"\u25f9", - "uscr;": u"\U0001d4ca", - "utdot;": u"\u22f0", - "utilde;": u"\u0169", - "utri;": u"\u25b5", - "utrif;": u"\u25b4", - "uuarr;": u"\u21c8", - "uuml": u"\xfc", - "uuml;": u"\xfc", - "uwangle;": u"\u29a7", - "vArr;": u"\u21d5", - "vBar;": u"\u2ae8", - "vBarv;": u"\u2ae9", - "vDash;": u"\u22a8", - "vangrt;": u"\u299c", - "varepsilon;": u"\u03f5", - "varkappa;": u"\u03f0", - "varnothing;": u"\u2205", - "varphi;": u"\u03d5", - "varpi;": u"\u03d6", - "varpropto;": u"\u221d", - "varr;": u"\u2195", - "varrho;": u"\u03f1", - "varsigma;": u"\u03c2", - "varsubsetneq;": u"\u228a\ufe00", - "varsubsetneqq;": u"\u2acb\ufe00", - "varsupsetneq;": u"\u228b\ufe00", - "varsupsetneqq;": u"\u2acc\ufe00", - "vartheta;": u"\u03d1", - "vartriangleleft;": u"\u22b2", - "vartriangleright;": u"\u22b3", - 
"vcy;": u"\u0432", - "vdash;": u"\u22a2", - "vee;": u"\u2228", - "veebar;": u"\u22bb", - "veeeq;": u"\u225a", - "vellip;": u"\u22ee", - "verbar;": u"|", - "vert;": u"|", - "vfr;": u"\U0001d533", - "vltri;": u"\u22b2", - "vnsub;": u"\u2282\u20d2", - "vnsup;": u"\u2283\u20d2", - "vopf;": u"\U0001d567", - "vprop;": u"\u221d", - "vrtri;": u"\u22b3", - "vscr;": u"\U0001d4cb", - "vsubnE;": u"\u2acb\ufe00", - "vsubne;": u"\u228a\ufe00", - "vsupnE;": u"\u2acc\ufe00", - "vsupne;": u"\u228b\ufe00", - "vzigzag;": u"\u299a", - "wcirc;": u"\u0175", - "wedbar;": u"\u2a5f", - "wedge;": u"\u2227", - "wedgeq;": u"\u2259", - "weierp;": u"\u2118", - "wfr;": u"\U0001d534", - "wopf;": u"\U0001d568", - "wp;": u"\u2118", - "wr;": u"\u2240", - "wreath;": u"\u2240", - "wscr;": u"\U0001d4cc", - "xcap;": u"\u22c2", - "xcirc;": u"\u25ef", - "xcup;": u"\u22c3", - "xdtri;": u"\u25bd", - "xfr;": u"\U0001d535", - "xhArr;": u"\u27fa", - "xharr;": u"\u27f7", - "xi;": u"\u03be", - "xlArr;": u"\u27f8", - "xlarr;": u"\u27f5", - "xmap;": u"\u27fc", - "xnis;": u"\u22fb", - "xodot;": u"\u2a00", - "xopf;": u"\U0001d569", - "xoplus;": u"\u2a01", - "xotime;": u"\u2a02", - "xrArr;": u"\u27f9", - "xrarr;": u"\u27f6", - "xscr;": u"\U0001d4cd", - "xsqcup;": u"\u2a06", - "xuplus;": u"\u2a04", - "xutri;": u"\u25b3", - "xvee;": u"\u22c1", - "xwedge;": u"\u22c0", - "yacute": u"\xfd", - "yacute;": u"\xfd", - "yacy;": u"\u044f", - "ycirc;": u"\u0177", - "ycy;": u"\u044b", - "yen": u"\xa5", - "yen;": u"\xa5", - "yfr;": u"\U0001d536", - "yicy;": u"\u0457", - "yopf;": u"\U0001d56a", - "yscr;": u"\U0001d4ce", - "yucy;": u"\u044e", - "yuml": u"\xff", - "yuml;": u"\xff", - "zacute;": u"\u017a", - "zcaron;": u"\u017e", - "zcy;": u"\u0437", - "zdot;": u"\u017c", - "zeetrf;": u"\u2128", - "zeta;": u"\u03b6", - "zfr;": u"\U0001d537", - "zhcy;": u"\u0436", - "zigrarr;": u"\u21dd", - "zopf;": u"\U0001d56b", - "zscr;": u"\U0001d4cf", - "zwj;": u"\u200d", - "zwnj;": u"\u200c", + "AElig": "\xc6", + "AElig;": "\xc6", + "AMP": "&", + 
"AMP;": "&", + "Aacute": "\xc1", + "Aacute;": "\xc1", + "Abreve;": "\u0102", + "Acirc": "\xc2", + "Acirc;": "\xc2", + "Acy;": "\u0410", + "Afr;": "\U0001d504", + "Agrave": "\xc0", + "Agrave;": "\xc0", + "Alpha;": "\u0391", + "Amacr;": "\u0100", + "And;": "\u2a53", + "Aogon;": "\u0104", + "Aopf;": "\U0001d538", + "ApplyFunction;": "\u2061", + "Aring": "\xc5", + "Aring;": "\xc5", + "Ascr;": "\U0001d49c", + "Assign;": "\u2254", + "Atilde": "\xc3", + "Atilde;": "\xc3", + "Auml": "\xc4", + "Auml;": "\xc4", + "Backslash;": "\u2216", + "Barv;": "\u2ae7", + "Barwed;": "\u2306", + "Bcy;": "\u0411", + "Because;": "\u2235", + "Bernoullis;": "\u212c", + "Beta;": "\u0392", + "Bfr;": "\U0001d505", + "Bopf;": "\U0001d539", + "Breve;": "\u02d8", + "Bscr;": "\u212c", + "Bumpeq;": "\u224e", + "CHcy;": "\u0427", + "COPY": "\xa9", + "COPY;": "\xa9", + "Cacute;": "\u0106", + "Cap;": "\u22d2", + "CapitalDifferentialD;": "\u2145", + "Cayleys;": "\u212d", + "Ccaron;": "\u010c", + "Ccedil": "\xc7", + "Ccedil;": "\xc7", + "Ccirc;": "\u0108", + "Cconint;": "\u2230", + "Cdot;": "\u010a", + "Cedilla;": "\xb8", + "CenterDot;": "\xb7", + "Cfr;": "\u212d", + "Chi;": "\u03a7", + "CircleDot;": "\u2299", + "CircleMinus;": "\u2296", + "CirclePlus;": "\u2295", + "CircleTimes;": "\u2297", + "ClockwiseContourIntegral;": "\u2232", + "CloseCurlyDoubleQuote;": "\u201d", + "CloseCurlyQuote;": "\u2019", + "Colon;": "\u2237", + "Colone;": "\u2a74", + "Congruent;": "\u2261", + "Conint;": "\u222f", + "ContourIntegral;": "\u222e", + "Copf;": "\u2102", + "Coproduct;": "\u2210", + "CounterClockwiseContourIntegral;": "\u2233", + "Cross;": "\u2a2f", + "Cscr;": "\U0001d49e", + "Cup;": "\u22d3", + "CupCap;": "\u224d", + "DD;": "\u2145", + "DDotrahd;": "\u2911", + "DJcy;": "\u0402", + "DScy;": "\u0405", + "DZcy;": "\u040f", + "Dagger;": "\u2021", + "Darr;": "\u21a1", + "Dashv;": "\u2ae4", + "Dcaron;": "\u010e", + "Dcy;": "\u0414", + "Del;": "\u2207", + "Delta;": "\u0394", + "Dfr;": "\U0001d507", + "DiacriticalAcute;": 
"\xb4", + "DiacriticalDot;": "\u02d9", + "DiacriticalDoubleAcute;": "\u02dd", + "DiacriticalGrave;": "`", + "DiacriticalTilde;": "\u02dc", + "Diamond;": "\u22c4", + "DifferentialD;": "\u2146", + "Dopf;": "\U0001d53b", + "Dot;": "\xa8", + "DotDot;": "\u20dc", + "DotEqual;": "\u2250", + "DoubleContourIntegral;": "\u222f", + "DoubleDot;": "\xa8", + "DoubleDownArrow;": "\u21d3", + "DoubleLeftArrow;": "\u21d0", + "DoubleLeftRightArrow;": "\u21d4", + "DoubleLeftTee;": "\u2ae4", + "DoubleLongLeftArrow;": "\u27f8", + "DoubleLongLeftRightArrow;": "\u27fa", + "DoubleLongRightArrow;": "\u27f9", + "DoubleRightArrow;": "\u21d2", + "DoubleRightTee;": "\u22a8", + "DoubleUpArrow;": "\u21d1", + "DoubleUpDownArrow;": "\u21d5", + "DoubleVerticalBar;": "\u2225", + "DownArrow;": "\u2193", + "DownArrowBar;": "\u2913", + "DownArrowUpArrow;": "\u21f5", + "DownBreve;": "\u0311", + "DownLeftRightVector;": "\u2950", + "DownLeftTeeVector;": "\u295e", + "DownLeftVector;": "\u21bd", + "DownLeftVectorBar;": "\u2956", + "DownRightTeeVector;": "\u295f", + "DownRightVector;": "\u21c1", + "DownRightVectorBar;": "\u2957", + "DownTee;": "\u22a4", + "DownTeeArrow;": "\u21a7", + "Downarrow;": "\u21d3", + "Dscr;": "\U0001d49f", + "Dstrok;": "\u0110", + "ENG;": "\u014a", + "ETH": "\xd0", + "ETH;": "\xd0", + "Eacute": "\xc9", + "Eacute;": "\xc9", + "Ecaron;": "\u011a", + "Ecirc": "\xca", + "Ecirc;": "\xca", + "Ecy;": "\u042d", + "Edot;": "\u0116", + "Efr;": "\U0001d508", + "Egrave": "\xc8", + "Egrave;": "\xc8", + "Element;": "\u2208", + "Emacr;": "\u0112", + "EmptySmallSquare;": "\u25fb", + "EmptyVerySmallSquare;": "\u25ab", + "Eogon;": "\u0118", + "Eopf;": "\U0001d53c", + "Epsilon;": "\u0395", + "Equal;": "\u2a75", + "EqualTilde;": "\u2242", + "Equilibrium;": "\u21cc", + "Escr;": "\u2130", + "Esim;": "\u2a73", + "Eta;": "\u0397", + "Euml": "\xcb", + "Euml;": "\xcb", + "Exists;": "\u2203", + "ExponentialE;": "\u2147", + "Fcy;": "\u0424", + "Ffr;": "\U0001d509", + "FilledSmallSquare;": "\u25fc", + 
"FilledVerySmallSquare;": "\u25aa", + "Fopf;": "\U0001d53d", + "ForAll;": "\u2200", + "Fouriertrf;": "\u2131", + "Fscr;": "\u2131", + "GJcy;": "\u0403", + "GT": ">", + "GT;": ">", + "Gamma;": "\u0393", + "Gammad;": "\u03dc", + "Gbreve;": "\u011e", + "Gcedil;": "\u0122", + "Gcirc;": "\u011c", + "Gcy;": "\u0413", + "Gdot;": "\u0120", + "Gfr;": "\U0001d50a", + "Gg;": "\u22d9", + "Gopf;": "\U0001d53e", + "GreaterEqual;": "\u2265", + "GreaterEqualLess;": "\u22db", + "GreaterFullEqual;": "\u2267", + "GreaterGreater;": "\u2aa2", + "GreaterLess;": "\u2277", + "GreaterSlantEqual;": "\u2a7e", + "GreaterTilde;": "\u2273", + "Gscr;": "\U0001d4a2", + "Gt;": "\u226b", + "HARDcy;": "\u042a", + "Hacek;": "\u02c7", + "Hat;": "^", + "Hcirc;": "\u0124", + "Hfr;": "\u210c", + "HilbertSpace;": "\u210b", + "Hopf;": "\u210d", + "HorizontalLine;": "\u2500", + "Hscr;": "\u210b", + "Hstrok;": "\u0126", + "HumpDownHump;": "\u224e", + "HumpEqual;": "\u224f", + "IEcy;": "\u0415", + "IJlig;": "\u0132", + "IOcy;": "\u0401", + "Iacute": "\xcd", + "Iacute;": "\xcd", + "Icirc": "\xce", + "Icirc;": "\xce", + "Icy;": "\u0418", + "Idot;": "\u0130", + "Ifr;": "\u2111", + "Igrave": "\xcc", + "Igrave;": "\xcc", + "Im;": "\u2111", + "Imacr;": "\u012a", + "ImaginaryI;": "\u2148", + "Implies;": "\u21d2", + "Int;": "\u222c", + "Integral;": "\u222b", + "Intersection;": "\u22c2", + "InvisibleComma;": "\u2063", + "InvisibleTimes;": "\u2062", + "Iogon;": "\u012e", + "Iopf;": "\U0001d540", + "Iota;": "\u0399", + "Iscr;": "\u2110", + "Itilde;": "\u0128", + "Iukcy;": "\u0406", + "Iuml": "\xcf", + "Iuml;": "\xcf", + "Jcirc;": "\u0134", + "Jcy;": "\u0419", + "Jfr;": "\U0001d50d", + "Jopf;": "\U0001d541", + "Jscr;": "\U0001d4a5", + "Jsercy;": "\u0408", + "Jukcy;": "\u0404", + "KHcy;": "\u0425", + "KJcy;": "\u040c", + "Kappa;": "\u039a", + "Kcedil;": "\u0136", + "Kcy;": "\u041a", + "Kfr;": "\U0001d50e", + "Kopf;": "\U0001d542", + "Kscr;": "\U0001d4a6", + "LJcy;": "\u0409", + "LT": "<", + "LT;": "<", + "Lacute;": 
"\u0139", + "Lambda;": "\u039b", + "Lang;": "\u27ea", + "Laplacetrf;": "\u2112", + "Larr;": "\u219e", + "Lcaron;": "\u013d", + "Lcedil;": "\u013b", + "Lcy;": "\u041b", + "LeftAngleBracket;": "\u27e8", + "LeftArrow;": "\u2190", + "LeftArrowBar;": "\u21e4", + "LeftArrowRightArrow;": "\u21c6", + "LeftCeiling;": "\u2308", + "LeftDoubleBracket;": "\u27e6", + "LeftDownTeeVector;": "\u2961", + "LeftDownVector;": "\u21c3", + "LeftDownVectorBar;": "\u2959", + "LeftFloor;": "\u230a", + "LeftRightArrow;": "\u2194", + "LeftRightVector;": "\u294e", + "LeftTee;": "\u22a3", + "LeftTeeArrow;": "\u21a4", + "LeftTeeVector;": "\u295a", + "LeftTriangle;": "\u22b2", + "LeftTriangleBar;": "\u29cf", + "LeftTriangleEqual;": "\u22b4", + "LeftUpDownVector;": "\u2951", + "LeftUpTeeVector;": "\u2960", + "LeftUpVector;": "\u21bf", + "LeftUpVectorBar;": "\u2958", + "LeftVector;": "\u21bc", + "LeftVectorBar;": "\u2952", + "Leftarrow;": "\u21d0", + "Leftrightarrow;": "\u21d4", + "LessEqualGreater;": "\u22da", + "LessFullEqual;": "\u2266", + "LessGreater;": "\u2276", + "LessLess;": "\u2aa1", + "LessSlantEqual;": "\u2a7d", + "LessTilde;": "\u2272", + "Lfr;": "\U0001d50f", + "Ll;": "\u22d8", + "Lleftarrow;": "\u21da", + "Lmidot;": "\u013f", + "LongLeftArrow;": "\u27f5", + "LongLeftRightArrow;": "\u27f7", + "LongRightArrow;": "\u27f6", + "Longleftarrow;": "\u27f8", + "Longleftrightarrow;": "\u27fa", + "Longrightarrow;": "\u27f9", + "Lopf;": "\U0001d543", + "LowerLeftArrow;": "\u2199", + "LowerRightArrow;": "\u2198", + "Lscr;": "\u2112", + "Lsh;": "\u21b0", + "Lstrok;": "\u0141", + "Lt;": "\u226a", + "Map;": "\u2905", + "Mcy;": "\u041c", + "MediumSpace;": "\u205f", + "Mellintrf;": "\u2133", + "Mfr;": "\U0001d510", + "MinusPlus;": "\u2213", + "Mopf;": "\U0001d544", + "Mscr;": "\u2133", + "Mu;": "\u039c", + "NJcy;": "\u040a", + "Nacute;": "\u0143", + "Ncaron;": "\u0147", + "Ncedil;": "\u0145", + "Ncy;": "\u041d", + "NegativeMediumSpace;": "\u200b", + "NegativeThickSpace;": "\u200b", + 
"NegativeThinSpace;": "\u200b", + "NegativeVeryThinSpace;": "\u200b", + "NestedGreaterGreater;": "\u226b", + "NestedLessLess;": "\u226a", + "NewLine;": "\n", + "Nfr;": "\U0001d511", + "NoBreak;": "\u2060", + "NonBreakingSpace;": "\xa0", + "Nopf;": "\u2115", + "Not;": "\u2aec", + "NotCongruent;": "\u2262", + "NotCupCap;": "\u226d", + "NotDoubleVerticalBar;": "\u2226", + "NotElement;": "\u2209", + "NotEqual;": "\u2260", + "NotEqualTilde;": "\u2242\u0338", + "NotExists;": "\u2204", + "NotGreater;": "\u226f", + "NotGreaterEqual;": "\u2271", + "NotGreaterFullEqual;": "\u2267\u0338", + "NotGreaterGreater;": "\u226b\u0338", + "NotGreaterLess;": "\u2279", + "NotGreaterSlantEqual;": "\u2a7e\u0338", + "NotGreaterTilde;": "\u2275", + "NotHumpDownHump;": "\u224e\u0338", + "NotHumpEqual;": "\u224f\u0338", + "NotLeftTriangle;": "\u22ea", + "NotLeftTriangleBar;": "\u29cf\u0338", + "NotLeftTriangleEqual;": "\u22ec", + "NotLess;": "\u226e", + "NotLessEqual;": "\u2270", + "NotLessGreater;": "\u2278", + "NotLessLess;": "\u226a\u0338", + "NotLessSlantEqual;": "\u2a7d\u0338", + "NotLessTilde;": "\u2274", + "NotNestedGreaterGreater;": "\u2aa2\u0338", + "NotNestedLessLess;": "\u2aa1\u0338", + "NotPrecedes;": "\u2280", + "NotPrecedesEqual;": "\u2aaf\u0338", + "NotPrecedesSlantEqual;": "\u22e0", + "NotReverseElement;": "\u220c", + "NotRightTriangle;": "\u22eb", + "NotRightTriangleBar;": "\u29d0\u0338", + "NotRightTriangleEqual;": "\u22ed", + "NotSquareSubset;": "\u228f\u0338", + "NotSquareSubsetEqual;": "\u22e2", + "NotSquareSuperset;": "\u2290\u0338", + "NotSquareSupersetEqual;": "\u22e3", + "NotSubset;": "\u2282\u20d2", + "NotSubsetEqual;": "\u2288", + "NotSucceeds;": "\u2281", + "NotSucceedsEqual;": "\u2ab0\u0338", + "NotSucceedsSlantEqual;": "\u22e1", + "NotSucceedsTilde;": "\u227f\u0338", + "NotSuperset;": "\u2283\u20d2", + "NotSupersetEqual;": "\u2289", + "NotTilde;": "\u2241", + "NotTildeEqual;": "\u2244", + "NotTildeFullEqual;": "\u2247", + "NotTildeTilde;": "\u2249", + 
"NotVerticalBar;": "\u2224", + "Nscr;": "\U0001d4a9", + "Ntilde": "\xd1", + "Ntilde;": "\xd1", + "Nu;": "\u039d", + "OElig;": "\u0152", + "Oacute": "\xd3", + "Oacute;": "\xd3", + "Ocirc": "\xd4", + "Ocirc;": "\xd4", + "Ocy;": "\u041e", + "Odblac;": "\u0150", + "Ofr;": "\U0001d512", + "Ograve": "\xd2", + "Ograve;": "\xd2", + "Omacr;": "\u014c", + "Omega;": "\u03a9", + "Omicron;": "\u039f", + "Oopf;": "\U0001d546", + "OpenCurlyDoubleQuote;": "\u201c", + "OpenCurlyQuote;": "\u2018", + "Or;": "\u2a54", + "Oscr;": "\U0001d4aa", + "Oslash": "\xd8", + "Oslash;": "\xd8", + "Otilde": "\xd5", + "Otilde;": "\xd5", + "Otimes;": "\u2a37", + "Ouml": "\xd6", + "Ouml;": "\xd6", + "OverBar;": "\u203e", + "OverBrace;": "\u23de", + "OverBracket;": "\u23b4", + "OverParenthesis;": "\u23dc", + "PartialD;": "\u2202", + "Pcy;": "\u041f", + "Pfr;": "\U0001d513", + "Phi;": "\u03a6", + "Pi;": "\u03a0", + "PlusMinus;": "\xb1", + "Poincareplane;": "\u210c", + "Popf;": "\u2119", + "Pr;": "\u2abb", + "Precedes;": "\u227a", + "PrecedesEqual;": "\u2aaf", + "PrecedesSlantEqual;": "\u227c", + "PrecedesTilde;": "\u227e", + "Prime;": "\u2033", + "Product;": "\u220f", + "Proportion;": "\u2237", + "Proportional;": "\u221d", + "Pscr;": "\U0001d4ab", + "Psi;": "\u03a8", + "QUOT": "\"", + "QUOT;": "\"", + "Qfr;": "\U0001d514", + "Qopf;": "\u211a", + "Qscr;": "\U0001d4ac", + "RBarr;": "\u2910", + "REG": "\xae", + "REG;": "\xae", + "Racute;": "\u0154", + "Rang;": "\u27eb", + "Rarr;": "\u21a0", + "Rarrtl;": "\u2916", + "Rcaron;": "\u0158", + "Rcedil;": "\u0156", + "Rcy;": "\u0420", + "Re;": "\u211c", + "ReverseElement;": "\u220b", + "ReverseEquilibrium;": "\u21cb", + "ReverseUpEquilibrium;": "\u296f", + "Rfr;": "\u211c", + "Rho;": "\u03a1", + "RightAngleBracket;": "\u27e9", + "RightArrow;": "\u2192", + "RightArrowBar;": "\u21e5", + "RightArrowLeftArrow;": "\u21c4", + "RightCeiling;": "\u2309", + "RightDoubleBracket;": "\u27e7", + "RightDownTeeVector;": "\u295d", + "RightDownVector;": "\u21c2", + 
"RightDownVectorBar;": "\u2955", + "RightFloor;": "\u230b", + "RightTee;": "\u22a2", + "RightTeeArrow;": "\u21a6", + "RightTeeVector;": "\u295b", + "RightTriangle;": "\u22b3", + "RightTriangleBar;": "\u29d0", + "RightTriangleEqual;": "\u22b5", + "RightUpDownVector;": "\u294f", + "RightUpTeeVector;": "\u295c", + "RightUpVector;": "\u21be", + "RightUpVectorBar;": "\u2954", + "RightVector;": "\u21c0", + "RightVectorBar;": "\u2953", + "Rightarrow;": "\u21d2", + "Ropf;": "\u211d", + "RoundImplies;": "\u2970", + "Rrightarrow;": "\u21db", + "Rscr;": "\u211b", + "Rsh;": "\u21b1", + "RuleDelayed;": "\u29f4", + "SHCHcy;": "\u0429", + "SHcy;": "\u0428", + "SOFTcy;": "\u042c", + "Sacute;": "\u015a", + "Sc;": "\u2abc", + "Scaron;": "\u0160", + "Scedil;": "\u015e", + "Scirc;": "\u015c", + "Scy;": "\u0421", + "Sfr;": "\U0001d516", + "ShortDownArrow;": "\u2193", + "ShortLeftArrow;": "\u2190", + "ShortRightArrow;": "\u2192", + "ShortUpArrow;": "\u2191", + "Sigma;": "\u03a3", + "SmallCircle;": "\u2218", + "Sopf;": "\U0001d54a", + "Sqrt;": "\u221a", + "Square;": "\u25a1", + "SquareIntersection;": "\u2293", + "SquareSubset;": "\u228f", + "SquareSubsetEqual;": "\u2291", + "SquareSuperset;": "\u2290", + "SquareSupersetEqual;": "\u2292", + "SquareUnion;": "\u2294", + "Sscr;": "\U0001d4ae", + "Star;": "\u22c6", + "Sub;": "\u22d0", + "Subset;": "\u22d0", + "SubsetEqual;": "\u2286", + "Succeeds;": "\u227b", + "SucceedsEqual;": "\u2ab0", + "SucceedsSlantEqual;": "\u227d", + "SucceedsTilde;": "\u227f", + "SuchThat;": "\u220b", + "Sum;": "\u2211", + "Sup;": "\u22d1", + "Superset;": "\u2283", + "SupersetEqual;": "\u2287", + "Supset;": "\u22d1", + "THORN": "\xde", + "THORN;": "\xde", + "TRADE;": "\u2122", + "TSHcy;": "\u040b", + "TScy;": "\u0426", + "Tab;": "\t", + "Tau;": "\u03a4", + "Tcaron;": "\u0164", + "Tcedil;": "\u0162", + "Tcy;": "\u0422", + "Tfr;": "\U0001d517", + "Therefore;": "\u2234", + "Theta;": "\u0398", + "ThickSpace;": "\u205f\u200a", + "ThinSpace;": "\u2009", + "Tilde;": 
"\u223c", + "TildeEqual;": "\u2243", + "TildeFullEqual;": "\u2245", + "TildeTilde;": "\u2248", + "Topf;": "\U0001d54b", + "TripleDot;": "\u20db", + "Tscr;": "\U0001d4af", + "Tstrok;": "\u0166", + "Uacute": "\xda", + "Uacute;": "\xda", + "Uarr;": "\u219f", + "Uarrocir;": "\u2949", + "Ubrcy;": "\u040e", + "Ubreve;": "\u016c", + "Ucirc": "\xdb", + "Ucirc;": "\xdb", + "Ucy;": "\u0423", + "Udblac;": "\u0170", + "Ufr;": "\U0001d518", + "Ugrave": "\xd9", + "Ugrave;": "\xd9", + "Umacr;": "\u016a", + "UnderBar;": "_", + "UnderBrace;": "\u23df", + "UnderBracket;": "\u23b5", + "UnderParenthesis;": "\u23dd", + "Union;": "\u22c3", + "UnionPlus;": "\u228e", + "Uogon;": "\u0172", + "Uopf;": "\U0001d54c", + "UpArrow;": "\u2191", + "UpArrowBar;": "\u2912", + "UpArrowDownArrow;": "\u21c5", + "UpDownArrow;": "\u2195", + "UpEquilibrium;": "\u296e", + "UpTee;": "\u22a5", + "UpTeeArrow;": "\u21a5", + "Uparrow;": "\u21d1", + "Updownarrow;": "\u21d5", + "UpperLeftArrow;": "\u2196", + "UpperRightArrow;": "\u2197", + "Upsi;": "\u03d2", + "Upsilon;": "\u03a5", + "Uring;": "\u016e", + "Uscr;": "\U0001d4b0", + "Utilde;": "\u0168", + "Uuml": "\xdc", + "Uuml;": "\xdc", + "VDash;": "\u22ab", + "Vbar;": "\u2aeb", + "Vcy;": "\u0412", + "Vdash;": "\u22a9", + "Vdashl;": "\u2ae6", + "Vee;": "\u22c1", + "Verbar;": "\u2016", + "Vert;": "\u2016", + "VerticalBar;": "\u2223", + "VerticalLine;": "|", + "VerticalSeparator;": "\u2758", + "VerticalTilde;": "\u2240", + "VeryThinSpace;": "\u200a", + "Vfr;": "\U0001d519", + "Vopf;": "\U0001d54d", + "Vscr;": "\U0001d4b1", + "Vvdash;": "\u22aa", + "Wcirc;": "\u0174", + "Wedge;": "\u22c0", + "Wfr;": "\U0001d51a", + "Wopf;": "\U0001d54e", + "Wscr;": "\U0001d4b2", + "Xfr;": "\U0001d51b", + "Xi;": "\u039e", + "Xopf;": "\U0001d54f", + "Xscr;": "\U0001d4b3", + "YAcy;": "\u042f", + "YIcy;": "\u0407", + "YUcy;": "\u042e", + "Yacute": "\xdd", + "Yacute;": "\xdd", + "Ycirc;": "\u0176", + "Ycy;": "\u042b", + "Yfr;": "\U0001d51c", + "Yopf;": "\U0001d550", + "Yscr;": 
"\U0001d4b4", + "Yuml;": "\u0178", + "ZHcy;": "\u0416", + "Zacute;": "\u0179", + "Zcaron;": "\u017d", + "Zcy;": "\u0417", + "Zdot;": "\u017b", + "ZeroWidthSpace;": "\u200b", + "Zeta;": "\u0396", + "Zfr;": "\u2128", + "Zopf;": "\u2124", + "Zscr;": "\U0001d4b5", + "aacute": "\xe1", + "aacute;": "\xe1", + "abreve;": "\u0103", + "ac;": "\u223e", + "acE;": "\u223e\u0333", + "acd;": "\u223f", + "acirc": "\xe2", + "acirc;": "\xe2", + "acute": "\xb4", + "acute;": "\xb4", + "acy;": "\u0430", + "aelig": "\xe6", + "aelig;": "\xe6", + "af;": "\u2061", + "afr;": "\U0001d51e", + "agrave": "\xe0", + "agrave;": "\xe0", + "alefsym;": "\u2135", + "aleph;": "\u2135", + "alpha;": "\u03b1", + "amacr;": "\u0101", + "amalg;": "\u2a3f", + "amp": "&", + "amp;": "&", + "and;": "\u2227", + "andand;": "\u2a55", + "andd;": "\u2a5c", + "andslope;": "\u2a58", + "andv;": "\u2a5a", + "ang;": "\u2220", + "ange;": "\u29a4", + "angle;": "\u2220", + "angmsd;": "\u2221", + "angmsdaa;": "\u29a8", + "angmsdab;": "\u29a9", + "angmsdac;": "\u29aa", + "angmsdad;": "\u29ab", + "angmsdae;": "\u29ac", + "angmsdaf;": "\u29ad", + "angmsdag;": "\u29ae", + "angmsdah;": "\u29af", + "angrt;": "\u221f", + "angrtvb;": "\u22be", + "angrtvbd;": "\u299d", + "angsph;": "\u2222", + "angst;": "\xc5", + "angzarr;": "\u237c", + "aogon;": "\u0105", + "aopf;": "\U0001d552", + "ap;": "\u2248", + "apE;": "\u2a70", + "apacir;": "\u2a6f", + "ape;": "\u224a", + "apid;": "\u224b", + "apos;": "'", + "approx;": "\u2248", + "approxeq;": "\u224a", + "aring": "\xe5", + "aring;": "\xe5", + "ascr;": "\U0001d4b6", + "ast;": "*", + "asymp;": "\u2248", + "asympeq;": "\u224d", + "atilde": "\xe3", + "atilde;": "\xe3", + "auml": "\xe4", + "auml;": "\xe4", + "awconint;": "\u2233", + "awint;": "\u2a11", + "bNot;": "\u2aed", + "backcong;": "\u224c", + "backepsilon;": "\u03f6", + "backprime;": "\u2035", + "backsim;": "\u223d", + "backsimeq;": "\u22cd", + "barvee;": "\u22bd", + "barwed;": "\u2305", + "barwedge;": "\u2305", + "bbrk;": "\u23b5", + 
"bbrktbrk;": "\u23b6", + "bcong;": "\u224c", + "bcy;": "\u0431", + "bdquo;": "\u201e", + "becaus;": "\u2235", + "because;": "\u2235", + "bemptyv;": "\u29b0", + "bepsi;": "\u03f6", + "bernou;": "\u212c", + "beta;": "\u03b2", + "beth;": "\u2136", + "between;": "\u226c", + "bfr;": "\U0001d51f", + "bigcap;": "\u22c2", + "bigcirc;": "\u25ef", + "bigcup;": "\u22c3", + "bigodot;": "\u2a00", + "bigoplus;": "\u2a01", + "bigotimes;": "\u2a02", + "bigsqcup;": "\u2a06", + "bigstar;": "\u2605", + "bigtriangledown;": "\u25bd", + "bigtriangleup;": "\u25b3", + "biguplus;": "\u2a04", + "bigvee;": "\u22c1", + "bigwedge;": "\u22c0", + "bkarow;": "\u290d", + "blacklozenge;": "\u29eb", + "blacksquare;": "\u25aa", + "blacktriangle;": "\u25b4", + "blacktriangledown;": "\u25be", + "blacktriangleleft;": "\u25c2", + "blacktriangleright;": "\u25b8", + "blank;": "\u2423", + "blk12;": "\u2592", + "blk14;": "\u2591", + "blk34;": "\u2593", + "block;": "\u2588", + "bne;": "=\u20e5", + "bnequiv;": "\u2261\u20e5", + "bnot;": "\u2310", + "bopf;": "\U0001d553", + "bot;": "\u22a5", + "bottom;": "\u22a5", + "bowtie;": "\u22c8", + "boxDL;": "\u2557", + "boxDR;": "\u2554", + "boxDl;": "\u2556", + "boxDr;": "\u2553", + "boxH;": "\u2550", + "boxHD;": "\u2566", + "boxHU;": "\u2569", + "boxHd;": "\u2564", + "boxHu;": "\u2567", + "boxUL;": "\u255d", + "boxUR;": "\u255a", + "boxUl;": "\u255c", + "boxUr;": "\u2559", + "boxV;": "\u2551", + "boxVH;": "\u256c", + "boxVL;": "\u2563", + "boxVR;": "\u2560", + "boxVh;": "\u256b", + "boxVl;": "\u2562", + "boxVr;": "\u255f", + "boxbox;": "\u29c9", + "boxdL;": "\u2555", + "boxdR;": "\u2552", + "boxdl;": "\u2510", + "boxdr;": "\u250c", + "boxh;": "\u2500", + "boxhD;": "\u2565", + "boxhU;": "\u2568", + "boxhd;": "\u252c", + "boxhu;": "\u2534", + "boxminus;": "\u229f", + "boxplus;": "\u229e", + "boxtimes;": "\u22a0", + "boxuL;": "\u255b", + "boxuR;": "\u2558", + "boxul;": "\u2518", + "boxur;": "\u2514", + "boxv;": "\u2502", + "boxvH;": "\u256a", + "boxvL;": "\u2561", + 
"boxvR;": "\u255e", + "boxvh;": "\u253c", + "boxvl;": "\u2524", + "boxvr;": "\u251c", + "bprime;": "\u2035", + "breve;": "\u02d8", + "brvbar": "\xa6", + "brvbar;": "\xa6", + "bscr;": "\U0001d4b7", + "bsemi;": "\u204f", + "bsim;": "\u223d", + "bsime;": "\u22cd", + "bsol;": "\\", + "bsolb;": "\u29c5", + "bsolhsub;": "\u27c8", + "bull;": "\u2022", + "bullet;": "\u2022", + "bump;": "\u224e", + "bumpE;": "\u2aae", + "bumpe;": "\u224f", + "bumpeq;": "\u224f", + "cacute;": "\u0107", + "cap;": "\u2229", + "capand;": "\u2a44", + "capbrcup;": "\u2a49", + "capcap;": "\u2a4b", + "capcup;": "\u2a47", + "capdot;": "\u2a40", + "caps;": "\u2229\ufe00", + "caret;": "\u2041", + "caron;": "\u02c7", + "ccaps;": "\u2a4d", + "ccaron;": "\u010d", + "ccedil": "\xe7", + "ccedil;": "\xe7", + "ccirc;": "\u0109", + "ccups;": "\u2a4c", + "ccupssm;": "\u2a50", + "cdot;": "\u010b", + "cedil": "\xb8", + "cedil;": "\xb8", + "cemptyv;": "\u29b2", + "cent": "\xa2", + "cent;": "\xa2", + "centerdot;": "\xb7", + "cfr;": "\U0001d520", + "chcy;": "\u0447", + "check;": "\u2713", + "checkmark;": "\u2713", + "chi;": "\u03c7", + "cir;": "\u25cb", + "cirE;": "\u29c3", + "circ;": "\u02c6", + "circeq;": "\u2257", + "circlearrowleft;": "\u21ba", + "circlearrowright;": "\u21bb", + "circledR;": "\xae", + "circledS;": "\u24c8", + "circledast;": "\u229b", + "circledcirc;": "\u229a", + "circleddash;": "\u229d", + "cire;": "\u2257", + "cirfnint;": "\u2a10", + "cirmid;": "\u2aef", + "cirscir;": "\u29c2", + "clubs;": "\u2663", + "clubsuit;": "\u2663", + "colon;": ":", + "colone;": "\u2254", + "coloneq;": "\u2254", + "comma;": ",", + "commat;": "@", + "comp;": "\u2201", + "compfn;": "\u2218", + "complement;": "\u2201", + "complexes;": "\u2102", + "cong;": "\u2245", + "congdot;": "\u2a6d", + "conint;": "\u222e", + "copf;": "\U0001d554", + "coprod;": "\u2210", + "copy": "\xa9", + "copy;": "\xa9", + "copysr;": "\u2117", + "crarr;": "\u21b5", + "cross;": "\u2717", + "cscr;": "\U0001d4b8", + "csub;": "\u2acf", + "csube;": 
"\u2ad1", + "csup;": "\u2ad0", + "csupe;": "\u2ad2", + "ctdot;": "\u22ef", + "cudarrl;": "\u2938", + "cudarrr;": "\u2935", + "cuepr;": "\u22de", + "cuesc;": "\u22df", + "cularr;": "\u21b6", + "cularrp;": "\u293d", + "cup;": "\u222a", + "cupbrcap;": "\u2a48", + "cupcap;": "\u2a46", + "cupcup;": "\u2a4a", + "cupdot;": "\u228d", + "cupor;": "\u2a45", + "cups;": "\u222a\ufe00", + "curarr;": "\u21b7", + "curarrm;": "\u293c", + "curlyeqprec;": "\u22de", + "curlyeqsucc;": "\u22df", + "curlyvee;": "\u22ce", + "curlywedge;": "\u22cf", + "curren": "\xa4", + "curren;": "\xa4", + "curvearrowleft;": "\u21b6", + "curvearrowright;": "\u21b7", + "cuvee;": "\u22ce", + "cuwed;": "\u22cf", + "cwconint;": "\u2232", + "cwint;": "\u2231", + "cylcty;": "\u232d", + "dArr;": "\u21d3", + "dHar;": "\u2965", + "dagger;": "\u2020", + "daleth;": "\u2138", + "darr;": "\u2193", + "dash;": "\u2010", + "dashv;": "\u22a3", + "dbkarow;": "\u290f", + "dblac;": "\u02dd", + "dcaron;": "\u010f", + "dcy;": "\u0434", + "dd;": "\u2146", + "ddagger;": "\u2021", + "ddarr;": "\u21ca", + "ddotseq;": "\u2a77", + "deg": "\xb0", + "deg;": "\xb0", + "delta;": "\u03b4", + "demptyv;": "\u29b1", + "dfisht;": "\u297f", + "dfr;": "\U0001d521", + "dharl;": "\u21c3", + "dharr;": "\u21c2", + "diam;": "\u22c4", + "diamond;": "\u22c4", + "diamondsuit;": "\u2666", + "diams;": "\u2666", + "die;": "\xa8", + "digamma;": "\u03dd", + "disin;": "\u22f2", + "div;": "\xf7", + "divide": "\xf7", + "divide;": "\xf7", + "divideontimes;": "\u22c7", + "divonx;": "\u22c7", + "djcy;": "\u0452", + "dlcorn;": "\u231e", + "dlcrop;": "\u230d", + "dollar;": "$", + "dopf;": "\U0001d555", + "dot;": "\u02d9", + "doteq;": "\u2250", + "doteqdot;": "\u2251", + "dotminus;": "\u2238", + "dotplus;": "\u2214", + "dotsquare;": "\u22a1", + "doublebarwedge;": "\u2306", + "downarrow;": "\u2193", + "downdownarrows;": "\u21ca", + "downharpoonleft;": "\u21c3", + "downharpoonright;": "\u21c2", + "drbkarow;": "\u2910", + "drcorn;": "\u231f", + "drcrop;": "\u230c", 
+ "dscr;": "\U0001d4b9", + "dscy;": "\u0455", + "dsol;": "\u29f6", + "dstrok;": "\u0111", + "dtdot;": "\u22f1", + "dtri;": "\u25bf", + "dtrif;": "\u25be", + "duarr;": "\u21f5", + "duhar;": "\u296f", + "dwangle;": "\u29a6", + "dzcy;": "\u045f", + "dzigrarr;": "\u27ff", + "eDDot;": "\u2a77", + "eDot;": "\u2251", + "eacute": "\xe9", + "eacute;": "\xe9", + "easter;": "\u2a6e", + "ecaron;": "\u011b", + "ecir;": "\u2256", + "ecirc": "\xea", + "ecirc;": "\xea", + "ecolon;": "\u2255", + "ecy;": "\u044d", + "edot;": "\u0117", + "ee;": "\u2147", + "efDot;": "\u2252", + "efr;": "\U0001d522", + "eg;": "\u2a9a", + "egrave": "\xe8", + "egrave;": "\xe8", + "egs;": "\u2a96", + "egsdot;": "\u2a98", + "el;": "\u2a99", + "elinters;": "\u23e7", + "ell;": "\u2113", + "els;": "\u2a95", + "elsdot;": "\u2a97", + "emacr;": "\u0113", + "empty;": "\u2205", + "emptyset;": "\u2205", + "emptyv;": "\u2205", + "emsp13;": "\u2004", + "emsp14;": "\u2005", + "emsp;": "\u2003", + "eng;": "\u014b", + "ensp;": "\u2002", + "eogon;": "\u0119", + "eopf;": "\U0001d556", + "epar;": "\u22d5", + "eparsl;": "\u29e3", + "eplus;": "\u2a71", + "epsi;": "\u03b5", + "epsilon;": "\u03b5", + "epsiv;": "\u03f5", + "eqcirc;": "\u2256", + "eqcolon;": "\u2255", + "eqsim;": "\u2242", + "eqslantgtr;": "\u2a96", + "eqslantless;": "\u2a95", + "equals;": "=", + "equest;": "\u225f", + "equiv;": "\u2261", + "equivDD;": "\u2a78", + "eqvparsl;": "\u29e5", + "erDot;": "\u2253", + "erarr;": "\u2971", + "escr;": "\u212f", + "esdot;": "\u2250", + "esim;": "\u2242", + "eta;": "\u03b7", + "eth": "\xf0", + "eth;": "\xf0", + "euml": "\xeb", + "euml;": "\xeb", + "euro;": "\u20ac", + "excl;": "!", + "exist;": "\u2203", + "expectation;": "\u2130", + "exponentiale;": "\u2147", + "fallingdotseq;": "\u2252", + "fcy;": "\u0444", + "female;": "\u2640", + "ffilig;": "\ufb03", + "fflig;": "\ufb00", + "ffllig;": "\ufb04", + "ffr;": "\U0001d523", + "filig;": "\ufb01", + "fjlig;": "fj", + "flat;": "\u266d", + "fllig;": "\ufb02", + "fltns;": "\u25b1", 
+ "fnof;": "\u0192", + "fopf;": "\U0001d557", + "forall;": "\u2200", + "fork;": "\u22d4", + "forkv;": "\u2ad9", + "fpartint;": "\u2a0d", + "frac12": "\xbd", + "frac12;": "\xbd", + "frac13;": "\u2153", + "frac14": "\xbc", + "frac14;": "\xbc", + "frac15;": "\u2155", + "frac16;": "\u2159", + "frac18;": "\u215b", + "frac23;": "\u2154", + "frac25;": "\u2156", + "frac34": "\xbe", + "frac34;": "\xbe", + "frac35;": "\u2157", + "frac38;": "\u215c", + "frac45;": "\u2158", + "frac56;": "\u215a", + "frac58;": "\u215d", + "frac78;": "\u215e", + "frasl;": "\u2044", + "frown;": "\u2322", + "fscr;": "\U0001d4bb", + "gE;": "\u2267", + "gEl;": "\u2a8c", + "gacute;": "\u01f5", + "gamma;": "\u03b3", + "gammad;": "\u03dd", + "gap;": "\u2a86", + "gbreve;": "\u011f", + "gcirc;": "\u011d", + "gcy;": "\u0433", + "gdot;": "\u0121", + "ge;": "\u2265", + "gel;": "\u22db", + "geq;": "\u2265", + "geqq;": "\u2267", + "geqslant;": "\u2a7e", + "ges;": "\u2a7e", + "gescc;": "\u2aa9", + "gesdot;": "\u2a80", + "gesdoto;": "\u2a82", + "gesdotol;": "\u2a84", + "gesl;": "\u22db\ufe00", + "gesles;": "\u2a94", + "gfr;": "\U0001d524", + "gg;": "\u226b", + "ggg;": "\u22d9", + "gimel;": "\u2137", + "gjcy;": "\u0453", + "gl;": "\u2277", + "glE;": "\u2a92", + "gla;": "\u2aa5", + "glj;": "\u2aa4", + "gnE;": "\u2269", + "gnap;": "\u2a8a", + "gnapprox;": "\u2a8a", + "gne;": "\u2a88", + "gneq;": "\u2a88", + "gneqq;": "\u2269", + "gnsim;": "\u22e7", + "gopf;": "\U0001d558", + "grave;": "`", + "gscr;": "\u210a", + "gsim;": "\u2273", + "gsime;": "\u2a8e", + "gsiml;": "\u2a90", + "gt": ">", + "gt;": ">", + "gtcc;": "\u2aa7", + "gtcir;": "\u2a7a", + "gtdot;": "\u22d7", + "gtlPar;": "\u2995", + "gtquest;": "\u2a7c", + "gtrapprox;": "\u2a86", + "gtrarr;": "\u2978", + "gtrdot;": "\u22d7", + "gtreqless;": "\u22db", + "gtreqqless;": "\u2a8c", + "gtrless;": "\u2277", + "gtrsim;": "\u2273", + "gvertneqq;": "\u2269\ufe00", + "gvnE;": "\u2269\ufe00", + "hArr;": "\u21d4", + "hairsp;": "\u200a", + "half;": "\xbd", + "hamilt;": 
"\u210b", + "hardcy;": "\u044a", + "harr;": "\u2194", + "harrcir;": "\u2948", + "harrw;": "\u21ad", + "hbar;": "\u210f", + "hcirc;": "\u0125", + "hearts;": "\u2665", + "heartsuit;": "\u2665", + "hellip;": "\u2026", + "hercon;": "\u22b9", + "hfr;": "\U0001d525", + "hksearow;": "\u2925", + "hkswarow;": "\u2926", + "hoarr;": "\u21ff", + "homtht;": "\u223b", + "hookleftarrow;": "\u21a9", + "hookrightarrow;": "\u21aa", + "hopf;": "\U0001d559", + "horbar;": "\u2015", + "hscr;": "\U0001d4bd", + "hslash;": "\u210f", + "hstrok;": "\u0127", + "hybull;": "\u2043", + "hyphen;": "\u2010", + "iacute": "\xed", + "iacute;": "\xed", + "ic;": "\u2063", + "icirc": "\xee", + "icirc;": "\xee", + "icy;": "\u0438", + "iecy;": "\u0435", + "iexcl": "\xa1", + "iexcl;": "\xa1", + "iff;": "\u21d4", + "ifr;": "\U0001d526", + "igrave": "\xec", + "igrave;": "\xec", + "ii;": "\u2148", + "iiiint;": "\u2a0c", + "iiint;": "\u222d", + "iinfin;": "\u29dc", + "iiota;": "\u2129", + "ijlig;": "\u0133", + "imacr;": "\u012b", + "image;": "\u2111", + "imagline;": "\u2110", + "imagpart;": "\u2111", + "imath;": "\u0131", + "imof;": "\u22b7", + "imped;": "\u01b5", + "in;": "\u2208", + "incare;": "\u2105", + "infin;": "\u221e", + "infintie;": "\u29dd", + "inodot;": "\u0131", + "int;": "\u222b", + "intcal;": "\u22ba", + "integers;": "\u2124", + "intercal;": "\u22ba", + "intlarhk;": "\u2a17", + "intprod;": "\u2a3c", + "iocy;": "\u0451", + "iogon;": "\u012f", + "iopf;": "\U0001d55a", + "iota;": "\u03b9", + "iprod;": "\u2a3c", + "iquest": "\xbf", + "iquest;": "\xbf", + "iscr;": "\U0001d4be", + "isin;": "\u2208", + "isinE;": "\u22f9", + "isindot;": "\u22f5", + "isins;": "\u22f4", + "isinsv;": "\u22f3", + "isinv;": "\u2208", + "it;": "\u2062", + "itilde;": "\u0129", + "iukcy;": "\u0456", + "iuml": "\xef", + "iuml;": "\xef", + "jcirc;": "\u0135", + "jcy;": "\u0439", + "jfr;": "\U0001d527", + "jmath;": "\u0237", + "jopf;": "\U0001d55b", + "jscr;": "\U0001d4bf", + "jsercy;": "\u0458", + "jukcy;": "\u0454", + "kappa;": 
"\u03ba", + "kappav;": "\u03f0", + "kcedil;": "\u0137", + "kcy;": "\u043a", + "kfr;": "\U0001d528", + "kgreen;": "\u0138", + "khcy;": "\u0445", + "kjcy;": "\u045c", + "kopf;": "\U0001d55c", + "kscr;": "\U0001d4c0", + "lAarr;": "\u21da", + "lArr;": "\u21d0", + "lAtail;": "\u291b", + "lBarr;": "\u290e", + "lE;": "\u2266", + "lEg;": "\u2a8b", + "lHar;": "\u2962", + "lacute;": "\u013a", + "laemptyv;": "\u29b4", + "lagran;": "\u2112", + "lambda;": "\u03bb", + "lang;": "\u27e8", + "langd;": "\u2991", + "langle;": "\u27e8", + "lap;": "\u2a85", + "laquo": "\xab", + "laquo;": "\xab", + "larr;": "\u2190", + "larrb;": "\u21e4", + "larrbfs;": "\u291f", + "larrfs;": "\u291d", + "larrhk;": "\u21a9", + "larrlp;": "\u21ab", + "larrpl;": "\u2939", + "larrsim;": "\u2973", + "larrtl;": "\u21a2", + "lat;": "\u2aab", + "latail;": "\u2919", + "late;": "\u2aad", + "lates;": "\u2aad\ufe00", + "lbarr;": "\u290c", + "lbbrk;": "\u2772", + "lbrace;": "{", + "lbrack;": "[", + "lbrke;": "\u298b", + "lbrksld;": "\u298f", + "lbrkslu;": "\u298d", + "lcaron;": "\u013e", + "lcedil;": "\u013c", + "lceil;": "\u2308", + "lcub;": "{", + "lcy;": "\u043b", + "ldca;": "\u2936", + "ldquo;": "\u201c", + "ldquor;": "\u201e", + "ldrdhar;": "\u2967", + "ldrushar;": "\u294b", + "ldsh;": "\u21b2", + "le;": "\u2264", + "leftarrow;": "\u2190", + "leftarrowtail;": "\u21a2", + "leftharpoondown;": "\u21bd", + "leftharpoonup;": "\u21bc", + "leftleftarrows;": "\u21c7", + "leftrightarrow;": "\u2194", + "leftrightarrows;": "\u21c6", + "leftrightharpoons;": "\u21cb", + "leftrightsquigarrow;": "\u21ad", + "leftthreetimes;": "\u22cb", + "leg;": "\u22da", + "leq;": "\u2264", + "leqq;": "\u2266", + "leqslant;": "\u2a7d", + "les;": "\u2a7d", + "lescc;": "\u2aa8", + "lesdot;": "\u2a7f", + "lesdoto;": "\u2a81", + "lesdotor;": "\u2a83", + "lesg;": "\u22da\ufe00", + "lesges;": "\u2a93", + "lessapprox;": "\u2a85", + "lessdot;": "\u22d6", + "lesseqgtr;": "\u22da", + "lesseqqgtr;": "\u2a8b", + "lessgtr;": "\u2276", + "lesssim;": 
"\u2272", + "lfisht;": "\u297c", + "lfloor;": "\u230a", + "lfr;": "\U0001d529", + "lg;": "\u2276", + "lgE;": "\u2a91", + "lhard;": "\u21bd", + "lharu;": "\u21bc", + "lharul;": "\u296a", + "lhblk;": "\u2584", + "ljcy;": "\u0459", + "ll;": "\u226a", + "llarr;": "\u21c7", + "llcorner;": "\u231e", + "llhard;": "\u296b", + "lltri;": "\u25fa", + "lmidot;": "\u0140", + "lmoust;": "\u23b0", + "lmoustache;": "\u23b0", + "lnE;": "\u2268", + "lnap;": "\u2a89", + "lnapprox;": "\u2a89", + "lne;": "\u2a87", + "lneq;": "\u2a87", + "lneqq;": "\u2268", + "lnsim;": "\u22e6", + "loang;": "\u27ec", + "loarr;": "\u21fd", + "lobrk;": "\u27e6", + "longleftarrow;": "\u27f5", + "longleftrightarrow;": "\u27f7", + "longmapsto;": "\u27fc", + "longrightarrow;": "\u27f6", + "looparrowleft;": "\u21ab", + "looparrowright;": "\u21ac", + "lopar;": "\u2985", + "lopf;": "\U0001d55d", + "loplus;": "\u2a2d", + "lotimes;": "\u2a34", + "lowast;": "\u2217", + "lowbar;": "_", + "loz;": "\u25ca", + "lozenge;": "\u25ca", + "lozf;": "\u29eb", + "lpar;": "(", + "lparlt;": "\u2993", + "lrarr;": "\u21c6", + "lrcorner;": "\u231f", + "lrhar;": "\u21cb", + "lrhard;": "\u296d", + "lrm;": "\u200e", + "lrtri;": "\u22bf", + "lsaquo;": "\u2039", + "lscr;": "\U0001d4c1", + "lsh;": "\u21b0", + "lsim;": "\u2272", + "lsime;": "\u2a8d", + "lsimg;": "\u2a8f", + "lsqb;": "[", + "lsquo;": "\u2018", + "lsquor;": "\u201a", + "lstrok;": "\u0142", + "lt": "<", + "lt;": "<", + "ltcc;": "\u2aa6", + "ltcir;": "\u2a79", + "ltdot;": "\u22d6", + "lthree;": "\u22cb", + "ltimes;": "\u22c9", + "ltlarr;": "\u2976", + "ltquest;": "\u2a7b", + "ltrPar;": "\u2996", + "ltri;": "\u25c3", + "ltrie;": "\u22b4", + "ltrif;": "\u25c2", + "lurdshar;": "\u294a", + "luruhar;": "\u2966", + "lvertneqq;": "\u2268\ufe00", + "lvnE;": "\u2268\ufe00", + "mDDot;": "\u223a", + "macr": "\xaf", + "macr;": "\xaf", + "male;": "\u2642", + "malt;": "\u2720", + "maltese;": "\u2720", + "map;": "\u21a6", + "mapsto;": "\u21a6", + "mapstodown;": "\u21a7", + "mapstoleft;": 
"\u21a4", + "mapstoup;": "\u21a5", + "marker;": "\u25ae", + "mcomma;": "\u2a29", + "mcy;": "\u043c", + "mdash;": "\u2014", + "measuredangle;": "\u2221", + "mfr;": "\U0001d52a", + "mho;": "\u2127", + "micro": "\xb5", + "micro;": "\xb5", + "mid;": "\u2223", + "midast;": "*", + "midcir;": "\u2af0", + "middot": "\xb7", + "middot;": "\xb7", + "minus;": "\u2212", + "minusb;": "\u229f", + "minusd;": "\u2238", + "minusdu;": "\u2a2a", + "mlcp;": "\u2adb", + "mldr;": "\u2026", + "mnplus;": "\u2213", + "models;": "\u22a7", + "mopf;": "\U0001d55e", + "mp;": "\u2213", + "mscr;": "\U0001d4c2", + "mstpos;": "\u223e", + "mu;": "\u03bc", + "multimap;": "\u22b8", + "mumap;": "\u22b8", + "nGg;": "\u22d9\u0338", + "nGt;": "\u226b\u20d2", + "nGtv;": "\u226b\u0338", + "nLeftarrow;": "\u21cd", + "nLeftrightarrow;": "\u21ce", + "nLl;": "\u22d8\u0338", + "nLt;": "\u226a\u20d2", + "nLtv;": "\u226a\u0338", + "nRightarrow;": "\u21cf", + "nVDash;": "\u22af", + "nVdash;": "\u22ae", + "nabla;": "\u2207", + "nacute;": "\u0144", + "nang;": "\u2220\u20d2", + "nap;": "\u2249", + "napE;": "\u2a70\u0338", + "napid;": "\u224b\u0338", + "napos;": "\u0149", + "napprox;": "\u2249", + "natur;": "\u266e", + "natural;": "\u266e", + "naturals;": "\u2115", + "nbsp": "\xa0", + "nbsp;": "\xa0", + "nbump;": "\u224e\u0338", + "nbumpe;": "\u224f\u0338", + "ncap;": "\u2a43", + "ncaron;": "\u0148", + "ncedil;": "\u0146", + "ncong;": "\u2247", + "ncongdot;": "\u2a6d\u0338", + "ncup;": "\u2a42", + "ncy;": "\u043d", + "ndash;": "\u2013", + "ne;": "\u2260", + "neArr;": "\u21d7", + "nearhk;": "\u2924", + "nearr;": "\u2197", + "nearrow;": "\u2197", + "nedot;": "\u2250\u0338", + "nequiv;": "\u2262", + "nesear;": "\u2928", + "nesim;": "\u2242\u0338", + "nexist;": "\u2204", + "nexists;": "\u2204", + "nfr;": "\U0001d52b", + "ngE;": "\u2267\u0338", + "nge;": "\u2271", + "ngeq;": "\u2271", + "ngeqq;": "\u2267\u0338", + "ngeqslant;": "\u2a7e\u0338", + "nges;": "\u2a7e\u0338", + "ngsim;": "\u2275", + "ngt;": "\u226f", + "ngtr;": 
"\u226f", + "nhArr;": "\u21ce", + "nharr;": "\u21ae", + "nhpar;": "\u2af2", + "ni;": "\u220b", + "nis;": "\u22fc", + "nisd;": "\u22fa", + "niv;": "\u220b", + "njcy;": "\u045a", + "nlArr;": "\u21cd", + "nlE;": "\u2266\u0338", + "nlarr;": "\u219a", + "nldr;": "\u2025", + "nle;": "\u2270", + "nleftarrow;": "\u219a", + "nleftrightarrow;": "\u21ae", + "nleq;": "\u2270", + "nleqq;": "\u2266\u0338", + "nleqslant;": "\u2a7d\u0338", + "nles;": "\u2a7d\u0338", + "nless;": "\u226e", + "nlsim;": "\u2274", + "nlt;": "\u226e", + "nltri;": "\u22ea", + "nltrie;": "\u22ec", + "nmid;": "\u2224", + "nopf;": "\U0001d55f", + "not": "\xac", + "not;": "\xac", + "notin;": "\u2209", + "notinE;": "\u22f9\u0338", + "notindot;": "\u22f5\u0338", + "notinva;": "\u2209", + "notinvb;": "\u22f7", + "notinvc;": "\u22f6", + "notni;": "\u220c", + "notniva;": "\u220c", + "notnivb;": "\u22fe", + "notnivc;": "\u22fd", + "npar;": "\u2226", + "nparallel;": "\u2226", + "nparsl;": "\u2afd\u20e5", + "npart;": "\u2202\u0338", + "npolint;": "\u2a14", + "npr;": "\u2280", + "nprcue;": "\u22e0", + "npre;": "\u2aaf\u0338", + "nprec;": "\u2280", + "npreceq;": "\u2aaf\u0338", + "nrArr;": "\u21cf", + "nrarr;": "\u219b", + "nrarrc;": "\u2933\u0338", + "nrarrw;": "\u219d\u0338", + "nrightarrow;": "\u219b", + "nrtri;": "\u22eb", + "nrtrie;": "\u22ed", + "nsc;": "\u2281", + "nsccue;": "\u22e1", + "nsce;": "\u2ab0\u0338", + "nscr;": "\U0001d4c3", + "nshortmid;": "\u2224", + "nshortparallel;": "\u2226", + "nsim;": "\u2241", + "nsime;": "\u2244", + "nsimeq;": "\u2244", + "nsmid;": "\u2224", + "nspar;": "\u2226", + "nsqsube;": "\u22e2", + "nsqsupe;": "\u22e3", + "nsub;": "\u2284", + "nsubE;": "\u2ac5\u0338", + "nsube;": "\u2288", + "nsubset;": "\u2282\u20d2", + "nsubseteq;": "\u2288", + "nsubseteqq;": "\u2ac5\u0338", + "nsucc;": "\u2281", + "nsucceq;": "\u2ab0\u0338", + "nsup;": "\u2285", + "nsupE;": "\u2ac6\u0338", + "nsupe;": "\u2289", + "nsupset;": "\u2283\u20d2", + "nsupseteq;": "\u2289", + "nsupseteqq;": "\u2ac6\u0338", 
+ "ntgl;": "\u2279", + "ntilde": "\xf1", + "ntilde;": "\xf1", + "ntlg;": "\u2278", + "ntriangleleft;": "\u22ea", + "ntrianglelefteq;": "\u22ec", + "ntriangleright;": "\u22eb", + "ntrianglerighteq;": "\u22ed", + "nu;": "\u03bd", + "num;": "#", + "numero;": "\u2116", + "numsp;": "\u2007", + "nvDash;": "\u22ad", + "nvHarr;": "\u2904", + "nvap;": "\u224d\u20d2", + "nvdash;": "\u22ac", + "nvge;": "\u2265\u20d2", + "nvgt;": ">\u20d2", + "nvinfin;": "\u29de", + "nvlArr;": "\u2902", + "nvle;": "\u2264\u20d2", + "nvlt;": "<\u20d2", + "nvltrie;": "\u22b4\u20d2", + "nvrArr;": "\u2903", + "nvrtrie;": "\u22b5\u20d2", + "nvsim;": "\u223c\u20d2", + "nwArr;": "\u21d6", + "nwarhk;": "\u2923", + "nwarr;": "\u2196", + "nwarrow;": "\u2196", + "nwnear;": "\u2927", + "oS;": "\u24c8", + "oacute": "\xf3", + "oacute;": "\xf3", + "oast;": "\u229b", + "ocir;": "\u229a", + "ocirc": "\xf4", + "ocirc;": "\xf4", + "ocy;": "\u043e", + "odash;": "\u229d", + "odblac;": "\u0151", + "odiv;": "\u2a38", + "odot;": "\u2299", + "odsold;": "\u29bc", + "oelig;": "\u0153", + "ofcir;": "\u29bf", + "ofr;": "\U0001d52c", + "ogon;": "\u02db", + "ograve": "\xf2", + "ograve;": "\xf2", + "ogt;": "\u29c1", + "ohbar;": "\u29b5", + "ohm;": "\u03a9", + "oint;": "\u222e", + "olarr;": "\u21ba", + "olcir;": "\u29be", + "olcross;": "\u29bb", + "oline;": "\u203e", + "olt;": "\u29c0", + "omacr;": "\u014d", + "omega;": "\u03c9", + "omicron;": "\u03bf", + "omid;": "\u29b6", + "ominus;": "\u2296", + "oopf;": "\U0001d560", + "opar;": "\u29b7", + "operp;": "\u29b9", + "oplus;": "\u2295", + "or;": "\u2228", + "orarr;": "\u21bb", + "ord;": "\u2a5d", + "order;": "\u2134", + "orderof;": "\u2134", + "ordf": "\xaa", + "ordf;": "\xaa", + "ordm": "\xba", + "ordm;": "\xba", + "origof;": "\u22b6", + "oror;": "\u2a56", + "orslope;": "\u2a57", + "orv;": "\u2a5b", + "oscr;": "\u2134", + "oslash": "\xf8", + "oslash;": "\xf8", + "osol;": "\u2298", + "otilde": "\xf5", + "otilde;": "\xf5", + "otimes;": "\u2297", + "otimesas;": "\u2a36", + 
"ouml": "\xf6", + "ouml;": "\xf6", + "ovbar;": "\u233d", + "par;": "\u2225", + "para": "\xb6", + "para;": "\xb6", + "parallel;": "\u2225", + "parsim;": "\u2af3", + "parsl;": "\u2afd", + "part;": "\u2202", + "pcy;": "\u043f", + "percnt;": "%", + "period;": ".", + "permil;": "\u2030", + "perp;": "\u22a5", + "pertenk;": "\u2031", + "pfr;": "\U0001d52d", + "phi;": "\u03c6", + "phiv;": "\u03d5", + "phmmat;": "\u2133", + "phone;": "\u260e", + "pi;": "\u03c0", + "pitchfork;": "\u22d4", + "piv;": "\u03d6", + "planck;": "\u210f", + "planckh;": "\u210e", + "plankv;": "\u210f", + "plus;": "+", + "plusacir;": "\u2a23", + "plusb;": "\u229e", + "pluscir;": "\u2a22", + "plusdo;": "\u2214", + "plusdu;": "\u2a25", + "pluse;": "\u2a72", + "plusmn": "\xb1", + "plusmn;": "\xb1", + "plussim;": "\u2a26", + "plustwo;": "\u2a27", + "pm;": "\xb1", + "pointint;": "\u2a15", + "popf;": "\U0001d561", + "pound": "\xa3", + "pound;": "\xa3", + "pr;": "\u227a", + "prE;": "\u2ab3", + "prap;": "\u2ab7", + "prcue;": "\u227c", + "pre;": "\u2aaf", + "prec;": "\u227a", + "precapprox;": "\u2ab7", + "preccurlyeq;": "\u227c", + "preceq;": "\u2aaf", + "precnapprox;": "\u2ab9", + "precneqq;": "\u2ab5", + "precnsim;": "\u22e8", + "precsim;": "\u227e", + "prime;": "\u2032", + "primes;": "\u2119", + "prnE;": "\u2ab5", + "prnap;": "\u2ab9", + "prnsim;": "\u22e8", + "prod;": "\u220f", + "profalar;": "\u232e", + "profline;": "\u2312", + "profsurf;": "\u2313", + "prop;": "\u221d", + "propto;": "\u221d", + "prsim;": "\u227e", + "prurel;": "\u22b0", + "pscr;": "\U0001d4c5", + "psi;": "\u03c8", + "puncsp;": "\u2008", + "qfr;": "\U0001d52e", + "qint;": "\u2a0c", + "qopf;": "\U0001d562", + "qprime;": "\u2057", + "qscr;": "\U0001d4c6", + "quaternions;": "\u210d", + "quatint;": "\u2a16", + "quest;": "?", + "questeq;": "\u225f", + "quot": "\"", + "quot;": "\"", + "rAarr;": "\u21db", + "rArr;": "\u21d2", + "rAtail;": "\u291c", + "rBarr;": "\u290f", + "rHar;": "\u2964", + "race;": "\u223d\u0331", + "racute;": "\u0155", + 
"radic;": "\u221a", + "raemptyv;": "\u29b3", + "rang;": "\u27e9", + "rangd;": "\u2992", + "range;": "\u29a5", + "rangle;": "\u27e9", + "raquo": "\xbb", + "raquo;": "\xbb", + "rarr;": "\u2192", + "rarrap;": "\u2975", + "rarrb;": "\u21e5", + "rarrbfs;": "\u2920", + "rarrc;": "\u2933", + "rarrfs;": "\u291e", + "rarrhk;": "\u21aa", + "rarrlp;": "\u21ac", + "rarrpl;": "\u2945", + "rarrsim;": "\u2974", + "rarrtl;": "\u21a3", + "rarrw;": "\u219d", + "ratail;": "\u291a", + "ratio;": "\u2236", + "rationals;": "\u211a", + "rbarr;": "\u290d", + "rbbrk;": "\u2773", + "rbrace;": "}", + "rbrack;": "]", + "rbrke;": "\u298c", + "rbrksld;": "\u298e", + "rbrkslu;": "\u2990", + "rcaron;": "\u0159", + "rcedil;": "\u0157", + "rceil;": "\u2309", + "rcub;": "}", + "rcy;": "\u0440", + "rdca;": "\u2937", + "rdldhar;": "\u2969", + "rdquo;": "\u201d", + "rdquor;": "\u201d", + "rdsh;": "\u21b3", + "real;": "\u211c", + "realine;": "\u211b", + "realpart;": "\u211c", + "reals;": "\u211d", + "rect;": "\u25ad", + "reg": "\xae", + "reg;": "\xae", + "rfisht;": "\u297d", + "rfloor;": "\u230b", + "rfr;": "\U0001d52f", + "rhard;": "\u21c1", + "rharu;": "\u21c0", + "rharul;": "\u296c", + "rho;": "\u03c1", + "rhov;": "\u03f1", + "rightarrow;": "\u2192", + "rightarrowtail;": "\u21a3", + "rightharpoondown;": "\u21c1", + "rightharpoonup;": "\u21c0", + "rightleftarrows;": "\u21c4", + "rightleftharpoons;": "\u21cc", + "rightrightarrows;": "\u21c9", + "rightsquigarrow;": "\u219d", + "rightthreetimes;": "\u22cc", + "ring;": "\u02da", + "risingdotseq;": "\u2253", + "rlarr;": "\u21c4", + "rlhar;": "\u21cc", + "rlm;": "\u200f", + "rmoust;": "\u23b1", + "rmoustache;": "\u23b1", + "rnmid;": "\u2aee", + "roang;": "\u27ed", + "roarr;": "\u21fe", + "robrk;": "\u27e7", + "ropar;": "\u2986", + "ropf;": "\U0001d563", + "roplus;": "\u2a2e", + "rotimes;": "\u2a35", + "rpar;": ")", + "rpargt;": "\u2994", + "rppolint;": "\u2a12", + "rrarr;": "\u21c9", + "rsaquo;": "\u203a", + "rscr;": "\U0001d4c7", + "rsh;": "\u21b1", + 
"rsqb;": "]", + "rsquo;": "\u2019", + "rsquor;": "\u2019", + "rthree;": "\u22cc", + "rtimes;": "\u22ca", + "rtri;": "\u25b9", + "rtrie;": "\u22b5", + "rtrif;": "\u25b8", + "rtriltri;": "\u29ce", + "ruluhar;": "\u2968", + "rx;": "\u211e", + "sacute;": "\u015b", + "sbquo;": "\u201a", + "sc;": "\u227b", + "scE;": "\u2ab4", + "scap;": "\u2ab8", + "scaron;": "\u0161", + "sccue;": "\u227d", + "sce;": "\u2ab0", + "scedil;": "\u015f", + "scirc;": "\u015d", + "scnE;": "\u2ab6", + "scnap;": "\u2aba", + "scnsim;": "\u22e9", + "scpolint;": "\u2a13", + "scsim;": "\u227f", + "scy;": "\u0441", + "sdot;": "\u22c5", + "sdotb;": "\u22a1", + "sdote;": "\u2a66", + "seArr;": "\u21d8", + "searhk;": "\u2925", + "searr;": "\u2198", + "searrow;": "\u2198", + "sect": "\xa7", + "sect;": "\xa7", + "semi;": ";", + "seswar;": "\u2929", + "setminus;": "\u2216", + "setmn;": "\u2216", + "sext;": "\u2736", + "sfr;": "\U0001d530", + "sfrown;": "\u2322", + "sharp;": "\u266f", + "shchcy;": "\u0449", + "shcy;": "\u0448", + "shortmid;": "\u2223", + "shortparallel;": "\u2225", + "shy": "\xad", + "shy;": "\xad", + "sigma;": "\u03c3", + "sigmaf;": "\u03c2", + "sigmav;": "\u03c2", + "sim;": "\u223c", + "simdot;": "\u2a6a", + "sime;": "\u2243", + "simeq;": "\u2243", + "simg;": "\u2a9e", + "simgE;": "\u2aa0", + "siml;": "\u2a9d", + "simlE;": "\u2a9f", + "simne;": "\u2246", + "simplus;": "\u2a24", + "simrarr;": "\u2972", + "slarr;": "\u2190", + "smallsetminus;": "\u2216", + "smashp;": "\u2a33", + "smeparsl;": "\u29e4", + "smid;": "\u2223", + "smile;": "\u2323", + "smt;": "\u2aaa", + "smte;": "\u2aac", + "smtes;": "\u2aac\ufe00", + "softcy;": "\u044c", + "sol;": "/", + "solb;": "\u29c4", + "solbar;": "\u233f", + "sopf;": "\U0001d564", + "spades;": "\u2660", + "spadesuit;": "\u2660", + "spar;": "\u2225", + "sqcap;": "\u2293", + "sqcaps;": "\u2293\ufe00", + "sqcup;": "\u2294", + "sqcups;": "\u2294\ufe00", + "sqsub;": "\u228f", + "sqsube;": "\u2291", + "sqsubset;": "\u228f", + "sqsubseteq;": "\u2291", + "sqsup;": 
"\u2290", + "sqsupe;": "\u2292", + "sqsupset;": "\u2290", + "sqsupseteq;": "\u2292", + "squ;": "\u25a1", + "square;": "\u25a1", + "squarf;": "\u25aa", + "squf;": "\u25aa", + "srarr;": "\u2192", + "sscr;": "\U0001d4c8", + "ssetmn;": "\u2216", + "ssmile;": "\u2323", + "sstarf;": "\u22c6", + "star;": "\u2606", + "starf;": "\u2605", + "straightepsilon;": "\u03f5", + "straightphi;": "\u03d5", + "strns;": "\xaf", + "sub;": "\u2282", + "subE;": "\u2ac5", + "subdot;": "\u2abd", + "sube;": "\u2286", + "subedot;": "\u2ac3", + "submult;": "\u2ac1", + "subnE;": "\u2acb", + "subne;": "\u228a", + "subplus;": "\u2abf", + "subrarr;": "\u2979", + "subset;": "\u2282", + "subseteq;": "\u2286", + "subseteqq;": "\u2ac5", + "subsetneq;": "\u228a", + "subsetneqq;": "\u2acb", + "subsim;": "\u2ac7", + "subsub;": "\u2ad5", + "subsup;": "\u2ad3", + "succ;": "\u227b", + "succapprox;": "\u2ab8", + "succcurlyeq;": "\u227d", + "succeq;": "\u2ab0", + "succnapprox;": "\u2aba", + "succneqq;": "\u2ab6", + "succnsim;": "\u22e9", + "succsim;": "\u227f", + "sum;": "\u2211", + "sung;": "\u266a", + "sup1": "\xb9", + "sup1;": "\xb9", + "sup2": "\xb2", + "sup2;": "\xb2", + "sup3": "\xb3", + "sup3;": "\xb3", + "sup;": "\u2283", + "supE;": "\u2ac6", + "supdot;": "\u2abe", + "supdsub;": "\u2ad8", + "supe;": "\u2287", + "supedot;": "\u2ac4", + "suphsol;": "\u27c9", + "suphsub;": "\u2ad7", + "suplarr;": "\u297b", + "supmult;": "\u2ac2", + "supnE;": "\u2acc", + "supne;": "\u228b", + "supplus;": "\u2ac0", + "supset;": "\u2283", + "supseteq;": "\u2287", + "supseteqq;": "\u2ac6", + "supsetneq;": "\u228b", + "supsetneqq;": "\u2acc", + "supsim;": "\u2ac8", + "supsub;": "\u2ad4", + "supsup;": "\u2ad6", + "swArr;": "\u21d9", + "swarhk;": "\u2926", + "swarr;": "\u2199", + "swarrow;": "\u2199", + "swnwar;": "\u292a", + "szlig": "\xdf", + "szlig;": "\xdf", + "target;": "\u2316", + "tau;": "\u03c4", + "tbrk;": "\u23b4", + "tcaron;": "\u0165", + "tcedil;": "\u0163", + "tcy;": "\u0442", + "tdot;": "\u20db", + "telrec;": 
"\u2315", + "tfr;": "\U0001d531", + "there4;": "\u2234", + "therefore;": "\u2234", + "theta;": "\u03b8", + "thetasym;": "\u03d1", + "thetav;": "\u03d1", + "thickapprox;": "\u2248", + "thicksim;": "\u223c", + "thinsp;": "\u2009", + "thkap;": "\u2248", + "thksim;": "\u223c", + "thorn": "\xfe", + "thorn;": "\xfe", + "tilde;": "\u02dc", + "times": "\xd7", + "times;": "\xd7", + "timesb;": "\u22a0", + "timesbar;": "\u2a31", + "timesd;": "\u2a30", + "tint;": "\u222d", + "toea;": "\u2928", + "top;": "\u22a4", + "topbot;": "\u2336", + "topcir;": "\u2af1", + "topf;": "\U0001d565", + "topfork;": "\u2ada", + "tosa;": "\u2929", + "tprime;": "\u2034", + "trade;": "\u2122", + "triangle;": "\u25b5", + "triangledown;": "\u25bf", + "triangleleft;": "\u25c3", + "trianglelefteq;": "\u22b4", + "triangleq;": "\u225c", + "triangleright;": "\u25b9", + "trianglerighteq;": "\u22b5", + "tridot;": "\u25ec", + "trie;": "\u225c", + "triminus;": "\u2a3a", + "triplus;": "\u2a39", + "trisb;": "\u29cd", + "tritime;": "\u2a3b", + "trpezium;": "\u23e2", + "tscr;": "\U0001d4c9", + "tscy;": "\u0446", + "tshcy;": "\u045b", + "tstrok;": "\u0167", + "twixt;": "\u226c", + "twoheadleftarrow;": "\u219e", + "twoheadrightarrow;": "\u21a0", + "uArr;": "\u21d1", + "uHar;": "\u2963", + "uacute": "\xfa", + "uacute;": "\xfa", + "uarr;": "\u2191", + "ubrcy;": "\u045e", + "ubreve;": "\u016d", + "ucirc": "\xfb", + "ucirc;": "\xfb", + "ucy;": "\u0443", + "udarr;": "\u21c5", + "udblac;": "\u0171", + "udhar;": "\u296e", + "ufisht;": "\u297e", + "ufr;": "\U0001d532", + "ugrave": "\xf9", + "ugrave;": "\xf9", + "uharl;": "\u21bf", + "uharr;": "\u21be", + "uhblk;": "\u2580", + "ulcorn;": "\u231c", + "ulcorner;": "\u231c", + "ulcrop;": "\u230f", + "ultri;": "\u25f8", + "umacr;": "\u016b", + "uml": "\xa8", + "uml;": "\xa8", + "uogon;": "\u0173", + "uopf;": "\U0001d566", + "uparrow;": "\u2191", + "updownarrow;": "\u2195", + "upharpoonleft;": "\u21bf", + "upharpoonright;": "\u21be", + "uplus;": "\u228e", + "upsi;": "\u03c5", + 
"upsih;": "\u03d2", + "upsilon;": "\u03c5", + "upuparrows;": "\u21c8", + "urcorn;": "\u231d", + "urcorner;": "\u231d", + "urcrop;": "\u230e", + "uring;": "\u016f", + "urtri;": "\u25f9", + "uscr;": "\U0001d4ca", + "utdot;": "\u22f0", + "utilde;": "\u0169", + "utri;": "\u25b5", + "utrif;": "\u25b4", + "uuarr;": "\u21c8", + "uuml": "\xfc", + "uuml;": "\xfc", + "uwangle;": "\u29a7", + "vArr;": "\u21d5", + "vBar;": "\u2ae8", + "vBarv;": "\u2ae9", + "vDash;": "\u22a8", + "vangrt;": "\u299c", + "varepsilon;": "\u03f5", + "varkappa;": "\u03f0", + "varnothing;": "\u2205", + "varphi;": "\u03d5", + "varpi;": "\u03d6", + "varpropto;": "\u221d", + "varr;": "\u2195", + "varrho;": "\u03f1", + "varsigma;": "\u03c2", + "varsubsetneq;": "\u228a\ufe00", + "varsubsetneqq;": "\u2acb\ufe00", + "varsupsetneq;": "\u228b\ufe00", + "varsupsetneqq;": "\u2acc\ufe00", + "vartheta;": "\u03d1", + "vartriangleleft;": "\u22b2", + "vartriangleright;": "\u22b3", + "vcy;": "\u0432", + "vdash;": "\u22a2", + "vee;": "\u2228", + "veebar;": "\u22bb", + "veeeq;": "\u225a", + "vellip;": "\u22ee", + "verbar;": "|", + "vert;": "|", + "vfr;": "\U0001d533", + "vltri;": "\u22b2", + "vnsub;": "\u2282\u20d2", + "vnsup;": "\u2283\u20d2", + "vopf;": "\U0001d567", + "vprop;": "\u221d", + "vrtri;": "\u22b3", + "vscr;": "\U0001d4cb", + "vsubnE;": "\u2acb\ufe00", + "vsubne;": "\u228a\ufe00", + "vsupnE;": "\u2acc\ufe00", + "vsupne;": "\u228b\ufe00", + "vzigzag;": "\u299a", + "wcirc;": "\u0175", + "wedbar;": "\u2a5f", + "wedge;": "\u2227", + "wedgeq;": "\u2259", + "weierp;": "\u2118", + "wfr;": "\U0001d534", + "wopf;": "\U0001d568", + "wp;": "\u2118", + "wr;": "\u2240", + "wreath;": "\u2240", + "wscr;": "\U0001d4cc", + "xcap;": "\u22c2", + "xcirc;": "\u25ef", + "xcup;": "\u22c3", + "xdtri;": "\u25bd", + "xfr;": "\U0001d535", + "xhArr;": "\u27fa", + "xharr;": "\u27f7", + "xi;": "\u03be", + "xlArr;": "\u27f8", + "xlarr;": "\u27f5", + "xmap;": "\u27fc", + "xnis;": "\u22fb", + "xodot;": "\u2a00", + "xopf;": "\U0001d569", + 
"xoplus;": "\u2a01", + "xotime;": "\u2a02", + "xrArr;": "\u27f9", + "xrarr;": "\u27f6", + "xscr;": "\U0001d4cd", + "xsqcup;": "\u2a06", + "xuplus;": "\u2a04", + "xutri;": "\u25b3", + "xvee;": "\u22c1", + "xwedge;": "\u22c0", + "yacute": "\xfd", + "yacute;": "\xfd", + "yacy;": "\u044f", + "ycirc;": "\u0177", + "ycy;": "\u044b", + "yen": "\xa5", + "yen;": "\xa5", + "yfr;": "\U0001d536", + "yicy;": "\u0457", + "yopf;": "\U0001d56a", + "yscr;": "\U0001d4ce", + "yucy;": "\u044e", + "yuml": "\xff", + "yuml;": "\xff", + "zacute;": "\u017a", + "zcaron;": "\u017e", + "zcy;": "\u0437", + "zdot;": "\u017c", + "zeetrf;": "\u2128", + "zeta;": "\u03b6", + "zfr;": "\U0001d537", + "zhcy;": "\u0436", + "zigrarr;": "\u21dd", + "zopf;": "\U0001d56b", + "zscr;": "\U0001d4cf", + "zwj;": "\u200d", + "zwnj;": "\u200c", } replacementCharacters = { - 0x0:u"\uFFFD", - 0x0d:u"\u000D", - 0x80:u"\u20AC", - 0x81:u"\u0081", - 0x81:u"\u0081", - 0x82:u"\u201A", - 0x83:u"\u0192", - 0x84:u"\u201E", - 0x85:u"\u2026", - 0x86:u"\u2020", - 0x87:u"\u2021", - 0x88:u"\u02C6", - 0x89:u"\u2030", - 0x8A:u"\u0160", - 0x8B:u"\u2039", - 0x8C:u"\u0152", - 0x8D:u"\u008D", - 0x8E:u"\u017D", - 0x8F:u"\u008F", - 0x90:u"\u0090", - 0x91:u"\u2018", - 0x92:u"\u2019", - 0x93:u"\u201C", - 0x94:u"\u201D", - 0x95:u"\u2022", - 0x96:u"\u2013", - 0x97:u"\u2014", - 0x98:u"\u02DC", - 0x99:u"\u2122", - 0x9A:u"\u0161", - 0x9B:u"\u203A", - 0x9C:u"\u0153", - 0x9D:u"\u009D", - 0x9E:u"\u017E", - 0x9F:u"\u0178", + 0x0: "\uFFFD", + 0x0d: "\u000D", + 0x80: "\u20AC", + 0x81: "\u0081", + 0x81: "\u0081", + 0x82: "\u201A", + 0x83: "\u0192", + 0x84: "\u201E", + 0x85: "\u2026", + 0x86: "\u2020", + 0x87: "\u2021", + 0x88: "\u02C6", + 0x89: "\u2030", + 0x8A: "\u0160", + 0x8B: "\u2039", + 0x8C: "\u0152", + 0x8D: "\u008D", + 0x8E: "\u017D", + 0x8F: "\u008F", + 0x90: "\u0090", + 0x91: "\u2018", + 0x92: "\u2019", + 0x93: "\u201C", + 0x94: "\u201D", + 0x95: "\u2022", + 0x96: "\u2013", + 0x97: "\u2014", + 0x98: "\u02DC", + 0x99: "\u2122", + 0x9A: 
"\u0161", + 0x9B: "\u203A", + 0x9C: "\u0153", + 0x9D: "\u009D", + 0x9E: "\u017E", + 0x9F: "\u0178", } encodings = { @@ -3061,25 +3078,27 @@ encodings = { 'x-x-big5': 'big5'} tokenTypes = { - "Doctype":0, - "Characters":1, - "SpaceCharacters":2, - "StartTag":3, - "EndTag":4, - "EmptyTag":5, - "Comment":6, - "ParseError":7 + "Doctype": 0, + "Characters": 1, + "SpaceCharacters": 2, + "StartTag": 3, + "EndTag": 4, + "EmptyTag": 5, + "Comment": 6, + "ParseError": 7 } -tagTokenTypes = frozenset((tokenTypes["StartTag"], tokenTypes["EndTag"], +tagTokenTypes = frozenset((tokenTypes["StartTag"], tokenTypes["EndTag"], tokenTypes["EmptyTag"])) -prefixes = dict([(v,k) for k,v in namespaces.iteritems()]) +prefixes = dict([(v, k) for k, v in namespaces.items()]) prefixes["http://www.w3.org/1998/Math/MathML"] = "math" + class DataLossWarning(UserWarning): pass + class ReparseException(Exception): pass diff --git a/libs/html5lib/filters/_base.py b/libs/html5lib/filters/_base.py index bca94ad..c7dbaed 100644 --- a/libs/html5lib/filters/_base.py +++ b/libs/html5lib/filters/_base.py @@ -1,3 +1,5 @@ +from __future__ import absolute_import, division, unicode_literals + class Filter(object): def __init__(self, source): diff --git a/libs/html5lib/filters/alphabeticalattributes.py b/libs/html5lib/filters/alphabeticalattributes.py new file mode 100644 index 0000000..fed6996 --- /dev/null +++ b/libs/html5lib/filters/alphabeticalattributes.py @@ -0,0 +1,20 @@ +from __future__ import absolute_import, division, unicode_literals + +from . 
import _base + +try: + from collections import OrderedDict +except ImportError: + from ordereddict import OrderedDict + + +class Filter(_base.Filter): + def __iter__(self): + for token in _base.Filter.__iter__(self): + if token["type"] in ("StartTag", "EmptyTag"): + attrs = OrderedDict() + for name, value in sorted(token["data"].items(), + key=lambda x: x[0]): + attrs[name] = value + token["data"] = attrs + yield token diff --git a/libs/html5lib/filters/formfiller.py b/libs/html5lib/filters/formfiller.py deleted file mode 100644 index 9400171..0000000 --- a/libs/html5lib/filters/formfiller.py +++ /dev/null @@ -1,127 +0,0 @@ -# -# The goal is to finally have a form filler where you pass data for -# each form, using the algorithm for "Seeding a form with initial values" -# See http://www.whatwg.org/specs/web-forms/current-work/#seeding -# - -import _base - -from html5lib.constants import spaceCharacters -spaceCharacters = u"".join(spaceCharacters) - -class SimpleFilter(_base.Filter): - def __init__(self, source, fieldStorage): - _base.Filter.__init__(self, source) - self.fieldStorage = fieldStorage - - def __iter__(self): - field_indices = {} - state = None - field_name = None - for token in _base.Filter.__iter__(self): - type = token["type"] - if type in ("StartTag", "EmptyTag"): - name = token["name"].lower() - if name == "input": - field_name = None - field_type = None - input_value_index = -1 - input_checked_index = -1 - for i,(n,v) in enumerate(token["data"]): - n = n.lower() - if n == u"name": - field_name = v.strip(spaceCharacters) - elif n == u"type": - field_type = v.strip(spaceCharacters) - elif n == u"checked": - input_checked_index = i - elif n == u"value": - input_value_index = i - - value_list = self.fieldStorage.getlist(field_name) - field_index = field_indices.setdefault(field_name, 0) - if field_index < len(value_list): - value = value_list[field_index] - else: - value = "" - - if field_type in (u"checkbox", u"radio"): - if value_list: - if 
token["data"][input_value_index][1] == value: - if input_checked_index < 0: - token["data"].append((u"checked", u"")) - field_indices[field_name] = field_index + 1 - elif input_checked_index >= 0: - del token["data"][input_checked_index] - - elif field_type not in (u"button", u"submit", u"reset"): - if input_value_index >= 0: - token["data"][input_value_index] = (u"value", value) - else: - token["data"].append((u"value", value)) - field_indices[field_name] = field_index + 1 - - field_type = None - field_name = None - - elif name == "textarea": - field_type = "textarea" - field_name = dict((token["data"])[::-1])["name"] - - elif name == "select": - field_type = "select" - attributes = dict(token["data"][::-1]) - field_name = attributes.get("name") - is_select_multiple = "multiple" in attributes - is_selected_option_found = False - - elif field_type == "select" and field_name and name == "option": - option_selected_index = -1 - option_value = None - for i,(n,v) in enumerate(token["data"]): - n = n.lower() - if n == "selected": - option_selected_index = i - elif n == "value": - option_value = v.strip(spaceCharacters) - if option_value is None: - raise NotImplementedError("<option>s without a value= attribute") - else: - value_list = self.fieldStorage.getlist(field_name) - if value_list: - field_index = field_indices.setdefault(field_name, 0) - if field_index < len(value_list): - value = value_list[field_index] - else: - value = "" - if (is_select_multiple or not is_selected_option_found) and option_value == value: - if option_selected_index < 0: - token["data"].append((u"selected", u"")) - field_indices[field_name] = field_index + 1 - is_selected_option_found = True - elif option_selected_index >= 0: - del token["data"][option_selected_index] - - elif field_type is not None and field_name and type == "EndTag": - name = token["name"].lower() - if name == field_type: - if name == "textarea": - value_list = self.fieldStorage.getlist(field_name) - if value_list: - 
field_index = field_indices.setdefault(field_name, 0) - if field_index < len(value_list): - value = value_list[field_index] - else: - value = "" - yield {"type": "Characters", "data": value} - field_indices[field_name] = field_index + 1 - - field_name = None - - elif name == "option" and field_type == "select": - pass # TODO: part of "option without value= attribute" processing - - elif field_type == "textarea": - continue # ignore token - - yield token diff --git a/libs/html5lib/filters/inject_meta_charset.py b/libs/html5lib/filters/inject_meta_charset.py index 8e04d8a..ca33b70 100644 --- a/libs/html5lib/filters/inject_meta_charset.py +++ b/libs/html5lib/filters/inject_meta_charset.py @@ -1,4 +1,7 @@ -import _base +from __future__ import absolute_import, division, unicode_literals + +from . import _base + class Filter(_base.Filter): def __init__(self, source, encoding): @@ -13,44 +16,44 @@ class Filter(_base.Filter): for token in _base.Filter.__iter__(self): type = token["type"] if type == "StartTag": - if token["name"].lower() == u"head": + if token["name"].lower() == "head": state = "in_head" elif type == "EmptyTag": - if token["name"].lower() == u"meta": - # replace charset with actual encoding - has_http_equiv_content_type = False - for (namespace,name),value in token["data"].iteritems(): - if namespace != None: - continue - elif name.lower() == u'charset': - token["data"][(namespace,name)] = self.encoding - meta_found = True - break - elif name == u'http-equiv' and value.lower() == u'content-type': - has_http_equiv_content_type = True - else: - if has_http_equiv_content_type and (None, u"content") in token["data"]: - token["data"][(None, u"content")] = u'text/html; charset=%s' % self.encoding - meta_found = True - - elif token["name"].lower() == u"head" and not meta_found: + if token["name"].lower() == "meta": + # replace charset with actual encoding + has_http_equiv_content_type = False + for (namespace, name), value in token["data"].items(): + if namespace 
is not None: + continue + elif name.lower() == 'charset': + token["data"][(namespace, name)] = self.encoding + meta_found = True + break + elif name == 'http-equiv' and value.lower() == 'content-type': + has_http_equiv_content_type = True + else: + if has_http_equiv_content_type and (None, "content") in token["data"]: + token["data"][(None, "content")] = 'text/html; charset=%s' % self.encoding + meta_found = True + + elif token["name"].lower() == "head" and not meta_found: # insert meta into empty head - yield {"type": "StartTag", "name": u"head", + yield {"type": "StartTag", "name": "head", "data": token["data"]} - yield {"type": "EmptyTag", "name": u"meta", - "data": {(None, u"charset"): self.encoding}} - yield {"type": "EndTag", "name": u"head"} + yield {"type": "EmptyTag", "name": "meta", + "data": {(None, "charset"): self.encoding}} + yield {"type": "EndTag", "name": "head"} meta_found = True continue elif type == "EndTag": - if token["name"].lower() == u"head" and pending: + if token["name"].lower() == "head" and pending: # insert meta into head (if necessary) and flush pending queue yield pending.pop(0) if not meta_found: - yield {"type": "EmptyTag", "name": u"meta", - "data": {(None, u"charset"): self.encoding}} + yield {"type": "EmptyTag", "name": "meta", + "data": {(None, "charset"): self.encoding}} while pending: yield pending.pop(0) meta_found = True diff --git a/libs/html5lib/filters/lint.py b/libs/html5lib/filters/lint.py index ea5c619..7cc99a4 100644 --- a/libs/html5lib/filters/lint.py +++ b/libs/html5lib/filters/lint.py @@ -1,13 +1,18 @@ +from __future__ import absolute_import, division, unicode_literals + from gettext import gettext _ = gettext -import _base -from html5lib.constants import cdataElements, rcdataElements, voidElements +from . 
import _base +from ..constants import cdataElements, rcdataElements, voidElements + +from ..constants import spaceCharacters +spaceCharacters = "".join(spaceCharacters) + -from html5lib.constants import spaceCharacters -spaceCharacters = u"".join(spaceCharacters) +class LintError(Exception): + pass -class LintError(Exception): pass class Filter(_base.Filter): def __iter__(self): @@ -18,24 +23,24 @@ class Filter(_base.Filter): if type in ("StartTag", "EmptyTag"): name = token["name"] if contentModelFlag != "PCDATA": - raise LintError(_("StartTag not in PCDATA content model flag: %s") % name) - if not isinstance(name, unicode): - raise LintError(_(u"Tag name is not a string: %r") % name) + raise LintError(_("StartTag not in PCDATA content model flag: %(tag)s") % {"tag": name}) + if not isinstance(name, str): + raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name}) if not name: - raise LintError(_(u"Empty tag name")) + raise LintError(_("Empty tag name")) if type == "StartTag" and name in voidElements: - raise LintError(_(u"Void element reported as StartTag token: %s") % name) + raise LintError(_("Void element reported as StartTag token: %(tag)s") % {"tag": name}) elif type == "EmptyTag" and name not in voidElements: - raise LintError(_(u"Non-void element reported as EmptyTag token: %s") % token["name"]) + raise LintError(_("Non-void element reported as EmptyTag token: %(tag)s") % {"tag": token["name"]}) if type == "StartTag": open_elements.append(name) for name, value in token["data"]: - if not isinstance(name, unicode): - raise LintError(_("Attribute name is not a string: %r") % name) + if not isinstance(name, str): + raise LintError(_("Attribute name is not a string: %(name)r") % {"name": name}) if not name: - raise LintError(_(u"Empty attribute name")) - if not isinstance(value, unicode): - raise LintError(_("Attribute value is not a string: %r") % value) + raise LintError(_("Empty attribute name")) + if not isinstance(value, str): + raise 
LintError(_("Attribute value is not a string: %(value)r") % {"value": value}) if name in cdataElements: contentModelFlag = "CDATA" elif name in rcdataElements: @@ -45,15 +50,15 @@ class Filter(_base.Filter): elif type == "EndTag": name = token["name"] - if not isinstance(name, unicode): - raise LintError(_(u"Tag name is not a string: %r") % name) + if not isinstance(name, str): + raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name}) if not name: - raise LintError(_(u"Empty tag name")) + raise LintError(_("Empty tag name")) if name in voidElements: - raise LintError(_(u"Void element reported as EndTag token: %s") % name) + raise LintError(_("Void element reported as EndTag token: %(tag)s") % {"tag": name}) start_name = open_elements.pop() if start_name != name: - raise LintError(_(u"EndTag (%s) does not match StartTag (%s)") % (name, start_name)) + raise LintError(_("EndTag (%(end)s) does not match StartTag (%(start)s)") % {"end": name, "start": start_name}) contentModelFlag = "PCDATA" elif type == "Comment": @@ -62,27 +67,27 @@ class Filter(_base.Filter): elif type in ("Characters", "SpaceCharacters"): data = token["data"] - if not isinstance(data, unicode): - raise LintError(_("Attribute name is not a string: %r") % data) + if not isinstance(data, str): + raise LintError(_("Attribute name is not a string: %(name)r") % {"name": data}) if not data: - raise LintError(_(u"%s token with empty data") % type) + raise LintError(_("%(type)s token with empty data") % {"type": type}) if type == "SpaceCharacters": data = data.strip(spaceCharacters) if data: - raise LintError(_(u"Non-space character(s) found in SpaceCharacters token: ") % data) + raise LintError(_("Non-space character(s) found in SpaceCharacters token: %(token)r") % {"token": data}) elif type == "Doctype": name = token["name"] if contentModelFlag != "PCDATA": - raise LintError(_("Doctype not in PCDATA content model flag: %s") % name) - if not isinstance(name, unicode): - raise 
LintError(_(u"Tag name is not a string: %r") % name) + raise LintError(_("Doctype not in PCDATA content model flag: %(name)s") % {"name": name}) + if not isinstance(name, str): + raise LintError(_("Tag name is not a string: %(tag)r") % {"tag": name}) # XXX: what to do with token["data"] ? elif type in ("ParseError", "SerializeError"): pass else: - raise LintError(_(u"Unknown token type: %s") % type) + raise LintError(_("Unknown token type: %(type)s") % {"type": type}) yield token diff --git a/libs/html5lib/filters/optionaltags.py b/libs/html5lib/filters/optionaltags.py index a77aa72..fefe0b3 100644 --- a/libs/html5lib/filters/optionaltags.py +++ b/libs/html5lib/filters/optionaltags.py @@ -1,4 +1,7 @@ -import _base +from __future__ import absolute_import, division, unicode_literals + +from . import _base + class Filter(_base.Filter): def slider(self): @@ -14,8 +17,8 @@ class Filter(_base.Filter): for previous, token, next in self.slider(): type = token["type"] if type == "StartTag": - if (token["data"] or - not self.is_optional_start(token["name"], previous, next)): + if (token["data"] or + not self.is_optional_start(token["name"], previous, next)): yield token elif type == "EndTag": if not self.is_optional_end(token["name"], next): @@ -73,7 +76,7 @@ class Filter(_base.Filter): # omit the thead and tfoot elements' end tag when they are # immediately followed by a tbody element. See is_optional_end. if previous and previous['type'] == 'EndTag' and \ - previous['name'] in ('tbody','thead','tfoot'): + previous['name'] in ('tbody', 'thead', 'tfoot'): return False return next["name"] == 'tr' else: @@ -121,10 +124,10 @@ class Filter(_base.Filter): # there is no more content in the parent element. 
if type in ("StartTag", "EmptyTag"): return next["name"] in ('address', 'article', 'aside', - 'blockquote', 'datagrid', 'dialog', + 'blockquote', 'datagrid', 'dialog', 'dir', 'div', 'dl', 'fieldset', 'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', - 'header', 'hr', 'menu', 'nav', 'ol', + 'header', 'hr', 'menu', 'nav', 'ol', 'p', 'pre', 'section', 'table', 'ul') else: return type == "EndTag" or type is None diff --git a/libs/html5lib/filters/sanitizer.py b/libs/html5lib/filters/sanitizer.py index 0023527..b206b54 100644 --- a/libs/html5lib/filters/sanitizer.py +++ b/libs/html5lib/filters/sanitizer.py @@ -1,8 +1,12 @@ -import _base -from html5lib.sanitizer import HTMLSanitizerMixin +from __future__ import absolute_import, division, unicode_literals + +from . import _base +from ..sanitizer import HTMLSanitizerMixin + class Filter(_base.Filter, HTMLSanitizerMixin): def __iter__(self): for token in _base.Filter.__iter__(self): token = self.sanitize_token(token) - if token: yield token + if token: + yield token diff --git a/libs/html5lib/filters/whitespace.py b/libs/html5lib/filters/whitespace.py index 74d6f4d..dfc60ee 100644 --- a/libs/html5lib/filters/whitespace.py +++ b/libs/html5lib/filters/whitespace.py @@ -1,16 +1,13 @@ -try: - frozenset -except NameError: - # Import from the sets module for python 2.3 - from sets import ImmutableSet as frozenset +from __future__ import absolute_import, division, unicode_literals import re -import _base -from html5lib.constants import rcdataElements, spaceCharacters -spaceCharacters = u"".join(spaceCharacters) +from . 
import _base +from ..constants import rcdataElements, spaceCharacters +spaceCharacters = "".join(spaceCharacters) + +SPACES_REGEX = re.compile("[%s]+" % spaceCharacters) -SPACES_REGEX = re.compile(u"[%s]+" % spaceCharacters) class Filter(_base.Filter): @@ -21,7 +18,7 @@ class Filter(_base.Filter): for token in _base.Filter.__iter__(self): type = token["type"] if type == "StartTag" \ - and (preserve or token["name"] in self.spacePreserveElements): + and (preserve or token["name"] in self.spacePreserveElements): preserve += 1 elif type == "EndTag" and preserve: @@ -29,13 +26,13 @@ class Filter(_base.Filter): elif not preserve and type == "SpaceCharacters" and token["data"]: # Test on token["data"] above to not introduce spaces where there were not - token["data"] = u" " + token["data"] = " " elif not preserve and type == "Characters": token["data"] = collapse_spaces(token["data"]) yield token + def collapse_spaces(text): return SPACES_REGEX.sub(' ', text) - diff --git a/libs/html5lib/html5parser.py b/libs/html5lib/html5parser.py index 08a8f8a..b0f14f3 100644 --- a/libs/html5lib/html5parser.py +++ b/libs/html5lib/html5parser.py @@ -1,89 +1,65 @@ -try: - frozenset -except NameError: - # Import from the sets module for python 2.3 - from sets import Set as set - from sets import ImmutableSet as frozenset - -try: - any -except: - # Implement 'any' for python 2.4 and previous - def any(iterable): - for element in iterable: - if element: - return True - return False - -try: - "abc".startswith(("a", "b")) - def startswithany(str, prefixes): - return str.startswith(prefixes) -except: - # Python 2.4 doesn't accept a tuple as argument to string startswith - def startswithany(str, prefixes): - for prefix in prefixes: - if str.startswith(prefix): - return True - return False - -import sys +from __future__ import absolute_import, division, unicode_literals +from six import with_metaclass + import types -import inputstream -import tokenizer +from . import inputstream +from . 
import tokenizer + +from . import treebuilders +from .treebuilders._base import Marker -import treebuilders -from treebuilders._base import Marker -from treebuilders import simpletree +from . import utils +from . import constants +from .constants import spaceCharacters, asciiUpper2Lower +from .constants import specialElements +from .constants import headingElements +from .constants import cdataElements, rcdataElements +from .constants import tokenTypes, ReparseException, namespaces +from .constants import htmlIntegrationPointElements, mathmlTextIntegrationPointElements +from .constants import adjustForeignAttributes as adjustForeignAttributesMap -import utils -import constants -from constants import spaceCharacters, asciiUpper2Lower -from constants import formattingElements, specialElements -from constants import headingElements, tableInsertModeElements -from constants import cdataElements, rcdataElements, voidElements -from constants import tokenTypes, ReparseException, namespaces, spaceCharacters -from constants import htmlIntegrationPointElements, mathmlTextIntegrationPointElements -def parse(doc, treebuilder="simpletree", encoding=None, +def parse(doc, treebuilder="etree", encoding=None, namespaceHTMLElements=True): """Parse a string or file-like object into a tree""" tb = treebuilders.getTreeBuilder(treebuilder) p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements) return p.parse(doc, encoding=encoding) -def parseFragment(doc, container="div", treebuilder="simpletree", encoding=None, + +def parseFragment(doc, container="div", treebuilder="etree", encoding=None, namespaceHTMLElements=True): tb = treebuilders.getTreeBuilder(treebuilder) p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements) return p.parseFragment(doc, container=container, encoding=encoding) + def method_decorator_metaclass(function): class Decorated(type): def __new__(meta, classname, bases, classDict): - for attributeName, attribute in classDict.iteritems(): - if 
type(attribute) == types.FunctionType: + for attributeName, attribute in classDict.items(): + if isinstance(attribute, types.FunctionType): attribute = function(attribute) classDict[attributeName] = attribute - return type.__new__(meta, classname, bases, classDict) + return type.__new__(meta, classname, bases, classDict) return Decorated + class HTMLParser(object): """HTML parser. Generates a tree structure from a stream of (possibly malformed) HTML""" - def __init__(self, tree = simpletree.TreeBuilder, - tokenizer = tokenizer.HTMLTokenizer, strict = False, - namespaceHTMLElements = True, debug=False): + def __init__(self, tree=None, tokenizer=tokenizer.HTMLTokenizer, + strict=False, namespaceHTMLElements=True, debug=False): """ strict - raise an exception when a parse error is encountered tree - a treebuilder class controlling the type of tree that will be returned. Built in treebuilders can be accessed through html5lib.treebuilders.getTreeBuilder(treeType) - + tokenizer - a class that provides a stream of tokens to the treebuilder. This may be replaced for e.g. 
a sanitizer which converts some tags to text @@ -92,12 +68,14 @@ class HTMLParser(object): # Raise an exception on the first error encountered self.strict = strict + if tree is None: + tree = treebuilders.getTreeBuilder("etree") self.tree = tree(namespaceHTMLElements) self.tokenizer_class = tokenizer self.errors = [] self.phases = dict([(name, cls(self, self.tree)) for name, cls in - getPhases(debug).iteritems()]) + getPhases(debug).items()]) def _parse(self, stream, innerHTML=False, container="div", encoding=None, parseMeta=True, useChardet=True, **kwargs): @@ -106,7 +84,7 @@ class HTMLParser(object): self.container = container self.tokenizer = self.tokenizer_class(stream, encoding=encoding, parseMeta=parseMeta, - useChardet=useChardet, + useChardet=useChardet, parser=self, **kwargs) self.reset() @@ -114,14 +92,14 @@ class HTMLParser(object): try: self.mainLoop() break - except ReparseException, e: + except ReparseException: self.reset() def reset(self): self.tree.reset() self.firstStartTag = False self.errors = [] - self.log = [] #only used with debug mode + self.log = [] # only used with debug mode # "quirks" / "limited quirks" / "no quirks" self.compatMode = "no quirks" @@ -152,18 +130,18 @@ class HTMLParser(object): self.framesetOK = True def isHTMLIntegrationPoint(self, element): - if (element.name == "annotation-xml" and - element.namespace == namespaces["mathml"]): + if (element.name == "annotation-xml" and + element.namespace == namespaces["mathml"]): return ("encoding" in element.attributes and element.attributes["encoding"].translate( - asciiUpper2Lower) in + asciiUpper2Lower) in ("text/html", "application/xhtml+xml")) else: return (element.namespace, element.name) in htmlIntegrationPointElements def isMathMLTextIntegrationPoint(self, element): return (element.namespace, element.name) in mathmlTextIntegrationPointElements - + def mainLoop(self): CharactersToken = tokenTypes["Characters"] SpaceCharactersToken = tokenTypes["SpaceCharacters"] @@ -172,7 
+150,7 @@ class HTMLParser(object): CommentToken = tokenTypes["Comment"] DoctypeToken = tokenTypes["Doctype"] ParseErrorToken = tokenTypes["ParseError"] - + for token in self.normalizedTokens(): new_token = token while new_token is not None: @@ -181,7 +159,7 @@ class HTMLParser(object): currentNodeName = currentNode.name if currentNode else None type = new_token["type"] - + if type == ParseErrorToken: self.parseError(new_token["data"], new_token.get("datavars", {})) new_token = None @@ -191,7 +169,7 @@ class HTMLParser(object): (self.isMathMLTextIntegrationPoint(currentNode) and ((type == StartTagToken and token["name"] not in frozenset(["mglyph", "malignmark"])) or - type in (CharactersToken, SpaceCharactersToken))) or + type in (CharactersToken, SpaceCharactersToken))) or (currentNodeNamespace == namespaces["mathml"] and currentNodeName == "annotation-xml" and token["name"] == "svg") or @@ -204,7 +182,7 @@ class HTMLParser(object): if type == CharactersToken: new_token = phase.processCharacters(new_token) elif type == SpaceCharactersToken: - new_token= phase.processSpaceCharacters(new_token) + new_token = phase.processSpaceCharacters(new_token) elif type == StartTagToken: new_token = phase.processStartTag(new_token) elif type == EndTagToken: @@ -215,10 +193,9 @@ class HTMLParser(object): new_token = phase.processDoctype(new_token) if (type == StartTagToken and token["selfClosing"] - and not token["selfClosingAcknowledged"]): + and not token["selfClosingAcknowledged"]): self.parseError("non-void-element-with-trailing-solidus", - {"name":token["name"]}) - + {"name": token["name"]}) # When the loop finishes it's EOF reprocess = True @@ -243,14 +220,14 @@ class HTMLParser(object): regardless of any BOM or later declaration (such as in a meta element) """ - self._parse(stream, innerHTML=False, encoding=encoding, + self._parse(stream, innerHTML=False, encoding=encoding, parseMeta=parseMeta, useChardet=useChardet) return self.tree.getDocument() - + def 
parseFragment(self, stream, container="div", encoding=None, parseMeta=False, useChardet=True): """Parse a HTML fragment into a well-formed tree fragment - + container - name of the element we're setting the innerHTML property if set to None, default to 'div' @@ -279,100 +256,87 @@ class HTMLParser(object): return token def adjustMathMLAttributes(self, token): - replacements = {"definitionurl":u"definitionURL"} - for k,v in replacements.iteritems(): + replacements = {"definitionurl": "definitionURL"} + for k, v in replacements.items(): if k in token["data"]: token["data"][v] = token["data"][k] del token["data"][k] def adjustSVGAttributes(self, token): replacements = { - "attributename":u"attributeName", - "attributetype":u"attributeType", - "basefrequency":u"baseFrequency", - "baseprofile":u"baseProfile", - "calcmode":u"calcMode", - "clippathunits":u"clipPathUnits", - "contentscripttype":u"contentScriptType", - "contentstyletype":u"contentStyleType", - "diffuseconstant":u"diffuseConstant", - "edgemode":u"edgeMode", - "externalresourcesrequired":u"externalResourcesRequired", - "filterres":u"filterRes", - "filterunits":u"filterUnits", - "glyphref":u"glyphRef", - "gradienttransform":u"gradientTransform", - "gradientunits":u"gradientUnits", - "kernelmatrix":u"kernelMatrix", - "kernelunitlength":u"kernelUnitLength", - "keypoints":u"keyPoints", - "keysplines":u"keySplines", - "keytimes":u"keyTimes", - "lengthadjust":u"lengthAdjust", - "limitingconeangle":u"limitingConeAngle", - "markerheight":u"markerHeight", - "markerunits":u"markerUnits", - "markerwidth":u"markerWidth", - "maskcontentunits":u"maskContentUnits", - "maskunits":u"maskUnits", - "numoctaves":u"numOctaves", - "pathlength":u"pathLength", - "patterncontentunits":u"patternContentUnits", - "patterntransform":u"patternTransform", - "patternunits":u"patternUnits", - "pointsatx":u"pointsAtX", - "pointsaty":u"pointsAtY", - "pointsatz":u"pointsAtZ", - "preservealpha":u"preserveAlpha", - 
"preserveaspectratio":u"preserveAspectRatio", - "primitiveunits":u"primitiveUnits", - "refx":u"refX", - "refy":u"refY", - "repeatcount":u"repeatCount", - "repeatdur":u"repeatDur", - "requiredextensions":u"requiredExtensions", - "requiredfeatures":u"requiredFeatures", - "specularconstant":u"specularConstant", - "specularexponent":u"specularExponent", - "spreadmethod":u"spreadMethod", - "startoffset":u"startOffset", - "stddeviation":u"stdDeviation", - "stitchtiles":u"stitchTiles", - "surfacescale":u"surfaceScale", - "systemlanguage":u"systemLanguage", - "tablevalues":u"tableValues", - "targetx":u"targetX", - "targety":u"targetY", - "textlength":u"textLength", - "viewbox":u"viewBox", - "viewtarget":u"viewTarget", - "xchannelselector":u"xChannelSelector", - "ychannelselector":u"yChannelSelector", - "zoomandpan":u"zoomAndPan" - } - for originalName in token["data"].keys(): + "attributename": "attributeName", + "attributetype": "attributeType", + "basefrequency": "baseFrequency", + "baseprofile": "baseProfile", + "calcmode": "calcMode", + "clippathunits": "clipPathUnits", + "contentscripttype": "contentScriptType", + "contentstyletype": "contentStyleType", + "diffuseconstant": "diffuseConstant", + "edgemode": "edgeMode", + "externalresourcesrequired": "externalResourcesRequired", + "filterres": "filterRes", + "filterunits": "filterUnits", + "glyphref": "glyphRef", + "gradienttransform": "gradientTransform", + "gradientunits": "gradientUnits", + "kernelmatrix": "kernelMatrix", + "kernelunitlength": "kernelUnitLength", + "keypoints": "keyPoints", + "keysplines": "keySplines", + "keytimes": "keyTimes", + "lengthadjust": "lengthAdjust", + "limitingconeangle": "limitingConeAngle", + "markerheight": "markerHeight", + "markerunits": "markerUnits", + "markerwidth": "markerWidth", + "maskcontentunits": "maskContentUnits", + "maskunits": "maskUnits", + "numoctaves": "numOctaves", + "pathlength": "pathLength", + "patterncontentunits": "patternContentUnits", + "patterntransform": 
"patternTransform", + "patternunits": "patternUnits", + "pointsatx": "pointsAtX", + "pointsaty": "pointsAtY", + "pointsatz": "pointsAtZ", + "preservealpha": "preserveAlpha", + "preserveaspectratio": "preserveAspectRatio", + "primitiveunits": "primitiveUnits", + "refx": "refX", + "refy": "refY", + "repeatcount": "repeatCount", + "repeatdur": "repeatDur", + "requiredextensions": "requiredExtensions", + "requiredfeatures": "requiredFeatures", + "specularconstant": "specularConstant", + "specularexponent": "specularExponent", + "spreadmethod": "spreadMethod", + "startoffset": "startOffset", + "stddeviation": "stdDeviation", + "stitchtiles": "stitchTiles", + "surfacescale": "surfaceScale", + "systemlanguage": "systemLanguage", + "tablevalues": "tableValues", + "targetx": "targetX", + "targety": "targetY", + "textlength": "textLength", + "viewbox": "viewBox", + "viewtarget": "viewTarget", + "xchannelselector": "xChannelSelector", + "ychannelselector": "yChannelSelector", + "zoomandpan": "zoomAndPan" + } + for originalName in list(token["data"].keys()): if originalName in replacements: svgName = replacements[originalName] token["data"][svgName] = token["data"][originalName] del token["data"][originalName] def adjustForeignAttributes(self, token): - replacements = { - "xlink:actuate":("xlink", "actuate", namespaces["xlink"]), - "xlink:arcrole":("xlink", "arcrole", namespaces["xlink"]), - "xlink:href":("xlink", "href", namespaces["xlink"]), - "xlink:role":("xlink", "role", namespaces["xlink"]), - "xlink:show":("xlink", "show", namespaces["xlink"]), - "xlink:title":("xlink", "title", namespaces["xlink"]), - "xlink:type":("xlink", "type", namespaces["xlink"]), - "xml:base":("xml", "base", namespaces["xml"]), - "xml:lang":("xml", "lang", namespaces["xml"]), - "xml:space":("xml", "space", namespaces["xml"]), - "xmlns":(None, "xmlns", namespaces["xmlns"]), - "xmlns:xlink":("xmlns", "xlink", namespaces["xmlns"]) - } - - for originalName in token["data"].iterkeys(): + replacements 
= adjustForeignAttributesMap + + for originalName in token["data"].keys(): if originalName in replacements: foreignName = replacements[originalName] token["data"][foreignName] = token["data"][originalName] @@ -386,20 +350,20 @@ class HTMLParser(object): # specification.) last = False newModes = { - "select":"inSelect", - "td":"inCell", - "th":"inCell", - "tr":"inRow", - "tbody":"inTableBody", - "thead":"inTableBody", - "tfoot":"inTableBody", - "caption":"inCaption", - "colgroup":"inColumnGroup", - "table":"inTable", - "head":"inBody", - "body":"inBody", - "frameset":"inFrameset", - "html":"beforeHead" + "select": "inSelect", + "td": "inCell", + "th": "inCell", + "tr": "inRow", + "tbody": "inTableBody", + "thead": "inTableBody", + "tfoot": "inTableBody", + "caption": "inCaption", + "colgroup": "inColumnGroup", + "table": "inTable", + "head": "inBody", + "body": "inBody", + "frameset": "inFrameset", + "html": "beforeHead" } for node in self.tree.openElements[::-1]: nodeName = node.name @@ -430,9 +394,9 @@ class HTMLParser(object): contentType - RCDATA or RAWTEXT """ assert contentType in ("RAWTEXT", "RCDATA") - - element = self.tree.insertElement(token) - + + self.tree.insertElement(token) + if contentType == "RAWTEXT": self.tokenizer.state = self.tokenizer.rawtextState else: @@ -442,25 +406,27 @@ class HTMLParser(object): self.phase = self.phases["text"] + def getPhases(debug): def log(function): """Logger that records which phase processes each token""" - type_names = dict((value, key) for key, value in - constants.tokenTypes.iteritems()) + type_names = dict((value, key) for key, value in + constants.tokenTypes.items()) + def wrapped(self, *args, **kwargs): if function.__name__.startswith("process") and len(args) > 0: token = args[0] try: - info = {"type":type_names[token['type']]} + info = {"type": type_names[token['type']]} except: raise if token['type'] in constants.tagTokenTypes: info["name"] = token['name'] 
self.parser.log.append((self.parser.tokenizer.state.__name__, - self.parser.phase.__class__.__name__, - self.__class__.__name__, - function.__name__, + self.parser.phase.__class__.__name__, + self.__class__.__name__, + function.__name__, info)) return function(self, *args, **kwargs) else: @@ -473,21 +439,9 @@ def getPhases(debug): else: return type - class Phase(object): + class Phase(with_metaclass(getMetaclass(debug, log))): """Base class for helper object that implements each phase of processing """ - # Order should be (they can be omitted): - # * EOF - # * Comment - # * Doctype - # * SpaceCharacters - # * Characters - # * StartTag - # - startTag* methods - # * EndTag - # - endTag* methods - - __metaclass__ = getMetaclass(debug, log) def __init__(self, parser, tree): self.parser = parser @@ -514,11 +468,11 @@ def getPhases(debug): return self.startTagHandler[token["name"]](token) def startTagHtml(self, token): - if self.parser.firstStartTag == False and token["name"] == "html": - self.parser.parseError("non-html-root") + if not self.parser.firstStartTag and token["name"] == "html": + self.parser.parseError("non-html-root") # XXX Need a check here to see if the first start tag token emitted is # this token... If it's not, invoke self.parser.parseError(). 
- for attr, value in token["data"].iteritems(): + for attr, value in token["data"].items(): if attr not in self.tree.openElements[0].attributes: self.tree.openElements[0].attributes[attr] = value self.parser.firstStartTag = False @@ -539,8 +493,8 @@ def getPhases(debug): systemId = token["systemId"] correct = token["correct"] - if (name != "html" or publicId != None or - systemId != None and systemId != "about:legacy-compat"): + if (name != "html" or publicId is not None or + systemId is not None and systemId != "about:legacy-compat"): self.parser.parseError("unknown-doctype") if publicId is None: @@ -552,79 +506,79 @@ def getPhases(debug): publicId = publicId.translate(asciiUpper2Lower) if (not correct or token["name"] != "html" - or startswithany(publicId, - ("+//silmaril//dtd html pro v0r11 19970101//", - "-//advasoft ltd//dtd html 3.0 aswedit + extensions//", - "-//as//dtd html 3.0 aswedit + extensions//", - "-//ietf//dtd html 2.0 level 1//", - "-//ietf//dtd html 2.0 level 2//", - "-//ietf//dtd html 2.0 strict level 1//", - "-//ietf//dtd html 2.0 strict level 2//", - "-//ietf//dtd html 2.0 strict//", - "-//ietf//dtd html 2.0//", - "-//ietf//dtd html 2.1e//", - "-//ietf//dtd html 3.0//", - "-//ietf//dtd html 3.2 final//", - "-//ietf//dtd html 3.2//", - "-//ietf//dtd html 3//", - "-//ietf//dtd html level 0//", - "-//ietf//dtd html level 1//", - "-//ietf//dtd html level 2//", - "-//ietf//dtd html level 3//", - "-//ietf//dtd html strict level 0//", - "-//ietf//dtd html strict level 1//", - "-//ietf//dtd html strict level 2//", - "-//ietf//dtd html strict level 3//", - "-//ietf//dtd html strict//", - "-//ietf//dtd html//", - "-//metrius//dtd metrius presentational//", - "-//microsoft//dtd internet explorer 2.0 html strict//", - "-//microsoft//dtd internet explorer 2.0 html//", - "-//microsoft//dtd internet explorer 2.0 tables//", - "-//microsoft//dtd internet explorer 3.0 html strict//", - "-//microsoft//dtd internet explorer 3.0 html//", - "-//microsoft//dtd 
internet explorer 3.0 tables//", - "-//netscape comm. corp.//dtd html//", - "-//netscape comm. corp.//dtd strict html//", - "-//o'reilly and associates//dtd html 2.0//", - "-//o'reilly and associates//dtd html extended 1.0//", - "-//o'reilly and associates//dtd html extended relaxed 1.0//", - "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//", - "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//", - "-//spyglass//dtd html 2.0 extended//", - "-//sq//dtd html 2.0 hotmetal + extensions//", - "-//sun microsystems corp.//dtd hotjava html//", - "-//sun microsystems corp.//dtd hotjava strict html//", - "-//w3c//dtd html 3 1995-03-24//", - "-//w3c//dtd html 3.2 draft//", - "-//w3c//dtd html 3.2 final//", - "-//w3c//dtd html 3.2//", - "-//w3c//dtd html 3.2s draft//", - "-//w3c//dtd html 4.0 frameset//", - "-//w3c//dtd html 4.0 transitional//", - "-//w3c//dtd html experimental 19960712//", - "-//w3c//dtd html experimental 970421//", - "-//w3c//dtd w3 html//", - "-//w3o//dtd w3 html 3.0//", - "-//webtechs//dtd mozilla html 2.0//", - "-//webtechs//dtd mozilla html//")) + or publicId.startswith( + ("+//silmaril//dtd html pro v0r11 19970101//", + "-//advasoft ltd//dtd html 3.0 aswedit + extensions//", + "-//as//dtd html 3.0 aswedit + extensions//", + "-//ietf//dtd html 2.0 level 1//", + "-//ietf//dtd html 2.0 level 2//", + "-//ietf//dtd html 2.0 strict level 1//", + "-//ietf//dtd html 2.0 strict level 2//", + "-//ietf//dtd html 2.0 strict//", + "-//ietf//dtd html 2.0//", + "-//ietf//dtd html 2.1e//", + "-//ietf//dtd html 3.0//", + "-//ietf//dtd html 3.2 final//", + "-//ietf//dtd html 3.2//", + "-//ietf//dtd html 3//", + "-//ietf//dtd html level 0//", + "-//ietf//dtd html level 1//", + "-//ietf//dtd html level 2//", + "-//ietf//dtd html level 3//", + "-//ietf//dtd html strict level 0//", + "-//ietf//dtd html strict level 1//", + "-//ietf//dtd html strict level 2//", + "-//ietf//dtd html strict level 3//", + "-//ietf//dtd html 
strict//", + "-//ietf//dtd html//", + "-//metrius//dtd metrius presentational//", + "-//microsoft//dtd internet explorer 2.0 html strict//", + "-//microsoft//dtd internet explorer 2.0 html//", + "-//microsoft//dtd internet explorer 2.0 tables//", + "-//microsoft//dtd internet explorer 3.0 html strict//", + "-//microsoft//dtd internet explorer 3.0 html//", + "-//microsoft//dtd internet explorer 3.0 tables//", + "-//netscape comm. corp.//dtd html//", + "-//netscape comm. corp.//dtd strict html//", + "-//o'reilly and associates//dtd html 2.0//", + "-//o'reilly and associates//dtd html extended 1.0//", + "-//o'reilly and associates//dtd html extended relaxed 1.0//", + "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//", + "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//", + "-//spyglass//dtd html 2.0 extended//", + "-//sq//dtd html 2.0 hotmetal + extensions//", + "-//sun microsystems corp.//dtd hotjava html//", + "-//sun microsystems corp.//dtd hotjava strict html//", + "-//w3c//dtd html 3 1995-03-24//", + "-//w3c//dtd html 3.2 draft//", + "-//w3c//dtd html 3.2 final//", + "-//w3c//dtd html 3.2//", + "-//w3c//dtd html 3.2s draft//", + "-//w3c//dtd html 4.0 frameset//", + "-//w3c//dtd html 4.0 transitional//", + "-//w3c//dtd html experimental 19960712//", + "-//w3c//dtd html experimental 970421//", + "-//w3c//dtd w3 html//", + "-//w3o//dtd w3 html 3.0//", + "-//webtechs//dtd mozilla html 2.0//", + "-//webtechs//dtd mozilla html//")) or publicId in ("-//w3o//dtd w3 html strict 3.0//en//", "-/w3c/dtd html 4.0 transitional/en", "html") - or startswithany(publicId, + or publicId.startswith( ("-//w3c//dtd html 4.01 frameset//", - "-//w3c//dtd html 4.01 transitional//")) and - systemId == None - or systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"): + "-//w3c//dtd html 4.01 transitional//")) and + systemId is None + or systemId and systemId.lower() == 
"http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"): self.parser.compatMode = "quirks" - elif (startswithany(publicId, + elif (publicId.startswith( ("-//w3c//dtd xhtml 1.0 frameset//", "-//w3c//dtd xhtml 1.0 transitional//")) - or startswithany(publicId, + or publicId.startswith( ("-//w3c//dtd html 4.01 frameset//", - "-//w3c//dtd html 4.01 transitional//")) and - systemId != None): + "-//w3c//dtd html 4.01 transitional//")) and + systemId is not None): self.parser.compatMode = "limited quirks" self.parser.phase = self.parser.phases["beforeHtml"] @@ -640,13 +594,13 @@ def getPhases(debug): def processStartTag(self, token): self.parser.parseError("expected-doctype-but-got-start-tag", - {"name": token["name"]}) + {"name": token["name"]}) self.anythingElse() return token def processEndTag(self, token): self.parser.parseError("expected-doctype-but-got-end-tag", - {"name": token["name"]}) + {"name": token["name"]}) self.anythingElse() return token @@ -655,7 +609,6 @@ def getPhases(debug): self.anythingElse() return True - class BeforeHtmlPhase(Phase): # helper methods def insertHtmlElement(self): @@ -686,12 +639,11 @@ def getPhases(debug): def processEndTag(self, token): if token["name"] not in ("head", "body", "html", "br"): self.parser.parseError("unexpected-end-tag-before-html", - {"name": token["name"]}) + {"name": token["name"]}) else: self.insertHtmlElement() return token - class BeforeHeadPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) @@ -736,18 +688,18 @@ def getPhases(debug): def endTagOther(self, token): self.parser.parseError("end-tag-after-implied-root", - {"name": token["name"]}) + {"name": token["name"]}) class InHeadPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) - self.startTagHandler = utils.MethodDispatcher([ + self.startTagHandler = utils.MethodDispatcher([ ("html", self.startTagHtml), ("title", self.startTagTitle), (("noscript", "noframes", "style"), 
self.startTagNoScriptNoFramesStyle), ("script", self.startTagScript), - (("base", "basefont", "bgsound", "command", "link"), + (("base", "basefont", "bgsound", "command", "link"), self.startTagBaseLinkCommand), ("meta", self.startTagMeta), ("head", self.startTagHead) @@ -761,7 +713,7 @@ def getPhases(debug): self.endTagHandler.default = self.endTagOther # the real thing - def processEOF (self): + def processEOF(self): self.anythingElse() return True @@ -789,7 +741,9 @@ def getPhases(debug): if self.parser.tokenizer.stream.charEncoding[1] == "tentative": if "charset" in attributes: self.parser.tokenizer.stream.changeEncoding(attributes["charset"]) - elif "content" in attributes: + elif ("content" in attributes and + "http-equiv" in attributes and + attributes["http-equiv"].lower() == "content-type"): # Encoding it as UTF-8 here is a hack, as really we should pass # the abstract Unicode string, and just use the # ContentAttrParser on that, but using UTF-8 allows all chars @@ -803,7 +757,7 @@ def getPhases(debug): self.parser.parseRCDataRawtext(token, "RCDATA") def startTagNoScriptNoFramesStyle(self, token): - #Need to decide whether to implement the scripting-disabled case + # Need to decide whether to implement the scripting-disabled case self.parser.parseRCDataRawtext(token, "RAWTEXT") def startTagScript(self, token): @@ -818,7 +772,7 @@ def getPhases(debug): def endTagHead(self, token): node = self.parser.tree.openElements.pop() - assert node.name == "head", "Expected head got %s"%node.name + assert node.name == "head", "Expected head got %s" % node.name self.parser.phase = self.parser.phases["afterHead"] def endTagHtmlBodyBr(self, token): @@ -831,12 +785,10 @@ def getPhases(debug): def anythingElse(self): self.endTagHead(impliedTagToken("head")) - # XXX If we implement a parser for which scripting is disabled we need to # implement this phase. 
# # class InHeadNoScriptPhase(Phase): - class AfterHeadPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) @@ -845,13 +797,13 @@ def getPhases(debug): ("html", self.startTagHtml), ("body", self.startTagBody), ("frameset", self.startTagFrameset), - (("base", "basefont", "bgsound", "link", "meta", "noframes", "script", + (("base", "basefont", "bgsound", "link", "meta", "noframes", "script", "style", "title"), - self.startTagFromHead), + self.startTagFromHead), ("head", self.startTagHead) ]) self.startTagHandler.default = self.startTagOther - self.endTagHandler = utils.MethodDispatcher([(("body", "html", "br"), + self.endTagHandler = utils.MethodDispatcher([(("body", "html", "br"), self.endTagHtmlBodyBr)]) self.endTagHandler.default = self.endTagOther @@ -877,7 +829,7 @@ def getPhases(debug): def startTagFromHead(self, token): self.parser.parseError("unexpected-start-tag-out-of-my-head", - {"name": token["name"]}) + {"name": token["name"]}) self.tree.openElements.append(self.tree.headPointer) self.parser.phases["inHead"].processStartTag(token) for node in self.tree.openElements[::-1]: @@ -886,7 +838,7 @@ def getPhases(debug): break def startTagHead(self, token): - self.parser.parseError("unexpected-start-tag", {"name":token["name"]}) + self.parser.parseError("unexpected-start-tag", {"name": token["name"]}) def startTagOther(self, token): self.anythingElse() @@ -897,43 +849,42 @@ def getPhases(debug): return token def endTagOther(self, token): - self.parser.parseError("unexpected-end-tag", {"name":token["name"]}) + self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) def anythingElse(self): self.tree.insertElement(impliedTagToken("body", "StartTag")) self.parser.phase = self.parser.phases["inBody"] self.parser.framesetOK = True - class InBodyPhase(Phase): # http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody # the really-really-really-very crazy mode def __init__(self, parser, tree): 
Phase.__init__(self, parser, tree) - #Keep a ref to this for special handling of whitespace in <pre> + # Keep a ref to this for special handling of whitespace in <pre> self.processSpaceCharactersNonPre = self.processSpaceCharacters self.startTagHandler = utils.MethodDispatcher([ ("html", self.startTagHtml), - (("base", "basefont", "bgsound", "command", "link", "meta", - "noframes", "script", "style", "title"), + (("base", "basefont", "bgsound", "command", "link", "meta", + "noframes", "script", "style", "title"), self.startTagProcessInHead), ("body", self.startTagBody), ("frameset", self.startTagFrameset), (("address", "article", "aside", "blockquote", "center", "details", "details", "dir", "div", "dl", "fieldset", "figcaption", "figure", - "footer", "header", "hgroup", "menu", "nav", "ol", "p", + "footer", "header", "hgroup", "main", "menu", "nav", "ol", "p", "section", "summary", "ul"), - self.startTagCloseP), + self.startTagCloseP), (headingElements, self.startTagHeading), (("pre", "listing"), self.startTagPreListing), ("form", self.startTagForm), (("li", "dd", "dt"), self.startTagListItem), - ("plaintext",self.startTagPlaintext), + ("plaintext", self.startTagPlaintext), ("a", self.startTagA), - (("b", "big", "code", "em", "font", "i", "s", "small", "strike", - "strong", "tt", "u"),self.startTagFormatting), + (("b", "big", "code", "em", "font", "i", "s", "small", "strike", + "strong", "tt", "u"), self.startTagFormatting), ("nobr", self.startTagNobr), ("button", self.startTagButton), (("applet", "marquee", "object"), self.startTagAppletMarqueeObject), @@ -961,21 +912,21 @@ def getPhases(debug): self.startTagHandler.default = self.startTagOther self.endTagHandler = utils.MethodDispatcher([ - ("body",self.endTagBody), - ("html",self.endTagHtml), - (("address", "article", "aside", "blockquote", "center", - "details", "dir", "div", "dl", "fieldset", "figcaption", "figure", - "footer", "header", "hgroup", "listing", "menu", "nav", "ol", "pre", + ("body", 
self.endTagBody), + ("html", self.endTagHtml), + (("address", "article", "aside", "blockquote", "button", "center", + "details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure", + "footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre", "section", "summary", "ul"), self.endTagBlock), ("form", self.endTagForm), - ("p",self.endTagP), + ("p", self.endTagP), (("dd", "dt", "li"), self.endTagListItem), (headingElements, self.endTagHeading), (("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u"), self.endTagFormatting), - (("applet", "marquee", "object"), self.endTagAppletMarqueeObject), + (("applet", "marquee", "object"), self.endTagAppletMarqueeObject), ("br", self.endTagBr), - ]) + ]) self.endTagHandler.default = self.endTagOther def isMatchingFormattingElement(self, node1, node2): @@ -995,14 +946,14 @@ def getPhases(debug): def addFormattingElement(self, token): self.tree.insertElement(token) element = self.tree.openElements[-1] - + matchingElements = [] for node in self.tree.activeFormattingElements[::-1]: if node is Marker: break elif self.isMatchingFormattingElement(node, element): matchingElements.append(node) - + assert len(matchingElements) <= 3 if len(matchingElements) == 3: self.tree.activeFormattingElements.remove(matchingElements[-1]) @@ -1017,7 +968,7 @@ def getPhases(debug): if node.name not in allowed_elements: self.parser.parseError("expected-closing-tag-but-got-eof") break - #Stop parsing + # Stop parsing def processSpaceCharactersDropNewline(self, token): # Sometimes (start of <pre>, <listing>, and <textarea> blocks) we @@ -1026,19 +977,19 @@ def getPhases(debug): self.processSpaceCharacters = self.processSpaceCharactersNonPre if (data.startswith("\n") and self.tree.openElements[-1].name in ("pre", "listing", "textarea") - and not self.tree.openElements[-1].hasContent()): + and not self.tree.openElements[-1].hasContent()): data = data[1:] if data: 
self.tree.reconstructActiveFormattingElements() self.tree.insertText(data) def processCharacters(self, token): - if token["data"] == u"\u0000": - #The tokenizer should always emit null on its own + if token["data"] == "\u0000": + # The tokenizer should always emit null on its own return self.tree.reconstructActiveFormattingElements() self.tree.insertText(token["data"]) - #This must be bad for performance + # This must be bad for performance if (self.parser.framesetOK and any([char not in spaceCharacters for char in token["data"]])): @@ -1054,11 +1005,11 @@ def getPhases(debug): def startTagBody(self, token): self.parser.parseError("unexpected-start-tag", {"name": "body"}) if (len(self.tree.openElements) == 1 - or self.tree.openElements[1].name != "body"): + or self.tree.openElements[1].name != "body"): assert self.parser.innerHTML else: self.parser.framesetOK = False - for attr, value in token["data"].iteritems(): + for attr, value in token["data"].items(): if attr not in self.tree.openElements[1].attributes: self.tree.openElements[1].attributes[attr] = value @@ -1090,7 +1041,7 @@ def getPhases(debug): def startTagForm(self, token): if self.tree.formPointer: - self.parser.parseError(u"unexpected-start-tag", {"name": "form"}) + self.parser.parseError("unexpected-start-tag", {"name": "form"}) else: if self.tree.elementInScope("p", variant="button"): self.endTagP(impliedTagToken("p")) @@ -1100,9 +1051,9 @@ def getPhases(debug): def startTagListItem(self, token): self.parser.framesetOK = False - stopNamesMap = {"li":["li"], - "dt":["dt", "dd"], - "dd":["dt", "dd"]} + stopNamesMap = {"li": ["li"], + "dt": ["dt", "dd"], + "dd": ["dt", "dd"]} stopNames = stopNamesMap[token["name"]] for node in reversed(self.tree.openElements): if node.name in stopNames: @@ -1110,7 +1061,7 @@ def getPhases(debug): impliedTagToken(node.name, "EndTag")) break if (node.nameTuple in specialElements and - node.name not in ("address", "div", "p")): + node.name not in ("address", "div", "p")): 
break if self.tree.elementInScope("p", variant="button"): @@ -1137,7 +1088,7 @@ def getPhases(debug): afeAElement = self.tree.elementInActiveFormattingElements("a") if afeAElement: self.parser.parseError("unexpected-start-tag-implies-end-tag", - {"startName": "a", "endName": "a"}) + {"startName": "a", "endName": "a"}) self.endTagFormatting(impliedTagToken("a")) if afeAElement in self.tree.openElements: self.tree.openElements.remove(afeAElement) @@ -1154,7 +1105,7 @@ def getPhases(debug): self.tree.reconstructActiveFormattingElements() if self.tree.elementInScope("nobr"): self.parser.parseError("unexpected-start-tag-implies-end-tag", - {"startName": "nobr", "endName": "nobr"}) + {"startName": "nobr", "endName": "nobr"}) self.processEndTag(impliedTagToken("nobr")) # XXX Need tests that trigger the following self.tree.reconstructActiveFormattingElements() @@ -1163,7 +1114,7 @@ def getPhases(debug): def startTagButton(self, token): if self.tree.elementInScope("button"): self.parser.parseError("unexpected-start-tag-implies-end-tag", - {"startName": "button", "endName": "button"}) + {"startName": "button", "endName": "button"}) self.processEndTag(impliedTagToken("button")) return token else: @@ -1203,8 +1154,8 @@ def getPhases(debug): framesetOK = self.parser.framesetOK self.startTagVoidFormatting(token) if ("type" in token["data"] and - token["data"]["type"].translate(asciiUpper2Lower) == "hidden"): - #input type=hidden doesn't change framesetOK + token["data"]["type"].translate(asciiUpper2Lower) == "hidden"): + # input type=hidden doesn't change framesetOK self.parser.framesetOK = framesetOK def startTagParamSource(self, token): @@ -1223,7 +1174,7 @@ def getPhases(debug): def startTagImage(self, token): # No really... 
self.parser.parseError("unexpected-start-tag-treated-as", - {"originalName": "image", "newName": "img"}) + {"originalName": "image", "newName": "img"}) self.processStartTag(impliedTagToken("img", "StartTag", attributes=token["data"], selfClosing=token["selfClosing"])) @@ -1243,18 +1194,18 @@ def getPhases(debug): if "prompt" in token["data"]: prompt = token["data"]["prompt"] else: - prompt = u"This is a searchable index. Enter search keywords: " + prompt = "This is a searchable index. Enter search keywords: " self.processCharacters( - {"type":tokenTypes["Characters"], "data":prompt}) + {"type": tokenTypes["Characters"], "data": prompt}) attributes = token["data"].copy() if "action" in attributes: del attributes["action"] if "prompt" in attributes: del attributes["prompt"] attributes["name"] = "isindex" - self.processStartTag(impliedTagToken("input", "StartTag", - attributes = attributes, - selfClosing = + self.processStartTag(impliedTagToken("input", "StartTag", + attributes=attributes, + selfClosing= token["selfClosing"])) self.processEndTag(impliedTagToken("label")) self.processStartTag(impliedTagToken("hr", "StartTag")) @@ -1287,7 +1238,7 @@ def getPhases(debug): if self.parser.phase in (self.parser.phases["inTable"], self.parser.phases["inCaption"], self.parser.phases["inColumnGroup"], - self.parser.phases["inTableBody"], + self.parser.phases["inTableBody"], self.parser.phases["inRow"], self.parser.phases["inCell"]): self.parser.phase = self.parser.phases["inSelectInTable"] @@ -1307,8 +1258,8 @@ def getPhases(debug): self.parser.adjustForeignAttributes(token) token["namespace"] = namespaces["mathml"] self.tree.insertElement(token) - #Need to get the parse error right for the case where the token - #has a namespace not equal to the xmlns attribute + # Need to get the parse error right for the case where the token + # has a namespace not equal to the xmlns attribute if token["selfClosing"]: self.tree.openElements.pop() token["selfClosingAcknowledged"] = True @@ 
-1319,8 +1270,8 @@ def getPhases(debug): self.parser.adjustForeignAttributes(token) token["namespace"] = namespaces["svg"] self.tree.insertElement(token) - #Need to get the parse error right for the case where the token - #has a namespace not equal to the xmlns attribute + # Need to get the parse error right for the case where the token + # has a namespace not equal to the xmlns attribute if token["selfClosing"]: self.tree.openElements.pop() token["selfClosingAcknowledged"] = True @@ -1362,7 +1313,7 @@ def getPhases(debug): "tbody", "td", "tfoot", "th", "thead", "tr", "body", "html")): - #Not sure this is the correct name for the parse error + # Not sure this is the correct name for the parse error self.parser.parseError( "expected-one-end-tag-but-got-another", {"expectedName": "body", "gotName": node.name}) @@ -1370,20 +1321,20 @@ def getPhases(debug): self.parser.phase = self.parser.phases["afterBody"] def endTagHtml(self, token): - #We repeat the test for the body end tag token being ignored here + # We repeat the test for the body end tag token being ignored here if self.tree.elementInScope("body"): self.endTagBody(impliedTagToken("body")) return token def endTagBlock(self, token): - #Put us back in the right whitespace handling mode + # Put us back in the right whitespace handling mode if token["name"] == "pre": self.processSpaceCharacters = self.processSpaceCharactersNonPre inScope = self.tree.elementInScope(token["name"]) if inScope: self.tree.generateImpliedEndTags() if self.tree.openElements[-1].name != token["name"]: - self.parser.parseError("end-tag-too-early", {"name": token["name"]}) + self.parser.parseError("end-tag-too-early", {"name": token["name"]}) if inScope: node = self.tree.openElements.pop() while node.name != token["name"]: @@ -1394,7 +1345,7 @@ def getPhases(debug): self.tree.formPointer = None if node is None or not self.tree.elementInScope(node): self.parser.parseError("unexpected-end-tag", - {"name":"form"}) + {"name": "form"}) else: 
self.tree.generateImpliedEndTags() if self.tree.openElements[-1] != node: @@ -1410,7 +1361,7 @@ def getPhases(debug): if not self.tree.elementInScope(token["name"], variant=variant): self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) else: - self.tree.generateImpliedEndTags(exclude = token["name"]) + self.tree.generateImpliedEndTags(exclude=token["name"]) if self.tree.openElements[-1].name != token["name"]: self.parser.parseError( "end-tag-too-early", @@ -1436,65 +1387,105 @@ def getPhases(debug): def endTagFormatting(self, token): """The much-feared adoption agency algorithm""" - # http://www.whatwg.org/specs/web-apps/current-work/#adoptionAgency + # http://svn.whatwg.org/webapps/complete.html#adoptionAgency revision 7867 # XXX Better parseError messages appreciated. - name = token["name"] + # Step 1 outerLoopCounter = 0 + + # Step 2 while outerLoopCounter < 8: + + # Step 3 outerLoopCounter += 1 - # Step 1 paragraph 1 + # Step 4: + + # Let the formatting element be the last element in + # the list of active formatting elements that: + # - is between the end of the list and the last scope + # marker in the list, if any, or the start of the list + # otherwise, and + # - has the same tag name as the token. formattingElement = self.tree.elementInActiveFormattingElements( token["name"]) - if (not formattingElement or + if (not formattingElement or (formattingElement in self.tree.openElements and not self.tree.elementInScope(formattingElement.name))): - self.parser.parseError("adoption-agency-1.1", {"name": token["name"]}) + # If there is no such node, then abort these steps + # and instead act as described in the "any other + # end tag" entry below. + self.endTagOther(token) return - # Step 1 paragraph 2 + # Otherwise, if there is such a node, but that node is + # not in the stack of open elements, then this is a + # parse error; remove the element from the list, and + # abort these steps. 
elif formattingElement not in self.tree.openElements: self.parser.parseError("adoption-agency-1.2", {"name": token["name"]}) self.tree.activeFormattingElements.remove(formattingElement) return - # Step 1 paragraph 3 - if formattingElement != self.tree.openElements[-1]: - self.parser.parseError("adoption-agency-1.3", {"name": token["name"]}) + # Otherwise, if there is such a node, and that node is + # also in the stack of open elements, but the element + # is not in scope, then this is a parse error; ignore + # the token, and abort these steps. + elif not self.tree.elementInScope(formattingElement.name): + self.parser.parseError("adoption-agency-4.4", {"name": token["name"]}) + return + + # Otherwise, there is a formatting element and that + # element is in the stack and is in scope. If the + # element is not the current node, this is a parse + # error. In any case, proceed with the algorithm as + # written in the following steps. + else: + if formattingElement != self.tree.openElements[-1]: + self.parser.parseError("adoption-agency-1.3", {"name": token["name"]}) - # Step 2 - # Start of the adoption agency algorithm proper + # Step 5: + + # Let the furthest block be the topmost node in the + # stack of open elements that is lower in the stack + # than the formatting element, and is an element in + # the special category. There might not be one. afeIndex = self.tree.openElements.index(formattingElement) furthestBlock = None for element in self.tree.openElements[afeIndex:]: if element.nameTuple in specialElements: furthestBlock = element break - # Step 3 + + # Step 6: + + # If there is no furthest block, then the UA must + # first pop all the nodes from the bottom of the stack + # of open elements, from the current node up to and + # including the formatting element, then remove the + # formatting element from the list of active + # formatting elements, and finally abort these steps. 
if furthestBlock is None: element = self.tree.openElements.pop() while element != formattingElement: element = self.tree.openElements.pop() self.tree.activeFormattingElements.remove(element) return - commonAncestor = self.tree.openElements[afeIndex-1] - # Step 5 - #if furthestBlock.parent: - # furthestBlock.parent.removeChild(furthestBlock) + # Step 7 + commonAncestor = self.tree.openElements[afeIndex - 1] - # Step 5 + # Step 8: # The bookmark is supposed to help us identify where to reinsert - # nodes in step 12. We have to ensure that we reinsert nodes after + # nodes in step 15. We have to ensure that we reinsert nodes after # the node before the active formatting element. Note the bookmark - # can move in step 7.4 + # can move in step 9.7 bookmark = self.tree.activeFormattingElements.index(formattingElement) - # Step 6 + # Step 9 lastNode = node = furthestBlock innerLoopCounter = 0 - + index = self.tree.openElements.index(node) while innerLoopCounter < 3: innerLoopCounter += 1 @@ -1504,15 +1495,13 @@ def getPhases(debug): if node not in self.tree.activeFormattingElements: self.tree.openElements.remove(node) continue - # Step 6.3 + # Step 9.6 if node == formattingElement: break - # Step 6.4 + # Step 9.7 if lastNode == furthestBlock: - bookmark = (self.tree.activeFormattingElements.index(node) - + 1) - # Step 6.5 - #cite = node.parent + bookmark = self.tree.activeFormattingElements.index(node) + 1 + # Step 9.8 clone = node.cloneNode() # Replace node with clone self.tree.activeFormattingElements[ @@ -1520,20 +1509,18 @@ def getPhases(debug): self.tree.openElements[ self.tree.openElements.index(node)] = clone node = clone - - # Step 6.6 + # Step 9.9 # Remove lastNode from its parents, if any if lastNode.parent: lastNode.parent.removeChild(lastNode) node.appendChild(lastNode) - # Step 7.7 + # Step 9.10 lastNode = node - # End of inner loop - # Step 7 + # Step 10 # Foster parent lastNode if commonAncestor is a - # table, tbody, tfoot, thead, or tr we need to foster 
parent the - # lastNode + # table, tbody, tfoot, thead, or tr we need to foster + # parent the lastNode if lastNode.parent: lastNode.parent.removeChild(lastNode) @@ -1543,23 +1530,23 @@ def getPhases(debug): else: commonAncestor.appendChild(lastNode) - # Step 8 + # Step 11 clone = formattingElement.cloneNode() - # Step 9 + # Step 12 furthestBlock.reparentChildren(clone) - # Step 10 + # Step 13 furthestBlock.appendChild(clone) - # Step 11 + # Step 14 self.tree.activeFormattingElements.remove(formattingElement) self.tree.activeFormattingElements.insert(bookmark, clone) - # Step 12 + # Step 15 self.tree.openElements.remove(formattingElement) self.tree.openElements.insert( - self.tree.openElements.index(furthestBlock) + 1, clone) + self.tree.openElements.index(furthestBlock) + 1, clone) def endTagAppletMarqueeObject(self, token): if self.tree.elementInScope(token["name"]): @@ -1575,7 +1562,7 @@ def getPhases(debug): def endTagBr(self, token): self.parser.parseError("unexpected-end-tag-treated-as", - {"originalName": "br", "newName": "br element"}) + {"originalName": "br", "newName": "br element"}) self.tree.reconstructActiveFormattingElements() self.tree.insertElement(impliedTagToken("br", "StartTag")) self.tree.openElements.pop() @@ -1600,31 +1587,31 @@ def getPhases(debug): self.startTagHandler = utils.MethodDispatcher([]) self.startTagHandler.default = self.startTagOther self.endTagHandler = utils.MethodDispatcher([ - ("script", self.endTagScript)]) + ("script", self.endTagScript)]) self.endTagHandler.default = self.endTagOther def processCharacters(self, token): self.tree.insertText(token["data"]) def processEOF(self): - self.parser.parseError("expected-named-closing-tag-but-got-eof", - self.tree.openElements[-1].name) + self.parser.parseError("expected-named-closing-tag-but-got-eof", + {"name": self.tree.openElements[-1].name}) self.tree.openElements.pop() self.parser.phase = self.parser.originalPhase return True def startTagOther(self, token): - assert False, 
"Tried to process start tag %s in RCDATA/RAWTEXT mode"%token['name'] + assert False, "Tried to process start tag %s in RCDATA/RAWTEXT mode" % token['name'] def endTagScript(self, token): node = self.tree.openElements.pop() assert node.name == "script" self.parser.phase = self.parser.originalPhase - #The rest of this method is all stuff that only happens if - #document.write works + # The rest of this method is all stuff that only happens if + # document.write works def endTagOther(self, token): - node = self.tree.openElements.pop() + self.tree.openElements.pop() self.parser.phase = self.parser.originalPhase class InTablePhase(Phase): @@ -1656,7 +1643,7 @@ def getPhases(debug): def clearStackToTableContext(self): # "clear the stack back to a table context" while self.tree.openElements[-1].name not in ("table", "html"): - #self.parser.parseError("unexpected-implied-end-tag-in-table", + # self.parser.parseError("unexpected-implied-end-tag-in-table", # {"name": self.tree.openElements[-1].name}) self.tree.openElements.pop() # When the current node is <html> it's an innerHTML case @@ -1667,7 +1654,7 @@ def getPhases(debug): self.parser.parseError("eof-in-table") else: assert self.parser.innerHTML - #Stop parsing + # Stop parsing def processSpaceCharacters(self, token): originalPhase = self.parser.phase @@ -1682,7 +1669,7 @@ def getPhases(debug): self.parser.phase.processCharacters(token) def insertText(self, token): - #If we get here there must be at least one non-whitespace character + # If we get here there must be at least one non-whitespace character # Do the table magic! 
self.tree.insertFromTable = True self.parser.phases["inBody"].processCharacters(token) @@ -1714,7 +1701,7 @@ def getPhases(debug): def startTagTable(self, token): self.parser.parseError("unexpected-start-tag-implies-end-tag", - {"startName": "table", "endName": "table"}) + {"startName": "table", "endName": "table"}) self.parser.phase.processEndTag(impliedTagToken("table")) if not self.parser.innerHTML: return token @@ -1723,8 +1710,8 @@ def getPhases(debug): return self.parser.phases["inHead"].processStartTag(token) def startTagInput(self, token): - if ("type" in token["data"] and - token["data"]["type"].translate(asciiUpper2Lower) == "hidden"): + if ("type" in token["data"] and + token["data"]["type"].translate(asciiUpper2Lower) == "hidden"): self.parser.parseError("unexpected-hidden-input-in-table") self.tree.insertElement(token) # XXX associate with form @@ -1751,8 +1738,8 @@ def getPhases(debug): self.tree.generateImpliedEndTags() if self.tree.openElements[-1].name != "table": self.parser.parseError("end-tag-too-early-named", - {"gotName": "table", - "expectedName": self.tree.openElements[-1].name}) + {"gotName": "table", + "expectedName": self.tree.openElements[-1].name}) while self.tree.openElements[-1].name != "table": self.tree.openElements.pop() self.tree.openElements.pop() @@ -1781,7 +1768,7 @@ def getPhases(debug): def flushCharacters(self): data = "".join([item["data"] for item in self.characterTokens]) if any([item not in spaceCharacters for item in data]): - token = {"type":tokenTypes["Characters"], "data":data} + token = {"type": tokenTypes["Characters"], "data": data} self.parser.phases["inTable"].insertText(token) elif data: self.tree.insertText(data) @@ -1798,12 +1785,12 @@ def getPhases(debug): return True def processCharacters(self, token): - if token["data"] == u"\u0000": + if token["data"] == "\u0000": return self.characterTokens.append(token) def processSpaceCharacters(self, token): - #pretty sure we should never reach here + # pretty sure we 
should never reach here self.characterTokens.append(token) # assert False @@ -1817,7 +1804,6 @@ def getPhases(debug): self.parser.phase = self.originalPhase return token - class InCaptionPhase(Phase): # http://www.whatwg.org/specs/web-apps/current-work/#in-caption def __init__(self, parser, tree): @@ -1849,7 +1835,7 @@ def getPhases(debug): def startTagTableElement(self, token): self.parser.parseError() - #XXX Have to duplicate logic here to find out if the tag is ignored + # XXX Have to duplicate logic here to find out if the tag is ignored ignoreEndTag = self.ignoreEndTagCaption() self.parser.phase.processEndTag(impliedTagToken("caption")) if not ignoreEndTag: @@ -1864,8 +1850,8 @@ def getPhases(debug): self.tree.generateImpliedEndTags() if self.tree.openElements[-1].name != "caption": self.parser.parseError("expected-one-end-tag-but-got-another", - {"gotName": "caption", - "expectedName": self.tree.openElements[-1].name}) + {"gotName": "caption", + "expectedName": self.tree.openElements[-1].name}) while self.tree.openElements[-1].name != "caption": self.tree.openElements.pop() self.tree.openElements.pop() @@ -1889,7 +1875,6 @@ def getPhases(debug): def endTagOther(self, token): return self.parser.phases["inBody"].processEndTag(token) - class InColumnGroupPhase(Phase): # http://www.whatwg.org/specs/web-apps/current-work/#in-column @@ -1955,7 +1940,6 @@ def getPhases(debug): if not ignoreEndTag: return token - class InTableBodyPhase(Phase): # http://www.whatwg.org/specs/web-apps/current-work/#in-table0 def __init__(self, parser, tree): @@ -1980,8 +1964,8 @@ def getPhases(debug): # helper methods def clearStackToTableBodyContext(self): while self.tree.openElements[-1].name not in ("tbody", "tfoot", - "thead", "html"): - #self.parser.parseError("unexpected-implied-end-tag-in-table", + "thead", "html"): + # self.parser.parseError("unexpected-implied-end-tag-in-table", # {"name": self.tree.openElements[-1].name}) self.tree.openElements.pop() if 
self.tree.openElements[-1].name == "html": @@ -2003,7 +1987,7 @@ def getPhases(debug): self.parser.phase = self.parser.phases["inRow"] def startTagTableCell(self, token): - self.parser.parseError("unexpected-cell-in-table-body", + self.parser.parseError("unexpected-cell-in-table-body", {"name": token["name"]}) self.startTagTr(impliedTagToken("tr", "StartTag")) return token @@ -2012,7 +1996,7 @@ def getPhases(debug): # XXX AT Any ideas on how to share this with endTagTable? if (self.tree.elementInScope("tbody", variant="table") or self.tree.elementInScope("thead", variant="table") or - self.tree.elementInScope("tfoot", variant="table")): + self.tree.elementInScope("tfoot", variant="table")): self.clearStackToTableBodyContext() self.endTagTableRowGroup( impliedTagToken(self.tree.openElements[-1].name)) @@ -2032,12 +2016,12 @@ def getPhases(debug): self.parser.phase = self.parser.phases["inTable"] else: self.parser.parseError("unexpected-end-tag-in-table-body", - {"name": token["name"]}) + {"name": token["name"]}) def endTagTable(self, token): if (self.tree.elementInScope("tbody", variant="table") or self.tree.elementInScope("thead", variant="table") or - self.tree.elementInScope("tfoot", variant="table")): + self.tree.elementInScope("tfoot", variant="table")): self.clearStackToTableBodyContext() self.endTagTableRowGroup( impliedTagToken(self.tree.openElements[-1].name)) @@ -2049,12 +2033,11 @@ def getPhases(debug): def endTagIgnore(self, token): self.parser.parseError("unexpected-end-tag-in-table-body", - {"name": token["name"]}) + {"name": token["name"]}) def endTagOther(self, token): return self.parser.phases["inTable"].processEndTag(token) - class InRowPhase(Phase): # http://www.whatwg.org/specs/web-apps/current-work/#in-row def __init__(self, parser, tree): @@ -2072,7 +2055,7 @@ def getPhases(debug): ("table", self.endTagTable), (("tbody", "tfoot", "thead"), self.endTagTableRowGroup), (("body", "caption", "col", "colgroup", "html", "td", "th"), - 
self.endTagIgnore) + self.endTagIgnore) ]) self.endTagHandler.default = self.endTagOther @@ -2080,7 +2063,7 @@ def getPhases(debug): def clearStackToTableRowContext(self): while self.tree.openElements[-1].name not in ("tr", "html"): self.parser.parseError("unexpected-implied-end-tag-in-table-row", - {"name": self.tree.openElements[-1].name}) + {"name": self.tree.openElements[-1].name}) self.tree.openElements.pop() def ignoreEndTagTr(self): @@ -2091,7 +2074,7 @@ def getPhases(debug): self.parser.phases["inTable"].processEOF() def processSpaceCharacters(self, token): - return self.parser.phases["inTable"].processSpaceCharacters(token) + return self.parser.phases["inTable"].processSpaceCharacters(token) def processCharacters(self, token): return self.parser.phases["inTable"].processCharacters(token) @@ -2139,7 +2122,7 @@ def getPhases(debug): def endTagIgnore(self, token): self.parser.parseError("unexpected-end-tag-in-table-row", - {"name": token["name"]}) + {"name": token["name"]}) def endTagOther(self, token): return self.parser.phases["inTable"].processEndTag(token) @@ -2178,7 +2161,7 @@ def getPhases(debug): def startTagTableOther(self, token): if (self.tree.elementInScope("td", variant="table") or - self.tree.elementInScope("th", variant="table")): + self.tree.elementInScope("th", variant="table")): self.closeCell() return token else: @@ -2194,7 +2177,7 @@ def getPhases(debug): self.tree.generateImpliedEndTags(token["name"]) if self.tree.openElements[-1].name != token["name"]: self.parser.parseError("unexpected-cell-end-tag", - {"name": token["name"]}) + {"name": token["name"]}) while True: node = self.tree.openElements.pop() if node.name == token["name"]: @@ -2249,7 +2232,7 @@ def getPhases(debug): assert self.parser.innerHTML def processCharacters(self, token): - if token["data"] == u"\u0000": + if token["data"] == "\u0000": return self.tree.insertText(token["data"]) @@ -2283,19 +2266,19 @@ def getPhases(debug): def startTagOther(self, token): 
self.parser.parseError("unexpected-start-tag-in-select", - {"name": token["name"]}) + {"name": token["name"]}) def endTagOption(self, token): if self.tree.openElements[-1].name == "option": self.tree.openElements.pop() else: self.parser.parseError("unexpected-end-tag-in-select", - {"name": "option"}) + {"name": "option"}) def endTagOptgroup(self, token): # </optgroup> implicitly closes <option> if (self.tree.openElements[-1].name == "option" and - self.tree.openElements[-2].name == "optgroup"): + self.tree.openElements[-2].name == "optgroup"): self.tree.openElements.pop() # It also closes </optgroup> if self.tree.openElements[-1].name == "optgroup": @@ -2303,7 +2286,7 @@ def getPhases(debug): # But nothing else else: self.parser.parseError("unexpected-end-tag-in-select", - {"name": "optgroup"}) + {"name": "optgroup"}) def endTagSelect(self, token): if self.tree.elementInScope("select", variant="select"): @@ -2318,8 +2301,7 @@ def getPhases(debug): def endTagOther(self, token): self.parser.parseError("unexpected-end-tag-in-select", - {"name": token["name"]}) - + {"name": token["name"]}) class InSelectInTablePhase(Phase): def __init__(self, parser, tree): @@ -2360,64 +2342,64 @@ def getPhases(debug): def endTagOther(self, token): return self.parser.phases["inSelect"].processEndTag(token) - class InForeignContentPhase(Phase): - breakoutElements = frozenset(["b", "big", "blockquote", "body", "br", + breakoutElements = frozenset(["b", "big", "blockquote", "body", "br", "center", "code", "dd", "div", "dl", "dt", - "em", "embed", "h1", "h2", "h3", + "em", "embed", "h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "i", "img", - "li", "listing", "menu", "meta", "nobr", - "ol", "p", "pre", "ruby", "s", "small", - "span", "strong", "strike", "sub", "sup", + "li", "listing", "menu", "meta", "nobr", + "ol", "p", "pre", "ruby", "s", "small", + "span", "strong", "strike", "sub", "sup", "table", "tt", "u", "ul", "var"]) + def __init__(self, parser, tree): Phase.__init__(self, 
parser, tree) def adjustSVGTagNames(self, token): - replacements = {u"altglyph":u"altGlyph", - u"altglyphdef":u"altGlyphDef", - u"altglyphitem":u"altGlyphItem", - u"animatecolor":u"animateColor", - u"animatemotion":u"animateMotion", - u"animatetransform":u"animateTransform", - u"clippath":u"clipPath", - u"feblend":u"feBlend", - u"fecolormatrix":u"feColorMatrix", - u"fecomponenttransfer":u"feComponentTransfer", - u"fecomposite":u"feComposite", - u"feconvolvematrix":u"feConvolveMatrix", - u"fediffuselighting":u"feDiffuseLighting", - u"fedisplacementmap":u"feDisplacementMap", - u"fedistantlight":u"feDistantLight", - u"feflood":u"feFlood", - u"fefunca":u"feFuncA", - u"fefuncb":u"feFuncB", - u"fefuncg":u"feFuncG", - u"fefuncr":u"feFuncR", - u"fegaussianblur":u"feGaussianBlur", - u"feimage":u"feImage", - u"femerge":u"feMerge", - u"femergenode":u"feMergeNode", - u"femorphology":u"feMorphology", - u"feoffset":u"feOffset", - u"fepointlight":u"fePointLight", - u"fespecularlighting":u"feSpecularLighting", - u"fespotlight":u"feSpotLight", - u"fetile":u"feTile", - u"feturbulence":u"feTurbulence", - u"foreignobject":u"foreignObject", - u"glyphref":u"glyphRef", - u"lineargradient":u"linearGradient", - u"radialgradient":u"radialGradient", - u"textpath":u"textPath"} + replacements = {"altglyph": "altGlyph", + "altglyphdef": "altGlyphDef", + "altglyphitem": "altGlyphItem", + "animatecolor": "animateColor", + "animatemotion": "animateMotion", + "animatetransform": "animateTransform", + "clippath": "clipPath", + "feblend": "feBlend", + "fecolormatrix": "feColorMatrix", + "fecomponenttransfer": "feComponentTransfer", + "fecomposite": "feComposite", + "feconvolvematrix": "feConvolveMatrix", + "fediffuselighting": "feDiffuseLighting", + "fedisplacementmap": "feDisplacementMap", + "fedistantlight": "feDistantLight", + "feflood": "feFlood", + "fefunca": "feFuncA", + "fefuncb": "feFuncB", + "fefuncg": "feFuncG", + "fefuncr": "feFuncR", + "fegaussianblur": "feGaussianBlur", + "feimage": 
"feImage", + "femerge": "feMerge", + "femergenode": "feMergeNode", + "femorphology": "feMorphology", + "feoffset": "feOffset", + "fepointlight": "fePointLight", + "fespecularlighting": "feSpecularLighting", + "fespotlight": "feSpotLight", + "fetile": "feTile", + "feturbulence": "feTurbulence", + "foreignobject": "foreignObject", + "glyphref": "glyphRef", + "lineargradient": "linearGradient", + "radialgradient": "radialGradient", + "textpath": "textPath"} if token["name"] in replacements: token["name"] = replacements[token["name"]] def processCharacters(self, token): - if token["data"] == u"\u0000": - token["data"] = u"\uFFFD" - elif (self.parser.framesetOK and + if token["data"] == "\u0000": + token["data"] = "\uFFFD" + elif (self.parser.framesetOK and any(char not in spaceCharacters for char in token["data"])): self.parser.framesetOK = False Phase.processCharacters(self, token) @@ -2428,9 +2410,9 @@ def getPhases(debug): (token["name"] == "font" and set(token["data"].keys()) & set(["color", "face", "size"]))): self.parser.parseError("unexpected-html-element-in-foreign-content", - token["name"]) + {"name": token["name"]}) while (self.tree.openElements[-1].namespace != - self.tree.defaultNamespace and + self.tree.defaultNamespace and not self.parser.isHTMLIntegrationPoint(self.tree.openElements[-1]) and not self.parser.isMathMLTextIntegrationPoint(self.tree.openElements[-1])): self.tree.openElements.pop() @@ -2453,11 +2435,11 @@ def getPhases(debug): nodeIndex = len(self.tree.openElements) - 1 node = self.tree.openElements[-1] if node.name != token["name"]: - self.parser.parseError("unexpected-end-tag", token["name"]) + self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) while True: if node.name.translate(asciiUpper2Lower) == token["name"]: - #XXX this isn't in the spec but it seems necessary + # XXX this isn't in the spec but it seems necessary if self.parser.phase == self.parser.phases["inTableText"]: self.parser.phase.flushCharacters() 
self.parser.phase = self.parser.phase.originalPhase @@ -2475,21 +2457,20 @@ def getPhases(debug): break return new_token - class AfterBodyPhase(Phase): def __init__(self, parser, tree): Phase.__init__(self, parser, tree) self.startTagHandler = utils.MethodDispatcher([ - ("html", self.startTagHtml) - ]) + ("html", self.startTagHtml) + ]) self.startTagHandler.default = self.startTagOther self.endTagHandler = utils.MethodDispatcher([("html", self.endTagHtml)]) self.endTagHandler.default = self.endTagOther def processEOF(self): - #Stop parsing + # Stop parsing pass def processComment(self, token): @@ -2507,11 +2488,11 @@ def getPhases(debug): def startTagOther(self, token): self.parser.parseError("unexpected-start-tag-after-body", - {"name": token["name"]}) + {"name": token["name"]}) self.parser.phase = self.parser.phases["inBody"] return token - def endTagHtml(self,name): + def endTagHtml(self, name): if self.parser.innerHTML: self.parser.parseError("unexpected-end-tag-after-body-innerhtml") else: @@ -2519,7 +2500,7 @@ def getPhases(debug): def endTagOther(self, token): self.parser.parseError("unexpected-end-tag-after-body", - {"name": token["name"]}) + {"name": token["name"]}) self.parser.phase = self.parser.phases["inBody"] return token @@ -2562,7 +2543,7 @@ def getPhases(debug): def startTagOther(self, token): self.parser.parseError("unexpected-start-tag-in-frameset", - {"name": token["name"]}) + {"name": token["name"]}) def endTagFrameset(self, token): if self.tree.openElements[-1].name == "html": @@ -2571,15 +2552,14 @@ def getPhases(debug): else: self.tree.openElements.pop() if (not self.parser.innerHTML and - self.tree.openElements[-1].name != "frameset"): + self.tree.openElements[-1].name != "frameset"): # If we're not in innerHTML mode and the the current node is not a # "frameset" element (anymore) then switch. 
self.parser.phase = self.parser.phases["afterFrameset"] def endTagOther(self, token): self.parser.parseError("unexpected-end-tag-in-frameset", - {"name": token["name"]}) - + {"name": token["name"]}) class AfterFramesetPhase(Phase): # http://www.whatwg.org/specs/web-apps/current-work/#after3 @@ -2598,7 +2578,7 @@ def getPhases(debug): self.endTagHandler.default = self.endTagOther def processEOF(self): - #Stop parsing + # Stop parsing pass def processCharacters(self, token): @@ -2609,15 +2589,14 @@ def getPhases(debug): def startTagOther(self, token): self.parser.parseError("unexpected-start-tag-after-frameset", - {"name": token["name"]}) + {"name": token["name"]}) def endTagHtml(self, token): self.parser.phase = self.parser.phases["afterAfterFrameset"] def endTagOther(self, token): self.parser.parseError("unexpected-end-tag-after-frameset", - {"name": token["name"]}) - + {"name": token["name"]}) class AfterAfterBodyPhase(Phase): def __init__(self, parser, tree): @@ -2647,13 +2626,13 @@ def getPhases(debug): def startTagOther(self, token): self.parser.parseError("expected-eof-but-got-start-tag", - {"name": token["name"]}) + {"name": token["name"]}) self.parser.phase = self.parser.phases["inBody"] return token def processEndTag(self, token): self.parser.parseError("expected-eof-but-got-end-tag", - {"name": token["name"]}) + {"name": token["name"]}) self.parser.phase = self.parser.phases["inBody"] return token @@ -2687,12 +2666,11 @@ def getPhases(debug): def startTagOther(self, token): self.parser.parseError("expected-eof-but-got-start-tag", - {"name": token["name"]}) + {"name": token["name"]}) def processEndTag(self, token): self.parser.parseError("expected-eof-but-got-end-tag", - {"name": token["name"]}) - + {"name": token["name"]}) return { "initial": InitialPhase, @@ -2719,14 +2697,16 @@ def getPhases(debug): "afterAfterBody": AfterAfterBodyPhase, "afterAfterFrameset": AfterAfterFramesetPhase, # XXX after after frameset - } + } + -def impliedTagToken(name, 
type="EndTag", attributes = None, - selfClosing = False): +def impliedTagToken(name, type="EndTag", attributes=None, + selfClosing=False): if attributes is None: attributes = {} - return {"type":tokenTypes[type], "name":unicode(name), "data":attributes, - "selfClosing":selfClosing} + return {"type": tokenTypes[type], "name": name, "data": attributes, + "selfClosing": selfClosing} + class ParseError(Exception): """Error in parsed document""" diff --git a/libs/html5lib/ihatexml.py b/libs/html5lib/ihatexml.py index dd78563..0fc7930 100644 --- a/libs/html5lib/ihatexml.py +++ b/libs/html5lib/ihatexml.py @@ -1,25 +1,105 @@ +from __future__ import absolute_import, division, unicode_literals + import re +import warnings + +from .constants import DataLossWarning -baseChar = """[#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] | [#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] | [#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 | [#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] | [#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] | [#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] | [#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] | [#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 | [#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] | [#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] | [#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D | [#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] | [#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] | [#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] | [#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] | [#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] | [#x0A85-#x0A8B] | #x0A8D | 
[#x0A8F-#x0A91] | [#x0A93-#x0AA8] | [#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 | [#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] | [#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] | [#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] | [#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] | [#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] | [#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] | [#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] | [#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] | [#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] | [#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33] | [#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A | #x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 | #x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] | #x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] | [#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] | [#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C | #x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 | [#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] | [#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] | [#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 | [#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] | [#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B | #x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE | [#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] | [#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 | [#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] | [#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]""" +baseChar = """ +[#x0041-#x005A] | 
[#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | +[#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] | +[#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] | +[#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 | +[#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] | +[#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] | +[#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] | +[#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] | +[#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 | +[#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] | +[#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] | +[#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D | +[#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] | +[#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] | +[#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] | +[#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] | +[#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] | +[#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] | +[#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 | +[#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] | +[#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] | +[#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] | +[#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] | +[#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] | +[#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] | +[#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] | +[#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] | +[#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] | +[#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | 
[#x0E32-#x0E33] | +[#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A | +#x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 | +#x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] | +#x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] | +[#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] | +[#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C | +#x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 | +[#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] | +[#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] | +[#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 | +[#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] | +[#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B | +#x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE | +[#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] | +[#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 | +[#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] | +[#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]""" ideographic = """[#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]""" -combiningCharacter = """[#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] | [#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 | [#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] | [#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] | #x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] | [#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] | [#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 | #x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] | [#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC | [#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] | #x0B3C | 
[#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] | [#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] | [#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] | [#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] | [#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] | [#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] | #x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 | [#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] | #x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] | [#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] | [#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] | #x3099 | #x309A""" +combiningCharacter = """ +[#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] | +[#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 | +[#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] | +[#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] | +#x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] | +[#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] | +[#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 | +#x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] | +[#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC | +[#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] | +#x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] | +[#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] | +[#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] | +[#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] | +[#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] | +[#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] | +#x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 | +[#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | 
[#x0EC8-#x0ECD] | [#x0F18-#x0F19] | +#x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] | +[#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] | +[#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] | +#x3099 | #x309A""" -digit = """[#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] | [#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] | [#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] | [#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]""" +digit = """ +[#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] | +[#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] | +[#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] | +[#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]""" -extender = """#x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]""" +extender = """ +#x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 | +#[#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]""" letter = " | ".join([baseChar, ideographic]) -#Without the -name = " | ".join([letter, digit, ".", "-", "_", combiningCharacter, - extender]) +# Without the +name = " | ".join([letter, digit, ".", "-", "_", combiningCharacter, + extender]) nameFirst = " | ".join([letter, "_"]) reChar = re.compile(r"#x([\d|A-F]{4,4})") reCharRange = re.compile(r"\[#x([\d|A-F]{4,4})-#x([\d|A-F]{4,4})\]") + def charStringToList(chars): charRanges = [item.strip() for item in chars.split(" | ")] rv = [] @@ -30,16 +110,17 @@ def charStringToList(chars): if match is not None: rv.append([hexToInt(item) for item in match.groups()]) if len(rv[-1]) == 1: - rv[-1] = rv[-1]*2 + rv[-1] = rv[-1] * 2 foundMatch = True break if not foundMatch: assert len(item) == 1 - + rv.append([ord(item)] * 2) rv = normaliseCharList(rv) return rv + def normaliseCharList(charList): charList = sorted(charList) for item 
in charList: @@ -49,61 +130,69 @@ def normaliseCharList(charList): while i < len(charList): j = 1 rv.append(charList[i]) - while i + j < len(charList) and charList[i+j][0] <= rv[-1][1] + 1: - rv[-1][1] = charList[i+j][1] + while i + j < len(charList) and charList[i + j][0] <= rv[-1][1] + 1: + rv[-1][1] = charList[i + j][1] j += 1 i += j return rv -#We don't really support characters above the BMP :( +# We don't really support characters above the BMP :( max_unicode = int("FFFF", 16) - + + def missingRanges(charList): rv = [] if charList[0] != 0: rv.append([0, charList[0][0] - 1]) for i, item in enumerate(charList[:-1]): - rv.append([item[1]+1, charList[i+1][0] - 1]) + rv.append([item[1] + 1, charList[i + 1][0] - 1]) if charList[-1][1] != max_unicode: rv.append([charList[-1][1] + 1, max_unicode]) return rv + def listToRegexpStr(charList): rv = [] for item in charList: if item[0] == item[1]: - rv.append(escapeRegexp(unichr(item[0]))) + rv.append(escapeRegexp(chr(item[0]))) else: - rv.append(escapeRegexp(unichr(item[0])) + "-" + - escapeRegexp(unichr(item[1]))) - return "[%s]"%"".join(rv) + rv.append(escapeRegexp(chr(item[0])) + "-" + + escapeRegexp(chr(item[1]))) + return "[%s]" % "".join(rv) + def hexToInt(hex_str): return int(hex_str, 16) + def escapeRegexp(string): specialCharacters = (".", "^", "$", "*", "+", "?", "{", "}", - "[", "]", "|", "(", ")", "-") + "[", "]", "|", "(", ")", "-") for char in specialCharacters: string = string.replace(char, "\\" + char) - if char in string: - print string return string -#output from the above -nonXmlNameBMPRegexp = 
re.compile(u'[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0f38\u0f3a-\u0f3d\u0f4
8\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') +# output from the above +nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0
d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') + +nonXmlNameFirstBMPRegexp = 
re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3040\u3095-\u30a0\u30fb-\u31
04\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') + +# Simpler things +nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\-\'()+,./:=?;!*#@$_%]") -nonXmlNameFirstBMPRegexp = re.compile(u'[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1
fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') class InfosetFilter(object): replacementRegexp = re.compile(r"U[\dA-F]{5,5}") - def __init__(self, replaceChars = None, - dropXmlnsLocalName = False, - dropXmlnsAttrNs = False, - preventDoubleDashComments = False, - preventDashAtCommentEnd = False, - replaceFormFeedCharacters = True): + + def __init__(self, replaceChars=None, + dropXmlnsLocalName=False, + dropXmlnsAttrNs=False, + preventDoubleDashComments=False, + preventDashAtCommentEnd=False, + replaceFormFeedCharacters=True, + preventSingleQuotePubid=False): self.dropXmlnsLocalName = dropXmlnsLocalName self.dropXmlnsAttrNs = dropXmlnsAttrNs @@ -113,14 +202,17 @@ class InfosetFilter(object): self.replaceFormFeedCharacters = replaceFormFeedCharacters + self.preventSingleQuotePubid = preventSingleQuotePubid + self.replaceCache = {} def coerceAttribute(self, name, namespace=None): if self.dropXmlnsLocalName and name.startswith("xmlns:"): - #Need a datalosswarning here + warnings.warn("Attributes cannot begin with xmlns", DataLossWarning) return None - elif (self.dropXmlnsAttrNs and + elif (self.dropXmlnsAttrNs and namespace == "http://www.w3.org/2000/xmlns/"): + warnings.warn("Attributes cannot be in the xml namespace", DataLossWarning) return None else: return self.toXmlName(name) @@ -131,20 +223,35 @@ class InfosetFilter(object): def coerceComment(self, data): if self.preventDoubleDashComments: while "--" in data: + warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning) data = data.replace("--", "- -") return data - + def coerceCharacters(self, data): if self.replaceFormFeedCharacters: + for i in range(data.count("\x0C")): + warnings.warn("Text cannot contain U+000C", DataLossWarning) data = data.replace("\x0C", " ") - #Other non-xml characters + # Other non-xml characters 
return data + def coercePubid(self, data): + dataOutput = data + for char in nonPubidCharRegexp.findall(data): + warnings.warn("Coercing non-XML pubid", DataLossWarning) + replacement = self.getReplacementCharacter(char) + dataOutput = dataOutput.replace(char, replacement) + if self.preventSingleQuotePubid and dataOutput.find("'") >= 0: + warnings.warn("Pubid cannot contain single quote", DataLossWarning) + dataOutput = dataOutput.replace("'", self.getReplacementCharacter("'")) + return dataOutput + def toXmlName(self, name): nameFirst = name[0] nameRest = name[1:] m = nonXmlNameFirstBMPRegexp.match(nameFirst) if m: + warnings.warn("Coercing non-XML name", DataLossWarning) nameFirstOutput = self.getReplacementCharacter(nameFirst) else: nameFirstOutput = nameFirst @@ -152,10 +259,11 @@ class InfosetFilter(object): nameRestOutput = nameRest replaceChars = set(nonXmlNameBMPRegexp.findall(nameRest)) for char in replaceChars: + warnings.warn("Coercing non-XML name", DataLossWarning) replacement = self.getReplacementCharacter(char) nameRestOutput = nameRestOutput.replace(char, replacement) return nameFirstOutput + nameRestOutput - + def getReplacementCharacter(self, char): if char in self.replaceCache: replacement = self.replaceCache[char] @@ -169,9 +277,9 @@ class InfosetFilter(object): return name def escapeChar(self, char): - replacement = "U" + hex(ord(char))[2:].upper().rjust(5, "0") + replacement = "U%05X" % ord(char) self.replaceCache[char] = replacement return replacement def unescapeChar(self, charcode): - return unichr(int(charcode[1:], 16)) + return chr(int(charcode[1:], 16)) diff --git a/libs/html5lib/inputstream.py b/libs/html5lib/inputstream.py index edec132..004bdd4 100644 --- a/libs/html5lib/inputstream.py +++ b/libs/html5lib/inputstream.py @@ -1,19 +1,33 @@ +from __future__ import absolute_import, division, unicode_literals +from six import text_type + import codecs import re -import types -import sys -from constants import EOF, spaceCharacters, 
asciiLetters, asciiUppercase -from constants import encodings, ReparseException -import utils +from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase +from .constants import encodings, ReparseException +from . import utils + +from io import StringIO + +try: + from io import BytesIO +except ImportError: + BytesIO = StringIO -#Non-unicode versions of constants for use in the pre-parser -spaceCharactersBytes = frozenset([str(item) for item in spaceCharacters]) -asciiLettersBytes = frozenset([str(item) for item in asciiLetters]) -asciiUppercaseBytes = frozenset([str(item) for item in asciiUppercase]) -spacesAngleBrackets = spaceCharactersBytes | frozenset([">", "<"]) +try: + from io import BufferedIOBase +except ImportError: + class BufferedIOBase(object): + pass -invalid_unicode_re = re.compile(u"[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uD800-\uDFFF\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]") +# Non-unicode versions of constants for use in the pre-parser +spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters]) +asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters]) +asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase]) +spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"]) + +invalid_unicode_re = 
re.compile("[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uD800-\uDFFF\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]") non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, @@ -23,22 +37,23 @@ non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF, 0x10FFFE, 0x10FFFF]) -ascii_punctuation_re = re.compile(ur"[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u007E]") +ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u007E]") # Cache for charsUntil() charsUntilRegEx = {} - -class BufferedStream: + + +class BufferedStream(object): """Buffering for streams that do not have buffering of their own - The buffer is implemented as a list of chunks on the assumption that + The buffer is implemented as a list of chunks on the assumption that joining many strings will be slow since it is O(n**2) """ - + def __init__(self, stream): self.stream = stream self.buffer = [] - self.position = [-1,0] #chunk number, offset + self.position = [-1, 0] # chunk number, offset def tell(self): pos = 0 @@ -48,11 +63,11 @@ class BufferedStream: return pos def seek(self, pos): - assert pos < self._bufferedBytes() + assert pos <= self._bufferedBytes() offset = pos i = 0 while len(self.buffer[i]) < offset: - offset -= pos + offset -= len(self.buffer[i]) i += 1 self.position = [i, offset] @@ -64,7 +79,7 @@ class BufferedStream: return self._readStream(bytes) else: return self._readFromBuffer(bytes) - + def _bufferedBytes(self): return sum([len(item) for item in self.buffer]) @@ -83,7 +98,7 @@ class 
BufferedStream: while bufferIndex < len(self.buffer) and remainingBytes != 0: assert remainingBytes > 0 bufferedData = self.buffer[bufferIndex] - + if remainingBytes <= len(bufferedData) - bufferOffset: bytesToRead = remainingBytes self.position = [bufferIndex, bufferOffset + bytesToRead] @@ -91,20 +106,33 @@ class BufferedStream: bytesToRead = len(bufferedData) - bufferOffset self.position = [bufferIndex, len(bufferedData)] bufferIndex += 1 - data = rv.append(bufferedData[bufferOffset: - bufferOffset + bytesToRead]) + rv.append(bufferedData[bufferOffset:bufferOffset + bytesToRead]) remainingBytes -= bytesToRead bufferOffset = 0 if remainingBytes: rv.append(self._readStream(remainingBytes)) - - return "".join(rv) - + + return b"".join(rv) -class HTMLInputStream: +def HTMLInputStream(source, encoding=None, parseMeta=True, chardet=True): + if hasattr(source, "read"): + isUnicode = isinstance(source.read(0), text_type) + else: + isUnicode = isinstance(source, text_type) + + if isUnicode: + if encoding is not None: + raise TypeError("Cannot explicitly set an encoding with a unicode string") + + return HTMLUnicodeInputStream(source) + else: + return HTMLBinaryInputStream(source, encoding, parseMeta, chardet) + + +class HTMLUnicodeInputStream(object): """Provides a unicode stream of characters to the HTMLTokenizer. This class takes care of character encoding and removing or replacing @@ -114,7 +142,7 @@ class HTMLInputStream: _defaultChunkSize = 10240 - def __init__(self, source, encoding=None, parseMeta=True, chardet=True): + def __init__(self, source): """Initialises the HTMLInputStream. HTMLInputStream(source, [encoding]) -> Normalized stream from source @@ -126,49 +154,29 @@ class HTMLInputStream: the encoding. 
If specified, that encoding will be used, regardless of any BOM or later declaration (such as in a meta element) - + parseMeta - Look for a <meta> element containing encoding information """ - #Craziness - if len(u"\U0010FFFF") == 1: + # Craziness + if len("\U0010FFFF") == 1: self.reportCharacterErrors = self.characterErrorsUCS4 - self.replaceCharactersRegexp = re.compile(u"[\uD800-\uDFFF]") + self.replaceCharactersRegexp = re.compile("[\uD800-\uDFFF]") else: self.reportCharacterErrors = self.characterErrorsUCS2 - self.replaceCharactersRegexp = re.compile(u"([\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF])") + self.replaceCharactersRegexp = re.compile("([\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF])") # List of where new lines occur self.newLines = [0] - self.charEncoding = (codecName(encoding), "certain") - - # Raw Stream - for unicode objects this will encode to utf-8 and set - # self.charEncoding as appropriate - self.rawStream = self.openStream(source) - - # Encoding Information - #Number of bytes to use when looking for a meta element with - #encoding information - self.numBytesMeta = 512 - #Number of bytes to use when using detecting encoding using chardet - self.numBytesChardet = 100 - #Encoding to use if no other information can be found - self.defaultEncoding = "windows-1252" - - #Detect encoding iff no explicit "transport level" encoding is supplied - if (self.charEncoding[0] is None): - self.charEncoding = self.detectEncoding(parseMeta, chardet) - + self.charEncoding = ("utf-8", "certain") + self.dataStream = self.openStream(source) self.reset() def reset(self): - self.dataStream = codecs.getreader(self.charEncoding[0])(self.rawStream, - 'replace') - - self.chunk = u"" + self.chunk = "" self.chunkSize = 0 self.chunkOffset = 0 self.errors = [] @@ -177,8 +185,8 @@ class HTMLInputStream: self.prevNumLines = 0 # number of columns in the last line of the previous chunk self.prevNumCols = 0 - - #Deal with CR LF 
and surrogates split over chunk boundaries + + # Deal with CR LF and surrogates split over chunk boundaries self._bufferedCharacter = None def openStream(self, source): @@ -191,126 +199,15 @@ class HTMLInputStream: if hasattr(source, 'read'): stream = source else: - # Otherwise treat source as a string and convert to a file object - if isinstance(source, unicode): - source = source.encode('utf-8') - self.charEncoding = ("utf-8", "certain") - try: - from io import BytesIO - except: - # 2to3 converts this line to: from io import StringIO - from cStringIO import StringIO as BytesIO - stream = BytesIO(source) - - if (not(hasattr(stream, "tell") and hasattr(stream, "seek")) or - stream is sys.stdin): - stream = BufferedStream(stream) + stream = StringIO(source) return stream - def detectEncoding(self, parseMeta=True, chardet=True): - #First look for a BOM - #This will also read past the BOM if present - encoding = self.detectBOM() - confidence = "certain" - #If there is no BOM need to look for meta elements with encoding - #information - if encoding is None and parseMeta: - encoding = self.detectEncodingMeta() - confidence = "tentative" - #Guess with chardet, if avaliable - if encoding is None and chardet: - confidence = "tentative" - try: - from chardet.universaldetector import UniversalDetector - buffers = [] - detector = UniversalDetector() - while not detector.done: - buffer = self.rawStream.read(self.numBytesChardet) - if not buffer: - break - buffers.append(buffer) - detector.feed(buffer) - detector.close() - encoding = detector.result['encoding'] - self.rawStream.seek(0) - except ImportError: - pass - # If all else fails use the default encoding - if encoding is None: - confidence="tentative" - encoding = self.defaultEncoding - - #Substitute for equivalent encodings: - encodingSub = {"iso-8859-1":"windows-1252"} - - if encoding.lower() in encodingSub: - encoding = encodingSub[encoding.lower()] - - return encoding, confidence - - def changeEncoding(self, 
newEncoding): - newEncoding = codecName(newEncoding) - if newEncoding in ("utf-16", "utf-16-be", "utf-16-le"): - newEncoding = "utf-8" - if newEncoding is None: - return - elif newEncoding == self.charEncoding[0]: - self.charEncoding = (self.charEncoding[0], "certain") - else: - self.rawStream.seek(0) - self.reset() - self.charEncoding = (newEncoding, "certain") - raise ReparseException, "Encoding changed from %s to %s"%(self.charEncoding[0], newEncoding) - - def detectBOM(self): - """Attempts to detect at BOM at the start of the stream. If - an encoding can be determined from the BOM return the name of the - encoding otherwise return None""" - bomDict = { - codecs.BOM_UTF8: 'utf-8', - codecs.BOM_UTF16_LE: 'utf-16-le', codecs.BOM_UTF16_BE: 'utf-16-be', - codecs.BOM_UTF32_LE: 'utf-32-le', codecs.BOM_UTF32_BE: 'utf-32-be' - } - - # Go to beginning of file and read in 4 bytes - string = self.rawStream.read(4) - - # Try detecting the BOM using bytes from the string - encoding = bomDict.get(string[:3]) # UTF-8 - seek = 3 - if not encoding: - # Need to detect UTF-32 before UTF-16 - encoding = bomDict.get(string) # UTF-32 - seek = 4 - if not encoding: - encoding = bomDict.get(string[:2]) # UTF-16 - seek = 2 - - # Set the read position past the BOM if one was found, otherwise - # set it to the start of the stream - self.rawStream.seek(encoding and seek or 0) - - return encoding - - def detectEncodingMeta(self): - """Report the encoding declared by the meta element - """ - buffer = self.rawStream.read(self.numBytesMeta) - parser = EncodingParser(buffer) - self.rawStream.seek(0) - encoding = parser.getEncoding() - - if encoding in ("utf-16", "utf-16-be", "utf-16-le"): - encoding = "utf-8" - - return encoding - def _position(self, offset): chunk = self.chunk - nLines = chunk.count(u'\n', 0, offset) + nLines = chunk.count('\n', 0, offset) positionLine = self.prevNumLines + nLines - lastLinePos = chunk.rfind(u'\n', 0, offset) + lastLinePos = chunk.rfind('\n', 0, offset) if 
lastLinePos == -1: positionColumn = self.prevNumCols + offset else: @@ -320,7 +217,7 @@ class HTMLInputStream: def position(self): """Returns (line, col) of the current position in the stream.""" line, col = self._position(self.chunkOffset) - return (line+1, col) + return (line + 1, col) def char(self): """ Read one character from the stream or queue if available. Return @@ -343,34 +240,34 @@ class HTMLInputStream: self.prevNumLines, self.prevNumCols = self._position(self.chunkSize) - self.chunk = u"" + self.chunk = "" self.chunkSize = 0 self.chunkOffset = 0 data = self.dataStream.read(chunkSize) - - #Deal with CR LF and surrogates broken across chunks + + # Deal with CR LF and surrogates broken across chunks if self._bufferedCharacter: data = self._bufferedCharacter + data self._bufferedCharacter = None elif not data: # We have no more data, bye-bye stream return False - + if len(data) > 1: lastv = ord(data[-1]) if lastv == 0x0D or 0xD800 <= lastv <= 0xDBFF: self._bufferedCharacter = data[-1] data = data[:-1] - + self.reportCharacterErrors(data) - + # Replace invalid characters # Note U+0000 is dealt with in the tokenizer - data = self.replaceCharactersRegexp.sub(u"\ufffd", data) - - data = data.replace(u"\r\n", u"\n") - data = data.replace(u"\r", u"\n") + data = self.replaceCharactersRegexp.sub("\ufffd", data) + + data = data.replace("\r\n", "\n") + data = data.replace("\r", "\n") self.chunk = data self.chunkSize = len(data) @@ -378,23 +275,22 @@ class HTMLInputStream: return True def characterErrorsUCS4(self, data): - for i in xrange(len(invalid_unicode_re.findall(data))): + for i in range(len(invalid_unicode_re.findall(data))): self.errors.append("invalid-codepoint") def characterErrorsUCS2(self, data): - #Someone picked the wrong compile option - #You lose + # Someone picked the wrong compile option + # You lose skip = False - import sys for match in invalid_unicode_re.finditer(data): if skip: continue codepoint = ord(match.group()) pos = match.start() - 
#Pretty sure there should be endianness issues here - if utils.isSurrogatePair(data[pos:pos+2]): - #We have a surrogate pair! - char_val = utils.surrogatePairToCodepoint(data[pos:pos+2]) + # Pretty sure there should be endianness issues here + if utils.isSurrogatePair(data[pos:pos + 2]): + # We have a surrogate pair! + char_val = utils.surrogatePairToCodepoint(data[pos:pos + 2]) if char_val in non_bmp_invalid_codepoints: self.errors.append("invalid-codepoint") skip = True @@ -405,7 +301,7 @@ class HTMLInputStream: skip = False self.errors.append("invalid-codepoint") - def charsUntil(self, characters, opposite = False): + def charsUntil(self, characters, opposite=False): """ Returns a string of characters from the stream up to but not including any character in 'characters' or EOF. 'characters' must be a container that supports the 'in' method and iteration over its @@ -417,12 +313,12 @@ class HTMLInputStream: chars = charsUntilRegEx[(characters, opposite)] except KeyError: if __debug__: - for c in characters: + for c in characters: assert(ord(c) < 128) - regex = u"".join([u"\\x%02x" % ord(c) for c in characters]) + regex = "".join(["\\x%02x" % ord(c) for c in characters]) if not opposite: - regex = u"^%s" % regex - chars = charsUntilRegEx[(characters, opposite)] = re.compile(u"[%s]+" % regex) + regex = "^%s" % regex + chars = charsUntilRegEx[(characters, opposite)] = re.compile("[%s]+" % regex) rv = [] @@ -449,7 +345,7 @@ class HTMLInputStream: # Reached EOF break - r = u"".join(rv) + r = "".join(rv) return r def unget(self, char): @@ -468,26 +364,210 @@ class HTMLInputStream: self.chunkOffset -= 1 assert self.chunk[self.chunkOffset] == char -class EncodingBytes(str): + +class HTMLBinaryInputStream(HTMLUnicodeInputStream): + """Provides a unicode stream of characters to the HTMLTokenizer. + + This class takes care of character encoding and removing or replacing + incorrect byte-sequences and also provides column and line tracking. 
+ + """ + + def __init__(self, source, encoding=None, parseMeta=True, chardet=True): + """Initialises the HTMLInputStream. + + HTMLInputStream(source, [encoding]) -> Normalized stream from source + for use by html5lib. + + source can be either a file-object, local filename or a string. + + The optional encoding parameter must be a string that indicates + the encoding. If specified, that encoding will be used, + regardless of any BOM or later declaration (such as in a meta + element) + + parseMeta - Look for a <meta> element containing encoding information + + """ + # Raw Stream - for unicode objects this will encode to utf-8 and set + # self.charEncoding as appropriate + self.rawStream = self.openStream(source) + + HTMLUnicodeInputStream.__init__(self, self.rawStream) + + self.charEncoding = (codecName(encoding), "certain") + + # Encoding Information + # Number of bytes to use when looking for a meta element with + # encoding information + self.numBytesMeta = 512 + # Number of bytes to use when using detecting encoding using chardet + self.numBytesChardet = 100 + # Encoding to use if no other information can be found + self.defaultEncoding = "windows-1252" + + # Detect encoding iff no explicit "transport level" encoding is supplied + if (self.charEncoding[0] is None): + self.charEncoding = self.detectEncoding(parseMeta, chardet) + + # Call superclass + self.reset() + + def reset(self): + self.dataStream = codecs.getreader(self.charEncoding[0])(self.rawStream, + 'replace') + HTMLUnicodeInputStream.reset(self) + + def openStream(self, source): + """Produces a file object from source. + + source can be either a file object, local filename or a string. 
+ + """ + # Already a file object + if hasattr(source, 'read'): + stream = source + else: + stream = BytesIO(source) + + try: + stream.seek(stream.tell()) + except: + stream = BufferedStream(stream) + + return stream + + def detectEncoding(self, parseMeta=True, chardet=True): + # First look for a BOM + # This will also read past the BOM if present + encoding = self.detectBOM() + confidence = "certain" + # If there is no BOM need to look for meta elements with encoding + # information + if encoding is None and parseMeta: + encoding = self.detectEncodingMeta() + confidence = "tentative" + # Guess with chardet, if avaliable + if encoding is None and chardet: + confidence = "tentative" + try: + try: + from charade.universaldetector import UniversalDetector + except ImportError: + from chardet.universaldetector import UniversalDetector + buffers = [] + detector = UniversalDetector() + while not detector.done: + buffer = self.rawStream.read(self.numBytesChardet) + assert isinstance(buffer, bytes) + if not buffer: + break + buffers.append(buffer) + detector.feed(buffer) + detector.close() + encoding = detector.result['encoding'] + self.rawStream.seek(0) + except ImportError: + pass + # If all else fails use the default encoding + if encoding is None: + confidence = "tentative" + encoding = self.defaultEncoding + + # Substitute for equivalent encodings: + encodingSub = {"iso-8859-1": "windows-1252"} + + if encoding.lower() in encodingSub: + encoding = encodingSub[encoding.lower()] + + return encoding, confidence + + def changeEncoding(self, newEncoding): + assert self.charEncoding[1] != "certain" + newEncoding = codecName(newEncoding) + if newEncoding in ("utf-16", "utf-16-be", "utf-16-le"): + newEncoding = "utf-8" + if newEncoding is None: + return + elif newEncoding == self.charEncoding[0]: + self.charEncoding = (self.charEncoding[0], "certain") + else: + self.rawStream.seek(0) + self.reset() + self.charEncoding = (newEncoding, "certain") + raise 
ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding)) + + def detectBOM(self): + """Attempts to detect at BOM at the start of the stream. If + an encoding can be determined from the BOM return the name of the + encoding otherwise return None""" + bomDict = { + codecs.BOM_UTF8: 'utf-8', + codecs.BOM_UTF16_LE: 'utf-16-le', codecs.BOM_UTF16_BE: 'utf-16-be', + codecs.BOM_UTF32_LE: 'utf-32-le', codecs.BOM_UTF32_BE: 'utf-32-be' + } + + # Go to beginning of file and read in 4 bytes + string = self.rawStream.read(4) + assert isinstance(string, bytes) + + # Try detecting the BOM using bytes from the string + encoding = bomDict.get(string[:3]) # UTF-8 + seek = 3 + if not encoding: + # Need to detect UTF-32 before UTF-16 + encoding = bomDict.get(string) # UTF-32 + seek = 4 + if not encoding: + encoding = bomDict.get(string[:2]) # UTF-16 + seek = 2 + + # Set the read position past the BOM if one was found, otherwise + # set it to the start of the stream + self.rawStream.seek(encoding and seek or 0) + + return encoding + + def detectEncodingMeta(self): + """Report the encoding declared by the meta element + """ + buffer = self.rawStream.read(self.numBytesMeta) + assert isinstance(buffer, bytes) + parser = EncodingParser(buffer) + self.rawStream.seek(0) + encoding = parser.getEncoding() + + if encoding in ("utf-16", "utf-16-be", "utf-16-le"): + encoding = "utf-8" + + return encoding + + +class EncodingBytes(bytes): """String-like object with an associated position and various extra methods If the position is ever greater than the string length then an exception is raised""" def __new__(self, value): - return str.__new__(self, value.lower()) + assert isinstance(value, bytes) + return bytes.__new__(self, value.lower()) def __init__(self, value): - self._position=-1 - + self._position = -1 + def __iter__(self): return self - - def next(self): + + def __next__(self): p = self._position = self._position + 1 if p >= len(self): raise StopIteration 
elif p < 0: raise TypeError - return self[p] + return self[p:p + 1] + + def next(self): + # Py2 compat + return self.__next__() def previous(self): p = self._position @@ -496,13 +576,13 @@ class EncodingBytes(str): elif p < 0: raise TypeError self._position = p = p - 1 - return self[p] - + return self[p:p + 1] + def setPosition(self, position): if self._position >= len(self): raise StopIteration self._position = position - + def getPosition(self): if self._position >= len(self): raise StopIteration @@ -510,19 +590,19 @@ class EncodingBytes(str): return self._position else: return None - + position = property(getPosition, setPosition) def getCurrentByte(self): - return self[self.position] - + return self[self.position:self.position + 1] + currentByte = property(getCurrentByte) def skip(self, chars=spaceCharactersBytes): """Skip past a list of characters""" p = self.position # use property for the error-checking while p < len(self): - c = self[p] + c = self[p:p + 1] if c not in chars: self._position = p return c @@ -533,7 +613,7 @@ class EncodingBytes(str): def skipUntil(self, chars): p = self.position while p < len(self): - c = self[p] + c = self[p:p + 1] if c in chars: self._position = p return c @@ -542,16 +622,16 @@ class EncodingBytes(str): return None def matchBytes(self, bytes): - """Look for a sequence of bytes at the start of a string. If the bytes - are found return True and advance the position to the byte after the + """Look for a sequence of bytes at the start of a string. If the bytes + are found return True and advance the position to the byte after the match. Otherwise return False and leave the position alone""" p = self.position - data = self[p:p+len(bytes)] + data = self[p:p + len(bytes)] rv = data.startswith(bytes) if rv: self.position += len(bytes) return rv - + def jumpTo(self, bytes): """Look for the next sequence of bytes matching a given sequence. 
If a match is found advance the position to the last byte of the match""" @@ -560,11 +640,12 @@ class EncodingBytes(str): # XXX: This is ugly, but I can't see a nicer way to fix this. if self._position == -1: self._position = 0 - self._position += (newPosition + len(bytes)-1) + self._position += (newPosition + len(bytes) - 1) return True else: raise StopIteration + class EncodingParser(object): """Mini parser for detecting character encoding from meta elements""" @@ -575,147 +656,158 @@ class EncodingParser(object): def getEncoding(self): methodDispatch = ( - ("<!--",self.handleComment), - ("<meta",self.handleMeta), - ("</",self.handlePossibleEndTag), - ("<!",self.handleOther), - ("<?",self.handleOther), - ("<",self.handlePossibleStartTag)) + (b"<!--", self.handleComment), + (b"<meta", self.handleMeta), + (b"</", self.handlePossibleEndTag), + (b"<!", self.handleOther), + (b"<?", self.handleOther), + (b"<", self.handlePossibleStartTag)) for byte in self.data: keepParsing = True for key, method in methodDispatch: if self.data.matchBytes(key): try: - keepParsing = method() + keepParsing = method() break except StopIteration: - keepParsing=False + keepParsing = False break if not keepParsing: break - + return self.encoding def handleComment(self): """Skip over comments""" - return self.data.jumpTo("-->") + return self.data.jumpTo(b"-->") def handleMeta(self): if self.data.currentByte not in spaceCharactersBytes: - #if we have <meta not followed by a space so just keep going + # if we have <meta not followed by a space so just keep going return True - #We have a valid meta element we want to search for attributes + # We have a valid meta element we want to search for attributes + hasPragma = False + pendingEncoding = None while True: - #Try to find the next attribute after the current position + # Try to find the next attribute after the current position attr = self.getAttribute() if attr is None: return True else: - if attr[0] == "charset": + if attr[0] == 
b"http-equiv": + hasPragma = attr[1] == b"content-type" + if hasPragma and pendingEncoding is not None: + self.encoding = pendingEncoding + return False + elif attr[0] == b"charset": tentativeEncoding = attr[1] codec = codecName(tentativeEncoding) if codec is not None: self.encoding = codec return False - elif attr[0] == "content": + elif attr[0] == b"content": contentParser = ContentAttrParser(EncodingBytes(attr[1])) tentativeEncoding = contentParser.parse() - codec = codecName(tentativeEncoding) - if codec is not None: - self.encoding = codec - return False + if tentativeEncoding is not None: + codec = codecName(tentativeEncoding) + if codec is not None: + if hasPragma: + self.encoding = codec + return False + else: + pendingEncoding = codec def handlePossibleStartTag(self): return self.handlePossibleTag(False) def handlePossibleEndTag(self): - self.data.next() + next(self.data) return self.handlePossibleTag(True) def handlePossibleTag(self, endTag): data = self.data if data.currentByte not in asciiLettersBytes: - #If the next byte is not an ascii letter either ignore this - #fragment (possible start tag case) or treat it according to - #handleOther + # If the next byte is not an ascii letter either ignore this + # fragment (possible start tag case) or treat it according to + # handleOther if endTag: data.previous() self.handleOther() return True - + c = data.skipUntil(spacesAngleBrackets) - if c == "<": - #return to the first step in the overall "two step" algorithm - #reprocessing the < byte + if c == b"<": + # return to the first step in the overall "two step" algorithm + # reprocessing the < byte data.previous() else: - #Read all attributes + # Read all attributes attr = self.getAttribute() while attr is not None: attr = self.getAttribute() return True def handleOther(self): - return self.data.jumpTo(">") + return self.data.jumpTo(b">") def getAttribute(self): - """Return a name,value pair for the next attribute in the stream, + """Return a name,value pair 
for the next attribute in the stream, if one is found, or None""" data = self.data # Step 1 (skip chars) - c = data.skip(spaceCharactersBytes | frozenset("/")) + c = data.skip(spaceCharactersBytes | frozenset([b"/"])) + assert c is None or len(c) == 1 # Step 2 - if c in (">", None): + if c in (b">", None): return None # Step 3 attrName = [] attrValue = [] - #Step 4 attribute name + # Step 4 attribute name while True: - if c == "=" and attrName: + if c == b"=" and attrName: break elif c in spaceCharactersBytes: - #Step 6! + # Step 6! c = data.skip() - c = data.next() break - elif c in ("/", ">"): - return "".join(attrName), "" + elif c in (b"/", b">"): + return b"".join(attrName), b"" elif c in asciiUppercaseBytes: attrName.append(c.lower()) - elif c == None: + elif c is None: return None else: attrName.append(c) - #Step 5 - c = data.next() - #Step 7 - if c != "=": + # Step 5 + c = next(data) + # Step 7 + if c != b"=": data.previous() - return "".join(attrName), "" - #Step 8 - data.next() - #Step 9 + return b"".join(attrName), b"" + # Step 8 + next(data) + # Step 9 c = data.skip() - #Step 10 - if c in ("'", '"'): - #10.1 + # Step 10 + if c in (b"'", b'"'): + # 10.1 quoteChar = c while True: - #10.2 - c = data.next() - #10.3 + # 10.2 + c = next(data) + # 10.3 if c == quoteChar: - data.next() - return "".join(attrName), "".join(attrValue) - #10.4 + next(data) + return b"".join(attrName), b"".join(attrValue) + # 10.4 elif c in asciiUppercaseBytes: attrValue.append(c.lower()) - #10.5 + # 10.5 else: attrValue.append(c) - elif c == ">": - return "".join(attrName), "" + elif c == b">": + return b"".join(attrName), b"" elif c in asciiUppercaseBytes: attrValue.append(c.lower()) elif c is None: @@ -724,9 +816,9 @@ class EncodingParser(object): attrValue.append(c) # Step 11 while True: - c = data.next() + c = next(data) if c in spacesAngleBrackets: - return "".join(attrName), "".join(attrValue) + return b"".join(attrName), b"".join(attrValue) elif c in asciiUppercaseBytes: 
attrValue.append(c.lower()) elif c is None: @@ -737,21 +829,23 @@ class EncodingParser(object): class ContentAttrParser(object): def __init__(self, data): + assert isinstance(data, bytes) self.data = data + def parse(self): try: - #Check if the attr name is charset - #otherwise return - self.data.jumpTo("charset") + # Check if the attr name is charset + # otherwise return + self.data.jumpTo(b"charset") self.data.position += 1 self.data.skip() - if not self.data.currentByte == "=": - #If there is no = sign keep looking for attrs + if not self.data.currentByte == b"=": + # If there is no = sign keep looking for attrs return None self.data.position += 1 self.data.skip() - #Look for an encoding between matching quote marks - if self.data.currentByte in ('"', "'"): + # Look for an encoding between matching quote marks + if self.data.currentByte in (b'"', b"'"): quoteMark = self.data.currentByte self.data.position += 1 oldPosition = self.data.position @@ -760,13 +854,13 @@ class ContentAttrParser(object): else: return None else: - #Unquoted value + # Unquoted value oldPosition = self.data.position try: self.data.skipUntil(spaceCharactersBytes) return self.data[oldPosition:self.data.position] except StopIteration: - #Return the whole remaining value + # Return the whole remaining value return self.data[oldPosition:] except StopIteration: return None @@ -775,7 +869,12 @@ class ContentAttrParser(object): def codecName(encoding): """Return the python codec name corresponding to an encoding or None if the string doesn't correspond to a valid encoding.""" - if (encoding is not None and type(encoding) in types.StringTypes): + if isinstance(encoding, bytes): + try: + encoding = encoding.decode("ascii") + except UnicodeDecodeError: + return None + if encoding: canonicalName = ascii_punctuation_re.sub("", encoding).lower() return encodings.get(canonicalName, None) else: diff --git a/libs/html5lib/sanitizer.py b/libs/html5lib/sanitizer.py index ae4c7d8..71dc521 100644 --- 
a/libs/html5lib/sanitizer.py +++ b/libs/html5lib/sanitizer.py @@ -1,142 +1,145 @@ +from __future__ import absolute_import, division, unicode_literals + import re from xml.sax.saxutils import escape, unescape -from tokenizer import HTMLTokenizer -from constants import tokenTypes +from .tokenizer import HTMLTokenizer +from .constants import tokenTypes + class HTMLSanitizerMixin(object): """ sanitization of XHTML+MathML+SVG and of inline style attributes.""" acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', - 'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button', - 'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup', - 'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn', - 'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset', - 'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1', - 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins', - 'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu', 'meter', - 'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', 'option', - 'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select', - 'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong', - 'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot', - 'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video'] - + 'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button', + 'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup', + 'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn', + 'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset', + 'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1', + 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins', + 'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu', 'meter', + 'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', 'option', + 'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select', + 'small', 'sound', 'source', 
'spacer', 'span', 'strike', 'strong', + 'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot', + 'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video'] + mathml_elements = ['maction', 'math', 'merror', 'mfrac', 'mi', - 'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', 'mphantom', - 'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', 'msub', - 'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder', - 'munderover', 'none'] - + 'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', 'mphantom', + 'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', 'msub', + 'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder', + 'munderover', 'none'] + svg_elements = ['a', 'animate', 'animateColor', 'animateMotion', - 'animateTransform', 'clipPath', 'circle', 'defs', 'desc', 'ellipse', - 'font-face', 'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern', - 'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph', - 'mpath', 'path', 'polygon', 'polyline', 'radialGradient', 'rect', - 'set', 'stop', 'svg', 'switch', 'text', 'title', 'tspan', 'use'] - + 'animateTransform', 'clipPath', 'circle', 'defs', 'desc', 'ellipse', + 'font-face', 'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern', + 'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph', + 'mpath', 'path', 'polygon', 'polyline', 'radialGradient', 'rect', + 'set', 'stop', 'svg', 'switch', 'text', 'title', 'tspan', 'use'] + acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey', - 'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis', - 'background', 'balance', 'bgcolor', 'bgproperties', 'border', - 'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding', - 'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff', - 'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color', - 'cols', 'colspan', 'compact', 'contenteditable', 'controls', 'coords', - 'data', 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default', - 
'delay', 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end', - 'face', 'for', 'form', 'frame', 'galleryimg', 'gutter', 'headers', - 'height', 'hidefocus', 'hidden', 'high', 'href', 'hreflang', 'hspace', - 'icon', 'id', 'inputmode', 'ismap', 'keytype', 'label', 'leftspacing', - 'lang', 'list', 'longdesc', 'loop', 'loopcount', 'loopend', - 'loopstart', 'low', 'lowsrc', 'max', 'maxlength', 'media', 'method', - 'min', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'open', - 'optimum', 'pattern', 'ping', 'point-size', 'prompt', 'pqg', - 'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min', - 'replace', 'required', 'rev', 'rightspacing', 'rows', 'rowspan', - 'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start', - 'step', 'style', 'summary', 'suppress', 'tabindex', 'target', - 'template', 'title', 'toppadding', 'type', 'unselectable', 'usemap', - 'urn', 'valign', 'value', 'variable', 'volume', 'vspace', 'vrml', - 'width', 'wrap', 'xml:lang'] + 'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis', + 'background', 'balance', 'bgcolor', 'bgproperties', 'border', + 'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding', + 'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff', + 'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color', + 'cols', 'colspan', 'compact', 'contenteditable', 'controls', 'coords', + 'data', 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default', + 'delay', 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end', + 'face', 'for', 'form', 'frame', 'galleryimg', 'gutter', 'headers', + 'height', 'hidefocus', 'hidden', 'high', 'href', 'hreflang', 'hspace', + 'icon', 'id', 'inputmode', 'ismap', 'keytype', 'label', 'leftspacing', + 'lang', 'list', 'longdesc', 'loop', 'loopcount', 'loopend', + 'loopstart', 'low', 'lowsrc', 'max', 'maxlength', 'media', 'method', + 'min', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'open', + 'optimum', 'pattern', 'ping', 
'point-size', 'poster', 'pqg', 'preload', + 'prompt', 'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min', + 'replace', 'required', 'rev', 'rightspacing', 'rows', 'rowspan', + 'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start', + 'step', 'style', 'summary', 'suppress', 'tabindex', 'target', + 'template', 'title', 'toppadding', 'type', 'unselectable', 'usemap', + 'urn', 'valign', 'value', 'variable', 'volume', 'vspace', 'vrml', + 'width', 'wrap', 'xml:lang'] mathml_attributes = ['actiontype', 'align', 'columnalign', 'columnalign', - 'columnalign', 'columnlines', 'columnspacing', 'columnspan', 'depth', - 'display', 'displaystyle', 'equalcolumns', 'equalrows', 'fence', - 'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', 'lspace', - 'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', 'maxsize', - 'minsize', 'other', 'rowalign', 'rowalign', 'rowalign', 'rowlines', - 'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection', - 'separator', 'stretchy', 'width', 'width', 'xlink:href', 'xlink:show', - 'xlink:type', 'xmlns', 'xmlns:xlink'] - + 'columnalign', 'columnlines', 'columnspacing', 'columnspan', 'depth', + 'display', 'displaystyle', 'equalcolumns', 'equalrows', 'fence', + 'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', 'lspace', + 'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', 'maxsize', + 'minsize', 'other', 'rowalign', 'rowalign', 'rowalign', 'rowlines', + 'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection', + 'separator', 'stretchy', 'width', 'width', 'xlink:href', 'xlink:show', + 'xlink:type', 'xmlns', 'xmlns:xlink'] + svg_attributes = ['accent-height', 'accumulate', 'additive', 'alphabetic', - 'arabic-form', 'ascent', 'attributeName', 'attributeType', - 'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height', - 'class', 'clip-path', 'color', 'color-rendering', 'content', 'cx', - 'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end', 'fill', - 'fill-opacity', 
'fill-rule', 'font-family', 'font-size', - 'font-stretch', 'font-style', 'font-variant', 'font-weight', 'from', - 'fx', 'fy', 'g1', 'g2', 'glyph-name', 'gradientUnits', 'hanging', - 'height', 'horiz-adv-x', 'horiz-origin-x', 'id', 'ideographic', 'k', - 'keyPoints', 'keySplines', 'keyTimes', 'lang', 'marker-end', - 'marker-mid', 'marker-start', 'markerHeight', 'markerUnits', - 'markerWidth', 'mathematical', 'max', 'min', 'name', 'offset', - 'opacity', 'orient', 'origin', 'overline-position', - 'overline-thickness', 'panose-1', 'path', 'pathLength', 'points', - 'preserveAspectRatio', 'r', 'refX', 'refY', 'repeatCount', - 'repeatDur', 'requiredExtensions', 'requiredFeatures', 'restart', - 'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', 'stop-color', - 'stop-opacity', 'strikethrough-position', 'strikethrough-thickness', - 'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap', - 'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity', - 'stroke-width', 'systemLanguage', 'target', 'text-anchor', 'to', - 'transform', 'type', 'u1', 'u2', 'underline-position', - 'underline-thickness', 'unicode', 'unicode-range', 'units-per-em', - 'values', 'version', 'viewBox', 'visibility', 'width', 'widths', 'x', - 'x-height', 'x1', 'x2', 'xlink:actuate', 'xlink:arcrole', - 'xlink:href', 'xlink:role', 'xlink:show', 'xlink:title', 'xlink:type', - 'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y', - 'y1', 'y2', 'zoomAndPan'] - - attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc', - 'xlink:href', 'xml:base'] + 'arabic-form', 'ascent', 'attributeName', 'attributeType', + 'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height', + 'class', 'clip-path', 'color', 'color-rendering', 'content', 'cx', + 'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end', 'fill', + 'fill-opacity', 'fill-rule', 'font-family', 'font-size', + 'font-stretch', 'font-style', 'font-variant', 'font-weight', 'from', + 'fx', 'fy', 'g1', 'g2', 'glyph-name', 
'gradientUnits', 'hanging', + 'height', 'horiz-adv-x', 'horiz-origin-x', 'id', 'ideographic', 'k', + 'keyPoints', 'keySplines', 'keyTimes', 'lang', 'marker-end', + 'marker-mid', 'marker-start', 'markerHeight', 'markerUnits', + 'markerWidth', 'mathematical', 'max', 'min', 'name', 'offset', + 'opacity', 'orient', 'origin', 'overline-position', + 'overline-thickness', 'panose-1', 'path', 'pathLength', 'points', + 'preserveAspectRatio', 'r', 'refX', 'refY', 'repeatCount', + 'repeatDur', 'requiredExtensions', 'requiredFeatures', 'restart', + 'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', 'stop-color', + 'stop-opacity', 'strikethrough-position', 'strikethrough-thickness', + 'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap', + 'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity', + 'stroke-width', 'systemLanguage', 'target', 'text-anchor', 'to', + 'transform', 'type', 'u1', 'u2', 'underline-position', + 'underline-thickness', 'unicode', 'unicode-range', 'units-per-em', + 'values', 'version', 'viewBox', 'visibility', 'width', 'widths', 'x', + 'x-height', 'x1', 'x2', 'xlink:actuate', 'xlink:arcrole', + 'xlink:href', 'xlink:role', 'xlink:show', 'xlink:title', 'xlink:type', + 'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y', + 'y1', 'y2', 'zoomAndPan'] + + attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc', 'poster', + 'xlink:href', 'xml:base'] svg_attr_val_allows_ref = ['clip-path', 'color-profile', 'cursor', 'fill', - 'filter', 'marker', 'marker-start', 'marker-mid', 'marker-end', - 'mask', 'stroke'] + 'filter', 'marker', 'marker-start', 'marker-mid', 'marker-end', + 'mask', 'stroke'] svg_allow_local_href = ['altGlyph', 'animate', 'animateColor', - 'animateMotion', 'animateTransform', 'cursor', 'feImage', 'filter', - 'linearGradient', 'pattern', 'radialGradient', 'textpath', 'tref', - 'set', 'use'] - + 'animateMotion', 'animateTransform', 'cursor', 'feImage', 'filter', + 'linearGradient', 'pattern', 'radialGradient', 
'textpath', 'tref', + 'set', 'use'] + acceptable_css_properties = ['azimuth', 'background-color', - 'border-bottom-color', 'border-collapse', 'border-color', - 'border-left-color', 'border-right-color', 'border-top-color', 'clear', - 'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font', - 'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight', - 'height', 'letter-spacing', 'line-height', 'overflow', 'pause', - 'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness', - 'speak', 'speak-header', 'speak-numeral', 'speak-punctuation', - 'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent', - 'unicode-bidi', 'vertical-align', 'voice-family', 'volume', - 'white-space', 'width'] - + 'border-bottom-color', 'border-collapse', 'border-color', + 'border-left-color', 'border-right-color', 'border-top-color', 'clear', + 'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font', + 'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight', + 'height', 'letter-spacing', 'line-height', 'overflow', 'pause', + 'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness', + 'speak', 'speak-header', 'speak-numeral', 'speak-punctuation', + 'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent', + 'unicode-bidi', 'vertical-align', 'voice-family', 'volume', + 'white-space', 'width'] + acceptable_css_keywords = ['auto', 'aqua', 'black', 'block', 'blue', - 'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed', - 'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left', - 'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive', - 'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top', - 'transparent', 'underline', 'white', 'yellow'] - - acceptable_svg_properties = [ 'fill', 'fill-opacity', 'fill-rule', - 'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin', - 'stroke-opacity'] - - acceptable_protocols = [ 'ed2k', 
'ftp', 'http', 'https', 'irc', - 'mailto', 'news', 'gopher', 'nntp', 'telnet', 'webcal', - 'xmpp', 'callto', 'feed', 'urn', 'aim', 'rsync', 'tag', - 'ssh', 'sftp', 'rtsp', 'afs' ] - + 'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed', + 'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left', + 'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive', + 'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top', + 'transparent', 'underline', 'white', 'yellow'] + + acceptable_svg_properties = ['fill', 'fill-opacity', 'fill-rule', + 'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin', + 'stroke-opacity'] + + acceptable_protocols = ['ed2k', 'ftp', 'http', 'https', 'irc', + 'mailto', 'news', 'gopher', 'nntp', 'telnet', 'webcal', + 'xmpp', 'callto', 'feed', 'urn', 'aim', 'rsync', 'tag', + 'ssh', 'sftp', 'rtsp', 'afs'] + # subclasses may define their own versions of these constants allowed_elements = acceptable_elements + mathml_elements + svg_elements allowed_attributes = acceptable_attributes + mathml_attributes + svg_attributes @@ -160,94 +163,104 @@ class HTMLSanitizerMixin(object): # accommodate filters which use token_type differently token_type = token["type"] - if token_type in tokenTypes.keys(): - token_type = tokenTypes[token_type] + if token_type in list(tokenTypes.keys()): + token_type = tokenTypes[token_type] - if token_type in (tokenTypes["StartTag"], tokenTypes["EndTag"], - tokenTypes["EmptyTag"]): + if token_type in (tokenTypes["StartTag"], tokenTypes["EndTag"], + tokenTypes["EmptyTag"]): if token["name"] in self.allowed_elements: - if token.has_key("data"): - attrs = dict([(name,val) for name,val in - token["data"][::-1] - if name in self.allowed_attributes]) - for attr in self.attr_val_is_uri: - if not attrs.has_key(attr): - continue - val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '', - unescape(attrs[attr])).lower() - #remove replacement characters from unescaped characters - 
val_unescaped = val_unescaped.replace(u"\ufffd", "") - if (re.match("^[a-z0-9][-+.a-z0-9]*:",val_unescaped) and - (val_unescaped.split(':')[0] not in - self.allowed_protocols)): - del attrs[attr] - for attr in self.svg_attr_val_allows_ref: - if attr in attrs: - attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)', - ' ', - unescape(attrs[attr])) - if (token["name"] in self.svg_allow_local_href and - 'xlink:href' in attrs and re.search('^\s*[^#\s].*', - attrs['xlink:href'])): - del attrs['xlink:href'] - if attrs.has_key('style'): - attrs['style'] = self.sanitize_css(attrs['style']) - token["data"] = [[name,val] for name,val in attrs.items()] - return token + return self.allowed_token(token, token_type) else: - if token_type == tokenTypes["EndTag"]: - token["data"] = "</%s>" % token["name"] - elif token["data"]: - attrs = ''.join([' %s="%s"' % (k,escape(v)) for k,v in token["data"]]) - token["data"] = "<%s%s>" % (token["name"],attrs) - else: - token["data"] = "<%s>" % token["name"] - if token.get("selfClosing"): - token["data"]=token["data"][:-1] + "/>" - - if token["type"] in tokenTypes.keys(): - token["type"] = "Characters" - else: - token["type"] = tokenTypes["Characters"] - - del token["name"] - return token + return self.disallowed_token(token, token_type) elif token_type == tokenTypes["Comment"]: pass else: return token + def allowed_token(self, token, token_type): + if "data" in token: + attrs = dict([(name, val) for name, val in + token["data"][::-1] + if name in self.allowed_attributes]) + for attr in self.attr_val_is_uri: + if attr not in attrs: + continue + val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '', + unescape(attrs[attr])).lower() + # remove replacement characters from unescaped characters + val_unescaped = val_unescaped.replace("\ufffd", "") + if (re.match("^[a-z0-9][-+.a-z0-9]*:", val_unescaped) and + (val_unescaped.split(':')[0] not in + self.allowed_protocols)): + del attrs[attr] + for attr in self.svg_attr_val_allows_ref: + if attr in attrs: 
+ attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)', + ' ', + unescape(attrs[attr])) + if (token["name"] in self.svg_allow_local_href and + 'xlink:href' in attrs and re.search('^\s*[^#\s].*', + attrs['xlink:href'])): + del attrs['xlink:href'] + if 'style' in attrs: + attrs['style'] = self.sanitize_css(attrs['style']) + token["data"] = [[name, val] for name, val in list(attrs.items())] + return token + + def disallowed_token(self, token, token_type): + if token_type == tokenTypes["EndTag"]: + token["data"] = "</%s>" % token["name"] + elif token["data"]: + attrs = ''.join([' %s="%s"' % (k, escape(v)) for k, v in token["data"]]) + token["data"] = "<%s%s>" % (token["name"], attrs) + else: + token["data"] = "<%s>" % token["name"] + if token.get("selfClosing"): + token["data"] = token["data"][:-1] + "/>" + + if token["type"] in list(tokenTypes.keys()): + token["type"] = "Characters" + else: + token["type"] = tokenTypes["Characters"] + + del token["name"] + return token + def sanitize_css(self, style): # disallow urls - style=re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ',style) + style = re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style) # gauntlet - if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): return '' - if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style): return '' + if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): + return '' + if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style): + return '' clean = [] - for prop,value in re.findall("([-\w]+)\s*:\s*([^:;]*)",style): - if not value: continue - if prop.lower() in self.allowed_css_properties: - clean.append(prop + ': ' + value + ';') - elif prop.split('-')[0].lower() in ['background','border','margin', - 'padding']: - for keyword in value.split(): - if not keyword in self.acceptable_css_keywords and \ - not 
re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$",keyword): - break - else: - clean.append(prop + ': ' + value + ';') - elif prop.lower() in self.allowed_svg_properties: - clean.append(prop + ': ' + value + ';') + for prop, value in re.findall("([-\w]+)\s*:\s*([^:;]*)", style): + if not value: + continue + if prop.lower() in self.allowed_css_properties: + clean.append(prop + ': ' + value + ';') + elif prop.split('-')[0].lower() in ['background', 'border', 'margin', + 'padding']: + for keyword in value.split(): + if not keyword in self.acceptable_css_keywords and \ + not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword): + break + else: + clean.append(prop + ': ' + value + ';') + elif prop.lower() in self.allowed_svg_properties: + clean.append(prop + ': ' + value + ';') return ' '.join(clean) + class HTMLSanitizer(HTMLTokenizer, HTMLSanitizerMixin): def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True, lowercaseElementName=False, lowercaseAttrName=False, parser=None): - #Change case matching defaults as we only output lowercase html anyway - #This solution doesn't seem ideal... + # Change case matching defaults as we only output lowercase html anyway + # This solution doesn't seem ideal... HTMLTokenizer.__init__(self, stream, encoding, parseMeta, useChardet, lowercaseElementName, lowercaseAttrName, parser=parser) diff --git a/libs/html5lib/serializer/__init__.py b/libs/html5lib/serializer/__init__.py index 1b74665..8380839 100644 --- a/libs/html5lib/serializer/__init__.py +++ b/libs/html5lib/serializer/__init__.py @@ -1,17 +1,16 @@ +from __future__ import absolute_import, division, unicode_literals -from html5lib import treewalkers +from .. 
import treewalkers -from htmlserializer import HTMLSerializer -from xhtmlserializer import XHTMLSerializer +from .htmlserializer import HTMLSerializer -def serialize(input, tree="simpletree", format="html", encoding=None, + +def serialize(input, tree="etree", format="html", encoding=None, **serializer_opts): # XXX: Should we cache this? - walker = treewalkers.getTreeWalker(tree) + walker = treewalkers.getTreeWalker(tree) if format == "html": s = HTMLSerializer(**serializer_opts) - elif format == "xhtml": - s = XHTMLSerializer(**serializer_opts) else: - raise ValueError, "type must be either html or xhtml" + raise ValueError("type must be html") return s.render(walker(input), encoding) diff --git a/libs/html5lib/serializer/htmlserializer.py b/libs/html5lib/serializer/htmlserializer.py index 8dd0a81..412a5a2 100644 --- a/libs/html5lib/serializer/htmlserializer.py +++ b/libs/html5lib/serializer/htmlserializer.py @@ -1,18 +1,20 @@ -try: - frozenset -except NameError: - # Import from the sets module for python 2.3 - from sets import ImmutableSet as frozenset +from __future__ import absolute_import, division, unicode_literals +from six import text_type import gettext _ = gettext.gettext -from html5lib.constants import voidElements, booleanAttributes, spaceCharacters -from html5lib.constants import rcdataElements, entities, xmlEntities -from html5lib import utils +try: + from functools import reduce +except ImportError: + pass + +from ..constants import voidElements, booleanAttributes, spaceCharacters +from ..constants import rcdataElements, entities, xmlEntities +from .. 
import utils from xml.sax.saxutils import escape -spaceCharacters = u"".join(spaceCharacters) +spaceCharacters = "".join(spaceCharacters) try: from codecs import register_error, xmlcharrefreplace_errors @@ -21,24 +23,18 @@ except ImportError: else: unicode_encode_errors = "htmlentityreplace" - from html5lib.constants import entities - encode_entity_map = {} - is_ucs4 = len(u"\U0010FFFF") == 1 - for k, v in entities.items(): - #skip multi-character entities + is_ucs4 = len("\U0010FFFF") == 1 + for k, v in list(entities.items()): + # skip multi-character entities if ((is_ucs4 and len(v) > 1) or - (not is_ucs4 and len(v) > 2)): + (not is_ucs4 and len(v) > 2)): continue if v != "&": if len(v) == 2: v = utils.surrogatePairToCodepoint(v) else: - try: - v = ord(v) - except: - print v - raise + v = ord(v) if not v in encode_entity_map or k.islower(): # prefer < over < and similarly for &, >, etc. encode_entity_map[v] = k @@ -53,8 +49,8 @@ else: skip = False continue index = i + exc.start - if utils.isSurrogatePair(exc.object[index:min([exc.end, index+2])]): - codepoint = utils.surrogatePairToCodepoint(exc.object[index:index+2]) + if utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]): + codepoint = utils.surrogatePairToCodepoint(exc.object[index:index + 2]) skip = True else: codepoint = ord(c) @@ -67,8 +63,8 @@ else: if not e.endswith(";"): res.append(";") else: - res.append("&#x%s;"%(hex(cp)[2:])) - return (u"".join(res), exc.end) + res.append("&#x%s;" % (hex(cp)[2:])) + return ("".join(res), exc.end) else: return xmlcharrefreplace_errors(exc) @@ -81,7 +77,7 @@ class HTMLSerializer(object): # attribute quoting options quote_attr_values = False - quote_char = u'"' + quote_char = '"' use_best_quote_char = True # tag syntax options @@ -96,15 +92,17 @@ class HTMLSerializer(object): resolve_entities = True # miscellaneous options + alphabetical_attributes = False inject_meta_charset = True strip_whitespace = False sanitize = False options = ("quote_attr_values", 
"quote_char", "use_best_quote_char", - "minimize_boolean_attributes", "use_trailing_solidus", - "space_before_trailing_solidus", "omit_optional_tags", - "strip_whitespace", "inject_meta_charset", "escape_lt_in_attrs", - "escape_rcdata", "resolve_entities", "sanitize") + "omit_optional_tags", "minimize_boolean_attributes", + "use_trailing_solidus", "space_before_trailing_solidus", + "escape_lt_in_attrs", "escape_rcdata", "resolve_entities", + "alphabetical_attributes", "inject_meta_charset", + "strip_whitespace", "sanitize") def __init__(self, **kwargs): """Initialize HTMLSerializer. @@ -147,10 +145,12 @@ class HTMLSerializer(object): See `html5lib user documentation`_ omit_optional_tags=True|False Omit start/end tags that are optional. + alphabetical_attributes=False|True + Reorder attributes to be in alphabetical order. .. _html5lib user documentation: http://code.google.com/p/html5lib/wiki/UserDocumentation """ - if kwargs.has_key('quote_char'): + if 'quote_char' in kwargs: self.use_best_quote_char = False for attr in self.options: setattr(self, attr, kwargs.get(attr, getattr(self, attr))) @@ -158,14 +158,14 @@ class HTMLSerializer(object): self.strict = False def encode(self, string): - assert(isinstance(string, unicode)) + assert(isinstance(string, text_type)) if self.encoding: return string.encode(self.encoding, unicode_encode_errors) else: return string def encodeStrict(self, string): - assert(isinstance(string, unicode)) + assert(isinstance(string, text_type)) if self.encoding: return string.encode(self.encoding, "strict") else: @@ -175,39 +175,46 @@ class HTMLSerializer(object): self.encoding = encoding in_cdata = False self.errors = [] + if encoding and self.inject_meta_charset: - from html5lib.filters.inject_meta_charset import Filter + from ..filters.inject_meta_charset import Filter treewalker = Filter(treewalker, encoding) - # XXX: WhitespaceFilter should be used before OptionalTagFilter + # WhitespaceFilter should be used before OptionalTagFilter # 
for maximum efficiently of this latter filter if self.strip_whitespace: - from html5lib.filters.whitespace import Filter + from ..filters.whitespace import Filter treewalker = Filter(treewalker) if self.sanitize: - from html5lib.filters.sanitizer import Filter + from ..filters.sanitizer import Filter treewalker = Filter(treewalker) if self.omit_optional_tags: - from html5lib.filters.optionaltags import Filter + from ..filters.optionaltags import Filter + treewalker = Filter(treewalker) + # Alphabetical attributes must be last, as other filters + # could add attributes and alter the order + if self.alphabetical_attributes: + from ..filters.alphabeticalattributes import Filter treewalker = Filter(treewalker) + for token in treewalker: type = token["type"] if type == "Doctype": - doctype = u"<!DOCTYPE %s" % token["name"] - + doctype = "<!DOCTYPE %s" % token["name"] + if token["publicId"]: - doctype += u' PUBLIC "%s"' % token["publicId"] + doctype += ' PUBLIC "%s"' % token["publicId"] elif token["systemId"]: - doctype += u" SYSTEM" - if token["systemId"]: - if token["systemId"].find(u'"') >= 0: - if token["systemId"].find(u"'") >= 0: + doctype += " SYSTEM" + if token["systemId"]: + if token["systemId"].find('"') >= 0: + if token["systemId"].find("'") >= 0: self.serializeError(_("System identifer contains both single and double quote characters")) - quote_char = u"'" + quote_char = "'" else: - quote_char = u'"' - doctype += u" %s%s%s" % (quote_char, token["systemId"], quote_char) - - doctype += u">" + quote_char = '"' + doctype += " %s%s%s" % (quote_char, token["systemId"], quote_char) + + doctype += ">" yield self.encodeStrict(doctype) elif type in ("Characters", "SpaceCharacters"): @@ -220,41 +227,41 @@ class HTMLSerializer(object): elif type in ("StartTag", "EmptyTag"): name = token["name"] - yield self.encodeStrict(u"<%s" % name) + yield self.encodeStrict("<%s" % name) if name in rcdataElements and not self.escape_rcdata: in_cdata = True elif in_cdata: 
self.serializeError(_("Unexpected child element of a CDATA element")) - attributes = [] - for (attr_namespace,attr_name),attr_value in sorted(token["data"].items()): - #TODO: Add namespace support here + for (attr_namespace, attr_name), attr_value in token["data"].items(): + # TODO: Add namespace support here k = attr_name v = attr_value - yield self.encodeStrict(u' ') + yield self.encodeStrict(' ') yield self.encodeStrict(k) if not self.minimize_boolean_attributes or \ - (k not in booleanAttributes.get(name, tuple()) \ - and k not in booleanAttributes.get("", tuple())): - yield self.encodeStrict(u"=") + (k not in booleanAttributes.get(name, tuple()) + and k not in booleanAttributes.get("", tuple())): + yield self.encodeStrict("=") if self.quote_attr_values or not v: quote_attr = True else: - quote_attr = reduce(lambda x,y: x or (y in v), - spaceCharacters + u">\"'=", False) - v = v.replace(u"&", u"&") - if self.escape_lt_in_attrs: v = v.replace(u"<", u"<") + quote_attr = reduce(lambda x, y: x or (y in v), + spaceCharacters + ">\"'=", False) + v = v.replace("&", "&") + if self.escape_lt_in_attrs: + v = v.replace("<", "<") if quote_attr: quote_char = self.quote_char if self.use_best_quote_char: - if u"'" in v and u'"' not in v: - quote_char = u'"' - elif u'"' in v and u"'" not in v: - quote_char = u"'" - if quote_char == u"'": - v = v.replace(u"'", u"'") + if "'" in v and '"' not in v: + quote_char = '"' + elif '"' in v and "'" not in v: + quote_char = "'" + if quote_char == "'": + v = v.replace("'", "'") else: - v = v.replace(u'"', u""") + v = v.replace('"', """) yield self.encodeStrict(quote_char) yield self.encode(v) yield self.encodeStrict(quote_char) @@ -262,10 +269,10 @@ class HTMLSerializer(object): yield self.encode(v) if name in voidElements and self.use_trailing_solidus: if self.space_before_trailing_solidus: - yield self.encodeStrict(u" /") + yield self.encodeStrict(" /") else: - yield self.encodeStrict(u"/") - yield self.encode(u">") + yield 
self.encodeStrict("/") + yield self.encode(">") elif type == "EndTag": name = token["name"] @@ -273,13 +280,13 @@ class HTMLSerializer(object): in_cdata = False elif in_cdata: self.serializeError(_("Unexpected child element of a CDATA element")) - yield self.encodeStrict(u"</%s>" % name) + yield self.encodeStrict("</%s>" % name) elif type == "Comment": data = token["data"] if data.find("--") >= 0: self.serializeError(_("Comment contains --")) - yield self.encodeStrict(u"<!--%s-->" % token["data"]) + yield self.encodeStrict("<!--%s-->" % token["data"]) elif type == "Entity": name = token["name"] @@ -289,7 +296,7 @@ class HTMLSerializer(object): if self.resolve_entities and key not in xmlEntities: data = entities[key] else: - data = u"&%s;" % name + data = "&%s;" % name yield self.encodeStrict(data) else: @@ -297,9 +304,9 @@ class HTMLSerializer(object): def render(self, treewalker, encoding=None): if encoding: - return "".join(list(self.serialize(treewalker, encoding))) + return b"".join(list(self.serialize(treewalker, encoding))) else: - return u"".join(list(self.serialize(treewalker))) + return "".join(list(self.serialize(treewalker))) def serializeError(self, data="XXX ERROR MESSAGE NEEDED"): # XXX The idea is to make data mandatory. 
@@ -307,6 +314,7 @@ class HTMLSerializer(object): if self.strict: raise SerializeError + def SerializeError(Exception): """Error in serialized tree""" pass diff --git a/libs/html5lib/serializer/xhtmlserializer.py b/libs/html5lib/serializer/xhtmlserializer.py deleted file mode 100644 index 7fdce47..0000000 --- a/libs/html5lib/serializer/xhtmlserializer.py +++ /dev/null @@ -1,9 +0,0 @@ -from htmlserializer import HTMLSerializer - -class XHTMLSerializer(HTMLSerializer): - quote_attr_values = True - minimize_boolean_attributes = False - use_trailing_solidus = True - escape_lt_in_attrs = True - omit_optional_tags = False - escape_rcdata = True diff --git a/libs/html5lib/tokenizer.py b/libs/html5lib/tokenizer.py index 7e9eca8..7977457 100644 --- a/libs/html5lib/tokenizer.py +++ b/libs/html5lib/tokenizer.py @@ -1,27 +1,25 @@ +from __future__ import absolute_import, division, unicode_literals + try: - frozenset + chr = unichr # flake8: noqa except NameError: - # Import from the sets module for python 2.3 - from sets import Set as set - from sets import ImmutableSet as frozenset -try: - from collections import deque -except ImportError: - from utils import deque - -from constants import spaceCharacters -from constants import entitiesWindows1252, entities -from constants import asciiLowercase, asciiLetters, asciiUpper2Lower -from constants import digits, hexDigits, EOF -from constants import tokenTypes, tagTokenTypes -from constants import replacementCharacters - -from inputstream import HTMLInputStream - -# Group entities by their first character, for faster lookups -entitiesByFirstChar = {} -for e in entities: - entitiesByFirstChar.setdefault(e[0], []).append(e) + pass + +from collections import deque + +from .constants import spaceCharacters +from .constants import entities +from .constants import asciiLetters, asciiUpper2Lower +from .constants import digits, hexDigits, EOF +from .constants import tokenTypes, tagTokenTypes +from .constants import replacementCharacters + 
+from .inputstream import HTMLInputStream + +from .trie import Trie + +entitiesTrie = Trie(entities) + class HTMLTokenizer(object): """ This class takes care of tokenizing HTML. @@ -42,10 +40,10 @@ class HTMLTokenizer(object): self.stream = HTMLInputStream(stream, encoding, parseMeta, useChardet) self.parser = parser - #Perform case conversions? + # Perform case conversions? self.lowercaseElementName = lowercaseElementName self.lowercaseAttrName = lowercaseAttrName - + # Setup the initial tokenizer state self.escapeFlag = False self.lastFourChars = [] @@ -100,78 +98,79 @@ class HTMLTokenizer(object): if charAsInt in replacementCharacters: char = replacementCharacters[charAsInt] self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "illegal-codepoint-for-numeric-entity", - "datavars": {"charAsInt": charAsInt}}) - elif ((0xD800 <= charAsInt <= 0xDFFF) or + "illegal-codepoint-for-numeric-entity", + "datavars": {"charAsInt": charAsInt}}) + elif ((0xD800 <= charAsInt <= 0xDFFF) or (charAsInt > 0x10FFFF)): - char = u"\uFFFD" + char = "\uFFFD" self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "illegal-codepoint-for-numeric-entity", - "datavars": {"charAsInt": charAsInt}}) + "illegal-codepoint-for-numeric-entity", + "datavars": {"charAsInt": charAsInt}}) else: - #Should speed up this check somehow (e.g. move the set to a constant) - if ((0x0001 <= charAsInt <= 0x0008) or - (0x000E <= charAsInt <= 0x001F) or - (0x007F <= charAsInt <= 0x009F) or - (0xFDD0 <= charAsInt <= 0xFDEF) or - charAsInt in frozenset([0x000B, 0xFFFE, 0xFFFF, 0x1FFFE, + # Should speed up this check somehow (e.g. 
move the set to a constant) + if ((0x0001 <= charAsInt <= 0x0008) or + (0x000E <= charAsInt <= 0x001F) or + (0x007F <= charAsInt <= 0x009F) or + (0xFDD0 <= charAsInt <= 0xFDEF) or + charAsInt in frozenset([0x000B, 0xFFFE, 0xFFFF, 0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, - 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, + 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE, - 0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE, - 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, - 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, + 0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE, + 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, + 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF, 0x10FFFE, 0x10FFFF])): - self.tokenQueue.append({"type": tokenTypes["ParseError"], + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "illegal-codepoint-for-numeric-entity", + "illegal-codepoint-for-numeric-entity", "datavars": {"charAsInt": charAsInt}}) try: # Try/except needed as UCS-2 Python builds' unichar only works # within the BMP. - char = unichr(charAsInt) + char = chr(charAsInt) except ValueError: - char = eval("u'\\U%08x'" % charAsInt) + v = charAsInt - 0x10000 + char = chr(0xD800 | (v >> 10)) + chr(0xDC00 | (v & 0x3FF)) # Discard the ; if present. Otherwise, put it back on the queue and # invoke parseError on parser. 
- if c != u";": + if c != ";": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "numeric-entity-without-semicolon"}) + "numeric-entity-without-semicolon"}) self.stream.unget(c) return char def consumeEntity(self, allowedChar=None, fromAttribute=False): # Initialise to the default output for when no entity is matched - output = u"&" + output = "&" charStack = [self.stream.char()] - if (charStack[0] in spaceCharacters or charStack[0] in (EOF, u"<", u"&") - or (allowedChar is not None and allowedChar == charStack[0])): + if (charStack[0] in spaceCharacters or charStack[0] in (EOF, "<", "&") + or (allowedChar is not None and allowedChar == charStack[0])): self.stream.unget(charStack[0]) - elif charStack[0] == u"#": + elif charStack[0] == "#": # Read the next character to see if it's hex or decimal hex = False charStack.append(self.stream.char()) - if charStack[-1] in (u"x", u"X"): + if charStack[-1] in ("x", "X"): hex = True charStack.append(self.stream.char()) # charStack[-1] should be the first digit if (hex and charStack[-1] in hexDigits) \ - or (not hex and charStack[-1] in digits): + or (not hex and charStack[-1] in digits): # At least one digit found, so consume the whole number self.stream.unget(charStack[-1]) output = self.consumeNumberEntity(hex) else: # No digits found self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "expected-numeric-entity"}) + "data": "expected-numeric-entity"}) self.stream.unget(charStack.pop()) - output = u"&" + u"".join(charStack) + output = "&" + "".join(charStack) else: # At this point in the process might have named entity. Entities @@ -179,46 +178,40 @@ class HTMLTokenizer(object): # # Consume characters and compare to these to a substring of the # entity names in the list until the substring no longer matches. 
- filteredEntityList = entitiesByFirstChar.get(charStack[0], []) - - def entitiesStartingWith(name): - return [e for e in filteredEntityList if e.startswith(name)] - - while (charStack[-1] is not EOF and - entitiesStartingWith("".join(charStack))): + while (charStack[-1] is not EOF): + if not entitiesTrie.has_keys_with_prefix("".join(charStack)): + break charStack.append(self.stream.char()) # At this point we have a string that starts with some characters # that may match an entity - entityName = None - # Try to find the longest entity the string will match to take care # of ¬i for instance. - for entityLength in xrange(len(charStack)-1, 1, -1): - possibleEntityName = "".join(charStack[:entityLength]) - if possibleEntityName in entities: - entityName = possibleEntityName - break + try: + entityName = entitiesTrie.longest_prefix("".join(charStack[:-1])) + entityLength = len(entityName) + except KeyError: + entityName = None if entityName is not None: if entityName[-1] != ";": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "named-entity-without-semicolon"}) + "named-entity-without-semicolon"}) if (entityName[-1] != ";" and fromAttribute and (charStack[entityLength] in asciiLetters or charStack[entityLength] in digits or - charStack[entityLength] == "=")): + charStack[entityLength] == "=")): self.stream.unget(charStack.pop()) - output = u"&" + u"".join(charStack) + output = "&" + "".join(charStack) else: output = entities[entityName] self.stream.unget(charStack.pop()) - output += u"".join(charStack[entityLength:]) + output += "".join(charStack[entityLength:]) else: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-named-entity"}) + "expected-named-entity"}) self.stream.unget(charStack.pop()) - output = u"&" + u"".join(charStack) + output = "&" + "".join(charStack) if fromAttribute: self.currentToken["data"][-1][1] += output @@ -246,28 +239,26 @@ class HTMLTokenizer(object): token["name"] = 
token["name"].translate(asciiUpper2Lower) if token["type"] == tokenTypes["EndTag"]: if token["data"]: - self.tokenQueue.append({"type":tokenTypes["ParseError"], - "data":"attributes-in-end-tag"}) + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "attributes-in-end-tag"}) if token["selfClosing"]: - self.tokenQueue.append({"type":tokenTypes["ParseError"], - "data":"self-closing-flag-on-end-tag"}) + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "self-closing-flag-on-end-tag"}) self.tokenQueue.append(token) self.state = self.dataState - # Below are the various tokenizer states worked out. - def dataState(self): data = self.stream.char() if data == "&": self.state = self.entityDataState elif data == "<": self.state = self.tagOpenState - elif data == u"\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data":"invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": u"\u0000"}) + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], + "data": "invalid-codepoint"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "\u0000"}) elif data is EOF: # Tokenization ends. return False @@ -276,21 +267,21 @@ class HTMLTokenizer(object): # state". At that point spaceCharacters are important so they are # emitted separately. 
self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data": - data + self.stream.charsUntil(spaceCharacters, True)}) + data + self.stream.charsUntil(spaceCharacters, True)}) # No need to update lastFourChars here, since the first space will # have already been appended to lastFourChars and will have broken # any <!-- or --> sequences else: - chars = self.stream.charsUntil((u"&", u"<", u"\u0000")) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": - data + chars}) + chars = self.stream.charsUntil(("&", "<", "\u0000")) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": + data + chars}) return True def entityDataState(self): self.consumeEntity() self.state = self.dataState return True - + def rcdataState(self): data = self.stream.char() if data == "&": @@ -300,113 +291,113 @@ class HTMLTokenizer(object): elif data == EOF: # Tokenization ends. return False - elif data == u"\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": u"\uFFFD"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "\uFFFD"}) elif data in spaceCharacters: # Directly after emitting a token you switch back to the "data # state". At that point spaceCharacters are important so they are # emitted separately. 
self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data": - data + self.stream.charsUntil(spaceCharacters, True)}) + data + self.stream.charsUntil(spaceCharacters, True)}) # No need to update lastFourChars here, since the first space will # have already been appended to lastFourChars and will have broken # any <!-- or --> sequences else: - chars = self.stream.charsUntil((u"&", u"<")) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": - data + chars}) + chars = self.stream.charsUntil(("&", "<", "\u0000")) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": + data + chars}) return True def characterReferenceInRcdata(self): self.consumeEntity() self.state = self.rcdataState return True - + def rawtextState(self): data = self.stream.char() if data == "<": self.state = self.rawtextLessThanSignState - elif data == u"\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": u"\uFFFD"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "\uFFFD"}) elif data == EOF: # Tokenization ends. 
return False else: - chars = self.stream.charsUntil((u"<", u"\u0000")) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": - data + chars}) + chars = self.stream.charsUntil(("<", "\u0000")) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": + data + chars}) return True - + def scriptDataState(self): data = self.stream.char() if data == "<": self.state = self.scriptDataLessThanSignState - elif data == u"\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": u"\uFFFD"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "\uFFFD"}) elif data == EOF: # Tokenization ends. return False else: - chars = self.stream.charsUntil((u"<", u"\u0000")) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": - data + chars}) + chars = self.stream.charsUntil(("<", "\u0000")) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": + data + chars}) return True - + def plaintextState(self): data = self.stream.char() if data == EOF: # Tokenization ends. 
return False - elif data == u"\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": u"\uFFFD"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "\uFFFD"}) else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": - data + self.stream.charsUntil(u"\u0000")}) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": + data + self.stream.charsUntil("\u0000")}) return True def tagOpenState(self): data = self.stream.char() - if data == u"!": + if data == "!": self.state = self.markupDeclarationOpenState - elif data == u"/": + elif data == "/": self.state = self.closeTagOpenState elif data in asciiLetters: - self.currentToken = {"type": tokenTypes["StartTag"], + self.currentToken = {"type": tokenTypes["StartTag"], "name": data, "data": [], "selfClosing": False, "selfClosingAcknowledged": False} self.state = self.tagNameState - elif data == u">": + elif data == ">": # XXX In theory it could be something besides a tag name. But # do we really care? self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-tag-name-but-got-right-bracket"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"<>"}) + "expected-tag-name-but-got-right-bracket"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<>"}) self.state = self.dataState - elif data == u"?": + elif data == "?": # XXX In theory it could be something besides a tag name. But # do we really care? 
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-tag-name-but-got-question-mark"}) + "expected-tag-name-but-got-question-mark"}) self.stream.unget(data) self.state = self.bogusCommentState else: # XXX self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-tag-name"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"<"}) + "expected-tag-name"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) self.stream.unget(data) self.state = self.dataState return True @@ -415,22 +406,22 @@ class HTMLTokenizer(object): data = self.stream.char() if data in asciiLetters: self.currentToken = {"type": tokenTypes["EndTag"], "name": data, - "data": [], "selfClosing":False} + "data": [], "selfClosing": False} self.state = self.tagNameState - elif data == u">": + elif data == ">": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-closing-tag-but-got-right-bracket"}) + "expected-closing-tag-but-got-right-bracket"}) self.state = self.dataState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-closing-tag-but-got-eof"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"</"}) + "expected-closing-tag-but-got-eof"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"}) self.state = self.dataState else: # XXX data can be _'_... 
self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-closing-tag-but-got-char", - "datavars": {"data": data}}) + "expected-closing-tag-but-got-char", + "datavars": {"data": data}}) self.stream.unget(data) self.state = self.bogusCommentState return True @@ -439,229 +430,229 @@ class HTMLTokenizer(object): data = self.stream.char() if data in spaceCharacters: self.state = self.beforeAttributeNameState - elif data == u">": + elif data == ">": self.emitCurrentToken() elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-tag-name"}) + "eof-in-tag-name"}) self.state = self.dataState - elif data == u"/": + elif data == "/": self.state = self.selfClosingStartTagState - elif data == u"\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) - self.currentToken["name"] += u"\uFFFD" + self.currentToken["name"] += "\uFFFD" else: self.currentToken["name"] += data # (Don't use charsUntil here, because tag names are # very short and it's faster to not do anything fancy) return True - + def rcdataLessThanSignState(self): data = self.stream.char() if data == "/": self.temporaryBuffer = "" self.state = self.rcdataEndTagOpenState else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"<"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) self.stream.unget(data) self.state = self.rcdataState return True - + def rcdataEndTagOpenState(self): data = self.stream.char() if data in asciiLetters: self.temporaryBuffer += data self.state = self.rcdataEndTagNameState else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"</"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"}) self.stream.unget(data) self.state = self.rcdataState return True - + def rcdataEndTagNameState(self): appropriate = self.currentToken and 
self.currentToken["name"].lower() == self.temporaryBuffer.lower() data = self.stream.char() if data in spaceCharacters and appropriate: self.currentToken = {"type": tokenTypes["EndTag"], "name": self.temporaryBuffer, - "data": [], "selfClosing":False} + "data": [], "selfClosing": False} self.state = self.beforeAttributeNameState elif data == "/" and appropriate: self.currentToken = {"type": tokenTypes["EndTag"], "name": self.temporaryBuffer, - "data": [], "selfClosing":False} + "data": [], "selfClosing": False} self.state = self.selfClosingStartTagState elif data == ">" and appropriate: self.currentToken = {"type": tokenTypes["EndTag"], "name": self.temporaryBuffer, - "data": [], "selfClosing":False} + "data": [], "selfClosing": False} self.emitCurrentToken() self.state = self.dataState elif data in asciiLetters: self.temporaryBuffer += data else: self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": u"</" + self.temporaryBuffer}) + "data": "</" + self.temporaryBuffer}) self.stream.unget(data) self.state = self.rcdataState return True - + def rawtextLessThanSignState(self): data = self.stream.char() if data == "/": self.temporaryBuffer = "" self.state = self.rawtextEndTagOpenState else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"<"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) self.stream.unget(data) self.state = self.rawtextState return True - + def rawtextEndTagOpenState(self): data = self.stream.char() if data in asciiLetters: self.temporaryBuffer += data self.state = self.rawtextEndTagNameState else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"</"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"}) self.stream.unget(data) self.state = self.rawtextState return True - + def rawtextEndTagNameState(self): appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower() data = self.stream.char() if data in 
spaceCharacters and appropriate: self.currentToken = {"type": tokenTypes["EndTag"], "name": self.temporaryBuffer, - "data": [], "selfClosing":False} + "data": [], "selfClosing": False} self.state = self.beforeAttributeNameState elif data == "/" and appropriate: self.currentToken = {"type": tokenTypes["EndTag"], "name": self.temporaryBuffer, - "data": [], "selfClosing":False} + "data": [], "selfClosing": False} self.state = self.selfClosingStartTagState elif data == ">" and appropriate: self.currentToken = {"type": tokenTypes["EndTag"], "name": self.temporaryBuffer, - "data": [], "selfClosing":False} + "data": [], "selfClosing": False} self.emitCurrentToken() self.state = self.dataState elif data in asciiLetters: self.temporaryBuffer += data else: self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": u"</" + self.temporaryBuffer}) + "data": "</" + self.temporaryBuffer}) self.stream.unget(data) self.state = self.rawtextState return True - + def scriptDataLessThanSignState(self): data = self.stream.char() if data == "/": self.temporaryBuffer = "" self.state = self.scriptDataEndTagOpenState elif data == "!": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"<!"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<!"}) self.state = self.scriptDataEscapeStartState else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"<"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) self.stream.unget(data) self.state = self.scriptDataState return True - + def scriptDataEndTagOpenState(self): data = self.stream.char() if data in asciiLetters: self.temporaryBuffer += data self.state = self.scriptDataEndTagNameState else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"</"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"}) self.stream.unget(data) self.state = self.scriptDataState return True - + def scriptDataEndTagNameState(self): 
appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower() data = self.stream.char() if data in spaceCharacters and appropriate: self.currentToken = {"type": tokenTypes["EndTag"], "name": self.temporaryBuffer, - "data": [], "selfClosing":False} + "data": [], "selfClosing": False} self.state = self.beforeAttributeNameState elif data == "/" and appropriate: self.currentToken = {"type": tokenTypes["EndTag"], "name": self.temporaryBuffer, - "data": [], "selfClosing":False} + "data": [], "selfClosing": False} self.state = self.selfClosingStartTagState elif data == ">" and appropriate: self.currentToken = {"type": tokenTypes["EndTag"], "name": self.temporaryBuffer, - "data": [], "selfClosing":False} + "data": [], "selfClosing": False} self.emitCurrentToken() self.state = self.dataState elif data in asciiLetters: self.temporaryBuffer += data else: self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": u"</" + self.temporaryBuffer}) + "data": "</" + self.temporaryBuffer}) self.stream.unget(data) self.state = self.scriptDataState return True - + def scriptDataEscapeStartState(self): data = self.stream.char() if data == "-": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"-"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) self.state = self.scriptDataEscapeStartDashState else: self.stream.unget(data) self.state = self.scriptDataState return True - + def scriptDataEscapeStartDashState(self): data = self.stream.char() if data == "-": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"-"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) self.state = self.scriptDataEscapedDashDashState else: self.stream.unget(data) self.state = self.scriptDataState return True - + def scriptDataEscapedState(self): data = self.stream.char() if data == "-": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"-"}) + 
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) self.state = self.scriptDataEscapedDashState elif data == "<": self.state = self.scriptDataEscapedLessThanSignState - elif data == u"\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": u"\uFFFD"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "\uFFFD"}) elif data == EOF: self.state = self.dataState else: - chars = self.stream.charsUntil((u"<", u"-", u"\u0000")) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": - data + chars}) + chars = self.stream.charsUntil(("<", "-", "\u0000")) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": + data + chars}) return True - + def scriptDataEscapedDashState(self): data = self.stream.char() if data == "-": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"-"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) self.state = self.scriptDataEscapedDashDashState elif data == "<": self.state = self.scriptDataEscapedLessThanSignState - elif data == u"\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": u"\uFFFD"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "\uFFFD"}) self.state = self.scriptDataEscapedState elif data == EOF: self.state = self.dataState @@ -669,21 +660,21 @@ class HTMLTokenizer(object): self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) self.state = self.scriptDataEscapedState return True - + def scriptDataEscapedDashDashState(self): data = self.stream.char() if data == "-": - 
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"-"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) elif data == "<": self.state = self.scriptDataEscapedLessThanSignState elif data == ">": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u">"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": ">"}) self.state = self.scriptDataState - elif data == u"\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": u"\uFFFD"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "\uFFFD"}) self.state = self.scriptDataEscapedState elif data == EOF: self.state = self.dataState @@ -691,61 +682,61 @@ class HTMLTokenizer(object): self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) self.state = self.scriptDataEscapedState return True - + def scriptDataEscapedLessThanSignState(self): data = self.stream.char() if data == "/": self.temporaryBuffer = "" self.state = self.scriptDataEscapedEndTagOpenState elif data in asciiLetters: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"<" + data}) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<" + data}) self.temporaryBuffer = data self.state = self.scriptDataDoubleEscapeStartState else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"<"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) self.stream.unget(data) self.state = self.scriptDataEscapedState return True - + def scriptDataEscapedEndTagOpenState(self): data = self.stream.char() if data in asciiLetters: self.temporaryBuffer = data self.state = self.scriptDataEscapedEndTagNameState else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"</"}) + 
self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "</"}) self.stream.unget(data) self.state = self.scriptDataEscapedState return True - + def scriptDataEscapedEndTagNameState(self): appropriate = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower() data = self.stream.char() if data in spaceCharacters and appropriate: self.currentToken = {"type": tokenTypes["EndTag"], "name": self.temporaryBuffer, - "data": [], "selfClosing":False} + "data": [], "selfClosing": False} self.state = self.beforeAttributeNameState elif data == "/" and appropriate: self.currentToken = {"type": tokenTypes["EndTag"], "name": self.temporaryBuffer, - "data": [], "selfClosing":False} + "data": [], "selfClosing": False} self.state = self.selfClosingStartTagState elif data == ">" and appropriate: self.currentToken = {"type": tokenTypes["EndTag"], "name": self.temporaryBuffer, - "data": [], "selfClosing":False} + "data": [], "selfClosing": False} self.emitCurrentToken() self.state = self.dataState elif data in asciiLetters: self.temporaryBuffer += data else: self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": u"</" + self.temporaryBuffer}) + "data": "</" + self.temporaryBuffer}) self.stream.unget(data) self.state = self.scriptDataEscapedState return True - + def scriptDataDoubleEscapeStartState(self): data = self.stream.char() if data in (spaceCharacters | frozenset(("/", ">"))): @@ -761,87 +752,87 @@ class HTMLTokenizer(object): self.stream.unget(data) self.state = self.scriptDataEscapedState return True - + def scriptDataDoubleEscapedState(self): data = self.stream.char() if data == "-": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"-"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) self.state = self.scriptDataDoubleEscapedDashState elif data == "<": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"<"}) + self.tokenQueue.append({"type": 
tokenTypes["Characters"], "data": "<"}) self.state = self.scriptDataDoubleEscapedLessThanSignState - elif data == u"\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": u"\uFFFD"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "\uFFFD"}) elif data == EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-script-in-script"}) + "eof-in-script-in-script"}) self.state = self.dataState else: self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) return True - + def scriptDataDoubleEscapedDashState(self): data = self.stream.char() if data == "-": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"-"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) self.state = self.scriptDataDoubleEscapedDashDashState elif data == "<": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"<"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) self.state = self.scriptDataDoubleEscapedLessThanSignState - elif data == u"\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": u"\uFFFD"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "\uFFFD"}) self.state = self.scriptDataDoubleEscapedState elif data == EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-script-in-script"}) + "eof-in-script-in-script"}) self.state = self.dataState else: self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) self.state = self.scriptDataDoubleEscapedState return True - - def 
scriptDataDoubleEscapedDashState(self): + + def scriptDataDoubleEscapedDashDashState(self): data = self.stream.char() if data == "-": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"-"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) elif data == "<": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"<"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) self.state = self.scriptDataDoubleEscapedLessThanSignState elif data == ">": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u">"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": ">"}) self.state = self.scriptDataState - elif data == u"\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": u"\uFFFD"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], + "data": "\uFFFD"}) self.state = self.scriptDataDoubleEscapedState elif data == EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-script-in-script"}) + "eof-in-script-in-script"}) self.state = self.dataState else: self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) self.state = self.scriptDataDoubleEscapedState return True - + def scriptDataDoubleEscapedLessThanSignState(self): data = self.stream.char() if data == "/": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"/"}) + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "/"}) self.temporaryBuffer = "" self.state = self.scriptDataDoubleEscapeEndState else: self.stream.unget(data) self.state = self.scriptDataDoubleEscapedState return True - + def scriptDataDoubleEscapeEndState(self): data = self.stream.char() if data in (spaceCharacters | frozenset(("/", ">"))): @@ -865,23 +856,23 @@ class 
HTMLTokenizer(object): elif data in asciiLetters: self.currentToken["data"].append([data, ""]) self.state = self.attributeNameState - elif data == u">": + elif data == ">": self.emitCurrentToken() - elif data == u"/": + elif data == "/": self.state = self.selfClosingStartTagState - elif data in (u"'", u'"', u"=", u"<"): + elif data in ("'", '"', "=", "<"): self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "invalid-character-in-attribute-name"}) + "invalid-character-in-attribute-name"}) self.currentToken["data"].append([data, ""]) self.state = self.attributeNameState - elif data == u"\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) - self.currentToken["data"].append([u"\uFFFD", ""]) + self.currentToken["data"].append(["\uFFFD", ""]) self.state = self.attributeNameState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-attribute-name-but-got-eof"}) + "expected-attribute-name-but-got-eof"}) self.state = self.dataState else: self.currentToken["data"].append([data, ""]) @@ -892,34 +883,34 @@ class HTMLTokenizer(object): data = self.stream.char() leavingThisState = True emitToken = False - if data == u"=": + if data == "=": self.state = self.beforeAttributeValueState elif data in asciiLetters: self.currentToken["data"][-1][0] += data +\ - self.stream.charsUntil(asciiLetters, True) + self.stream.charsUntil(asciiLetters, True) leavingThisState = False - elif data == u">": + elif data == ">": # XXX If we emit here the attributes are converted to a dict # without being checked and when the code below runs we error # because data is a dict not a list emitToken = True elif data in spaceCharacters: self.state = self.afterAttributeNameState - elif data == u"/": + elif data == "/": self.state = self.selfClosingStartTagState - elif data == u"\u0000": - self.tokenQueue.append({"type": 
tokenTypes["ParseError"], + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) - self.currentToken["data"][-1][0] += u"\uFFFD" + self.currentToken["data"][-1][0] += "\uFFFD" leavingThisState = False - elif data in (u"'", u'"', u"<"): - self.tokenQueue.append({"type": tokenTypes["ParseError"], + elif data in ("'", '"', "<"): + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "invalid-character-in-attribute-name"}) + "invalid-character-in-attribute-name"}) self.currentToken["data"][-1][0] += data leavingThisState = False elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "eof-in-attribute-name"}) self.state = self.dataState else: @@ -936,7 +927,7 @@ class HTMLTokenizer(object): for name, value in self.currentToken["data"][:-1]: if self.currentToken["data"][-1][0] == name: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "duplicate-attribute"}) + "duplicate-attribute"}) break # XXX Fix for above XXX if emitToken: @@ -947,28 +938,28 @@ class HTMLTokenizer(object): data = self.stream.char() if data in spaceCharacters: self.stream.charsUntil(spaceCharacters, True) - elif data == u"=": + elif data == "=": self.state = self.beforeAttributeValueState - elif data == u">": + elif data == ">": self.emitCurrentToken() elif data in asciiLetters: self.currentToken["data"].append([data, ""]) self.state = self.attributeNameState - elif data == u"/": + elif data == "/": self.state = self.selfClosingStartTagState - elif data == u"\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) - self.currentToken["data"].append([u"\uFFFD", ""]) + self.currentToken["data"].append(["\uFFFD", ""]) self.state = self.attributeNameState - elif data in (u"'", u'"', u"<"): + elif 
data in ("'", '"', "<"): self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "invalid-character-after-attribute-name"}) + "invalid-character-after-attribute-name"}) self.currentToken["data"].append([data, ""]) self.state = self.attributeNameState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-end-of-tag-but-got-eof"}) + "expected-end-of-tag-but-got-eof"}) self.state = self.dataState else: self.currentToken["data"].append([data, ""]) @@ -979,30 +970,30 @@ class HTMLTokenizer(object): data = self.stream.char() if data in spaceCharacters: self.stream.charsUntil(spaceCharacters, True) - elif data == u"\"": + elif data == "\"": self.state = self.attributeValueDoubleQuotedState - elif data == u"&": + elif data == "&": self.state = self.attributeValueUnQuotedState - self.stream.unget(data); - elif data == u"'": + self.stream.unget(data) + elif data == "'": self.state = self.attributeValueSingleQuotedState - elif data == u">": + elif data == ">": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-attribute-value-but-got-right-bracket"}) + "expected-attribute-value-but-got-right-bracket"}) self.emitCurrentToken() - elif data == u"\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) - self.currentToken["data"][-1][1] += u"\uFFFD" + self.currentToken["data"][-1][1] += "\uFFFD" self.state = self.attributeValueUnQuotedState - elif data in (u"=", u"<", u"`"): + elif data in ("=", "<", "`"): self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "equals-in-unquoted-attribute-value"}) + "equals-in-unquoted-attribute-value"}) self.currentToken["data"][-1][1] += data self.state = self.attributeValueUnQuotedState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-attribute-value-but-got-eof"}) + 
"expected-attribute-value-but-got-eof"}) self.state = self.dataState else: self.currentToken["data"][-1][1] += data @@ -1013,81 +1004,81 @@ class HTMLTokenizer(object): data = self.stream.char() if data == "\"": self.state = self.afterAttributeValueState - elif data == u"&": - self.processEntityInAttribute(u'"') - elif data == u"\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], + elif data == "&": + self.processEntityInAttribute('"') + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) - self.currentToken["data"][-1][1] += u"\uFFFD" + self.currentToken["data"][-1][1] += "\uFFFD" elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-attribute-value-double-quote"}) + "eof-in-attribute-value-double-quote"}) self.state = self.dataState else: self.currentToken["data"][-1][1] += data +\ - self.stream.charsUntil(("\"", u"&")) + self.stream.charsUntil(("\"", "&", "\u0000")) return True def attributeValueSingleQuotedState(self): data = self.stream.char() if data == "'": self.state = self.afterAttributeValueState - elif data == u"&": - self.processEntityInAttribute(u"'") - elif data == u"\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], + elif data == "&": + self.processEntityInAttribute("'") + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) - self.currentToken["data"][-1][1] += u"\uFFFD" + self.currentToken["data"][-1][1] += "\uFFFD" elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-attribute-value-single-quote"}) + "eof-in-attribute-value-single-quote"}) self.state = self.dataState else: self.currentToken["data"][-1][1] += data +\ - self.stream.charsUntil(("'", u"&")) + self.stream.charsUntil(("'", "&", "\u0000")) return True def attributeValueUnQuotedState(self): data = self.stream.char() if data in spaceCharacters: self.state 
= self.beforeAttributeNameState - elif data == u"&": + elif data == "&": self.processEntityInAttribute(">") - elif data == u">": + elif data == ">": self.emitCurrentToken() - elif data in (u'"', u"'", u"=", u"<", u"`"): + elif data in ('"', "'", "=", "<", "`"): self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-character-in-unquoted-attribute-value"}) + "unexpected-character-in-unquoted-attribute-value"}) self.currentToken["data"][-1][1] += data - elif data == u"\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) - self.currentToken["data"][-1][1] += u"\uFFFD" + self.currentToken["data"][-1][1] += "\uFFFD" elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-attribute-value-no-quotes"}) + "eof-in-attribute-value-no-quotes"}) self.state = self.dataState else: self.currentToken["data"][-1][1] += data + self.stream.charsUntil( - frozenset((u"&", u">", u'"', u"'", u"=", u"<", u"`")) | spaceCharacters) + frozenset(("&", ">", '"', "'", "=", "<", "`", "\u0000")) | spaceCharacters) return True def afterAttributeValueState(self): data = self.stream.char() if data in spaceCharacters: self.state = self.beforeAttributeNameState - elif data == u">": + elif data == ">": self.emitCurrentToken() - elif data == u"/": + elif data == "/": self.state = self.selfClosingStartTagState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-EOF-after-attribute-value"}) + "unexpected-EOF-after-attribute-value"}) self.stream.unget(data) self.state = self.dataState else: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-character-after-attribute-value"}) + "unexpected-character-after-attribute-value"}) self.stream.unget(data) self.state = self.beforeAttributeNameState return True @@ -1098,14 +1089,14 @@ class 
HTMLTokenizer(object): self.currentToken["selfClosing"] = True self.emitCurrentToken() elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-EOF-after-solidus-in-tag"}) + "unexpected-EOF-after-solidus-in-tag"}) self.stream.unget(data) self.state = self.dataState else: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-character-after-soldius-in-tag"}) + "unexpected-character-after-solidus-in-tag"}) self.stream.unget(data) self.state = self.beforeAttributeNameState return True @@ -1114,10 +1105,10 @@ class HTMLTokenizer(object): # Make a new comment token and give it as value all the characters # until the first > or EOF (charsUntil checks for EOF automatically) # and emit it. - data = self.stream.charsUntil(u">") - data = data.replace(u"\u0000", u"\uFFFD") + data = self.stream.charsUntil(">") + data = data.replace("\u0000", "\uFFFD") self.tokenQueue.append( - {"type": tokenTypes["Comment"], "data": data}) + {"type": tokenTypes["Comment"], "data": data}) # Eat the character directly after the bogus comment which is either a # ">" or an EOF. 
@@ -1127,28 +1118,28 @@ class HTMLTokenizer(object): def markupDeclarationOpenState(self): charStack = [self.stream.char()] - if charStack[-1] == u"-": + if charStack[-1] == "-": charStack.append(self.stream.char()) - if charStack[-1] == u"-": - self.currentToken = {"type": tokenTypes["Comment"], "data": u""} + if charStack[-1] == "-": + self.currentToken = {"type": tokenTypes["Comment"], "data": ""} self.state = self.commentStartState return True - elif charStack[-1] in (u'd', u'D'): + elif charStack[-1] in ('d', 'D'): matched = True - for expected in ((u'o', u'O'), (u'c', u'C'), (u't', u'T'), - (u'y', u'Y'), (u'p', u'P'), (u'e', u'E')): + for expected in (('o', 'O'), ('c', 'C'), ('t', 'T'), + ('y', 'Y'), ('p', 'P'), ('e', 'E')): charStack.append(self.stream.char()) if charStack[-1] not in expected: matched = False break if matched: self.currentToken = {"type": tokenTypes["Doctype"], - "name": u"", - "publicId": None, "systemId": None, + "name": "", + "publicId": None, "systemId": None, "correct": True} self.state = self.doctypeState return True - elif (charStack[-1] == "[" and + elif (charStack[-1] == "[" and self.parser is not None and self.parser.tree.openElements and self.parser.tree.openElements[-1].namespace != self.parser.tree.defaultNamespace): @@ -1163,7 +1154,7 @@ class HTMLTokenizer(object): return True self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-dashes-or-doctype"}) + "expected-dashes-or-doctype"}) while charStack: self.stream.unget(charStack.pop()) @@ -1174,41 +1165,41 @@ class HTMLTokenizer(object): data = self.stream.char() if data == "-": self.state = self.commentStartDashState - elif data == u"\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) - self.currentToken["data"] += u"\uFFFD" + self.currentToken["data"] += "\uFFFD" elif data == ">": self.tokenQueue.append({"type": 
tokenTypes["ParseError"], "data": - "incorrect-comment"}) + "incorrect-comment"}) self.tokenQueue.append(self.currentToken) self.state = self.dataState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-comment"}) + "eof-in-comment"}) self.tokenQueue.append(self.currentToken) self.state = self.dataState else: self.currentToken["data"] += data self.state = self.commentState return True - + def commentStartDashState(self): data = self.stream.char() if data == "-": self.state = self.commentEndState - elif data == u"\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) - self.currentToken["data"] += u"-\uFFFD" + self.currentToken["data"] += "-\uFFFD" elif data == ">": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "incorrect-comment"}) + "incorrect-comment"}) self.tokenQueue.append(self.currentToken) self.state = self.dataState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-comment"}) + "eof-in-comment"}) self.tokenQueue.append(self.currentToken) self.state = self.dataState else: @@ -1216,95 +1207,94 @@ class HTMLTokenizer(object): self.state = self.commentState return True - def commentState(self): data = self.stream.char() - if data == u"-": + if data == "-": self.state = self.commentEndDashState - elif data == u"\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) - self.currentToken["data"] += u"\uFFFD" + self.currentToken["data"] += "\uFFFD" elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "eof-in-comment"}) self.tokenQueue.append(self.currentToken) self.state = self.dataState else: 
self.currentToken["data"] += data + \ - self.stream.charsUntil((u"-", u"\u0000")) + self.stream.charsUntil(("-", "\u0000")) return True def commentEndDashState(self): data = self.stream.char() - if data == u"-": + if data == "-": self.state = self.commentEndState - elif data == u"\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) - self.currentToken["data"] += u"-\uFFFD" + self.currentToken["data"] += "-\uFFFD" self.state = self.commentState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-comment-end-dash"}) + "eof-in-comment-end-dash"}) self.tokenQueue.append(self.currentToken) self.state = self.dataState else: - self.currentToken["data"] += u"-" + data + self.currentToken["data"] += "-" + data self.state = self.commentState return True def commentEndState(self): data = self.stream.char() - if data == u">": + if data == ">": self.tokenQueue.append(self.currentToken) self.state = self.dataState - elif data == u"\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) - self.currentToken["data"] += u"--\uFFFD" + self.currentToken["data"] += "--\uFFFD" self.state = self.commentState elif data == "!": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-bang-after-double-dash-in-comment"}) + "unexpected-bang-after-double-dash-in-comment"}) self.state = self.commentEndBangState - elif data == u"-": + elif data == "-": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-dash-after-double-dash-in-comment"}) + "unexpected-dash-after-double-dash-in-comment"}) self.currentToken["data"] += data elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-comment-double-dash"}) + 
"eof-in-comment-double-dash"}) self.tokenQueue.append(self.currentToken) self.state = self.dataState else: # XXX self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-comment"}) - self.currentToken["data"] += u"--" + data + "unexpected-char-in-comment"}) + self.currentToken["data"] += "--" + data self.state = self.commentState return True def commentEndBangState(self): data = self.stream.char() - if data == u">": + if data == ">": self.tokenQueue.append(self.currentToken) self.state = self.dataState - elif data == u"-": + elif data == "-": self.currentToken["data"] += "--!" self.state = self.commentEndDashState - elif data == u"\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) - self.currentToken["data"] += u"--!\uFFFD" + self.currentToken["data"] += "--!\uFFFD" self.state = self.commentState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-comment-end-bang-state"}) + "eof-in-comment-end-bang-state"}) self.tokenQueue.append(self.currentToken) self.state = self.dataState else: - self.currentToken["data"] += u"--!" + data + self.currentToken["data"] += "--!" 
+ data self.state = self.commentState return True @@ -1314,13 +1304,13 @@ class HTMLTokenizer(object): self.state = self.beforeDoctypeNameState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-doctype-name-but-got-eof"}) + "expected-doctype-name-but-got-eof"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState else: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "need-space-after-doctype"}) + "need-space-after-doctype"}) self.stream.unget(data) self.state = self.beforeDoctypeNameState return True @@ -1329,20 +1319,20 @@ class HTMLTokenizer(object): data = self.stream.char() if data in spaceCharacters: pass - elif data == u">": + elif data == ">": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-doctype-name-but-got-right-bracket"}) + "expected-doctype-name-but-got-right-bracket"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState - elif data == u"\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) - self.currentToken["name"] = u"\uFFFD" + self.currentToken["name"] = "\uFFFD" self.state = self.doctypeNameState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-doctype-name-but-got-eof"}) + "expected-doctype-name-but-got-eof"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState @@ -1356,18 +1346,18 @@ class HTMLTokenizer(object): if data in spaceCharacters: self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower) self.state = self.afterDoctypeNameState - elif data == u">": + elif data == ">": self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower) 
self.tokenQueue.append(self.currentToken) self.state = self.dataState - elif data == u"\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) - self.currentToken["name"] += u"\uFFFD" + self.currentToken["name"] += "\uFFFD" self.state = self.doctypeNameState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype-name"}) + "eof-in-doctype-name"}) self.currentToken["correct"] = False self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower) self.tokenQueue.append(self.currentToken) @@ -1380,21 +1370,21 @@ class HTMLTokenizer(object): data = self.stream.char() if data in spaceCharacters: pass - elif data == u">": + elif data == ">": self.tokenQueue.append(self.currentToken) self.state = self.dataState elif data is EOF: self.currentToken["correct"] = False self.stream.unget(data) self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) + "eof-in-doctype"}) self.tokenQueue.append(self.currentToken) self.state = self.dataState else: - if data in (u"p", u"P"): + if data in ("p", "P"): matched = True - for expected in ((u"u", u"U"), (u"b", u"B"), (u"l", u"L"), - (u"i", u"I"), (u"c", u"C")): + for expected in (("u", "U"), ("b", "B"), ("l", "L"), + ("i", "I"), ("c", "C")): data = self.stream.char() if data not in expected: matched = False @@ -1402,10 +1392,10 @@ class HTMLTokenizer(object): if matched: self.state = self.afterDoctypePublicKeywordState return True - elif data in (u"s", u"S"): + elif data in ("s", "S"): matched = True - for expected in ((u"y", u"Y"), (u"s", u"S"), (u"t", u"T"), - (u"e", u"E"), (u"m", u"M")): + for expected in (("y", "Y"), ("s", "S"), ("t", "T"), + ("e", "E"), ("m", "M")): data = self.stream.char() if data not in expected: matched = False @@ -1420,25 +1410,25 @@ class HTMLTokenizer(object): # and needs to be ungetted 
self.stream.unget(data) self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-space-or-right-bracket-in-doctype", "datavars": - {"data": data}}) + "expected-space-or-right-bracket-in-doctype", "datavars": + {"data": data}}) self.currentToken["correct"] = False self.state = self.bogusDoctypeState return True - + def afterDoctypePublicKeywordState(self): data = self.stream.char() if data in spaceCharacters: self.state = self.beforeDoctypePublicIdentifierState elif data in ("'", '"'): self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) + "unexpected-char-in-doctype"}) self.stream.unget(data) self.state = self.beforeDoctypePublicIdentifierState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) + "eof-in-doctype"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState @@ -1452,26 +1442,26 @@ class HTMLTokenizer(object): if data in spaceCharacters: pass elif data == "\"": - self.currentToken["publicId"] = u"" + self.currentToken["publicId"] = "" self.state = self.doctypePublicIdentifierDoubleQuotedState elif data == "'": - self.currentToken["publicId"] = u"" + self.currentToken["publicId"] = "" self.state = self.doctypePublicIdentifierSingleQuotedState elif data == ">": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-end-of-doctype"}) + "unexpected-end-of-doctype"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) + "eof-in-doctype"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState else: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) + "unexpected-char-in-doctype"}) self.currentToken["correct"] 
= False self.state = self.bogusDoctypeState return True @@ -1480,19 +1470,19 @@ class HTMLTokenizer(object): data = self.stream.char() if data == "\"": self.state = self.afterDoctypePublicIdentifierState - elif data == u"\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) - self.currentToken["publicId"] += u"\uFFFD" + self.currentToken["publicId"] += "\uFFFD" elif data == ">": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-end-of-doctype"}) + "unexpected-end-of-doctype"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) + "eof-in-doctype"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState @@ -1504,19 +1494,19 @@ class HTMLTokenizer(object): data = self.stream.char() if data == "'": self.state = self.afterDoctypePublicIdentifierState - elif data == u"\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) - self.currentToken["publicId"] += u"\uFFFD" + self.currentToken["publicId"] += "\uFFFD" elif data == ">": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-end-of-doctype"}) + "unexpected-end-of-doctype"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) + "eof-in-doctype"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState @@ -1533,27 +1523,27 @@ class HTMLTokenizer(object): self.state = self.dataState elif data 
== '"': self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.currentToken["systemId"] = u"" + "unexpected-char-in-doctype"}) + self.currentToken["systemId"] = "" self.state = self.doctypeSystemIdentifierDoubleQuotedState elif data == "'": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.currentToken["systemId"] = u"" + "unexpected-char-in-doctype"}) + self.currentToken["systemId"] = "" self.state = self.doctypeSystemIdentifierSingleQuotedState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) + "eof-in-doctype"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState else: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) + "unexpected-char-in-doctype"}) self.currentToken["correct"] = False self.state = self.bogusDoctypeState return True - + def betweenDoctypePublicAndSystemIdentifiersState(self): data = self.stream.char() if data in spaceCharacters: @@ -1562,36 +1552,36 @@ class HTMLTokenizer(object): self.tokenQueue.append(self.currentToken) self.state = self.dataState elif data == '"': - self.currentToken["systemId"] = u"" + self.currentToken["systemId"] = "" self.state = self.doctypeSystemIdentifierDoubleQuotedState elif data == "'": - self.currentToken["systemId"] = u"" + self.currentToken["systemId"] = "" self.state = self.doctypeSystemIdentifierSingleQuotedState elif data == EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) + "eof-in-doctype"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState else: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) + "unexpected-char-in-doctype"}) self.currentToken["correct"] = False self.state = self.bogusDoctypeState 
return True - + def afterDoctypeSystemKeywordState(self): data = self.stream.char() if data in spaceCharacters: self.state = self.beforeDoctypeSystemIdentifierState elif data in ("'", '"'): self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) + "unexpected-char-in-doctype"}) self.stream.unget(data) self.state = self.beforeDoctypeSystemIdentifierState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) + "eof-in-doctype"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState @@ -1599,32 +1589,32 @@ class HTMLTokenizer(object): self.stream.unget(data) self.state = self.beforeDoctypeSystemIdentifierState return True - + def beforeDoctypeSystemIdentifierState(self): data = self.stream.char() if data in spaceCharacters: pass elif data == "\"": - self.currentToken["systemId"] = u"" + self.currentToken["systemId"] = "" self.state = self.doctypeSystemIdentifierDoubleQuotedState elif data == "'": - self.currentToken["systemId"] = u"" + self.currentToken["systemId"] = "" self.state = self.doctypeSystemIdentifierSingleQuotedState elif data == ">": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) + "unexpected-char-in-doctype"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) + "eof-in-doctype"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState else: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) + "unexpected-char-in-doctype"}) self.currentToken["correct"] = False self.state = self.bogusDoctypeState return True @@ -1633,19 +1623,19 @@ class HTMLTokenizer(object): data = self.stream.char() if data == "\"": 
self.state = self.afterDoctypeSystemIdentifierState - elif data == u"\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) - self.currentToken["systemId"] += u"\uFFFD" + self.currentToken["systemId"] += "\uFFFD" elif data == ">": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-end-of-doctype"}) + "unexpected-end-of-doctype"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) + "eof-in-doctype"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState @@ -1657,19 +1647,19 @@ class HTMLTokenizer(object): data = self.stream.char() if data == "'": self.state = self.afterDoctypeSystemIdentifierState - elif data == u"\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], + elif data == "\u0000": + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) - self.currentToken["systemId"] += u"\uFFFD" + self.currentToken["systemId"] += "\uFFFD" elif data == ">": self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-end-of-doctype"}) + "unexpected-end-of-doctype"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) + "eof-in-doctype"}) self.currentToken["correct"] = False self.tokenQueue.append(self.currentToken) self.state = self.dataState @@ -1686,19 +1676,19 @@ class HTMLTokenizer(object): self.state = self.dataState elif data is EOF: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) + "eof-in-doctype"}) self.currentToken["correct"] = False 
self.tokenQueue.append(self.currentToken) self.state = self.dataState else: self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) + "unexpected-char-in-doctype"}) self.state = self.bogusDoctypeState return True def bogusDoctypeState(self): data = self.stream.char() - if data == u">": + if data == ">": self.tokenQueue.append(self.currentToken) self.state = self.dataState elif data is EOF: @@ -1713,32 +1703,29 @@ class HTMLTokenizer(object): def cdataSectionState(self): data = [] while True: - data.append(self.stream.charsUntil(u"]")) - charStack = [] - - for expected in ["]", "]", ">"]: - charStack.append(self.stream.char()) - matched = True - if charStack[-1] == EOF: - data.extend(charStack[:-1]) - break - elif charStack[-1] != expected: - matched = False - data.extend(charStack) + data.append(self.stream.charsUntil("]")) + data.append(self.stream.charsUntil(">")) + char = self.stream.char() + if char == EOF: + break + else: + assert char == ">" + if data[-1][-2:] == "]]": + data[-1] = data[-1][:-2] break + else: + data.append(char) - if matched: - break data = "".join(data) - #Deal with null here rather than in the parser - nullCount = data.count(u"\u0000") + # Deal with null here rather than in the parser + nullCount = data.count("\u0000") if nullCount > 0: - for i in xrange(nullCount): - self.tokenQueue.append({"type": tokenTypes["ParseError"], + for i in range(nullCount): + self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"}) - data = data.replace(u"\u0000", u"\uFFFD") + data = data.replace("\u0000", "\uFFFD") if data: - self.tokenQueue.append({"type": tokenTypes["Characters"], + self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) self.state = self.dataState return True diff --git a/libs/html5lib/treeadapters/__init__.py b/libs/html5lib/treeadapters/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/libs/html5lib/treeadapters/sax.py 
b/libs/html5lib/treeadapters/sax.py new file mode 100644 index 0000000..ad47df9 --- /dev/null +++ b/libs/html5lib/treeadapters/sax.py @@ -0,0 +1,44 @@ +from __future__ import absolute_import, division, unicode_literals + +from xml.sax.xmlreader import AttributesNSImpl + +from ..constants import adjustForeignAttributes, unadjustForeignAttributes + +prefix_mapping = {} +for prefix, localName, namespace in adjustForeignAttributes.values(): + if prefix is not None: + prefix_mapping[prefix] = namespace + + +def to_sax(walker, handler): + """Call SAX-like content handler based on treewalker walker""" + handler.startDocument() + for prefix, namespace in prefix_mapping.items(): + handler.startPrefixMapping(prefix, namespace) + + for token in walker: + type = token["type"] + if type == "Doctype": + continue + elif type in ("StartTag", "EmptyTag"): + attrs = AttributesNSImpl(token["data"], + unadjustForeignAttributes) + handler.startElementNS((token["namespace"], token["name"]), + token["name"], + attrs) + if type == "EmptyTag": + handler.endElementNS((token["namespace"], token["name"]), + token["name"]) + elif type == "EndTag": + handler.endElementNS((token["namespace"], token["name"]), + token["name"]) + elif type in ("Characters", "SpaceCharacters"): + handler.characters(token["data"]) + elif type == "Comment": + pass + else: + assert False, "Unknown token type" + + for prefix, namespace in prefix_mapping.items(): + handler.endPrefixMapping(prefix) + handler.endDocument() diff --git a/libs/html5lib/treebuilders/__init__.py b/libs/html5lib/treebuilders/__init__.py index 14f66d4..6a6b2a4 100755 --- a/libs/html5lib/treebuilders/__init__.py +++ b/libs/html5lib/treebuilders/__init__.py @@ -7,7 +7,7 @@ implement several things: 1) A set of classes for various types of elements: Document, Doctype, Comment, Element. 
These must implement the interface of _base.treebuilders.Node (although comment nodes have a different -signature for their constructor, see treebuilders.simpletree.Comment) +signature for their constructor, see treebuilders.etree.Comment) Textual content may also be implemented as another node type, or not, as your tree implementation requires. @@ -24,73 +24,53 @@ getDocument - Returns the root node of the complete document tree testSerializer method on your treebuilder which accepts a node and returns a string containing Node and its children serialized according to the format used in the unittests - -The supplied simpletree module provides a python-only implementation -of a full treebuilder and is a useful reference for the semantics of -the various methods. """ +from __future__ import absolute_import, division, unicode_literals + +from ..utils import default_etree + treeBuilderCache = {} -import sys def getTreeBuilder(treeType, implementation=None, **kwargs): """Get a TreeBuilder class for various types of tree with built-in support - + treeType - the name of the tree type required (case-insensitive). Supported - values are "simpletree", "dom", "etree" and "beautifulsoup" - - "simpletree" - a built-in DOM-ish tree type with support for some - more pythonic idioms. - "dom" - A generic builder for DOM implementations, defaulting to - a xml.dom.minidom based implementation for the sake of - backwards compatibility (as releases up until 0.10 had a - builder called "dom" that was a minidom implemenation). - "etree" - A generic builder for tree implementations exposing an - elementtree-like interface (known to work with - ElementTree, cElementTree and lxml.etree). - "beautifulsoup" - Beautiful soup (if installed) - + values are: + + "dom" - A generic builder for DOM implementations, defaulting to + a xml.dom.minidom based implementation. 
+ "etree" - A generic builder for tree implementations exposing an + ElementTree-like interface, defaulting to + xml.etree.cElementTree if available and + xml.etree.ElementTree if not. + "lxml" - A etree-based builder for lxml.etree, handling + limitations of lxml's implementation. + implementation - (Currently applies to the "etree" and "dom" tree types). A module implementing the tree type e.g. - xml.etree.ElementTree or lxml.etree.""" - + xml.etree.ElementTree or xml.etree.cElementTree.""" + treeType = treeType.lower() if treeType not in treeBuilderCache: if treeType == "dom": - import dom - # XXX: Keep backwards compatibility by using minidom if no implementation is given - if implementation == None: + from . import dom + # Come up with a sane default (pref. from the stdlib) + if implementation is None: from xml.dom import minidom implementation = minidom - # XXX: NEVER cache here, caching is done in the dom submodule + # NEVER cache here, caching is done in the dom submodule return dom.getDomModule(implementation, **kwargs).TreeBuilder - elif treeType == "simpletree": - import simpletree - treeBuilderCache[treeType] = simpletree.TreeBuilder - elif treeType == "beautifulsoup": - import soup - treeBuilderCache[treeType] = soup.TreeBuilder elif treeType == "lxml": - import etree_lxml + from . import etree_lxml treeBuilderCache[treeType] = etree_lxml.TreeBuilder elif treeType == "etree": - # Come up with a sane default - if implementation == None: - try: - import xml.etree.cElementTree as ET - except ImportError: - try: - import xml.etree.ElementTree as ET - except ImportError: - try: - import cElementTree as ET - except ImportError: - import elementtree.ElementTree as ET - implementation = ET - import etree + from . 
import etree + if implementation is None: + implementation = default_etree # NEVER cache here, caching is done in the etree submodule return etree.getETreeModule(implementation, **kwargs).TreeBuilder else: - raise ValueError("""Unrecognised treebuilder "%s" """%treeType) + raise ValueError("""Unrecognised treebuilder "%s" """ % treeType) return treeBuilderCache.get(treeType) diff --git a/libs/html5lib/treebuilders/_base.py b/libs/html5lib/treebuilders/_base.py index f3782d2..8b97cc1 100755 --- a/libs/html5lib/treebuilders/_base.py +++ b/libs/html5lib/treebuilders/_base.py @@ -1,25 +1,34 @@ -from html5lib.constants import scopingElements, tableInsertModeElements, namespaces -try: - frozenset -except NameError: - # Import from the sets module for python 2.3 - from sets import Set as set - from sets import ImmutableSet as frozenset +from __future__ import absolute_import, division, unicode_literals +from six import text_type + +from ..constants import scopingElements, tableInsertModeElements, namespaces # The scope markers are inserted when entering object elements, # marquees, table cells, and table captions, and are used to prevent formatting # from "leaking" into tables, object elements, and marquees. Marker = None +listElementsMap = { + None: (frozenset(scopingElements), False), + "button": (frozenset(scopingElements | set([(namespaces["html"], "button")])), False), + "list": (frozenset(scopingElements | set([(namespaces["html"], "ol"), + (namespaces["html"], "ul")])), False), + "table": (frozenset([(namespaces["html"], "html"), + (namespaces["html"], "table")]), False), + "select": (frozenset([(namespaces["html"], "optgroup"), + (namespaces["html"], "option")]), True) +} + + class Node(object): def __init__(self, name): """Node representing an item in the tree. 
name - The tag name associated with the node parent - The parent of the current node (or None for the document node) - value - The value of the current node (applies to text nodes and + value - The value of the current node (applies to text nodes and comments attributes - a dict holding name, value pairs for attributes of the node - childNodes - a list of child nodes of the current node. This must + childNodes - a list of child nodes of the current node. This must include all elements but not necessarily other node types _flags - A list of miscellaneous flags that can be set on the node """ @@ -30,14 +39,14 @@ class Node(object): self.childNodes = [] self._flags = [] - def __unicode__(self): - attributesStr = " ".join(["%s=\"%s\""%(name, value) - for name, value in - self.attributes.iteritems()]) + def __str__(self): + attributesStr = " ".join(["%s=\"%s\"" % (name, value) + for name, value in + self.attributes.items()]) if attributesStr: - return "<%s %s>"%(self.name,attributesStr) + return "<%s %s>" % (self.name, attributesStr) else: - return "<%s>"%(self.name) + return "<%s>" % (self.name) def __repr__(self): return "<%s>" % (self.name) @@ -48,14 +57,14 @@ class Node(object): raise NotImplementedError def insertText(self, data, insertBefore=None): - """Insert data as text in the current node, positioned before the + """Insert data as text in the current node, positioned before the start of node insertBefore or to the end of the node's text. """ raise NotImplementedError def insertBefore(self, node, refNode): - """Insert node as a child of the current node, before refNode in the - list of child nodes. Raises ValueError if refNode is not a child of + """Insert node as a child of the current node, before refNode in the + list of child nodes. 
Raises ValueError if refNode is not a child of the current node""" raise NotImplementedError @@ -65,11 +74,11 @@ class Node(object): raise NotImplementedError def reparentChildren(self, newParent): - """Move all the children of the current node to newParent. - This is needed so that trees that don't store text as nodes move the + """Move all the children of the current node to newParent. + This is needed so that trees that don't store text as nodes move the text in the correct way """ - #XXX - should this method be made more general? + # XXX - should this method be made more general? for child in self.childNodes: newParent.appendChild(child) self.childNodes = [] @@ -80,12 +89,12 @@ class Node(object): """ raise NotImplementedError - def hasContent(self): """Return true if the node has children or text, false otherwise """ raise NotImplementedError + class ActiveFormattingElements(list): def append(self, node): equalCount = 0 @@ -103,12 +112,13 @@ class ActiveFormattingElements(list): def nodesEqual(self, node1, node2): if not node1.nameTuple == node2.nameTuple: return False - + if not node1.attributes == node2.attributes: return False - + return True + class TreeBuilder(object): """Base treebuilder implementation documentClass - the class to use for the bottommost node of a document @@ -117,19 +127,19 @@ class TreeBuilder(object): doctypeClass - the class to use for doctypes """ - #Document class + # Document class documentClass = None - #The class to use for creating a node + # The class to use for creating a node elementClass = None - #The class to use for creating comments + # The class to use for creating comments commentClass = None - #The class to use for creating doctypes + # The class to use for creating doctypes doctypeClass = None - - #Fragment class + + # Fragment class fragmentClass = None def __init__(self, namespaceHTMLElements): @@ -138,12 +148,12 @@ class TreeBuilder(object): else: self.defaultNamespace = None self.reset() - + def reset(self): 
self.openElements = [] self.activeFormattingElements = ActiveFormattingElements() - #XXX - rename these to headElement, formElement + # XXX - rename these to headElement, formElement self.headPointer = None self.formPointer = None @@ -153,30 +163,20 @@ class TreeBuilder(object): def elementInScope(self, target, variant=None): - #If we pass a node in we match that. if we pass a string - #match any node with that name + # If we pass a node in we match that. if we pass a string + # match any node with that name exactNode = hasattr(target, "nameTuple") - listElementsMap = { - None:(scopingElements, False), - "button":(scopingElements | set([(namespaces["html"], "button")]), False), - "list":(scopingElements | set([(namespaces["html"], "ol"), - (namespaces["html"], "ul")]), False), - "table":(set([(namespaces["html"], "html"), - (namespaces["html"], "table")]), False), - "select":(set([(namespaces["html"], "optgroup"), - (namespaces["html"], "option")]), True) - } listElements, invert = listElementsMap[variant] for node in reversed(self.openElements): if (node.name == target and not exactNode or - node == target and exactNode): + node == target and exactNode): return True - elif (invert ^ (node.nameTuple in listElements)): + elif (invert ^ (node.nameTuple in listElements)): return False - assert False # We should never reach this point + assert False # We should never reach this point def reconstructActiveFormattingElements(self): # Within this algorithm the order of steps described in the @@ -196,7 +196,7 @@ class TreeBuilder(object): # Step 6 while entry != Marker and entry not in self.openElements: if i == 0: - #This will be reset to 0 below + # This will be reset to 0 below i = -1 break i -= 1 @@ -209,13 +209,13 @@ class TreeBuilder(object): # Step 8 entry = self.activeFormattingElements[i] - clone = entry.cloneNode() #Mainly to get a new copy of the attributes + clone = entry.cloneNode() # Mainly to get a new copy of the attributes # Step 9 - element = 
self.insertElement({"type":"StartTag", - "name":clone.name, - "namespace":clone.namespace, - "data":clone.attributes}) + element = self.insertElement({"type": "StartTag", + "name": clone.name, + "namespace": clone.namespace, + "data": clone.attributes}) # Step 10 self.activeFormattingElements[i] = element @@ -260,7 +260,7 @@ class TreeBuilder(object): if parent is None: parent = self.openElements[-1] parent.appendChild(self.commentClass(token["data"])) - + def createElement(self, token): """Create an element but don't insert it anywhere""" name = token["name"] @@ -282,10 +282,10 @@ class TreeBuilder(object): self.insertElement = self.insertElementNormal insertFromTable = property(_getInsertFromTable, _setInsertFromTable) - + def insertElementNormal(self, token): name = token["name"] - assert type(name) == unicode, "Element %s not unicode"%name + assert isinstance(name, text_type), "Element %s not unicode" % name namespace = token.get("namespace", self.defaultNamespace) element = self.elementClass(name, namespace) element.attributes = token["data"] @@ -294,13 +294,13 @@ class TreeBuilder(object): return element def insertElementTable(self, token): - """Create an element and insert it into the tree""" + """Create an element and insert it into the tree""" element = self.createElement(token) if self.openElements[-1].name not in tableInsertModeElements: return self.insertElementNormal(token) else: - #We should be in the InTable mode. This means we want to do - #special magic element rearranging + # We should be in the InTable mode. 
This means we want to do + # special magic element rearranging parent, insertBefore = self.getTableMisnestedNodePosition() if insertBefore is None: parent.appendChild(element) @@ -315,7 +315,7 @@ class TreeBuilder(object): parent = self.openElements[-1] if (not self.insertFromTable or (self.insertFromTable and - self.openElements[-1].name + self.openElements[-1].name not in tableInsertModeElements)): parent.insertText(data) else: @@ -323,14 +323,14 @@ class TreeBuilder(object): # special magic element rearranging parent, insertBefore = self.getTableMisnestedNodePosition() parent.insertText(data, insertBefore) - + def getTableMisnestedNodePosition(self): """Get the foster parent element, and sibling to insert before (or None) when inserting a misnested table node""" # The foster parent element is the one which comes before the most # recently opened table element # XXX - this is really inelegant - lastTable=None + lastTable = None fosterParent = None insertBefore = None for elm in self.openElements[::-1]: @@ -354,7 +354,7 @@ class TreeBuilder(object): name = self.openElements[-1].name # XXX td, th and tr are not actually needed if (name in frozenset(("dd", "dt", "li", "option", "optgroup", "p", "rp", "rt")) - and name != exclude): + and name != exclude): self.openElements.pop() # XXX This is not entirely what the specification says. We should # investigate it more closely. 
@@ -363,10 +363,10 @@ class TreeBuilder(object): def getDocument(self): "Return the final tree" return self.document - + def getFragment(self): "Return the final fragment" - #assert self.innerHTML + # assert self.innerHTML fragment = self.fragmentClass() self.openElements[0].reparentChildren(fragment) return fragment diff --git a/libs/html5lib/treebuilders/dom.py b/libs/html5lib/treebuilders/dom.py index 9578da2..61e5ed7 100644 --- a/libs/html5lib/treebuilders/dom.py +++ b/libs/html5lib/treebuilders/dom.py @@ -1,45 +1,38 @@ +from __future__ import absolute_import, division, unicode_literals -from xml.dom import minidom, Node, XML_NAMESPACE, XMLNS_NAMESPACE -try: - from types import ModuleType -except: - from new import module as ModuleType -import re -import weakref -import _base -from html5lib import constants, ihatexml -from html5lib.constants import namespaces +from xml.dom import minidom, Node +import weakref -moduleCache = {} +from . import _base +from .. import constants +from ..constants import namespaces +from ..utils import moduleFactoryFactory -def getDomModule(DomImplementation): - name = "_" + DomImplementation.__name__+"builder" - if name in moduleCache: - return moduleCache[name] - else: - mod = ModuleType(name) - objs = getDomBuilder(DomImplementation) - mod.__dict__.update(objs) - moduleCache[name] = mod - return mod def getDomBuilder(DomImplementation): Dom = DomImplementation + class AttrList(object): def __init__(self, element): self.element = element + def __iter__(self): - return self.element.attributes.items().__iter__() + return list(self.element.attributes.items()).__iter__() + def __setitem__(self, name, value): self.element.setAttribute(name, value) + def __len__(self): - return len(self.element.attributes.items()) + return len(list(self.element.attributes.items())) + def items(self): return [(item[0], item[1]) for item in - self.element.attributes.items()] + list(self.element.attributes.items())] + def keys(self): - return 
self.element.attributes.keys() + return list(self.element.attributes.keys()) + def __getitem__(self, name): return self.element.getAttribute(name) @@ -48,68 +41,68 @@ def getDomBuilder(DomImplementation): raise NotImplementedError else: return self.element.hasAttribute(name) - + class NodeBuilder(_base.Node): def __init__(self, element): _base.Node.__init__(self, element.nodeName) self.element = element - namespace = property(lambda self:hasattr(self.element, "namespaceURI") + namespace = property(lambda self: hasattr(self.element, "namespaceURI") and self.element.namespaceURI or None) def appendChild(self, node): node.parent = self self.element.appendChild(node.element) - + def insertText(self, data, insertBefore=None): text = self.element.ownerDocument.createTextNode(data) if insertBefore: self.element.insertBefore(text, insertBefore.element) else: self.element.appendChild(text) - + def insertBefore(self, node, refNode): self.element.insertBefore(node.element, refNode.element) node.parent = self - + def removeChild(self, node): if node.element.parentNode == self.element: self.element.removeChild(node.element) node.parent = None - + def reparentChildren(self, newParent): while self.element.hasChildNodes(): child = self.element.firstChild self.element.removeChild(child) newParent.element.appendChild(child) self.childNodes = [] - + def getAttributes(self): return AttrList(self.element) - + def setAttributes(self, attributes): if attributes: - for name, value in attributes.items(): + for name, value in list(attributes.items()): if isinstance(name, tuple): if name[0] is not None: qualifiedName = (name[0] + ":" + name[1]) else: qualifiedName = name[1] - self.element.setAttributeNS(name[2], qualifiedName, + self.element.setAttributeNS(name[2], qualifiedName, value) else: self.element.setAttribute( name, value) attributes = property(getAttributes, setAttributes) - + def cloneNode(self): return NodeBuilder(self.element.cloneNode(False)) - + def hasContent(self): return 
self.element.hasChildNodes() def getNameTuple(self): - if self.namespace == None: + if self.namespace is None: return namespaces["html"], self.name else: return self.namespace, self.name @@ -118,9 +111,9 @@ def getDomBuilder(DomImplementation): class TreeBuilder(_base.TreeBuilder): def documentClass(self): - self.dom = Dom.getDOMImplementation().createDocument(None,None,None) + self.dom = Dom.getDOMImplementation().createDocument(None, None, None) return weakref.proxy(self) - + def insertDoctype(self, token): name = token["name"] publicId = token["publicId"] @@ -131,7 +124,7 @@ def getDomBuilder(DomImplementation): self.document.appendChild(NodeBuilder(doctype)) if Dom == minidom: doctype.ownerDocument = self.dom - + def elementClass(self, name, namespace=None): if namespace is None and self.defaultNamespace is None: node = self.dom.createElement(name) @@ -139,70 +132,72 @@ def getDomBuilder(DomImplementation): node = self.dom.createElementNS(namespace, name) return NodeBuilder(node) - + def commentClass(self, data): return NodeBuilder(self.dom.createComment(data)) - + def fragmentClass(self): return NodeBuilder(self.dom.createDocumentFragment()) - + def appendChild(self, node): self.dom.appendChild(node.element) - + def testSerializer(self, element): return testSerializer(element) - + def getDocument(self): return self.dom - + def getFragment(self): return _base.TreeBuilder.getFragment(self).element - + def insertText(self, data, parent=None): - data=data - if parent <> self: + data = data + if parent != self: _base.TreeBuilder.insertText(self, data, parent) else: # HACK: allow text nodes as children of the document node if hasattr(self.dom, '_child_node_types'): if not Node.TEXT_NODE in self.dom._child_node_types: - self.dom._child_node_types=list(self.dom._child_node_types) + self.dom._child_node_types = list(self.dom._child_node_types) self.dom._child_node_types.append(Node.TEXT_NODE) self.dom.appendChild(self.dom.createTextNode(data)) - + + implementation = 
DomImplementation name = None - + def testSerializer(element): element.normalize() rv = [] + def serializeElement(element, indent=0): if element.nodeType == Node.DOCUMENT_TYPE_NODE: if element.name: if element.publicId or element.systemId: publicId = element.publicId or "" systemId = element.systemId or "" - rv.append( """|%s<!DOCTYPE %s "%s" "%s">"""%( - ' '*indent, element.name, publicId, systemId)) + rv.append("""|%s<!DOCTYPE %s "%s" "%s">""" % + (' ' * indent, element.name, publicId, systemId)) else: - rv.append("|%s<!DOCTYPE %s>"%(' '*indent, element.name)) + rv.append("|%s<!DOCTYPE %s>" % (' ' * indent, element.name)) else: - rv.append("|%s<!DOCTYPE >"%(' '*indent,)) + rv.append("|%s<!DOCTYPE >" % (' ' * indent,)) elif element.nodeType == Node.DOCUMENT_NODE: rv.append("#document") elif element.nodeType == Node.DOCUMENT_FRAGMENT_NODE: rv.append("#document-fragment") elif element.nodeType == Node.COMMENT_NODE: - rv.append("|%s<!-- %s -->"%(' '*indent, element.nodeValue)) + rv.append("|%s<!-- %s -->" % (' ' * indent, element.nodeValue)) elif element.nodeType == Node.TEXT_NODE: - rv.append("|%s\"%s\"" %(' '*indent, element.nodeValue)) + rv.append("|%s\"%s\"" % (' ' * indent, element.nodeValue)) else: if (hasattr(element, "namespaceURI") and - element.namespaceURI != None): - name = "%s %s"%(constants.prefixes[element.namespaceURI], - element.nodeName) + element.namespaceURI is not None): + name = "%s %s" % (constants.prefixes[element.namespaceURI], + element.nodeName) else: name = element.nodeName - rv.append("|%s<%s>"%(' '*indent, name)) + rv.append("|%s<%s>" % (' ' * indent, name)) if element.hasAttributes(): attributes = [] for i in range(len(element.attributes)): @@ -211,81 +206,22 @@ def getDomBuilder(DomImplementation): value = attr.value ns = attr.namespaceURI if ns: - name = "%s %s"%(constants.prefixes[ns], attr.localName) + name = "%s %s" % (constants.prefixes[ns], attr.localName) else: name = attr.nodeName attributes.append((name, value)) for name, 
value in sorted(attributes): - rv.append('|%s%s="%s"' % (' '*(indent+2), name, value)) + rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value)) indent += 2 for child in element.childNodes: serializeElement(child, indent) serializeElement(element, 0) - + return "\n".join(rv) - - def dom2sax(node, handler, nsmap={'xml':XML_NAMESPACE}): - if node.nodeType == Node.ELEMENT_NODE: - if not nsmap: - handler.startElement(node.nodeName, node.attributes) - for child in node.childNodes: dom2sax(child, handler, nsmap) - handler.endElement(node.nodeName) - else: - attributes = dict(node.attributes.itemsNS()) - - # gather namespace declarations - prefixes = [] - for attrname in node.attributes.keys(): - attr = node.getAttributeNode(attrname) - if (attr.namespaceURI == XMLNS_NAMESPACE or - (attr.namespaceURI == None and attr.nodeName.startswith('xmlns'))): - prefix = (attr.nodeName != 'xmlns' and attr.nodeName or None) - handler.startPrefixMapping(prefix, attr.nodeValue) - prefixes.append(prefix) - nsmap = nsmap.copy() - nsmap[prefix] = attr.nodeValue - del attributes[(attr.namespaceURI, attr.nodeName)] - - # apply namespace declarations - for attrname in node.attributes.keys(): - attr = node.getAttributeNode(attrname) - if attr.namespaceURI == None and ':' in attr.nodeName: - prefix = attr.nodeName.split(':')[0] - if nsmap.has_key(prefix): - del attributes[(attr.namespaceURI, attr.nodeName)] - attributes[(nsmap[prefix],attr.nodeName)]=attr.nodeValue - - # SAX events - ns = node.namespaceURI or nsmap.get(None,None) - handler.startElementNS((ns,node.nodeName), node.nodeName, attributes) - for child in node.childNodes: dom2sax(child, handler, nsmap) - handler.endElementNS((ns, node.nodeName), node.nodeName) - for prefix in prefixes: handler.endPrefixMapping(prefix) - - elif node.nodeType in [Node.TEXT_NODE, Node.CDATA_SECTION_NODE]: - handler.characters(node.nodeValue) - - elif node.nodeType == Node.DOCUMENT_NODE: - handler.startDocument() - for child in node.childNodes: 
dom2sax(child, handler, nsmap) - handler.endDocument() - - elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE: - for child in node.childNodes: dom2sax(child, handler, nsmap) - - else: - # ATTRIBUTE_NODE - # ENTITY_NODE - # PROCESSING_INSTRUCTION_NODE - # COMMENT_NODE - # DOCUMENT_TYPE_NODE - # NOTATION_NODE - pass - + return locals() -# Keep backwards compatibility with things that directly load -# classes/functions from this module -for key, value in getDomModule(minidom).__dict__.items(): - globals()[key] = value + +# The actual means to get a module! +getDomModule = moduleFactoryFactory(getDomBuilder) diff --git a/libs/html5lib/treebuilders/etree.py b/libs/html5lib/treebuilders/etree.py index 95be475..2c8ed19 100755 --- a/libs/html5lib/treebuilders/etree.py +++ b/libs/html5lib/treebuilders/etree.py @@ -1,32 +1,21 @@ -try: - from types import ModuleType -except: - from new import module as ModuleType +from __future__ import absolute_import, division, unicode_literals +from six import text_type + import re -import types -import _base -from html5lib import ihatexml -from html5lib import constants -from html5lib.constants import namespaces +from . import _base +from .. import ihatexml +from .. 
import constants +from ..constants import namespaces +from ..utils import moduleFactoryFactory tag_regexp = re.compile("{([^}]*)}(.*)") -moduleCache = {} - -def getETreeModule(ElementTreeImplementation, fullTree=False): - name = "_" + ElementTreeImplementation.__name__+"builder" - if name in moduleCache: - return moduleCache[name] - else: - mod = ModuleType("_" + ElementTreeImplementation.__name__+"builder") - objs = getETreeBuilder(ElementTreeImplementation, fullTree) - mod.__dict__.update(objs) - moduleCache[name] = mod - return mod def getETreeBuilder(ElementTreeImplementation, fullTree=False): ElementTree = ElementTreeImplementation + ElementTreeCommentType = ElementTree.Comment("asd").tag + class Element(_base.Node): def __init__(self, name, namespace=None): self._name = name @@ -45,16 +34,16 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False): if namespace is None: etree_tag = name else: - etree_tag = "{%s}%s"%(namespace, name) + etree_tag = "{%s}%s" % (namespace, name) return etree_tag - + def _setName(self, name): self._name = name self._element.tag = self._getETreeTag(self._name, self._namespace) - + def _getName(self): return self._name - + name = property(_getName, _setName) def _setNamespace(self, namespace): @@ -65,81 +54,82 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False): return self._namespace namespace = property(_getNamespace, _setNamespace) - + def _getAttributes(self): return self._element.attrib - + def _setAttributes(self, attributes): - #Delete existing attributes first - #XXX - there may be a better way to do this... - for key in self._element.attrib.keys(): + # Delete existing attributes first + # XXX - there may be a better way to do this... 
+ for key in list(self._element.attrib.keys()): del self._element.attrib[key] - for key, value in attributes.iteritems(): + for key, value in attributes.items(): if isinstance(key, tuple): - name = "{%s}%s"%(key[2], key[1]) + name = "{%s}%s" % (key[2], key[1]) else: name = key self._element.set(name, value) - + attributes = property(_getAttributes, _setAttributes) - + def _getChildNodes(self): - return self._childNodes + return self._childNodes + def _setChildNodes(self, value): del self._element[:] self._childNodes = [] for element in value: self.insertChild(element) - + childNodes = property(_getChildNodes, _setChildNodes) - + def hasContent(self): """Return true if the node has children or text""" return bool(self._element.text or len(self._element)) - + def appendChild(self, node): self._childNodes.append(node) self._element.append(node._element) node.parent = self - + def insertBefore(self, node, refNode): index = list(self._element).index(refNode._element) self._element.insert(index, node._element) node.parent = self - + def removeChild(self, node): self._element.remove(node._element) - node.parent=None - + node.parent = None + def insertText(self, data, insertBefore=None): if not(len(self._element)): if not self._element.text: self._element.text = "" self._element.text += data elif insertBefore is None: - #Insert the text as the tail of the last child element + # Insert the text as the tail of the last child element if not self._element[-1].tail: self._element[-1].tail = "" self._element[-1].tail += data else: - #Insert the text before the specified node + # Insert the text before the specified node children = list(self._element) index = children.index(insertBefore._element) if index > 0: - if not self._element[index-1].tail: - self._element[index-1].tail = "" - self._element[index-1].tail += data + if not self._element[index - 1].tail: + self._element[index - 1].tail = "" + self._element[index - 1].tail += data else: if not self._element.text: 
self._element.text = "" self._element.text += data - + def cloneNode(self): element = type(self)(self.name, self.namespace) - for name, value in self.attributes.iteritems(): + for name, value in self.attributes.items(): element.attributes[name] = value return element - + def reparentChildren(self, newParent): if newParent.childNodes: newParent.childNodes[-1]._element.tail += self._element.text @@ -150,60 +140,60 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False): newParent._element.text += self._element.text self._element.text = "" _base.Node.reparentChildren(self, newParent) - + class Comment(Element): def __init__(self, data): - #Use the superclass constructor to set all properties on the - #wrapper element + # Use the superclass constructor to set all properties on the + # wrapper element self._element = ElementTree.Comment(data) self.parent = None self._childNodes = [] self._flags = [] - + def _getData(self): return self._element.text - + def _setData(self, value): self._element.text = value - + data = property(_getData, _setData) - + class DocumentType(Element): def __init__(self, name, publicId, systemId): - Element.__init__(self, "<!DOCTYPE>") + Element.__init__(self, "<!DOCTYPE>") self._element.text = name self.publicId = publicId self.systemId = systemId def _getPublicId(self): - return self._element.get(u"publicId", "") + return self._element.get("publicId", "") def _setPublicId(self, value): if value is not None: - self._element.set(u"publicId", value) + self._element.set("publicId", value) publicId = property(_getPublicId, _setPublicId) - + def _getSystemId(self): - return self._element.get(u"systemId", "") + return self._element.get("systemId", "") def _setSystemId(self, value): if value is not None: - self._element.set(u"systemId", value) + self._element.set("systemId", value) systemId = property(_getSystemId, _setSystemId) - + class Document(Element): def __init__(self): - Element.__init__(self, "<DOCUMENT_ROOT>") - + 
Element.__init__(self, "DOCUMENT_ROOT") + class DocumentFragment(Element): def __init__(self): - Element.__init__(self, "<DOCUMENT_FRAGMENT>") - + Element.__init__(self, "DOCUMENT_FRAGMENT") + def testSerializer(element): rv = [] - finalText = None + def serializeElement(element, indent=0): if not(hasattr(element, "tag")): element = element.getroot() @@ -211,20 +201,23 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False): if element.get("publicId") or element.get("systemId"): publicId = element.get("publicId") or "" systemId = element.get("systemId") or "" - rv.append( """<!DOCTYPE %s "%s" "%s">"""%( - element.text, publicId, systemId)) - else: - rv.append("<!DOCTYPE %s>"%(element.text,)) - elif element.tag == "<DOCUMENT_ROOT>": + rv.append("""<!DOCTYPE %s "%s" "%s">""" % + (element.text, publicId, systemId)) + else: + rv.append("<!DOCTYPE %s>" % (element.text,)) + elif element.tag == "DOCUMENT_ROOT": rv.append("#document") - if element.text: - rv.append("|%s\"%s\""%(' '*(indent+2), element.text)) - if element.tail: - finalText = element.tail - elif element.tag == ElementTree.Comment: - rv.append("|%s<!-- %s -->"%(' '*indent, element.text)) + if element.text is not None: + rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text)) + if element.tail is not None: + raise TypeError("Document node cannot have tail") + if hasattr(element, "attrib") and len(element.attrib): + raise TypeError("Document node cannot have attributes") + elif element.tag == ElementTreeCommentType: + rv.append("|%s<!-- %s -->" % (' ' * indent, element.text)) else: - assert type(element.tag) in types.StringTypes, "Expected unicode, got %s"%type(element.tag) + assert isinstance(element.tag, text_type), \ + "Expected unicode, got %s, %s" % (type(element.tag), element.tag) nsmatch = tag_regexp.match(element.tag) if nsmatch is None: @@ -232,113 +225,113 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False): else: ns, name = nsmatch.groups() prefix = constants.prefixes[ns] 
- name = "%s %s"%(prefix, name) - rv.append("|%s<%s>"%(' '*indent, name)) + name = "%s %s" % (prefix, name) + rv.append("|%s<%s>" % (' ' * indent, name)) if hasattr(element, "attrib"): attributes = [] - for name, value in element.attrib.iteritems(): + for name, value in element.attrib.items(): nsmatch = tag_regexp.match(name) if nsmatch is not None: ns, name = nsmatch.groups() prefix = constants.prefixes[ns] - attr_string = "%s %s"%(prefix, name) + attr_string = "%s %s" % (prefix, name) else: attr_string = name attributes.append((attr_string, value)) for name, value in sorted(attributes): - rv.append('|%s%s="%s"' % (' '*(indent+2), name, value)) + rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value)) if element.text: - rv.append("|%s\"%s\"" %(' '*(indent+2), element.text)) + rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text)) indent += 2 for child in element: serializeElement(child, indent) if element.tail: - rv.append("|%s\"%s\"" %(' '*(indent-2), element.tail)) + rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail)) serializeElement(element, 0) - - if finalText is not None: - rv.append("|%s\"%s\""%(' '*2, finalText)) - + return "\n".join(rv) - + def tostring(element): """Serialize an element and its child nodes to a string""" rv = [] - finalText = None filter = ihatexml.InfosetFilter() + def serializeElement(element): - if type(element) == type(ElementTree.ElementTree): + if isinstance(element, ElementTree.ElementTree): element = element.getroot() - + if element.tag == "<!DOCTYPE>": if element.get("publicId") or element.get("systemId"): publicId = element.get("publicId") or "" systemId = element.get("systemId") or "" - rv.append( """<!DOCTYPE %s PUBLIC "%s" "%s">"""%( - element.text, publicId, systemId)) - else: - rv.append("<!DOCTYPE %s>"%(element.text,)) - elif element.tag == "<DOCUMENT_ROOT>": - if element.text: + rv.append("""<!DOCTYPE %s PUBLIC "%s" "%s">""" % + (element.text, publicId, systemId)) + else: + rv.append("<!DOCTYPE %s>" % 
(element.text,)) + elif element.tag == "DOCUMENT_ROOT": + if element.text is not None: rv.append(element.text) - if element.tail: - finalText = element.tail - + if element.tail is not None: + raise TypeError("Document node cannot have tail") + if hasattr(element, "attrib") and len(element.attrib): + raise TypeError("Document node cannot have attributes") + for child in element: serializeElement(child) - - elif type(element.tag) == type(ElementTree.Comment): - rv.append("<!--%s-->"%(element.text,)) + + elif element.tag == ElementTreeCommentType: + rv.append("<!--%s-->" % (element.text,)) else: - #This is assumed to be an ordinary element + # This is assumed to be an ordinary element if not element.attrib: - rv.append("<%s>"%(filter.fromXmlName(element.tag),)) + rv.append("<%s>" % (filter.fromXmlName(element.tag),)) else: - attr = " ".join(["%s=\"%s\""%( - filter.fromXmlName(name), value) - for name, value in element.attrib.iteritems()]) - rv.append("<%s %s>"%(element.tag, attr)) + attr = " ".join(["%s=\"%s\"" % ( + filter.fromXmlName(name), value) + for name, value in element.attrib.items()]) + rv.append("<%s %s>" % (element.tag, attr)) if element.text: rv.append(element.text) - + for child in element: serializeElement(child) - - rv.append("</%s>"%(element.tag,)) - + + rv.append("</%s>" % (element.tag,)) + if element.tail: rv.append(element.tail) - + serializeElement(element) - - if finalText is not None: - rv.append("%s\""%(' '*2, finalText)) - + return "".join(rv) - + class TreeBuilder(_base.TreeBuilder): documentClass = Document doctypeClass = DocumentType elementClass = Element commentClass = Comment fragmentClass = DocumentFragment - + implementation = ElementTreeImplementation + def testSerializer(self, element): return testSerializer(element) - + def getDocument(self): if fullTree: return self.document._element else: if self.defaultNamespace is not None: return self.document._element.find( - "{%s}html"%self.defaultNamespace) + "{%s}html" % 
self.defaultNamespace) else: return self.document._element.find("html") - + def getFragment(self): return _base.TreeBuilder.getFragment(self)._element - + return locals() + + +getETreeModule = moduleFactoryFactory(getETreeBuilder) diff --git a/libs/html5lib/treebuilders/etree_lxml.py b/libs/html5lib/treebuilders/etree_lxml.py index eee1e3b..35d08ef 100644 --- a/libs/html5lib/treebuilders/etree_lxml.py +++ b/libs/html5lib/treebuilders/etree_lxml.py @@ -1,20 +1,3 @@ -import warnings -import re - -import _base -from html5lib.constants import DataLossWarning -import html5lib.constants as constants -import etree as etree_builders -from html5lib import ihatexml - -try: - import lxml.etree as etree -except ImportError: - pass - -fullTree = True -tag_regexp = re.compile("{([^}]*)}(.*)") - """Module for supporting the lxml.etree library. The idea here is to use as much of the native library as possible, without using fragile hacks like custom element names that break between releases. The downside of this is that we cannot represent @@ -26,12 +9,34 @@ Docypes with no name When any of these things occur, we emit a DataLossWarning """ +from __future__ import absolute_import, division, unicode_literals + +import warnings +import re +import sys + +from . import _base +from ..constants import DataLossWarning +from .. import constants +from . import etree as etree_builders +from .. 
import ihatexml + +import lxml.etree as etree + + +fullTree = True +tag_regexp = re.compile("{([^}]*)}(.*)") + +comment_type = etree.Comment("asd").tag + + class DocumentType(object): def __init__(self, name, publicId, systemId): - self.name = name + self.name = name self.publicId = publicId self.systemId = systemId + class Document(object): def __init__(self): self._elementTree = None @@ -42,118 +47,126 @@ class Document(object): def _getChildNodes(self): return self._childNodes - + childNodes = property(_getChildNodes) + def testSerializer(element): rv = [] finalText = None - filter = ihatexml.InfosetFilter() + infosetFilter = ihatexml.InfosetFilter() + def serializeElement(element, indent=0): if not hasattr(element, "tag"): - if hasattr(element, "getroot"): - #Full tree case + if hasattr(element, "getroot"): + # Full tree case rv.append("#document") if element.docinfo.internalDTD: - if not (element.docinfo.public_id or + if not (element.docinfo.public_id or element.docinfo.system_url): - dtd_str = "<!DOCTYPE %s>"%element.docinfo.root_name + dtd_str = "<!DOCTYPE %s>" % element.docinfo.root_name else: - dtd_str = """<!DOCTYPE %s "%s" "%s">"""%( - element.docinfo.root_name, + dtd_str = """<!DOCTYPE %s "%s" "%s">""" % ( + element.docinfo.root_name, element.docinfo.public_id, element.docinfo.system_url) - rv.append("|%s%s"%(' '*(indent+2), dtd_str)) + rv.append("|%s%s" % (' ' * (indent + 2), dtd_str)) next_element = element.getroot() while next_element.getprevious() is not None: next_element = next_element.getprevious() while next_element is not None: - serializeElement(next_element, indent+2) + serializeElement(next_element, indent + 2) next_element = next_element.getnext() - elif isinstance(element, basestring): - #Text in a fragment - rv.append("|%s\"%s\""%(' '*indent, element)) + elif isinstance(element, str) or isinstance(element, bytes): + # Text in a fragment + assert isinstance(element, str) or sys.version_info.major == 2 + rv.append("|%s\"%s\"" % (' ' * 
indent, element)) else: - #Fragment case + # Fragment case rv.append("#document-fragment") for next_element in element: - serializeElement(next_element, indent+2) - elif type(element.tag) == type(etree.Comment): - rv.append("|%s<!-- %s -->"%(' '*indent, element.text)) + serializeElement(next_element, indent + 2) + elif element.tag == comment_type: + rv.append("|%s<!-- %s -->" % (' ' * indent, element.text)) + if hasattr(element, "tail") and element.tail: + rv.append("|%s\"%s\"" % (' ' * indent, element.tail)) else: + assert isinstance(element, etree._Element) nsmatch = etree_builders.tag_regexp.match(element.tag) if nsmatch is not None: ns = nsmatch.group(1) tag = nsmatch.group(2) prefix = constants.prefixes[ns] - rv.append("|%s<%s %s>"%(' '*indent, prefix, - filter.fromXmlName(tag))) + rv.append("|%s<%s %s>" % (' ' * indent, prefix, + infosetFilter.fromXmlName(tag))) else: - rv.append("|%s<%s>"%(' '*indent, - filter.fromXmlName(element.tag))) + rv.append("|%s<%s>" % (' ' * indent, + infosetFilter.fromXmlName(element.tag))) if hasattr(element, "attrib"): attributes = [] - for name, value in element.attrib.iteritems(): + for name, value in element.attrib.items(): nsmatch = tag_regexp.match(name) if nsmatch is not None: ns, name = nsmatch.groups() - name = filter.fromXmlName(name) + name = infosetFilter.fromXmlName(name) prefix = constants.prefixes[ns] - attr_string = "%s %s"%(prefix, name) + attr_string = "%s %s" % (prefix, name) else: - attr_string = filter.fromXmlName(name) + attr_string = infosetFilter.fromXmlName(name) attributes.append((attr_string, value)) for name, value in sorted(attributes): - rv.append('|%s%s="%s"' % (' '*(indent+2), name, value)) + rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value)) if element.text: - rv.append("|%s\"%s\"" %(' '*(indent+2), element.text)) + rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text)) indent += 2 - for child in element.getchildren(): + for child in element: serializeElement(child, indent) - if 
hasattr(element, "tail") and element.tail: - rv.append("|%s\"%s\"" %(' '*(indent-2), element.tail)) + if hasattr(element, "tail") and element.tail: + rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail)) serializeElement(element, 0) if finalText is not None: - rv.append("|%s\"%s\""%(' '*2, finalText)) + rv.append("|%s\"%s\"" % (' ' * 2, finalText)) return "\n".join(rv) + def tostring(element): """Serialize an element and its child nodes to a string""" rv = [] finalText = None + def serializeElement(element): if not hasattr(element, "tag"): if element.docinfo.internalDTD: if element.docinfo.doctype: dtd_str = element.docinfo.doctype else: - dtd_str = "<!DOCTYPE %s>"%element.docinfo.root_name + dtd_str = "<!DOCTYPE %s>" % element.docinfo.root_name rv.append(dtd_str) serializeElement(element.getroot()) - - elif type(element.tag) == type(etree.Comment): - rv.append("<!--%s-->"%(element.text,)) - + + elif element.tag == comment_type: + rv.append("<!--%s-->" % (element.text,)) + else: - #This is assumed to be an ordinary element + # This is assumed to be an ordinary element if not element.attrib: - rv.append("<%s>"%(element.tag,)) + rv.append("<%s>" % (element.tag,)) else: - attr = " ".join(["%s=\"%s\""%(name, value) - for name, value in element.attrib.iteritems()]) - rv.append("<%s %s>"%(element.tag, attr)) + attr = " ".join(["%s=\"%s\"" % (name, value) + for name, value in element.attrib.items()]) + rv.append("<%s %s>" % (element.tag, attr)) if element.text: rv.append(element.text) - for child in element.getchildren(): + for child in element: serializeElement(child) - rv.append("</%s>"%(element.tag,)) + rv.append("</%s>" % (element.tag,)) if hasattr(element, "tail") and element.tail: rv.append(element.tail) @@ -161,56 +174,57 @@ def tostring(element): serializeElement(element) if finalText is not None: - rv.append("%s\""%(' '*2, finalText)) + rv.append("%s\"" % (' ' * 2, finalText)) return "".join(rv) - + class TreeBuilder(_base.TreeBuilder): documentClass = 
Document doctypeClass = DocumentType elementClass = None commentClass = None - fragmentClass = Document + fragmentClass = Document + implementation = etree - def __init__(self, namespaceHTMLElements, fullTree = False): + def __init__(self, namespaceHTMLElements, fullTree=False): builder = etree_builders.getETreeModule(etree, fullTree=fullTree) - filter = self.filter = ihatexml.InfosetFilter() + infosetFilter = self.infosetFilter = ihatexml.InfosetFilter() self.namespaceHTMLElements = namespaceHTMLElements class Attributes(dict): def __init__(self, element, value={}): self._element = element dict.__init__(self, value) - for key, value in self.iteritems(): + for key, value in self.items(): if isinstance(key, tuple): - name = "{%s}%s"%(key[2], filter.coerceAttribute(key[1])) + name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1])) else: - name = filter.coerceAttribute(key) + name = infosetFilter.coerceAttribute(key) self._element._element.attrib[name] = value def __setitem__(self, key, value): dict.__setitem__(self, key, value) if isinstance(key, tuple): - name = "{%s}%s"%(key[2], filter.coerceAttribute(key[1])) + name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1])) else: - name = filter.coerceAttribute(key) + name = infosetFilter.coerceAttribute(key) self._element._element.attrib[name] = value class Element(builder.Element): def __init__(self, name, namespace): - name = filter.coerceElement(name) + name = infosetFilter.coerceElement(name) builder.Element.__init__(self, name, namespace=namespace) self._attributes = Attributes(self) def _setName(self, name): - self._name = filter.coerceElement(name) + self._name = infosetFilter.coerceElement(name) self._element.tag = self._getETreeTag( self._name, self._namespace) - + def _getName(self): - return filter.fromXmlName(self._name) - + return infosetFilter.fromXmlName(self._name) + name = property(_getName, _setName) def _getAttributes(self): @@ -218,24 +232,23 @@ class 
TreeBuilder(_base.TreeBuilder): def _setAttributes(self, attributes): self._attributes = Attributes(self, attributes) - + attributes = property(_getAttributes, _setAttributes) def insertText(self, data, insertBefore=None): - data = filter.coerceCharacters(data) + data = infosetFilter.coerceCharacters(data) builder.Element.insertText(self, data, insertBefore) def appendChild(self, child): builder.Element.appendChild(self, child) - class Comment(builder.Comment): def __init__(self, data): - data = filter.coerceComment(data) + data = infosetFilter.coerceComment(data) builder.Comment.__init__(self, data) def _setData(self, data): - data = filter.coerceComment(data) + data = infosetFilter.coerceComment(data) self._element.text = data def _getData(self): @@ -245,9 +258,9 @@ class TreeBuilder(_base.TreeBuilder): self.elementClass = Element self.commentClass = builder.Comment - #self.fragmentClass = builder.DocumentFragment + # self.fragmentClass = builder.DocumentFragment _base.TreeBuilder.__init__(self, namespaceHTMLElements) - + def reset(self): _base.TreeBuilder.reset(self) self.insertComment = self.insertCommentInitial @@ -262,13 +275,13 @@ class TreeBuilder(_base.TreeBuilder): return self.document._elementTree else: return self.document._elementTree.getroot() - + def getFragment(self): fragment = [] element = self.openElements[0]._element if element.text: fragment.append(element.text) - fragment.extend(element.getchildren()) + fragment.extend(list(element)) if element.tail: fragment.append(element.tail) return fragment @@ -278,59 +291,79 @@ class TreeBuilder(_base.TreeBuilder): publicId = token["publicId"] systemId = token["systemId"] - if not name or ihatexml.nonXmlNameBMPRegexp.search(name) or name[0] == '"': - warnings.warn("lxml cannot represent null or non-xml doctype", DataLossWarning) + if not name: + warnings.warn("lxml cannot represent empty doctype", DataLossWarning) + self.doctype = None + else: + coercedName = self.infosetFilter.coerceElement(name) + if 
coercedName != name: + warnings.warn("lxml cannot represent non-xml doctype", DataLossWarning) + + doctype = self.doctypeClass(coercedName, publicId, systemId) + self.doctype = doctype - doctype = self.doctypeClass(name, publicId, systemId) - self.doctype = doctype - def insertCommentInitial(self, data, parent=None): self.initial_comments.append(data) - + + def insertCommentMain(self, data, parent=None): + if (parent == self.document and + self.document._elementTree.getroot()[-1].tag == comment_type): + warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning) + super(TreeBuilder, self).insertComment(data, parent) + def insertRoot(self, token): """Create the document root""" - #Because of the way libxml2 works, it doesn't seem to be possible to - #alter information like the doctype after the tree has been parsed. - #Therefore we need to use the built-in parser to create our iniial - #tree, after which we can add elements like normal + # Because of the way libxml2 works, it doesn't seem to be possible to + # alter information like the doctype after the tree has been parsed. 
+ # Therefore we need to use the built-in parser to create our iniial + # tree, after which we can add elements like normal docStr = "" - if self.doctype and self.doctype.name and not self.doctype.name.startswith('"'): - docStr += "<!DOCTYPE %s"%self.doctype.name - if (self.doctype.publicId is not None or - self.doctype.systemId is not None): - docStr += ' PUBLIC "%s" "%s"'%(self.doctype.publicId or "", - self.doctype.systemId or "") + if self.doctype: + assert self.doctype.name + docStr += "<!DOCTYPE %s" % self.doctype.name + if (self.doctype.publicId is not None or + self.doctype.systemId is not None): + docStr += (' PUBLIC "%s" ' % + (self.infosetFilter.coercePubid(self.doctype.publicId or ""))) + if self.doctype.systemId: + sysid = self.doctype.systemId + if sysid.find("'") >= 0 and sysid.find('"') >= 0: + warnings.warn("DOCTYPE system cannot contain single and double quotes", DataLossWarning) + sysid = sysid.replace("'", 'U00027') + if sysid.find("'") >= 0: + docStr += '"%s"' % sysid + else: + docStr += "'%s'" % sysid + else: + docStr += "''" docStr += ">" + if self.doctype.name != token["name"]: + warnings.warn("lxml cannot represent doctype with a different name to the root element", DataLossWarning) docStr += "<THIS_SHOULD_NEVER_APPEAR_PUBLICLY/>" - - try: - root = etree.fromstring(docStr) - except etree.XMLSyntaxError: - print docStr - raise - - #Append the initial comments: + root = etree.fromstring(docStr) + + # Append the initial comments: for comment_token in self.initial_comments: root.addprevious(etree.Comment(comment_token["data"])) - - #Create the root document and add the ElementTree to it + + # Create the root document and add the ElementTree to it self.document = self.documentClass() self.document._elementTree = root.getroottree() - + # Give the root element the right name name = token["name"] namespace = token.get("namespace", self.defaultNamespace) if namespace is None: etree_tag = name else: - etree_tag = "{%s}%s"%(namespace, name) + 
etree_tag = "{%s}%s" % (namespace, name) root.tag = etree_tag - - #Add the root element to the internal child/open data structures + + # Add the root element to the internal child/open data structures root_element = self.elementClass(name, namespace) root_element._element = root self.document._childNodes.append(root_element) self.openElements.append(root_element) - - #Reset to the default insert comment function - self.insertComment = super(TreeBuilder, self).insertComment + + # Reset to the default insert comment function + self.insertComment = self.insertCommentMain diff --git a/libs/html5lib/treebuilders/simpletree.py b/libs/html5lib/treebuilders/simpletree.py deleted file mode 100755 index 67fe758..0000000 --- a/libs/html5lib/treebuilders/simpletree.py +++ /dev/null @@ -1,256 +0,0 @@ -import _base -from html5lib.constants import voidElements, namespaces, prefixes -from xml.sax.saxutils import escape - -# Really crappy basic implementation of a DOM-core like thing -class Node(_base.Node): - type = -1 - def __init__(self, name): - self.name = name - self.parent = None - self.value = None - self.childNodes = [] - self._flags = [] - - def __iter__(self): - for node in self.childNodes: - yield node - for item in node: - yield item - - def __unicode__(self): - return self.name - - def toxml(self): - raise NotImplementedError - - def printTree(self, indent=0): - tree = '\n|%s%s' % (' '* indent, unicode(self)) - for child in self.childNodes: - tree += child.printTree(indent + 2) - return tree - - def appendChild(self, node): - assert isinstance(node, Node) - if (isinstance(node, TextNode) and self.childNodes and - isinstance(self.childNodes[-1], TextNode)): - self.childNodes[-1].value += node.value - else: - self.childNodes.append(node) - node.parent = self - - def insertText(self, data, insertBefore=None): - assert isinstance(data, unicode), "data %s is of type %s expected unicode"%(repr(data), type(data)) - if insertBefore is None: - self.appendChild(TextNode(data)) 
- else: - self.insertBefore(TextNode(data), insertBefore) - - def insertBefore(self, node, refNode): - index = self.childNodes.index(refNode) - if (isinstance(node, TextNode) and index > 0 and - isinstance(self.childNodes[index - 1], TextNode)): - self.childNodes[index - 1].value += node.value - else: - self.childNodes.insert(index, node) - node.parent = self - - def removeChild(self, node): - try: - self.childNodes.remove(node) - except: - # XXX - raise - node.parent = None - - def cloneNode(self): - raise NotImplementedError - - def hasContent(self): - """Return true if the node has children or text""" - return bool(self.childNodes) - - def getNameTuple(self): - if self.namespace == None: - return namespaces["html"], self.name - else: - return self.namespace, self.name - - nameTuple = property(getNameTuple) - -class Document(Node): - type = 1 - def __init__(self): - Node.__init__(self, None) - - def __str__(self): - return "#document" - - def __unicode__(self): - return str(self) - - def appendChild(self, child): - Node.appendChild(self, child) - - def toxml(self, encoding="utf=8"): - result = "" - for child in self.childNodes: - result += child.toxml() - return result.encode(encoding) - - def hilite(self, encoding="utf-8"): - result = "<pre>" - for child in self.childNodes: - result += child.hilite() - return result.encode(encoding) + "</pre>" - - def printTree(self): - tree = unicode(self) - for child in self.childNodes: - tree += child.printTree(2) - return tree - - def cloneNode(self): - return Document() - -class DocumentFragment(Document): - type = 2 - def __str__(self): - return "#document-fragment" - - def __unicode__(self): - return str(self) - - def cloneNode(self): - return DocumentFragment() - -class DocumentType(Node): - type = 3 - def __init__(self, name, publicId, systemId): - Node.__init__(self, name) - self.publicId = publicId - self.systemId = systemId - - def __unicode__(self): - if self.publicId or self.systemId: - publicId = self.publicId or 
"" - systemId = self.systemId or "" - return """<!DOCTYPE %s "%s" "%s">"""%( - self.name, publicId, systemId) - - else: - return u"<!DOCTYPE %s>" % self.name - - - toxml = __unicode__ - - def hilite(self): - return '<code class="markup doctype"><!DOCTYPE %s></code>' % self.name - - def cloneNode(self): - return DocumentType(self.name, self.publicId, self.systemId) - -class TextNode(Node): - type = 4 - def __init__(self, value): - Node.__init__(self, None) - self.value = value - - def __unicode__(self): - return u"\"%s\"" % self.value - - def toxml(self): - return escape(self.value) - - hilite = toxml - - def cloneNode(self): - return TextNode(self.value) - -class Element(Node): - type = 5 - def __init__(self, name, namespace=None): - Node.__init__(self, name) - self.namespace = namespace - self.attributes = {} - - def __unicode__(self): - if self.namespace == None: - return u"<%s>" % self.name - else: - return u"<%s %s>"%(prefixes[self.namespace], self.name) - - def toxml(self): - result = '<' + self.name - if self.attributes: - for name,value in self.attributes.iteritems(): - result += u' %s="%s"' % (name, escape(value,{'"':'"'})) - if self.childNodes: - result += '>' - for child in self.childNodes: - result += child.toxml() - result += u'</%s>' % self.name - else: - result += u'/>' - return result - - def hilite(self): - result = '<<code class="markup element-name">%s</code>' % self.name - if self.attributes: - for name, value in self.attributes.iteritems(): - result += ' <code class="markup attribute-name">%s</code>=<code class="markup attribute-value">"%s"</code>' % (name, escape(value, {'"':'"'})) - if self.childNodes: - result += ">" - for child in self.childNodes: - result += child.hilite() - elif self.name in voidElements: - return result + ">" - return result + '</<code class="markup element-name">%s</code>>' % self.name - - def printTree(self, indent): - tree = '\n|%s%s' % (' '*indent, unicode(self)) - indent += 2 - if self.attributes: - for name, value 
in sorted(self.attributes.iteritems()): - if isinstance(name, tuple): - name = "%s %s"%(name[0], name[1]) - tree += '\n|%s%s="%s"' % (' ' * indent, name, value) - for child in self.childNodes: - tree += child.printTree(indent) - return tree - - def cloneNode(self): - newNode = Element(self.name) - if hasattr(self, 'namespace'): - newNode.namespace = self.namespace - for attr, value in self.attributes.iteritems(): - newNode.attributes[attr] = value - return newNode - -class CommentNode(Node): - type = 6 - def __init__(self, data): - Node.__init__(self, None) - self.data = data - - def __unicode__(self): - return "<!-- %s -->" % self.data - - def toxml(self): - return "<!--%s-->" % self.data - - def hilite(self): - return '<code class="markup comment"><!--%s--></code>' % escape(self.data) - - def cloneNode(self): - return CommentNode(self.data) - -class TreeBuilder(_base.TreeBuilder): - documentClass = Document - doctypeClass = DocumentType - elementClass = Element - commentClass = CommentNode - fragmentClass = DocumentFragment - - def testSerializer(self, node): - return node.printTree() diff --git a/libs/html5lib/treebuilders/soup.py b/libs/html5lib/treebuilders/soup.py deleted file mode 100644 index 9bc5ff0..0000000 --- a/libs/html5lib/treebuilders/soup.py +++ /dev/null @@ -1,236 +0,0 @@ -import warnings - -warnings.warn("BeautifulSoup 3.x (as of 3.1) is not fully compatible with html5lib and support will be removed in the future", DeprecationWarning) - -from BeautifulSoup import BeautifulSoup, Tag, NavigableString, Comment, Declaration - -import _base -from html5lib.constants import namespaces, DataLossWarning - -class AttrList(object): - def __init__(self, element): - self.element = element - self.attrs = dict(self.element.attrs) - def __iter__(self): - return self.attrs.items().__iter__() - def __setitem__(self, name, value): - "set attr", name, value - self.element[name] = value - def items(self): - return self.attrs.items() - def keys(self): - return 
self.attrs.keys() - def __getitem__(self, name): - return self.attrs[name] - def __contains__(self, name): - return name in self.attrs.keys() - def __eq__(self, other): - if len(self.keys()) != len(other.keys()): - return False - for item in self.keys(): - if item not in other: - return False - if self[item] != other[item]: - return False - return True - -class Element(_base.Node): - def __init__(self, element, soup, namespace): - _base.Node.__init__(self, element.name) - self.element = element - self.soup = soup - self.namespace = namespace - - def _nodeIndex(self, node, refNode): - # Finds a node by identity rather than equality - for index in range(len(self.element.contents)): - if id(self.element.contents[index]) == id(refNode.element): - return index - return None - - def appendChild(self, node): - if (node.element.__class__ == NavigableString and self.element.contents - and self.element.contents[-1].__class__ == NavigableString): - # Concatenate new text onto old text node - # (TODO: This has O(n^2) performance, for input like "a</a>a</a>a</a>...") - newStr = NavigableString(self.element.contents[-1]+node.element) - - # Remove the old text node - # (Can't simply use .extract() by itself, because it fails if - # an equal text node exists within the parent node) - oldElement = self.element.contents[-1] - del self.element.contents[-1] - oldElement.parent = None - oldElement.extract() - - self.element.insert(len(self.element.contents), newStr) - else: - self.element.insert(len(self.element.contents), node.element) - node.parent = self - - def getAttributes(self): - return AttrList(self.element) - - def setAttributes(self, attributes): - if attributes: - for name, value in attributes.items(): - self.element[name] = value - - attributes = property(getAttributes, setAttributes) - - def insertText(self, data, insertBefore=None): - text = TextNode(NavigableString(data), self.soup) - if insertBefore: - self.insertBefore(text, insertBefore) - else: - 
self.appendChild(text) - - def insertBefore(self, node, refNode): - index = self._nodeIndex(node, refNode) - if (node.element.__class__ == NavigableString and self.element.contents - and self.element.contents[index-1].__class__ == NavigableString): - # (See comments in appendChild) - newStr = NavigableString(self.element.contents[index-1]+node.element) - oldNode = self.element.contents[index-1] - del self.element.contents[index-1] - oldNode.parent = None - oldNode.extract() - - self.element.insert(index-1, newStr) - else: - self.element.insert(index, node.element) - node.parent = self - - def removeChild(self, node): - index = self._nodeIndex(node.parent, node) - del node.parent.element.contents[index] - node.element.parent = None - node.element.extract() - node.parent = None - - def reparentChildren(self, newParent): - while self.element.contents: - child = self.element.contents[0] - child.extract() - if isinstance(child, Tag): - newParent.appendChild(Element(child, self.soup, namespaces["html"])) - else: - newParent.appendChild(TextNode(child, self.soup)) - - def cloneNode(self): - node = Element(Tag(self.soup, self.element.name), self.soup, self.namespace) - for key,value in self.attributes: - node.attributes[key] = value - return node - - def hasContent(self): - return self.element.contents - - def getNameTuple(self): - if self.namespace == None: - return namespaces["html"], self.name - else: - return self.namespace, self.name - - nameTuple = property(getNameTuple) - -class TextNode(Element): - def __init__(self, element, soup): - _base.Node.__init__(self, None) - self.element = element - self.soup = soup - - def cloneNode(self): - raise NotImplementedError - -class TreeBuilder(_base.TreeBuilder): - def __init__(self, namespaceHTMLElements): - if namespaceHTMLElements: - warnings.warn("BeautifulSoup cannot represent elements in any namespace", DataLossWarning) - _base.TreeBuilder.__init__(self, namespaceHTMLElements) - - def documentClass(self): - self.soup = 
BeautifulSoup("") - return Element(self.soup, self.soup, None) - - def insertDoctype(self, token): - name = token["name"] - publicId = token["publicId"] - systemId = token["systemId"] - - if publicId: - self.soup.insert(0, Declaration("DOCTYPE %s PUBLIC \"%s\" \"%s\""%(name, publicId, systemId or ""))) - elif systemId: - self.soup.insert(0, Declaration("DOCTYPE %s SYSTEM \"%s\""% - (name, systemId))) - else: - self.soup.insert(0, Declaration("DOCTYPE %s"%name)) - - def elementClass(self, name, namespace): - if namespace is not None: - warnings.warn("BeautifulSoup cannot represent elements in any namespace", DataLossWarning) - return Element(Tag(self.soup, name), self.soup, namespace) - - def commentClass(self, data): - return TextNode(Comment(data), self.soup) - - def fragmentClass(self): - self.soup = BeautifulSoup("") - self.soup.name = "[document_fragment]" - return Element(self.soup, self.soup, None) - - def appendChild(self, node): - self.soup.insert(len(self.soup.contents), node.element) - - def testSerializer(self, element): - return testSerializer(element) - - def getDocument(self): - return self.soup - - def getFragment(self): - return _base.TreeBuilder.getFragment(self).element - -def testSerializer(element): - import re - rv = [] - def serializeElement(element, indent=0): - if isinstance(element, Declaration): - doctype_regexp = r'DOCTYPE\s+(?P<name>[^\s]*)( PUBLIC "(?P<publicId>.*)" "(?P<systemId1>.*)"| SYSTEM "(?P<systemId2>.*)")?' 
- m = re.compile(doctype_regexp).match(element.string) - assert m is not None, "DOCTYPE did not match expected format" - name = m.group('name') - publicId = m.group('publicId') - if publicId is not None: - systemId = m.group('systemId1') or "" - else: - systemId = m.group('systemId2') - - if publicId is not None or systemId is not None: - rv.append("""|%s<!DOCTYPE %s "%s" "%s">"""% - (' '*indent, name, publicId or "", systemId or "")) - else: - rv.append("|%s<!DOCTYPE %s>"%(' '*indent, name)) - - elif isinstance(element, BeautifulSoup): - if element.name == "[document_fragment]": - rv.append("#document-fragment") - else: - rv.append("#document") - - elif isinstance(element, Comment): - rv.append("|%s<!-- %s -->"%(' '*indent, element.string)) - elif isinstance(element, unicode): - rv.append("|%s\"%s\"" %(' '*indent, element)) - else: - rv.append("|%s<%s>"%(' '*indent, element.name)) - if element.attrs: - for name, value in sorted(element.attrs): - rv.append('|%s%s="%s"' % (' '*(indent+2), name, value)) - indent += 2 - if hasattr(element, "contents"): - for child in element.contents: - serializeElement(child, indent) - serializeElement(element, 0) - - return "\n".join(rv) diff --git a/libs/html5lib/treewalkers/__init__.py b/libs/html5lib/treewalkers/__init__.py index 3a606a8..18124e7 100644 --- a/libs/html5lib/treewalkers/__init__.py +++ b/libs/html5lib/treewalkers/__init__.py @@ -8,23 +8,27 @@ implements a 'serialize' method taking a tree as sole argument and returning an iterator generating tokens. """ +from __future__ import absolute_import, division, unicode_literals + +import sys + +from ..utils import default_etree + treeWalkerCache = {} + def getTreeWalker(treeType, implementation=None, **kwargs): """Get a TreeWalker class for various types of tree with built-in support treeType - the name of the tree type required (case-insensitive). 
Supported - values are "simpletree", "dom", "etree" and "beautifulsoup" + values are: - "simpletree" - a built-in DOM-ish tree type with support for some - more pythonic idioms. "dom" - The xml.dom.minidom DOM implementation "pulldom" - The xml.dom.pulldom event stream "etree" - A generic walker for tree implementations exposing an elementtree-like interface (known to work with ElementTree, cElementTree and lxml.etree). "lxml" - Optimized walker for lxml.etree - "beautifulsoup" - Beautiful soup (if installed) "genshi" - a Genshi stream implementation - (Currently applies to the "etree" tree type only). A module @@ -33,20 +37,21 @@ def getTreeWalker(treeType, implementation=None, **kwargs): treeType = treeType.lower() if treeType not in treeWalkerCache: - if treeType in ("dom", "pulldom", "simpletree"): - mod = __import__(treeType, globals()) + if treeType in ("dom", "pulldom"): + name = "%s.%s" % (__name__, treeType) + __import__(name) + mod = sys.modules[name] treeWalkerCache[treeType] = mod.TreeWalker elif treeType == "genshi": - import genshistream + from . import genshistream treeWalkerCache[treeType] = genshistream.TreeWalker - elif treeType == "beautifulsoup": - import soup - treeWalkerCache[treeType] = soup.TreeWalker elif treeType == "lxml": - import lxmletree + from . import lxmletree treeWalkerCache[treeType] = lxmletree.TreeWalker elif treeType == "etree": - import etree + from . 
import etree + if implementation is None: + implementation = default_etree # XXX: NEVER cache here, caching is done in the etree submodule return etree.getETreeModule(implementation, **kwargs).TreeWalker return treeWalkerCache.get(treeType) diff --git a/libs/html5lib/treewalkers/_base.py b/libs/html5lib/treewalkers/_base.py index 5929ba0..34252e5 100644 --- a/libs/html5lib/treewalkers/_base.py +++ b/libs/html5lib/treewalkers/_base.py @@ -1,8 +1,40 @@ +from __future__ import absolute_import, division, unicode_literals +from six import text_type, string_types + import gettext _ = gettext.gettext -from html5lib.constants import voidElements, spaceCharacters -spaceCharacters = u"".join(spaceCharacters) +from xml.dom import Node + +DOCUMENT = Node.DOCUMENT_NODE +DOCTYPE = Node.DOCUMENT_TYPE_NODE +TEXT = Node.TEXT_NODE +ELEMENT = Node.ELEMENT_NODE +COMMENT = Node.COMMENT_NODE +ENTITY = Node.ENTITY_NODE +UNKNOWN = "<#UNKNOWN#>" + +from ..constants import voidElements, spaceCharacters +spaceCharacters = "".join(spaceCharacters) + + +def to_text(s, blank_if_none=True): + """Wrapper around six.text_type to convert None to empty string""" + if s is None: + if blank_if_none: + return "" + else: + return None + elif isinstance(s, text_type): + return s + else: + return text_type(s) + + +def is_text_or_none(string): + """Wrapper around isinstance(string_types) or is None""" + return string is None or isinstance(string, string_types) + class TreeWalker(object): def __init__(self, tree): @@ -14,40 +46,50 @@ class TreeWalker(object): def error(self, msg): return {"type": "SerializeError", "data": msg} - def normalizeAttrs(self, attrs): - newattrs = {} - if attrs: - #TODO: treewalkers should always have attrs - for (namespace,name),value in attrs.iteritems(): - namespace = unicode(namespace) if namespace else None - name = unicode(name) - value = unicode(value) - newattrs[(namespace,name)] = value - return newattrs - def emptyTag(self, namespace, name, attrs, hasChildren=False): - 
yield {"type": "EmptyTag", "name": unicode(name), - "namespace":unicode(namespace), - "data": self.normalizeAttrs(attrs)} + assert namespace is None or isinstance(namespace, string_types), type(namespace) + assert isinstance(name, string_types), type(name) + assert all((namespace is None or isinstance(namespace, string_types)) and + isinstance(name, string_types) and + isinstance(value, string_types) + for (namespace, name), value in attrs.items()) + + yield {"type": "EmptyTag", "name": to_text(name, False), + "namespace": to_text(namespace), + "data": attrs} if hasChildren: yield self.error(_("Void element has children")) def startTag(self, namespace, name, attrs): - return {"type": "StartTag", - "name": unicode(name), - "namespace":unicode(namespace), - "data": self.normalizeAttrs(attrs)} + assert namespace is None or isinstance(namespace, string_types), type(namespace) + assert isinstance(name, string_types), type(name) + assert all((namespace is None or isinstance(namespace, string_types)) and + isinstance(name, string_types) and + isinstance(value, string_types) + for (namespace, name), value in attrs.items()) + + return {"type": "StartTag", + "name": text_type(name), + "namespace": to_text(namespace), + "data": dict(((to_text(namespace, False), to_text(name)), + to_text(value, False)) + for (namespace, name), value in attrs.items())} def endTag(self, namespace, name): - return {"type": "EndTag", - "name": unicode(name), - "namespace":unicode(namespace), + assert namespace is None or isinstance(namespace, string_types), type(namespace) + assert isinstance(name, string_types), type(namespace) + + return {"type": "EndTag", + "name": to_text(name, False), + "namespace": to_text(namespace), "data": {}} def text(self, data): - data = unicode(data) + assert isinstance(data, string_types), type(data) + + data = to_text(data) middle = data.lstrip(spaceCharacters) - left = data[:len(data)-len(middle)] + left = data[:len(data) - len(middle)] if left: yield {"type": 
"SpaceCharacters", "data": left} data = middle @@ -59,56 +101,40 @@ class TreeWalker(object): yield {"type": "SpaceCharacters", "data": right} def comment(self, data): - return {"type": "Comment", "data": unicode(data)} + assert isinstance(data, string_types), type(data) + + return {"type": "Comment", "data": text_type(data)} def doctype(self, name, publicId=None, systemId=None, correct=True): + assert is_text_or_none(name), type(name) + assert is_text_or_none(publicId), type(publicId) + assert is_text_or_none(systemId), type(systemId) + return {"type": "Doctype", - "name": name is not None and unicode(name) or u"", - "publicId": publicId, - "systemId": systemId, - "correct": correct} + "name": to_text(name), + "publicId": to_text(publicId), + "systemId": to_text(systemId), + "correct": to_text(correct)} def entity(self, name): - return {"type": "Entity", "name": unicode(name)} + assert isinstance(name, string_types), type(name) + + return {"type": "Entity", "name": text_type(name)} def unknown(self, nodeType): return self.error(_("Unknown node type: ") + nodeType) -class RecursiveTreeWalker(TreeWalker): - def walkChildren(self, node): - raise NodeImplementedError - - def element(self, node, namespace, name, attrs, hasChildren): - if name in voidElements: - for token in self.emptyTag(namespace, name, attrs, hasChildren): - yield token - else: - yield self.startTag(name, attrs) - if hasChildren: - for token in self.walkChildren(node): - yield token - yield self.endTag(name) - -from xml.dom import Node - -DOCUMENT = Node.DOCUMENT_NODE -DOCTYPE = Node.DOCUMENT_TYPE_NODE -TEXT = Node.TEXT_NODE -ELEMENT = Node.ELEMENT_NODE -COMMENT = Node.COMMENT_NODE -ENTITY = Node.ENTITY_NODE -UNKNOWN = "<#UNKNOWN#>" class NonRecursiveTreeWalker(TreeWalker): def getNodeDetails(self, node): raise NotImplementedError - + def getFirstChild(self, node): raise NotImplementedError - + def getNextSibling(self, node): raise NotImplementedError - + def getParentNode(self, node): raise 
NotImplementedError @@ -118,7 +144,6 @@ class NonRecursiveTreeWalker(TreeWalker): details = self.getNodeDetails(currentNode) type, details = details[0], details[1:] hasChildren = False - endTag = None if type == DOCTYPE: yield self.doctype(*details) @@ -130,12 +155,11 @@ class NonRecursiveTreeWalker(TreeWalker): elif type == ELEMENT: namespace, name, attributes, hasChildren = details if name in voidElements: - for token in self.emptyTag(namespace, name, attributes, + for token in self.emptyTag(namespace, name, attributes, hasChildren): yield token hasChildren = False else: - endTag = name yield self.startTag(namespace, name, attributes) elif type == COMMENT: @@ -149,12 +173,12 @@ class NonRecursiveTreeWalker(TreeWalker): else: yield self.unknown(details[0]) - + if hasChildren: firstChild = self.getFirstChild(currentNode) else: firstChild = None - + if firstChild is not None: currentNode = firstChild else: diff --git a/libs/html5lib/treewalkers/dom.py b/libs/html5lib/treewalkers/dom.py index 383b46c..a01287a 100644 --- a/libs/html5lib/treewalkers/dom.py +++ b/libs/html5lib/treewalkers/dom.py @@ -1,10 +1,12 @@ +from __future__ import absolute_import, division, unicode_literals + from xml.dom import Node import gettext _ = gettext.gettext -import _base -from html5lib.constants import voidElements +from . 
import _base + class TreeWalker(_base.NonRecursiveTreeWalker): def getNodeDetails(self, node): @@ -16,10 +18,13 @@ class TreeWalker(_base.NonRecursiveTreeWalker): elif node.nodeType == Node.ELEMENT_NODE: attrs = {} - for attr in node.attributes.keys(): + for attr in list(node.attributes.keys()): attr = node.getAttributeNode(attr) - attrs[(attr.namespaceURI,attr.localName)] = attr.value - return (_base.ELEMENT, node.namespaceURI, node.nodeName, + if attr.namespaceURI: + attrs[(attr.namespaceURI, attr.localName)] = attr.value + else: + attrs[(None, attr.name)] = attr.value + return (_base.ELEMENT, node.namespaceURI, node.nodeName, attrs, node.hasChildNodes()) elif node.nodeType == Node.COMMENT_NODE: diff --git a/libs/html5lib/treewalkers/etree.py b/libs/html5lib/treewalkers/etree.py index 13b0319..fd8a9cc 100644 --- a/libs/html5lib/treewalkers/etree.py +++ b/libs/html5lib/treewalkers/etree.py @@ -1,33 +1,28 @@ +from __future__ import absolute_import, division, unicode_literals + +try: + from collections import OrderedDict +except ImportError: + try: + from ordereddict import OrderedDict + except ImportError: + OrderedDict = dict import gettext _ = gettext.gettext -try: - from types import ModuleType -except: - from new import module as ModuleType -import copy import re -import _base -from html5lib.constants import voidElements +from six import text_type -tag_regexp = re.compile("{([^}]*)}(.*)") +from . 
import _base +from ..utils import moduleFactoryFactory -moduleCache = {} +tag_regexp = re.compile("{([^}]*)}(.*)") -def getETreeModule(ElementTreeImplementation): - name = "_" + ElementTreeImplementation.__name__+"builder" - if name in moduleCache: - return moduleCache[name] - else: - mod = ModuleType("_" + ElementTreeImplementation.__name__+"builder") - objs = getETreeBuilder(ElementTreeImplementation) - mod.__dict__.update(objs) - moduleCache[name] = mod - return mod def getETreeBuilder(ElementTreeImplementation): ElementTree = ElementTreeImplementation + ElementTreeCommentType = ElementTree.Comment("asd").tag class TreeWalker(_base.NonRecursiveTreeWalker): """Given the particular ElementTree representation, this implementation, @@ -35,16 +30,16 @@ def getETreeBuilder(ElementTreeImplementation): content: 1. The current element - + 2. The index of the element relative to its parent - + 3. A stack of ancestor elements - + 4. A flag "text", "tail" or None to indicate if the current node is a text node; either the text or tail of the current element (1) """ def getNodeDetails(self, node): - if isinstance(node, tuple): # It might be the root Element + if isinstance(node, tuple): # It might be the root Element elt, key, parents, flag = node if flag in ("text", "tail"): return _base.TEXT, getattr(elt, flag) @@ -54,41 +49,41 @@ def getETreeBuilder(ElementTreeImplementation): if not(hasattr(node, "tag")): node = node.getroot() - if node.tag in ("<DOCUMENT_ROOT>", "<DOCUMENT_FRAGMENT>"): + if node.tag in ("DOCUMENT_ROOT", "DOCUMENT_FRAGMENT"): return (_base.DOCUMENT,) elif node.tag == "<!DOCTYPE>": - return (_base.DOCTYPE, node.text, + return (_base.DOCTYPE, node.text, node.get("publicId"), node.get("systemId")) - elif node.tag == ElementTree.Comment: + elif node.tag == ElementTreeCommentType: return _base.COMMENT, node.text else: - assert type(node.tag) in (str, unicode), type(node.tag) - #This is assumed to be an ordinary element + assert type(node.tag) == text_type, 
type(node.tag) + # This is assumed to be an ordinary element match = tag_regexp.match(node.tag) if match: namespace, tag = match.groups() else: namespace = None tag = node.tag - attrs = {} - for name, value in node.attrib.items(): + attrs = OrderedDict() + for name, value in list(node.attrib.items()): match = tag_regexp.match(name) if match: - attrs[(match.group(1),match.group(2))] = value + attrs[(match.group(1), match.group(2))] = value else: - attrs[(None,name)] = value - return (_base.ELEMENT, namespace, tag, + attrs[(None, name)] = value + return (_base.ELEMENT, namespace, tag, attrs, len(node) or node.text) - + def getFirstChild(self, node): if isinstance(node, tuple): element, key, parents, flag = node else: element, key, parents, flag = node, None, [], None - + if flag in ("text", "tail"): return None else: @@ -99,13 +94,13 @@ def getETreeBuilder(ElementTreeImplementation): return element[0], 0, parents, None else: return None - + def getNextSibling(self, node): if isinstance(node, tuple): element, key, parents, flag = node else: return None - + if flag == "text": if len(element): parents.append(element) @@ -116,16 +111,16 @@ def getETreeBuilder(ElementTreeImplementation): if element.tail and flag != "tail": return element, key, parents, "tail" elif key < len(parents[-1]) - 1: - return parents[-1][key+1], key+1, parents, None + return parents[-1][key + 1], key + 1, parents, None else: return None - + def getParentNode(self, node): if isinstance(node, tuple): element, key, parents, flag = node else: return None - + if flag == "text": if not parents: return element @@ -139,3 +134,5 @@ def getETreeBuilder(ElementTreeImplementation): return parent, list(parents[-1]).index(parent), parents, None return locals() + +getETreeModule = moduleFactoryFactory(getETreeBuilder) diff --git a/libs/html5lib/treewalkers/genshistream.py b/libs/html5lib/treewalkers/genshistream.py index ef71a83..f559c45 100644 --- a/libs/html5lib/treewalkers/genshistream.py +++ 
b/libs/html5lib/treewalkers/genshistream.py @@ -1,50 +1,49 @@ +from __future__ import absolute_import, division, unicode_literals + +from genshi.core import QName from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT -from genshi.core import START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT -from genshi.output import NamespaceFlattener +from genshi.core import START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT + +from . import _base -import _base +from ..constants import voidElements, namespaces -from html5lib.constants import voidElements class TreeWalker(_base.TreeWalker): def __iter__(self): - depth = 0 - ignore_until = None + # Buffer the events so we can pass in the following one previous = None for event in self.tree: if previous is not None: - if previous[0] == START: - depth += 1 - if ignore_until <= depth: - ignore_until = None - if ignore_until is None: - for token in self.tokens(previous, event): - yield token - if token["type"] == "EmptyTag": - ignore_until = depth - if previous[0] == END: - depth -= 1 + for token in self.tokens(previous, event): + yield token previous = event + + # Don't forget the final event! 
if previous is not None: - if ignore_until is None or ignore_until <= depth: - for token in self.tokens(previous, None): - yield token - elif ignore_until is not None: - raise ValueError("Illformed DOM event stream: void element without END_ELEMENT") + for token in self.tokens(previous, None): + yield token def tokens(self, event, next): kind, data, pos = event if kind == START: - tag, attrib = data + tag, attribs = data name = tag.localname namespace = tag.namespace - if tag in voidElements: - for token in self.emptyTag(namespace, name, list(attrib), - not next or next[0] != END + converted_attribs = {} + for k, v in attribs: + if isinstance(k, QName): + converted_attribs[(k.namespace, k.localname)] = v + else: + converted_attribs[(None, k)] = v + + if namespace == namespaces["html"] and name in voidElements: + for token in self.emptyTag(namespace, name, converted_attribs, + not next or next[0] != END or next[1] != tag): yield token else: - yield self.startTag(namespace, name, list(attrib)) + yield self.startTag(namespace, name, converted_attribs) elif kind == END: name = data.localname @@ -62,8 +61,8 @@ class TreeWalker(_base.TreeWalker): elif kind == DOCTYPE: yield self.doctype(*data) - elif kind in (XML_NAMESPACE, DOCTYPE, START_NS, END_NS, \ - START_CDATA, END_CDATA, PI): + elif kind in (XML_NAMESPACE, DOCTYPE, START_NS, END_NS, + START_CDATA, END_CDATA, PI): pass else: diff --git a/libs/html5lib/treewalkers/lxmletree.py b/libs/html5lib/treewalkers/lxmletree.py index 46f4908..375cc2e 100644 --- a/libs/html5lib/treewalkers/lxmletree.py +++ b/libs/html5lib/treewalkers/lxmletree.py @@ -1,186 +1,208 @@ -from lxml import etree -from html5lib.treebuilders.etree import tag_regexp - -from gettext import gettext -_ = gettext - -import _base - -from html5lib.constants import voidElements -from html5lib import ihatexml - -class Root(object): - def __init__(self, et): - self.elementtree = et - self.children = [] - if et.docinfo.internalDTD: - 
self.children.append(Doctype(self, et.docinfo.root_name, - et.docinfo.public_id, - et.docinfo.system_url)) - root = et.getroot() - node = root - - while node.getprevious() is not None: - node = node.getprevious() - while node is not None: - self.children.append(node) - node = node.getnext() - - self.text = None - self.tail = None - - def __getitem__(self, key): - return self.children[key] - - def getnext(self): - return None - - def __len__(self): - return 1 - -class Doctype(object): - def __init__(self, root_node, name, public_id, system_id): - self.root_node = root_node - self.name = name - self.public_id = public_id - self.system_id = system_id - - self.text = None - self.tail = None - - def getnext(self): - return self.root_node.children[1] - -class FragmentRoot(Root): - def __init__(self, children): - self.children = [FragmentWrapper(self, child) for child in children] - self.text = self.tail = None - - def getnext(self): - return None - -class FragmentWrapper(object): - def __init__(self, fragment_root, obj): - self.root_node = fragment_root - self.obj = obj - if hasattr(self.obj, 'text'): - self.text = self.obj.text - else: - self.text = None - if hasattr(self.obj, 'tail'): - self.tail = self.obj.tail - else: - self.tail = None - self.isstring = isinstance(obj, basestring) - - def __getattr__(self, name): - return getattr(self.obj, name) - - def getnext(self): - siblings = self.root_node.children - idx = siblings.index(self) - if idx < len(siblings) - 1: - return siblings[idx + 1] - else: - return None - - def __getitem__(self, key): - return self.obj[key] - - def __nonzero__(self): - return bool(self.obj) - - def getparent(self): - return None - - def __str__(self): - return str(self.obj) - - def __unicode__(self): - return unicode(self.obj) - - def __len__(self): - return len(self.obj) - - -class TreeWalker(_base.NonRecursiveTreeWalker): - def __init__(self, tree): - if hasattr(tree, "getroot"): - tree = Root(tree) - elif isinstance(tree, list): - tree = 
FragmentRoot(tree) - _base.NonRecursiveTreeWalker.__init__(self, tree) - self.filter = ihatexml.InfosetFilter() - def getNodeDetails(self, node): - if isinstance(node, tuple): # Text node - node, key = node - assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key - return _base.TEXT, getattr(node, key) - - elif isinstance(node, Root): - return (_base.DOCUMENT,) - - elif isinstance(node, Doctype): - return _base.DOCTYPE, node.name, node.public_id, node.system_id - - elif isinstance(node, FragmentWrapper) and node.isstring: - return _base.TEXT, node - - elif node.tag == etree.Comment: - return _base.COMMENT, node.text - - elif node.tag == etree.Entity: - return _base.ENTITY, node.text[1:-1] # strip &; - - else: - #This is assumed to be an ordinary element - match = tag_regexp.match(node.tag) - if match: - namespace, tag = match.groups() - else: - namespace = None - tag = node.tag - attrs = {} - for name, value in node.attrib.items(): - match = tag_regexp.match(name) - if match: - attrs[(match.group(1),match.group(2))] = value - else: - attrs[(None,name)] = value - return (_base.ELEMENT, namespace, self.filter.fromXmlName(tag), - attrs, len(node) > 0 or node.text) - - def getFirstChild(self, node): - assert not isinstance(node, tuple), _("Text nodes have no children") - - assert len(node) or node.text, "Node has no children" - if node.text: - return (node, "text") - else: - return node[0] - - def getNextSibling(self, node): - if isinstance(node, tuple): # Text node - node, key = node - assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key - if key == "text": - # XXX: we cannot use a "bool(node) and node[0] or None" construct here - # because node[0] might evaluate to False if it has no child element - if len(node): - return node[0] - else: - return None - else: # tail - return node.getnext() - - return node.tail and (node, "tail") or node.getnext() - - def getParentNode(self, node): - if isinstance(node, tuple): # 
Text node - node, key = node - assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key - if key == "text": - return node - # else: fallback to "normal" processing - - return node.getparent() +from __future__ import absolute_import, division, unicode_literals +from six import text_type + +from lxml import etree +from ..treebuilders.etree import tag_regexp + +from gettext import gettext +_ = gettext + +from . import _base + +from .. import ihatexml + + +def ensure_str(s): + if s is None: + return None + elif isinstance(s, text_type): + return s + else: + return s.decode("utf-8", "strict") + + +class Root(object): + def __init__(self, et): + self.elementtree = et + self.children = [] + if et.docinfo.internalDTD: + self.children.append(Doctype(self, + ensure_str(et.docinfo.root_name), + ensure_str(et.docinfo.public_id), + ensure_str(et.docinfo.system_url))) + root = et.getroot() + node = root + + while node.getprevious() is not None: + node = node.getprevious() + while node is not None: + self.children.append(node) + node = node.getnext() + + self.text = None + self.tail = None + + def __getitem__(self, key): + return self.children[key] + + def getnext(self): + return None + + def __len__(self): + return 1 + + +class Doctype(object): + def __init__(self, root_node, name, public_id, system_id): + self.root_node = root_node + self.name = name + self.public_id = public_id + self.system_id = system_id + + self.text = None + self.tail = None + + def getnext(self): + return self.root_node.children[1] + + +class FragmentRoot(Root): + def __init__(self, children): + self.children = [FragmentWrapper(self, child) for child in children] + self.text = self.tail = None + + def getnext(self): + return None + + +class FragmentWrapper(object): + def __init__(self, fragment_root, obj): + self.root_node = fragment_root + self.obj = obj + if hasattr(self.obj, 'text'): + self.text = ensure_str(self.obj.text) + else: + self.text = None + if hasattr(self.obj, 
'tail'): + self.tail = ensure_str(self.obj.tail) + else: + self.tail = None + self.isstring = isinstance(obj, str) or isinstance(obj, bytes) + # Support for bytes here is Py2 + if self.isstring: + self.obj = ensure_str(self.obj) + + def __getattr__(self, name): + return getattr(self.obj, name) + + def getnext(self): + siblings = self.root_node.children + idx = siblings.index(self) + if idx < len(siblings) - 1: + return siblings[idx + 1] + else: + return None + + def __getitem__(self, key): + return self.obj[key] + + def __bool__(self): + return bool(self.obj) + + def getparent(self): + return None + + def __str__(self): + return str(self.obj) + + def __unicode__(self): + return str(self.obj) + + def __len__(self): + return len(self.obj) + + +class TreeWalker(_base.NonRecursiveTreeWalker): + def __init__(self, tree): + if hasattr(tree, "getroot"): + tree = Root(tree) + elif isinstance(tree, list): + tree = FragmentRoot(tree) + _base.NonRecursiveTreeWalker.__init__(self, tree) + self.filter = ihatexml.InfosetFilter() + + def getNodeDetails(self, node): + if isinstance(node, tuple): # Text node + node, key = node + assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key + return _base.TEXT, ensure_str(getattr(node, key)) + + elif isinstance(node, Root): + return (_base.DOCUMENT,) + + elif isinstance(node, Doctype): + return _base.DOCTYPE, node.name, node.public_id, node.system_id + + elif isinstance(node, FragmentWrapper) and node.isstring: + return _base.TEXT, node.obj + + elif node.tag == etree.Comment: + return _base.COMMENT, ensure_str(node.text) + + elif node.tag == etree.Entity: + return _base.ENTITY, ensure_str(node.text)[1:-1] # strip &; + + else: + # This is assumed to be an ordinary element + match = tag_regexp.match(ensure_str(node.tag)) + if match: + namespace, tag = match.groups() + else: + namespace = None + tag = ensure_str(node.tag) + attrs = {} + for name, value in list(node.attrib.items()): + name = ensure_str(name) + value = 
ensure_str(value) + match = tag_regexp.match(name) + if match: + attrs[(match.group(1), match.group(2))] = value + else: + attrs[(None, name)] = value + return (_base.ELEMENT, namespace, self.filter.fromXmlName(tag), + attrs, len(node) > 0 or node.text) + + def getFirstChild(self, node): + assert not isinstance(node, tuple), _("Text nodes have no children") + + assert len(node) or node.text, "Node has no children" + if node.text: + return (node, "text") + else: + return node[0] + + def getNextSibling(self, node): + if isinstance(node, tuple): # Text node + node, key = node + assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key + if key == "text": + # XXX: we cannot use a "bool(node) and node[0] or None" construct here + # because node[0] might evaluate to False if it has no child element + if len(node): + return node[0] + else: + return None + else: # tail + return node.getnext() + + return (node, "tail") if node.tail else node.getnext() + + def getParentNode(self, node): + if isinstance(node, tuple): # Text node + node, key = node + assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key + if key == "text": + return node + # else: fallback to "normal" processing + + return node.getparent() diff --git a/libs/html5lib/treewalkers/pulldom.py b/libs/html5lib/treewalkers/pulldom.py index 1f8b95b..0b0f515 100644 --- a/libs/html5lib/treewalkers/pulldom.py +++ b/libs/html5lib/treewalkers/pulldom.py @@ -1,9 +1,12 @@ +from __future__ import absolute_import, division, unicode_literals + from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, \ COMMENT, IGNORABLE_WHITESPACE, CHARACTERS -import _base +from . 
import _base + +from ..constants import voidElements -from html5lib.constants import voidElements class TreeWalker(_base.TreeWalker): def __iter__(self): @@ -11,7 +14,7 @@ class TreeWalker(_base.TreeWalker): previous = None for event in self.tree: if previous is not None and \ - (ignore_until is None or previous[1] is ignore_until): + (ignore_until is None or previous[1] is ignore_until): if previous[1] is ignore_until: ignore_until = None for token in self.tokens(previous, event): @@ -31,9 +34,9 @@ class TreeWalker(_base.TreeWalker): name = node.nodeName namespace = node.namespaceURI attrs = {} - for attr in node.attributes.keys(): + for attr in list(node.attributes.keys()): attr = node.getAttributeNode(attr) - attrs[(attr.namespaceURI,attr.localName)] = attr.value + attrs[(attr.namespaceURI, attr.localName)] = attr.value if name in voidElements: for token in self.emptyTag(namespace, name, diff --git a/libs/html5lib/treewalkers/simpletree.py b/libs/html5lib/treewalkers/simpletree.py deleted file mode 100644 index 9e6bd4c..0000000 --- a/libs/html5lib/treewalkers/simpletree.py +++ /dev/null @@ -1,78 +0,0 @@ -import gettext -_ = gettext.gettext - -import _base - -class TreeWalker(_base.NonRecursiveTreeWalker): - """Given that simpletree has no performant way of getting a node's - next sibling, this implementation returns "nodes" as tuples with the - following content: - - 1. The parent Node (Element, Document or DocumentFragment) - - 2. The child index of the current node in its parent's children list - - 3. A list used as a stack of all ancestors. It is a pair tuple whose - first item is a parent Node and second item is a child index. 
- """ - - def getNodeDetails(self, node): - if isinstance(node, tuple): # It might be the root Node - parent, idx, parents = node - node = parent.childNodes[idx] - - # testing node.type allows us not to import treebuilders.simpletree - if node.type in (1, 2): # Document or DocumentFragment - return (_base.DOCUMENT,) - - elif node.type == 3: # DocumentType - return _base.DOCTYPE, node.name, node.publicId, node.systemId - - elif node.type == 4: # TextNode - return _base.TEXT, node.value - - elif node.type == 5: # Element - attrs = {} - for name, value in node.attributes.items(): - if isinstance(name, tuple): - attrs[(name[2],name[1])] = value - else: - attrs[(None,name)] = value - return (_base.ELEMENT, node.namespace, node.name, - attrs, node.hasContent()) - - elif node.type == 6: # CommentNode - return _base.COMMENT, node.data - - else: - return _node.UNKNOWN, node.type - - def getFirstChild(self, node): - if isinstance(node, tuple): # It might be the root Node - parent, idx, parents = node - parents.append((parent, idx)) - node = parent.childNodes[idx] - else: - parents = [] - - assert node.hasContent(), "Node has no children" - return (node, 0, parents) - - def getNextSibling(self, node): - assert isinstance(node, tuple), "Node is not a tuple: " + str(node) - parent, idx, parents = node - idx += 1 - if len(parent.childNodes) > idx: - return (parent, idx, parents) - else: - return None - - def getParentNode(self, node): - assert isinstance(node, tuple) - parent, idx, parents = node - if parents: - parent, idx = parents.pop() - return parent, idx, parents - else: - # HACK: We could return ``parent`` but None will stop the algorithm the same way - return None diff --git a/libs/html5lib/treewalkers/soup.py b/libs/html5lib/treewalkers/soup.py deleted file mode 100644 index fca65ec..0000000 --- a/libs/html5lib/treewalkers/soup.py +++ /dev/null @@ -1,60 +0,0 @@ -import re -import gettext -_ = gettext.gettext - -from BeautifulSoup import BeautifulSoup, Declaration, 
Comment, Tag -from html5lib.constants import namespaces -import _base - -class TreeWalker(_base.NonRecursiveTreeWalker): - doctype_regexp = re.compile( - r'DOCTYPE\s+(?P<name>[^\s]*)(\s*PUBLIC\s*"(?P<publicId>.*)"\s*"(?P<systemId1>.*)"|\s*SYSTEM\s*"(?P<systemId2>.*)")?') - def getNodeDetails(self, node): - if isinstance(node, BeautifulSoup): # Document or DocumentFragment - return (_base.DOCUMENT,) - - elif isinstance(node, Declaration): # DocumentType - string = unicode(node.string) - #Slice needed to remove markup added during unicode conversion, - #but only in some versions of BeautifulSoup/Python - if string.startswith('<!') and string.endswith('>'): - string = string[2:-1] - m = self.doctype_regexp.match(string) - #This regexp approach seems wrong and fragile - #but beautiful soup stores the doctype as a single thing and we want the seperate bits - #It should work as long as the tree is created by html5lib itself but may be wrong if it's - #been modified at all - #We could just feed to it a html5lib tokenizer, I guess... 
- assert m is not None, "DOCTYPE did not match expected format" - - name = m.group('name') - publicId = m.group('publicId') - if publicId is not None: - systemId = m.group('systemId1') - else: - systemId = m.group('systemId2') - return _base.DOCTYPE, name, publicId or "", systemId or "" - - elif isinstance(node, Comment): - string = unicode(node.string) - if string.startswith('<!--') and string.endswith('-->'): - string = string[4:-3] - return _base.COMMENT, string - - elif isinstance(node, unicode): # TextNode - return _base.TEXT, node - - elif isinstance(node, Tag): # Element - return (_base.ELEMENT, namespaces["html"], node.name, - dict(node.attrs).items(), node.contents) - else: - return _base.UNKNOWN, node.__class__.__name__ - - def getFirstChild(self, node): - return node.contents[0] - - def getNextSibling(self, node): - return node.nextSibling - - def getParentNode(self, node): - return node.parent diff --git a/libs/html5lib/trie/__init__.py b/libs/html5lib/trie/__init__.py new file mode 100644 index 0000000..a8cca8a --- /dev/null +++ b/libs/html5lib/trie/__init__.py @@ -0,0 +1,12 @@ +from __future__ import absolute_import, division, unicode_literals + +from .py import Trie as PyTrie + +Trie = PyTrie + +try: + from .datrie import Trie as DATrie +except ImportError: + pass +else: + Trie = DATrie diff --git a/libs/html5lib/trie/_base.py b/libs/html5lib/trie/_base.py new file mode 100644 index 0000000..724486b --- /dev/null +++ b/libs/html5lib/trie/_base.py @@ -0,0 +1,37 @@ +from __future__ import absolute_import, division, unicode_literals + +from collections import Mapping + + +class Trie(Mapping): + """Abstract base class for tries""" + + def keys(self, prefix=None): + keys = super().keys() + + if prefix is None: + return set(keys) + + # Python 2.6: no set comprehensions + return set([x for x in keys if x.startswith(prefix)]) + + def has_keys_with_prefix(self, prefix): + for key in self.keys(): + if key.startswith(prefix): + return True + + return False + + 
def longest_prefix(self, prefix): + if prefix in self: + return prefix + + for i in range(1, len(prefix) + 1): + if prefix[:-i] in self: + return prefix[:-i] + + raise KeyError(prefix) + + def longest_prefix_item(self, prefix): + lprefix = self.longest_prefix(prefix) + return (lprefix, self[lprefix]) diff --git a/libs/html5lib/trie/datrie.py b/libs/html5lib/trie/datrie.py new file mode 100644 index 0000000..51f3d04 --- /dev/null +++ b/libs/html5lib/trie/datrie.py @@ -0,0 +1,44 @@ +from __future__ import absolute_import, division, unicode_literals + +from datrie import Trie as DATrie +from six import text_type + +from ._base import Trie as ABCTrie + + +class Trie(ABCTrie): + def __init__(self, data): + chars = set() + for key in data.keys(): + if not isinstance(key, text_type): + raise TypeError("All keys must be strings") + for char in key: + chars.add(char) + + self._data = DATrie("".join(chars)) + for key, value in data.items(): + self._data[key] = value + + def __contains__(self, key): + return key in self._data + + def __len__(self): + return len(self._data) + + def __iter__(self): + raise NotImplementedError() + + def __getitem__(self, key): + return self._data[key] + + def keys(self, prefix=None): + return self._data.keys(prefix) + + def has_keys_with_prefix(self, prefix): + return self._data.has_keys_with_prefix(prefix) + + def longest_prefix(self, prefix): + return self._data.longest_prefix(prefix) + + def longest_prefix_item(self, prefix): + return self._data.longest_prefix_item(prefix) diff --git a/libs/html5lib/trie/py.py b/libs/html5lib/trie/py.py new file mode 100644 index 0000000..c2ba3da --- /dev/null +++ b/libs/html5lib/trie/py.py @@ -0,0 +1,67 @@ +from __future__ import absolute_import, division, unicode_literals +from six import text_type + +from bisect import bisect_left + +from ._base import Trie as ABCTrie + + +class Trie(ABCTrie): + def __init__(self, data): + if not all(isinstance(x, text_type) for x in data.keys()): + raise TypeError("All 
keys must be strings") + + self._data = data + self._keys = sorted(data.keys()) + self._cachestr = "" + self._cachepoints = (0, len(data)) + + def __contains__(self, key): + return key in self._data + + def __len__(self): + return len(self._data) + + def __iter__(self): + return iter(self._data) + + def __getitem__(self, key): + return self._data[key] + + def keys(self, prefix=None): + if prefix is None or prefix == "" or not self._keys: + return set(self._keys) + + if prefix.startswith(self._cachestr): + lo, hi = self._cachepoints + start = i = bisect_left(self._keys, prefix, lo, hi) + else: + start = i = bisect_left(self._keys, prefix) + + keys = set() + if start == len(self._keys): + return keys + + while self._keys[i].startswith(prefix): + keys.add(self._keys[i]) + i += 1 + + self._cachestr = prefix + self._cachepoints = (start, i) + + return keys + + def has_keys_with_prefix(self, prefix): + if prefix in self._data: + return True + + if prefix.startswith(self._cachestr): + lo, hi = self._cachepoints + i = bisect_left(self._keys, prefix, lo, hi) + else: + i = bisect_left(self._keys, prefix) + + if i == len(self._keys): + return False + + return self._keys[i].startswith(prefix) diff --git a/libs/html5lib/utils.py b/libs/html5lib/utils.py index d53f678..2f41f4d 100644 --- a/libs/html5lib/utils.py +++ b/libs/html5lib/utils.py @@ -1,9 +1,16 @@ +from __future__ import absolute_import, division, unicode_literals + +from types import ModuleType + try: - frozenset -except NameError: - #Import from the sets module for python 2.3 - from sets import Set as set - from sets import ImmutableSet as frozenset + import xml.etree.cElementTree as default_etree +except ImportError: + import xml.etree.ElementTree as default_etree + + +__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair", + "surrogatePairToCodepoint", "moduleFactoryFactory"] + class MethodDispatcher(dict): """Dict with 2 special properties: @@ -23,7 +30,7 @@ class MethodDispatcher(dict): # twice as 
fast. Please do careful performance testing before changing # anything here. _dictEntries = [] - for name,value in items: + for name, value in items: if type(name) in (list, tuple, frozenset, set): for item in name: _dictEntries.append((item, value)) @@ -35,141 +42,41 @@ class MethodDispatcher(dict): def __getitem__(self, key): return dict.get(self, key, self.default) -#Pure python implementation of deque taken from the ASPN Python Cookbook -#Original code by Raymond Hettinger - -class deque(object): - - def __init__(self, iterable=(), maxsize=-1): - if not hasattr(self, 'data'): - self.left = self.right = 0 - self.data = {} - self.maxsize = maxsize - self.extend(iterable) - - def append(self, x): - self.data[self.right] = x - self.right += 1 - if self.maxsize != -1 and len(self) > self.maxsize: - self.popleft() - - def appendleft(self, x): - self.left -= 1 - self.data[self.left] = x - if self.maxsize != -1 and len(self) > self.maxsize: - self.pop() - - def pop(self): - if self.left == self.right: - raise IndexError('cannot pop from empty deque') - self.right -= 1 - elem = self.data[self.right] - del self.data[self.right] - return elem - - def popleft(self): - if self.left == self.right: - raise IndexError('cannot pop from empty deque') - elem = self.data[self.left] - del self.data[self.left] - self.left += 1 - return elem - - def clear(self): - self.data.clear() - self.left = self.right = 0 - - def extend(self, iterable): - for elem in iterable: - self.append(elem) - - def extendleft(self, iterable): - for elem in iterable: - self.appendleft(elem) - - def rotate(self, n=1): - if self: - n %= len(self) - for i in xrange(n): - self.appendleft(self.pop()) - - def __getitem__(self, i): - if i < 0: - i += len(self) - try: - return self.data[i + self.left] - except KeyError: - raise IndexError - - def __setitem__(self, i, value): - if i < 0: - i += len(self) - try: - self.data[i + self.left] = value - except KeyError: - raise IndexError - - def __delitem__(self, i): - 
size = len(self) - if not (-size <= i < size): - raise IndexError - data = self.data - if i < 0: - i += size - for j in xrange(self.left+i, self.right-1): - data[j] = data[j+1] - self.pop() - - def __len__(self): - return self.right - self.left - - def __cmp__(self, other): - if type(self) != type(other): - return cmp(type(self), type(other)) - return cmp(list(self), list(other)) - - def __repr__(self, _track=[]): - if id(self) in _track: - return '...' - _track.append(id(self)) - r = 'deque(%r)' % (list(self),) - _track.remove(id(self)) - return r - - def __getstate__(self): - return (tuple(self),) - - def __setstate__(self, s): - self.__init__(s[0]) - - def __hash__(self): - raise TypeError - - def __copy__(self): - return self.__class__(self) - - def __deepcopy__(self, memo={}): - from copy import deepcopy - result = self.__class__() - memo[id(self)] = result - result.__init__(deepcopy(tuple(self), memo)) - return result - -#Some utility functions to dal with weirdness around UCS2 vs UCS4 -#python builds - -def encodingType(): - if len() == 2: - return "UCS2" - else: - return "UCS4" - -def isSurrogatePair(data): + +# Some utility functions to dal with weirdness around UCS2 vs UCS4 +# python builds + +def isSurrogatePair(data): return (len(data) == 2 and ord(data[0]) >= 0xD800 and ord(data[0]) <= 0xDBFF and ord(data[1]) >= 0xDC00 and ord(data[1]) <= 0xDFFF) + def surrogatePairToCodepoint(data): - char_val = (0x10000 + (ord(data[0]) - 0xD800) * 0x400 + + char_val = (0x10000 + (ord(data[0]) - 0xD800) * 0x400 + (ord(data[1]) - 0xDC00)) return char_val + +# Module Factory Factory (no, this isn't Java, I know) +# Here to stop this being duplicated all over the place. 
+ + +def moduleFactoryFactory(factory): + moduleCache = {} + + def moduleFactory(baseModule, *args, **kwargs): + if isinstance(ModuleType.__name__, type("")): + name = "_%s_factory" % baseModule.__name__ + else: + name = b"_%s_factory" % baseModule.__name__ + + if name in moduleCache: + return moduleCache[name] + else: + mod = ModuleType(name) + objs = factory(baseModule, *args, **kwargs) + mod.__dict__.update(objs) + moduleCache[name] = mod + return mod + + return moduleFactory diff --git a/libs/httplib2/__init__.py b/libs/httplib2/__init__.py index 01151f7..9780d4e 100644 --- a/libs/httplib2/__init__.py +++ b/libs/httplib2/__init__.py @@ -3,7 +3,7 @@ from __future__ import generators httplib2 A caching http interface that supports ETags and gzip -to conserve bandwidth. +to conserve bandwidth. Requires Python 2.3 or later @@ -15,17 +15,17 @@ Changelog: __author__ = "Joe Gregorio (joe@bitworking.org)" __copyright__ = "Copyright 2006, Joe Gregorio" __contributors__ = ["Thomas Broyer (t.broyer@ltgt.net)", - "James Antill", - "Xavier Verges Farrero", - "Jonathan Feinberg", - "Blair Zajac", - "Sam Ruby", - "Louis Nyffenegger"] + "James Antill", + "Xavier Verges Farrero", + "Jonathan Feinberg", + "Blair Zajac", + "Sam Ruby", + "Louis Nyffenegger"] __license__ = "MIT" -__version__ = "$Rev$" +__version__ = "0.8" -import re -import sys +import re +import sys import email import email.Utils import email.Message @@ -35,6 +35,7 @@ import gzip import zlib import httplib import urlparse +import urllib import base64 import os import copy @@ -42,10 +43,10 @@ import calendar import time import random import errno -# remove depracated warning in python2.6 try: from hashlib import sha1 as _sha, md5 as _md5 except ImportError: + # prior to Python 2.5, these were separate modules import sha import md5 _sha = sha.new @@ -54,21 +55,38 @@ import hmac from gettext import gettext as _ import socket -# Try using local version, followed by system, and none if neither are found try: - import 
lib.socks as socks + from httplib2 import socks except ImportError: try: - import socks as socks - except ImportError: + import socks + except (ImportError, AttributeError): socks = None # Build the appropriate socket wrapper for ssl try: import ssl # python 2.6 - _ssl_wrap_socket = ssl.wrap_socket -except ImportError: - def _ssl_wrap_socket(sock, key_file, cert_file): + ssl_SSLError = ssl.SSLError + def _ssl_wrap_socket(sock, key_file, cert_file, + disable_validation, ca_certs): + if disable_validation: + cert_reqs = ssl.CERT_NONE + else: + cert_reqs = ssl.CERT_REQUIRED + # We should be specifying SSL version 3 or TLS v1, but the ssl module + # doesn't expose the necessary knobs. So we need to go with the default + # of SSLv23. + return ssl.wrap_socket(sock, keyfile=key_file, certfile=cert_file, + cert_reqs=cert_reqs, ca_certs=ca_certs) +except (AttributeError, ImportError): + ssl_SSLError = None + def _ssl_wrap_socket(sock, key_file, cert_file, + disable_validation, ca_certs): + if not disable_validation: + raise CertificateValidationUnsupported( + "SSL certificate validation is not supported without " + "the ssl module installed. 
To avoid this error, install " + "the ssl module, or explicity disable validation.") ssl_sock = socket.ssl(sock, key_file, cert_file) return httplib.FakeSocket(sock, ssl_sock) @@ -84,15 +102,19 @@ def has_timeout(timeout): # python 2.6 return (timeout is not None and timeout is not socket._GLOBAL_DEFAULT_TIMEOUT) return (timeout is not None) -__all__ = ['Http', 'Response', 'ProxyInfo', 'HttpLib2Error', - 'RedirectMissingLocation', 'RedirectLimit', 'FailedToDecompressContent', - 'UnimplementedDigestAuthOptionError', 'UnimplementedHmacDigestAuthOptionError', - 'debuglevel'] +__all__ = [ + 'Http', 'Response', 'ProxyInfo', 'HttpLib2Error', 'RedirectMissingLocation', + 'RedirectLimit', 'FailedToDecompressContent', + 'UnimplementedDigestAuthOptionError', + 'UnimplementedHmacDigestAuthOptionError', + 'debuglevel', 'ProxiesUnavailableError'] # The httplib debug level, set to a non-zero value to get debug output debuglevel = 0 +# A request will be tried 'RETRIES' times if it fails at the socket/connection level. +RETRIES = 2 # Python 2.3 support if sys.version_info < (2,4): @@ -113,8 +135,8 @@ if not hasattr(httplib.HTTPResponse, 'getheaders'): # All exceptions raised here derive from HttpLib2Error class HttpLib2Error(Exception): pass -# Some exceptions can be caught and optionally -# be turned back into responses. +# Some exceptions can be caught and optionally +# be turned back into responses. 
class HttpLib2ErrorWithResponse(HttpLib2Error): def __init__(self, desc, response, content): self.response = response @@ -127,8 +149,18 @@ class FailedToDecompressContent(HttpLib2ErrorWithResponse): pass class UnimplementedDigestAuthOptionError(HttpLib2ErrorWithResponse): pass class UnimplementedHmacDigestAuthOptionError(HttpLib2ErrorWithResponse): pass +class MalformedHeader(HttpLib2Error): pass class RelativeURIError(HttpLib2Error): pass class ServerNotFoundError(HttpLib2Error): pass +class ProxiesUnavailableError(HttpLib2Error): pass +class CertificateValidationUnsupported(HttpLib2Error): pass +class SSLHandshakeError(HttpLib2Error): pass +class NotSupportedOnThisPlatform(HttpLib2Error): pass +class CertificateHostnameMismatch(SSLHandshakeError): + def __init__(self, desc, host, cert): + HttpLib2Error.__init__(self, desc) + self.host = host + self.cert = cert # Open Items: # ----------- @@ -152,6 +184,16 @@ class ServerNotFoundError(HttpLib2Error): pass # requesting that URI again. DEFAULT_MAX_REDIRECTS = 5 +try: + # Users can optionally provide a module that tells us where the CA_CERTS + # are located. + import ca_certs_locater + CA_CERTS = ca_certs_locater.get() +except ImportError: + # Default CA certificates file bundled with httplib2. + CA_CERTS = os.path.join( + os.path.dirname(os.path.abspath(__file__ )), "cacerts.txt") + # Which headers are hop-by-hop headers by default HOP_BY_HOP = ['connection', 'keep-alive', 'proxy-authenticate', 'proxy-authorization', 'te', 'trailers', 'transfer-encoding', 'upgrade'] @@ -176,7 +218,7 @@ def urlnorm(uri): raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri) authority = authority.lower() scheme = scheme.lower() - if not path: + if not path: path = "/" # Could do syntax based normalization of the URI before # computing the digest. See Section 6.2.2 of Std 66. 
@@ -228,7 +270,7 @@ def _parse_cache_control(headers): parts_with_args = [tuple([x.strip().lower() for x in part.split("=", 1)]) for part in parts if -1 != part.find("=")] parts_wo_args = [(name.strip().lower(), 1) for name in parts if -1 == name.find("=")] retval = dict(parts_with_args + parts_wo_args) - return retval + return retval # Whether to use a strict mode to parse WWW-Authenticate headers # Might lead to bad results in case of ill-formed header value, @@ -249,25 +291,30 @@ def _parse_www_authenticate(headers, headername='www-authenticate'): per auth_scheme.""" retval = {} if headers.has_key(headername): - authenticate = headers[headername].strip() - www_auth = USE_WWW_AUTH_STRICT_PARSING and WWW_AUTH_STRICT or WWW_AUTH_RELAXED - while authenticate: - # Break off the scheme at the beginning of the line - if headername == 'authentication-info': - (auth_scheme, the_rest) = ('digest', authenticate) - else: - (auth_scheme, the_rest) = authenticate.split(" ", 1) - # Now loop over all the key value pairs that come after the scheme, - # being careful not to roll into the next scheme - match = www_auth.search(the_rest) - auth_params = {} - while match: - if match and len(match.groups()) == 3: - (key, value, the_rest) = match.groups() - auth_params[key.lower()] = UNQUOTE_PAIRS.sub(r'\1', value) # '\\'.join([x.replace('\\', '') for x in value.split('\\\\')]) + try: + + authenticate = headers[headername].strip() + www_auth = USE_WWW_AUTH_STRICT_PARSING and WWW_AUTH_STRICT or WWW_AUTH_RELAXED + while authenticate: + # Break off the scheme at the beginning of the line + if headername == 'authentication-info': + (auth_scheme, the_rest) = ('digest', authenticate) + else: + (auth_scheme, the_rest) = authenticate.split(" ", 1) + # Now loop over all the key value pairs that come after the scheme, + # being careful not to roll into the next scheme match = www_auth.search(the_rest) - retval[auth_scheme.lower()] = auth_params - authenticate = the_rest.strip() + auth_params = 
{} + while match: + if match and len(match.groups()) == 3: + (key, value, the_rest) = match.groups() + auth_params[key.lower()] = UNQUOTE_PAIRS.sub(r'\1', value) # '\\'.join([x.replace('\\', '') for x in value.split('\\\\')]) + match = www_auth.search(the_rest) + retval[auth_scheme.lower()] = auth_params + authenticate = the_rest.strip() + + except ValueError: + raise MalformedHeader("WWW-Authenticate") return retval @@ -279,17 +326,17 @@ def _entry_disposition(response_headers, request_headers): 1. Cache-Control: max-stale 2. Age: headers are not used in the calculations. - Not that this algorithm is simpler than you might think + Not that this algorithm is simpler than you might think because we are operating as a private (non-shared) cache. This lets us ignore 's-maxage'. We can also ignore 'proxy-invalidate' since we aren't a proxy. - We will never return a stale document as - fresh as a design decision, and thus the non-implementation - of 'max-stale'. This also lets us safely ignore 'must-revalidate' + We will never return a stale document as + fresh as a design decision, and thus the non-implementation + of 'max-stale'. This also lets us safely ignore 'must-revalidate' since we operate as if every server has sent 'must-revalidate'. Since we are private we get to ignore both 'public' and 'private' parameters. We also ignore 'no-transform' since - we don't do any transformations. + we don't do any transformations. The 'no-store' parameter is handled at a higher level. 
So the only Cache-Control parameters we look at are: @@ -298,7 +345,7 @@ def _entry_disposition(response_headers, request_headers): max-age min-fresh """ - + retval = "STALE" cc = _parse_cache_control(request_headers) cc_response = _parse_cache_control(response_headers) @@ -340,10 +387,10 @@ def _entry_disposition(response_headers, request_headers): min_fresh = int(cc['min-fresh']) except ValueError: min_fresh = 0 - current_age += min_fresh + current_age += min_fresh if freshness_lifetime > current_age: retval = "FRESH" - return retval + return retval def _decompressContent(response, new_content): content = new_content @@ -391,7 +438,7 @@ def _updateCache(request_headers, response_headers, content, cache, cachekey): if status == 304: status = 200 - status_header = 'status: %d\r\n' % response_headers.status + status_header = 'status: %d\r\n' % status header_str = info.as_string() @@ -408,10 +455,10 @@ def _wsse_username_token(cnonce, iso_now, password): return base64.b64encode(_sha("%s%s%s" % (cnonce, iso_now, password)).digest()).strip() -# For credentials we need two things, first +# For credentials we need two things, first # a pool of credential to try (not necesarily tied to BAsic, Digest, etc.) # Then we also need a list of URIs that have already demanded authentication -# That list is tricky since sub-URIs can take the same auth, or the +# That list is tricky since sub-URIs can take the same auth, or the # auth scheme may change as you descend the tree. # So we also need each Auth instance to be able to tell us # how close to the 'top' it is. @@ -435,7 +482,7 @@ class Authentication(object): def request(self, method, request_uri, headers, content): """Modify the request headers to add the appropriate - Authorization header. Over-rise this in sub-classes.""" + Authorization header. Over-ride this in sub-classes.""" pass def response(self, response, content): @@ -443,7 +490,7 @@ class Authentication(object): or such returned from the last authorized response. 
Over-rise this in sub-classes if necessary. - Return TRUE is the request is to be retried, for + Return TRUE is the request is to be retried, for example Digest may return stale=true. """ return False @@ -461,7 +508,7 @@ class BasicAuthentication(Authentication): class DigestAuthentication(Authentication): - """Only do qop='auth' and MD5, since that + """Only do qop='auth' and MD5, since that is all Apache currently implements""" def __init__(self, credentials, host, request_uri, headers, response, content, http): Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http) @@ -474,7 +521,7 @@ class DigestAuthentication(Authentication): self.challenge['algorithm'] = self.challenge.get('algorithm', 'MD5').upper() if self.challenge['algorithm'] != 'MD5': raise UnimplementedDigestAuthOptionError( _("Unsupported value for algorithm: %s." % self.challenge['algorithm'])) - self.A1 = "".join([self.credentials[0], ":", self.challenge['realm'], ":", self.credentials[1]]) + self.A1 = "".join([self.credentials[0], ":", self.challenge['realm'], ":", self.credentials[1]]) self.challenge['nc'] = 1 def request(self, method, request_uri, headers, content, cnonce = None): @@ -482,23 +529,24 @@ class DigestAuthentication(Authentication): H = lambda x: _md5(x).hexdigest() KD = lambda s, d: H("%s:%s" % (s, d)) A2 = "".join([method, ":", request_uri]) - self.challenge['cnonce'] = cnonce or _cnonce() - request_digest = '"%s"' % KD(H(self.A1), "%s:%s:%s:%s:%s" % (self.challenge['nonce'], - '%08x' % self.challenge['nc'], - self.challenge['cnonce'], - self.challenge['qop'], H(A2) - )) - headers['Authorization'] = 'Digest username="%s", realm="%s", nonce="%s", uri="%s", algorithm=%s, response=%s, qop=%s, nc=%08x, cnonce="%s"' % ( - self.credentials[0], + self.challenge['cnonce'] = cnonce or _cnonce() + request_digest = '"%s"' % KD(H(self.A1), "%s:%s:%s:%s:%s" % ( + self.challenge['nonce'], + '%08x' % self.challenge['nc'], + self.challenge['cnonce'], + 
self.challenge['qop'], H(A2))) + headers['authorization'] = 'Digest username="%s", realm="%s", nonce="%s", uri="%s", algorithm=%s, response=%s, qop=%s, nc=%08x, cnonce="%s"' % ( + self.credentials[0], self.challenge['realm'], self.challenge['nonce'], - request_uri, + request_uri, self.challenge['algorithm'], request_digest, self.challenge['qop'], self.challenge['nc'], - self.challenge['cnonce'], - ) + self.challenge['cnonce']) + if self.challenge.get('opaque'): + headers['authorization'] += ', opaque="%s"' % self.challenge['opaque'] self.challenge['nc'] += 1 def response(self, response, content): @@ -506,14 +554,14 @@ class DigestAuthentication(Authentication): challenge = _parse_www_authenticate(response, 'www-authenticate').get('digest', {}) if 'true' == challenge.get('stale'): self.challenge['nonce'] = challenge['nonce'] - self.challenge['nc'] = 1 + self.challenge['nc'] = 1 return True else: updated_challenge = _parse_www_authenticate(response, 'authentication-info').get('digest', {}) if updated_challenge.has_key('nextnonce'): self.challenge['nonce'] = updated_challenge['nextnonce'] - self.challenge['nc'] = 1 + self.challenge['nc'] = 1 return False @@ -547,9 +595,8 @@ class HmacDigestAuthentication(Authentication): else: self.pwhashmod = _sha self.key = "".join([self.credentials[0], ":", - self.pwhashmod.new("".join([self.credentials[1], self.challenge['salt']])).hexdigest().lower(), - ":", self.challenge['realm'] - ]) + self.pwhashmod.new("".join([self.credentials[1], self.challenge['salt']])).hexdigest().lower(), + ":", self.challenge['realm']]) self.key = self.pwhashmod.new(self.key).hexdigest().lower() def request(self, method, request_uri, headers, content): @@ -561,16 +608,15 @@ class HmacDigestAuthentication(Authentication): cnonce = _cnonce() request_digest = "%s:%s:%s:%s:%s" % (method, request_uri, cnonce, self.challenge['snonce'], headers_val) request_digest = hmac.new(self.key, request_digest, self.hashmod).hexdigest().lower() - 
headers['Authorization'] = 'HMACDigest username="%s", realm="%s", snonce="%s", cnonce="%s", uri="%s", created="%s", response="%s", headers="%s"' % ( - self.credentials[0], + headers['authorization'] = 'HMACDigest username="%s", realm="%s", snonce="%s", cnonce="%s", uri="%s", created="%s", response="%s", headers="%s"' % ( + self.credentials[0], self.challenge['realm'], self.challenge['snonce'], cnonce, - request_uri, + request_uri, created, request_digest, - keylist, - ) + keylist) def response(self, response, content): challenge = _parse_www_authenticate(response, 'www-authenticate').get('hmacdigest', {}) @@ -583,7 +629,7 @@ class WsseAuthentication(Authentication): """This is thinly tested and should not be relied upon. At this time there isn't any third party server to test against. Blogger and TypePad implemented this algorithm at one point - but Blogger has since switched to Basic over HTTPS and + but Blogger has since switched to Basic over HTTPS and TypePad has implemented it wrong, by never issuing a 401 challenge but instead requiring your client to telepathically know that their endpoint is expecting WSSE profile="UsernameToken".""" @@ -593,7 +639,7 @@ class WsseAuthentication(Authentication): def request(self, method, request_uri, headers, content): """Modify the request headers to add the appropriate Authorization header.""" - headers['Authorization'] = 'WSSE profile="UsernameToken"' + headers['authorization'] = 'WSSE profile="UsernameToken"' iso_now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) cnonce = _cnonce() password_digest = _wsse_username_token(cnonce, iso_now, self.credentials[1]) @@ -629,7 +675,7 @@ class GoogleLoginAuthentication(Authentication): def request(self, method, request_uri, headers, content): """Modify the request headers to add the appropriate Authorization header.""" - headers['authorization'] = 'GoogleLogin Auth=' + self.Auth + headers['authorization'] = 'GoogleLogin Auth=' + self.Auth AUTH_SCHEME_CLASSES = { @@ -644,13 
+690,13 @@ AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"] class FileCache(object): """Uses a local directory as a store for cached files. - Not really safe to use if multiple threads or processes are going to + Not really safe to use if multiple threads or processes are going to be running on the same cache. """ def __init__(self, cache, safe=safename): # use safe=lambda x: md5.new(x).hexdigest() for the old behavior self.cache = cache self.safe = safe - if not os.path.exists(cache): + if not os.path.exists(cache): os.makedirs(self.cache) def get(self, key): @@ -660,7 +706,7 @@ class FileCache(object): f = file(cacheFullPath, "rb") retval = f.read() f.close() - except IOError, e: + except IOError: pass return retval @@ -688,34 +734,127 @@ class Credentials(object): def iter(self, domain): for (cdomain, name, password) in self.credentials: if cdomain == "" or domain == cdomain: - yield (name, password) + yield (name, password) class KeyCerts(Credentials): """Identical to Credentials except that name/password are mapped to key/cert.""" pass +class AllHosts(object): + pass class ProxyInfo(object): - """Collect information required to use a proxy.""" - def __init__(self, proxy_type, proxy_host, proxy_port, proxy_rdns=None, proxy_user=None, proxy_pass=None): - """The parameter proxy_type must be set to one of socks.PROXY_TYPE_XXX - constants. For example: + """Collect information required to use a proxy.""" + bypass_hosts = () + + def __init__(self, proxy_type, proxy_host, proxy_port, + proxy_rdns=None, proxy_user=None, proxy_pass=None): + """The parameter proxy_type must be set to one of socks.PROXY_TYPE_XXX + constants. 
For example: -p = ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, proxy_host='localhost', proxy_port=8000) - """ - self.proxy_type, self.proxy_host, self.proxy_port, self.proxy_rdns, self.proxy_user, self.proxy_pass = proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass + p = ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, + proxy_host='localhost', proxy_port=8000) + """ + self.proxy_type = proxy_type + self.proxy_host = proxy_host + self.proxy_port = proxy_port + self.proxy_rdns = proxy_rdns + self.proxy_user = proxy_user + self.proxy_pass = proxy_pass + + def astuple(self): + return (self.proxy_type, self.proxy_host, self.proxy_port, + self.proxy_rdns, self.proxy_user, self.proxy_pass) + + def isgood(self): + return (self.proxy_host != None) and (self.proxy_port != None) + + def applies_to(self, hostname): + return not self.bypass_host(hostname) + + def bypass_host(self, hostname): + """Has this host been excluded from the proxy config""" + if self.bypass_hosts is AllHosts: + return True + + bypass = False + for domain in self.bypass_hosts: + if hostname.endswith(domain): + bypass = True + + return bypass - def astuple(self): - return (self.proxy_type, self.proxy_host, self.proxy_port, self.proxy_rdns, - self.proxy_user, self.proxy_pass) - def isgood(self): - return socks and (self.proxy_host != None) and (self.proxy_port != None) +def proxy_info_from_environment(method='http'): + """ + Read proxy info from the environment variables. 
+ """ + if method not in ['http', 'https']: + return + + env_var = method + '_proxy' + url = os.environ.get(env_var, os.environ.get(env_var.upper())) + if not url: + return + pi = proxy_info_from_url(url, method) + + no_proxy = os.environ.get('no_proxy', os.environ.get('NO_PROXY', '')) + bypass_hosts = [] + if no_proxy: + bypass_hosts = no_proxy.split(',') + # special case, no_proxy=* means all hosts bypassed + if no_proxy == '*': + bypass_hosts = AllHosts + + pi.bypass_hosts = bypass_hosts + return pi + +def proxy_info_from_url(url, method='http'): + """ + Construct a ProxyInfo from a URL (such as http_proxy env var) + """ + url = urlparse.urlparse(url) + username = None + password = None + port = None + if '@' in url[1]: + ident, host_port = url[1].split('@', 1) + if ':' in ident: + username, password = ident.split(':', 1) + else: + password = ident + else: + host_port = url[1] + if ':' in host_port: + host, port = host_port.split(':', 1) + else: + host = host_port + + if port: + port = int(port) + else: + port = dict(https=443, http=80)[method] + + proxy_type = 3 # socks.PROXY_TYPE_HTTP + return ProxyInfo( + proxy_type = proxy_type, + proxy_host = host, + proxy_port = port, + proxy_user = username or None, + proxy_pass = password or None, + ) class HTTPConnectionWithTimeout(httplib.HTTPConnection): - """HTTPConnection subclass that supports timeouts""" + """ + HTTPConnection subclass that supports timeouts + + All timeouts are in seconds. If None is passed for timeout then + Python's default timeout for sockets will be used. 
See for example + the docs of socket.setdefaulttimeout(): + http://docs.python.org/library/socket.html#socket.setdefaulttimeout + """ def __init__(self, host, port=None, strict=None, timeout=None, proxy_info=None): httplib.HTTPConnection.__init__(self, host, port, strict) @@ -725,27 +864,46 @@ class HTTPConnectionWithTimeout(httplib.HTTPConnection): def connect(self): """Connect to the host and port specified in __init__.""" # Mostly verbatim from httplib.py. + if self.proxy_info and socks is None: + raise ProxiesUnavailableError( + 'Proxy support missing but proxy use was requested!') msg = "getaddrinfo returns an empty list" - for res in socket.getaddrinfo(self.host, self.port, 0, - socket.SOCK_STREAM): + if self.proxy_info and self.proxy_info.isgood(): + use_proxy = True + proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass = self.proxy_info.astuple() + else: + use_proxy = False + if use_proxy and proxy_rdns: + host = proxy_host + port = proxy_port + else: + host = self.host + port = self.port + + for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM): af, socktype, proto, canonname, sa = res try: - if self.proxy_info and self.proxy_info.isgood(): + if use_proxy: self.sock = socks.socksocket(af, socktype, proto) - self.sock.setproxy(*self.proxy_info.astuple()) + self.sock.setproxy(proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass) else: self.sock = socket.socket(af, socktype, proto) + self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) # Different from httplib: support timeouts. if has_timeout(self.timeout): self.sock.settimeout(self.timeout) # End of difference from httplib. 
if self.debuglevel > 0: - print "connect: (%s, %s)" % (self.host, self.port) + print "connect: (%s, %s) ************" % (self.host, self.port) + if use_proxy: + print "proxy: %s ************" % str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass)) - self.sock.connect(sa) + self.sock.connect((self.host, self.port) + sa[2:]) except socket.error, msg: if self.debuglevel > 0: - print 'connect fail:', (self.host, self.port) + print "connect fail: (%s, %s)" % (self.host, self.port) + if use_proxy: + print "proxy: %s" % str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass)) if self.sock: self.sock.close() self.sock = None @@ -755,56 +913,265 @@ class HTTPConnectionWithTimeout(httplib.HTTPConnection): raise socket.error, msg class HTTPSConnectionWithTimeout(httplib.HTTPSConnection): - "This class allows communication via SSL." + """ + This class allows communication via SSL. + All timeouts are in seconds. If None is passed for timeout then + Python's default timeout for sockets will be used. 
See for example + the docs of socket.setdefaulttimeout(): + http://docs.python.org/library/socket.html#socket.setdefaulttimeout + """ def __init__(self, host, port=None, key_file=None, cert_file=None, - strict=None, timeout=None, proxy_info=None): - httplib.HTTPSConnection.__init__(self, host, port=port, key_file=key_file, - cert_file=cert_file, strict=strict) + strict=None, timeout=None, proxy_info=None, + ca_certs=None, disable_ssl_certificate_validation=False): + httplib.HTTPSConnection.__init__(self, host, port=port, + key_file=key_file, + cert_file=cert_file, strict=strict) self.timeout = timeout self.proxy_info = proxy_info + if ca_certs is None: + ca_certs = CA_CERTS + self.ca_certs = ca_certs + self.disable_ssl_certificate_validation = \ + disable_ssl_certificate_validation + + # The following two methods were adapted from https_wrapper.py, released + # with the Google Appengine SDK at + # http://googleappengine.googlecode.com/svn-history/r136/trunk/python/google/appengine/tools/https_wrapper.py + # under the following license: + # + # Copyright 2007 Google Inc. + # + # Licensed under the Apache License, Version 2.0 (the "License"); + # you may not use this file except in compliance with the License. + # You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + # + + def _GetValidHostsForCert(self, cert): + """Returns a list of valid host globs for an SSL certificate. + + Args: + cert: A dictionary representing an SSL certificate. + Returns: + list: A list of valid host globs. 
+ """ + if 'subjectAltName' in cert: + return [x[1] for x in cert['subjectAltName'] + if x[0].lower() == 'dns'] + else: + return [x[0][1] for x in cert['subject'] + if x[0][0].lower() == 'commonname'] + + def _ValidateCertificateHostname(self, cert, hostname): + """Validates that a given hostname is valid for an SSL certificate. + + Args: + cert: A dictionary representing an SSL certificate. + hostname: The hostname to test. + Returns: + bool: Whether or not the hostname is valid for this certificate. + """ + hosts = self._GetValidHostsForCert(cert) + for host in hosts: + host_re = host.replace('.', '\.').replace('*', '[^.]*') + if re.search('^%s$' % (host_re,), hostname, re.I): + return True + return False def connect(self): "Connect to a host on a given (SSL) port." + msg = "getaddrinfo returns an empty list" if self.proxy_info and self.proxy_info.isgood(): - sock = socks.socksocket(socket.AF_INET, socket.SOCK_STREAM) - sock.setproxy(*self.proxy_info.astuple()) + use_proxy = True + proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass = self.proxy_info.astuple() + else: + use_proxy = False + if use_proxy and proxy_rdns: + host = proxy_host + port = proxy_port else: - sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - - if has_timeout(self.timeout): - sock.settimeout(self.timeout) - sock.connect((self.host, self.port)) - self.sock =_ssl_wrap_socket(sock, self.key_file, self.cert_file) + host = self.host + port = self.port + + address_info = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM) + for family, socktype, proto, canonname, sockaddr in address_info: + try: + if use_proxy: + sock = socks.socksocket(family, socktype, proto) + + sock.setproxy(proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass) + else: + sock = socket.socket(family, socktype, proto) + sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) + + if has_timeout(self.timeout): + sock.settimeout(self.timeout) + sock.connect((self.host, self.port)) 
+ self.sock =_ssl_wrap_socket( + sock, self.key_file, self.cert_file, + self.disable_ssl_certificate_validation, self.ca_certs) + if self.debuglevel > 0: + print "connect: (%s, %s)" % (self.host, self.port) + if use_proxy: + print "proxy: %s" % str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass)) + if not self.disable_ssl_certificate_validation: + cert = self.sock.getpeercert() + hostname = self.host.split(':', 0)[0] + if not self._ValidateCertificateHostname(cert, hostname): + raise CertificateHostnameMismatch( + 'Server presented certificate that does not match ' + 'host %s: %s' % (hostname, cert), hostname, cert) + except ssl_SSLError, e: + if sock: + sock.close() + if self.sock: + self.sock.close() + self.sock = None + # Unfortunately the ssl module doesn't seem to provide any way + # to get at more detailed error information, in particular + # whether the error is due to certificate validation or + # something else (such as SSL protocol mismatch). + if e.errno == ssl.SSL_ERROR_SSL: + raise SSLHandshakeError(e) + else: + raise + except (socket.timeout, socket.gaierror): + raise + except socket.error, msg: + if self.debuglevel > 0: + print "connect fail: (%s, %s)" % (self.host, self.port) + if use_proxy: + print "proxy: %s" % str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass)) + if self.sock: + self.sock.close() + self.sock = None + continue + break + if not self.sock: + raise socket.error, msg + +SCHEME_TO_CONNECTION = { + 'http': HTTPConnectionWithTimeout, + 'https': HTTPSConnectionWithTimeout +} +# Use a different connection object for Google App Engine +try: + try: + from google.appengine.api import apiproxy_stub_map + if apiproxy_stub_map.apiproxy.GetStub('urlfetch') is None: + raise ImportError # Bail out; we're not actually running on App Engine. 
+ from google.appengine.api.urlfetch import fetch + from google.appengine.api.urlfetch import InvalidURLError + except (ImportError, AttributeError): + from google3.apphosting.api import apiproxy_stub_map + if apiproxy_stub_map.apiproxy.GetStub('urlfetch') is None: + raise ImportError # Bail out; we're not actually running on App Engine. + from google3.apphosting.api.urlfetch import fetch + from google3.apphosting.api.urlfetch import InvalidURLError + + def _new_fixed_fetch(validate_certificate): + def fixed_fetch(url, payload=None, method="GET", headers={}, + allow_truncated=False, follow_redirects=True, + deadline=5): + return fetch(url, payload=payload, method=method, headers=headers, + allow_truncated=allow_truncated, + follow_redirects=follow_redirects, deadline=deadline, + validate_certificate=validate_certificate) + return fixed_fetch + + class AppEngineHttpConnection(httplib.HTTPConnection): + """Use httplib on App Engine, but compensate for its weirdness. + + The parameters key_file, cert_file, proxy_info, ca_certs, and + disable_ssl_certificate_validation are all dropped on the ground. + """ + def __init__(self, host, port=None, key_file=None, cert_file=None, + strict=None, timeout=None, proxy_info=None, ca_certs=None, + disable_ssl_certificate_validation=False): + httplib.HTTPConnection.__init__(self, host, port=port, + strict=strict, timeout=timeout) + + class AppEngineHttpsConnection(httplib.HTTPSConnection): + """Same as AppEngineHttpConnection, but for HTTPS URIs.""" + def __init__(self, host, port=None, key_file=None, cert_file=None, + strict=None, timeout=None, proxy_info=None, ca_certs=None, + disable_ssl_certificate_validation=False): + httplib.HTTPSConnection.__init__(self, host, port=port, + key_file=key_file, + cert_file=cert_file, strict=strict, + timeout=timeout) + self._fetch = _new_fixed_fetch( + not disable_ssl_certificate_validation) + + # Update the connection classes to use the Googel App Engine specific ones. 
+ SCHEME_TO_CONNECTION = { + 'http': AppEngineHttpConnection, + 'https': AppEngineHttpsConnection + } +except (ImportError, AttributeError): + pass class Http(object): """An HTTP client that handles: -- all methods -- caching -- ETags -- compression, -- HTTPS -- Basic -- Digest -- WSSE - -and more. - """ - def __init__(self, cache=None, timeout=None, proxy_info=None): - """The value of proxy_info is a ProxyInfo instance. -If 'cache' is a string then it is used as a directory name -for a disk cache. Otherwise it must be an object that supports -the same interface as FileCache.""" + - all methods + - caching + - ETags + - compression, + - HTTPS + - Basic + - Digest + - WSSE + + and more. + """ + def __init__(self, cache=None, timeout=None, + proxy_info=proxy_info_from_environment, + ca_certs=None, disable_ssl_certificate_validation=False): + """If 'cache' is a string then it is used as a directory name for + a disk cache. Otherwise it must be an object that supports the + same interface as FileCache. + + All timeouts are in seconds. If None is passed for timeout + then Python's default timeout for sockets will be used. See + for example the docs of socket.setdefaulttimeout(): + http://docs.python.org/library/socket.html#socket.setdefaulttimeout + + `proxy_info` may be: + - a callable that takes the http scheme ('http' or 'https') and + returns a ProxyInfo instance per request. By default, uses + proxy_nfo_from_environment. + - a ProxyInfo instance (static proxy config). + - None (proxy disabled). + + ca_certs is the path of a file containing root CA certificates for SSL + server certificate validation. By default, a CA cert file bundled with + httplib2 is used. + + If disable_ssl_certificate_validation is true, SSL cert validation will + not be performed. 
+ """ self.proxy_info = proxy_info + self.ca_certs = ca_certs + self.disable_ssl_certificate_validation = \ + disable_ssl_certificate_validation + # Map domain name to an httplib connection self.connections = {} # The location of the cache, for now a directory # where cached responses are held. - if cache and isinstance(cache, str): + if cache and isinstance(cache, basestring): self.cache = FileCache(cache) else: self.cache = cache @@ -820,10 +1187,10 @@ the same interface as FileCache.""" # If set to False then no redirects are followed, even safe ones. self.follow_redirects = True - + # Which HTTP methods do we apply optimistic concurrency to, i.e. # which methods get an "if-match:" etag header added to them. - self.optimistic_concurrency_methods = ["PUT"] + self.optimistic_concurrency_methods = ["PUT", "PATCH"] # If 'follow_redirects' is True, and this is set to True then # all redirecs are followed, including unsafe ones. @@ -831,10 +1198,27 @@ the same interface as FileCache.""" self.ignore_etag = False - self.force_exception_to_status_code = False + self.force_exception_to_status_code = False self.timeout = timeout + # Keep Authorization: headers on a redirect. + self.forward_authorization_headers = False + + def __getstate__(self): + state_dict = copy.copy(self.__dict__) + # In case request is augmented by some foreign object such as + # credentials which handle auth + if 'request' in state_dict: + del state_dict['request'] + if 'connections' in state_dict: + del state_dict['connections'] + return state_dict + + def __setstate__(self, state): + self.__dict__.update(state) + self.connections = {} + def _auth_from_challenge(self, host, request_uri, headers, response, content): """A generator that creates Authorization objects that can be applied to requests. 
@@ -862,37 +1246,56 @@ the same interface as FileCache.""" self.authorizations = [] def _conn_request(self, conn, request_uri, method, body, headers): - for i in range(2): + for i in range(RETRIES): try: + if hasattr(conn, 'sock') and conn.sock is None: + conn.connect() conn.request(method, request_uri, body, headers) + except socket.timeout: + raise except socket.gaierror: conn.close() raise ServerNotFoundError("Unable to find the server at %s" % conn.host) + except ssl_SSLError: + conn.close() + raise except socket.error, e: - if not hasattr(e, 'errno'): # I don't know what this is so lets raise it if it happens - raise - elif e.errno == errno.ECONNREFUSED: # Connection refused + err = 0 + if hasattr(e, 'args'): + err = getattr(e, 'args')[0] + else: + err = e.errno + if err == errno.ECONNREFUSED: # Connection refused raise - # Just because the server closed the connection doesn't apparently mean - # that the server didn't send a response. - pass except httplib.HTTPException: # Just because the server closed the connection doesn't apparently mean # that the server didn't send a response. 
- pass + if hasattr(conn, 'sock') and conn.sock is None: + if i < RETRIES-1: + conn.close() + conn.connect() + continue + else: + conn.close() + raise + if i < RETRIES-1: + conn.close() + conn.connect() + continue try: response = conn.getresponse() except (socket.error, httplib.HTTPException): - if i == 0: + if i < RETRIES-1: conn.close() conn.connect() continue else: + conn.close() raise else: content = "" if method == "HEAD": - response.close() + conn.close() else: content = response.read() response = Response(response) @@ -908,12 +1311,12 @@ the same interface as FileCache.""" auths = [(auth.depth(request_uri), auth) for auth in self.authorizations if auth.inscope(host, request_uri)] auth = auths and sorted(auths)[0][1] or None - if auth: + if auth: auth.request(method, request_uri, headers, body) (response, content) = self._conn_request(conn, request_uri, method, body, headers) - if auth: + if auth: if auth.response(response, body): auth.request(method, request_uri, headers, body) (response, content) = self._conn_request(conn, request_uri, method, body, headers ) @@ -921,7 +1324,7 @@ the same interface as FileCache.""" if response.status == 401: for authorization in self._auth_from_challenge(host, request_uri, headers, response, content): - authorization.request(method, request_uri, headers, body) + authorization.request(method, request_uri, headers, body) (response, content) = self._conn_request(conn, request_uri, method, body, headers, ) if response.status != 401: self.authorizations.append(authorization) @@ -944,26 +1347,31 @@ the same interface as FileCache.""" if response.status == 301 and method in ["GET", "HEAD"]: response['-x-permanent-redirect-url'] = response['location'] if not response.has_key('content-location'): - response['content-location'] = absolute_uri + response['content-location'] = absolute_uri _updateCache(headers, response, content, self.cache, cachekey) if headers.has_key('if-none-match'): del headers['if-none-match'] if 
headers.has_key('if-modified-since'): del headers['if-modified-since'] + if 'authorization' in headers and not self.forward_authorization_headers: + del headers['authorization'] if response.has_key('location'): location = response['location'] old_response = copy.deepcopy(response) if not old_response.has_key('content-location'): - old_response['content-location'] = absolute_uri - redirect_method = ((response.status == 303) and (method not in ["GET", "HEAD"])) and "GET" or method + old_response['content-location'] = absolute_uri + redirect_method = method + if response.status in [302, 303]: + redirect_method = "GET" + body = None (response, content) = self.request(location, redirect_method, body=body, headers = headers, redirections = redirections - 1) response.previous = old_response else: - raise RedirectLimit( _("Redirected more times than rediection_limit allows."), response, content) - elif response.status in [200, 203] and method == "GET": + raise RedirectLimit("Redirected more times than rediection_limit allows.", response, content) + elif response.status in [200, 203] and method in ["GET", "HEAD"]: # Don't cache 206's since we aren't going to handle byte range requests if not response.has_key('content-location'): - response['content-location'] = absolute_uri + response['content-location'] = absolute_uri _updateCache(headers, response, content, self.cache, cachekey) return (response, content) @@ -978,24 +1386,25 @@ the same interface as FileCache.""" def request(self, uri, method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS, connection_type=None): """ Performs a single HTTP request. -The 'uri' is the URI of the HTTP resource and can begin -with either 'http' or 'https'. The value of 'uri' must be an absolute URI. -The 'method' is the HTTP method to perform, such as GET, POST, DELETE, etc. -There is no restriction on the methods allowed. + The 'uri' is the URI of the HTTP resource and can begin with either + 'http' or 'https'. 
The value of 'uri' must be an absolute URI. + + The 'method' is the HTTP method to perform, such as GET, POST, DELETE, + etc. There is no restriction on the methods allowed. -The 'body' is the entity body to be sent with the request. It is a string -object. + The 'body' is the entity body to be sent with the request. It is a + string object. -Any extra headers that are to be sent with the request should be provided in the -'headers' dictionary. + Any extra headers that are to be sent with the request should be + provided in the 'headers' dictionary. -The maximum number of redirect to follow before raising an -exception is 'redirections. The default is 5. + The maximum number of redirect to follow before raising an + exception is 'redirections. The default is 5. -The return value is a tuple of (response, content), the first -being and instance of the 'Response' class, the second being -a string that contains the response entity body. + The return value is a tuple of (response, content), the first + being and instance of the 'Response' class, the second being + a string that contains the response entity body. """ try: if headers is None: @@ -1004,7 +1413,7 @@ a string that contains the response entity body. headers = self._normalize_headers(headers) if not headers.has_key('user-agent'): - headers['user-agent'] = "Python-httplib2/%s" % __version__ + headers['user-agent'] = "Python-httplib2/%s (gzip)" % __version__ uri = iri2uri(uri) @@ -1014,21 +1423,38 @@ a string that contains the response entity body. 
scheme = 'https' authority = domain_port[0] + proxy_info = self._get_proxy_info(scheme, authority) + conn_key = scheme+":"+authority if conn_key in self.connections: conn = self.connections[conn_key] else: if not connection_type: - connection_type = (scheme == 'https') and HTTPSConnectionWithTimeout or HTTPConnectionWithTimeout + connection_type = SCHEME_TO_CONNECTION[scheme] certs = list(self.certificates.iter(authority)) - if scheme == 'https' and certs: - conn = self.connections[conn_key] = connection_type(authority, key_file=certs[0][0], - cert_file=certs[0][1], timeout=self.timeout, proxy_info=self.proxy_info) + if scheme == 'https': + if certs: + conn = self.connections[conn_key] = connection_type( + authority, key_file=certs[0][0], + cert_file=certs[0][1], timeout=self.timeout, + proxy_info=proxy_info, + ca_certs=self.ca_certs, + disable_ssl_certificate_validation= + self.disable_ssl_certificate_validation) + else: + conn = self.connections[conn_key] = connection_type( + authority, timeout=self.timeout, + proxy_info=proxy_info, + ca_certs=self.ca_certs, + disable_ssl_certificate_validation= + self.disable_ssl_certificate_validation) else: - conn = self.connections[conn_key] = connection_type(authority, timeout=self.timeout, proxy_info=self.proxy_info) + conn = self.connections[conn_key] = connection_type( + authority, timeout=self.timeout, + proxy_info=proxy_info) conn.set_debuglevel(debuglevel) - if method in ["GET", "HEAD"] and 'range' not in headers and 'accept-encoding' not in headers: + if 'range' not in headers and 'accept-encoding' not in headers: headers['accept-encoding'] = 'gzip, deflate' info = email.Message.Message() @@ -1048,7 +1474,7 @@ a string that contains the response entity body. 
feedparser.feed(info) info = feedparser.close() feedparser._parse = None - except IndexError, ValueError: + except (IndexError, ValueError): self.cache.delete(cachekey) cachekey = None cached_value = None @@ -1071,13 +1497,15 @@ a string that contains the response entity body. for header in vary_headers: key = '-varied-%s' % header value = info[key] - if headers.get(header, '') != value: - cached_value = None - break + if headers.get(header, None) != value: + cached_value = None + break if cached_value and method in ["GET", "HEAD"] and self.cache and 'range' not in headers: if info.has_key('-x-permanent-redirect-url'): # Should cached permanent redirects be counted in our redirection count? For now, yes. + if redirections <= 0: + raise RedirectLimit("Redirected more times than rediection_limit allows.", {}, "") (response, new_content) = self.request(info['-x-permanent-redirect-url'], "GET", headers = headers, redirections = redirections - 1) response.previous = Response(info) response.previous.fromcache = True @@ -1085,13 +1513,13 @@ a string that contains the response entity body. # Determine our course of action: # Is the cached entry fresh or stale? # Has the client requested a non-cached response? - # - # There seems to be three possible answers: + # + # There seems to be three possible answers: # 1. [FRESH] Return the cache entry w/o doing a GET # 2. [STALE] Do the GET (but add in cache validators if available) # 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request - entry_disposition = _entry_disposition(info, headers) - + entry_disposition = _entry_disposition(info, headers) + if entry_disposition == "FRESH": if not cached_value: info['status'] = '504' @@ -1113,7 +1541,7 @@ a string that contains the response entity body. 
if response.status == 304 and method == "GET": # Rewrite the cache entry with the new end-to-end headers - # Take all headers that are in response + # Take all headers that are in response # and overwrite their values in info. # unless they are hop-by-hop, or are listed in the connection header. @@ -1125,14 +1553,14 @@ a string that contains the response entity body. _updateCache(headers, merged_response, content, self.cache, cachekey) response = merged_response response.status = 200 - response.fromcache = True + response.fromcache = True elif response.status == 200: content = new_content else: self.cache.delete(cachekey) - content = new_content - else: + content = new_content + else: cc = _parse_cache_control(headers) if cc.has_key('only-if-cached'): info['status'] = '504' @@ -1146,34 +1574,47 @@ a string that contains the response entity body. response = e.response content = e.content response.status = 500 - response.reason = str(e) - elif isinstance(e, socket.timeout) or (isinstance(e, socket.error) and 'timed out' in str(e)): + response.reason = str(e) + elif isinstance(e, socket.timeout): content = "Request Timeout" - response = Response( { - "content-type": "text/plain", - "status": "408", - "content-length": len(content) - }) + response = Response({ + "content-type": "text/plain", + "status": "408", + "content-length": len(content) + }) response.reason = "Request Timeout" else: - content = str(e) - response = Response( { - "content-type": "text/plain", - "status": "400", - "content-length": len(content) - }) - response.reason = "Bad Request" + content = str(e) + response = Response({ + "content-type": "text/plain", + "status": "400", + "content-length": len(content) + }) + response.reason = "Bad Request" else: raise - + return (response, content) - + def _get_proxy_info(self, scheme, authority): + """Return a ProxyInfo instance (or None) based on the scheme + and authority. 
+ """ + hostname, port = urllib.splitport(authority) + proxy_info = self.proxy_info + if callable(proxy_info): + proxy_info = proxy_info(scheme) + + if (hasattr(proxy_info, 'applies_to') + and not proxy_info.applies_to(hostname)): + proxy_info = None + return proxy_info + class Response(dict): """An object more like email.Message than httplib.HTTPResponse.""" - + """Is this response from our local cache""" fromcache = False @@ -1189,27 +1630,28 @@ class Response(dict): previous = None def __init__(self, info): - # info is either an email.Message or + # info is either an email.Message or # an httplib.HTTPResponse object. if isinstance(info, httplib.HTTPResponse): - for key, value in info.getheaders(): - self[key.lower()] = value + for key, value in info.getheaders(): + self[key.lower()] = value self.status = info.status self['status'] = str(self.status) self.reason = info.reason self.version = info.version elif isinstance(info, email.Message.Message): - for key, value in info.items(): - self[key] = value + for key, value in info.items(): + self[key.lower()] = value self.status = int(self['status']) else: - for key, value in info.iteritems(): - self[key] = value + for key, value in info.iteritems(): + self[key.lower()] = value self.status = int(self.get('status', self.status)) + self.reason = self.get('reason', self.reason) def __getattr__(self, name): if name == 'dict': - return self - else: - raise AttributeError, name + return self + else: + raise AttributeError, name diff --git a/libs/httplib2/cacerts.txt b/libs/httplib2/cacerts.txt new file mode 100644 index 0000000..d8a0027 --- /dev/null +++ b/libs/httplib2/cacerts.txt @@ -0,0 +1,739 @@ +# Certifcate Authority certificates for validating SSL connections. 
+# +# This file contains PEM format certificates generated from +# http://mxr.mozilla.org/seamonkey/source/security/nss/lib/ckfw/builtins/certdata.txt +# +# ***** BEGIN LICENSE BLOCK ***** +# Version: MPL 1.1/GPL 2.0/LGPL 2.1 +# +# The contents of this file are subject to the Mozilla Public License Version +# 1.1 (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# http://www.mozilla.org/MPL/ +# +# Software distributed under the License is distributed on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License +# for the specific language governing rights and limitations under the +# License. +# +# The Original Code is the Netscape security libraries. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1994-2000 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# +# Alternatively, the contents of this file may be used under the terms of +# either the GNU General Public License Version 2 or later (the "GPL"), or +# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), +# in which case the provisions of the GPL or the LGPL are applicable instead +# of those above. If you wish to allow use of your version of this file only +# under the terms of either the GPL or the LGPL, and not to allow others to +# use your version of this file under the terms of the MPL, indicate your +# decision by deleting the provisions above and replace them with the notice +# and other provisions required by the GPL or the LGPL. If you do not delete +# the provisions above, a recipient may use your version of this file under +# the terms of any one of the MPL, the GPL or the LGPL. 
+# +# ***** END LICENSE BLOCK ***** + +Verisign/RSA Secure Server CA +============================= + +-----BEGIN CERTIFICATE----- +MIICNDCCAaECEAKtZn5ORf5eV288mBle3cAwDQYJKoZIhvcNAQECBQAwXzELMAkG +A1UEBhMCVVMxIDAeBgNVBAoTF1JTQSBEYXRhIFNlY3VyaXR5LCBJbmMuMS4wLAYD +VQQLEyVTZWN1cmUgU2VydmVyIENlcnRpZmljYXRpb24gQXV0aG9yaXR5MB4XDTk0 +MTEwOTAwMDAwMFoXDTEwMDEwNzIzNTk1OVowXzELMAkGA1UEBhMCVVMxIDAeBgNV +BAoTF1JTQSBEYXRhIFNlY3VyaXR5LCBJbmMuMS4wLAYDVQQLEyVTZWN1cmUgU2Vy +dmVyIENlcnRpZmljYXRpb24gQXV0aG9yaXR5MIGbMA0GCSqGSIb3DQEBAQUAA4GJ +ADCBhQJ+AJLOesGugz5aqomDV6wlAXYMra6OLDfO6zV4ZFQD5YRAUcm/jwjiioII +0haGN1XpsSECrXZogZoFokvJSyVmIlZsiAeP94FZbYQHZXATcXY+m3dM41CJVphI +uR2nKRoTLkoRWZweFdVJVCxzOmmCsZc5nG1wZ0jl3S3WyB57AgMBAAEwDQYJKoZI +hvcNAQECBQADfgBl3X7hsuyw4jrg7HFGmhkRuNPHoLQDQCYCPgmc4RKz0Vr2N6W3 +YQO2WxZpO8ZECAyIUwxrl0nHPjXcbLm7qt9cuzovk2C2qUtN8iD3zV9/ZHuO3ABc +1/p3yjkWWW8O6tO1g39NTUJWdrTJXwT4OPjr0l91X817/OWOgHz8UA== +-----END CERTIFICATE----- + +Thawte Personal Basic CA +======================== + +-----BEGIN CERTIFICATE----- +MIIDITCCAoqgAwIBAgIBADANBgkqhkiG9w0BAQQFADCByzELMAkGA1UEBhMCWkEx +FTATBgNVBAgTDFdlc3Rlcm4gQ2FwZTESMBAGA1UEBxMJQ2FwZSBUb3duMRowGAYD +VQQKExFUaGF3dGUgQ29uc3VsdGluZzEoMCYGA1UECxMfQ2VydGlmaWNhdGlvbiBT +ZXJ2aWNlcyBEaXZpc2lvbjEhMB8GA1UEAxMYVGhhd3RlIFBlcnNvbmFsIEJhc2lj +IENBMSgwJgYJKoZIhvcNAQkBFhlwZXJzb25hbC1iYXNpY0B0aGF3dGUuY29tMB4X +DTk2MDEwMTAwMDAwMFoXDTIwMTIzMTIzNTk1OVowgcsxCzAJBgNVBAYTAlpBMRUw +EwYDVQQIEwxXZXN0ZXJuIENhcGUxEjAQBgNVBAcTCUNhcGUgVG93bjEaMBgGA1UE +ChMRVGhhd3RlIENvbnN1bHRpbmcxKDAmBgNVBAsTH0NlcnRpZmljYXRpb24gU2Vy +dmljZXMgRGl2aXNpb24xITAfBgNVBAMTGFRoYXd0ZSBQZXJzb25hbCBCYXNpYyBD +QTEoMCYGCSqGSIb3DQEJARYZcGVyc29uYWwtYmFzaWNAdGhhd3RlLmNvbTCBnzAN +BgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEAvLyTU23AUE+CFeZIlDWmWr5vQvoPR+53 +dXLdjUmbllegeNTKP1GzaQuRdhciB5dqxFGTS+CN7zeVoQxN2jSQHReJl+A1OFdK +wPQIcOk8RHtQfmGakOMj04gRRif1CwcOu93RfyAKiLlWCy4cgNrx454p7xS9CkT7 +G1sY0b8jkyECAwEAAaMTMBEwDwYDVR0TAQH/BAUwAwEB/zANBgkqhkiG9w0BAQQF 
+AAOBgQAt4plrsD16iddZopQBHyvdEktTwq1/qqcAXJFAVyVKOKqEcLnZgA+le1z7 +c8a914phXAPjLSeoF+CEhULcXpvGt7Jtu3Sv5D/Lp7ew4F2+eIMllNLbgQ95B21P +9DkVWlIBe94y1k049hJcBlDfBVu9FEuh3ym6O0GN92NWod8isQ== +-----END CERTIFICATE----- + +Thawte Personal Premium CA +========================== + +-----BEGIN CERTIFICATE----- +MIIDKTCCApKgAwIBAgIBADANBgkqhkiG9w0BAQQFADCBzzELMAkGA1UEBhMCWkEx +FTATBgNVBAgTDFdlc3Rlcm4gQ2FwZTESMBAGA1UEBxMJQ2FwZSBUb3duMRowGAYD +VQQKExFUaGF3dGUgQ29uc3VsdGluZzEoMCYGA1UECxMfQ2VydGlmaWNhdGlvbiBT +ZXJ2aWNlcyBEaXZpc2lvbjEjMCEGA1UEAxMaVGhhd3RlIFBlcnNvbmFsIFByZW1p +dW0gQ0ExKjAoBgkqhkiG9w0BCQEWG3BlcnNvbmFsLXByZW1pdW1AdGhhd3RlLmNv +bTAeFw05NjAxMDEwMDAwMDBaFw0yMDEyMzEyMzU5NTlaMIHPMQswCQYDVQQGEwJa +QTEVMBMGA1UECBMMV2VzdGVybiBDYXBlMRIwEAYDVQQHEwlDYXBlIFRvd24xGjAY +BgNVBAoTEVRoYXd0ZSBDb25zdWx0aW5nMSgwJgYDVQQLEx9DZXJ0aWZpY2F0aW9u +IFNlcnZpY2VzIERpdmlzaW9uMSMwIQYDVQQDExpUaGF3dGUgUGVyc29uYWwgUHJl +bWl1bSBDQTEqMCgGCSqGSIb3DQEJARYbcGVyc29uYWwtcHJlbWl1bUB0aGF3dGUu +Y29tMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDJZtn4B0TPuYwu8KHvE0Vs +Bd/eJxZRNkERbGw77f4QfRKe5ZtCmv5gMcNmt3M6SK5O0DI3lIi1DbbZ8/JE2dWI +Et12TfIa/G8jHnrx2JhFTgcQ7xZC0EN1bUre4qrJMf8fAHB8Zs8QJQi6+u4A6UYD +ZicRFTuqW/KY3TZCstqIdQIDAQABoxMwETAPBgNVHRMBAf8EBTADAQH/MA0GCSqG +SIb3DQEBBAUAA4GBAGk2ifc0KjNyL2071CKyuG+axTZmDhs8obF1Wub9NdP4qPIH +b4Vnjt4rueIXsDqg8A6iAJrf8xQVbrvIhVqYgPn/vnQdPfP+MCXRNzRn+qVxeTBh +KXLA4CxM+1bkOqhv5TJZUtt1KFBZDPgLGeSs2a+WjS9Q2wfD6h+rM+D1KzGJ +-----END CERTIFICATE----- + +Thawte Personal Freemail CA +=========================== + +-----BEGIN CERTIFICATE----- +MIIDLTCCApagAwIBAgIBADANBgkqhkiG9w0BAQQFADCB0TELMAkGA1UEBhMCWkEx +FTATBgNVBAgTDFdlc3Rlcm4gQ2FwZTESMBAGA1UEBxMJQ2FwZSBUb3duMRowGAYD +VQQKExFUaGF3dGUgQ29uc3VsdGluZzEoMCYGA1UECxMfQ2VydGlmaWNhdGlvbiBT +ZXJ2aWNlcyBEaXZpc2lvbjEkMCIGA1UEAxMbVGhhd3RlIFBlcnNvbmFsIEZyZWVt +YWlsIENBMSswKQYJKoZIhvcNAQkBFhxwZXJzb25hbC1mcmVlbWFpbEB0aGF3dGUu +Y29tMB4XDTk2MDEwMTAwMDAwMFoXDTIwMTIzMTIzNTk1OVowgdExCzAJBgNVBAYT +AlpBMRUwEwYDVQQIEwxXZXN0ZXJuIENhcGUxEjAQBgNVBAcTCUNhcGUgVG93bjEa 
+MBgGA1UEChMRVGhhd3RlIENvbnN1bHRpbmcxKDAmBgNVBAsTH0NlcnRpZmljYXRp +b24gU2VydmljZXMgRGl2aXNpb24xJDAiBgNVBAMTG1RoYXd0ZSBQZXJzb25hbCBG +cmVlbWFpbCBDQTErMCkGCSqGSIb3DQEJARYccGVyc29uYWwtZnJlZW1haWxAdGhh +d3RlLmNvbTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEA1GnX1LCUZFtx6UfY +DFG26nKRsIRefS0Nj3sS34UldSh0OkIsYyeflXtL734Zhx2G6qPduc6WZBrCFG5E +rHzmj+hND3EfQDimAKOHePb5lIZererAXnbr2RSjXW56fAylS1V/Bhkpf56aJtVq +uzgkCGqYx7Hao5iR/Xnb5VrEHLkCAwEAAaMTMBEwDwYDVR0TAQH/BAUwAwEB/zAN +BgkqhkiG9w0BAQQFAAOBgQDH7JJ+Tvj1lqVnYiqk8E0RYNBvjWBYYawmu1I1XAjP +MPuoSpaKH2JCI4wXD/S6ZJwXrEcp352YXtJsYHFcoqzceePnbgBHH7UNKOgCneSa +/RP0ptl8sfjcXyMmCZGAc9AUG95DqYMl8uacLxXK/qarigd1iwzdUYRr5PjRznei +gQ== +-----END CERTIFICATE----- + +Thawte Server CA +================ + +-----BEGIN CERTIFICATE----- +MIIDEzCCAnygAwIBAgIBATANBgkqhkiG9w0BAQQFADCBxDELMAkGA1UEBhMCWkEx +FTATBgNVBAgTDFdlc3Rlcm4gQ2FwZTESMBAGA1UEBxMJQ2FwZSBUb3duMR0wGwYD +VQQKExRUaGF3dGUgQ29uc3VsdGluZyBjYzEoMCYGA1UECxMfQ2VydGlmaWNhdGlv +biBTZXJ2aWNlcyBEaXZpc2lvbjEZMBcGA1UEAxMQVGhhd3RlIFNlcnZlciBDQTEm +MCQGCSqGSIb3DQEJARYXc2VydmVyLWNlcnRzQHRoYXd0ZS5jb20wHhcNOTYwODAx +MDAwMDAwWhcNMjAxMjMxMjM1OTU5WjCBxDELMAkGA1UEBhMCWkExFTATBgNVBAgT +DFdlc3Rlcm4gQ2FwZTESMBAGA1UEBxMJQ2FwZSBUb3duMR0wGwYDVQQKExRUaGF3 +dGUgQ29uc3VsdGluZyBjYzEoMCYGA1UECxMfQ2VydGlmaWNhdGlvbiBTZXJ2aWNl +cyBEaXZpc2lvbjEZMBcGA1UEAxMQVGhhd3RlIFNlcnZlciBDQTEmMCQGCSqGSIb3 +DQEJARYXc2VydmVyLWNlcnRzQHRoYXd0ZS5jb20wgZ8wDQYJKoZIhvcNAQEBBQAD +gY0AMIGJAoGBANOkUG7I/1Zr5s9dtuoMaHVHoqrC2oQl/Kj0R1HahbUgdJSGHg91 +yekIYfUGbTBuFRkC6VLAYttNmZ7iagxEOM3+vuNkCXDF/rFrKbYvScg71CcEJRCX +L+eQbcAoQpnXTEPew/UhbVSfXcNY4cDk2VuwuNy0e982OsK1ZiIS1ocNAgMBAAGj +EzARMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQEEBQADgYEAB/pMaVz7lcxG +7oWDTSEwjsrZqG9JGubaUeNgcGyEYRGhGshIPllDfU+VPaGLtwtimHp1it2ITk6e +QNuozDJ0uW8NxuOzRAvZim+aKZuZGCg70eNAKJpaPNW15yAbi8qkq43pUdniTCxZ +qdq5snUb9kLy78fyGPmJvKP/iiMucEc= +-----END CERTIFICATE----- + +Thawte Premium Server CA +======================== + +-----BEGIN CERTIFICATE----- 
+MIIDJzCCApCgAwIBAgIBATANBgkqhkiG9w0BAQQFADCBzjELMAkGA1UEBhMCWkEx +FTATBgNVBAgTDFdlc3Rlcm4gQ2FwZTESMBAGA1UEBxMJQ2FwZSBUb3duMR0wGwYD +VQQKExRUaGF3dGUgQ29uc3VsdGluZyBjYzEoMCYGA1UECxMfQ2VydGlmaWNhdGlv +biBTZXJ2aWNlcyBEaXZpc2lvbjEhMB8GA1UEAxMYVGhhd3RlIFByZW1pdW0gU2Vy +dmVyIENBMSgwJgYJKoZIhvcNAQkBFhlwcmVtaXVtLXNlcnZlckB0aGF3dGUuY29t +MB4XDTk2MDgwMTAwMDAwMFoXDTIwMTIzMTIzNTk1OVowgc4xCzAJBgNVBAYTAlpB +MRUwEwYDVQQIEwxXZXN0ZXJuIENhcGUxEjAQBgNVBAcTCUNhcGUgVG93bjEdMBsG +A1UEChMUVGhhd3RlIENvbnN1bHRpbmcgY2MxKDAmBgNVBAsTH0NlcnRpZmljYXRp +b24gU2VydmljZXMgRGl2aXNpb24xITAfBgNVBAMTGFRoYXd0ZSBQcmVtaXVtIFNl +cnZlciBDQTEoMCYGCSqGSIb3DQEJARYZcHJlbWl1bS1zZXJ2ZXJAdGhhd3RlLmNv +bTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEA0jY2aovXwlue2oFBYo847kkE +VdbQ7xwblRZH7xhINTpS9CtqBo87L+pW46+GjZ4X9560ZXUCTe/LCaIhUdib0GfQ +ug2SBhRz1JPLlyoAnFxODLz6FVL88kRu2hFKbgifLy3j+ao6hnO2RlNYyIkFvYMR +uHM/qgeN9EJN50CdHDcCAwEAAaMTMBEwDwYDVR0TAQH/BAUwAwEB/zANBgkqhkiG +9w0BAQQFAAOBgQAmSCwWwlj66BZ0DKqqX1Q/8tfJeGBeXm43YyJ3Nn6yF8Q0ufUI +hfzJATj/Tb7yFkJD57taRvvBxhEf8UqwKEbJw8RCfbz6q1lu1bdRiBHjpIUZa4JM +pAwSremkrj/xw0llmozFyD4lt5SZu5IycQfwhl7tUCemDaYj+bvLpgcUQg== +-----END CERTIFICATE----- + +Equifax Secure CA +================= + +-----BEGIN CERTIFICATE----- +MIIDIDCCAomgAwIBAgIENd70zzANBgkqhkiG9w0BAQUFADBOMQswCQYDVQQGEwJV +UzEQMA4GA1UEChMHRXF1aWZheDEtMCsGA1UECxMkRXF1aWZheCBTZWN1cmUgQ2Vy +dGlmaWNhdGUgQXV0aG9yaXR5MB4XDTk4MDgyMjE2NDE1MVoXDTE4MDgyMjE2NDE1 +MVowTjELMAkGA1UEBhMCVVMxEDAOBgNVBAoTB0VxdWlmYXgxLTArBgNVBAsTJEVx +dWlmYXggU2VjdXJlIENlcnRpZmljYXRlIEF1dGhvcml0eTCBnzANBgkqhkiG9w0B +AQEFAAOBjQAwgYkCgYEAwV2xWGcIYu6gmi0fCG2RFGiYCh7+2gRvE4RiIcPRfM6f +BeC4AfBONOziipUEZKzxa1NfBbPLZ4C/QgKO/t0BCezhABRP/PvwDN1Dulsr4R+A +cJkVV5MW8Q+XarfCaCMczE1ZMKxRHjuvK9buY0V7xdlfUNLjUA86iOe/FP3gx7kC +AwEAAaOCAQkwggEFMHAGA1UdHwRpMGcwZaBjoGGkXzBdMQswCQYDVQQGEwJVUzEQ +MA4GA1UEChMHRXF1aWZheDEtMCsGA1UECxMkRXF1aWZheCBTZWN1cmUgQ2VydGlm +aWNhdGUgQXV0aG9yaXR5MQ0wCwYDVQQDEwRDUkwxMBoGA1UdEAQTMBGBDzIwMTgw 
+ODIyMTY0MTUxWjALBgNVHQ8EBAMCAQYwHwYDVR0jBBgwFoAUSOZo+SvSspXXR9gj +IBBPM5iQn9QwHQYDVR0OBBYEFEjmaPkr0rKV10fYIyAQTzOYkJ/UMAwGA1UdEwQF +MAMBAf8wGgYJKoZIhvZ9B0EABA0wCxsFVjMuMGMDAgbAMA0GCSqGSIb3DQEBBQUA +A4GBAFjOKer89961zgK5F7WF0bnj4JXMJTENAKaSbn+2kmOeUJXRmm/kEd5jhW6Y +7qj/WsjTVbJmcVfewCHrPSqnI0kBBIZCe/zuf6IWUrVnZ9NA2zsmWLIodz2uFHdh +1voqZiegDfqnc1zqcPGUIWVEX/r87yloqaKHee9570+sB3c4 +-----END CERTIFICATE----- + +Verisign Class 1 Public Primary Certification Authority +======================================================= + +-----BEGIN CERTIFICATE----- +MIICPTCCAaYCEQDNun9W8N/kvFT+IqyzcqpVMA0GCSqGSIb3DQEBAgUAMF8xCzAJ +BgNVBAYTAlVTMRcwFQYDVQQKEw5WZXJpU2lnbiwgSW5jLjE3MDUGA1UECxMuQ2xh +c3MgMSBQdWJsaWMgUHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw05 +NjAxMjkwMDAwMDBaFw0yODA4MDEyMzU5NTlaMF8xCzAJBgNVBAYTAlVTMRcwFQYD +VQQKEw5WZXJpU2lnbiwgSW5jLjE3MDUGA1UECxMuQ2xhc3MgMSBQdWJsaWMgUHJp +bWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTCBnzANBgkqhkiG9w0BAQEFAAOB +jQAwgYkCgYEA5Rm/baNWYS2ZSHH2Z965jeu3noaACpEO+jglr0aIguVzqKCbJF0N +H8xlbgyw0FaEGIeaBpsQoXPftFg5a27B9hXVqKg/qhIGjTGsf7A01480Z4gJzRQR +4k5FVmkfeAKA2txHkSm7NsljXMXg1y2He6G3MrB7MLoqLzGq7qNn2tsCAwEAATAN +BgkqhkiG9w0BAQIFAAOBgQBMP7iLxmjf7kMzDl3ppssHhE16M/+SG/Q2rdiVIjZo +EWx8QszznC7EBz8UsA9P/5CSdvnivErpj82ggAr3xSnxgiJduLHdgSOjeyUVRjB5 +FvjqBUuUfx3CHMjjt/QQQDwTw18fU+hI5Ia0e6E1sHslurjTjqs/OJ0ANACY89Fx +lA== +-----END CERTIFICATE----- + +Verisign Class 2 Public Primary Certification Authority +======================================================= + +-----BEGIN CERTIFICATE----- +MIICPDCCAaUCEC0b/EoXjaOR6+f/9YtFvgswDQYJKoZIhvcNAQECBQAwXzELMAkG +A1UEBhMCVVMxFzAVBgNVBAoTDlZlcmlTaWduLCBJbmMuMTcwNQYDVQQLEy5DbGFz +cyAyIFB1YmxpYyBQcmltYXJ5IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MB4XDTk2 +MDEyOTAwMDAwMFoXDTI4MDgwMTIzNTk1OVowXzELMAkGA1UEBhMCVVMxFzAVBgNV +BAoTDlZlcmlTaWduLCBJbmMuMTcwNQYDVQQLEy5DbGFzcyAyIFB1YmxpYyBQcmlt +YXJ5IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MIGfMA0GCSqGSIb3DQEBAQUAA4GN +ADCBiQKBgQC2WoujDWojg4BrzzmH9CETMwZMJaLtVRKXxaeAufqDwSCg+i8VDXyh 
+YGt+eSz6Bg86rvYbb7HS/y8oUl+DfUvEerf4Zh+AVPy3wo5ZShRXRtGak75BkQO7 +FYCTXOvnzAhsPz6zSvz/S2wj1VCCJkQZjiPDceoZJEcEnnW/yKYAHwIDAQABMA0G +CSqGSIb3DQEBAgUAA4GBAIobK/o5wXTXXtgZZKJYSi034DNHD6zt96rbHuSLBlxg +J8pFUs4W7z8GZOeUaHxgMxURaa+dYo2jA1Rrpr7l7gUYYAS/QoD90KioHgE796Nc +r6Pc5iaAIzy4RHT3Cq5Ji2F4zCS/iIqnDupzGUH9TQPwiNHleI2lKk/2lw0Xd8rY +-----END CERTIFICATE----- + +Verisign Class 3 Public Primary Certification Authority +======================================================= + +-----BEGIN CERTIFICATE----- +MIICPDCCAaUCEHC65B0Q2Sk0tjjKewPMur8wDQYJKoZIhvcNAQECBQAwXzELMAkG +A1UEBhMCVVMxFzAVBgNVBAoTDlZlcmlTaWduLCBJbmMuMTcwNQYDVQQLEy5DbGFz +cyAzIFB1YmxpYyBQcmltYXJ5IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MB4XDTk2 +MDEyOTAwMDAwMFoXDTI4MDgwMTIzNTk1OVowXzELMAkGA1UEBhMCVVMxFzAVBgNV +BAoTDlZlcmlTaWduLCBJbmMuMTcwNQYDVQQLEy5DbGFzcyAzIFB1YmxpYyBQcmlt +YXJ5IENlcnRpZmljYXRpb24gQXV0aG9yaXR5MIGfMA0GCSqGSIb3DQEBAQUAA4GN +ADCBiQKBgQDJXFme8huKARS0EN8EQNvjV69qRUCPhAwL0TPZ2RHP7gJYHyX3KqhE +BarsAx94f56TuZoAqiN91qyFomNFx3InzPRMxnVx0jnvT0Lwdd8KkMaOIG+YD/is +I19wKTakyYbnsZogy1Olhec9vn2a/iRFM9x2Fe0PonFkTGUugWhFpwIDAQABMA0G +CSqGSIb3DQEBAgUAA4GBALtMEivPLCYATxQT3ab7/AoRhIzzKBxnki98tsX63/Do +lbwdj2wsqFHMc9ikwFPwTtYmwHYBV4GSXiHx0bH/59AhWM1pF+NEHJwZRDmJXNyc +AA9WjQKZ7aKQRUzkuxCkPfAyAw7xzvjoyVGM5mKf5p/AfbdynMk2OmufTqj/ZA1k +-----END CERTIFICATE----- + +Verisign Class 1 Public Primary Certification Authority - G2 +============================================================ + +-----BEGIN CERTIFICATE----- +MIIDAjCCAmsCEEzH6qqYPnHTkxD4PTqJkZIwDQYJKoZIhvcNAQEFBQAwgcExCzAJ +BgNVBAYTAlVTMRcwFQYDVQQKEw5WZXJpU2lnbiwgSW5jLjE8MDoGA1UECxMzQ2xh +c3MgMSBQdWJsaWMgUHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eSAtIEcy +MTowOAYDVQQLEzEoYykgMTk5OCBWZXJpU2lnbiwgSW5jLiAtIEZvciBhdXRob3Jp +emVkIHVzZSBvbmx5MR8wHQYDVQQLExZWZXJpU2lnbiBUcnVzdCBOZXR3b3JrMB4X +DTk4MDUxODAwMDAwMFoXDTI4MDgwMTIzNTk1OVowgcExCzAJBgNVBAYTAlVTMRcw +FQYDVQQKEw5WZXJpU2lnbiwgSW5jLjE8MDoGA1UECxMzQ2xhc3MgMSBQdWJsaWMg 
+UHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eSAtIEcyMTowOAYDVQQLEzEo +YykgMTk5OCBWZXJpU2lnbiwgSW5jLiAtIEZvciBhdXRob3JpemVkIHVzZSBvbmx5 +MR8wHQYDVQQLExZWZXJpU2lnbiBUcnVzdCBOZXR3b3JrMIGfMA0GCSqGSIb3DQEB +AQUAA4GNADCBiQKBgQCq0Lq+Fi24g9TK0g+8djHKlNgdk4xWArzZbxpvUjZudVYK +VdPfQ4chEWWKfo+9Id5rMj8bhDSVBZ1BNeuS65bdqlk/AVNtmU/t5eIqWpDBucSm +Fc/IReumXY6cPvBkJHalzasab7bYe1FhbqZ/h8jit+U03EGI6glAvnOSPWvndQID +AQABMA0GCSqGSIb3DQEBBQUAA4GBAKlPww3HZ74sy9mozS11534Vnjty637rXC0J +h9ZrbWB85a7FkCMMXErQr7Fd88e2CtvgFZMN3QO8x3aKtd1Pw5sTdbgBwObJW2ul +uIncrKTdcu1OofdPvAbT6shkdHvClUGcZXNY8ZCaPGqxmMnEh7zPRW1F4m4iP/68 +DzFc6PLZ +-----END CERTIFICATE----- + +Verisign Class 2 Public Primary Certification Authority - G2 +============================================================ + +-----BEGIN CERTIFICATE----- +MIIDAzCCAmwCEQC5L2DMiJ+hekYJuFtwbIqvMA0GCSqGSIb3DQEBBQUAMIHBMQsw +CQYDVQQGEwJVUzEXMBUGA1UEChMOVmVyaVNpZ24sIEluYy4xPDA6BgNVBAsTM0Ns +YXNzIDIgUHVibGljIFByaW1hcnkgQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkgLSBH +MjE6MDgGA1UECxMxKGMpIDE5OTggVmVyaVNpZ24sIEluYy4gLSBGb3IgYXV0aG9y +aXplZCB1c2Ugb25seTEfMB0GA1UECxMWVmVyaVNpZ24gVHJ1c3QgTmV0d29yazAe +Fw05ODA1MTgwMDAwMDBaFw0yODA4MDEyMzU5NTlaMIHBMQswCQYDVQQGEwJVUzEX +MBUGA1UEChMOVmVyaVNpZ24sIEluYy4xPDA6BgNVBAsTM0NsYXNzIDIgUHVibGlj +IFByaW1hcnkgQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkgLSBHMjE6MDgGA1UECxMx +KGMpIDE5OTggVmVyaVNpZ24sIEluYy4gLSBGb3IgYXV0aG9yaXplZCB1c2Ugb25s +eTEfMB0GA1UECxMWVmVyaVNpZ24gVHJ1c3QgTmV0d29yazCBnzANBgkqhkiG9w0B +AQEFAAOBjQAwgYkCgYEAp4gBIXQs5xoD8JjhlzwPIQjxnNuX6Zr8wgQGE75fUsjM +HiwSViy4AWkszJkfrbCWrnkE8hM5wXuYuggs6MKEEyyqaekJ9MepAqRCwiNPStjw +DqL7MWzJ5m+ZJwf15vRMeJ5t60aG+rmGyVTyssSv1EYcWskVMP8NbPUtDm3Of3cC +AwEAATANBgkqhkiG9w0BAQUFAAOBgQByLvl/0fFx+8Se9sVeUYpAmLho+Jscg9ji +nb3/7aHmZuovCfTK1+qlK5X2JGCGTUQug6XELaDTrnhpb3LabK4I8GOSN+a7xDAX +rXfMSTWqz9iP0b63GJZHc2pUIjRkLbYWm1lbtFFZOrMLFPQS32eg9K0yZF6xRnIn +jBJ7xUS0rg== +-----END CERTIFICATE----- + +Verisign Class 3 Public Primary Certification Authority - G2 
+============================================================ + +-----BEGIN CERTIFICATE----- +MIIDAjCCAmsCEH3Z/gfPqB63EHln+6eJNMYwDQYJKoZIhvcNAQEFBQAwgcExCzAJ +BgNVBAYTAlVTMRcwFQYDVQQKEw5WZXJpU2lnbiwgSW5jLjE8MDoGA1UECxMzQ2xh +c3MgMyBQdWJsaWMgUHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eSAtIEcy +MTowOAYDVQQLEzEoYykgMTk5OCBWZXJpU2lnbiwgSW5jLiAtIEZvciBhdXRob3Jp +emVkIHVzZSBvbmx5MR8wHQYDVQQLExZWZXJpU2lnbiBUcnVzdCBOZXR3b3JrMB4X +DTk4MDUxODAwMDAwMFoXDTI4MDgwMTIzNTk1OVowgcExCzAJBgNVBAYTAlVTMRcw +FQYDVQQKEw5WZXJpU2lnbiwgSW5jLjE8MDoGA1UECxMzQ2xhc3MgMyBQdWJsaWMg +UHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eSAtIEcyMTowOAYDVQQLEzEo +YykgMTk5OCBWZXJpU2lnbiwgSW5jLiAtIEZvciBhdXRob3JpemVkIHVzZSBvbmx5 +MR8wHQYDVQQLExZWZXJpU2lnbiBUcnVzdCBOZXR3b3JrMIGfMA0GCSqGSIb3DQEB +AQUAA4GNADCBiQKBgQDMXtERXVxp0KvTuWpMmR9ZmDCOFoUgRm1HP9SFIIThbbP4 +pO0M8RcPO/mn+SXXwc+EY/J8Y8+iR/LGWzOOZEAEaMGAuWQcRXfH2G71lSk8UOg0 +13gfqLptQ5GVj0VXXn7F+8qkBOvqlzdUMG+7AUcyM83cV5tkaWH4mx0ciU9cZwID +AQABMA0GCSqGSIb3DQEBBQUAA4GBAFFNzb5cy5gZnBWyATl4Lk0PZ3BwmcYQWpSk +U01UbSuvDV1Ai2TT1+7eVmGSX6bEHRBhNtMsJzzoKQm5EWR0zLVznxxIqbxhAe7i +F6YM40AIOw7n60RzKprxaZLvcRTDOaxxp5EJb+RxBrO6WVcmeQD2+A2iMzAo1KpY +oJ2daZH9 +-----END CERTIFICATE----- + +Verisign Class 4 Public Primary Certification Authority - G2 +============================================================ + +-----BEGIN CERTIFICATE----- +MIIDAjCCAmsCEDKIjprS9esTR/h/xCA3JfgwDQYJKoZIhvcNAQEFBQAwgcExCzAJ +BgNVBAYTAlVTMRcwFQYDVQQKEw5WZXJpU2lnbiwgSW5jLjE8MDoGA1UECxMzQ2xh +c3MgNCBQdWJsaWMgUHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eSAtIEcy +MTowOAYDVQQLEzEoYykgMTk5OCBWZXJpU2lnbiwgSW5jLiAtIEZvciBhdXRob3Jp +emVkIHVzZSBvbmx5MR8wHQYDVQQLExZWZXJpU2lnbiBUcnVzdCBOZXR3b3JrMB4X +DTk4MDUxODAwMDAwMFoXDTI4MDgwMTIzNTk1OVowgcExCzAJBgNVBAYTAlVTMRcw +FQYDVQQKEw5WZXJpU2lnbiwgSW5jLjE8MDoGA1UECxMzQ2xhc3MgNCBQdWJsaWMg +UHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eSAtIEcyMTowOAYDVQQLEzEo +YykgMTk5OCBWZXJpU2lnbiwgSW5jLiAtIEZvciBhdXRob3JpemVkIHVzZSBvbmx5 
+MR8wHQYDVQQLExZWZXJpU2lnbiBUcnVzdCBOZXR3b3JrMIGfMA0GCSqGSIb3DQEB +AQUAA4GNADCBiQKBgQC68OTP+cSuhVS5B1f5j8V/aBH4xBewRNzjMHPVKmIquNDM +HO0oW369atyzkSTKQWI8/AIBvxwWMZQFl3Zuoq29YRdsTjCG8FE3KlDHqGKB3FtK +qsGgtG7rL+VXxbErQHDbWk2hjh+9Ax/YA9SPTJlxvOKCzFjomDqG04Y48wApHwID +AQABMA0GCSqGSIb3DQEBBQUAA4GBAIWMEsGnuVAVess+rLhDityq3RS6iYF+ATwj +cSGIL4LcY/oCRaxFWdcqWERbt5+BO5JoPeI3JPV7bI92NZYJqFmduc4jq3TWg/0y +cyfYaT5DdPauxYma51N86Xv2S/PBZYPejYqcPIiNOVn8qj8ijaHBZlCBckztImRP +T8qAkbYp +-----END CERTIFICATE----- + +Verisign Class 1 Public Primary Certification Authority - G3 +============================================================ + +-----BEGIN CERTIFICATE----- +MIIEGjCCAwICEQCLW3VWhFSFCwDPrzhIzrGkMA0GCSqGSIb3DQEBBQUAMIHKMQsw +CQYDVQQGEwJVUzEXMBUGA1UEChMOVmVyaVNpZ24sIEluYy4xHzAdBgNVBAsTFlZl +cmlTaWduIFRydXN0IE5ldHdvcmsxOjA4BgNVBAsTMShjKSAxOTk5IFZlcmlTaWdu +LCBJbmMuIC0gRm9yIGF1dGhvcml6ZWQgdXNlIG9ubHkxRTBDBgNVBAMTPFZlcmlT +aWduIENsYXNzIDEgUHVibGljIFByaW1hcnkgQ2VydGlmaWNhdGlvbiBBdXRob3Jp +dHkgLSBHMzAeFw05OTEwMDEwMDAwMDBaFw0zNjA3MTYyMzU5NTlaMIHKMQswCQYD +VQQGEwJVUzEXMBUGA1UEChMOVmVyaVNpZ24sIEluYy4xHzAdBgNVBAsTFlZlcmlT +aWduIFRydXN0IE5ldHdvcmsxOjA4BgNVBAsTMShjKSAxOTk5IFZlcmlTaWduLCBJ +bmMuIC0gRm9yIGF1dGhvcml6ZWQgdXNlIG9ubHkxRTBDBgNVBAMTPFZlcmlTaWdu +IENsYXNzIDEgUHVibGljIFByaW1hcnkgQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkg +LSBHMzCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAN2E1Lm0+afY8wR4 +nN493GwTFtl63SRRZsDHJlkNrAYIwpTRMx/wgzUfbhvI3qpuFU5UJ+/EbRrsC+MO +8ESlV8dAWB6jRx9x7GD2bZTIGDnt/kIYVt/kTEkQeE4BdjVjEjbdZrwBBDajVWjV +ojYJrKshJlQGrT/KFOCsyq0GHZXi+J3x4GD/wn91K0zM2v6HmSHquv4+VNfSWXjb +PG7PoBMAGrgnoeS+Z5bKoMWznN3JdZ7rMJpfo83ZrngZPyPpXNspva1VyBtUjGP2 +6KbqxzcSXKMpHgLZ2x87tNcPVkeBFQRKr4Mn0cVYiMHd9qqnoxjaaKptEVHhv2Vr +n5Z20T0CAwEAATANBgkqhkiG9w0BAQUFAAOCAQEAq2aN17O6x5q25lXQBfGfMY1a +qtmqRiYPce2lrVNWYgFHKkTp/j90CxObufRNG7LRX7K20ohcs5/Ny9Sn2WCVhDr4 +wTcdYcrnsMXlkdpUpqwxga6X3s0IrLjAl4B/bnKk52kTlWUfxJM8/XmPBNQ+T+r3 +ns7NZ3xPZQL/kYVUc8f/NveGLezQXk//EZ9yBta4GvFMDSZl4kSAHsef493oCtrs 
+pSCAaWihT37ha88HQfqDjrw43bAuEbFrskLMmrz5SCJ5ShkPshw+IHTZasO+8ih4 +E1Z5T21Q6huwtVexN2ZYI/PcD98Kh8TvhgXVOBRgmaNL3gaWcSzy27YfpO8/7g== +-----END CERTIFICATE----- + +Verisign Class 2 Public Primary Certification Authority - G3 +============================================================ + +-----BEGIN CERTIFICATE----- +MIIEGTCCAwECEGFwy0mMX5hFKeewptlQW3owDQYJKoZIhvcNAQEFBQAwgcoxCzAJ +BgNVBAYTAlVTMRcwFQYDVQQKEw5WZXJpU2lnbiwgSW5jLjEfMB0GA1UECxMWVmVy +aVNpZ24gVHJ1c3QgTmV0d29yazE6MDgGA1UECxMxKGMpIDE5OTkgVmVyaVNpZ24s +IEluYy4gLSBGb3IgYXV0aG9yaXplZCB1c2Ugb25seTFFMEMGA1UEAxM8VmVyaVNp +Z24gQ2xhc3MgMiBQdWJsaWMgUHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0 +eSAtIEczMB4XDTk5MTAwMTAwMDAwMFoXDTM2MDcxNjIzNTk1OVowgcoxCzAJBgNV +BAYTAlVTMRcwFQYDVQQKEw5WZXJpU2lnbiwgSW5jLjEfMB0GA1UECxMWVmVyaVNp +Z24gVHJ1c3QgTmV0d29yazE6MDgGA1UECxMxKGMpIDE5OTkgVmVyaVNpZ24sIElu +Yy4gLSBGb3IgYXV0aG9yaXplZCB1c2Ugb25seTFFMEMGA1UEAxM8VmVyaVNpZ24g +Q2xhc3MgMiBQdWJsaWMgUHJpbWFyeSBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eSAt +IEczMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEArwoNwtUs22e5LeWU +J92lvuCwTY+zYVY81nzD9M0+hsuiiOLh2KRpxbXiv8GmR1BeRjmL1Za6tW8UvxDO +JxOeBUebMXoT2B/Z0wI3i60sR/COgQanDTAM6/c8DyAd3HJG7qUCyFvDyVZpTMUY +wZF7C9UTAJu878NIPkZgIIUq1ZC2zYugzDLdt/1AVbJQHFauzI13TccgTacxdu9o +koqQHgiBVrKtaaNS0MscxCM9H5n+TOgWY47GCI72MfbS+uV23bUckqNJzc0BzWjN +qWm6o+sdDZykIKbBoMXRRkwXbdKsZj+WjOCE1Db/IlnF+RFgqF8EffIa9iVCYQ/E +Srg+iQIDAQABMA0GCSqGSIb3DQEBBQUAA4IBAQA0JhU8wI1NQ0kdvekhktdmnLfe +xbjQ5F1fdiLAJvmEOjr5jLX77GDx6M4EsMjdpwOPMPOY36TmpDHf0xwLRtxyID+u +7gU8pDM/CzmscHhzS5kr3zDCVLCoO1Wh/hYozUK9dG6A2ydEp85EXdQbkJgNHkKU +sQAsBNB0owIFImNjzYO1+8FtYmtpdf1dcEG59b98377BMnMiIYtYgXsVkXq642RI +sH/7NiXaldDxJBQX3RiAa0YjOVT1jmIJBB2UkKab5iXiQkWquJCtvgiPqQtCGJTP +cjnhsUPgKM+351psE2tJs//jGHyJizNdrDPXp/naOlXJWBD5qu9ats9LS98q +-----END CERTIFICATE----- + +Verisign Class 3 Public Primary Certification Authority - G3 +============================================================ + +-----BEGIN CERTIFICATE----- 
+MIIEGjCCAwICEQCbfgZJoz5iudXukEhxKe9XMA0GCSqGSIb3DQEBBQUAMIHKMQsw +CQYDVQQGEwJVUzEXMBUGA1UEChMOVmVyaVNpZ24sIEluYy4xHzAdBgNVBAsTFlZl +cmlTaWduIFRydXN0IE5ldHdvcmsxOjA4BgNVBAsTMShjKSAxOTk5IFZlcmlTaWdu +LCBJbmMuIC0gRm9yIGF1dGhvcml6ZWQgdXNlIG9ubHkxRTBDBgNVBAMTPFZlcmlT +aWduIENsYXNzIDMgUHVibGljIFByaW1hcnkgQ2VydGlmaWNhdGlvbiBBdXRob3Jp +dHkgLSBHMzAeFw05OTEwMDEwMDAwMDBaFw0zNjA3MTYyMzU5NTlaMIHKMQswCQYD +VQQGEwJVUzEXMBUGA1UEChMOVmVyaVNpZ24sIEluYy4xHzAdBgNVBAsTFlZlcmlT +aWduIFRydXN0IE5ldHdvcmsxOjA4BgNVBAsTMShjKSAxOTk5IFZlcmlTaWduLCBJ +bmMuIC0gRm9yIGF1dGhvcml6ZWQgdXNlIG9ubHkxRTBDBgNVBAMTPFZlcmlTaWdu +IENsYXNzIDMgUHVibGljIFByaW1hcnkgQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkg +LSBHMzCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAMu6nFL8eB8aHm8b +N3O9+MlrlBIwT/A2R/XQkQr1F8ilYcEWQE37imGQ5XYgwREGfassbqb1EUGO+i2t +KmFZpGcmTNDovFJbcCAEWNF6yaRpvIMXZK0Fi7zQWM6NjPXr8EJJC52XJ2cybuGu +kxUccLwgTS8Y3pKI6GyFVxEa6X7jJhFUokWWVYPKMIno3Nij7SqAP395ZVc+FSBm +CC+Vk7+qRy+oRpfwEuL+wgorUeZ25rdGt+INpsyow0xZVYnm6FNcHOqd8GIWC6fJ +Xwzw3sJ2zq/3avL6QaaiMxTJ5Xpj055iN9WFZZ4O5lMkdBteHRJTW8cs54NJOxWu +imi5V5cCAwEAATANBgkqhkiG9w0BAQUFAAOCAQEAERSWwauSCPc/L8my/uRan2Te +2yFPhpk0djZX3dAVL8WtfxUfN2JzPtTnX84XA9s1+ivbrmAJXx5fj267Cz3qWhMe +DGBvtcC1IyIuBwvLqXTLR7sdwdela8wv0kL9Sd2nic9TutoAWii/gt/4uhMdUIaC +/Y4wjylGsB49Ndo4YhYYSq3mtlFs3q9i6wHQHiT+eo8SGhJouPtmmRQURVyu565p +F4ErWjfJXir0xuKhXFSbplQAz/DxwceYMBo7Nhbbo27q/a2ywtrvAkcTisDxszGt +TxzhT5yvDwyd93gN2PQ1VoDat20Xj50egWTh/sVFuq1ruQp6Tk9LhO5L8X3dEQ== +-----END CERTIFICATE----- + +Verisign Class 4 Public Primary Certification Authority - G3 +============================================================ + +-----BEGIN CERTIFICATE----- +MIIEGjCCAwICEQDsoKeLbnVqAc/EfMwvlF7XMA0GCSqGSIb3DQEBBQUAMIHKMQsw +CQYDVQQGEwJVUzEXMBUGA1UEChMOVmVyaVNpZ24sIEluYy4xHzAdBgNVBAsTFlZl +cmlTaWduIFRydXN0IE5ldHdvcmsxOjA4BgNVBAsTMShjKSAxOTk5IFZlcmlTaWdu +LCBJbmMuIC0gRm9yIGF1dGhvcml6ZWQgdXNlIG9ubHkxRTBDBgNVBAMTPFZlcmlT +aWduIENsYXNzIDQgUHVibGljIFByaW1hcnkgQ2VydGlmaWNhdGlvbiBBdXRob3Jp 
+dHkgLSBHMzAeFw05OTEwMDEwMDAwMDBaFw0zNjA3MTYyMzU5NTlaMIHKMQswCQYD +VQQGEwJVUzEXMBUGA1UEChMOVmVyaVNpZ24sIEluYy4xHzAdBgNVBAsTFlZlcmlT +aWduIFRydXN0IE5ldHdvcmsxOjA4BgNVBAsTMShjKSAxOTk5IFZlcmlTaWduLCBJ +bmMuIC0gRm9yIGF1dGhvcml6ZWQgdXNlIG9ubHkxRTBDBgNVBAMTPFZlcmlTaWdu +IENsYXNzIDQgUHVibGljIFByaW1hcnkgQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkg +LSBHMzCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAK3LpRFpxlmr8Y+1 +GQ9Wzsy1HyDkniYlS+BzZYlZ3tCD5PUPtbut8XzoIfzk6AzufEUiGXaStBO3IFsJ ++mGuqPKljYXCKtbeZjbSmwL0qJJgfJxptI8kHtCGUvYynEFYHiK9zUVilQhu0Gbd +U6LM8BDcVHOLBKFGMzNcF0C5nk3T875Vg+ixiY5afJqWIpA7iCXy0lOIAgwLePLm +NxdLMEYH5IBtptiWLugs+BGzOA1mppvqySNb247i8xOOGlktqgLw7KSHZtzBP/XY +ufTsgsbSPZUd5cBPhMnZo0QoBmrXRazwa2rvTl/4EYIeOGM0ZlDUPpNz+jDDZq3/ +ky2X7wMCAwEAATANBgkqhkiG9w0BAQUFAAOCAQEAj/ola09b5KROJ1WrIhVZPMq1 +CtRK26vdoV9TxaBXOcLORyu+OshWv8LZJxA6sQU8wHcxuzrTBXttmhwwjIDLk5Mq +g6sFUYICABFna/OIYUdfA5PVWw3g8dShMjWFsjrbsIKr0csKvE+MW8VLADsfKoKm +fjaF3H48ZwC15DtS4KjrXRX5xm3wrR0OhbepmnMUWluPQSjA1egtTaRezarZ7c7c +2NU8Qh0XwRJdRTjDOPP8hS6DRkiy1yBfkjaP53kPmF6Z6PDQpLv1U70qzlmwr25/ +bLvSHgCwIe34QWKCudiyxLtGUPMxxY8BqHTr9Xgn2uf3ZkPznoM+IKrDNWCRzg== +-----END CERTIFICATE----- + +Equifax Secure Global eBusiness CA +================================== + +-----BEGIN CERTIFICATE----- +MIICkDCCAfmgAwIBAgIBATANBgkqhkiG9w0BAQQFADBaMQswCQYDVQQGEwJVUzEc +MBoGA1UEChMTRXF1aWZheCBTZWN1cmUgSW5jLjEtMCsGA1UEAxMkRXF1aWZheCBT +ZWN1cmUgR2xvYmFsIGVCdXNpbmVzcyBDQS0xMB4XDTk5MDYyMTA0MDAwMFoXDTIw +MDYyMTA0MDAwMFowWjELMAkGA1UEBhMCVVMxHDAaBgNVBAoTE0VxdWlmYXggU2Vj +dXJlIEluYy4xLTArBgNVBAMTJEVxdWlmYXggU2VjdXJlIEdsb2JhbCBlQnVzaW5l +c3MgQ0EtMTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEAuucXkAJlsTRVPEnC +UdXfp9E3j9HngXNBUmCbnaEXJnitx7HoJpQytd4zjTov2/KaelpzmKNc6fuKcxtc +58O/gGzNqfTWK8D3+ZmqY6KxRwIP1ORROhI8bIpaVIRw28HFkM9yRcuoWcDNM50/ +o5brhTMhHD4ePmBudpxnhcXIw2ECAwEAAaNmMGQwEQYJYIZIAYb4QgEBBAQDAgAH +MA8GA1UdEwEB/wQFMAMBAf8wHwYDVR0jBBgwFoAUvqigdHJQa0S3ySPY+6j/s1dr +aGwwHQYDVR0OBBYEFL6ooHRyUGtEt8kj2Puo/7NXa2hsMA0GCSqGSIb3DQEBBAUA 
+A4GBADDiAVGqx+pf2rnQZQ8w1j7aDRRJbpGTJxQx78T3LUX47Me/okENI7SS+RkA +Z70Br83gcfxaz2TE4JaY0KNA4gGK7ycH8WUBikQtBmV1UsCGECAhX2xrD2yuCRyv +8qIYNMR1pHMc8Y3c7635s3a0kr/clRAevsvIO1qEYBlWlKlV +-----END CERTIFICATE----- + +Equifax Secure eBusiness CA 1 +============================= + +-----BEGIN CERTIFICATE----- +MIICgjCCAeugAwIBAgIBBDANBgkqhkiG9w0BAQQFADBTMQswCQYDVQQGEwJVUzEc +MBoGA1UEChMTRXF1aWZheCBTZWN1cmUgSW5jLjEmMCQGA1UEAxMdRXF1aWZheCBT +ZWN1cmUgZUJ1c2luZXNzIENBLTEwHhcNOTkwNjIxMDQwMDAwWhcNMjAwNjIxMDQw +MDAwWjBTMQswCQYDVQQGEwJVUzEcMBoGA1UEChMTRXF1aWZheCBTZWN1cmUgSW5j +LjEmMCQGA1UEAxMdRXF1aWZheCBTZWN1cmUgZUJ1c2luZXNzIENBLTEwgZ8wDQYJ +KoZIhvcNAQEBBQADgY0AMIGJAoGBAM4vGbwXt3fek6lfWg0XTzQaDJj0ItlZ1MRo +RvC0NcWFAyDGr0WlIVFFQesWWDYyb+JQYmT5/VGcqiTZ9J2DKocKIdMSODRsjQBu +WqDZQu4aIZX5UkxVWsUPOE9G+m34LjXWHXzr4vCwdYDIqROsvojvOm6rXyo4YgKw +Env+j6YDAgMBAAGjZjBkMBEGCWCGSAGG+EIBAQQEAwIABzAPBgNVHRMBAf8EBTAD +AQH/MB8GA1UdIwQYMBaAFEp4MlIR21kWNl7fwRQ2QGpHfEyhMB0GA1UdDgQWBBRK +eDJSEdtZFjZe38EUNkBqR3xMoTANBgkqhkiG9w0BAQQFAAOBgQB1W6ibAxHm6VZM +zfmpTMANmvPMZWnmJXbMWbfWVMMdzZmsGd20hdXgPfxiIKeES1hl8eL5lSE/9dR+ +WB5Hh1Q+WKG1tfgq73HnvMP2sUlG4tega+VWeponmHxGYhTnyfxuAxJ5gDgdSIKN +/Bf+KpYrtWKmpj29f5JZzVoqgrI3eQ== +-----END CERTIFICATE----- + +Equifax Secure eBusiness CA 2 +============================= + +-----BEGIN CERTIFICATE----- +MIIDIDCCAomgAwIBAgIEN3DPtTANBgkqhkiG9w0BAQUFADBOMQswCQYDVQQGEwJV +UzEXMBUGA1UEChMORXF1aWZheCBTZWN1cmUxJjAkBgNVBAsTHUVxdWlmYXggU2Vj +dXJlIGVCdXNpbmVzcyBDQS0yMB4XDTk5MDYyMzEyMTQ0NVoXDTE5MDYyMzEyMTQ0 +NVowTjELMAkGA1UEBhMCVVMxFzAVBgNVBAoTDkVxdWlmYXggU2VjdXJlMSYwJAYD +VQQLEx1FcXVpZmF4IFNlY3VyZSBlQnVzaW5lc3MgQ0EtMjCBnzANBgkqhkiG9w0B +AQEFAAOBjQAwgYkCgYEA5Dk5kx5SBhsoNviyoynF7Y6yEb3+6+e0dMKP/wXn2Z0G +vxLIPw7y1tEkshHe0XMJitSxLJgJDR5QRrKDpkWNYmi7hRsgcDKqQM2mll/EcTc/ +BPO3QSQ5BxoeLmFYoBIL5aXfxavqN3HMHMg3OrmXUqesxWoklE6ce8/AatbfIb0C +AwEAAaOCAQkwggEFMHAGA1UdHwRpMGcwZaBjoGGkXzBdMQswCQYDVQQGEwJVUzEX +MBUGA1UEChMORXF1aWZheCBTZWN1cmUxJjAkBgNVBAsTHUVxdWlmYXggU2VjdXJl 
+IGVCdXNpbmVzcyBDQS0yMQ0wCwYDVQQDEwRDUkwxMBoGA1UdEAQTMBGBDzIwMTkw +NjIzMTIxNDQ1WjALBgNVHQ8EBAMCAQYwHwYDVR0jBBgwFoAUUJ4L6q9euSBIplBq +y/3YIHqngnYwHQYDVR0OBBYEFFCeC+qvXrkgSKZQasv92CB6p4J2MAwGA1UdEwQF +MAMBAf8wGgYJKoZIhvZ9B0EABA0wCxsFVjMuMGMDAgbAMA0GCSqGSIb3DQEBBQUA +A4GBAAyGgq3oThr1jokn4jVYPSm0B482UJW/bsGe68SQsoWou7dC4A8HOd/7npCy +0cE+U58DRLB+S/Rv5Hwf5+Kx5Lia78O9zt4LMjTZ3ijtM2vE1Nc9ElirfQkty3D1 +E4qUoSek1nDFbZS1yX2doNLGCEnZZpum0/QL3MUmV+GRMOrN +-----END CERTIFICATE----- + +Thawte Time Stamping CA +======================= + +-----BEGIN CERTIFICATE----- +MIICoTCCAgqgAwIBAgIBADANBgkqhkiG9w0BAQQFADCBizELMAkGA1UEBhMCWkEx +FTATBgNVBAgTDFdlc3Rlcm4gQ2FwZTEUMBIGA1UEBxMLRHVyYmFudmlsbGUxDzAN +BgNVBAoTBlRoYXd0ZTEdMBsGA1UECxMUVGhhd3RlIENlcnRpZmljYXRpb24xHzAd +BgNVBAMTFlRoYXd0ZSBUaW1lc3RhbXBpbmcgQ0EwHhcNOTcwMTAxMDAwMDAwWhcN +MjAxMjMxMjM1OTU5WjCBizELMAkGA1UEBhMCWkExFTATBgNVBAgTDFdlc3Rlcm4g +Q2FwZTEUMBIGA1UEBxMLRHVyYmFudmlsbGUxDzANBgNVBAoTBlRoYXd0ZTEdMBsG +A1UECxMUVGhhd3RlIENlcnRpZmljYXRpb24xHzAdBgNVBAMTFlRoYXd0ZSBUaW1l +c3RhbXBpbmcgQ0EwgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBANYrWHhhRYZT +6jR7UZztsOYuGA7+4F+oJ9O0yeB8WU4WDnNUYMF/9p8u6TqFJBU820cEY8OexJQa +Wt9MevPZQx08EHp5JduQ/vBR5zDWQQD9nyjfeb6Uu522FOMjhdepQeBMpHmwKxqL +8vg7ij5FrHGSALSQQZj7X+36ty6K+Ig3AgMBAAGjEzARMA8GA1UdEwEB/wQFMAMB +Af8wDQYJKoZIhvcNAQEEBQADgYEAZ9viwuaHPUCDhjc1fR/OmsMMZiCouqoEiYbC +9RAIDb/LogWK0E02PvTX72nGXuSwlG9KuefeW4i2e9vjJ+V2w/A1wcu1J5szedyQ +pgCed/r8zSeUQhac0xxo7L9c3eWpexAKMnRUEzGLhQOEkbdYATAUOK8oyvyxUBkZ +CayJSdM= +-----END CERTIFICATE----- + +thawte Primary Root CA +====================== + +-----BEGIN CERTIFICATE----- +MIIEIDCCAwigAwIBAgIQNE7VVyDV7exJ9C/ON9srbTANBgkqhkiG9w0BAQUFADCB +qTELMAkGA1UEBhMCVVMxFTATBgNVBAoTDHRoYXd0ZSwgSW5jLjEoMCYGA1UECxMf +Q2VydGlmaWNhdGlvbiBTZXJ2aWNlcyBEaXZpc2lvbjE4MDYGA1UECxMvKGMpIDIw +MDYgdGhhd3RlLCBJbmMuIC0gRm9yIGF1dGhvcml6ZWQgdXNlIG9ubHkxHzAdBgNV +BAMTFnRoYXd0ZSBQcmltYXJ5IFJvb3QgQ0EwHhcNMDYxMTE3MDAwMDAwWhcNMzYw +NzE2MjM1OTU5WjCBqTELMAkGA1UEBhMCVVMxFTATBgNVBAoTDHRoYXd0ZSwgSW5j 
+LjEoMCYGA1UECxMfQ2VydGlmaWNhdGlvbiBTZXJ2aWNlcyBEaXZpc2lvbjE4MDYG +A1UECxMvKGMpIDIwMDYgdGhhd3RlLCBJbmMuIC0gRm9yIGF1dGhvcml6ZWQgdXNl +IG9ubHkxHzAdBgNVBAMTFnRoYXd0ZSBQcmltYXJ5IFJvb3QgQ0EwggEiMA0GCSqG +SIb3DQEBAQUAA4IBDwAwggEKAoIBAQCsoPD7gFnUnMekz52hWXMJEEUMDSxuaPFs +W0hoSVk3/AszGcJ3f8wQLZU0HObrTQmnHNK4yZc2AreJ1CRfBsDMRJSUjQJib+ta +3RGNKJpchJAQeg29dGYvajig4tVUROsdB58Hum/u6f1OCyn1PoSgAfGcq/gcfomk +6KHYcWUNo1F77rzSImANuVud37r8UVsLr5iy6S7pBOhih94ryNdOwUxkHt3Ph1i6 +Sk/KaAcdHJ1KxtUvkcx8cXIcxcBn6zL9yZJclNqFwJu/U30rCfSMnZEfl2pSy94J +NqR32HuHUETVPm4pafs5SSYeCaWAe0At6+gnhcn+Yf1+5nyXHdWdAgMBAAGjQjBA +MA8GA1UdEwEB/wQFMAMBAf8wDgYDVR0PAQH/BAQDAgEGMB0GA1UdDgQWBBR7W0XP +r87Lev0xkhpqtvNG61dIUDANBgkqhkiG9w0BAQUFAAOCAQEAeRHAS7ORtvzw6WfU +DW5FvlXok9LOAz/t2iWwHVfLHjp2oEzsUHboZHIMpKnxuIvW1oeEuzLlQRHAd9mz +YJ3rG9XRbkREqaYB7FViHXe4XI5ISXycO1cRrK1zN44veFyQaEfZYGDm/Ac9IiAX +xPcW6cTYcvnIc3zfFi8VqT79aie2oetaupgf1eNNZAqdE8hhuvU5HIe6uL17In/2 +/qxAeeWsEG89jxt5dovEN7MhGITlNgDrYyCZuen+MwS7QcjBAvlEYyCegc5C09Y/ +LHbTY5xZ3Y+m4Q6gLkH3LpVHz7z9M/P2C2F+fpErgUfCJzDupxBdN49cOSvkBPB7 +jVaMaA== +-----END CERTIFICATE----- + +VeriSign Class 3 Public Primary Certification Authority - G5 +============================================================ + +-----BEGIN CERTIFICATE----- +MIIE0zCCA7ugAwIBAgIQGNrRniZ96LtKIVjNzGs7SjANBgkqhkiG9w0BAQUFADCB +yjELMAkGA1UEBhMCVVMxFzAVBgNVBAoTDlZlcmlTaWduLCBJbmMuMR8wHQYDVQQL +ExZWZXJpU2lnbiBUcnVzdCBOZXR3b3JrMTowOAYDVQQLEzEoYykgMjAwNiBWZXJp +U2lnbiwgSW5jLiAtIEZvciBhdXRob3JpemVkIHVzZSBvbmx5MUUwQwYDVQQDEzxW +ZXJpU2lnbiBDbGFzcyAzIFB1YmxpYyBQcmltYXJ5IENlcnRpZmljYXRpb24gQXV0 +aG9yaXR5IC0gRzUwHhcNMDYxMTA4MDAwMDAwWhcNMzYwNzE2MjM1OTU5WjCByjEL +MAkGA1UEBhMCVVMxFzAVBgNVBAoTDlZlcmlTaWduLCBJbmMuMR8wHQYDVQQLExZW +ZXJpU2lnbiBUcnVzdCBOZXR3b3JrMTowOAYDVQQLEzEoYykgMjAwNiBWZXJpU2ln +biwgSW5jLiAtIEZvciBhdXRob3JpemVkIHVzZSBvbmx5MUUwQwYDVQQDEzxWZXJp +U2lnbiBDbGFzcyAzIFB1YmxpYyBQcmltYXJ5IENlcnRpZmljYXRpb24gQXV0aG9y +aXR5IC0gRzUwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCvJAgIKXo1 
+nmAMqudLO07cfLw8RRy7K+D+KQL5VwijZIUVJ/XxrcgxiV0i6CqqpkKzj/i5Vbex +t0uz/o9+B1fs70PbZmIVYc9gDaTY3vjgw2IIPVQT60nKWVSFJuUrjxuf6/WhkcIz +SdhDY2pSS9KP6HBRTdGJaXvHcPaz3BJ023tdS1bTlr8Vd6Gw9KIl8q8ckmcY5fQG +BO+QueQA5N06tRn/Arr0PO7gi+s3i+z016zy9vA9r911kTMZHRxAy3QkGSGT2RT+ +rCpSx4/VBEnkjWNHiDxpg8v+R70rfk/Fla4OndTRQ8Bnc+MUCH7lP59zuDMKz10/ +NIeWiu5T6CUVAgMBAAGjgbIwga8wDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8E +BAMCAQYwbQYIKwYBBQUHAQwEYTBfoV2gWzBZMFcwVRYJaW1hZ2UvZ2lmMCEwHzAH +BgUrDgMCGgQUj+XTGoasjY5rw8+AatRIGCx7GS4wJRYjaHR0cDovL2xvZ28udmVy +aXNpZ24uY29tL3ZzbG9nby5naWYwHQYDVR0OBBYEFH/TZafC3ey78DAJ80M5+gKv +MzEzMA0GCSqGSIb3DQEBBQUAA4IBAQCTJEowX2LP2BqYLz3q3JktvXf2pXkiOOzE +p6B4Eq1iDkVwZMXnl2YtmAl+X6/WzChl8gGqCBpH3vn5fJJaCGkgDdk+bW48DW7Y +5gaRQBi5+MHt39tBquCWIMnNZBU4gcmU7qKEKQsTb47bDN0lAtukixlE0kF6BWlK +WE9gyn6CagsCqiUXObXbf+eEZSqVir2G3l6BFoMtEMze/aiCKm0oHw0LxOXnGiYZ +4fQRbxC1lfznQgUy286dUV4otp6F01vvpX1FQHKOtw5rDgb7MzVIcbidJ4vEZV8N +hnacRHr2lVz2XTIIM6RUthg/aFzyQkqFOFSDX9HoLPKsEdao7WNq +-----END CERTIFICATE----- + +Entrust.net Secure Server Certification Authority +================================================= + +-----BEGIN CERTIFICATE----- +MIIE2DCCBEGgAwIBAgIEN0rSQzANBgkqhkiG9w0BAQUFADCBwzELMAkGA1UEBhMC +VVMxFDASBgNVBAoTC0VudHJ1c3QubmV0MTswOQYDVQQLEzJ3d3cuZW50cnVzdC5u +ZXQvQ1BTIGluY29ycC4gYnkgcmVmLiAobGltaXRzIGxpYWIuKTElMCMGA1UECxMc +KGMpIDE5OTkgRW50cnVzdC5uZXQgTGltaXRlZDE6MDgGA1UEAxMxRW50cnVzdC5u +ZXQgU2VjdXJlIFNlcnZlciBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw05OTA1 +MjUxNjA5NDBaFw0xOTA1MjUxNjM5NDBaMIHDMQswCQYDVQQGEwJVUzEUMBIGA1UE +ChMLRW50cnVzdC5uZXQxOzA5BgNVBAsTMnd3dy5lbnRydXN0Lm5ldC9DUFMgaW5j +b3JwLiBieSByZWYuIChsaW1pdHMgbGlhYi4pMSUwIwYDVQQLExwoYykgMTk5OSBF +bnRydXN0Lm5ldCBMaW1pdGVkMTowOAYDVQQDEzFFbnRydXN0Lm5ldCBTZWN1cmUg +U2VydmVyIENlcnRpZmljYXRpb24gQXV0aG9yaXR5MIGdMA0GCSqGSIb3DQEBAQUA +A4GLADCBhwKBgQDNKIM0VBuJ8w+vN5Ex/68xYMmo6LIQaO2f55M28Qpku0f1BBc/ +I0dNxScZgSYMVHINiC3ZH5oSn7yzcdOAGT9HZnuMNSjSuQrfJNqc1lB5gXpa0zf3 +wkrYKZImZNHkmGw6AIr1NJtl+O3jEP/9uElY3KDegjlrgbEWGWG5VLbmQwIBA6OC 
+AdcwggHTMBEGCWCGSAGG+EIBAQQEAwIABzCCARkGA1UdHwSCARAwggEMMIHeoIHb +oIHYpIHVMIHSMQswCQYDVQQGEwJVUzEUMBIGA1UEChMLRW50cnVzdC5uZXQxOzA5 +BgNVBAsTMnd3dy5lbnRydXN0Lm5ldC9DUFMgaW5jb3JwLiBieSByZWYuIChsaW1p +dHMgbGlhYi4pMSUwIwYDVQQLExwoYykgMTk5OSBFbnRydXN0Lm5ldCBMaW1pdGVk +MTowOAYDVQQDEzFFbnRydXN0Lm5ldCBTZWN1cmUgU2VydmVyIENlcnRpZmljYXRp +b24gQXV0aG9yaXR5MQ0wCwYDVQQDEwRDUkwxMCmgJ6AlhiNodHRwOi8vd3d3LmVu +dHJ1c3QubmV0L0NSTC9uZXQxLmNybDArBgNVHRAEJDAigA8xOTk5MDUyNTE2MDk0 +MFqBDzIwMTkwNTI1MTYwOTQwWjALBgNVHQ8EBAMCAQYwHwYDVR0jBBgwFoAU8Bdi +E1U9s/8KAGv7UISX8+1i0BowHQYDVR0OBBYEFPAXYhNVPbP/CgBr+1CEl/PtYtAa +MAwGA1UdEwQFMAMBAf8wGQYJKoZIhvZ9B0EABAwwChsEVjQuMAMCBJAwDQYJKoZI +hvcNAQEFBQADgYEAkNwwAvpkdMKnCqV8IY00F6j7Rw7/JXyNEwr75Ji174z4xRAN +95K+8cPV1ZVqBLssziY2ZcgxxufuP+NXdYR6Ee9GTxj005i7qIcyunL2POI9n9cd +2cNgQ4xYDiKWL2KjLB+6rQXvqzJ4h6BUcxm1XAX5Uj5tLUUL9wqT6u0G+bI= +-----END CERTIFICATE----- + +Go Daddy Certification Authority Root Certificate Bundle +======================================================== + +-----BEGIN CERTIFICATE----- +MIIE3jCCA8agAwIBAgICAwEwDQYJKoZIhvcNAQEFBQAwYzELMAkGA1UEBhMCVVMx +ITAfBgNVBAoTGFRoZSBHbyBEYWRkeSBHcm91cCwgSW5jLjExMC8GA1UECxMoR28g +RGFkZHkgQ2xhc3MgMiBDZXJ0aWZpY2F0aW9uIEF1dGhvcml0eTAeFw0wNjExMTYw +MTU0MzdaFw0yNjExMTYwMTU0MzdaMIHKMQswCQYDVQQGEwJVUzEQMA4GA1UECBMH +QXJpem9uYTETMBEGA1UEBxMKU2NvdHRzZGFsZTEaMBgGA1UEChMRR29EYWRkeS5j +b20sIEluYy4xMzAxBgNVBAsTKmh0dHA6Ly9jZXJ0aWZpY2F0ZXMuZ29kYWRkeS5j +b20vcmVwb3NpdG9yeTEwMC4GA1UEAxMnR28gRGFkZHkgU2VjdXJlIENlcnRpZmlj +YXRpb24gQXV0aG9yaXR5MREwDwYDVQQFEwgwNzk2OTI4NzCCASIwDQYJKoZIhvcN +AQEBBQADggEPADCCAQoCggEBAMQt1RWMnCZM7DI161+4WQFapmGBWTtwY6vj3D3H +KrjJM9N55DrtPDAjhI6zMBS2sofDPZVUBJ7fmd0LJR4h3mUpfjWoqVTr9vcyOdQm +VZWt7/v+WIbXnvQAjYwqDL1CBM6nPwT27oDyqu9SoWlm2r4arV3aLGbqGmu75RpR +SgAvSMeYddi5Kcju+GZtCpyz8/x4fKL4o/K1w/O5epHBp+YlLpyo7RJlbmr2EkRT +cDCVw5wrWCs9CHRK8r5RsL+H0EwnWGu1NcWdrxcx+AuP7q2BNgWJCJjPOq8lh8BJ +6qf9Z/dFjpfMFDniNoW1fho3/Rb2cRGadDAW/hOUoz+EDU8CAwEAAaOCATIwggEu 
+MB0GA1UdDgQWBBT9rGEyk2xF1uLuhV+auud2mWjM5zAfBgNVHSMEGDAWgBTSxLDS +kdRMEXGzYcs9of7dqGrU4zASBgNVHRMBAf8ECDAGAQH/AgEAMDMGCCsGAQUFBwEB +BCcwJTAjBggrBgEFBQcwAYYXaHR0cDovL29jc3AuZ29kYWRkeS5jb20wRgYDVR0f +BD8wPTA7oDmgN4Y1aHR0cDovL2NlcnRpZmljYXRlcy5nb2RhZGR5LmNvbS9yZXBv +c2l0b3J5L2dkcm9vdC5jcmwwSwYDVR0gBEQwQjBABgRVHSAAMDgwNgYIKwYBBQUH +AgEWKmh0dHA6Ly9jZXJ0aWZpY2F0ZXMuZ29kYWRkeS5jb20vcmVwb3NpdG9yeTAO +BgNVHQ8BAf8EBAMCAQYwDQYJKoZIhvcNAQEFBQADggEBANKGwOy9+aG2Z+5mC6IG +OgRQjhVyrEp0lVPLN8tESe8HkGsz2ZbwlFalEzAFPIUyIXvJxwqoJKSQ3kbTJSMU +A2fCENZvD117esyfxVgqwcSeIaha86ykRvOe5GPLL5CkKSkB2XIsKd83ASe8T+5o +0yGPwLPk9Qnt0hCqU7S+8MxZC9Y7lhyVJEnfzuz9p0iRFEUOOjZv2kWzRaJBydTX +RE4+uXR21aITVSzGh6O1mawGhId/dQb8vxRMDsxuxN89txJx9OjxUUAiKEngHUuH +qDTMBqLdElrRhjZkAzVvb3du6/KFUJheqwNTrZEjYx8WnM25sgVjOuH0aBsXBTWV +U+4= +-----END CERTIFICATE----- +-----BEGIN CERTIFICATE----- +MIIE+zCCBGSgAwIBAgICAQ0wDQYJKoZIhvcNAQEFBQAwgbsxJDAiBgNVBAcTG1Zh +bGlDZXJ0IFZhbGlkYXRpb24gTmV0d29yazEXMBUGA1UEChMOVmFsaUNlcnQsIElu +Yy4xNTAzBgNVBAsTLFZhbGlDZXJ0IENsYXNzIDIgUG9saWN5IFZhbGlkYXRpb24g +QXV0aG9yaXR5MSEwHwYDVQQDExhodHRwOi8vd3d3LnZhbGljZXJ0LmNvbS8xIDAe +BgkqhkiG9w0BCQEWEWluZm9AdmFsaWNlcnQuY29tMB4XDTA0MDYyOTE3MDYyMFoX +DTI0MDYyOTE3MDYyMFowYzELMAkGA1UEBhMCVVMxITAfBgNVBAoTGFRoZSBHbyBE +YWRkeSBHcm91cCwgSW5jLjExMC8GA1UECxMoR28gRGFkZHkgQ2xhc3MgMiBDZXJ0 +aWZpY2F0aW9uIEF1dGhvcml0eTCCASAwDQYJKoZIhvcNAQEBBQADggENADCCAQgC +ggEBAN6d1+pXGEmhW+vXX0iG6r7d/+TvZxz0ZWizV3GgXne77ZtJ6XCAPVYYYwhv +2vLM0D9/AlQiVBDYsoHUwHU9S3/Hd8M+eKsaA7Ugay9qK7HFiH7Eux6wwdhFJ2+q +N1j3hybX2C32qRe3H3I2TqYXP2WYktsqbl2i/ojgC95/5Y0V4evLOtXiEqITLdiO +r18SPaAIBQi2XKVlOARFmR6jYGB0xUGlcmIbYsUfb18aQr4CUWWoriMYavx4A6lN +f4DD+qta/KFApMoZFv6yyO9ecw3ud72a9nmYvLEHZ6IVDd2gWMZEewo+YihfukEH +U1jPEX44dMX4/7VpkI+EdOqXG68CAQOjggHhMIIB3TAdBgNVHQ4EFgQU0sSw0pHU +TBFxs2HLPaH+3ahq1OMwgdIGA1UdIwSByjCBx6GBwaSBvjCBuzEkMCIGA1UEBxMb +VmFsaUNlcnQgVmFsaWRhdGlvbiBOZXR3b3JrMRcwFQYDVQQKEw5WYWxpQ2VydCwg +SW5jLjE1MDMGA1UECxMsVmFsaUNlcnQgQ2xhc3MgMiBQb2xpY3kgVmFsaWRhdGlv 
+biBBdXRob3JpdHkxITAfBgNVBAMTGGh0dHA6Ly93d3cudmFsaWNlcnQuY29tLzEg +MB4GCSqGSIb3DQEJARYRaW5mb0B2YWxpY2VydC5jb22CAQEwDwYDVR0TAQH/BAUw +AwEB/zAzBggrBgEFBQcBAQQnMCUwIwYIKwYBBQUHMAGGF2h0dHA6Ly9vY3NwLmdv +ZGFkZHkuY29tMEQGA1UdHwQ9MDswOaA3oDWGM2h0dHA6Ly9jZXJ0aWZpY2F0ZXMu +Z29kYWRkeS5jb20vcmVwb3NpdG9yeS9yb290LmNybDBLBgNVHSAERDBCMEAGBFUd +IAAwODA2BggrBgEFBQcCARYqaHR0cDovL2NlcnRpZmljYXRlcy5nb2RhZGR5LmNv +bS9yZXBvc2l0b3J5MA4GA1UdDwEB/wQEAwIBBjANBgkqhkiG9w0BAQUFAAOBgQC1 +QPmnHfbq/qQaQlpE9xXUhUaJwL6e4+PrxeNYiY+Sn1eocSxI0YGyeR+sBjUZsE4O +WBsUs5iB0QQeyAfJg594RAoYC5jcdnplDQ1tgMQLARzLrUc+cb53S8wGd9D0Vmsf +SxOaFIqII6hR8INMqzW/Rn453HWkrugp++85j09VZw== +-----END CERTIFICATE----- +-----BEGIN CERTIFICATE----- +MIIC5zCCAlACAQEwDQYJKoZIhvcNAQEFBQAwgbsxJDAiBgNVBAcTG1ZhbGlDZXJ0 +IFZhbGlkYXRpb24gTmV0d29yazEXMBUGA1UEChMOVmFsaUNlcnQsIEluYy4xNTAz +BgNVBAsTLFZhbGlDZXJ0IENsYXNzIDIgUG9saWN5IFZhbGlkYXRpb24gQXV0aG9y +aXR5MSEwHwYDVQQDExhodHRwOi8vd3d3LnZhbGljZXJ0LmNvbS8xIDAeBgkqhkiG +9w0BCQEWEWluZm9AdmFsaWNlcnQuY29tMB4XDTk5MDYyNjAwMTk1NFoXDTE5MDYy +NjAwMTk1NFowgbsxJDAiBgNVBAcTG1ZhbGlDZXJ0IFZhbGlkYXRpb24gTmV0d29y +azEXMBUGA1UEChMOVmFsaUNlcnQsIEluYy4xNTAzBgNVBAsTLFZhbGlDZXJ0IENs +YXNzIDIgUG9saWN5IFZhbGlkYXRpb24gQXV0aG9yaXR5MSEwHwYDVQQDExhodHRw +Oi8vd3d3LnZhbGljZXJ0LmNvbS8xIDAeBgkqhkiG9w0BCQEWEWluZm9AdmFsaWNl +cnQuY29tMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDOOnHK5avIWZJV16vY +dA757tn2VUdZZUcOBVXc65g2PFxTXdMwzzjsvUGJ7SVCCSRrCl6zfN1SLUzm1NZ9 +WlmpZdRJEy0kTRxQb7XBhVQ7/nHk01xC+YDgkRoKWzk2Z/M/VXwbP7RfZHM047QS +v4dk+NoS/zcnwbNDu+97bi5p9wIDAQABMA0GCSqGSIb3DQEBBQUAA4GBADt/UG9v +UJSZSWI4OB9L+KXIPqeCgfYrx+jFzug6EILLGACOTb2oWH+heQC1u+mNr0HZDzTu +IYEZoDJJKPTEjlbVUjP9UNV+mWwD5MlM/Mtsq2azSiGM5bUMMj4QssxsodyamEwC +W/POuZ6lcg5Ktz885hZo+L7tdEy8W9ViH0Pd +-----END CERTIFICATE----- + +GeoTrust Global CA +================== + +-----BEGIN CERTIFICATE----- +MIIDfTCCAuagAwIBAgIDErvmMA0GCSqGSIb3DQEBBQUAME4xCzAJBgNVBAYTAlVT +MRAwDgYDVQQKEwdFcXVpZmF4MS0wKwYDVQQLEyRFcXVpZmF4IFNlY3VyZSBDZXJ0 
+aWZpY2F0ZSBBdXRob3JpdHkwHhcNMDIwNTIxMDQwMDAwWhcNMTgwODIxMDQwMDAw +WjBCMQswCQYDVQQGEwJVUzEWMBQGA1UEChMNR2VvVHJ1c3QgSW5jLjEbMBkGA1UE +AxMSR2VvVHJ1c3QgR2xvYmFsIENBMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIB +CgKCAQEA2swYYzD99BcjGlZ+W988bDjkcbd4kdS8odhM+KhDtgPpTSEHCIjaWC9m +OSm9BXiLnTjoBbdqfnGk5sRgprDvgOSJKA+eJdbtg/OtppHHmMlCGDUUna2YRpIu +T8rxh0PBFpVXLVDviS2Aelet8u5fa9IAjbkU+BQVNdnARqN7csiRv8lVK83Qlz6c +JmTM386DGXHKTubU1XupGc1V3sjs0l44U+VcT4wt/lAjNvxm5suOpDkZALeVAjmR +Cw7+OC7RHQWa9k0+bw8HHa8sHo9gOeL6NlMTOdReJivbPagUvTLrGAMoUgRx5asz +PeE4uwc2hGKceeoWMPRfwCvocWvk+QIDAQABo4HwMIHtMB8GA1UdIwQYMBaAFEjm +aPkr0rKV10fYIyAQTzOYkJ/UMB0GA1UdDgQWBBTAephojYn7qwVkDBF9qn1luMrM +TjAPBgNVHRMBAf8EBTADAQH/MA4GA1UdDwEB/wQEAwIBBjA6BgNVHR8EMzAxMC+g +LaArhilodHRwOi8vY3JsLmdlb3RydXN0LmNvbS9jcmxzL3NlY3VyZWNhLmNybDBO +BgNVHSAERzBFMEMGBFUdIAAwOzA5BggrBgEFBQcCARYtaHR0cHM6Ly93d3cuZ2Vv +dHJ1c3QuY29tL3Jlc291cmNlcy9yZXBvc2l0b3J5MA0GCSqGSIb3DQEBBQUAA4GB +AHbhEm5OSxYShjAGsoEIz/AIx8dxfmbuwu3UOx//8PDITtZDOLC5MH0Y0FWDomrL +NhGc6Ehmo21/uBPUR/6LWlxz/K7ZGzIZOKuXNBSqltLroxwUCEm2u+WR74M26x1W +b8ravHNjkOR/ez4iyz0H7V84dJzjA1BOoa+Y7mHyhD8S +-----END CERTIFICATE----- + diff --git a/libs/httplib2/iri2uri.py b/libs/httplib2/iri2uri.py index 70667ed..d88c91f 100644 --- a/libs/httplib2/iri2uri.py +++ b/libs/httplib2/iri2uri.py @@ -16,7 +16,7 @@ import urlparse # Convert an IRI to a URI following the rules in RFC 3987 -# +# # The characters we need to enocde and escape are defined in the spec: # # iprivate = %xE000-F8FF / %xF0000-FFFFD / %x100000-10FFFD @@ -28,28 +28,28 @@ import urlparse # / %xD0000-DFFFD / %xE1000-EFFFD escape_range = [ - (0xA0, 0xD7FF ), - (0xE000, 0xF8FF ), - (0xF900, 0xFDCF ), - (0xFDF0, 0xFFEF), - (0x10000, 0x1FFFD ), - (0x20000, 0x2FFFD ), - (0x30000, 0x3FFFD), - (0x40000, 0x4FFFD ), - (0x50000, 0x5FFFD ), - (0x60000, 0x6FFFD), - (0x70000, 0x7FFFD ), - (0x80000, 0x8FFFD ), - (0x90000, 0x9FFFD), - (0xA0000, 0xAFFFD ), - (0xB0000, 0xBFFFD ), - (0xC0000, 0xCFFFD), - (0xD0000, 0xDFFFD ), - (0xE1000, 
0xEFFFD), - (0xF0000, 0xFFFFD ), - (0x100000, 0x10FFFD) + (0xA0, 0xD7FF), + (0xE000, 0xF8FF), + (0xF900, 0xFDCF), + (0xFDF0, 0xFFEF), + (0x10000, 0x1FFFD), + (0x20000, 0x2FFFD), + (0x30000, 0x3FFFD), + (0x40000, 0x4FFFD), + (0x50000, 0x5FFFD), + (0x60000, 0x6FFFD), + (0x70000, 0x7FFFD), + (0x80000, 0x8FFFD), + (0x90000, 0x9FFFD), + (0xA0000, 0xAFFFD), + (0xB0000, 0xBFFFD), + (0xC0000, 0xCFFFD), + (0xD0000, 0xDFFFD), + (0xE1000, 0xEFFFD), + (0xF0000, 0xFFFFD), + (0x100000, 0x10FFFD), ] - + def encode(c): retval = c i = ord(c) @@ -63,19 +63,19 @@ def encode(c): def iri2uri(uri): - """Convert an IRI to a URI. Note that IRIs must be + """Convert an IRI to a URI. Note that IRIs must be passed in a unicode strings. That is, do not utf-8 encode - the IRI before passing it into the function.""" + the IRI before passing it into the function.""" if isinstance(uri ,unicode): (scheme, authority, path, query, fragment) = urlparse.urlsplit(uri) authority = authority.encode('idna') # For each character in 'ucschar' or 'iprivate' # 1. encode as utf-8 - # 2. then %-encode each octet of that utf-8 + # 2. 
then %-encode each octet of that utf-8 uri = urlparse.urlunsplit((scheme, authority, path, query, fragment)) uri = "".join([encode(c) for c in uri]) return uri - + if __name__ == "__main__": import unittest @@ -83,7 +83,7 @@ if __name__ == "__main__": def test_uris(self): """Test that URIs are invariant under the transformation.""" - invariant = [ + invariant = [ u"ftp://ftp.is.co.za/rfc/rfc1808.txt", u"http://www.ietf.org/rfc/rfc2396.txt", u"ldap://[2001:db8::7]/c=GB?objectClass?one", @@ -94,7 +94,7 @@ if __name__ == "__main__": u"urn:oasis:names:specification:docbook:dtd:xml:4.1.2" ] for uri in invariant: self.assertEqual(uri, iri2uri(uri)) - + def test_iri(self): """ Test that the right type of escaping is done for each part of the URI.""" self.assertEqual("http://xn--o3h.com/%E2%98%84", iri2uri(u"http://\N{COMET}.com/\N{COMET}")) @@ -107,4 +107,4 @@ if __name__ == "__main__": unittest.main() - + diff --git a/libs/httplib2/socks.py b/libs/httplib2/socks.py new file mode 100644 index 0000000..0991f4c --- /dev/null +++ b/libs/httplib2/socks.py @@ -0,0 +1,438 @@ +"""SocksiPy - Python SOCKS module. +Version 1.00 + +Copyright 2006 Dan-Haim. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of Dan Haim nor the names of his contributors may be used + to endorse or promote products derived from this software without specific + prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY DAN HAIM "AS IS" AND ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +EVENT SHALL DAN HAIM OR HIS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA +OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMANGE. + + +This module provides a standard socket-like interface for Python +for tunneling connections through SOCKS proxies. + +""" + +""" + +Minor modifications made by Christopher Gilbert (http://motomastyle.com/) +for use in PyLoris (http://pyloris.sourceforge.net/) + +Minor modifications made by Mario Vilas (http://breakingcode.wordpress.com/) +mainly to merge bug fixes found in Sourceforge + +""" + +import base64 +import socket +import struct +import sys + +if getattr(socket, 'socket', None) is None: + raise ImportError('socket.socket missing, proxy support unusable') + +PROXY_TYPE_SOCKS4 = 1 +PROXY_TYPE_SOCKS5 = 2 +PROXY_TYPE_HTTP = 3 +PROXY_TYPE_HTTP_NO_TUNNEL = 4 + +_defaultproxy = None +_orgsocket = socket.socket + +class ProxyError(Exception): pass +class GeneralProxyError(ProxyError): pass +class Socks5AuthError(ProxyError): pass +class Socks5Error(ProxyError): pass +class Socks4Error(ProxyError): pass +class HTTPError(ProxyError): pass + +_generalerrors = ("success", + "invalid data", + "not connected", + "not available", + "bad proxy type", + "bad input") + +_socks5errors = ("succeeded", + "general SOCKS server failure", + "connection not allowed by ruleset", + "Network unreachable", + "Host unreachable", + "Connection refused", + "TTL expired", + "Command not supported", + 
"Address type not supported", + "Unknown error") + +_socks5autherrors = ("succeeded", + "authentication is required", + "all offered authentication methods were rejected", + "unknown username or invalid password", + "unknown error") + +_socks4errors = ("request granted", + "request rejected or failed", + "request rejected because SOCKS server cannot connect to identd on the client", + "request rejected because the client program and identd report different user-ids", + "unknown error") + +def setdefaultproxy(proxytype=None, addr=None, port=None, rdns=True, username=None, password=None): + """setdefaultproxy(proxytype, addr[, port[, rdns[, username[, password]]]]) + Sets a default proxy which all further socksocket objects will use, + unless explicitly changed. + """ + global _defaultproxy + _defaultproxy = (proxytype, addr, port, rdns, username, password) + +def wrapmodule(module): + """wrapmodule(module) + Attempts to replace a module's socket library with a SOCKS socket. Must set + a default proxy using setdefaultproxy(...) first. + This will only work on modules that import socket directly into the namespace; + most of the Python Standard Library falls into this category. + """ + if _defaultproxy != None: + module.socket.socket = socksocket + else: + raise GeneralProxyError((4, "no proxy specified")) + +class socksocket(socket.socket): + """socksocket([family[, type[, proto]]]) -> socket object + Open a SOCKS enabled socket. The parameters are the same as + those of the standard socket init. In order for SOCKS to work, + you must specify family=AF_INET, type=SOCK_STREAM and proto=0. 
+ """ + + def __init__(self, family=socket.AF_INET, type=socket.SOCK_STREAM, proto=0, _sock=None): + _orgsocket.__init__(self, family, type, proto, _sock) + if _defaultproxy != None: + self.__proxy = _defaultproxy + else: + self.__proxy = (None, None, None, None, None, None) + self.__proxysockname = None + self.__proxypeername = None + self.__httptunnel = True + + def __recvall(self, count): + """__recvall(count) -> data + Receive EXACTLY the number of bytes requested from the socket. + Blocks until the required number of bytes have been received. + """ + data = self.recv(count) + while len(data) < count: + d = self.recv(count-len(data)) + if not d: raise GeneralProxyError((0, "connection closed unexpectedly")) + data = data + d + return data + + def sendall(self, content, *args): + """ override socket.socket.sendall method to rewrite the header + for non-tunneling proxies if needed + """ + if not self.__httptunnel: + content = self.__rewriteproxy(content) + return super(socksocket, self).sendall(content, *args) + + def __rewriteproxy(self, header): + """ rewrite HTTP request headers to support non-tunneling proxies + (i.e. those which do not support the CONNECT method). + This only works for HTTP (not HTTPS) since HTTPS requires tunneling. 
+ """ + host, endpt = None, None + hdrs = header.split("\r\n") + for hdr in hdrs: + if hdr.lower().startswith("host:"): + host = hdr + elif hdr.lower().startswith("get") or hdr.lower().startswith("post"): + endpt = hdr + if host and endpt: + hdrs.remove(host) + hdrs.remove(endpt) + host = host.split(" ")[1] + endpt = endpt.split(" ") + if (self.__proxy[4] != None and self.__proxy[5] != None): + hdrs.insert(0, self.__getauthheader()) + hdrs.insert(0, "Host: %s" % host) + hdrs.insert(0, "%s http://%s%s %s" % (endpt[0], host, endpt[1], endpt[2])) + return "\r\n".join(hdrs) + + def __getauthheader(self): + auth = self.__proxy[4] + ":" + self.__proxy[5] + return "Proxy-Authorization: Basic " + base64.b64encode(auth) + + def setproxy(self, proxytype=None, addr=None, port=None, rdns=True, username=None, password=None): + """setproxy(proxytype, addr[, port[, rdns[, username[, password]]]]) + Sets the proxy to be used. + proxytype - The type of the proxy to be used. Three types + are supported: PROXY_TYPE_SOCKS4 (including socks4a), + PROXY_TYPE_SOCKS5 and PROXY_TYPE_HTTP + addr - The address of the server (IP or DNS). + port - The port of the server. Defaults to 1080 for SOCKS + servers and 8080 for HTTP proxy servers. + rdns - Should DNS queries be preformed on the remote side + (rather than the local side). The default is True. + Note: This has no effect with SOCKS4 servers. + username - Username to authenticate with to the server. + The default is no authentication. + password - Password to authenticate with to the server. + Only relevant when username is also provided. + """ + self.__proxy = (proxytype, addr, port, rdns, username, password) + + def __negotiatesocks5(self, destaddr, destport): + """__negotiatesocks5(self,destaddr,destport) + Negotiates a connection through a SOCKS5 server. + """ + # First we'll send the authentication packages we support. 
+ if (self.__proxy[4]!=None) and (self.__proxy[5]!=None): + # The username/password details were supplied to the + # setproxy method so we support the USERNAME/PASSWORD + # authentication (in addition to the standard none). + self.sendall(struct.pack('BBBB', 0x05, 0x02, 0x00, 0x02)) + else: + # No username/password were entered, therefore we + # only support connections with no authentication. + self.sendall(struct.pack('BBB', 0x05, 0x01, 0x00)) + # We'll receive the server's response to determine which + # method was selected + chosenauth = self.__recvall(2) + if chosenauth[0:1] != chr(0x05).encode(): + self.close() + raise GeneralProxyError((1, _generalerrors[1])) + # Check the chosen authentication method + if chosenauth[1:2] == chr(0x00).encode(): + # No authentication is required + pass + elif chosenauth[1:2] == chr(0x02).encode(): + # Okay, we need to perform a basic username/password + # authentication. + self.sendall(chr(0x01).encode() + chr(len(self.__proxy[4])) + self.__proxy[4] + chr(len(self.__proxy[5])) + self.__proxy[5]) + authstat = self.__recvall(2) + if authstat[0:1] != chr(0x01).encode(): + # Bad response + self.close() + raise GeneralProxyError((1, _generalerrors[1])) + if authstat[1:2] != chr(0x00).encode(): + # Authentication failed + self.close() + raise Socks5AuthError((3, _socks5autherrors[3])) + # Authentication succeeded + else: + # Reaching here is always bad + self.close() + if chosenauth[1] == chr(0xFF).encode(): + raise Socks5AuthError((2, _socks5autherrors[2])) + else: + raise GeneralProxyError((1, _generalerrors[1])) + # Now we can request the actual connection + req = struct.pack('BBB', 0x05, 0x01, 0x00) + # If the given destination address is an IP address, we'll + # use the IPv4 address request even if remote resolving was specified. + try: + ipaddr = socket.inet_aton(destaddr) + req = req + chr(0x01).encode() + ipaddr + except socket.error: + # Well it's not an IP number, so it's probably a DNS name. 
+ if self.__proxy[3]: + # Resolve remotely + ipaddr = None + req = req + chr(0x03).encode() + chr(len(destaddr)).encode() + destaddr + else: + # Resolve locally + ipaddr = socket.inet_aton(socket.gethostbyname(destaddr)) + req = req + chr(0x01).encode() + ipaddr + req = req + struct.pack(">H", destport) + self.sendall(req) + # Get the response + resp = self.__recvall(4) + if resp[0:1] != chr(0x05).encode(): + self.close() + raise GeneralProxyError((1, _generalerrors[1])) + elif resp[1:2] != chr(0x00).encode(): + # Connection failed + self.close() + if ord(resp[1:2])<=8: + raise Socks5Error((ord(resp[1:2]), _socks5errors[ord(resp[1:2])])) + else: + raise Socks5Error((9, _socks5errors[9])) + # Get the bound address/port + elif resp[3:4] == chr(0x01).encode(): + boundaddr = self.__recvall(4) + elif resp[3:4] == chr(0x03).encode(): + resp = resp + self.recv(1) + boundaddr = self.__recvall(ord(resp[4:5])) + else: + self.close() + raise GeneralProxyError((1,_generalerrors[1])) + boundport = struct.unpack(">H", self.__recvall(2))[0] + self.__proxysockname = (boundaddr, boundport) + if ipaddr != None: + self.__proxypeername = (socket.inet_ntoa(ipaddr), destport) + else: + self.__proxypeername = (destaddr, destport) + + def getproxysockname(self): + """getsockname() -> address info + Returns the bound IP address and port number at the proxy. + """ + return self.__proxysockname + + def getproxypeername(self): + """getproxypeername() -> address info + Returns the IP and port number of the proxy. + """ + return _orgsocket.getpeername(self) + + def getpeername(self): + """getpeername() -> address info + Returns the IP address and port number of the destination + machine (note: getproxypeername returns the proxy) + """ + return self.__proxypeername + + def __negotiatesocks4(self,destaddr,destport): + """__negotiatesocks4(self,destaddr,destport) + Negotiates a connection through a SOCKS4 server. 
+ """ + # Check if the destination address provided is an IP address + rmtrslv = False + try: + ipaddr = socket.inet_aton(destaddr) + except socket.error: + # It's a DNS name. Check where it should be resolved. + if self.__proxy[3]: + ipaddr = struct.pack("BBBB", 0x00, 0x00, 0x00, 0x01) + rmtrslv = True + else: + ipaddr = socket.inet_aton(socket.gethostbyname(destaddr)) + # Construct the request packet + req = struct.pack(">BBH", 0x04, 0x01, destport) + ipaddr + # The username parameter is considered userid for SOCKS4 + if self.__proxy[4] != None: + req = req + self.__proxy[4] + req = req + chr(0x00).encode() + # DNS name if remote resolving is required + # NOTE: This is actually an extension to the SOCKS4 protocol + # called SOCKS4A and may not be supported in all cases. + if rmtrslv: + req = req + destaddr + chr(0x00).encode() + self.sendall(req) + # Get the response from the server + resp = self.__recvall(8) + if resp[0:1] != chr(0x00).encode(): + # Bad data + self.close() + raise GeneralProxyError((1,_generalerrors[1])) + if resp[1:2] != chr(0x5A).encode(): + # Server returned an error + self.close() + if ord(resp[1:2]) in (91, 92, 93): + self.close() + raise Socks4Error((ord(resp[1:2]), _socks4errors[ord(resp[1:2]) - 90])) + else: + raise Socks4Error((94, _socks4errors[4])) + # Get the bound address/port + self.__proxysockname = (socket.inet_ntoa(resp[4:]), struct.unpack(">H", resp[2:4])[0]) + if rmtrslv != None: + self.__proxypeername = (socket.inet_ntoa(ipaddr), destport) + else: + self.__proxypeername = (destaddr, destport) + + def __negotiatehttp(self, destaddr, destport): + """__negotiatehttp(self,destaddr,destport) + Negotiates a connection through an HTTP server. 
+ """ + # If we need to resolve locally, we do this now + if not self.__proxy[3]: + addr = socket.gethostbyname(destaddr) + else: + addr = destaddr + headers = ["CONNECT ", addr, ":", str(destport), " HTTP/1.1\r\n"] + headers += ["Host: ", destaddr, "\r\n"] + if (self.__proxy[4] != None and self.__proxy[5] != None): + headers += [self.__getauthheader(), "\r\n"] + headers.append("\r\n") + self.sendall("".join(headers).encode()) + # We read the response until we get the string "\r\n\r\n" + resp = self.recv(1) + while resp.find("\r\n\r\n".encode()) == -1: + resp = resp + self.recv(1) + # We just need the first line to check if the connection + # was successful + statusline = resp.splitlines()[0].split(" ".encode(), 2) + if statusline[0] not in ("HTTP/1.0".encode(), "HTTP/1.1".encode()): + self.close() + raise GeneralProxyError((1, _generalerrors[1])) + try: + statuscode = int(statusline[1]) + except ValueError: + self.close() + raise GeneralProxyError((1, _generalerrors[1])) + if statuscode != 200: + self.close() + raise HTTPError((statuscode, statusline[2])) + self.__proxysockname = ("0.0.0.0", 0) + self.__proxypeername = (addr, destport) + + def connect(self, destpair): + """connect(self, despair) + Connects to the specified destination through a proxy. + destpar - A tuple of the IP/DNS address and the port number. + (identical to socket's connect). + To select the proxy server use setproxy(). 
+ """ + # Do a minimal input check first + if (not type(destpair) in (list,tuple)) or (len(destpair) < 2) or (not isinstance(destpair[0], basestring)) or (type(destpair[1]) != int): + raise GeneralProxyError((5, _generalerrors[5])) + if self.__proxy[0] == PROXY_TYPE_SOCKS5: + if self.__proxy[2] != None: + portnum = self.__proxy[2] + else: + portnum = 1080 + _orgsocket.connect(self, (self.__proxy[1], portnum)) + self.__negotiatesocks5(destpair[0], destpair[1]) + elif self.__proxy[0] == PROXY_TYPE_SOCKS4: + if self.__proxy[2] != None: + portnum = self.__proxy[2] + else: + portnum = 1080 + _orgsocket.connect(self,(self.__proxy[1], portnum)) + self.__negotiatesocks4(destpair[0], destpair[1]) + elif self.__proxy[0] == PROXY_TYPE_HTTP: + if self.__proxy[2] != None: + portnum = self.__proxy[2] + else: + portnum = 8080 + _orgsocket.connect(self,(self.__proxy[1], portnum)) + self.__negotiatehttp(destpair[0], destpair[1]) + elif self.__proxy[0] == PROXY_TYPE_HTTP_NO_TUNNEL: + if self.__proxy[2] != None: + portnum = self.__proxy[2] + else: + portnum = 8080 + _orgsocket.connect(self,(self.__proxy[1],portnum)) + if destpair[1] == 443: + self.__negotiatehttp(destpair[0],destpair[1]) + else: + self.__httptunnel = False + elif self.__proxy[0] == None: + _orgsocket.connect(self, (destpair[0], destpair[1])) + else: + raise GeneralProxyError((4, _generalerrors[4])) diff --git a/libs/pyasn1/__init__.py b/libs/pyasn1/__init__.py index 7de39fe..88aff79 100644 --- a/libs/pyasn1/__init__.py +++ b/libs/pyasn1/__init__.py @@ -1 +1,8 @@ -majorVersionId = '1' +import sys + +# http://www.python.org/dev/peps/pep-0396/ +__version__ = '0.1.7' + +if sys.version_info[:2] < (2, 4): + raise RuntimeError('PyASN1 requires Python 2.4 or later') + diff --git a/libs/pyasn1/codec/__init__.py b/libs/pyasn1/codec/__init__.py index e69de29..8c3066b 100644 --- a/libs/pyasn1/codec/__init__.py +++ b/libs/pyasn1/codec/__init__.py @@ -0,0 +1 @@ +# This file is necessary to make this directory a package. 
diff --git a/libs/pyasn1/codec/ber/__init__.py b/libs/pyasn1/codec/ber/__init__.py index e69de29..8c3066b 100644 --- a/libs/pyasn1/codec/ber/__init__.py +++ b/libs/pyasn1/codec/ber/__init__.py @@ -0,0 +1 @@ +# This file is necessary to make this directory a package. diff --git a/libs/pyasn1/codec/ber/decoder.py b/libs/pyasn1/codec/ber/decoder.py index ae9311c..be0cf49 100644 --- a/libs/pyasn1/codec/ber/decoder.py +++ b/libs/pyasn1/codec/ber/decoder.py @@ -1,21 +1,24 @@ # BER decoder from pyasn1.type import tag, base, univ, char, useful, tagmap from pyasn1.codec.ber import eoo -from pyasn1.compat.octets import oct2int, octs2ints -from pyasn1 import error +from pyasn1.compat.octets import oct2int, octs2ints, isOctetsType +from pyasn1 import debug, error class AbstractDecoder: protoComponent = None def valueDecoder(self, fullSubstrate, substrate, asn1Spec, tagSet, - length, state, decodeFun): - raise error.PyAsn1Error('Decoder not implemented for %s' % tagSet) + length, state, decodeFun, substrateFun): + raise error.PyAsn1Error('Decoder not implemented for %s' % (tagSet,)) def indefLenValueDecoder(self, fullSubstrate, substrate, asn1Spec, tagSet, - length, state, decodeFun): - raise error.PyAsn1Error('Indefinite length mode decoder not implemented for %s' % tagSet) + length, state, decodeFun, substrateFun): + raise error.PyAsn1Error('Indefinite length mode decoder not implemented for %s' % (tagSet,)) class AbstractSimpleDecoder(AbstractDecoder): + tagFormats = (tag.tagFormatSimple,) def _createComponent(self, asn1Spec, tagSet, value=None): + if tagSet[0][1] not in self.tagFormats: + raise error.PyAsn1Error('Invalid tag format %r for %r' % (tagSet[0], self.protoComponent,)) if asn1Spec is None: return self.protoComponent.clone(value, tagSet) elif value is None: @@ -24,7 +27,10 @@ class AbstractSimpleDecoder(AbstractDecoder): return asn1Spec.clone(value) class AbstractConstructedDecoder(AbstractDecoder): + tagFormats = (tag.tagFormatConstructed,) def 
_createComponent(self, asn1Spec, tagSet, value=None): + if tagSet[0][1] not in self.tagFormats: + raise error.PyAsn1Error('Invalid tag format %r for %r' % (tagSet[0], self.protoComponent,)) if asn1Spec is None: return self.protoComponent.clone(tagSet) else: @@ -32,19 +38,34 @@ class AbstractConstructedDecoder(AbstractDecoder): class EndOfOctetsDecoder(AbstractSimpleDecoder): def valueDecoder(self, fullSubstrate, substrate, asn1Spec, tagSet, - length, state, decodeFun): - return eoo.endOfOctets, substrate[:length] + length, state, decodeFun, substrateFun): + return eoo.endOfOctets, substrate[length:] class ExplicitTagDecoder(AbstractSimpleDecoder): + protoComponent = univ.Any('') + tagFormats = (tag.tagFormatConstructed,) def valueDecoder(self, fullSubstrate, substrate, asn1Spec, tagSet, - length, state, decodeFun): - return decodeFun(substrate[:length], asn1Spec, tagSet, length) + length, state, decodeFun, substrateFun): + if substrateFun: + return substrateFun( + self._createComponent(asn1Spec, tagSet, ''), + substrate, length + ) + head, tail = substrate[:length], substrate[length:] + value, _ = decodeFun(head, asn1Spec, tagSet, length) + return value, tail def indefLenValueDecoder(self, fullSubstrate, substrate, asn1Spec, tagSet, - length, state, decodeFun): + length, state, decodeFun, substrateFun): + if substrateFun: + return substrateFun( + self._createComponent(asn1Spec, tagSet, ''), + substrate, length + ) value, substrate = decodeFun(substrate, asn1Spec, tagSet, length) terminator, substrate = decodeFun(substrate) - if terminator == eoo.endOfOctets: + if eoo.endOfOctets.isSameTypeWith(terminator) and \ + terminator == eoo.endOfOctets: return value, substrate else: raise error.PyAsn1Error('Missing end-of-octets terminator') @@ -71,79 +92,71 @@ class IntegerDecoder(AbstractSimpleDecoder): '\xfb': -5 } - def _valueFilter(self, value): - try: - return int(value) - except OverflowError: - return value - def valueDecoder(self, fullSubstrate, substrate, asn1Spec, 
tagSet, length, - state, decodeFun): - substrate = substrate[:length] - if not substrate: - raise error.PyAsn1Error('Empty substrate') - if substrate in self.precomputedValues: - value = self.precomputedValues[substrate] + state, decodeFun, substrateFun): + head, tail = substrate[:length], substrate[length:] + if not head: + return self._createComponent(asn1Spec, tagSet, 0), tail + if head in self.precomputedValues: + value = self.precomputedValues[head] else: - firstOctet = oct2int(substrate[0]) + firstOctet = oct2int(head[0]) if firstOctet & 0x80: value = -1 else: value = 0 - for octet in substrate: + for octet in head: value = value << 8 | oct2int(octet) - value = self._valueFilter(value) - return self._createComponent(asn1Spec, tagSet, value), substrate + return self._createComponent(asn1Spec, tagSet, value), tail class BooleanDecoder(IntegerDecoder): protoComponent = univ.Boolean(0) - def _valueFilter(self, value): - if value: - return 1 - else: - return 0 + def _createComponent(self, asn1Spec, tagSet, value=None): + return IntegerDecoder._createComponent(self, asn1Spec, tagSet, value and 1 or 0) class BitStringDecoder(AbstractSimpleDecoder): protoComponent = univ.BitString(()) + tagFormats = (tag.tagFormatSimple, tag.tagFormatConstructed) def valueDecoder(self, fullSubstrate, substrate, asn1Spec, tagSet, length, - state, decodeFun): - substrate = substrate[:length] + state, decodeFun, substrateFun): + head, tail = substrate[:length], substrate[length:] if tagSet[0][1] == tag.tagFormatSimple: # XXX what tag to check? 
- if not substrate: - raise error.PyAsn1Error('Missing initial octet') - trailingBits = oct2int(substrate[0]) + if not head: + raise error.PyAsn1Error('Empty substrate') + trailingBits = oct2int(head[0]) if trailingBits > 7: raise error.PyAsn1Error( 'Trailing bits overflow %s' % trailingBits ) - substrate = substrate[1:] - lsb = p = 0; l = len(substrate)-1; b = () + head = head[1:] + lsb = p = 0; l = len(head)-1; b = () while p <= l: if p == l: lsb = trailingBits j = 7 - o = oct2int(substrate[p]) + o = oct2int(head[p]) while j >= lsb: b = b + ((o>>j)&0x01,) j = j - 1 p = p + 1 - return self._createComponent(asn1Spec, tagSet, b), '' + return self._createComponent(asn1Spec, tagSet, b), tail r = self._createComponent(asn1Spec, tagSet, ()) - if not decodeFun: - return r, substrate - while substrate: - component, substrate = decodeFun(substrate) + if substrateFun: + return substrateFun(r, substrate, length) + while head: + component, head = decodeFun(head) r = r + component - return r, substrate + return r, tail def indefLenValueDecoder(self, fullSubstrate, substrate, asn1Spec, tagSet, - length, state, decodeFun): + length, state, decodeFun, substrateFun): r = self._createComponent(asn1Spec, tagSet, '') - if not decodeFun: - return r, substrate + if substrateFun: + return substrateFun(r, substrate, length) while substrate: component, substrate = decodeFun(substrate) - if component == eoo.endOfOctets: + if eoo.endOfOctets.isSameTypeWith(component) and \ + component == eoo.endOfOctets: break r = r + component else: @@ -154,27 +167,29 @@ class BitStringDecoder(AbstractSimpleDecoder): class OctetStringDecoder(AbstractSimpleDecoder): protoComponent = univ.OctetString('') + tagFormats = (tag.tagFormatSimple, tag.tagFormatConstructed) def valueDecoder(self, fullSubstrate, substrate, asn1Spec, tagSet, length, - state, decodeFun): - substrate = substrate[:length] + state, decodeFun, substrateFun): + head, tail = substrate[:length], substrate[length:] if tagSet[0][1] == 
tag.tagFormatSimple: # XXX what tag to check? - return self._createComponent(asn1Spec, tagSet, substrate), '' + return self._createComponent(asn1Spec, tagSet, head), tail r = self._createComponent(asn1Spec, tagSet, '') - if not decodeFun: - return r, substrate - while substrate: - component, substrate = decodeFun(substrate) + if substrateFun: + return substrateFun(r, substrate, length) + while head: + component, head = decodeFun(head) r = r + component - return r, substrate + return r, tail def indefLenValueDecoder(self, fullSubstrate, substrate, asn1Spec, tagSet, - length, state, decodeFun): + length, state, decodeFun, substrateFun): r = self._createComponent(asn1Spec, tagSet, '') - if not decodeFun: - return r, substrate + if substrateFun: + return substrateFun(r, substrate, length) while substrate: component, substrate = decodeFun(substrate) - if component == eoo.endOfOctets: + if eoo.endOfOctets.isSameTypeWith(component) and \ + component == eoo.endOfOctets: break r = r + component else: @@ -186,93 +201,89 @@ class OctetStringDecoder(AbstractSimpleDecoder): class NullDecoder(AbstractSimpleDecoder): protoComponent = univ.Null('') def valueDecoder(self, fullSubstrate, substrate, asn1Spec, tagSet, - length, state, decodeFun): - substrate = substrate[:length] + length, state, decodeFun, substrateFun): + head, tail = substrate[:length], substrate[length:] r = self._createComponent(asn1Spec, tagSet) - if substrate: - raise error.PyAsn1Error('Unexpected substrate for Null') - return r, substrate + if head: + raise error.PyAsn1Error('Unexpected %d-octet substrate for Null' % length) + return r, tail class ObjectIdentifierDecoder(AbstractSimpleDecoder): protoComponent = univ.ObjectIdentifier(()) def valueDecoder(self, fullSubstrate, substrate, asn1Spec, tagSet, length, - state, decodeFun): - substrate = substrate[:length] - if not substrate: + state, decodeFun, substrateFun): + head, tail = substrate[:length], substrate[length:] + if not head: raise 
error.PyAsn1Error('Empty substrate') - oid = (); index = 0 + # Get the first subid - subId = oct2int(substrate[index]) - oid = oid + divmod(subId, 40) + subId = oct2int(head[0]) + oid = divmod(subId, 40) - index = index + 1 - substrateLen = len(substrate) - + index = 1 + substrateLen = len(head) while index < substrateLen: - subId = oct2int(substrate[index]) - if subId < 128: - oid = oid + (subId,) - index = index + 1 - else: + subId = oct2int(head[index]) + index = index + 1 + if subId == 128: + # ASN.1 spec forbids leading zeros (0x80) in sub-ID OID + # encoding, tolerating it opens a vulnerability. + # See http://www.cosic.esat.kuleuven.be/publications/article-1432.pdf page 7 + raise error.PyAsn1Error('Invalid leading 0x80 in sub-OID') + elif subId > 128: # Construct subid from a number of octets nextSubId = subId subId = 0 - while nextSubId >= 128 and index < substrateLen: + while nextSubId >= 128: subId = (subId << 7) + (nextSubId & 0x7F) + if index >= substrateLen: + raise error.SubstrateUnderrunError( + 'Short substrate for sub-OID past %s' % (oid,) + ) + nextSubId = oct2int(head[index]) index = index + 1 - nextSubId = oct2int(substrate[index]) - if index == substrateLen: - raise error.SubstrateUnderrunError( - 'Short substrate for OID %s' % oid - ) subId = (subId << 7) + nextSubId - oid = oid + (subId,) - index = index + 1 - return self._createComponent(asn1Spec, tagSet, oid), substrate[index:] + oid = oid + (subId,) + return self._createComponent(asn1Spec, tagSet, oid), tail class RealDecoder(AbstractSimpleDecoder): protoComponent = univ.Real() def valueDecoder(self, fullSubstrate, substrate, asn1Spec, tagSet, - length, state, decodeFun): - substrate = substrate[:length] - if not length: - raise error.SubstrateUnderrunError('Short substrate for Real') - fo = oct2int(substrate[0]); substrate = substrate[1:] - if fo & 0x40: # infinite value - value = fo & 0x01 and '-inf' or 'inf' - elif fo & 0x80: # binary enoding - if fo & 0x11 == 0: - n = 1 - elif fo & 
0x01: - n = 2 - elif fo & 0x02: - n = 3 - else: - n = oct2int(substrate[0]) - eo, substrate = substrate[:n], substrate[n:] - if not eo or not substrate: + length, state, decodeFun, substrateFun): + head, tail = substrate[:length], substrate[length:] + if not head: + return self._createComponent(asn1Spec, tagSet, 0.0), tail + fo = oct2int(head[0]); head = head[1:] + if fo & 0x80: # binary enoding + n = (fo & 0x03) + 1 + if n == 4: + n = oct2int(head[0]) + eo, head = head[:n], head[n:] + if not eo or not head: raise error.PyAsn1Error('Real exponent screwed') - e = 0 + e = oct2int(eo[0]) & 0x80 and -1 or 0 while eo: # exponent e <<= 8 e |= oct2int(eo[0]) eo = eo[1:] p = 0 - while substrate: # value + while head: # value p <<= 8 - p |= oct2int(substrate[0]) - substrate = substrate[1:] + p |= oct2int(head[0]) + head = head[1:] if fo & 0x40: # sign bit p = -p value = (p, 2, e) + elif fo & 0x40: # infinite value + value = fo & 0x01 and '-inf' or 'inf' elif fo & 0xc0 == 0: # character encoding try: if fo & 0x3 == 0x1: # NR1 - value = (int(substrate), 10, 0) + value = (int(head), 10, 0) elif fo & 0x3 == 0x2: # NR2 - value = float(substrate) + value = float(head) elif fo & 0x3 == 0x3: # NR3 - value = float(substrate) + value = float(head) else: raise error.SubstrateUnderrunError( 'Unknown NR (tag %s)' % fo @@ -281,13 +292,11 @@ class RealDecoder(AbstractSimpleDecoder): raise error.SubstrateUnderrunError( 'Bad character Real syntax' ) - elif fo & 0xc0 == 0x40: # special real value - pass else: raise error.SubstrateUnderrunError( 'Unknown encoding (tag %s)' % fo ) - return self._createComponent(asn1Spec, tagSet, value), substrate + return self._createComponent(asn1Spec, tagSet, value), tail class SequenceDecoder(AbstractConstructedDecoder): protoComponent = univ.Sequence() @@ -301,17 +310,15 @@ class SequenceDecoder(AbstractConstructedDecoder): return r.getComponentPositionNearType(t, idx) def valueDecoder(self, fullSubstrate, substrate, asn1Spec, tagSet, - length, state, 
decodeFun): - substrate = substrate[:length] + length, state, decodeFun, substrateFun): + head, tail = substrate[:length], substrate[length:] r = self._createComponent(asn1Spec, tagSet) idx = 0 - if not decodeFun: - return r, substrate - while substrate: + if substrateFun: + return substrateFun(r, substrate, length) + while head: asn1Spec = self._getComponentTagMap(r, idx) - component, substrate = decodeFun( - substrate, asn1Spec - ) + component, head = decodeFun(head, asn1Spec) idx = self._getComponentPositionByType( r, component.getEffectiveTagSet(), idx ) @@ -319,18 +326,19 @@ class SequenceDecoder(AbstractConstructedDecoder): idx = idx + 1 r.setDefaultComponents() r.verifySizeSpec() - return r, substrate + return r, tail def indefLenValueDecoder(self, fullSubstrate, substrate, asn1Spec, tagSet, - length, state, decodeFun): + length, state, decodeFun, substrateFun): r = self._createComponent(asn1Spec, tagSet) + if substrateFun: + return substrateFun(r, substrate, length) idx = 0 while substrate: asn1Spec = self._getComponentTagMap(r, idx) - if not decodeFun: - return r, substrate component, substrate = decodeFun(substrate, asn1Spec) - if component == eoo.endOfOctets: + if eoo.endOfOctets.isSameTypeWith(component) and \ + component == eoo.endOfOctets: break idx = self._getComponentPositionByType( r, component.getEffectiveTagSet(), idx @@ -348,32 +356,31 @@ class SequenceDecoder(AbstractConstructedDecoder): class SequenceOfDecoder(AbstractConstructedDecoder): protoComponent = univ.SequenceOf() def valueDecoder(self, fullSubstrate, substrate, asn1Spec, tagSet, - length, state, decodeFun): - substrate = substrate[:length] + length, state, decodeFun, substrateFun): + head, tail = substrate[:length], substrate[length:] r = self._createComponent(asn1Spec, tagSet) + if substrateFun: + return substrateFun(r, substrate, length) asn1Spec = r.getComponentType() idx = 0 - if not decodeFun: - return r, substrate - while substrate: - component, substrate = decodeFun( - 
substrate, asn1Spec - ) + while head: + component, head = decodeFun(head, asn1Spec) r.setComponentByPosition(idx, component, asn1Spec is None) idx = idx + 1 r.verifySizeSpec() - return r, substrate + return r, tail def indefLenValueDecoder(self, fullSubstrate, substrate, asn1Spec, tagSet, - length, state, decodeFun): + length, state, decodeFun, substrateFun): r = self._createComponent(asn1Spec, tagSet) + if substrateFun: + return substrateFun(r, substrate, length) asn1Spec = r.getComponentType() idx = 0 - if not decodeFun: - return r, substrate while substrate: component, substrate = decodeFun(substrate, asn1Spec) - if component == eoo.endOfOctets: + if eoo.endOfOctets.isSameTypeWith(component) and \ + component == eoo.endOfOctets: break r.setComponentByPosition(idx, component, asn1Spec is None) idx = idx + 1 @@ -401,43 +408,68 @@ class SetOfDecoder(SequenceOfDecoder): class ChoiceDecoder(AbstractConstructedDecoder): protoComponent = univ.Choice() + tagFormats = (tag.tagFormatSimple, tag.tagFormatConstructed) def valueDecoder(self, fullSubstrate, substrate, asn1Spec, tagSet, - length, state, decodeFun): - substrate = substrate[:length] + length, state, decodeFun, substrateFun): + head, tail = substrate[:length], substrate[length:] r = self._createComponent(asn1Spec, tagSet) - if not decodeFun: - return r, substrate + if substrateFun: + return substrateFun(r, substrate, length) if r.getTagSet() == tagSet: # explicitly tagged Choice - component, substrate = decodeFun( - substrate, r.getComponentTagMap() + component, head = decodeFun( + head, r.getComponentTagMap() ) else: - component, substrate = decodeFun( - substrate, r.getComponentTagMap(), tagSet, length, state + component, head = decodeFun( + head, r.getComponentTagMap(), tagSet, length, state ) if isinstance(component, univ.Choice): effectiveTagSet = component.getEffectiveTagSet() else: effectiveTagSet = component.getTagSet() r.setComponentByType(effectiveTagSet, component, 0, asn1Spec is None) - return r, 
substrate + return r, tail - indefLenValueDecoder = valueDecoder + def indefLenValueDecoder(self, fullSubstrate, substrate, asn1Spec, tagSet, + length, state, decodeFun, substrateFun): + r = self._createComponent(asn1Spec, tagSet) + if substrateFun: + return substrateFun(r, substrate, length) + if r.getTagSet() == tagSet: # explicitly tagged Choice + component, substrate = decodeFun(substrate, r.getComponentTagMap()) + eooMarker, substrate = decodeFun(substrate) # eat up EOO marker + if not eoo.endOfOctets.isSameTypeWith(eooMarker) or \ + eooMarker != eoo.endOfOctets: + raise error.PyAsn1Error('No EOO seen before substrate ends') + else: + component, substrate= decodeFun( + substrate, r.getComponentTagMap(), tagSet, length, state + ) + if isinstance(component, univ.Choice): + effectiveTagSet = component.getEffectiveTagSet() + else: + effectiveTagSet = component.getTagSet() + r.setComponentByType(effectiveTagSet, component, 0, asn1Spec is None) + return r, substrate class AnyDecoder(AbstractSimpleDecoder): protoComponent = univ.Any() + tagFormats = (tag.tagFormatSimple, tag.tagFormatConstructed) def valueDecoder(self, fullSubstrate, substrate, asn1Spec, tagSet, - length, state, decodeFun): + length, state, decodeFun, substrateFun): if asn1Spec is None or \ asn1Spec is not None and tagSet != asn1Spec.getTagSet(): # untagged Any container, recover inner header substrate length = length + len(fullSubstrate) - len(substrate) substrate = fullSubstrate - substrate = substrate[:length] - return self._createComponent(asn1Spec, tagSet, value=substrate), '' + if substrateFun: + return substrateFun(self._createComponent(asn1Spec, tagSet), + substrate, length) + head, tail = substrate[:length], substrate[length:] + return self._createComponent(asn1Spec, tagSet, value=head), tail def indefLenValueDecoder(self, fullSubstrate, substrate, asn1Spec, tagSet, - length, state, decodeFun): + length, state, decodeFun, substrateFun): if asn1Spec is not None and tagSet == 
asn1Spec.getTagSet(): # tagged Any type -- consume header substrate header = '' @@ -450,11 +482,12 @@ class AnyDecoder(AbstractSimpleDecoder): # Any components do not inherit initial tag asn1Spec = self.protoComponent - if not decodeFun: - return r, substrate + if substrateFun: + return substrateFun(r, substrate, length) while substrate: component, substrate = decodeFun(substrate, asn1Spec) - if component == eoo.endOfOctets: + if eoo.endOfOctets.isSameTypeWith(component) and \ + component == eoo.endOfOctets: break r = r + component else: @@ -550,7 +583,10 @@ class Decoder: self.__tagSetCache = {} def __call__(self, substrate, asn1Spec=None, tagSet=None, - length=None, state=stDecodeTag, recursiveFlag=1): + length=None, state=stDecodeTag, recursiveFlag=1, + substrateFun=None): + if debug.logger & debug.flagDecoder: + debug.logger('decoder called at scope %s with state %d, working with up to %d octets of substrate: %s' % (debug.scope, state, len(substrate), debug.hexdump(substrate))) fullSubstrate = substrate while state != stStop: if state == stDecodeTag: @@ -559,6 +595,9 @@ class Decoder: raise error.SubstrateUnderrunError( 'Short octet stream on tag decoding' ) + if not isOctetsType(substrate) and \ + not isinstance(substrate, univ.OctetString): + raise error.PyAsn1Error('Bad octet stream type') firstOctet = substrate[0] substrate = substrate[1:] @@ -598,6 +637,7 @@ class Decoder: else: tagSet = lastTag + tagSet state = stDecodeLength + debug.logger and debug.logger & debug.flagDecoder and debug.logger('tag decoded into %r, decoding length' % tagSet) if state == stDecodeLength: # Decode length if not substrate: @@ -625,12 +665,13 @@ class Decoder: for char in lengthString: length = (length << 8) | oct2int(char) size = size + 1 - state = stGetValueDecoder substrate = substrate[size:] if length != -1 and len(substrate) < length: raise error.SubstrateUnderrunError( '%d-octet short' % (length - len(substrate)) ) + state = stGetValueDecoder + debug.logger and 
debug.logger & debug.flagDecoder and debug.logger('value length decoded into %d, payload substrate is: %s' % (length, debug.hexdump(length == -1 and substrate or substrate[:length]))) if state == stGetValueDecoder: if asn1Spec is None: state = stGetValueDecoderByTag @@ -669,14 +710,27 @@ class Decoder: state = stDecodeValue else: state = stTryAsExplicitTag + if debug.logger and debug.logger & debug.flagDecoder: + debug.logger('codec %s chosen by a built-in type, decoding %s' % (concreteDecoder and concreteDecoder.__class__.__name__ or "<none>", state == stDecodeValue and 'value' or 'as explicit tag')) + debug.scope.push(concreteDecoder is None and '?' or concreteDecoder.protoComponent.__class__.__name__) if state == stGetValueDecoderByAsn1Spec: if isinstance(asn1Spec, (dict, tagmap.TagMap)): if tagSet in asn1Spec: __chosenSpec = asn1Spec[tagSet] else: __chosenSpec = None + if debug.logger and debug.logger & debug.flagDecoder: + debug.logger('candidate ASN.1 spec is a map of:') + for t, v in asn1Spec.getPosMap().items(): + debug.logger(' %r -> %s' % (t, v.__class__.__name__)) + if asn1Spec.getNegMap(): + debug.logger('but neither of: ') + for i in asn1Spec.getNegMap().items(): + debug.logger(' %r -> %s' % (t, v.__class__.__name__)) + debug.logger('new candidate ASN.1 spec is %s, chosen by %r' % (__chosenSpec is None and '<none>' or __chosenSpec.__class__.__name__, tagSet)) else: __chosenSpec = asn1Spec + debug.logger and debug.logger & debug.flagDecoder and debug.logger('candidate ASN.1 spec is %s' % asn1Spec.__class__.__name__) if __chosenSpec is not None and ( tagSet == __chosenSpec.getTagSet() or \ tagSet in __chosenSpec.getTagMap() @@ -687,9 +741,11 @@ class Decoder: __chosenSpec.typeId in self.__typeMap: # ambiguous type concreteDecoder = self.__typeMap[__chosenSpec.typeId] + debug.logger and debug.logger & debug.flagDecoder and debug.logger('value decoder chosen for an ambiguous type by type ID %s' % (__chosenSpec.typeId,)) elif baseTagSet in self.__tagMap: # 
base type or tagged subtype concreteDecoder = self.__tagMap[baseTagSet] + debug.logger and debug.logger & debug.flagDecoder and debug.logger('value decoder chosen by base %r' % (baseTagSet,)) else: concreteDecoder = None if concreteDecoder: @@ -700,8 +756,13 @@ class Decoder: elif tagSet == self.__endOfOctetsTagSet: concreteDecoder = self.__tagMap[tagSet] state = stDecodeValue + debug.logger and debug.logger & debug.flagDecoder and debug.logger('end-of-octets found') else: + concreteDecoder = None state = stTryAsExplicitTag + if debug.logger and debug.logger & debug.flagDecoder: + debug.logger('codec %s chosen by ASN.1 spec, decoding %s' % (state == stDecodeValue and concreteDecoder.__class__.__name__ or "<none>", state == stDecodeValue and 'value' or 'as explicit tag')) + debug.scope.push(__chosenSpec is None and '?' or __chosenSpec.__class__.__name__) if state == stTryAsExplicitTag: if tagSet and \ tagSet[0][1] == tag.tagFormatConstructed and \ @@ -710,34 +771,35 @@ class Decoder: concreteDecoder = explicitTagDecoder state = stDecodeValue else: + concreteDecoder = None state = self.defaultErrorState + debug.logger and debug.logger & debug.flagDecoder and debug.logger('codec %s chosen, decoding %s' % (concreteDecoder and concreteDecoder.__class__.__name__ or "<none>", state == stDecodeValue and 'value' or 'as failure')) if state == stDumpRawValue: concreteDecoder = self.defaultRawDecoder + debug.logger and debug.logger & debug.flagDecoder and debug.logger('codec %s chosen, decoding value' % concreteDecoder.__class__.__name__) state = stDecodeValue if state == stDecodeValue: - if recursiveFlag: - decodeFun = self - else: - decodeFun = None + if recursiveFlag == 0 and not substrateFun: # legacy + substrateFun = lambda a,b,c: (a,b[:c]) if length == -1: # indef length value, substrate = concreteDecoder.indefLenValueDecoder( fullSubstrate, substrate, asn1Spec, tagSet, length, - stGetValueDecoder, decodeFun + stGetValueDecoder, self, substrateFun ) else: - value, 
_substrate = concreteDecoder.valueDecoder( + value, substrate = concreteDecoder.valueDecoder( fullSubstrate, substrate, asn1Spec, tagSet, length, - stGetValueDecoder, decodeFun + stGetValueDecoder, self, substrateFun ) - if recursiveFlag: - substrate = substrate[length:] - else: - substrate = _substrate state = stStop + debug.logger and debug.logger & debug.flagDecoder and debug.logger('codec %s yields type %s, value:\n%s\n...remaining substrate is: %s' % (concreteDecoder.__class__.__name__, value.__class__.__name__, value.prettyPrint(), substrate and debug.hexdump(substrate) or '<none>')) if state == stErrorCondition: raise error.PyAsn1Error( '%r not in asn1Spec: %r' % (tagSet, asn1Spec) ) + if debug.logger and debug.logger & debug.flagDecoder: + debug.scope.pop() + debug.logger('decoder left scope %s, call completed' % debug.scope) return value, substrate decode = Decoder(tagMap, typeMap) diff --git a/libs/pyasn1/codec/ber/encoder.py b/libs/pyasn1/codec/ber/encoder.py index 2149b0b..173949d 100644 --- a/libs/pyasn1/codec/ber/encoder.py +++ b/libs/pyasn1/codec/ber/encoder.py @@ -1,8 +1,8 @@ # BER encoder from pyasn1.type import base, tag, univ, char, useful from pyasn1.codec.ber import eoo -from pyasn1.compat.octets import int2oct, ints2octs, null, str2octs -from pyasn1 import error +from pyasn1.compat.octets import int2oct, oct2int, ints2octs, null, str2octs +from pyasn1 import debug, error class Error(Exception): pass @@ -78,9 +78,24 @@ class ExplicitlyTaggedItemEncoder(AbstractItemEncoder): explicitlyTaggedItemEncoder = ExplicitlyTaggedItemEncoder() +class BooleanEncoder(AbstractItemEncoder): + supportIndefLenMode = 0 + _true = ints2octs((1,)) + _false = ints2octs((0,)) + def encodeValue(self, encodeFun, value, defMode, maxChunkSize): + return value and self._true or self._false, 0 + class IntegerEncoder(AbstractItemEncoder): supportIndefLenMode = 0 + supportCompactZero = False def encodeValue(self, encodeFun, value, defMode, maxChunkSize): + if value == 0: # 
shortcut for zero value + if self.supportCompactZero: + # this seems to be a correct way for encoding zeros + return null, 0 + else: + # this seems to be a widespread way for encoding zeros + return ints2octs((0,)), 0 octets = [] value = int(value) # to save on ops on asn1 type while 1: @@ -149,18 +164,15 @@ class ObjectIdentifierEncoder(AbstractItemEncoder): index = 5 else: if len(oid) < 2: - raise error.PyAsn1Error('Short OID %s' % value) + raise error.PyAsn1Error('Short OID %s' % (value,)) # Build the first twos - index = 0 - subid = oid[index] * 40 - subid = subid + oid[index+1] - if subid < 0 or subid > 0xff: + if oid[0] > 6 or oid[1] > 39 or oid[0] == 6 and oid[1] > 15: raise error.PyAsn1Error( - 'Initial sub-ID overflow %s in OID %s' % (oid[index:], value) + 'Initial sub-ID overflow %s in OID %s' % (oid[:2], value) ) - octets = (subid,) - index = index + 2 + octets = (oid[0] * 40 + oid[1],) + index = 2 # Cycle through subids for subid in oid[index:]: @@ -184,6 +196,7 @@ class ObjectIdentifierEncoder(AbstractItemEncoder): return ints2octs(octets), 0 class RealEncoder(AbstractItemEncoder): + supportIndefLenMode = 0 def encodeValue(self, encodeFun, value, defMode, maxChunkSize): if value.isPlusInfinity(): return int2oct(0x40), 0 @@ -206,9 +219,11 @@ class RealEncoder(AbstractItemEncoder): m >>= 1 e += 1 eo = null - while e: + while e not in (0, -1): eo = int2oct(e&0xff) + eo e >>= 8 + if e == 0 and eo and oct2int(eo[0]) & 0x80: + eo = int2oct(0) + eo n = len(eo) if n > 0xff: raise error.PyAsn1Error('Real exponent overflow') @@ -268,7 +283,7 @@ class AnyEncoder(OctetStringEncoder): tagMap = { eoo.endOfOctets.tagSet: EndOfOctetsEncoder(), - univ.Boolean.tagSet: IntegerEncoder(), + univ.Boolean.tagSet: BooleanEncoder(), univ.Integer.tagSet: IntegerEncoder(), univ.BitString.tagSet: BitStringEncoder(), univ.OctetString.tagSet: OctetStringEncoder(), @@ -313,6 +328,7 @@ class Encoder: self.__typeMap = typeMap def __call__(self, value, defMode=1, maxChunkSize=0): + 
debug.logger & debug.flagEncoder and debug.logger('encoder called in %sdef mode, chunk size %s for type %s, value:\n%s' % (not defMode and 'in' or '', maxChunkSize, value.__class__.__name__, value.prettyPrint())) tagSet = value.getTagSet() if len(tagSet) > 1: concreteEncoder = explicitlyTaggedItemEncoder @@ -322,13 +338,16 @@ class Encoder: elif tagSet in self.__tagMap: concreteEncoder = self.__tagMap[tagSet] else: - baseTagSet = value.baseTagSet - if baseTagSet in self.__tagMap: - concreteEncoder = self.__tagMap[baseTagSet] + tagSet = value.baseTagSet + if tagSet in self.__tagMap: + concreteEncoder = self.__tagMap[tagSet] else: - raise Error('No encoder for %s' % value) - return concreteEncoder.encode( + raise Error('No encoder for %s' % (value,)) + debug.logger & debug.flagEncoder and debug.logger('using value codec %s chosen by %r' % (concreteEncoder.__class__.__name__, tagSet)) + substrate = concreteEncoder.encode( self, value, defMode, maxChunkSize ) + debug.logger & debug.flagEncoder and debug.logger('built %s octets of substrate: %s\nencoder completed' % (len(substrate), debug.hexdump(substrate))) + return substrate encode = Encoder(tagMap, typeMap) diff --git a/libs/pyasn1/codec/cer/__init__.py b/libs/pyasn1/codec/cer/__init__.py index e69de29..8c3066b 100644 --- a/libs/pyasn1/codec/cer/__init__.py +++ b/libs/pyasn1/codec/cer/__init__.py @@ -0,0 +1 @@ +# This file is necessary to make this directory a package. 
diff --git a/libs/pyasn1/codec/cer/decoder.py b/libs/pyasn1/codec/cer/decoder.py index 71395d2..9fd37c1 100644 --- a/libs/pyasn1/codec/cer/decoder.py +++ b/libs/pyasn1/codec/cer/decoder.py @@ -7,22 +7,25 @@ from pyasn1 import error class BooleanDecoder(decoder.AbstractSimpleDecoder): protoComponent = univ.Boolean(0) def valueDecoder(self, fullSubstrate, substrate, asn1Spec, tagSet, length, - state, decodeFun): - substrate = substrate[:length] - if not substrate: + state, decodeFun, substrateFun): + head, tail = substrate[:length], substrate[length:] + if not head: raise error.PyAsn1Error('Empty substrate') - byte = oct2int(substrate[0]) + byte = oct2int(head[0]) + # CER/DER specifies encoding of TRUE as 0xFF and FALSE as 0x0, while + # BER allows any non-zero value as TRUE; cf. sections 8.2.2. and 11.1 + # in http://www.itu.int/ITU-T/studygroups/com17/languages/X.690-0207.pdf if byte == 0xff: value = 1 elif byte == 0x00: value = 0 else: raise error.PyAsn1Error('Boolean CER violation: %s' % byte) - return self._createComponent(asn1Spec, tagSet, value), substrate[1:] + return self._createComponent(asn1Spec, tagSet, value), tail tagMap = decoder.tagMap.copy() tagMap.update({ - univ.Boolean.tagSet: BooleanDecoder(), + univ.Boolean.tagSet: BooleanDecoder() }) typeMap = decoder.typeMap diff --git a/libs/pyasn1/codec/der/__init__.py b/libs/pyasn1/codec/der/__init__.py index e69de29..8c3066b 100644 --- a/libs/pyasn1/codec/der/__init__.py +++ b/libs/pyasn1/codec/der/__init__.py @@ -0,0 +1 @@ +# This file is necessary to make this directory a package. 
diff --git a/libs/pyasn1/codec/der/decoder.py b/libs/pyasn1/codec/der/decoder.py index 0f5a24c..604abec 100644 --- a/libs/pyasn1/codec/der/decoder.py +++ b/libs/pyasn1/codec/der/decoder.py @@ -2,4 +2,8 @@ from pyasn1.type import univ from pyasn1.codec.cer import decoder -decode = decoder.Decoder(decoder.tagMap, decoder.typeMap) +tagMap = decoder.tagMap +typeMap = decoder.typeMap +Decoder = decoder.Decoder + +decode = Decoder(tagMap, typeMap) diff --git a/libs/pyasn1/compat/__init__.py b/libs/pyasn1/compat/__init__.py index e69de29..8c3066b 100644 --- a/libs/pyasn1/compat/__init__.py +++ b/libs/pyasn1/compat/__init__.py @@ -0,0 +1 @@ +# This file is necessary to make this directory a package. diff --git a/libs/pyasn1/compat/octets.py b/libs/pyasn1/compat/octets.py index d0303ea..f7f2a29 100644 --- a/libs/pyasn1/compat/octets.py +++ b/libs/pyasn1/compat/octets.py @@ -8,6 +8,7 @@ if version_info[0] <= 2: octs2ints = lambda s: [ oct2int(x) for x in s ] str2octs = lambda x: x octs2str = lambda x: x + isOctetsType = lambda s: isinstance(s, str) else: ints2octs = bytes int2oct = lambda x: ints2octs((x,)) @@ -16,3 +17,4 @@ else: octs2ints = lambda s: [ x for x in s ] str2octs = lambda x: x.encode() octs2str = lambda x: x.decode() + isOctetsType = lambda s: isinstance(s, bytes) diff --git a/libs/pyasn1/debug.py b/libs/pyasn1/debug.py new file mode 100644 index 0000000..c27cb1d --- /dev/null +++ b/libs/pyasn1/debug.py @@ -0,0 +1,65 @@ +import sys +from pyasn1.compat.octets import octs2ints +from pyasn1 import error +from pyasn1 import __version__ + +flagNone = 0x0000 +flagEncoder = 0x0001 +flagDecoder = 0x0002 +flagAll = 0xffff + +flagMap = { + 'encoder': flagEncoder, + 'decoder': flagDecoder, + 'all': flagAll + } + +class Debug: + defaultPrinter = sys.stderr.write + def __init__(self, *flags): + self._flags = flagNone + self._printer = self.defaultPrinter + self('running pyasn1 version %s' % __version__) + for f in flags: + if f not in flagMap: + raise 
error.PyAsn1Error('bad debug flag %s' % (f,)) + self._flags = self._flags | flagMap[f] + self('debug category \'%s\' enabled' % f) + + def __str__(self): + return 'logger %s, flags %x' % (self._printer, self._flags) + + def __call__(self, msg): + self._printer('DBG: %s\n' % msg) + + def __and__(self, flag): + return self._flags & flag + + def __rand__(self, flag): + return flag & self._flags + +logger = 0 + +def setLogger(l): + global logger + logger = l + +def hexdump(octets): + return ' '.join( + [ '%s%.2X' % (n%16 == 0 and ('\n%.5d: ' % n) or '', x) + for n,x in zip(range(len(octets)), octs2ints(octets)) ] + ) + +class Scope: + def __init__(self): + self._list = [] + + def __str__(self): return '.'.join(self._list) + + def push(self, token): + self._list.append(token) + + def pop(self): + return self._list.pop() + +scope = Scope() diff --git a/libs/pyasn1/type/__init__.py b/libs/pyasn1/type/__init__.py index e69de29..8c3066b 100644 --- a/libs/pyasn1/type/__init__.py +++ b/libs/pyasn1/type/__init__.py @@ -0,0 +1 @@ +# This file is necessary to make this directory a package. 
diff --git a/libs/pyasn1/type/base.py b/libs/pyasn1/type/base.py index db31671..4087371 100644 --- a/libs/pyasn1/type/base.py +++ b/libs/pyasn1/type/base.py @@ -120,7 +120,12 @@ class AbstractSimpleAsn1Item(Asn1ItemBase): def prettyIn(self, value): return value def prettyOut(self, value): return str(value) - def prettyPrint(self, scope=0): return self.prettyOut(self._value) + def prettyPrint(self, scope=0): + if self._value is noValue: + return '<no value>' + else: + return self.prettyOut(self._value) + # XXX Compatibility stub def prettyPrinter(self, scope=0): return self.prettyPrint(scope) diff --git a/libs/pyasn1/type/namedtype.py b/libs/pyasn1/type/namedtype.py index aa9c567..48967a5 100644 --- a/libs/pyasn1/type/namedtype.py +++ b/libs/pyasn1/type/namedtype.py @@ -60,12 +60,12 @@ class NamedTypes: tagMap = self.__namedTypes[idx].getType().getTagMap() for t in tagMap.getPosMap(): if t in self.__tagToPosIdx: - raise error.PyAsn1Error('Duplicate type %s' % t) + raise error.PyAsn1Error('Duplicate type %s' % (t,)) self.__tagToPosIdx[t] = idx try: return self.__tagToPosIdx[tagSet] except KeyError: - raise error.PyAsn1Error('Type %s not found' % tagSet) + raise error.PyAsn1Error('Type %s not found' % (tagSet,)) def getNameByPosition(self, idx): try: @@ -79,12 +79,12 @@ class NamedTypes: idx = idx - 1 n = self.__namedTypes[idx].getName() if n in self.__nameToPosIdx: - raise error.PyAsn1Error('Duplicate name %s' % n) + raise error.PyAsn1Error('Duplicate name %s' % (n,)) self.__nameToPosIdx[n] = idx try: return self.__nameToPosIdx[name] except KeyError: - raise error.PyAsn1Error('Name %s not found' % name) + raise error.PyAsn1Error('Name %s not found' % (name,)) def __buildAmbigiousTagMap(self): ambigiousTypes = () diff --git a/libs/pyasn1/type/namedval.py b/libs/pyasn1/type/namedval.py index 815e2d4..d0fea7c 100644 --- a/libs/pyasn1/type/namedval.py +++ b/libs/pyasn1/type/namedval.py @@ -15,10 +15,10 @@ class NamedValues: name = namedValue val = automaticVal if name in 
self.nameToValIdx: - raise error.PyAsn1Error('Duplicate name %s' % name) + raise error.PyAsn1Error('Duplicate name %s' % (name,)) self.nameToValIdx[name] = val if val in self.valToNameIdx: - raise error.PyAsn1Error('Duplicate value %s' % name) + raise error.PyAsn1Error('Duplicate value %s=%s' % (name, val)) self.valToNameIdx[val] = name self.namedValues = self.namedValues + ((name, val),) automaticVal = automaticVal + 1 diff --git a/libs/pyasn1/type/tag.py b/libs/pyasn1/type/tag.py index 0cf67eb..1144907 100644 --- a/libs/pyasn1/type/tag.py +++ b/libs/pyasn1/type/tag.py @@ -18,7 +18,7 @@ class Tag: def __init__(self, tagClass, tagFormat, tagId): if tagId < 0: raise error.PyAsn1Error( - 'Negative tag ID (%s) not allowed' % tagId + 'Negative tag ID (%s) not allowed' % (tagId,) ) self.__tag = (tagClass, tagFormat, tagId) self.uniq = (tagClass, tagId) diff --git a/libs/pyasn1/type/tagmap.py b/libs/pyasn1/type/tagmap.py index 53e1791..7cec3a1 100644 --- a/libs/pyasn1/type/tagmap.py +++ b/libs/pyasn1/type/tagmap.py @@ -28,7 +28,7 @@ class TagMap: def clone(self, parentType, tagMap, uniq=False): if self.__defType is not None and tagMap.getDef() is not None: - raise error.PyAsn1Error('Duplicate default value at %s' % self) + raise error.PyAsn1Error('Duplicate default value at %s' % (self,)) if tagMap.getDef() is not None: defType = tagMap.getDef() else: @@ -37,7 +37,7 @@ class TagMap: posMap = self.__posMap.copy() for k in tagMap.getPosMap(): if uniq and k in posMap: - raise error.PyAsn1Error('Duplicate positive key %s' % k) + raise error.PyAsn1Error('Duplicate positive key %s' % (k,)) posMap[k] = parentType negMap = self.__negMap.copy() diff --git a/libs/pyasn1/type/univ.py b/libs/pyasn1/type/univ.py index cb4f49b..9cd16f8 100644 --- a/libs/pyasn1/type/univ.py +++ b/libs/pyasn1/type/univ.py @@ -69,13 +69,18 @@ class Integer(base.AbstractSimpleAsn1Item): def prettyIn(self, value): if not isinstance(value, str): - return int(value) + try: + return int(value) + except: + 
raise error.PyAsn1Error( + 'Can\'t coerce %s into integer: %s' % (value, sys.exc_info()[1]) + ) r = self.__namedValues.getValue(value) if r is not None: return r try: return int(value) - except ValueError: + except: raise error.PyAsn1Error( 'Can\'t coerce %s into integer: %s' % (value, sys.exc_info()[1]) ) @@ -224,14 +229,14 @@ class BitString(base.AbstractSimpleAsn1Item): return tuple(r) else: raise error.PyAsn1Error( - 'Bad BIT STRING value notation %s' % value + 'Bad BIT STRING value notation %s' % (value,) ) else: for i in value.split(','): j = self.__namedValues.getValue(i) if j is None: raise error.PyAsn1Error( - 'Unknown bit identifier \'%s\'' % i + 'Unknown bit identifier \'%s\'' % (i,) ) if j >= len(r): r.extend([0]*(j-len(r)+1)) @@ -528,7 +533,7 @@ class Real(base.AbstractSimpleAsn1Item): ) if value[1] not in (2, 10): raise error.PyAsn1Error( - 'Prohibited base for Real value: %s' % value[1] + 'Prohibited base for Real value: %s' % (value[1],) ) if value[1] == 10: value = self.__normalizeBase10(value) @@ -648,7 +653,7 @@ class SetOf(base.AbstractConstructedAsn1Item): def _verifyComponent(self, idx, value): if self._componentType is not None and \ not self._componentType.isSuperTypeOf(value): - raise error.PyAsn1Error('Component type error %s' % value) + raise error.PyAsn1Error('Component type error %s' % (value,)) def getComponentByPosition(self, idx): return self._componentValues[idx] def setComponentByPosition(self, idx, value=None, verifyConstraints=True): @@ -924,9 +929,9 @@ class Choice(Set): return self._componentValues[self._currentIdx] >= other return NotImplemented if sys.version_info[0] <= 2: - def __nonzero__(self, other): return bool(self._componentValues) + def __nonzero__(self): return bool(self._componentValues) else: - def __bool__(self, other): return bool(self._componentValues) + def __bool__(self): return bool(self._componentValues) def __len__(self): return self._currentIdx is not None and 1 or 0 diff --git a/libs/pyutil/_version.py 
b/libs/pyutil/_version.py index 617d220..376b2b9 100644 --- a/libs/pyutil/_version.py +++ b/libs/pyutil/_version.py @@ -6,7 +6,7 @@ # pyutil.version_class for a description of what the different fields mean. __pkgname__ = "pyutil" -verstr = "1.9.3" +verstr = "1.9.7" try: from pyutil.version_class import Version as pyutil_Version __version__ = pyutil_Version(verstr) diff --git a/libs/pyutil/benchutil.py b/libs/pyutil/benchutil.py index 3e773a6..6c28634 100644 --- a/libs/pyutil/benchutil.py +++ b/libs/pyutil/benchutil.py @@ -1,4 +1,4 @@ -# Copyright (c) 2002-2012 Zooko Wilcox-O'Hearn +# Copyright (c) 2002-2013 Zooko Wilcox-O'Hearn # This file is part of pyutil; see README.rst for licensing terms. """ @@ -21,10 +21,10 @@ the second, e.g.: >>> rep_bench(fib, 25, UNITS_PER_SECOND=1000) best: 1.968e+00, 3th-best: 1.987e+00, mean: 2.118e+00, 3th-worst: 2.175e+00, worst: 2.503e+00 (of 10) -The output is reporting the number of milliseconds that executing the function -took, divided by N, from ten different invocations of fib(). It reports the -best, worst, M-th best, M-th worst, and mean, where "M" is the natural log of -the number of invocations (in this case 10). +The output is reporting the number of milliseconds that executing the +function took, divided by N, from ten different invocations of +fib(). It reports the best, worst, M-th best, M-th worst, and mean, +where "M" is 1/4 of the number of invocations (in this case 10). 2. Now run it with different values of N and look for patterns: @@ -74,10 +74,12 @@ and the main function is to make them be methods of the same object, e.g.: 4. Things to fix: - a. I used to have it hooked up to use the "hotshot" profiler on the code being - measured. I recently tried to change it to use the newer cProfile profiler - instead, but I don't understand the interface to cProfiler so it just gives an - exception if you pass profile=True. Please fix this and send me a patch. + a. 
I used to have it hooked up to use the "hotshot" profiler on the + code being measured. I recently tried to change it to use the newer + cProfile profiler instead, but I don't understand the interface to + cProfiler so it just gives an exception if you pass + profile=True. Please fix this and send me a patch. xxx change it to + statprof b. Wouldn't it be great if this script emitted results in a json format that was understood by a tool to make pretty interactive explorable graphs? The @@ -122,7 +124,7 @@ def mult(a, b): except TypeError: return to_decimal(a) * to_decimal(b) -def rep_bench(func, n, initfunc=None, MAXREPS=10, MAXTIME=60.0, profile=False, profresults="pyutil-benchutil.prof", UNITS_PER_SECOND=1, quiet=False): +def rep_bench(func, n, runtime=1.0, initfunc=None, MAXREPS=10, MAXTIME=60.0, profile=False, profresults="pyutil-benchutil.prof", UNITS_PER_SECOND=1, quiet=False): """ Will run the func up to MAXREPS times, but won't start a new run if MAXTIME (wall-clock time) has already elapsed (unless MAXTIME is None). @@ -130,33 +132,43 @@ def rep_bench(func, n, initfunc=None, MAXREPS=10, MAXTIME=60.0, profile=False, p @param quiet Don't print anything--just return the results dict. """ assert isinstance(n, int), (n, type(n)) + global worstemptymeasure + emsta = clock() + do_nothing(2**32) + emstop = clock() + empty = emstop - emsta + if empty > worstemptymeasure: + worstemptymeasure = empty + if (worstemptymeasure*2) >= runtime: + raise BadMeasure("Apparently simply invoking an empty Python function can take as long as %0.10f seconds, and we were running iterations for only about %0.10f seconds. So the measurement of the runtime of the code under benchmark is not reliable. 
Please pass a higher number for the 'runtime' argument to bench_it().") + startwallclocktime = time.time() - tls = [] # elapsed time in seconds + tls = [] # (elapsed time per iter in seconds, iters) bmes = [] while ((len(tls) < MAXREPS) or (MAXREPS is None)) and ((MAXTIME is None) or ((time.time() - startwallclocktime) < MAXTIME)): if initfunc: initfunc(n) try: - tl = bench_it(func, n, profile=profile, profresults=profresults) + tl, iters = bench_it(func, n, runtime=runtime, profile=profile, profresults=profresults) except BadMeasure, bme: bmes.append(bme) else: - tls.append(tl) + tls.append((tl, iters)) if len(tls) == 0: raise Exception("Couldn't get any measurements within time limits or number-of-attempts limits. Maybe something is wrong with your clock? %s" % (bmes,)) - sumtls = reduce(operator.__add__, tls) + sumtls = sum([tl for (tl, iters) in tls]) mean = sumtls / len(tls) tls.sort() - worst = tls[-1] - best = tls[0] - _assert(best > worstemptymeasure*MARGINOFERROR, "%s(n=%s) took %0.10f seconds, but we cannot measure times much less than about %0.10f seconds. Try a more time-consuming variant (such as higher n)." % (func, n, best, worstemptymeasure*MARGINOFERROR,)) + worst = tls[-1][0] + best = tls[0][0] + m = len(tls)/4 if m > 0: - mthbest = tls[m-1] - mthworst = tls[-m] + mthbest = tls[m-1][0] + mthworst = tls[-m][0] else: - mthbest = tls[0] - mthworst = tls[-1] + mthbest = tls[0][0] + mthworst = tls[-1][0] # The +/-0 index is the best/worst, the +/-1 index is the 2nd-best/worst, # etc, so we use mp1 to name it. 
@@ -196,26 +208,22 @@ class BadMeasure(Exception): def do_nothing(n): pass -def bench_it(func, n, profile=False, profresults="pyutil-benchutil.prof"): +def bench_it(func, n, runtime=1.0, profile=False, profresults="pyutil-benchutil.prof"): if profile: - st = clock() - cProfile.run('func(n)', profresults) - sto = clock() + raise NotImplementedException() else: + iters = 0 st = clock() - func(n) + deadline = st + runtime sto = clock() + while sto < deadline: + func(n) + iters += 1 + sto = clock() timeelapsed = sto - st - if timeelapsed <= 0: - raise BadMeasure(timeelapsed) - global worstemptymeasure - emsta = clock() - do_nothing(2**32) - emstop = clock() - empty = emstop - emsta - if empty > worstemptymeasure: - worstemptymeasure = empty - return timeelapsed + if (timeelapsed <= 0) or (iters == 0): + raise BadMeasure((timeelapsed, iters)) + return (timeelapsed / iters, iters) def bench(func, initfunc=None, TOPXP=21, MAXREPS=5, MAXTIME=60.0, profile=False, profresults="pyutil-benchutil.prof", outputjson=False, jsonresultsfname="pyutil-benchutil-results.json", UNITS_PER_SECOND=1): BSIZES = [] diff --git a/libs/pyutil/benchutil.py~ b/libs/pyutil/benchutil.py~ index 3ec323e..a33111e 100644 --- a/libs/pyutil/benchutil.py~ +++ b/libs/pyutil/benchutil.py~ @@ -1,4 +1,4 @@ -# Copyright (c) 2002-2012 Zooko Wilcox-O'Hearn +# Copyright (c) 2002-2013 Zooko Wilcox-O'Hearn # This file is part of pyutil; see README.rst for licensing terms. """ @@ -104,6 +104,24 @@ def makeg(func): func() return blah +def to_decimal(x): + """ + See if D(x) returns something. If instead it raises TypeError, x must have been a float, so convert it to Decimal by way of string. (In Python >= 2.7, D(x) does this automatically. + """ + try: + return D(x) + except TypeError: + return D("%0.54f" % (x,)) + +def mult(a, b): + """ + If we get TypeError from * (possibly because one is float and the other is Decimal), then promote them both to Decimal. 
+ """ + try: + return a * b + except TypeError: + return to_decimal(a) * to_decimal(b) + def rep_bench(func, n, initfunc=None, MAXREPS=10, MAXTIME=60.0, profile=False, profresults="pyutil-benchutil.prof", UNITS_PER_SECOND=1, quiet=False): """ Will run the func up to MAXREPS times, but won't start a new run if MAXTIME @@ -144,12 +162,12 @@ def rep_bench(func, n, initfunc=None, MAXREPS=10, MAXTIME=60.0, profile=False, p # etc, so we use mp1 to name it. mp1 = m+1 res = { - 'worst': (worst*UNITS_PER_SECOND)/n, - 'best': (best*UNITS_PER_SECOND)/n, + 'worst': mult(worst, UNITS_PER_SECOND)/n, + 'best': mult(best, UNITS_PER_SECOND)/n, 'mp1': mp1, - 'mth-best': (mthbest*UNITS_PER_SECOND)/n, - 'mth-worst': (mthworst*UNITS_PER_SECOND)/n, - 'mean': (mean*UNITS_PER_SECOND)/n, + 'mth-best': mult(mthbest, UNITS_PER_SECOND)/n, + 'mth-worst': mult(mthworst, UNITS_PER_SECOND)/n, + 'mean': mult(mean, UNITS_PER_SECOND)/n, 'num': len(tls), } @@ -178,7 +196,10 @@ class BadMeasure(Exception): def do_nothing(n): pass -def bench_it(func, n, profile=False, profresults="pyutil-benchutil.prof"): +def bench_it(func, n, runtime=0.1, profile=False, profresults="pyutil-benchutil.prof"): + """ + runtime is how many seconds to + """ if profile: st = clock() cProfile.run('func(n)', profresults) diff --git a/libs/pyutil/data/wordlist.txt b/libs/pyutil/data/wordlist.txt new file mode 100644 index 0000000..e1048b9 --- /dev/null +++ b/libs/pyutil/data/wordlist.txt @@ -0,0 +1,7248 @@ +fawn +yellow +four +prefix +payoff +scold +outwit +lore +lord +swivel +deli +pigment +foul +fur +disturb +prize +broiler +wooden +satchel +crotch +fritter +charter +tired +miller +bacon +second +tether +ruthless +thunder +fossil +succumb +cull +specialist +hero +avert +herb +splinter +here +herd +china +dogwood +cult +shriek +chink +pancreas +robin +neurologist +climber +diplomat +golden +gridiron +lengthen +summons +remnant +stern +unit +spoke +exhort +statesmanship +music +bedrock +passport +strike +teaspoon +relay +relax 
+hurt +meteorologist +glass +hurl +hole +hold +unpack +sweeten +blade +locker +locket +plunger +wand +wane +unjust +household +digit +malign +caution +want +rayon +hog +hoe +travel +copious +cutback +revisit +how +hot +hop +cheetah +diagram +possum +modest +antonym +pigtail +revolt +alias +decoy +wing +squint +wine +feedback +misdemeanor +kickoff +foodstuff +butcher +dreamer +fir +bowlder +fix +fib +fig +fin +undercut +enrich +slate +interrupt +sixteen +silver +scholar +thyme +seamstress +debut +arrow +debug +volcano +burial +whim +concord +knockout +garment +allah +spider +crocus +turnip +yiddish +fortnight +allay +whir +whip +diction +smirk +mason +semiconductor +re +adapt +outburst +knit +scruff +silicon +miaow +thumbtack +shopper +wasp +wash +instruct +rhododendron +tango +master +architect +bitter +listen +wisdom +swish +sulphur +crawl +trek +peril +outlay +coward +tree +shower +pneumonia +sheen +acclaim +entail +girder +runner +spectrum +headland +increment +quay +dozen +kidnap +gripe +hum +greenback +tipi +matriarch +stirrup +object +toil +microsecond +mouth +addict +letter +fluster +drought +thriller +expound +singer +upend +grove +professor +camp +detriment +nineteenth +scream +marvel +bomb +reactor +heckler +ulcer +caper +layout +menu +bust +cougar +bush +bliss +rich +mend +rice +plate +pocket +cushion +fetish +relish +jaguar +boarder +pretzel +patch +hasten +respond +fair +heirloom +radium +radius +result +fail +crouch +clef +best +irk +yogurt +ire +wage +extend +vestment +souvenir +extent +wheelbarrow +carbon +debt +roller +accident +trickster +veer +disdain +cup +logic +genus +rehash +gopher +canyon +bewilder +chrome +onomatopoeia +advert +grapefruit +stadium +jackass +counterattack +life +retrospect +spit +worker +wish +lift +toboggan +chile +child +chili +spin +wildcat +dissect +employ +calcium +delicatessen +locksmith +letdown +player +elicit +eighteen +violin +doorman +specter +hone +toaster +honk +rebellion +split +bid +european +typhoid +boiler 
+ownership +supper +tuna +tune +furlough +noblewoman +unhook +abound +bellow +beset +plight +brandish +previous +ham +hag +hay +prison +falter +east +hat +quirk +birth +shadow +gangplank +remind +pavement +battlefield +attorney +right +old +creek +crowd +creed +crown +glove +billboard +creep +chorus +okra +bottom +circumvent +inhuman +fox +foe +fog +binder +yoke +slither +recollect +despair +rebut +eightieth +sob +sod +overshadow +honeymoon +overgrow +sop +sow +wrap +fabric +panorama +support +tame +avail +width +hothead +call +overhand +overhang +telegraph +offer +thesaurus +beech +squalid +safeguard +otter +duel +misinform +paprika +vanguard +pest +duet +proud +tournament +proven +exist +quintuplet +dealer +leer +floor +glacier +actor +flood +role +entomologist +sunset +smell +leek +intend +glutton +ointment +asterisk +taurus +intent +cleaver +entrust +windscreen +puss +lowdown +time +push +gown +chain +viaduct +skate +chair +midst +millisecond +ballet +uneven +vex +crater +oversight +jerk +ameba +embark +flora +mourn +knapsack +southpaw +exact +epic +judaism +tear +teas +teat +crustacean +subway +team +skewer +prevent +meadow +gremlin +attic +sigh +milligram +heavyweight +crescent +playpen +crackpot +melt +current +boost +abscond +gnaw +splice +address +brilliant +endow +queue +influx +love +radish +prefer +piranha +fake +instal +forefront +sky +homesick +turret +wicker +wicket +scope +prosecutor +wicked +afford +refrain +visual +appendix +behalf +mascot +lumberjack +pretend +descriptor +dispossess +stole +winter +savor +sputter +meddler +slush +spot +textual +date +suck +dove +pulley +stress +conscious +bluster +wheelchair +quadrant +mango +so +skirmish +truce +drunken +archeologist +footstep +yearn +jig +disconnect +thumb +accordion +nearsighted +councillor +hubbub +suspicion +thump +apron +civilian +insomnia +nation +amulet +twilight +ketchup +handiwork +revert +fisherman +quarter +quartet +receipt +fireproof +breakthrough +sponsor +troll +naked +canvas 
+onrush +trauma +formula +dumbfound +million +envelop +vicious +disrespect +mime +plea +byte +workmanship +punk +wrong +ostrich +punt +footwear +neglect +gunshot +potter +one +reopen +chide +conifer +vote +paleontologist +languish +boulevard +wrath +convent +bite +extortion +shiver +draft +cite +starfish +shawl +artifact +snatch +antic +boyfriend +iceberg +rival +stammer +counselor +janitor +prospect +sac +greyhound +argument +alley +sad +say +borough +saw +handicraft +tulip +general +knead +zoo +note +take +destroy +printer +buffer +squalor +compress +buffet +crochet +knee +byway +lawn +enamel +blockhead +sale +cocoanut +wind +axe +salt +cobra +homespun +lotus +friction +bright +slot +slow +slop +unkind +gourd +transact +cloak +debunk +slog +hockey +slob +robe +clank +dissimilar +psychiatrist +clang +outlet +prime +artist +saliva +borrow +soloist +carrion +handcuff +primp +landlord +tortilla +where +xmas +vision +gout +gangster +cheesecloth +diver +bugler +mutton +plummet +bootleg +teacup +bureau +mope +vender +jumper +spars +screen +dome +supermarket +adept +jovial +spare +spark +quack +oust +fit +madcap +mane +flipper +backpack +twin +boar +supervisor +extinct +twig +boat +companionship +stretch +west +breath +reflex +gist +thousand +photon +cloudburst +turtleneck +former +jute +scarlet +straighten +spotlight +girth +brow +canon +dubious +monk +blab +fame +spunk +breakdown +hideaway +deft +barber +disown +booster +driftwood +veal +pewter +dimension +scholarship +summer +manifold +poach +disconcert +slime +rest +invalid +alarmist +mandolin +instrument +overthrow +stopwatch +haystack +joyful +sportsmanship +rejoin +dart +dark +brazier +snarl +traffic +cranium +vacuum +world +snare +dare +clan +clam +stranger +shutter +glamor +clay +claw +inter +kennel +clap +auditorium +obstruct +grub +potion +lobster +racial +endeavor +tote +tube +moslem +tuba +nook +exit +refer +zest +ration +leadership +standpoint +stone +ace +slender +meal +tumor +neighbor +act +mean +invert 
+braggart +homeless +wade +hypnotist +racquet +hew +burglar +her +gleam +glean +mindless +harpsichord +italic +hem +hen +defrost +epilog +pull +regimen +darken +wafer +rage +hooray +tripe +ruse +flirt +reprimand +whiz +torso +pulp +rust +ohm +gong +ad +fright +certain +epaulet +catchup +hoodlum +ay +ax +tranquil +jargon +slobber +cream +yoga +collector +abolish +tight +backgammon +congress +annex +slant +midget +brotherhood +slang +rostrum +neuter +thorn +groom +mask +kilogram +mash +mimic +mast +mass +ringworm +waiver +retch +gingham +influenza +consider +neigh +upkeep +taxicab +tinsel +to +tail +smile +norm +debit +baton +candid +salesperson +cobalt +strand +laud +pedant +sand +adjust +small +mammal +peon +ninetieth +plaid +past +burnish +gossip +canvass +healer +hick +offbeat +clock +section +succinct +method +contrast +full +hash +lobbyist +saleswoman +dramatist +backlash +brutal +prior +hamster +skyrocket +social +action +welder +raze +depart +vie +sherbet +regiment +captor +coercion +entrap +select +casket +enliven +petroleum +maltreat +pearl +sitter +morn +ballad +more +teen +teem +door +tester +signpost +nomad +doom +cunning +fatal +malt +chisel +patriarch +knocker +midstream +mall +learn +grope +male +stewardess +prompt +taunt +gallop +scab +accept +autumn +gallon +scar +rustler +condemn +huge +speedboat +fruition +cling +clink +plant +anoint +blotter +variant +unsound +plane +waver +flutter +pucker +wrench +trellis +patio +pant +instep +trade +paper +pang +brim +mislay +hearsay +buttercup +epoch +coarsen +bypass +motley +sucker +gadget +consign +imperil +skipper +harrow +nugget +fount +found +lantern +status +eyelash +clockwork +scribe +penicillin +lipstick +research +highway +bungler +belief +porcelain +bedlam +cockpit +loafer +suntan +acorn +riser +reproach +prefab +drivel +sicken +bumper +testament +clump +major +purport +limerick +number +feeder +slipper +footprint +florist +glitter +guess +guest +jet +swipe +vocalist +saint +gnash +relationship 
+tightwad +typhoon +mural +consult +grace +frock +getaway +vocal +video +defect +waft +pedestrian +graffito +caress +blond +gasket +sell +ballerina +ragamuffin +tarnish +spaghetti +self +trowel +poplar +brace +bobbin +kneecap +hypochondriac +blackboard +nasal +twine +raucous +virus +plan +wive +foyer +oyster +unequal +arson +covet +cover +barren +barrel +bulletin +chowder +golf +cruiser +affix +session +freight +impact +condor +writer +peculiar +condom +factor +downpour +dandelion +streamer +resent +actress +compass +banner +tumult +sojourn +caramel +enema +weaver +river +outlaw +prospectus +manger +set +creator +overwhelm +jade +sex +see +sea +contour +analog +project +urchin +fission +crossword +pickup +crosswalk +kneel +candor +mildew +hardship +disallow +incident +dividend +pagan +scatterbrain +lass +last +thou +opal +feminist +amoeba +lash +whole +load +loaf +electrician +pendulum +bell +loam +loan +hollow +scallop +church +psychoanalyst +underlay +napalm +airfield +devil +filth +imbed +proprietor +veneer +firm +sweetheart +champion +fire +infect +upstart +fund +deport +hostess +straight +budget +error +outskirt +real +pound +moth +vow +chasm +vanish +chase +starlight +seek +shorten +wasteland +specimen +commune +snail +teeter +cigar +epithet +alert +opinion +stack +recent +expend +clime +person +sixtieth +crayfish +telegram +aroma +belabor +amp +demerit +sandal +goblet +chest +eager +horseradish +homeland +wrongdoer +input +limp +cordon +format +bureaucrat +quest +cataclysm +blackjack +falcon +abduct +flannel +spine +consensus +crescendo +spring +beckon +palm +pall +sight +curious +sprint +battalion +pale +gruel +benefit +religion +be +odor +agreement +carol +by +scepter +coexist +hatchet +sacrament +ambush +biennial +repair +contributor +next +span +sock +submit +custom +spay +suit +spar +spat +blueprint +perplex +poster +lint +slump +pastor +overbear +link +atom +line +up +slander +foist +hornet +insignia +genial +aerial +nationalist +haunt +char +chap 
+chat +parsec +breather +phantom +paradox +tuft +uranium +scrape +parakeet +swirl +freighter +tart +tedium +scapegoat +trouser +scrub +gardenia +hackney +lane +land +fighter +algorithm +scotch +age +feud +summit +walker +fresh +crowbar +rescuer +hello +essay +code +partial +serviceman +scratch +broaden +totem +soften +leggin +renown +prim +flashback +young +send +moor +tremor +garret +armament +garden +quadruplet +llama +precinct +wipe +magic +harbor +eve +anxious +race +rack +mishap +crook +croon +odd +ode +victor +index +yock +sauerkraut +apparatus +indian +proffer +bird +inspect +leg +punch +acquit +let +fifteen +vinegar +great +casino +screech +scatter +survey +insulin +grandchild +buss +popcorn +mussel +maker +grower +sire +disobey +causal +zip +archbishop +theme +aeon +eleven +doubt +yardstick +midday +pencil +babe +shipwreck +patrol +rubbish +central +hoard +pout +pour +thin +drill +coffin +cherubim +bent +pawn +process +lock +slim +high +slit +bend +slip +pelvis +martyr +trumpet +weaken +rhubarb +delay +blackhead +luster +stow +halter +singular +await +wristwatch +notebook +tier +marrow +hawk +autograph +tomato +counter +robot +element +writ +allot +allow +alloy +thigh +mute +insight +spatula +comma +mutt +warren +perfect +decay +shudder +garnet +derelict +prosper +python +belch +bat +launder +dock +snake +kiss +bar +cage +wrangler +truth +scorch +subset +bump +static +thirteen +mete +jagged +disco +tenth +wander +matrix +bag +fatherland +venom +czar +oblong +lob +shut +perish +tempo +graze +tempt +shun +embarrass +minstrel +chilli +mainland +spill +length +stickler +scare +scarf +manuscript +scene +cobweb +owner +scent +prank +lop +opossum +sergeant +spaceship +painful +stomach +chagrin +vouch +rotunda +haven +steel +wet +bother +aggressor +psalm +disband +unman +steep +torrent +misunderstand +beggar +viewer +partnership +correspond +tonight +receptionist +fourteenth +mischief +depict +soak +bacterium +bassoon +hammer +adjunct +lilt +soap +soar +calculus 
+manor +raindrop +cipher +vise +segment +fervent +instil +locust +enlist +soprano +fiasco +brew +fact +bring +brine +bedroom +rough +asylum +trivial +brink +redirect +disillusion +planter +jay +jaw +jar +jam +tape +bourbon +flinch +hope +jackpot +move +familiar +scorn +sinus +wring +antagonist +smash +shaver +summon +stuff +rein +withstand +pronoun +packer +frame +packet +bellhop +airmail +dungeon +wire +mien +partisan +unravel +piston +pistol +email +browbeat +fetich +physicist +courtyard +lawsuit +tantrum +drum +quitter +ramp +drug +doorway +puff +roughen +medallion +revamp +migrant +distil +javelin +indict +chromium +lectern +mailman +gondola +quaver +blatant +feather +ballast +sheepish +crisscross +federalist +mannequin +altruism +banish +laser +runway +bathtub +maul +groin +ripe +lush +site +lust +mockingbird +tenor +passbook +ransom +tattoo +inquest +terrorist +buffoon +outbreak +android +balm +ball +balk +dusk +fiesta +bale +bald +dust +broccoli +mosaic +audit +off +shotgun +polyp +command +diphtheria +audio +maggot +compel +glut +glue +rambler +web +generous +clergyman +wee +wed +arrest +crack +scoundrel +government +chancellor +crux +haul +cedar +desk +password +recurs +placenta +crisp +onion +resin +alkali +stagger +imprison +nymph +sprain +overcast +foray +habitat +thief +daylight +flush +wisecrack +ballot +transport +henchman +disbelief +hoarder +avoid +disk +doer +passion +saucepan +stairway +putt +drift +ornithologist +stage +iris +sister +adverb +peal +ingest +union +artefact +parsley +assess +lung +mere +muck +commission +caviar +watchman +stamina +much +function +funnel +cosmopolitan +frisk +shyness +grate +rectum +count +congresswoman +smooth +monument +problem +baptism +cordial +kowtow +sirloin +retina +inn +replica +ink +anesthesia +furl +sexual +saviour +behold +reckless +chum +monday +repeal +veil +vein +ghost +eon +rule +dynamo +torrid +pension +tryout +abhor +buoy +inning +dote +rapid +mansion +defraud +voter +spew +bludgeon +bike +daze 
+regal +chill +regalia +whack +ale +compassion +blanket +distort +mania +chauvinist +chapel +whisk +daydream +pinch +scalar +handout +roadblock +unblock +math +triumph +chew +paperback +phoney +speck +heliport +sabbath +horn +chef +aristocrat +panda +stardom +lizard +walkout +toll +crunch +dustpan +pursuit +paraffin +sorceress +hairdo +daughter +envoy +adopt +tankard +smoke +loincloth +bunker +anarchist +envious +sanatorium +infield +spigot +thrust +hindsight +total +bra +plot +plow +plop +sweater +gloss +ploy +insult +plod +knoll +beeswax +solicit +award +yard +tariff +overrun +word +err +crest +work +grovel +tinder +era +elbow +spendthrift +quiver +serpent +flunk +impair +liter +chameleon +sever +moron +disappoint +beach +pizza +fever +lad +ladder +lag +lab +lay +law +arch +cosmonaut +retort +greet +greek +green +south +worst +order +greed +salon +gumption +devote +muffin +misconduct +mayor +sheaf +avocado +valor +carton +shear +then +fragment +safe +break +band +bang +coffer +overprint +tzar +bank +bread +crock +gallows +lisp +iguana +schemer +transient +prawn +sled +flock +slew +hostel +burlap +network +diesel +fellowship +amethyst +marigold +barrier +veto +standard +stencil +lollipop +morass +drench +ticket +maniac +raisin +flawless +renew +sprig +regress +vanquish +kin +render +system +hamstring +chopper +disembark +comic +overs +neck +upshot +tourniquet +kiwi +emblem +luncheon +cereal +rebuff +minibus +guild +target +tavern +hike +medley +iota +guilt +iron +minus +pessimist +lull +ponder +strength +realm +widen +silversmith +latter +hamper +transmit +curfew +maiden +boxcar +sue +negro +phase +proverb +grave +deacon +swamp +bracket +aunt +rickshaw +oppress +mitten +crust +boyhood +nephew +toast +geyser +layman +geologist +predecessor +do +ardor +ecologist +roundabout +slack +rebirth +runt +rune +rung +crucifixion +steak +steal +steam +ghoul +reread +misdirect +christian +goulash +pastel +gentleman +cellist +contraband +drawl +accord +unfold +kitchen +cop +cot 
+cow +brat +excrement +ill +cob +brag +cod +cog +bran +coo +con +emporium +eyesight +tone +spear +royal +trunk +nonconformist +infirm +speak +charisma +scarecrow +warmth +leech +baud +hacksaw +millionth +hoist +spellbind +gracious +physician +inhibit +gnu +launcher +air +aim +ail +abrupt +thrash +aid +stink +have +sticker +sting +throat +brake +cone +hebrew +uplift +stint +descent +perform +descend +decibel +wheel +raid +fuss +nil +swell +hang +evil +hand +fuse +nip +nit +scenario +drip +ragged +client +mamma +kinship +indigo +photo +victim +extol +thyroid +exalt +shout +cognac +board +zillion +righteous +plasma +intercom +fusion +boxer +cape +retreat +cooler +night +flatter +born +rile +flatten +bore +orchid +cede +humor +peek +peel +pose +confer +peer +peep +chafe +foreskin +chaff +diner +coral +visa +banker +horizon +cherish +gingerbread +octopus +croak +faint +dilemma +tetanus +float +profession +bound +curios +sedan +loin +beet +piggyback +wag +bookend +wad +frill +sovereign +fight +gybe +way +wax +burro +war +fizz +peninsula +holdup +boon +true +reset +absent +nursemaid +smidgeon +maximum +crystal +veterinarian +emir +emit +aorta +flat +abstract +molt +flaw +postscript +subsist +prayer +cacao +face +mold +mole +stake +shrine +test +upholster +unwilling +frolic +shrink +heyday +chairperson +hemophilia +faze +affidavit +loyal +longshoreman +igloo +concept +matron +consul +fulcrum +datum +horseback +supplement +toothpick +varnish +grape +zone +mallet +flask +graph +hump +flash +manicurist +glad +rhythm +tusk +terror +idealist +southwest +brown +congest +kitten +blast +brows +ophthalmologist +gun +gum +gut +guy +diarrhoea +upper +brave +regret +bravo +thinker +cost +helpless +tempest +cargo +appear +economist +menthol +medal +havoc +uniform +tarantula +appeal +caveat +genes +gawk +jester +disclaim +goldfish +teacher +buck +merriment +fogey +precursor +plotter +eke +disavow +trial +convertor +pillow +bolt +extra +paragon +keeper +marker +firearm +market +streetcar 
+prove +subvert +live +matador +club +cluck +clue +logarithm +prepay +graphic +slogan +car +cap +caw +cat +meow +can +cab +heart +hears +chip +sake +bridesmaid +abort +chin +chic +serum +bankrupt +freezer +write +lobe +storeroom +criterion +entrench +afternoon +product +dive +southern +bawl +motorway +pave +drastic +flourish +crepe +grandson +explicit +offend +barnyard +forfeit +haircut +ledger +brain +nitrogen +cold +braid +ethic +willow +theorem +window +artisan +factual +tiara +gizzard +nought +halt +fling +nod +rake +overcrowd +dishearten +hale +half +recap +courtship +taillight +provision +discuss +halo +wont +concerto +servant +drop +domain +supplant +year +operand +wavelength +happen +album +accomplish +space +thirst +rational +thong +carp +cart +virtuoso +quart +rebel +marina +prospector +card +care +fungus +tomahawk +british +honest +nonprofit +profess +blind +madam +blink +rink +rind +ring +drove +tomorrow +size +sheep +sheer +sheet +silent +bookmark +breed +callous +traction +checker +tragic +heartburn +friend +pomp +courier +that +peck +scalpel +rugged +recruit +magnesium +optimist +extinguish +angel +slay +slat +premier +slap +racetrack +slam +anger +breakfast +recover +slab +upbeat +veteran +shore +snout +siesta +begin +prick +halibut +price +foothold +dream +tooth +aerosol +washcloth +fifth +ground +gnat +snack +ratio +stair +proportion +jolt +stain +juror +shrill +pumpkin +cannon +loath +stroll +leather +thermal +husband +druggist +concert +burst +spore +whitewash +unfit +staunch +sport +incisor +concern +crawfish +glaze +complexion +import +clench +pluck +blame +broil +impromptu +whisker +guffaw +pertain +priestess +temper +aura +comet +evict +adroit +dispatch +exploit +semicolon +lioness +harmless +rebuild +toss +sisterhood +textbook +bloodhound +crumb +these +trick +scum +cherub +fool +marksman +zenith +mucus +soil +agnostic +laggard +bias +eras +bestial +beaver +waterway +petunia +helium +develop +media +pester +poetic +document +sweeper +finish 
+thunderbolt +foal +foam +cymbal +fruit +volley +trawler +smelt +quartz +theater +framework +patchwork +demean +taxi +livestock +fester +battleship +typhus +neon +touch +speed +death +refurbish +treatment +baloney +momentum +lade +ream +hover +frown +spectacular +larva +read +ruler +swig +leapfrog +earnest +detract +stunt +execution +reap +hovel +rear +postcard +incest +roll +engross +oblivion +output +downward +falsehood +laugh +verbal +landslid +squirm +garland +putter +cleanser +deficit +squirt +wheat +sadden +throb +sixth +tuition +strainer +bazaar +throw +comparison +placard +hiatus +chop +fell +wolf +parson +chow +ruff +assassin +processor +heater +outlook +earring +watchdog +your +restless +stare +grater +log +area +start +stealth +low +lot +wigwam +groan +pitcher +rump +posterior +recoil +omelet +lymph +thaw +corduroy +wholes +hire +fraud +default +bucket +draftsman +cornmeal +gibberish +charlatan +scanner +pickax +sheath +mover +antenna +housework +valid +colt +you +houseboat +poor +polar +poop +peat +pear +peas +overreact +podium +peak +pool +fiscal +assert +moonlight +repay +forswear +mourner +skeleton +breadth +groundwork +angler +month +unrest +bequeath +carpet +corps +gymnast +foster +spearhead +fervor +fountain +washroom +horror +verb +minded +heaven +ceaseless +saboteur +tendril +blackout +smock +homonym +resound +exodus +casual +bomber +protractor +lurch +milk +turmoil +vet +excess +strong +arena +divisor +noticeboard +outgrowth +vegetarian +whine +soldier +amount +base +trainer +put +haemoglobin +seventeenth +taker +helper +pup +titbit +assessor +chestnut +yuck +suction +grill +nine +parasol +transcend +pusher +boycott +archipelago +tract +phrase +magenta +frostbit +sheikh +warhead +spreadsheet +lyre +snippet +reject +gash +circuit +rude +sneak +denial +gasp +reelect +undergo +figment +overnight +ego +dread +egg +lynch +earthworm +help +reservoir +slouch +auburn +reclaim +preempt +soot +helm +hell +clarinet +limelight +prowler +lateral +heron 
+astonish +forbear +dim +food +musket +terrain +vomit +foot +stopper +holiday +payer +twelfth +bless +radial +trailer +pamper +thirteenth +talisman +event +magnet +vertigo +wedlock +teak +publish +eardrum +sustain +shrivel +outrun +ass +pun +drink +bass +dirt +dung +dune +cornstarch +reason +heroin +ask +ash +turnout +bask +bash +pus +launch +curtsey +round +bridegroom +caption +liaison +heartbeat +blush +assign +arsenal +demagog +elder +effortless +mist +miss +blossom +minion +station +expand +fingernail +scheme +banana +merciless +slosh +behind +bowel +trapper +boredom +sign +leotard +tunic +lament +ouster +hyaena +bride +bobsled +currant +toward +weapon +phobia +chipmunk +yowl +null +sensual +lid +lie +koala +cave +lip +useless +honeycomb +popular +quota +plunder +mace +watt +smoulder +clear +cleat +succor +clean +skein +blend +humid +phenomenon +cowgirl +flicker +sheik +crayon +copyright +paranoid +scooter +less +ikon +ampersand +waterproof +custodian +outlast +strut +strum +basement +chimney +monogram +fluff +chasten +geld +courteous +cramp +backtrack +grey +close +despatch +bandwagon +haddock +aqueduct +wow +grocer +won +woodchuck +wok +woe +stalk +bettor +wreath +philanthropist +spray +distinguish +zipper +garrison +delimit +eggplant +buzz +vault +protector +mausoleum +onward +oversleep +liken +proton +header +badminton +vessel +catwalk +stamp +damp +nape +damn +threaten +dame +alto +liven +exempt +deter +liver +hobnob +furrow +pact +loom +utmost +look +socialist +governor +rope +pace +while +smart +fleet +loot +loop +pack +petal +hoax +grant +belong +makeshift +discredit +grand +conflict +sham +hallway +optic +dime +bonus +banter +overweight +user +boa +grind +auditor +five +ambassador +chore +abstain +bearer +morsel +tick +botch +pier +carat +march +albino +game +jibe +banjo +optician +signal +manifest +eel +sleigh +sketch +creation +undress +yolk +urgent +impoverish +mustang +clothespin +fundamentalist +gild +simmer +slash +slapstick +run +rum +rub 
+booklet +benefactor +rug +stem +step +stew +taboo +subtract +rut +discus +shine +faith +pigpen +letup +portico +reappear +aye +congressman +block +foreswear +misbehavior +dude +within +gilt +pentagon +connector +syllabus +palomino +harem +frost +reed +womankind +reef +reek +reel +dull +skulk +swagger +chiropractor +ringlet +foresight +similar +psychopath +kidney +straitjacket +nab +sullen +nag +objection +obelisk +nap +department +nay +draw +resign +drag +tundra +drab +formal +horseplay +outing +orbit +depth +bribe +pinion +underbrush +cheerful +go +emboss +dunk +ammonia +compact +aquarium +baron +aria +stave +ameer +shack +geranium +warpath +epitaph +velour +schooner +virtuous +fond +wave +trough +cellular +tenet +nausea +stiff +asparagus +gender +button +hive +verdict +cloister +pilfer +picket +blitz +jump +booth +ardent +languor +cartel +click +poke +wallet +colonel +valet +cell +rotten +experiment +stanch +brooch +fifteenth +quell +weirdo +convert +chant +gent +repel +behead +ricksha +wig +daybreak +danger +win +clout +wit +ligament +infest +gimmick +wiz +cloud +metaphor +snoop +copperhead +crag +crab +cram +expressway +compatriot +mismatch +starter +salad +consort +ride +donut +archer +meet +control +wharf +halloween +glade +skirt +bandana +filament +circular +fare +farm +thunderstorm +canker +foment +corral +scoop +encyclopedia +scoot +agenda +american +cadet +sperm +gunman +hock +brood +broom +brook +walnut +youngster +skater +frond +auto +dike +relentless +snorkel +placid +stout +hands +front +refuel +muff +perfectionist +mode +upward +commonwealth +unwind +chunk +mollusc +seesaw +apartheid +mollusk +special +gallant +armor +confess +fathom +remiss +activist +playground +wick +obsess +umbrella +hopscotch +watermark +jilt +undo +advisor +sneer +princess +shrew +timer +keep +counterpart +elector +keel +mad +blogger +seventeen +bitch +drinker +equinox +dump +wrapper +chintz +attach +attack +jellyfish +final +beard +introvert +punish +feint +noun +plough 
+piecework +waist +photograph +spurn +cartoonist +beg +bed +bee +discolor +swindler +firework +spurt +bet +are +exhibit +fume +tabu +torment +sundown +portrait +need +border +rotor +bastard +sprinkler +gunner +jaunt +tactic +truck +detector +visor +brand +african +camper +rigor +awe +plumber +eject +spleen +urn +upset +snapshot +businessman +constrain +skunk +affair +indoor +crate +molest +cavort +sheriff +fiberglass +winner +wreak +rash +earner +rasp +gradual +fuel +sulfur +joint +fallout +buyer +endless +gray +tobacco +gust +ordain +topaz +nutmeg +she +contain +recoup +grab +conduct +widow +hardwood +shake +orphan +portend +computer +driveway +equip +portent +unearth +southward +tend +state +lug +tens +antler +tent +bleed +castoff +blinker +keg +bemoan +key +overhaul +thank +sniff +career +admit +spatter +plankton +jersey +christen +tuesday +poem +sari +tread +shaikh +yap +cent +quiz +yam +treat +yak +whisper +poet +fibber +spaniel +nuptial +brunch +debtor +novel +ripen +pandemonium +harden +neuron +hearten +steer +generic +balloon +speaker +northwest +blight +fireman +flesh +absorb +powwow +inbreed +spree +magician +rift +weld +surfboard +lunch +glower +well +drone +welt +underdog +discord +mistaken +dose +distant +laurel +skill +cinder +jackal +dais +ovum +snicker +stratum +possess +warrant +homework +canter +rafter +fate +burden +loss +clown +tablespoons +lose +divest +satirist +rote +page +backlog +shed +glare +twitter +hush +redress +home +peter +competitor +pinpoint +overlay +broad +overlap +hinder +individualist +journal +offset +instinct +smidgen +refuge +freedom +cleans +nightclub +rodeo +dominion +wallop +buzzard +cocoa +pointless +gerbil +snowplow +mastermind +museum +poinsettia +drumstick +mohair +jinx +backhand +cricket +north +gait +admonish +neutral +ho +technician +overflow +ear +eat +he +leper +limit +cello +display +wringer +twist +entreat +contest +meteor +finch +chemist +fodder +star +stay +stag +foil +stab +phosphorus +appoint +sphinx +shunt 
+broncho +atheist +atheism +portion +pardon +mackerel +demand +unfurl +protest +asian +captain +gamut +swab +swan +sinew +swat +swap +anticlimax +sway +loon +appal +void +vase +smack +govern +affect +hitch +vast +pilgrim +naturalist +vector +washout +whirlwind +quilt +crave +yack +cactus +quill +pander +wreck +orchestra +bikini +spokesman +haze +new +net +maverick +seventh +mew +onus +cardboard +interpret +taper +credit +harass +jamb +permit +prolog +menial +hunch +campaign +bayonet +moral +handlebar +overhear +ore +overheat +calk +overhead +calm +intrust +recommend +type +tell +calf +demon +wart +warp +warn +dogma +warm +pecan +adult +qualm +flotilla +ward +blindfold +confound +rook +room +candlestick +worth +bungalow +headway +root +defer +vodka +give +climax +assent +honey +surveyor +quail +freshen +polio +rib +stockyard +answer +abdomen +plank +coup +fracas +passageway +waterfront +lesbian +guerilla +attempt +third +maintain +capitalist +fetus +deck +keyboard +windshield +furlong +harmonica +crew +better +persist +pass +workout +microfilm +caterpillar +grammar +meat +mistrust +roast +side +bone +luck +caustic +aids +dawn +extract +jell +contend +velvet +gradient +open +crucial +content +reader +linear +whiff +bestow +mistress +needlework +steward +fleck +loud +skinflint +playwright +grade +hoop +hoot +buttock +hook +ditch +hoof +hood +hydrant +acquaint +spinach +historian +enthral +woodwind +brainwash +dwell +inferno +twister +gym +somewhat +gambler +symptom +preacher +affront +keyword +matter +loiter +mink +seep +quench +modern +mind +mine +ginger +seed +seem +churn +mint +unfasten +alibi +desist +chess +sleeper +quarterback +phonograph +chatterbox +regular +condominium +blacklist +don +alarm +impostor +dog +doe +solemn +digress +constrict +consumer +dot +hunger +visitor +probe +syntax +chord +sundial +northeast +explain +jailer +sugar +folder +inventor +edict +patter +smut +stop +coast +pincushion +watermelon +smug +earl +earn +peacock +bay +reload +bad 
+troop +cower +ban +stinger +linguist +enchant +attest +subject +snuff +scrap +sail +causeway +scram +baboon +warrior +triplet +vitamin +lotion +orthodontist +beautician +cousin +motto +sprinter +pate +typist +height +gusher +aftermath +arsenic +ether +accustom +tint +recur +three +erect +ting +chrysanthemum +trigger +interest +basic +basil +basin +idol +chug +mushroom +suppress +dismiss +deepen +encyclopaedia +unpopular +tank +affirm +tang +near +moratorium +neat +motorist +anchor +spawn +seven +cane +diaphragm +it +shame +jest +in +id +disappear +if +abscess +growl +sap +parish +make +kit +delight +squat +garlic +warden +unicorn +jealous +overt +bequest +kid +butter +romp +smoker +inherit +bedspread +diphthong +left +protocol +just +yen +unfair +psych +human +yes +cretin +yew +legion +character +wretch +save +opt +discreet +background +shoulder +nude +manual +pillar +dean +squander +deal +deaf +maxim +dead +revel +intern +dear +strife +sprawl +pail +collect +normal +councilor +flounder +bullfight +tartar +bold +statistician +burn +blackmail +cottontail +sift +protagonist +burp +burr +tartan +super +innuendo +crucifix +craftsman +commit +marshal +unsay +paunch +chimp +down +lieu +chime +initial +lampoon +editor +fraction +unseat +fork +undergrowth +form +fore +ford +diaper +overburden +analyst +fort +pavilion +whiskey +boomerang +cosmos +propaganda +shin +disciplinarian +classic +covert +sidestep +drive +gland +scrawl +fatherhood +ship +graft +vista +marxist +smidgin +excel +handed +venison +congeal +marxism +sling +faction +handicap +slink +felt +diet +parenthood +journey +reign +stoke +weekend +derail +billion +potato +jacket +gorilla +almanac +teeth +meander +befriend +proletariat +woodwork +skip +skit +invent +adjourn +mild +mile +skim +skin +mill +primer +proletarian +skid +surplus +seasick +misread +depend +swoon +father +countdown +deject +swoop +regatta +unburden +string +yeast +pathologist +merit +join +jettison +stiffen +hoorah +din +stapler +nectar +die 
+dig +democrat +noiseless +item +dip +blur +shave +thresher +villa +worm +slake +sunup +talker +fillet +suspect +drunkard +shoo +dwarf +dweller +wail +guardian +clerk +makeup +stallion +waif +detest +tangent +deceit +rue +wait +box +boy +cuckoo +shift +bow +dither +boo +raccoon +cyst +bob +nylon +bog +elect +plumb +kayak +surmount +transplant +saki +wealth +perk +visit +vineyard +somersault +sharpen +yoghurt +aspirin +labyrinth +curriculum +downtown +tandem +rigid +savior +effort +gnome +demolish +pageant +moccasin +melodrama +flu +soul +impel +soup +sour +claim +plaza +reflector +predict +agent +drawer +council +craze +pink +purr +arbor +tilt +clever +parch +pine +till +sunday +sword +tile +pathway +pint +map +mar +mat +may +gelatin +membership +mankind +tablecloth +grow +man +relinquish +aimless +hemlock +omen +tale +switch +jail +deposit +talc +unleash +basket +longhand +talk +shield +rabbi +moralist +lyric +pitch +solder +pointer +group +monitor +bedbug +maid +drummer +maim +mail +main +tonic +killer +shatter +minuet +safari +teller +rattler +outweigh +feverish +peasant +careless +rock +hijack +eyelid +latin +bookshop +poker +gavel +unlock +manifesto +girl +stitch +monolog +priest +dutch +blubber +sensor +correct +monster +zombi +vinyl +jaywalk +cough +underwear +waiter +buzzer +thing +registrar +blacksmith +think +frequent +first +lone +crib +long +extrovert +thoughtless +lap +autocrat +escort +daunt +mermaid +anus +yoghourt +memo +broadcast +butt +proofread +tractor +coconut +lick +piccolo +marijuana +dash +comedian +sulk +nazi +sherbert +stopgap +daredevil +acumen +squad +interior +channel +pain +trace +roster +track +acrid +zigzag +whizz +assault +billow +pair +synonym +napkin +typeset +scowl +voodoo +toucan +amir +shop +lexicon +shot +show +cornea +veranda +therapist +shoe +threshold +corner +label +cornet +enthusiast +fend +objector +flapjack +dice +plume +enough +syphon +black +consent +enthusiasm +fiendish +plump +get +straggler +stomp +midriff +slyness 
+gee +gibber +neckerchief +gem +disinherit +beseech +skull +businesswoman +yield +stupid +nostril +tallow +kernel +sear +eighteenth +seat +seam +seal +stigma +calendar +wonder +puma +parent +limber +ornament +forecast +gage +pump +august +foreword +slingshot +tacit +wednesday +gauntlet +childbirth +tug +tuck +trader +tour +tout +delirium +stretcher +cancer +spank +cancel +tub +mare +underworld +imp +undershirt +mark +mart +workshop +rancher +fiftieth +chalet +graveyard +squash +wake +sound +litterbug +epidermis +slumber +cock +strait +strain +sudden +protein +par +pat +harsh +paw +pay +woodland +same +heartbreak +pad +cotton +pal +pan +exhaust +oil +chloroform +munch +companion +foghorn +polygon +drain +vertebra +soundproof +outdoor +suitor +money +imprint +leeway +aspect +flavor +asthma +godchild +comradeship +forgo +pile +pill +grip +grit +mop +mow +moo +mom +mob +railroad +grim +grin +oxygen +server +chamber +nose +hallelujah +fulfil +sneaker +afflict +witchcraft +ascend +dole +ascent +spasm +gross +confirm +pioneer +inject +gladden +highbrow +linoleum +intravenous +knife +raincoat +broker +squall +bravado +racoon +opium +contagion +roar +island +insect +mixer +thrive +partizan +road +checkup +dagger +coupon +splint +empress +whiten +strip +uptown +skillet +paraphernalia +jigsaw +totalitarian +madden +tycoon +tripod +striker +shroud +hiccup +gore +spice +ember +magnolia +grouch +conqueror +embed +deadlock +affection +deer +deep +fellow +planetarium +deem +file +girlfriend +deed +hound +film +fill +tedious +selfish +personnel +hybrid +repent +drouth +field +prism +astronaut +fruitless +lapel +shelter +gander +unload +burrow +god +gangway +oral +motel +represent +forger +pheasant +forget +founder +suburban +dollar +rebind +zinc +implement +crimson +hideous +premium +parcel +straightforward +scout +scour +fall +bottleneck +pueblo +hinterland +dampen +dictatorship +flyover +neighborhood +clinch +gnarl +burger +zero +cottonwood +lawyer +further +misrepresent +ribbon 
+dial +skeptic +stool +trinket +stoop +plush +movement +girlhood +malaria +intrench +twang +mule +ranger +beacon +bigamist +capacitor +search +stupor +margin +airport +chipper +chieftain +narrow +fatten +quotient +wizard +caravan +transit +sadist +sadism +establish +dachshund +hobgoblin +eye +score +distinct +two +splash +libel +furor +wiper +diamond +brisk +opportunist +particular +disfavor +nineteen +town +hour +cluster +fast +dew +remain +paragraph +den +abandon +stubborn +shark +buttress +onslaught +share +sphere +minimum +rainstorm +attain +junket +sharp +botanist +siren +awkward +comfort +rapport +stir +bleat +whopper +blacken +blood +bloom +chute +coax +orchard +coat +doctor +spiteful +electron +blunder +mislead +coal +sect +infant +setback +radar +dough +lava +suffer +hundredth +sodium +bosom +late +speech +clamor +lath +lookout +goof +good +goon +detour +frigid +compound +detach +complain +bombard +headroom +countersign +token +monsoon +clamp +harm +hark +mental +hare +hard +beret +banquet +connect +fist +callus +hart +orient +harp +flower +creditor +trooper +pigeon +seaport +granola +print +foreground +assist +cockroach +pleasant +gig +faucet +prophet +omit +wither +pure +corkscrew +copper +perturb +barbarian +shoal +cups +jabber +razor +construct +paint +leash +statement +mama +hummingbird +catapult +pare +park +selector +glycerin +dentist +part +pars +youth +totter +plead +hangout +cistern +blanch +mountain +cardigan +couch +onset +build +zucchini +flute +salmon +chart +most +charm +moss +eskimo +organist +humanitarian +mammoth +pennant +squelch +weigh +standoff +sector +sparrow +fine +find +giant +merger +nervous +ruin +fiend +boulder +prowess +paperweight +cholera +express +ferret +cheapen +batter +breast +theft +silk +pellet +restart +silo +huff +common +archaeologist +printout +vine +lion +overeat +tender +expert +burner +myriad +stowaway +subscript +hypochondria +premiss +egotist +complement +figurehead +mailbox +pagoda +aircraft +sultan +archway 
+annual +foreign +point +smother +newborn +pamphlet +dancer +esophagus +platinum +pocketbook +secret +amnesia +reformat +finalist +ram +gay +gas +gap +holler +gal +understand +gag +chatter +gab +bile +politician +metro +solid +bill +holocaust +crutch +fun +lingo +manner +mystic +astound +rancor +eczema +ingrain +anaesthesia +sociologist +dishonor +ewe +seminar +corridor +neutron +itch +leopard +yesterday +moment +stripe +unveil +timid +task +werewolf +withdraw +landmark +grid +recant +spend +howl +darn +shape +snot +timber +rundown +impetus +cut +cur +pollster +snag +forbid +cue +punter +cub +snap +bridal +easter +brainstorm +bin +squawk +rebound +bib +judgement +redeem +bit +knock +disrepair +blemish +flue +fagot +flux +bamboo +foolish +walrus +sequin +transgress +often +back +impeach +extremist +mirror +lightning +scald +scale +culprit +pet +pelt +pew +pep +pen +scalp +lard +lark +peg +pea +patient +fed +megaton +constraint +oatmeal +drama +catnip +pediatrician +offshoot +obstetrician +gambit +maelstrom +tiff +clack +lesson +jockey +few +doll +errand +camera +handbook +forward +nougat +sideshow +showman +switchboard +calico +lifeguard +planet +jumbo +azalea +constant +flow +possessor +lye +curd +cure +curb +curl +prevail +stagecoach +leaflet +crypt +underweight +cellar +lend +tablespoon +papa +lens +cater +desert +statesman +mantel +notion +uterus +anguish +caribou +stroller +seaman +golfer +strew +parrakeet +peanut +welter +mower +rudder +compost +blaze +atlas +gravel +queen +dessert +rhyme +claustrophobia +surgeon +molar +verandah +knight +shock +crow +queer +crop +append +power +junior +anthem +access +clipboard +bachelor +intercept +sink +sing +roof +bode +implicit +remark +talent +conceit +resurrect +weekday +climb +honor +blizzard +liqueur +talon +oval +scandal +gateway +sermon +lime +patron +asteroid +butler +charcoal +trait +kiosk +thatch +trail +train +armadillo +harvest +fan +account +tunnel +carrot +obvious +smear +parkway +unread +fetch +employe 
+truism +sanitarium +teamster +boney +spruce +serial +contempt +hangar +lamb +lame +lamp +forest +goner +stock +roam +leukemia +bluff +terrier +fray +drape +bind +guru +liner +linen +chief +poacher +furious +furnish +disarm +meter +bunch +marshmallow +decorum +labor +kindergarten +heroism +willing +marsh +dad +junction +dab +dam +spell +swordfish +mention +courtroom +sonata +day +strive +flail +snowdrift +thrill +slacken +cider +memoir +sawdust +disregard +flair +thwart +jailor +jugular +pivot +cupboard +lentil +salesman +hippopotamus +matt +defend +rev +repress +stub +mate +barley +stud +smog +stun +red +franc +frank +hanker +fourteen +salami +likelihood +afterward +squadron +indent +mortar +skateboard +yarn +mortal +workbook +retain +retail +waitress +suffix +overshoot +ethnic +sack +brute +whoop +puppet +guidebook +vandal +pauper +ancient +monkey +bologna +laps +vulgar +vagina +hexagon +scant +liquor +cabin +sixteenth +gear +eavesdrop +bulldog +smolder +forethought +springboard +nun +bodyguard +prune +shrapnel +shampoo +linchpin +lover +anthropologist +tide +cavern +pedlar +countryman +waken +optimum +mix +parka +spinster +meek +dryness +hazel +eight +clamber +handbag +hoodwink +transcript +payment +gullet +gulley +gather +request +absurd +rendezvous +occasion +thicken +recess +kite +text +hamlet +traitor +industrialist +sidetrack +portfolio +floodlight +thicket +staff +communism +scorpion +madman +prolong +resubmit +satan +oppressor +communist +inferior +equilibrium +gumdrop +starch +beat +rush +bear +beam +bean +october +beak +bead +organ +ashtray +nutriment +eyebrow +motherhood +mascara +conform +showdown +infidel +racket +interview +reform +pattern +nebula +brunt +hammock +progress +tailspin +sorrow +stratagem +deliver +blackbird +boloney +exclaim +instant +joke +equal +kosher +swim +swallow +highland +guerrilla +glorious +wear +comment +vent +denim +overcoat +commend +vend +harpoon +manhood +citizenship +copier +newscast +gaze +teamwork +gulch +curtain 
+curtail +hyacinth +juggler +censor +goddess +bulk +bull +bulb +skew +carburetor +cypher +plain +homey +bray +kinfolk +bicker +dissent +squid +blimp +creak +prose +partner +inspector +lynchpin +portray +whirl +grinder +matchbook +defiant +anorak +tumbler +infer +whirr +tighten +pockmark +sauna +ion +grandstand +sunburn +judgment +retard +center +builder +pickpocket +thought +starboard +usual +coaster +humdrum +fingerprint +storey +clinic +interim +surpass +tough +earshot +flashlight +tong +flee +lupin +lake +bench +add +citizen +ado +crossroad +ravel +match +raven +cantaloup +punctual +newsstand +dryer +insert +flamingo +like +success +sofa +journalist +heed +arraign +chick +soft +heel +outfield +propel +fuze +hail +hair +convey +proper +paddock +novelist +shrug +shrub +slide +tureen +regain +pepper +hose +slight +host +panel +beaker +actual +socket +flake +preen +toadstool +pickaback +discard +tomb +tome +snitch +chronic +guard +esteem +custard +underpass +glimmer +gene +maze +globe +buy +bus +coke +sequel +but +bun +bum +bug +bud +embargo +woodsman +wise +ecosystem +debrief +flip +wisp +wist +trapezoid +condiment +plutonium +pin +garter +domino +circus +pie +pig +pit +campus +gush +oaf +cashew +oak +detail +virtual +detain +sewer +oar +redden +dresser +wallow +nutrient +godsend +yelp +baker +jab +hiker +pupil +yell +cookbook +vermin +sleek +sleet +sleep +liar +hate +trolley +sallow +tweet +glider +under +tweed +pride +merchant +lure +risk +rise +lurk +jack +confetti +anemia +school +parrot +enjoy +overdo +cracker +almond +direct +nail +street +monorail +ransack +blue +hide +worsen +poison +beater +supplier +dashboard +wink +even +pontoon +studio +path +crossbow +enrol +connoisseur +forum +ravish +auction +settler +mentor +midway +blowtorch +stray +straw +strap +cassino +would +phlegm +bayou +asset +spike +preview +musk +mush +saber +muse +grief +phone +muss +pouch +must +shoot +hutch +ma +ms +mr +machinist +fortress +quarrel +loosen +joyous +hemoglobin +dolphin 
+mayhem +attract +end +trill +keen +bunk +vagrant +rhinoceros +shred +toxin +gate +ancestor +dialect +moisten +kilowatt +mess +lump +mesh +sparkler +parallel +stronghold +splendid +spout +patent +enter +vapor +hedgehog +fetter +deform +clapper +sprout +over +bleach +mallard +oven +caster +digest +forehead +theologian +womanhood +comprehend +tramp +drawback +fade +croquet +tourist +plaster +roost +knighthood +monarch +rental +gloom +chuck +choir +prohibit +hanger +unscrew +gymnasium +poncho +truant +saturday +depress +goo +lair +dinosaur +nitwit +psychic +tonsil +gob +emphysema +nite +washer +resistor +carcass +rail +free +rain +acrobat +fret +harpist +ritual +filter +hopeless +soda +rang +accent +puck +rank +restrict +rant +sober +toy +their +sarcasm +top +tow +tot +fiction +ton +duress +toe +urban +murder +overdraw +tool +hearth +embellish +solicitor +toot +incur +western +nonpartisan +lather +prong +flame +mirth +countess +rag +donkey +fashion +handkerchief +ruckus +taint +raw +rat +rap +protract +spade +ray +snow +thorough +contact +hatch +snob +cleft +extravert +quicken +rider +evangelist +shallow +milkman +coil +coin +glow +interject +flop +metal +freeway +policewoman +flog +yank +chariot +bait +endear +saga +alight +random +sage +dupe +radio +rector +earth +bail +shellfish +spite +stanza +disgust +axiom +waltz +gees +watch +fluid +ultimatum +report +reconstruct +noon +spokesperson +egoism +public +erupt +pacifist +pummel +habit +wrest +nut +resist +corrupt +hourglass +mull +mud +mug +finger +mum +approach +wean +weak +contort +boss +toothbrush +southeast +larynx +devour +devout +censorship +newt +protect +irregular +fault +papyrus +facet +elf +smuggler +trust +bingo +bathroom +beef +legend +beer +spread +communion +loft +bladder +uncommon +craft +spearmint +catch +snipe +teapot +misfit +lessen +thousandth +referendum +pyramid +handrail +broth +lollypop +exterior +suggest +wound +overstep +utilitarian +complex +papaya +screw +pick +deflect +suburb +portal 
+postmark +tassel +ocean +mother +bough +bugger +rodent +shorthand +enlighten +elk +elm +moonbeam +flutist +kelp +misprint +teetotal +upturn +ramrod +dismount +quicksand +spanner +authorship +roach +befit +rumor +apart +ditto +gift +zeal +contradict +hunt +dishonest +zoom +mongrel +hunk +mosquito +hunchback +sanction +excerpt +curio +accost +usher +indirect +intellect +doorstep +nobleman +cooper +combat +letterhead +ice +rhino +newsprint +skylight +convict +christmas +splendor +cord +core +khaki +brawl +corn +brawn +cork +discount +shuck +plug +census +cowboy +plum +choke +surround +caulk +dinner +plus +alga +duke +abet +civic +civil +bath +engulf +cafeteria +art +scamper +transform +sunlight +forbad +virgin +gin +head +medium +amateur +heal +stereo +heat +hear +heap +raft +counsel +muster +bargain +bide +latch +adorn +trim +trio +forearm +cobbler +trip +no +tit +when +junta +tin +whet +tie +implant +depot +pseudonym +evergreen +cleric +toad +geneticist +bullet +navel +yacht +withhold +fasten +backward +coach +impression +rob +rod +focus +livelihood +snip +yokel +rot +discern +environment +aplomb +melon +prop +coop +impend +plantain +cook +cool +looney +level +brother +encroach +quick +lever +pork +drier +trend +bullfrog +pore +inland +voucher +takeoff +bake +port +colic +hymn +choral +postman +spire +theist +thresh +tormentor +humorist +water +fluke +entertain +witch +twentieth +tire +boast +catnap +blotch +cinnamon +prude +weird +tweak +brighten +touchdown +post +panacea +concoct +scan +handler +prey +today +chapter +conductor +altar +cashier +drown +dismal +inhabit +judo +conceal +flagship +hullabaloo +fauna +laughter +streak +overpass +sandbag +trump +stream +despot +stroke +cube +hydrogen +bigot +dress +vital +fourth +dope +ballroom +fascist +clone +scoff +fascism +birthday +apprenticeship +eighth +repeat +classroom +twitch +liquid +inform +reaper +lagoon +superscript +refund +rye +midnight +blare +worship +thermostat +apex +platform +farmer +meridian +cutter 
+underneath +conquer +fern +rescind +wagon +term +name +realist +opera +bunion +bullion +realism +ailment +torch +zebra +distributor +hysteria +hacker +concur +profit +middleman +gram +clover +hull +hulk +flyer +tuner +flare +highjack +motion +turn +place +swine +swing +turf +preach +childhood +origin +pelican +feign +suspend +insist +scollop +bobcat +array +peddler +given +afterthought +district +opus +trillion +plastic +assort +white +hue +hug +hub +cope +season +hut +enigma +naught +grunt +holder +wide +bewitch +spokeswoman +oath +powder +rend +froth +pro +ani +ant +rent +dragon +stolid +marathon +ideal +blunt +surf +sure +aspen +equestrian +tornado +freshman +librarian +bluebird +icon +latex +tendon +annul +seafood +later +koran +readjust +senior +slope +perch +convoy +cheat +cheap +trespass +hack +broach +hustler +trot +woolen +gulf +genius +gull +shimmer +crime +gulp +woof +wood +deign +wool +entrant +viewpoint +lighten +jazz +festoon +tailor +lighter +dye +homestead +reveal +aluminum +workman +joker +dumfound +bison +picnic +pane +vizor +prowl +optometrist +detect +crooked +review +spoons +hiss +smokestack +caucus +fearless +guitar +coma +comb +come +zodiac +isthmus +reaction +superstar +region +quiet +contract +railway +penal +adjoin +color +armchair +pot +period +pop +pole +colon +polo +pod +poll +runaway +turkey +hobo +schoolboy +tiger +padlock +hernia +careful +spirit +robber +pilot +case +shaft +amend +mount +cash +cask +cast +mound +ventriloquist +vest +exult +clutter +helmet +projector +author +alphabet +fender +bowl +check +macaroni +catfish +bellboy +hermit +week +sang +applaud +nest +driver +weed +director +petticoat +lute +puke +vowel +muffler +weep +cartoon +ranch +relief +model +reward +sinner +clod +clog +tip +kilt +ninth +clot +lavish +violent +kill +kiln +kilo +polish +satin +blow +blot +hint +rose +regent +except +blog +bloc +blob +hind +scrapbook +disrupt +impound +kingdom +blowout +sandman +mugger +towel +bracelet +snort +friar +tower 
+node +deduct +wombat +interlock +canteen +slice +mood +slick +legal +moon +moos +teardrop +moot +heir +porter +metropolis +quit +unmask +slaughter +quip +ok +oh +of +jeer +shrimp +pistachio +karat +stand +ox +doze +accredit +amber +tribe +vicar +polka +garb +spinal +forewarn +feudal +whaler +there +racism +strict +racist +valley +fish +gibe +relic +jug +regard +cabinet +castaway +strenuous +jut +terminus +feeler +grasp +grass +toilet +ruffian +cinema +frighten +lichen +encompass +bishop +incorrect +abyss +fiddler +heather +idiot +diarrhea +rubber +idiom +heathen +trash +stalwart +championship +symbol +cove +nucleus +serious +brass +wife +invest +derrick +treason +apparel +platter +all +lace +duplex +lack +spacecraft +disc +dish +follow +settlement +titter +wanton +thursday +program +neglig +woman +song +fat +roe +psychologist +retract +ultraviolet +awful +dapper +fad +induct +stimulus +list +trench +align +flick +ten +tea +tee +rate +design +chalk +what +sub +sun +sum +whimper +rascal +brief +overload +crush +version +pulpit +intersect +row +womb +lacquer +pumpernickel +backer +goodby +thrift +misinterpret +heifer +jogger +cataract +haemophilia +murmur +snug +snub +herring +proceed +tarpaulin +wield +hurray +rustic +quash +inlay +garnish +hurrah +minor +ladybug +wretched +flap +mire +protestor +stutter +flit +flag +stick +mellow +chaplain +berth +wrestler +plagiarist +searchlight +sunscreen +pond +court +goal +goad +goat +sandwich +okay +algebra +headrest +embalm +reflect +catalog +numb +short +ricochet +tsar +shade +waylay +mission +scientist +flaunt +reconnect +pretext +stride +islam +thirtieth +style +glide +pray +inward +wilder +abbey +mattress +resort +airstrip +bout +soccer +might +alter +return +hunter +underground +abacus +mathematician +liniment +policeman +refresh +tactless +friendship +weight +needless +duchess +falsetto +expect +inflict +wager +alcohol +disquiet +hilt +dugout +loll +health +hill +shipment +fiber +solvent +friday +differ +effect 
+disinfect +octagon +physic +teach +sidewalk +jew +blister +thread +threat +bushel +feed +dine +feel +sailor +revolution +least +blank +cigaret +idea +moan +script +gourmet +interact +grime +stork +swarm +storm +moat +syrup +store +mainstay +imperfect +option +hotel +fidget +king +kind +vial +kink +stall +cuff +foreleg +stale +restful +amass +cleaner +exert +strengthen +shrewd +bookworm +gale +gala +gall +remodel +smallpox +toughen +bacteria +chairman +donor +pianist +buff +gill +foreman +rapist +reckon +english +reach +react +nothing +quorum +hyena +amphibian +saloon +notch +scaffold +asphalt +memorandum +felon +font +anvil +firewood +betray +hip +shepherd +hit +deaden +reprint +him +adulthood +snowstorm +forego +stump +martyrdom +arc +bare +bard +bark +ark +arm +barn +blurt +parchment +various +plywood +nincompoop +solo +muslim +sole +outfit +succeed +inertia +orangutang +blazer +bandit +context +bond +cynic +sloth +flier +distress +chaperon +sweet +wastebasket +sweep +weasel +rave +shaykh +bolster +dub +overlook +dud +due +buttermilk +pa +watchword +brick +pi +flight +quintet +dropout +marihuana +cinch +temperament +instructor +heighten +toga +shove +batch +pitchfork +kick +behavior +incognito +lodger +bluegrass +sirup +rip +shamrock +rim +frantic +rig +rid +reprogram +chauffeur +shirt +kimono +viola +shirk +sliver +straightjacket +restraint +painless +throwback +cement +birch +robust +knack +lower +earmark +cheek +cheep +cheer +pollen +facial +vigilant +cabaret +continent +tablet +contractor +plateau +tuxedo +complaint +vendor +foreshadow +awaken +confront +uproar +distrust +breeder +hallmark +play +global +litter +wonderland +butterscotch +saucer +prow +seller +prom +prod +sag +perpendicular +tinker +raider +vivid +cautious +undertow +yawn +ordeal +militia +dialog +tomboy +conquest +momma +piteous +holster +vagabond +stench +canal +pundit +question +swill +parsnip +etch +filet +potassium +glamour +cloth +crank +usurp +delta +upright +crane +outpost +penguin 
+patriot +consist +apricot +caller +peppermint +husk +cartwheel +highlight +dill +freak +dismay +sublet +sagebrush +rainbow +lemon +riot +peach +grouper +saffron +nick +parlor +ferment +bandstand +mock +nice +mustard +chirp +meaning +vigil +vice +ocular +remit +pyre +buffalo +scroll +pervert +lean +alien +dispel +gang +theorist +gold +uphold +floss +breach +sniper +materialist +toenail +spool +spoon +spook +spoof +harlot +outdo +pleas +pleat +trawl +procession +fold +reunion +acid +folk +sandstorm +outsmart +acronym +relent +kangaroo +gloat +miser +cyclist +barb +survivor +guarantor +orangutan +armpit +shovel +duct +ensign +apt +volt +motor +duck +thick +redo +ape +use +fee +fen +frog +germ +modicum +fez +sort +parliament +porch +musician +impress +sore +rabbit +recount +penis +sculptor +annoy +topic +augment +critic +lumber +executor +proof +bittersweet +tap +tar +tax +villain +tag +condescend +tab +spa +silt +tan +rape +counterfeit +sir +sip +scuff +sit +tamper +six +outclass +occur +sic +carrier +goldsmith +toddler +panic +sin +defeat +tension +lesion +attend +tact +hazard +discomfort +tack +wrist +taco +footpath +aftereffect +light +arduous +schoolchild +sailboat +stamped +minnow +damsel +accompanist +hemp +tyrant +badger +glen +superior +inlet +sill +glee +nostalgia +flank +restrain +glisten +turban +redhead +bye +flex +crash +citrus +flour +flout +emerald +flea +republican +investor +successor +easel +footstool +profound +edit +feast +fuzz +trap +blacktop +cocoon +tray +lilac +mincemeat +interplay +our +proclaim +out +semen +tabloid +cocktail +sentiment +frontier +vehement +disarray +clatter +impart +plural +proviso +planner +utensil +tenement +pendant +gospel +tenant +greenhorn +tanker +zoologist +rivet +uproot +embryo +sew +bouquet +echo +bonnet +eleventh +synagog +salient +droop +unknown +galley +snore +anaemia +drool +boil +tidbit +shell +shelf +transistor +woo +diminish +persecutor +goblin +institution +kickback +frugal +brazen +yodel +laughingstock 
+clip +fowl +splatter +flunkey +blip +footwork +outstrip +disjoint +pallor +catholic +clove +rout +outward +bagel +lope +divert +trivia +pharmacist +divers +clash +petrol +siphon +filch +fortieth +class +clasp +fang +dens +dent +pipe +vernacular +gain +son +stove +sonnet +utter +chicken +feat +winch +dandruff +rioter +herald +piano +local +counsellor +vigor +sued +skimp +plaintiff +spud +watercolor +barter +bronco +spur +rite +ghetto +bisect +compliment +ascertain +sediment +view +unison +workbench +ebb +expel +hymnal +distract +violet +still +closet +superb +favor +viper +crude +torpedo +avow +jot +exam +amen +joy +foetus +job +spoil +jog +swift +memento +lifeboat +april +grain +commando +wall +hyphen +walk +respect +unclean +decent +trademark +tutor +reindeer +mike +nickel +cypress +penmanship +dearth +overturn +present +kerchief +corset +wilt +vanilla +priesthood +will +fingertip +wild +whirlpool +layer +mutant +motif +apprehend +rooster +lightweight +thug +thud +whore +headlight +cross +member +pediatrist +inch +grandeur +slave +diploma +outcast +beast +student +pedal +whale +collar +gutter +masochist +overwork +scissor +twirl +flint +outgrow +bandanna +rocker +cameo +rocket +camel +boot +wren +obtain +replenish +biologist +daub +distend +smite +now +panther +drunk +smith +hall +book +ski +enact +knob +sick +myth +know +knot +press +redesign +doughnut +loser +cutlet +vortex +clutch +exceed +setter +flagrant +birthmark +demeanor +growth +export +leaf +lead +leak +miner +leap +belt +leader +trout +obey +slur +mitt +slut +slum +pasta +mite +slug +throne +pike +throng +rare +linger +column +biscuit +fear +swear +sweat +udder +emperor +owl +outset +own +owe +weather +champ +brush +billfold +gape +rowboat +van +platoon +transfer +spiral +grotto +cliff +vat +nourish +catsup +unwrap +saunter +mutter +brassier +assail +tomcat +daffodil +nightgown +record +cake +faggot +maroon +boardwalk +abbot +counteract +limb +squirrel +mutual +glint +boor +percent +other +boom 
+branch +cutthroat +junk +mulch +june +squeak +squeal +extort +jewel +gynecologist +vane +sash diff --git a/libs/pyutil/fileutil.py~ b/libs/pyutil/fileutil.py~ deleted file mode 100644 index e37eb79..0000000 --- a/libs/pyutil/fileutil.py~ +++ /dev/null @@ -1,271 +0,0 @@ -# Copyright (c) 2002-2010 Zooko Wilcox-O'Hearn -# This file is part of pyutil; see README.rst for licensing terms. - -""" -Futz with files like a pro. -""" - -import errno, exceptions, os, stat, tempfile - -try: - import bsddb -except ImportError: - DBNoSuchFileError = None -else: - DBNoSuchFileError = bsddb.db.DBNoSuchFileError - -# read_file() and write_file() copied from Mark Seaborn's blog post. Please -# read it for complete rationale: -# http://lackingrhoticity.blogspot.com/2009/12/readfile-and-writefile-in-python.html - -def read_file(filename, mode='rb'): - """ Read the contents of the file named filename and return it in - a string. This function closes the file handle before it returns - (even if the underlying Python implementation's garbage collector - doesn't). """ - fh = open(filename, mode) - try: - return fh.read() - finally: - fh.close() - -def write_file(filename, data, mode='wb'): - """ Write the string data into a file named filename. This - function closes the file handle (ensuring that the written data is - flushed from the perspective of the Python implementation) before - it returns (even if the underlying Python implementation's garbage - collector doesn't).""" - fh = open(filename, mode) - try: - fh.write(data) - finally: - fh.close() - -# For backwards-compatibility in case someone is using these names. We used to -# have a superkludge in fileutil.py under these names. -def rename(src, dst, tries=4, basedelay=0.1): - return os.rename(src, dst) - -def remove(f, tries=4, basedelay=0.1): - return os.remove(f) - -def rmdir(f, tries=4, basedelay=0.1): - return os.rmdir(f) - -class _Dir(object): - """ - Hold a set of files and subdirs and clean them all up when asked to. 
- """ - def __init__(self, name, cleanup=True): - self.name = name - self.cleanup = cleanup - self.files = [] - self.subdirs = set() - - def file(self, fname, mode=None): - """ - Create a file in the tempdir and remember it so as to close() it - before attempting to cleanup the temp dir. - - @rtype: file - """ - ffn = os.path.join(self.name, fname) - if mode is not None: - fo = open(ffn, mode) - else: - fo = open(ffn) - self.register_file(fo) - return fo - - def subdir(self, dirname): - """ - Create a subdirectory in the tempdir and remember it so as to call - shutdown() on it before attempting to clean up. - - @rtype: _Dir instance - """ - ffn = os.path.join(self.name, dirname) - sd = _Dir(ffn, self.cleanup) - self.register_subdir(sd) - make_dirs(sd.name) - return sd - - def register_file(self, fileobj): - """ - Remember the file object and call close() on it before attempting to - clean up. - """ - self.files.append(fileobj) - - def register_subdir(self, dirobj): - """ - Remember the _Dir object and call shutdown() on it before attempting - to clean up. - """ - self.subdirs.add(dirobj) - - def shutdown(self): - if self.cleanup: - for subdir in hasattr(self, 'subdirs') and self.subdirs or []: - subdir.shutdown() - for fileobj in hasattr(self, 'files') and self.files or []: - if DBNoSuchFileError is None: - fileobj.close() # "close()" is idempotent so we don't need to catch exceptions here - else: - try: - fileobj.close() - except DBNoSuchFileError: - # Ah, except that the bsddb module's file-like object (a DB object) has a non-idempotent close... 
- pass - - if hasattr(self, 'name'): - rm_dir(self.name) - - def __repr__(self): - return "<%s instance at %x %s>" % (self.__class__.__name__, id(self), self.name) - - def __str__(self): - return self.__repr__() - - def __del__(self): - try: - self.shutdown() - except: - import traceback - traceback.print_exc() - -class NamedTemporaryDirectory(_Dir): - """ - Call tempfile.mkdtemp(), store the name of the dir in self.name, and - rm_dir() when it gets garbage collected or "shutdown()". - - Also keep track of file objects for files within the tempdir and call - close() on them before rm_dir(). This is a convenient way to open temp - files within the directory, and it is very helpful on Windows because you - can't delete a directory which contains a file which is currently open. - """ - - def __init__(self, cleanup=True, *args, **kwargs): - """ If cleanup, then the directory will be rmrf'ed when the object is shutdown. """ - name = tempfile.mkdtemp(*args, **kwargs) - _Dir.__init__(self, name, cleanup) - -class ReopenableNamedTemporaryFile: - """ - This uses tempfile.mkstemp() to generate a secure temp file. It then closes - the file, leaving a zero-length file as a placeholder. You can get the - filename with ReopenableNamedTemporaryFile.name. When the - ReopenableNamedTemporaryFile instance is garbage collected or its shutdown() - method is called, it deletes the file. - """ - def __init__(self, *args, **kwargs): - fd, self.name = tempfile.mkstemp(*args, **kwargs) - os.close(fd) - - def __repr__(self): - return "<%s instance at %x %s>" % (self.__class__.__name__, id(self), self.name) - - def __str__(self): - return self.__repr__() - - def __del__(self): - self.shutdown() - - def shutdown(self): - remove(self.name) - -def make_dirs(dirname, mode=0777): - """ - An idempotent version of os.makedirs(). If the dir already exists, do - nothing and return without raising an exception. If this call creates the - dir, return without raising an exception. 
If there is an error that - prevents creation or if the directory gets deleted after make_dirs() creates - it and before make_dirs() checks that it exists, raise an exception. - """ - tx = None - try: - os.makedirs(dirname, mode) - except OSError, x: - tx = x - - if not os.path.isdir(dirname): - if tx: - raise tx - raise exceptions.IOError, "unknown error prevented creation of directory, or deleted the directory immediately after creation: %s" % dirname # careful not to construct an IOError with a 2-tuple, as that has a special meaning... - -def rmtree(dirname): - """ - A threadsafe and idempotent version of shutil.rmtree(). If the dir is - already gone, do nothing and return without raising an exception. If this - call removes the dir, return without raising an exception. If there is an - error that prevents deletion or if the directory gets created again after - rm_dir() deletes it and before rm_dir() checks that it is gone, raise an - exception. - """ - excs = [] - try: - os.chmod(dirname, stat.S_IWRITE | stat.S_IEXEC | stat.S_IREAD) - for f in os.listdir(dirname): - fullname = os.path.join(dirname, f) - if os.path.isdir(fullname): - rm_dir(fullname) - else: - remove(fullname) - os.rmdir(dirname) - except EnvironmentError, le: - # Ignore "No such file or directory", collect any other exception. - if (le.args[0] != 2 and le.args[0] != 3) or (le.args[0] != errno.ENOENT): - excs.append(le) - except Exception, le: - excs.append(le) - - # Okay, now we've recursively removed everything, ignoring any "No - # such file or directory" errors, and collecting any other errors. - - if os.path.exists(dirname): - if len(excs) == 1: - raise excs[0] - if len(excs) == 0: - raise OSError, "Failed to remove dir for unknown reason." - raise OSError, excs - -def rm_dir(dirname): - # Renamed to be like shutil.rmtree and unlike rmdir. 
- return rmtree(dirname) - -def remove_if_possible(f): - try: - remove(f) - except EnvironmentError: - pass - -def remove_if_present(f): - try: - remove(f) - except EnvironmentError, le: - # Ignore "No such file or directory", re-raise any other exception. - if (le.args[0] != 2 and le.args[0] != 3) or (le.args[0] != errno.ENOENT): - raise - -def rmdir_if_possible(f): - try: - rmdir(f) - except EnvironmentError: - pass - -def open_or_create(fname, binarymode=True): - try: - f = open(fname, binarymode and "r+b" or "r+") - except EnvironmentError: - f = open(fname, binarymode and "w+b" or "w+") - return f - -def du(basedir): - size = 0 - - for root, dirs, files in os.walk(basedir): - for f in files: - fn = os.path.join(root, f) - size += os.path.getsize(fn) - - return size diff --git a/libs/pyutil/iputil.py b/libs/pyutil/iputil.py index b8b9636..cb3e7c0 100644 --- a/libs/pyutil/iputil.py +++ b/libs/pyutil/iputil.py @@ -1,22 +1,12 @@ -# portions extracted from ipaddresslib by Autonomous Zone Industries, LGPL (author: Greg Smith) -# portions adapted from nattraverso.ipdiscover -# portions authored by Brian Warner, working for Allmydata -# most recent version authored by Zooko O'Whielacronx, working for Allmydata - # from the Python Standard Library -import os, re, socket, sys +import os, re, socket, sys, subprocess # from Twisted -from twisted.internet import defer, reactor -from twisted.python import failure +from twisted.internet import defer, threads, reactor from twisted.internet.protocol import DatagramProtocol -from twisted.internet.utils import getProcessOutput from twisted.python.procutils import which from twisted.python import log -# from pyutil -import observer - try: import resource def increase_rlimits(): @@ -77,6 +67,7 @@ except ImportError: # since one might be shadowing the other. This hack appeases pyflakes. 
increase_rlimits = _increase_rlimits + def get_local_addresses_async(target="198.41.0.4"): # A.ROOT-SERVERS.NET """ Return a Deferred that fires with a list of IPv4 addresses (as dotted-quad @@ -121,14 +112,16 @@ def get_local_ip_for(target): except socket.gaierror: # DNS isn't running, or somehow we encountered an error - # note: if an interface is configured and up, but nothing is connected to it, - # gethostbyname("A.ROOT-SERVERS.NET") will take 20 seconds to raise socket.gaierror - # . This is synchronous and occurs for each node being started, so users of certain unit - # tests will see something like 120s of delay, which may be enough to hit the default - # trial timeouts. For that reason, get_local_addresses_async() was changed to default to - # the numerical ip address for A.ROOT-SERVERS.NET, to avoid this DNS lookup. This also - # makes node startup a tad faster. - + # note: if an interface is configured and up, but nothing is + # connected to it, gethostbyname("A.ROOT-SERVERS.NET") will take 20 + # seconds to raise socket.gaierror . This is synchronous and occurs + # for each node being started, so users of + # test.common.SystemTestMixin (like test_system) will see something + # like 120s of delay, which may be enough to hit the default trial + # timeouts. For that reason, get_local_addresses_async() was changed + # to default to the numerical ip address for A.ROOT-SERVERS.NET, to + # avoid this DNS lookup. This also makes node startup fractionally + # faster. 
return None udpprot = DatagramProtocol() port = reactor.listenUDP(0, udpprot) @@ -146,16 +139,29 @@ _platform_map = { "linux-i386": "linux", # redhat "linux-ppc": "linux", # redhat "linux2": "linux", # debian + "linux3": "linux", # debian "win32": "win32", "irix6-n32": "irix", "irix6-n64": "irix", "irix6": "irix", "openbsd2": "bsd", + "openbsd3": "bsd", + "openbsd4": "bsd", + "openbsd5": "bsd", "darwin": "bsd", # Mac OS X "freebsd4": "bsd", "freebsd5": "bsd", "freebsd6": "bsd", + "freebsd7": "bsd", + "freebsd8": "bsd", + "freebsd9": "bsd", "netbsd1": "bsd", + "netbsd2": "bsd", + "netbsd3": "bsd", + "netbsd4": "bsd", + "netbsd5": "bsd", + "netbsd6": "bsd", + "dragonfly2": "bsd", "sunos5": "sunos", "cygwin": "cygwin", } @@ -173,12 +179,12 @@ _win32_re = re.compile('^\s*\d+\.\d+\.\d+\.\d+\s.+\s(?P<address>\d+\.\d+\.\d+\.\ # These work in Redhat 6.x and Debian 2.2 potato _linux_path = '/sbin/ifconfig' -_linux_re = re.compile('^\s*inet addr:(?P<address>\d+\.\d+\.\d+\.\d+)\s.+$', flags=re.M|re.I|re.S) +_linux_re = re.compile('^\s*inet [a-zA-Z]*:?(?P<address>\d+\.\d+\.\d+\.\d+)\s.+$', flags=re.M|re.I|re.S) -# originally NetBSD 1.4 (submitted by Rhialto), Darwin, Mac OS X, FreeBSD, OpenBSD -_bsd_path = '/sbin/ifconfig' -_bsd_args = ('-a',) -_bsd_re = re.compile('^\s+inet (?P<address>\d+\.\d+\.\d+\.\d+)\s.+$', flags=re.M|re.I|re.S) +# NetBSD 1.4 (submitted by Rhialto), Darwin, Mac OS X +_netbsd_path = '/sbin/ifconfig' +_netbsd_args = ('-a',) +_netbsd_re = re.compile('^\s+inet [a-zA-Z]*:?(?P<address>\d+\.\d+\.\d+\.\d+)\s.+$', flags=re.M|re.I|re.S) # Irix 6.5 _irix_path = '/usr/etc/ifconfig' @@ -186,39 +192,6 @@ _irix_path = '/usr/etc/ifconfig' # Solaris 2.x _sunos_path = '/usr/sbin/ifconfig' -class SequentialTrier(object): - """ I hold a list of executables to try and try each one in turn - until one gives me a list of IP addresses.""" - - def __init__(self, exebasename, args, regex): - assert not os.path.isabs(exebasename) - self.exes_left_to_try = which(exebasename) - 
self.exes_left_to_try.reverse() - self.args = args - self.regex = regex - self.o = observer.OneShotObserverList() - self._try_next() - - def _try_next(self): - if not self.exes_left_to_try: - self.o.fire(None) - else: - exe = self.exes_left_to_try.pop() - d2 = _query(exe, self.args, self.regex) - - def cb(res): - if res: - self.o.fire(res) - else: - self._try_next() - - def eb(why): - self._try_next() - - d2.addCallbacks(cb, eb) - - def when_tried(self): - return self.o.when_fired() # k: platform string as provided in the value of _platform_map # v: tuple of (path_to_tool, args, regex,) @@ -226,19 +199,22 @@ _tool_map = { "linux": (_linux_path, (), _linux_re,), "win32": (_win32_path, _win32_args, _win32_re,), "cygwin": (_win32_path, _win32_args, _win32_re,), - "bsd": (_bsd_path, _bsd_args, _bsd_re,), - "irix": (_irix_path, _bsd_args, _bsd_re,), - "sunos": (_sunos_path, _bsd_args, _bsd_re,), + "bsd": (_netbsd_path, _netbsd_args, _netbsd_re,), + "irix": (_irix_path, _netbsd_args, _netbsd_re,), + "sunos": (_sunos_path, _netbsd_args, _netbsd_re,), } + def _find_addresses_via_config(): - # originally by Greg Smith, hacked by Zooko to conform to Brian Warner's API. + return threads.deferToThread(_synchronously_find_addresses_via_config) + +def _synchronously_find_addresses_via_config(): + # originally by Greg Smith, hacked by Zooko to conform to Brian's API platform = _platform_map.get(sys.platform) - (pathtotool, args, regex,) = _tool_map.get(platform, ('ifconfig', _bsd_args, _bsd_re,)) + if not platform: + raise UnsupportedPlatformError(sys.platform) - # If the platform isn't known then we attempt BSD-style ifconfig. If it - # turns out that we don't get anything resembling a dotted quad IPv4 address - # out of it, then we'll raise UnsupportedPlatformError. + (pathtotool, args, regex,) = _tool_map[platform] # If pathtotool is a fully qualified path then we just try that. 
# If it is merely an executable name then we use Twisted's @@ -246,34 +222,33 @@ def _find_addresses_via_config(): # gives us something that resembles a dotted-quad IPv4 address. if os.path.isabs(pathtotool): - d = _query(pathtotool, args, regex) - else: - d = SequentialTrier(pathtotool, args, regex).when_tried() - - d.addCallback(_check_result) - return d - -def _check_result(result): - if not result and not _platform_map.has_key(sys.platform): - return failure.Failure(UnsupportedPlatformError(sys.platform)) + return _query(pathtotool, args, regex) else: - return result + exes_to_try = which(pathtotool) + for exe in exes_to_try: + try: + addresses = _query(exe, args, regex) + except Exception: + addresses = [] + if addresses: + return addresses + return [] def _query(path, args, regex): - d = getProcessOutput(path, args) - def _parse(output): - addresses = [] - outputsplit = output.split('\n') - for outline in outputsplit: - m = regex.match(outline) - if m: - addr = m.groupdict()['address'] - if addr not in addresses: - addresses.append(addr) + env = {'LANG': 'en_US.UTF-8'} + p = subprocess.Popen([path] + list(args), stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env) + (output, err) = p.communicate() - return addresses - d.addCallback(_parse) - return d + addresses = [] + outputsplit = output.split('\n') + for outline in outputsplit: + m = regex.match(outline) + if m: + addr = m.groupdict()['address'] + if addr not in addresses: + addresses.append(addr) + + return addresses def _cygwin_hack_find_addresses(target): addresses = [] diff --git a/libs/pyutil/iputil.py~ b/libs/pyutil/iputil.py~ new file mode 100644 index 0000000..b8b9636 --- /dev/null +++ b/libs/pyutil/iputil.py~ @@ -0,0 +1,288 @@ +# portions extracted from ipaddresslib by Autonomous Zone Industries, LGPL (author: Greg Smith) +# portions adapted from nattraverso.ipdiscover +# portions authored by Brian Warner, working for Allmydata +# most recent version authored by Zooko O'Whielacronx, working 
for Allmydata + +# from the Python Standard Library +import os, re, socket, sys + +# from Twisted +from twisted.internet import defer, reactor +from twisted.python import failure +from twisted.internet.protocol import DatagramProtocol +from twisted.internet.utils import getProcessOutput +from twisted.python.procutils import which +from twisted.python import log + +# from pyutil +import observer + +try: + import resource + def increase_rlimits(): + # We'd like to raise our soft resource.RLIMIT_NOFILE, since certain + # systems (OS-X, probably solaris) start with a relatively low limit + # (256), and some unit tests want to open up more sockets than this. + # Most linux systems start with both hard and soft limits at 1024, + # which is plenty. + + # unfortunately the values to pass to setrlimit() vary widely from + # one system to another. OS-X reports (256, HUGE), but the real hard + # limit is 10240, and accepts (-1,-1) to mean raise it to the + # maximum. Cygwin reports (256, -1), then ignores a request of + # (-1,-1): instead you have to guess at the hard limit (it appears to + # be 3200), so using (3200,-1) seems to work. Linux reports a + # sensible (1024,1024), then rejects (-1,-1) as trying to raise the + # maximum limit, so you could set it to (1024,1024) but you might as + # well leave it alone. + + try: + current = resource.getrlimit(resource.RLIMIT_NOFILE) + except AttributeError: + # we're probably missing RLIMIT_NOFILE + return + + if current[0] >= 1024: + # good enough, leave it alone + return + + try: + if current[1] > 0 and current[1] < 1000000: + # solaris reports (256, 65536) + resource.setrlimit(resource.RLIMIT_NOFILE, + (current[1], current[1])) + else: + # this one works on OS-X (bsd), and gives us 10240, but + # it doesn't work on linux (on which both the hard and + # soft limits are set to 1024 by default). 
+ resource.setrlimit(resource.RLIMIT_NOFILE, (-1,-1)) + new = resource.getrlimit(resource.RLIMIT_NOFILE) + if new[0] == current[0]: + # probably cygwin, which ignores -1. Use a real value. + resource.setrlimit(resource.RLIMIT_NOFILE, (3200,-1)) + + except ValueError: + log.msg("unable to set RLIMIT_NOFILE: current value %s" + % (resource.getrlimit(resource.RLIMIT_NOFILE),)) + except: + # who knows what. It isn't very important, so log it and continue + log.err() +except ImportError: + def _increase_rlimits(): + # TODO: implement this for Windows. Although I suspect the + # solution might be "be running under the iocp reactor and + # make this function be a no-op". + pass + # pyflakes complains about two 'def FOO' statements in the same time, + # since one might be shadowing the other. This hack appeases pyflakes. + increase_rlimits = _increase_rlimits + +def get_local_addresses_async(target="198.41.0.4"): # A.ROOT-SERVERS.NET + """ + Return a Deferred that fires with a list of IPv4 addresses (as dotted-quad + strings) that are currently configured on this host, sorted in descending + order of how likely we think they are to work. + + @param target: we want to learn an IP address they could try using to + connect to us; The default value is fine, but it might help if you + pass the address of a host that you are actually trying to be + reachable to. + """ + addresses = [] + local_ip = get_local_ip_for(target) + if local_ip: + addresses.append(local_ip) + + if sys.platform == "cygwin": + d = _cygwin_hack_find_addresses(target) + else: + d = _find_addresses_via_config() + + def _collect(res): + for addr in res: + if addr != "0.0.0.0" and not addr in addresses: + addresses.append(addr) + return addresses + d.addCallback(_collect) + + return d + +def get_local_ip_for(target): + """Find out what our IP address is for use by a given target. + + @return: the IP address as a dotted-quad string which could be used by + to connect to us. It might work for them, it might not. 
If + there is no suitable address (perhaps we don't currently have an + externally-visible interface), this will return None. + """ + + try: + target_ipaddr = socket.gethostbyname(target) + except socket.gaierror: + # DNS isn't running, or somehow we encountered an error + + # note: if an interface is configured and up, but nothing is connected to it, + # gethostbyname("A.ROOT-SERVERS.NET") will take 20 seconds to raise socket.gaierror + # . This is synchronous and occurs for each node being started, so users of certain unit + # tests will see something like 120s of delay, which may be enough to hit the default + # trial timeouts. For that reason, get_local_addresses_async() was changed to default to + # the numerical ip address for A.ROOT-SERVERS.NET, to avoid this DNS lookup. This also + # makes node startup a tad faster. + + return None + udpprot = DatagramProtocol() + port = reactor.listenUDP(0, udpprot) + try: + udpprot.transport.connect(target_ipaddr, 7) + localip = udpprot.transport.getHost().host + except socket.error: + # no route to that host + localip = None + port.stopListening() # note, this returns a Deferred + return localip + +# k: result of sys.platform, v: which kind of IP configuration reader we use +_platform_map = { + "linux-i386": "linux", # redhat + "linux-ppc": "linux", # redhat + "linux2": "linux", # debian + "win32": "win32", + "irix6-n32": "irix", + "irix6-n64": "irix", + "irix6": "irix", + "openbsd2": "bsd", + "darwin": "bsd", # Mac OS X + "freebsd4": "bsd", + "freebsd5": "bsd", + "freebsd6": "bsd", + "netbsd1": "bsd", + "sunos5": "sunos", + "cygwin": "cygwin", + } + +class UnsupportedPlatformError(Exception): + pass + +# Wow, I'm really amazed at home much mileage we've gotten out of calling +# the external route.exe program on windows... It appears to work on all +# versions so far. Still, the real system calls would much be preferred... +# ... thus wrote Greg Smith in time immemorial... 
+_win32_path = 'route.exe' +_win32_args = ('print',) +_win32_re = re.compile('^\s*\d+\.\d+\.\d+\.\d+\s.+\s(?P<address>\d+\.\d+\.\d+\.\d+)\s+(?P<metric>\d+)\s*$', flags=re.M|re.I|re.S) + +# These work in Redhat 6.x and Debian 2.2 potato +_linux_path = '/sbin/ifconfig' +_linux_re = re.compile('^\s*inet addr:(?P<address>\d+\.\d+\.\d+\.\d+)\s.+$', flags=re.M|re.I|re.S) + +# originally NetBSD 1.4 (submitted by Rhialto), Darwin, Mac OS X, FreeBSD, OpenBSD +_bsd_path = '/sbin/ifconfig' +_bsd_args = ('-a',) +_bsd_re = re.compile('^\s+inet (?P<address>\d+\.\d+\.\d+\.\d+)\s.+$', flags=re.M|re.I|re.S) + +# Irix 6.5 +_irix_path = '/usr/etc/ifconfig' + +# Solaris 2.x +_sunos_path = '/usr/sbin/ifconfig' + +class SequentialTrier(object): + """ I hold a list of executables to try and try each one in turn + until one gives me a list of IP addresses.""" + + def __init__(self, exebasename, args, regex): + assert not os.path.isabs(exebasename) + self.exes_left_to_try = which(exebasename) + self.exes_left_to_try.reverse() + self.args = args + self.regex = regex + self.o = observer.OneShotObserverList() + self._try_next() + + def _try_next(self): + if not self.exes_left_to_try: + self.o.fire(None) + else: + exe = self.exes_left_to_try.pop() + d2 = _query(exe, self.args, self.regex) + + def cb(res): + if res: + self.o.fire(res) + else: + self._try_next() + + def eb(why): + self._try_next() + + d2.addCallbacks(cb, eb) + + def when_tried(self): + return self.o.when_fired() + +# k: platform string as provided in the value of _platform_map +# v: tuple of (path_to_tool, args, regex,) +_tool_map = { + "linux": (_linux_path, (), _linux_re,), + "win32": (_win32_path, _win32_args, _win32_re,), + "cygwin": (_win32_path, _win32_args, _win32_re,), + "bsd": (_bsd_path, _bsd_args, _bsd_re,), + "irix": (_irix_path, _bsd_args, _bsd_re,), + "sunos": (_sunos_path, _bsd_args, _bsd_re,), + } +def _find_addresses_via_config(): + # originally by Greg Smith, hacked by Zooko to conform to Brian Warner's API. 
+ + platform = _platform_map.get(sys.platform) + (pathtotool, args, regex,) = _tool_map.get(platform, ('ifconfig', _bsd_args, _bsd_re,)) + + # If the platform isn't known then we attempt BSD-style ifconfig. If it + # turns out that we don't get anything resembling a dotted quad IPv4 address + # out of it, then we'll raise UnsupportedPlatformError. + + # If pathtotool is a fully qualified path then we just try that. + # If it is merely an executable name then we use Twisted's + # "which()" utility and try each executable in turn until one + # gives us something that resembles a dotted-quad IPv4 address. + + if os.path.isabs(pathtotool): + d = _query(pathtotool, args, regex) + else: + d = SequentialTrier(pathtotool, args, regex).when_tried() + + d.addCallback(_check_result) + return d + +def _check_result(result): + if not result and not _platform_map.has_key(sys.platform): + return failure.Failure(UnsupportedPlatformError(sys.platform)) + else: + return result + +def _query(path, args, regex): + d = getProcessOutput(path, args) + def _parse(output): + addresses = [] + outputsplit = output.split('\n') + for outline in outputsplit: + m = regex.match(outline) + if m: + addr = m.groupdict()['address'] + if addr not in addresses: + addresses.append(addr) + + return addresses + d.addCallback(_parse) + return d + +def _cygwin_hack_find_addresses(target): + addresses = [] + for h in [target, "localhost", "127.0.0.1",]: + try: + addr = get_local_ip_for(h) + if addr not in addresses: + addresses.append(addr) + except socket.gaierror: + pass + + return defer.succeed(addresses) diff --git a/libs/pyutil/mathutil.py b/libs/pyutil/mathutil.py index 46781b0..9c16980 100644 --- a/libs/pyutil/mathutil.py +++ b/libs/pyutil/mathutil.py @@ -11,7 +11,7 @@ def div_ceil(n, d): """ The smallest integer k such that k*d >= n. 
""" - return (n/d) + (n%d != 0) + return int((n//d) + (n%d != 0)) def next_multiple(n, k): """ diff --git a/libs/pyutil/mathutil.py~ b/libs/pyutil/mathutil.py~ new file mode 100644 index 0000000..46781b0 --- /dev/null +++ b/libs/pyutil/mathutil.py~ @@ -0,0 +1,106 @@ +# Copyright (c) 2005-2010 Zooko Wilcox-O'Hearn +# This file is part of pyutil; see README.rst for licensing terms. + +""" +A few commonly needed functions. +""" + +import math + +def div_ceil(n, d): + """ + The smallest integer k such that k*d >= n. + """ + return (n/d) + (n%d != 0) + +def next_multiple(n, k): + """ + The smallest multiple of k which is >= n. Note that if n is 0 then the + answer is 0. + """ + return div_ceil(n, k) * k + +def pad_size(n, k): + """ + The smallest number that has to be added to n to equal a multiple of k. + """ + if n%k: + return k - n%k + else: + return 0 + +def is_power_of_k(n, k): + return k**int(math.log(n, k) + 0.5) == n + +def next_power_of_k(n, k): + p = 1 + while p < n: + p *= k + return p + +def ave(l): + return sum(l) / len(l) + +def log_ceil(n, b): + """ + The smallest integer k such that b^k >= n. + + log_ceil(n, 2) is the number of bits needed to store any of n values, e.g. + the number of bits needed to store any of 128 possible values is 7. + """ + p = 1 + k = 0 + while p < n: + p *= b + k += 1 + return k + +def log_floor(n, b): + """ + The largest integer k such that b^k <= n. + """ + p = 1 + k = 0 + while p <= n: + p *= b + k += 1 + return k - 1 + +def linear_fit_slope(ps): + """ + Single-independent-variable linear regression -- least squares method. + + At least, I *think* this function computes that answer. I no longer + remember where I learned this trick and at the moment I can't prove to + myself that this is correct. 
+ + @param ps a sequence of tuples of (x, y) + """ + avex = ave([x for (x, y) in ps]) + avey = ave([y for (x, y) in ps]) + sxy = sum([ (x - avex) * (y - avey) for (x, y) in ps ]) + sxx = sum([ (x - avex) ** 2 for (x, y) in ps ]) + if sxx == 0: + return None + return sxy / sxx + +def permute(l): + """ + Return all possible permutations of l. + + @type l: sequence + @rtype a set of sequences + """ + if len(l) == 1: + return [l,] + + res = [] + for i in range(len(l)): + l2 = list(l[:]) + x = l2.pop(i) + for l3 in permute(l2): + l3.append(x) + res.append(l3) + + return res + diff --git a/libs/pyutil/odict.py~ b/libs/pyutil/odict.py~ deleted file mode 100644 index 0ed5ce7..0000000 --- a/libs/pyutil/odict.py~ +++ /dev/null @@ -1,552 +0,0 @@ -# Copyright (c) 2002-2009 Zooko "Zooko" Wilcox-O'Hearn - -""" -This module offers a Ordered Dict, which is a dict that preserves -insertion order. See PEP 372 for description of the problem. This -implementation uses a linked-list to get good O(1) asymptotic -performance. (Actually it is O(hashtable-update-cost), but whatever.) - -Warning: if -O optimizations are not turned on then OrderedDict performs -extensive self-analysis in every function call, which can take minutes -and minutes for a large cache. Turn on -O, or comment out assert -self._assert_invariants() -""" - -import operator - -from assertutil import _assert, precondition -from humanreadable import hr - -class OrderedDict: - """ - An efficient ordered dict. - - Adding an item that is already in the dict *does not* make it the - most- recently-added item although it may change the state of the - dict itself (if the value is different than the previous value). - - See also SmallOrderedDict (below), which is faster in some cases. 
- """ - class ItemIterator: - def __init__(self, c): - self.c = c - self.i = c.d[c.ts][1] - def __iter__(self): - return self - def next(self): - if self.i is self.c.hs: - raise StopIteration - k = self.i - precondition(self.c.d.has_key(k), "The iterated OrderedDict doesn't have the next key. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", k, self.c) - (v, p, n,) = self.c.d[k] - self.i = p - return (k, v,) - - class KeyIterator: - def __init__(self, c): - self.c = c - self.i = c.d[c.ts][1] - def __iter__(self): - return self - def next(self): - if self.i is self.c.hs: - raise StopIteration - k = self.i - precondition(self.c.d.has_key(k), "The iterated OrderedDict doesn't have the next key. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", k, self.c) - (v, p, n,) = self.c.d[k] - self.i = p - return k - - class ValIterator: - def __init__(self, c): - self.c = c - self.i = c.d[c.ts][1] - def __iter__(self): - return self - def next(self): - if self.i is self.c.hs: - raise StopIteration - precondition(self.c.d.has_key(self.i), "The iterated OrderedDict doesn't have the next key. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", self.i, self.c) - (v, p, n,) = self.c.d[self.i] - self.i = p - return v - - class Sentinel: - def __init__(self, msg): - self.msg = msg - def __repr__(self): - return "<%s %s>" % (self.__class__.__name__, self.msg,) - - def __init__(self, initialdata={}): - self.d = {} # k: k, v: [v, prev, next,] # the dict - self.hs = OrderedDict.Sentinel("hs") - self.ts = OrderedDict.Sentinel("ts") - self.d[self.hs] = [None, self.hs, self.ts,] # This allows us to use sentinels as normal nodes. - self.d[self.ts] = [None, self.hs, self.ts,] # This allows us to use sentinels as normal nodes. 
- self.update(initialdata) - - assert self._assert_invariants() - - def __repr_n__(self, n=None): - s = ["{",] - try: - iter = self.iteritems() - x = iter.next() - s.append(str(x[0])); s.append(": "); s.append(str(x[1])) - i = 1 - while (n is None) or (i < n): - x = iter.next() - s.append(", "); s.append(str(x[0])); s.append(": "); s.append(str(x[1])) - except StopIteration: - pass - s.append("}") - return ''.join(s) - - def __repr__(self): - return "<%s %s>" % (self.__class__.__name__, self.__repr_n__(),) - - def __str__(self): - return "<%s %s>" % (self.__class__.__name__, self.__repr_n__(16),) - - def _assert_invariants(self): - _assert((len(self.d) > 2) == (self.d[self.hs][2] is not self.ts) == (self.d[self.ts][1] is not self.hs), "Head and tail point to something other than each other if and only if there is at least one element in the dictionary.", self.hs, self.ts, len(self.d)) - foundprevsentinel = 0 - foundnextsentinel = 0 - for (k, (v, p, n,)) in self.d.iteritems(): - _assert(v not in (self.hs, self.ts,)) - _assert(p is not self.ts, "A reference to the tail sentinel may not appear in prev.", k, v, p, n) - _assert(n is not self.hs, "A reference to the head sentinel may not appear in next.", k, v, p, n) - _assert(p in self.d, "Each prev is required to appear as a key in the dict.", k, v, p, n) - _assert(n in self.d, "Each next is required to appear as a key in the dict.", k, v, p, n) - if p is self.hs: - foundprevsentinel += 1 - _assert(foundprevsentinel <= 2, "No more than two references to the head sentinel may appear as a prev.", k, v, p, n) - if n is self.ts: - foundnextsentinel += 1 - _assert(foundnextsentinel <= 2, "No more than one reference to the tail sentinel may appear as a next.", k, v, p, n) - _assert(foundprevsentinel == 2, "A reference to the head sentinel is required appear as a prev (plus a self-referential reference).") - _assert(foundnextsentinel == 2, "A reference to the tail sentinel is required appear as a next (plus a self-referential 
reference).") - - count = 0 - for (k, v,) in self.iteritems(): - _assert(k not in (self.hs, self.ts,), k, self.hs, self.ts) - count += 1 - _assert(count == len(self.d)-2, count, len(self.d)) # -2 for the sentinels - - return True - - def move_to_most_recent(self, k, strictkey=False): - assert self._assert_invariants() - - if not self.d.has_key(k): - if strictkey: - raise KeyError, k - return - - node = self.d[k] - - # relink - self.d[node[1]][2] = node[2] - self.d[node[2]][1] = node[1] - - # move to front - hnode = self.d[self.hs] - - node[1] = self.hs - node[2] = hnode[2] - hnode[2] = k - self.d[node[2]][1] = k - - assert self._assert_invariants() - - def iteritems(self): - return OrderedDict.ItemIterator(self) - - def itervalues(self): - return OrderedDict.ValIterator(self) - - def iterkeys(self): - return self.__iter__() - - def __iter__(self): - return OrderedDict.KeyIterator(self) - - def __getitem__(self, key, default=None, strictkey=True): - node = self.d.get(key) - if not node: - if strictkey: - raise KeyError, key - return default - return node[0] - - def __setitem__(self, k, v=None): - assert self._assert_invariants() - - node = self.d.get(k) - if node: - node[0] = v - return - - hnode = self.d[self.hs] - n = hnode[2] - self.d[k] = [v, self.hs, n,] - hnode[2] = k - self.d[n][1] = k - - assert self._assert_invariants() - return v - - def __delitem__(self, key, default=None, strictkey=True): - """ - @param strictkey: True if you want a KeyError in the case that - key is not there, False if you want a reference to default - in the case that key is not there - @param default: the object to return if key is not there; This - is ignored if strictkey. 
- - @return: the value removed or default if there is not item by - that key and strictkey is False - """ - assert self._assert_invariants() - if self.d.has_key(key): - node = self.d[key] - # relink - self.d[node[1]][2] = node[2] - self.d[node[2]][1] = node[1] - del self.d[key] - assert self._assert_invariants() - return node[0] - elif strictkey: - assert self._assert_invariants() - raise KeyError, key - else: - assert self._assert_invariants() - return default - - def has_key(self, key): - assert self._assert_invariants() - if self.d.has_key(key): - assert self._assert_invariants() - return True - else: - assert self._assert_invariants() - return False - - def clear(self): - assert self._assert_invariants() - self.d.clear() - self.d[self.hs] = [None, self.hs, self.ts,] # This allows us to use sentinels as normal nodes. - self.d[self.ts] = [None, self.hs, self.ts,] # This allows us to use sentinels as normal nodes. - assert self._assert_invariants() - - def update(self, otherdict): - """ - @return: self - """ - assert self._assert_invariants() - - for (k, v,) in otherdict.iteritems(): - assert self._assert_invariants() - self[k] = v - assert self._assert_invariants() - - def pop(self): - assert self._assert_invariants() - if len(self.d) < 2: # the +2 is for the sentinels - raise KeyError, 'popitem(): dictionary is empty' - k = self.d[self.hs][2] - self.remove(k) - assert self._assert_invariants() - return k - - def popitem(self): - assert self._assert_invariants() - if len(self.d) < 2: # the +2 is for the sentinels - raise KeyError, 'popitem(): dictionary is empty' - k = self.d[self.hs][2] - val = self.remove(k) - assert self._assert_invariants() - return (k, val,) - - def keys_unsorted(self): - assert self._assert_invariants() - t = self.d.copy() - del t[self.hs] - del t[self.ts] - assert self._assert_invariants() - return t.keys() - - def keys(self): - res = [None] * len(self) - i = 0 - for k in self.iterkeys(): - res[i] = k - i += 1 - return res - - def 
values_unsorted(self): - assert self._assert_invariants() - t = self.d.copy() - del t[self.hs] - del t[self.ts] - assert self._assert_invariants() - return map(operator.__getitem__, t.values(), [0]*len(t)) - - def values(self): - res = [None] * len(self) - i = 0 - for v in self.itervalues(): - res[i] = v - i += 1 - return res - - def items(self): - res = [None] * len(self) - i = 0 - for it in self.iteritems(): - res[i] = it - i += 1 - return res - - def __len__(self): - return len(self.d) - 2 - - def insert(self, key, val=None): - assert self._assert_invariants() - result = self.__setitem__(key, val) - assert self._assert_invariants() - return result - - def setdefault(self, key, default=None): - assert self._assert_invariants() - if not self.has_key(key): - self[key] = default - assert self._assert_invariants() - return self[key] - - def get(self, key, default=None): - return self.__getitem__(key, default, strictkey=False) - - def remove(self, key, default=None, strictkey=True): - assert self._assert_invariants() - result = self.__delitem__(key, default, strictkey) - assert self._assert_invariants() - return result - -class SmallOrderedDict(dict): - """ - SmallOrderedDict is faster than OrderedDict for small sets. How small? That - depends on your machine and which operations you use most often. Use - performance profiling to determine whether the cache class that you are - using makes any difference to the performance of your program, and if it - does, then run "quick_bench()" in test/test_cache.py to see which cache - implementation is faster for the size of your datasets. - - A simple least-recently-used cache. It keeps an LRU queue, and - when the number of items in the cache reaches maxsize, it removes - the least recently used item. - - "Looking" at an item or a key such as with "has_key()" makes that - item become the most recently used item. - - You can also use "refresh()" to explicitly make an item become the most - recently used item. 
- - Adding an item that is already in the dict *does* make it the - most- recently-used item although it does not change the state of - the dict itself. - """ - class ItemIterator: - def __init__(self, c): - self.c = c - self.i = 0 - def __iter__(self): - return self - def next(self): - precondition(self.i <= len(self.c._lru), "The iterated SmallOrderedDict doesn't have this many elements. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", self.i, self.c) - precondition(dict.has_key(self.c, self.c._lru[self.i]), "The iterated SmallOrderedDict doesn't have this key. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", self.i, self.c._lru[self.i], self.c) - if self.i == len(self.c._lru): - raise StopIteration - k = self.i - self.i += 1 - return (k, dict.__getitem__(self.c, k),) - - class KeyIterator: - def __init__(self, c): - self.c = c - self.i = 0 - def __iter__(self): - return self - def next(self): - precondition(self.i <= len(self.c._lru), "The iterated SmallOrderedDict doesn't have this many elements. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", self.i, self.c) - precondition(dict.has_key(self.c, self.c._lru[self.i]), "The iterated SmallOrderedDict doesn't have this key. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", self.i, self.c._lru[self.i], self.c) - if self.i == len(self.c._lru): - raise StopIteration - k = self.i - self.i += 1 - return k - - class ValueIterator: - def __init__(self, c): - self.c = c - self.i = 0 - def __iter__(self): - return self - def next(self): - precondition(self.i <= len(self.c._lru), "The iterated SmallOrderedDict doesn't have this many elements. 
Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", self.i, self.c) - precondition(dict.has_key(self.c, self.c._lru[self.i]), "The iterated SmallOrderedDict doesn't have this key. Most likely this is because someone altered the contents of the OrderedDict while the iteration was in progress.", self.i, self.c._lru[self.i], self.c) - if self.i == len(self.c._lru): - raise StopIteration - k = self.i - self.i += 1 - return dict.__getitem__(self.c, k) - - def __init__(self, initialdata={}, maxsize=128): - dict.__init__(self, initialdata) - self._lru = initialdata.keys() # contains keys - self._maxsize = maxsize - over = len(self) - self._maxsize - if over > 0: - map(dict.__delitem__, [self]*over, self._lru[:over]) - del self._lru[:over] - assert self._assert_invariants() - - def _assert_invariants(self): - _assert(len(self._lru) <= self._maxsize, "Size is required to be <= maxsize.") - _assert(len(filter(lambda x: dict.has_key(self, x), self._lru)) == len(self._lru), "Each key in self._lru is required to be in dict.", filter(lambda x: not dict.has_key(self, x), self._lru), len(self._lru), self._lru, len(self), self) - _assert(len(filter(lambda x: x in self._lru, self.keys())) == len(self), "Each key in dict is required to be in self._lru.", filter(lambda x: x not in self._lru, self.keys()), len(self._lru), self._lru, len(self), self) - _assert(len(self._lru) == len(self), "internal consistency", filter(lambda x: x not in self.keys(), self._lru), len(self._lru), self._lru, len(self), self) - _assert(len(self._lru) <= self._maxsize, "internal consistency", len(self._lru), self._lru, self._maxsize) - return True - - def insert(self, key, item=None): - assert self._assert_invariants() - result = self.__setitem__(key, item) - assert self._assert_invariants() - return result - - def setdefault(self, key, default=None): - assert self._assert_invariants() - if not self.has_key(key): - self[key] = default - assert 
self._assert_invariants() - return self[key] - - def __setitem__(self, key, item=None): - assert self._assert_invariants() - if dict.has_key(self, key): - self._lru.remove(key) - else: - if len(self._lru) == self._maxsize: - # If this insert is going to increase the size of the cache to bigger than maxsize: - killkey = self._lru.pop(0) - dict.__delitem__(self, killkey) - dict.__setitem__(self, key, item) - self._lru.append(key) - assert self._assert_invariants() - return item - - def remove(self, key, default=None, strictkey=True): - assert self._assert_invariants() - result = self.__delitem__(key, default, strictkey) - assert self._assert_invariants() - return result - - def __delitem__(self, key, default=None, strictkey=True): - """ - @param strictkey: True if you want a KeyError in the case that - key is not there, False if you want a reference to default - in the case that key is not there - @param default: the object to return if key is not there; This - is ignored if strictkey. - - @return: the object removed or default if there is not item by - that key and strictkey is False - """ - assert self._assert_invariants() - if dict.has_key(self, key): - val = dict.__getitem__(self, key) - dict.__delitem__(self, key) - self._lru.remove(key) - assert self._assert_invariants() - return val - elif strictkey: - assert self._assert_invariants() - raise KeyError, key - else: - assert self._assert_invariants() - return default - - def clear(self): - assert self._assert_invariants() - dict.clear(self) - self._lru = [] - assert self._assert_invariants() - - def update(self, otherdict): - """ - @return: self - """ - assert self._assert_invariants() - if len(otherdict) > self._maxsize: - # Handling this special case here makes it possible to implement the - # other more common cases faster below. 
- dict.clear(self) - self._lru = [] - if self._maxsize > (len(otherdict) - self._maxsize): - dict.update(self, otherdict) - while len(self) > self._maxsize: - dict.popitem(self) - else: - for k, v, in otherdict.iteritems(): - if len(self) == self._maxsize: - break - dict.__setitem__(self, k, v) - self._lru = dict.keys(self) - assert self._assert_invariants() - return self - - for k in otherdict.iterkeys(): - if dict.has_key(self, k): - self._lru.remove(k) - self._lru.extend(otherdict.keys()) - dict.update(self, otherdict) - - over = len(self) - self._maxsize - if over > 0: - map(dict.__delitem__, [self]*over, self._lru[:over]) - del self._lru[:over] - - assert self._assert_invariants() - return self - - def has_key(self, key): - assert self._assert_invariants() - if dict.has_key(self, key): - assert key in self._lru, "key: %s, self._lru: %s" % tuple(map(hr, (key, self._lru,))) - self._lru.remove(key) - self._lru.append(key) - assert self._assert_invariants() - return True - else: - assert self._assert_invariants() - return False - - def refresh(self, key, strictkey=True): - """ - @param strictkey: raise a KeyError exception if key isn't present - """ - assert self._assert_invariants() - if not dict.has_key(self, key): - if strictkey: - raise KeyError, key - return - self._lru.remove(key) - self._lru.append(key) - - def popitem(self): - if not self._lru: - raise KeyError, 'popitem(): dictionary is empty' - k = self._lru[-1] - obj = self.remove(k) - return (k, obj,) diff --git a/libs/pyutil/randutil.py b/libs/pyutil/randutil.py index a3efb74..82eb3e1 100644 --- a/libs/pyutil/randutil.py +++ b/libs/pyutil/randutil.py @@ -80,6 +80,5 @@ seed = randobj.seed def randstr(n): return ''.join(map(chr, map(randrange, [0]*n, [256]*n))) -import random as insecurerandom def insecurerandstr(n): - return ''.join(map(chr, map(insecurerandom.randrange, [0]*n, [256]*n))) + return os.urandom(n) diff --git a/libs/pyutil/randutil.py~ b/libs/pyutil/randutil.py~ deleted file mode 100644 
index b0f1c4f..0000000 --- a/libs/pyutil/randutil.py~ +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (c) 2002-2010 Zooko Wilcox-O'Hearn -# This file is part of pyutil; see README.rst for licensing terms. - -import warnings -import os, random - -try: - import hashexpand - class SHA256Random(hashexpand.SHA256Expander, random.Random): - def __init__(self, seed=None, deterministic=True): - warnings.warn("deprecated", DeprecationWarning) - if not deterministic: - raise NotImplementedError, "SHA256Expander is always deterministic. For non-deterministic, try urandomRandom." - - hashexpand.SHA256Expander.__init__(self) - random.Random.__init__(self, seed) - self.seed(seed) - - def seed(self, seed=None): - if seed is None: - import increasing_timer - seed = repr(increasing_timer.time()) - hashexpand.SHA256Expander.seed(self, seed) - - - class SHA256Random(hashexpand.SHA256Expander, random.Random): - def __init__(self, seed=""): - warnings.warn("deprecated", DeprecationWarning) - hashexpand.SHA256Expander.__init__(self) - self.seed(seed) - - def seed(self, seed=None): - if seed is None: - seed = os.urandom(32) - hashexpand.SHA256Expander.seed(self, seed) -except ImportError, le: - class InsecureSHA256Random: - def __init__(self, seed=None): - raise ImportError, le - class SHA256Random: - def __init__(self, seed=""): - raise ImportError, le - -class devrandomRandom(random.Random): - """ The problem with using this one, of course, is that it blocks. This - is, of course, a security flaw. (On Linux and probably on other - systems.) --Zooko 2005-03-04 - - Not repeatable. - """ - def __init__(self): - warnings.warn("deprecated", DeprecationWarning) - self.dr = open("/dev/random", "r") - - def get(self, bytes): - return self.dr.read(bytes) - - -class devurandomRandom(random.Random): - """ The problem with using this one is that it gives answers even when it - has never been properly seeded, e.g. 
when you are booting from CD and have - just started up and haven't yet gathered enough entropy to actually be - unguessable. (On Linux and probably on other systems.) --Zooko 2005-03-04 - - Not repeatable. - """ - def get(self, bytes): - warnings.warn("deprecated", DeprecationWarning) - return os.urandom(bytes) - - -randobj = devurandomRandom() -get = randobj.get -random = randobj.random -randrange = randobj.randrange -shuffle = randobj.shuffle -choice = randobj.choice -seed = randobj.seed - -def randstr(n): - return ''.join(map(chr, map(randrange, [0]*n, [256]*n))) - -import random as insecurerandom -def insecurerandstr(n): - return ''.join(map(chr, map(insecurerandom.randrange, [0]*n, [256]*n))) diff --git a/libs/pyutil/scripts/passphrase.py b/libs/pyutil/scripts/passphrase.py new file mode 100644 index 0000000..bed79c1 --- /dev/null +++ b/libs/pyutil/scripts/passphrase.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import argparse, math, random + +from pyutil.mathutil import div_ceil + +from pkg_resources import resource_stream + +def recursive_subset_sum(entropy_needed, wordlists): + # Pick a minimalish set of numbers which sum to at least + # entropy_needed. + + # Okay now what's the smallest number of words which will give us + # at least this much entropy? + entropy_of_biggest_wordlist = wordlists[-1][0] + assert isinstance(entropy_of_biggest_wordlist, float), wordlists[-1] + needed_words = div_ceil(entropy_needed, entropy_of_biggest_wordlist) + # How much entropy do we need from each word? + needed_entropy_per_word = entropy_needed / needed_words + # What's the smallest wordlist that offers at least this much + # entropy per word? + for (wlentropy, wl) in wordlists: + if wlentropy >= needed_entropy_per_word: + break + assert wlentropy >= needed_entropy_per_word, (wlentropy, needed_entropy_per_word) + + result = [(wlentropy, wl)] + # If we need more, recurse... 
+ if wlentropy < entropy_needed: + rest = recursive_subset_sum(entropy_needed - wlentropy, wordlists) + result.extend(rest) + return result + +def gen_passphrase(entropy, allwords): + maxlenwords = [] + i = 2 # The smallest set is words of length 1 or 2. + words = [x for x in allwords if len(x) <= i] + maxlenwords.append((math.log(len(words), 2), words)) + while len(maxlenwords[-1][1]) < len(allwords): + i += 1 + words = [x for x in allwords if len(x) <= i] + maxlenwords.append((math.log(len(words), 2), words)) + + sr = random.SystemRandom() + passphrase = [] + + wordlists_to_use = recursive_subset_sum(entropy, maxlenwords) + + passphraseentropy = 0.0 + for (wle, wl) in wordlists_to_use: + passphrase.append(sr.choice(wl)) + passphraseentropy += wle + + return (u".".join(passphrase), passphraseentropy) + +def main(): + parser = argparse.ArgumentParser(prog="chbs", description="Create a random passphrase by picking a few random words.") + + parser.add_argument('-d', '--dictionary', help="what file to read a list of words from (or omit this option to use chbs's bundled dictionary)", type=argparse.FileType('rU'), metavar="DICT") + parser.add_argument('bits', help="how many bits of entropy minimum", type=float, metavar="BITS") + args = parser.parse_args() + + dicti = args.dictionary + if not dicti: + dicti = resource_stream('pyutil', 'data/wordlist.txt') + allwords = set([x.decode('utf-8').strip().lower() for x in dicti.readlines()]) + + passphrase, bits = gen_passphrase(args.bits, allwords) + + print u"Your new password is: '%s'. It is worth about %s bits." % (passphrase, bits) diff --git a/libs/pyutil/scripts/time_comparisons.py b/libs/pyutil/scripts/time_comparisons.py new file mode 100644 index 0000000..15a3885 --- /dev/null +++ b/libs/pyutil/scripts/time_comparisons.py @@ -0,0 +1,209 @@ +# If you run this file, it will make up a random secret and then crack it +# using timing information from a string comparison function. Maybe--if it +# gets lucky. 
It takes a long, long time to work. + +# So, the thing I need help with is statistics. The way this thing works is +# extremely stupid. Suppose you want to know which function invocation takes +# longer: comparison(secret, guess1) or comparison(secret, guess2)? + +# If you can correctly determine that one of them takes longer than the +# other, then (a) you can use that to crack the secret, and (b) this is a +# unit test demonstrating that comparison() is not timing-safe. + +# So how does this script do it? Extremely stupidly. First of all, you can't +# reliably measure tiny times, so to measure the time that a function takes, +# we run that function 10,000 times in a row, measure how long that took, and +# divide by 10,000 to estimate how long any one run would have taken. + +# Then, we do that 100 times in a row, and take the fastest of 100 runs. (I +# also experimented with taking the mean of 100 runs instead of the fastest.) + +# Then, we just say whichever comparison took longer (for its fastest run of +# 100 runs of 10,000 executions per run) is the one we think is a closer +# guess to the secret. + +# Now I would *like* to think that there is some kind of statistical analysis +# more sophisticated than "take the slowest of the fastest of 100 runs of +# 10,000 executions". Such improved statistical analysis would hopefully be +# able to answer these two questions: + +# 1. Are these two function calls -- comparison(secret, guess1) and +# comparison(secret, guess2) -- drawing from the same distribution or +# different? If you can answer that question, then you've answered the +# question of whether "comparison" is timing-safe or not. + +# And, this would also allow the cracker to recover from a false step. 
If it +# incorrectly decides the the prefix of the secret is ABCX, when the real +# secret is ABCD, then after that every next step it takes will be the +# "drawing from the same distribution" kind -- any difference between ABCXQ +# and ABCXR will be just due to noise, since both are equally far from the +# correct answer, which startsw with ABCD. If it could realize that there is +# no real difference between the distributions, then it could back-track and +# recover. + +# 2. Giving the ability to measure, noisily, the time taken by comparison(), +# how can you most efficiently figure out which guess takes the longest? If +# you can do that more efficiently, you can crack secrets more efficiently. + +# The script takes two arguments. The first is how many symbols in the +# secret, and the second is how big the alphabet from which the symbols are +# drawn. To prove that this script can *ever* work, try passing length 5 and +# alphabet size 2. Also try editing the code to let is use sillycomp. That'll +# definitely make it work. If you can improve this script (as per the thing +# above about "needing better statistics") to the degree that it can crack a +# secret with length 32 and alphabet size 256, then that would be awesome. + +# See the result of this commandline: + +# $ python -c 'import time_comparisons ; time_comparisons.print_measurements()' + + +from pyutil import benchutil + +import hashlib, random, os + +from decimal import Decimal +D=Decimal + +p1 = 'a'*32 +p1a = 'a'*32 +p2 = 'a'*31+'b' # close, but no cigar +p3 = 'b'*32 # different in the first byte + +def randstr(n, alphabetsize): + alphabet = [ chr(x) for x in range(alphabetsize) ] + return ''.join([random.choice(alphabet) for i in range(n)]) + +def compare(n, f, a, b): + for i in xrange(n): + f(a, b) + +def eqeqcomp(a, b): + return a == b + +def sillycomp(a, b): + # This exposes a lot of information in its timing about how many leading bytes match. 
+ for i in range(len(a)): + if a[i] != b[i]: + return False + for i in xrange(2**9): + pass + if len(a) == len(b): + return True + else: + return False + +def hashcomp(a, b): + # Brian Warner invented this for Tahoe-LAFS. It seems like it should be very safe agaist timing leakage of any kind, because of the inclusion of a new random randkey every time. Note that exposing the value of the hash (i.e. the output of md5(randkey+secret)) is *not* a security problem. You can post that on your web site and let all attackers have it, no problem. (Provided that the value of "randkey" remains secret.) + + randkey = os.urandom(32) + return hashlib.md5(randkey+ a).digest() == hashlib.md5(randkey+b).digest() + +def xorcomp(a, b): + # This appears to be the most popular timing-insensitive string comparison function. I'm not completely sure it is fully timing-insensitive. (There are all sorts of funny things inside Python, such as caching of integer objects < 100...) + if len(a) != len(b): + return False + result = 0 + for x, y in zip(a, b): + result |= ord(x) ^ ord(y) + return result == 0 + +def print_measurements(): + N=10**4 + REPS=10**2 + + print "all times are in nanoseconds per comparison (in scientific notation)" + print + + for comparator in [eqeqcomp, hashcomp, xorcomp, sillycomp]: + print "using comparator ", comparator + + # for (a, b, desc) in [(p1, p1a, 'same'), (p1, p2, 'close'), (p1, p3, 'far')]: + trials = [(p1, p1a, 'same'), (p1, p2, 'close'), (p1, p3, 'far')] + random.shuffle(trials) + for (a, b, desc) in trials: + print "comparing two strings that are %s to each other" % (desc,) + + def f(n): + compare(n, comparator, a, b) + + benchutil.rep_bench(f, N, UNITS_PER_SECOND=10**9, MAXREPS=REPS) + + print + +def try_to_crack_secret(cracker, comparator, secretlen, alphabetsize): + secret = randstr(secretlen, alphabetsize) + + def test_guess(x): + return comparator(secret, x) + + print "Giving cracker %s a chance to figure out the secret. 
Don't tell him, but the secret is %s. Whenever he makes a guess, we'll use comparator %s to decide if his guess is right ..." % (cracker, secret.encode('hex'), comparator,) + + guess = cracker(test_guess, secretlen, alphabetsize) + + print "Cracker %s guessed %r" % (cracker, guess,) + if guess == secret: + print "HE FIGURED IT OUT!? HOW DID HE DO THAT." + else: + print "HAHA. Our secret is safe." + +def byte_at_a_time_cracker(test_guess, secretlen, alphabetsize): + # If we were cleverer, we'd add some backtracking behaviour where, if we can't find any x such that ABCx stands out from the crowd as taking longer than all the other ABCy's, then we start to think that we've taken a wrong step and we go back to trying ABy's. Make sense? But we're not that clever. Once we take a step, we don't backtrack. + + print + + guess=[] + + while len(guess) < secretlen: + best_next_byte = None + best_next_byte_time = None + + # For each possible byte... + for next_byte in range(alphabetsize): + c = chr(next_byte) + + # Construct a guess with our best candidate so far... + candidate_guess = guess[:] + + # Plus that byte... + candidate_guess.append(c) + s = ''.join(candidate_guess) + + # Plus random bytes... + s += os.urandom(32 - len(s)) + + # And see how long it takes the test_guess to consider it... + def f(n): + for i in xrange(n): + test_guess(s) + + times = benchutil.rep_bench(f, 10**7, MAXREPS=10**3, quiet=True) + + fastesttime = times['mean'] + + print "%s..."%(c.encode('hex'),), + if best_next_byte is None or fastesttime > best_next_byte_time: + print "new candidate for slowest next-char: %s, took: %s" % (c.encode('hex'), fastesttime,), + + best_next_byte_time = fastesttime + best_next_byte = c + + # Okay we've tried all possible next bytes. Our guess is this one (the one that took longest to be tested by test_guess): + guess.append(best_next_byte) + print "SLOWEST next-char %s! 
Current guess at secret: %s" % (best_next_byte.encode('hex'), ''.join(guess).encode('hex'),) + + guess = ''.join(guess) + print "Our guess for the secret: %r" % (guess,) + return guess + +if __name__ == '__main__': + import sys + secretlen = int(sys.argv[1]) + alphabetsize = int(sys.argv[2]) + if alphabetsize > 256: + raise Exception("We assume we can fit one element of the alphabet into a byte.") + + print "secretlen: %d, alphabetsize: %d" % (secretlen, alphabetsize,) + + # try_to_crack_secret(byte_at_a_time_cracker, sillycomp, secretlen, alphabetsize) + try_to_crack_secret(byte_at_a_time_cracker, eqeqcomp, secretlen, alphabetsize) diff --git a/libs/pyutil/test/current/test_mathutil.py b/libs/pyutil/test/current/test_mathutil.py index 7c189df..da78875 100644 --- a/libs/pyutil/test/current/test_mathutil.py +++ b/libs/pyutil/test/current/test_mathutil.py @@ -42,6 +42,13 @@ class MathUtilTestCase(unittest.TestCase): self.failUnlessEqual(f(5, 3), 2) self.failUnlessEqual(f(6, 3), 2) self.failUnlessEqual(f(7, 3), 3) + self.failUnless(isinstance(f(0.0, 1), int)) + self.failUnlessEqual(f(7.0, 3.0), 3) + self.failUnlessEqual(f(7, 3.0), 3) + self.failUnlessEqual(f(7.0, 3), 3) + self.failUnlessEqual(f(6.0, 3.0), 2) + self.failUnlessEqual(f(6.0, 3), 2) + self.failUnlessEqual(f(6, 3.0), 2) def test_next_multiple(self): f = mathutil.next_multiple diff --git a/libs/pyutil/test/current/test_mathutil.py~ b/libs/pyutil/test/current/test_mathutil.py~ new file mode 100644 index 0000000..7c189df --- /dev/null +++ b/libs/pyutil/test/current/test_mathutil.py~ @@ -0,0 +1,135 @@ +#!/usr/bin/env python + +import unittest + +from pyutil import mathutil +from pyutil.assertutil import _assert + +class MathUtilTestCase(unittest.TestCase): + def _help_test_is_power_of_k(self, k): + for i in range(2, 40): + _assert(mathutil.is_power_of_k(k**i, k), k, i) + + def test_is_power_of_k(self): + for i in range(2, 5): + self._help_test_is_power_of_k(i) + + def test_log_ceil(self): + f = 
mathutil.log_ceil + self.failUnlessEqual(f(1, 2), 0) + self.failUnlessEqual(f(1, 3), 0) + self.failUnlessEqual(f(2, 2), 1) + self.failUnlessEqual(f(2, 3), 1) + self.failUnlessEqual(f(3, 2), 2) + + def test_log_floor(self): + f = mathutil.log_floor + self.failUnlessEqual(f(1, 2), 0) + self.failUnlessEqual(f(1, 3), 0) + self.failUnlessEqual(f(2, 2), 1) + self.failUnlessEqual(f(2, 3), 0) + self.failUnlessEqual(f(3, 2), 1) + + def test_div_ceil(self): + f = mathutil.div_ceil + self.failUnlessEqual(f(0, 1), 0) + self.failUnlessEqual(f(0, 2), 0) + self.failUnlessEqual(f(0, 3), 0) + self.failUnlessEqual(f(1, 3), 1) + self.failUnlessEqual(f(2, 3), 1) + self.failUnlessEqual(f(3, 3), 1) + self.failUnlessEqual(f(4, 3), 2) + self.failUnlessEqual(f(5, 3), 2) + self.failUnlessEqual(f(6, 3), 2) + self.failUnlessEqual(f(7, 3), 3) + + def test_next_multiple(self): + f = mathutil.next_multiple + self.failUnlessEqual(f(5, 1), 5) + self.failUnlessEqual(f(5, 2), 6) + self.failUnlessEqual(f(5, 3), 6) + self.failUnlessEqual(f(5, 4), 8) + self.failUnlessEqual(f(5, 5), 5) + self.failUnlessEqual(f(5, 6), 6) + self.failUnlessEqual(f(32, 1), 32) + self.failUnlessEqual(f(32, 2), 32) + self.failUnlessEqual(f(32, 3), 33) + self.failUnlessEqual(f(32, 4), 32) + self.failUnlessEqual(f(32, 5), 35) + self.failUnlessEqual(f(32, 6), 36) + self.failUnlessEqual(f(32, 7), 35) + self.failUnlessEqual(f(32, 8), 32) + self.failUnlessEqual(f(32, 9), 36) + self.failUnlessEqual(f(32, 10), 40) + self.failUnlessEqual(f(32, 11), 33) + self.failUnlessEqual(f(32, 12), 36) + self.failUnlessEqual(f(32, 13), 39) + self.failUnlessEqual(f(32, 14), 42) + self.failUnlessEqual(f(32, 15), 45) + self.failUnlessEqual(f(32, 16), 32) + self.failUnlessEqual(f(32, 17), 34) + self.failUnlessEqual(f(32, 18), 36) + self.failUnlessEqual(f(32, 589), 589) + + def test_pad_size(self): + f = mathutil.pad_size + self.failUnlessEqual(f(0, 4), 0) + self.failUnlessEqual(f(1, 4), 3) + self.failUnlessEqual(f(2, 4), 2) + self.failUnlessEqual(f(3, 
4), 1) + self.failUnlessEqual(f(4, 4), 0) + self.failUnlessEqual(f(5, 4), 3) + + def test_is_power_of_k_part_2(self): + f = mathutil.is_power_of_k + for i in range(1, 100): + if i in (1, 2, 4, 8, 16, 32, 64): + self.failUnless(f(i, 2), "but %d *is* a power of 2" % i) + else: + self.failIf(f(i, 2), "but %d is *not* a power of 2" % i) + for i in range(1, 100): + if i in (1, 3, 9, 27, 81): + self.failUnless(f(i, 3), "but %d *is* a power of 3" % i) + else: + self.failIf(f(i, 3), "but %d is *not* a power of 3" % i) + + def test_next_power_of_k(self): + f = mathutil.next_power_of_k + self.failUnlessEqual(f(0,2), 1) + self.failUnlessEqual(f(1,2), 1) + self.failUnlessEqual(f(2,2), 2) + self.failUnlessEqual(f(3,2), 4) + self.failUnlessEqual(f(4,2), 4) + for i in range(5, 8): self.failUnlessEqual(f(i,2), 8, "%d" % i) + for i in range(9, 16): self.failUnlessEqual(f(i,2), 16, "%d" % i) + for i in range(17, 32): self.failUnlessEqual(f(i,2), 32, "%d" % i) + for i in range(33, 64): self.failUnlessEqual(f(i,2), 64, "%d" % i) + for i in range(65, 100): self.failUnlessEqual(f(i,2), 128, "%d" % i) + + self.failUnlessEqual(f(0,3), 1) + self.failUnlessEqual(f(1,3), 1) + self.failUnlessEqual(f(2,3), 3) + self.failUnlessEqual(f(3,3), 3) + for i in range(4, 9): self.failUnlessEqual(f(i,3), 9, "%d" % i) + for i in range(10, 27): self.failUnlessEqual(f(i,3), 27, "%d" % i) + for i in range(28, 81): self.failUnlessEqual(f(i,3), 81, "%d" % i) + for i in range(82, 200): self.failUnlessEqual(f(i,3), 243, "%d" % i) + + def test_ave(self): + f = mathutil.ave + self.failUnlessEqual(f([1,2,3]), 2) + self.failUnlessEqual(f([0,0,0,4]), 1) + self.failUnlessAlmostEqual(f([0.0, 1.0, 1.0]), .666666666666) + + def failUnlessEqualContents(self, a, b): + self.failUnlessEqual(sorted(a), sorted(b)) + + def test_permute(self): + f = mathutil.permute + self.failUnlessEqualContents(f([]), []) + self.failUnlessEqualContents(f([1]), [[1]]) + self.failUnlessEqualContents(f([1,2]), [[1,2], [2,1]]) + 
self.failUnlessEqualContents(f([1,2,3]), + [[1,2,3], [1,3,2], + [2,1,3], [2,3,1], + [3,1,2], [3,2,1]]) diff --git a/libs/pyutil/time_comparisons.py b/libs/pyutil/time_comparisons.py deleted file mode 100644 index ee1bcfa..0000000 --- a/libs/pyutil/time_comparisons.py +++ /dev/null @@ -1,44 +0,0 @@ -from pyutil import benchutil - -import hashlib, random, os - -from decimal import Decimal -D=Decimal - -p1 = 'a'*32 -p1a = 'a'*32 -p2 = 'a'*31+'b' # close, but no cigar -p3 = 'b'*32 # different in the first byte - -def compare(n, f, a, b): - for i in xrange(n): - f(a, b) - -def eqeqcomp(a, b): - return a == b - -def hashcomp(a, b): - salt = os.urandom(32) - return hashlib.md5(salt+ a).digest() == hashlib.md5(salt+b).digest() - -N=10**4 -REPS=10**2 - -print "all times are in nanoseconds per comparison (scientific notation)" -print - -for comparator in [eqeqcomp, hashcomp]: - print "using comparator ", comparator - - # for (a, b, desc) in [(p1, p1a, 'same'), (p1, p2, 'close'), (p1, p3, 'far')]: - trials = [(p1, p1a, 'same'), (p1, p2, 'close'), (p1, p3, 'far')] - random.shuffle(trials) - for (a, b, desc) in trials: - print "comparing two strings that are %s to each other" % (desc,) - - def f(n): - compare(n, comparator, a, b) - - benchutil.rep_bench(f, N, UNITS_PER_SECOND=10**9, MAXREPS=REPS) - - print diff --git a/libs/pyutil/time_comparisons.py~ b/libs/pyutil/time_comparisons.py~ deleted file mode 100644 index abf151a..0000000 --- a/libs/pyutil/time_comparisons.py~ +++ /dev/null @@ -1,72 +0,0 @@ -from pyutil import benchutil - -import hashlib -import os - -from decimal import Decimal -D=Decimal - -p1 = 'a'*32 -p1a = 'a'*32 -p2 = 'a'*31+'b' # close, but no cigar -p3 = 'b'*32 # different in the first byte - -def compare(n, f, a, b): - for i in xrange(n): - f(a, b) - -def eqeq(a, b): - return a == b - -def equalsequals_s(n): - # return compare(n, eqeq, - for i in xrange(n): - p1 == p1a - -def equalsequals_c(n): - for i in xrange(n): - p1 == p2 - -def equalsequals_f(n): - for 
i in xrange(n): - p1 == p3 - -def hash_s(n): - for i in xrange(n): - salt = os.urandom(32) - hashlib.md5(salt+ p1).digest() == hashlib.md5(salt+p1a).digest() - -def hash_c(n): - for i in xrange(n): - salt = os.urandom(32) - hashlib.md5(salt+ p1).digest() == hashlib.md5(salt+p2).digest() - -def hash_f(n): - for i in xrange(n): - salt = os.urandom(32) - hashlib.md5(salt+ p1).digest() == hashlib.md5(salt+p3).digest() - -N=10**4 -REPS=10**2 - -print "using '=='" - -print "same" -benchutil.rep_bench(equalsequals_s, N, UNITS_PER_SECOND=10**9, MAXREPS=REPS) - -print "close" -benchutil.rep_bench(equalsequals_c, N, UNITS_PER_SECOND=10**9, MAXREPS=REPS) - -print "far" -benchutil.rep_bench(equalsequals_f, N, UNITS_PER_SECOND=10**9, MAXREPS=REPS) - -print "using hash" - -print "same" -benchutil.rep_bench(hash_s, N, UNITS_PER_SECOND=10**9, MAXREPS=REPS) - -print "far" -benchutil.rep_bench(hash_f, N, UNITS_PER_SECOND=10**9, MAXREPS=REPS) - -print "close" -benchutil.rep_bench(hash_c, N, UNITS_PER_SECOND=10**9, MAXREPS=REPS) diff --git a/libs/rtorrent/__init__.py b/libs/rtorrent/__init__.py index b6ff73a..683ef1c 100755 --- a/libs/rtorrent/__init__.py +++ b/libs/rtorrent/__init__.py @@ -17,18 +17,21 @@ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+import urllib +import os.path +import time +import xmlrpclib from rtorrent.common import find_torrent, \ is_valid_port, convert_version_tuple_to_str from rtorrent.lib.torrentparser import TorrentParser from rtorrent.lib.xmlrpc.http import HTTPServerProxy -from rtorrent.rpc import Method, BasicAuthTransport +from rtorrent.lib.xmlrpc.scgi import SCGIServerProxy +from rtorrent.rpc import Method +from rtorrent.lib.xmlrpc.basic_auth import BasicAuthTransport from rtorrent.torrent import Torrent from rtorrent.group import Group -import os.path import rtorrent.rpc # @UnresolvedImport -import time -import xmlrpclib __version__ = "0.2.9" __author__ = "Chris Lucas" @@ -43,13 +46,25 @@ class RTorrent: """ Create a new rTorrent connection """ rpc_prefix = None - def __init__(self, url, username=None, password=None, - verify=False, sp=HTTPServerProxy, sp_kwargs={}): - self.url = url # : From X{__init__(self, url)} + def __init__(self, uri, username=None, password=None, + verify=False, sp=None, sp_kwargs=None): + self.uri = uri # : From X{__init__(self, url)} + self.username = username self.password = password - self.sp = sp - self.sp_kwargs = sp_kwargs + + self.schema = urllib.splittype(uri)[0] + + if sp: + self.sp = sp + elif self.schema in ['http', 'https']: + self.sp = HTTPServerProxy + elif self.schema == 'scgi': + self.sp = SCGIServerProxy + else: + raise NotImplementedError() + + self.sp_kwargs = sp_kwargs or {} self.torrents = [] # : List of L{Torrent} instances self._rpc_methods = [] # : List of rTorrent RPC methods @@ -62,12 +77,16 @@ class RTorrent: def _get_conn(self): """Get ServerProxy instance""" if self.username is not None and self.password is not None: + if self.schema == 'scgi': + raise NotImplementedError() + return self.sp( - self.url, + self.uri, transport=BasicAuthTransport(self.username, self.password), **self.sp_kwargs ) - return self.sp(self.url, **self.sp_kwargs) + + return self.sp(self.uri, **self.sp_kwargs) def _verify_conn(self): # check for rpc 
methods that should be available @@ -96,6 +115,11 @@ class RTorrent: return self._client_version_tuple + def _update_rpc_methods(self): + self._rpc_methods = self._get_conn().system.listMethods() + + return self._rpc_methods + def _get_rpc_methods(self): """ Get list of raw RPC commands @@ -103,10 +127,7 @@ class RTorrent: @rtype: list """ - if self._rpc_methods == []: - self._rpc_methods = self._get_conn().system.listMethods() - - return(self._rpc_methods) + return(self._rpc_methods or self._update_rpc_methods()) def get_torrents(self, view="main"): """Get list of all torrents in specified view @@ -298,6 +319,8 @@ class RTorrent: assert view is not None, "view parameter required on non-persistent groups" p.group.insert('', name, view) + self._update_rpc_methods() + def get_group(self, name): assert name is not None, "group name required" diff --git a/libs/rtorrent/lib/xmlrpc/basic_auth.py b/libs/rtorrent/lib/xmlrpc/basic_auth.py new file mode 100644 index 0000000..20c02d9 --- /dev/null +++ b/libs/rtorrent/lib/xmlrpc/basic_auth.py @@ -0,0 +1,73 @@ +# +# Copyright (c) 2013 Dean Gardiner, <gardiner91@gmail.com> +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +from base64 import encodestring +import string +import xmlrpclib + + +class BasicAuthTransport(xmlrpclib.Transport): + def __init__(self, username=None, password=None): + xmlrpclib.Transport.__init__(self) + + self.username = username + self.password = password + + def send_auth(self, h): + if self.username is not None and self.password is not None: + h.putheader('AUTHORIZATION', "Basic %s" % string.replace( + encodestring("%s:%s" % (self.username, self.password)), + "\012", "" + )) + + def single_request(self, host, handler, request_body, verbose=0): + # issue XML-RPC request + + h = self.make_connection(host) + if verbose: + h.set_debuglevel(1) + + try: + self.send_request(h, handler, request_body) + self.send_host(h, host) + self.send_user_agent(h) + self.send_auth(h) + self.send_content(h, request_body) + + response = h.getresponse(buffering=True) + if response.status == 200: + self.verbose = verbose + return self.parse_response(response) + except xmlrpclib.Fault: + raise + except Exception: + self.close() + raise + + #discard any response data and raise exception + if response.getheader("content-length", 0): + response.read() + raise xmlrpclib.ProtocolError( + host + handler, + response.status, response.reason, + response.msg, + ) diff --git a/libs/rtorrent/lib/xmlrpc/scgi.py b/libs/rtorrent/lib/xmlrpc/scgi.py new file mode 100644 index 0000000..5ba61fa --- /dev/null +++ b/libs/rtorrent/lib/xmlrpc/scgi.py @@ -0,0 +1,219 @@ +#!/usr/bin/python + +# rtorrent_xmlrpc +# (c) 2011 Roger Que <alerante@bellsouth.net> +# +# Modified portions: +# (c) 2013 Dean Gardiner <gardiner91@gmail.com> +# +# Python module for interacting with rtorrent's XML-RPC interface +# directly over SCGI, instead of through an 
HTTP server intermediary. +# Inspired by Glenn Washburn's xmlrpc2scgi.py [1], but subclasses the +# built-in xmlrpclib classes so that it is compatible with features +# such as MultiCall objects. +# +# [1] <http://libtorrent.rakshasa.no/wiki/UtilsXmlrpc2scgi> +# +# Usage: server = SCGIServerProxy('scgi://localhost:7000/') +# server = SCGIServerProxy('scgi:///path/to/scgi.sock') +# print server.system.listMethods() +# mc = xmlrpclib.MultiCall(server) +# mc.get_up_rate() +# mc.get_down_rate() +# print mc() +# +# +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# In addition, as a special exception, the copyright holders give +# permission to link the code of portions of this program with the +# OpenSSL library under certain conditions as described in each +# individual source file, and distribute linked combinations +# including the two. +# +# You must obey the GNU General Public License in all respects for +# all of the code used other than OpenSSL. If you modify file(s) +# with this exception, you may extend this exception to your version +# of the file(s), but you are not obligated to do so. If you do not +# wish to do so, delete this exception statement from your version. +# If you delete this exception statement from all source files in the +# program, then also delete it here. 
+# +# +# +# Portions based on Python's xmlrpclib: +# +# Copyright (c) 1999-2002 by Secret Labs AB +# Copyright (c) 1999-2002 by Fredrik Lundh +# +# By obtaining, using, and/or copying this software and/or its +# associated documentation, you agree that you have read, understood, +# and will comply with the following terms and conditions: +# +# Permission to use, copy, modify, and distribute this software and +# its associated documentation for any purpose and without fee is +# hereby granted, provided that the above copyright notice appears in +# all copies, and that both that copyright notice and this permission +# notice appear in supporting documentation, and that the name of +# Secret Labs AB or the author not be used in advertising or publicity +# pertaining to distribution of the software without specific, written +# prior permission. +# +# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD +# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- +# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR +# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY +# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE +# OF THIS SOFTWARE. 
+ +import httplib +import re +import socket +import urllib +import xmlrpclib +import errno + + +class SCGITransport(xmlrpclib.Transport): + # Added request() from Python 2.7 xmlrpclib here to backport to Python 2.6 + def request(self, host, handler, request_body, verbose=0): + #retry request once if cached connection has gone cold + for i in (0, 1): + try: + return self.single_request(host, handler, request_body, verbose) + except socket.error, e: + if i or e.errno not in (errno.ECONNRESET, errno.ECONNABORTED, errno.EPIPE): + raise + except httplib.BadStatusLine: #close after we sent request + if i: + raise + + def single_request(self, host, handler, request_body, verbose=0): + # Add SCGI headers to the request. + headers = {'CONTENT_LENGTH': str(len(request_body)), 'SCGI': '1'} + header = '\x00'.join(('%s\x00%s' % item for item in headers.iteritems())) + '\x00' + header = '%d:%s' % (len(header), header) + request_body = '%s,%s' % (header, request_body) + + sock = None + + try: + if host: + host, port = urllib.splitport(host) + addrinfo = socket.getaddrinfo(host, int(port), socket.AF_INET, + socket.SOCK_STREAM) + sock = socket.socket(*addrinfo[0][:3]) + sock.connect(addrinfo[0][4]) + else: + sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + sock.connect(handler) + + self.verbose = verbose + + sock.send(request_body) + return self.parse_response(sock.makefile()) + finally: + if sock: + sock.close() + + def parse_response(self, response): + p, u = self.getparser() + + response_body = '' + while True: + data = response.read(1024) + if not data: + break + response_body += data + + # Remove SCGI headers from the response. 
+ response_header, response_body = re.split(r'\n\s*?\n', response_body, + maxsplit=1) + + if self.verbose: + print 'body:', repr(response_body) + + p.feed(response_body) + p.close() + + return u.close() + + +class SCGIServerProxy(xmlrpclib.ServerProxy): + def __init__(self, uri, transport=None, encoding=None, verbose=False, + allow_none=False, use_datetime=False): + type, uri = urllib.splittype(uri) + if type not in ('scgi'): + raise IOError('unsupported XML-RPC protocol') + self.__host, self.__handler = urllib.splithost(uri) + if not self.__handler: + self.__handler = '/' + + if transport is None: + transport = SCGITransport(use_datetime=use_datetime) + self.__transport = transport + + self.__encoding = encoding + self.__verbose = verbose + self.__allow_none = allow_none + + def __close(self): + self.__transport.close() + + def __request(self, methodname, params): + # call a method on the remote server + + request = xmlrpclib.dumps(params, methodname, encoding=self.__encoding, + allow_none=self.__allow_none) + + response = self.__transport.request( + self.__host, + self.__handler, + request, + verbose=self.__verbose + ) + + if len(response) == 1: + response = response[0] + + return response + + def __repr__(self): + return ( + "<SCGIServerProxy for %s%s>" % + (self.__host, self.__handler) + ) + + __str__ = __repr__ + + def __getattr__(self, name): + # magic method dispatcher + return xmlrpclib._Method(self.__request, name) + + # note: to call a remote object with an non-standard name, use + # result getattr(server, "strange-python-name")(args) + + def __call__(self, attr): + """A workaround to get special attributes on the ServerProxy + without interfering with the magic __getattr__ + """ + if attr == "close": + return self.__close + elif attr == "transport": + return self.__transport + raise AttributeError("Attribute %r not found" % (attr,)) diff --git a/libs/rtorrent/rpc/__init__.py b/libs/rtorrent/rpc/__init__.py index 034f4ee..116ca1c 100755 --- 
a/libs/rtorrent/rpc/__init__.py +++ b/libs/rtorrent/rpc/__init__.py @@ -17,66 +17,16 @@ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -from base64 import encodestring -import httplib -import inspect -import string +import inspect import rtorrent import re from rtorrent.common import bool_to_int, convert_version_tuple_to_str,\ safe_repr -from rtorrent.err import RTorrentVersionError, MethodError +from rtorrent.err import MethodError from rtorrent.compat import xmlrpclib -class BasicAuthTransport(xmlrpclib.Transport): - def __init__(self, username=None, password=None): - xmlrpclib.Transport.__init__(self) - self.username = username - self.password = password - - def send_auth(self, h): - if self.username is not None and self.password is not None: - h.putheader('AUTHORIZATION', "Basic %s" % string.replace( - encodestring("%s:%s" % (self.username, self.password)), - "\012", "" - )) - - def single_request(self, host, handler, request_body, verbose=0): - # issue XML-RPC request - - h = self.make_connection(host) - if verbose: - h.set_debuglevel(1) - - try: - self.send_request(h, handler, request_body) - self.send_host(h, host) - self.send_user_agent(h) - self.send_auth(h) - self.send_content(h, request_body) - - response = h.getresponse(buffering=True) - if response.status == 200: - self.verbose = verbose - return self.parse_response(response) - except xmlrpclib.Fault: - raise - except Exception: - self.close() - raise - - #discard any response data and raise exception - if (response.getheader("content-length", 0)): - response.read() - raise xmlrpclib.ProtocolError( - host + handler, - response.status, response.reason, - response.msg, - ) - - def get_varname(rpc_call): """Transform rpc method into variable name. 
diff --git a/libs/rtorrent/torrent.py b/libs/rtorrent/torrent.py index c610e36..bd6bb68 100755 --- a/libs/rtorrent/torrent.py +++ b/libs/rtorrent/torrent.py @@ -172,6 +172,17 @@ class Torrent: self.directory = m.call()[-1] + def set_directory_base(self, d): + """Modify base download directory + + @note: Needs to stop torrent in order to change the directory. + Also doesn't restart after directory is set, that must be called + separately. + """ + m = rtorrent.rpc.Multicall(self) + self.multicall_add(m, "d.try_stop") + self.multicall_add(m, "d.set_directory_base", d) + def start(self): """Start the torrent""" m = rtorrent.rpc.Multicall(self) diff --git a/libs/tornado/auth.py b/libs/tornado/auth.py index 0cbfa7c..a2cef35 100755 --- a/libs/tornado/auth.py +++ b/libs/tornado/auth.py @@ -549,7 +549,7 @@ class OAuth2Mixin(object): @return_future def authorize_redirect(self, redirect_uri=None, client_id=None, client_secret=None, extra_params=None, - callback=None): + callback=None, scope=None, response_type="code"): """Redirects the user to obtain OAuth authorization for this service. 
Some providers require that you register a redirect URL with @@ -566,10 +566,13 @@ class OAuth2Mixin(object): """ args = { "redirect_uri": redirect_uri, - "client_id": client_id + "client_id": client_id, + "response_type": response_type } if extra_params: args.update(extra_params) + if scope: + args['scope'] = ' '.join(scope) self.redirect( url_concat(self._OAUTH_AUTHORIZE_URL, args)) callback() @@ -945,6 +948,67 @@ class GoogleMixin(OpenIdMixin, OAuthMixin): return OpenIdMixin.get_authenticated_user(self) +class GoogleOAuth2Mixin(OAuth2Mixin): + """Google authentication using OAuth2.""" + _OAUTH_AUTHORIZE_URL = "https://accounts.google.com/o/oauth2/auth" + _OAUTH_ACCESS_TOKEN_URL = "https://accounts.google.com/o/oauth2/token" + _OAUTH_NO_CALLBACKS = False + _OAUTH_SETTINGS_KEY = 'google_oauth' + + @_auth_return_future + def get_authenticated_user(self, redirect_uri, code, callback): + """Handles the login for the Google user, returning a user object. + + Example usage:: + + class GoogleOAuth2LoginHandler(LoginHandler, tornado.auth.GoogleOAuth2Mixin): + @tornado.web.asynchronous + @tornado.gen.coroutine + def get(self): + if self.get_argument("code", False): + user = yield self.get_authenticated_user( + redirect_uri='http://your.site.com/auth/google', + code=self.get_argument("code")) + # Save the user with e.g. 
set_secure_cookie + else: + yield self.authorize_redirect( + redirect_uri='http://your.site.com/auth/google', + client_id=self.settings["google_consumer_key"], + scope=['openid', 'email'], + response_type='code', + extra_params={"approval_prompt": "auto"}) + """ + http = self.get_auth_http_client() + body = urllib_parse.urlencode({ + "redirect_uri": redirect_uri, + "code": code, + "client_id": self.settings[self._OAUTH_SETTINGS_KEY]['key'], + "client_secret": self.settings[self._OAUTH_SETTINGS_KEY]['secret'], + "grant_type": "authorization_code", + }) + + http.fetch(self._OAUTH_ACCESS_TOKEN_URL, + self.async_callback(self._on_access_token, callback), + method="POST", headers={'Content-Type': 'application/x-www-form-urlencoded'}, body=body) + + def _on_access_token(self, future, response): + """Callback function for the exchange to the access token.""" + if response.error: + future.set_exception(AuthError('Google auth error: %s' % str(response))) + return + + args = escape.json_decode(response.body) + future.set_result(args) + + def get_auth_http_client(self): + """Returns the `.AsyncHTTPClient` instance to be used for auth requests. + + May be overridden by subclasses to use an HTTP client other than + the default. + """ + return httpclient.AsyncHTTPClient() + + class FacebookMixin(object): """Facebook Connect authentication. diff --git a/libs/tornado/autoreload.py b/libs/tornado/autoreload.py index 0575429..79cccb4 100755 --- a/libs/tornado/autoreload.py +++ b/libs/tornado/autoreload.py @@ -16,11 +16,15 @@ """xAutomatically restart the server when a source file is modified. -Most applications should not access this module directly. Instead, pass the -keyword argument ``debug=True`` to the `tornado.web.Application` constructor. -This will enable autoreload mode as well as checking for changes to templates -and static resources. Note that restarting is a destructive operation -and any requests in progress will be aborted when the process restarts. 
+Most applications should not access this module directly. Instead, +pass the keyword argument ``autoreload=True`` to the +`tornado.web.Application` constructor (or ``debug=True``, which +enables this setting and several others). This will enable autoreload +mode as well as checking for changes to templates and static +resources. Note that restarting is a destructive operation and any +requests in progress will be aborted when the process restarts. (If +you want to disable autoreload while using other debug-mode features, +pass both ``debug=True`` and ``autoreload=False``). This module can also be used as a command-line wrapper around scripts such as unit test runners. See the `main` method for details. @@ -38,6 +42,7 @@ Reloading loses any Python interpreter command-line arguments (e.g. ``-u``) because it re-executes Python using ``sys.executable`` and ``sys.argv``. Additionally, modifying these variables will cause reloading to behave incorrectly. + """ from __future__ import absolute_import, division, print_function, with_statement diff --git a/libs/tornado/curl_httpclient.py b/libs/tornado/curl_httpclient.py index e090056..cb97710 100755 --- a/libs/tornado/curl_httpclient.py +++ b/libs/tornado/curl_httpclient.py @@ -360,6 +360,7 @@ def _curl_setup_request(curl, request, buffer, headers): curl.setopt(pycurl.PROXYUSERPWD, credentials) else: curl.setopt(pycurl.PROXY, '') + curl.unsetopt(pycurl.PROXYUSERPWD) if request.validate_cert: curl.setopt(pycurl.SSL_VERIFYPEER, 1) curl.setopt(pycurl.SSL_VERIFYHOST, 2) @@ -382,6 +383,8 @@ def _curl_setup_request(curl, request, buffer, headers): # that we can't reach, so allow ipv6 unless the user asks to disable. 
# (but see version check in _process_queue above) curl.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4) + else: + curl.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_WHATEVER) # Set the request method through curl's irritating interface which makes # up names for almost every single method @@ -404,6 +407,11 @@ def _curl_setup_request(curl, request, buffer, headers): # Handle curl's cryptic options for every individual HTTP method if request.method in ("POST", "PUT"): + if request.body is None: + raise AssertionError( + 'Body must not be empty for "%s" request' + % request.method) + request_buffer = BytesIO(utf8(request.body)) curl.setopt(pycurl.READFUNCTION, request_buffer.read) if request.method == "POST": @@ -414,6 +422,9 @@ def _curl_setup_request(curl, request, buffer, headers): curl.setopt(pycurl.POSTFIELDSIZE, len(request.body)) else: curl.setopt(pycurl.INFILESIZE, len(request.body)) + elif request.method == "GET": + if request.body is not None: + raise AssertionError('Body must be empty for GET request') if request.auth_username is not None: userpwd = "%s:%s" % (request.auth_username, request.auth_password or '') diff --git a/libs/tornado/gen.py b/libs/tornado/gen.py index 92b7458..217ebdf 100755 --- a/libs/tornado/gen.py +++ b/libs/tornado/gen.py @@ -38,8 +38,8 @@ since it is both shorter and provides better exception handling):: def get(self): yield gen.Task(AsyncHTTPClient().fetch, "http://example.com") -You can also yield a list of ``Futures`` and/or ``Tasks``, which will be -started at the same time and run in parallel; a list of results will +You can also yield a list or dict of ``Futures`` and/or ``Tasks``, which will be +started at the same time and run in parallel; a list or dict of results will be returned when they are all finished:: @gen.coroutine @@ -47,6 +47,13 @@ be returned when they are all finished:: http_client = AsyncHTTPClient() response1, response2 = yield [http_client.fetch(url1), http_client.fetch(url2)] + response_dict = yield 
dict(response3=http_client.fetch(url3), + response4=http_client.fetch(url4)) + response3 = response_dict['response3'] + response4 = response_dict['response4'] + +.. versionchanged:: 3.2 + Dict support added. For more complicated interfaces, `Task` can be split into two parts: `Callback` and `Wait`:: @@ -404,6 +411,10 @@ class Multi(YieldPoint): a list of ``YieldPoints``. """ def __init__(self, children): + self.keys = None + if isinstance(children, dict): + self.keys = list(children.keys()) + children = children.values() self.children = [] for i in children: if isinstance(i, Future): @@ -423,7 +434,11 @@ class Multi(YieldPoint): return not self.unfinished_children def get_result(self): - return [i.get_result() for i in self.children] + result = (i.get_result() for i in self.children) + if self.keys is not None: + return dict(zip(self.keys, result)) + else: + return list(result) class _NullYieldPoint(YieldPoint): @@ -523,7 +538,7 @@ class Runner(object): self.finished = True self.yield_point = _null_yield_point raise - if isinstance(yielded, list): + if isinstance(yielded, (list, dict)): yielded = Multi(yielded) elif isinstance(yielded, Future): yielded = YieldFuture(yielded) diff --git a/libs/tornado/httpclient.py b/libs/tornado/httpclient.py index 6767589..b58a834 100755 --- a/libs/tornado/httpclient.py +++ b/libs/tornado/httpclient.py @@ -282,7 +282,8 @@ class HTTPRequest(object): :arg int max_redirects: Limit for ``follow_redirects`` :arg string user_agent: String to send as ``User-Agent`` header :arg bool use_gzip: Request gzip encoding from the server - :arg string network_interface: Network interface to use for request + :arg string network_interface: Network interface to use for request. + ``curl_httpclient`` only; see note below. 
:arg callable streaming_callback: If set, ``streaming_callback`` will be run with each chunk of data as it is received, and ``HTTPResponse.body`` and ``HTTPResponse.buffer`` will be empty in @@ -310,14 +311,26 @@ class HTTPRequest(object): :arg bool validate_cert: For HTTPS requests, validate the server's certificate? :arg string ca_certs: filename of CA certificates in PEM format, - or None to use defaults. Note that in ``curl_httpclient``, if - any request uses a custom ``ca_certs`` file, they all must (they - don't have to all use the same ``ca_certs``, but it's not possible - to mix requests with ``ca_certs`` and requests that use the defaults. + or None to use defaults. See note below when used with + ``curl_httpclient``. :arg bool allow_ipv6: Use IPv6 when available? Default is false in ``simple_httpclient`` and true in ``curl_httpclient`` - :arg string client_key: Filename for client SSL key, if any - :arg string client_cert: Filename for client SSL certificate, if any + :arg string client_key: Filename for client SSL key, if any. See + note below when used with ``curl_httpclient``. + :arg string client_cert: Filename for client SSL certificate, if any. + See note below when used with ``curl_httpclient``. + + .. note:: + + When using ``curl_httpclient`` certain options may be + inherited by subsequent fetches because ``pycurl`` does + not allow them to be cleanly reset. This applies to the + ``ca_certs``, ``client_key``, ``client_cert``, and + ``network_interface`` arguments. If you use these + options, you should pass them on every request (you don't + have to always use the same values, but it's not possible + to mix requests that specify these options with ones that + use the defaults). .. versionadded:: 3.1 The ``auth_mode`` argument. 
@@ -372,6 +385,9 @@ class HTTPResponse(object): * headers: `tornado.httputil.HTTPHeaders` object + * effective_url: final location of the resource after following any + redirects + * buffer: ``cStringIO`` object for response body * body: response body as string (created on demand from ``self.buffer``) diff --git a/libs/tornado/httpserver.py b/libs/tornado/httpserver.py index d005545..34e7b76 100755 --- a/libs/tornado/httpserver.py +++ b/libs/tornado/httpserver.py @@ -29,6 +29,7 @@ from __future__ import absolute_import, division, print_function, with_statement import socket import ssl import time +import copy from tornado.escape import native_str, parse_qs_bytes from tornado import httputil @@ -326,8 +327,8 @@ class HTTPConnection(object): self.request_callback(self._request) except _BadRequestException as e: - gen_log.info("Malformed HTTP request from %s: %s", - self.address[0], e) + gen_log.info("Malformed HTTP request from %r: %s", + self.address, e) self.close() return @@ -336,7 +337,10 @@ class HTTPConnection(object): if self._request.method in ("POST", "PATCH", "PUT"): httputil.parse_body_arguments( self._request.headers.get("Content-Type", ""), data, - self._request.arguments, self._request.files) + self._request.body_arguments, self._request.files) + + for k, v in self._request.body_arguments.items(): + self._request.arguments.setdefault(k, []).extend(v) self.request_callback(self._request) @@ -403,6 +407,20 @@ class HTTPRequest(object): `.RequestHandler.get_argument`, which returns argument values as unicode strings. + .. attribute:: query_arguments + + Same format as ``arguments``, but contains only arguments extracted + from the query string. + + .. versionadded:: 3.2 + + .. attribute:: body_arguments + + Same format as ``arguments``, but contains only arguments extracted + from the request body. + + .. versionadded:: 3.2 + .. 
attribute:: files File uploads are available in the files property, which maps file @@ -457,6 +475,8 @@ class HTTPRequest(object): self.path, sep, self.query = uri.partition('?') self.arguments = parse_qs_bytes(self.query, keep_blank_values=True) + self.query_arguments = copy.deepcopy(self.arguments) + self.body_arguments = {} def supports_http_1_1(self): """Returns True if this request supports HTTP/1.1 semantics""" diff --git a/libs/tornado/httputil.py b/libs/tornado/httputil.py index 3e7337d..2575bc5 100755 --- a/libs/tornado/httputil.py +++ b/libs/tornado/httputil.py @@ -320,7 +320,11 @@ def parse_body_arguments(content_type, body, arguments, files): with the parsed contents. """ if content_type.startswith("application/x-www-form-urlencoded"): - uri_arguments = parse_qs_bytes(native_str(body), keep_blank_values=True) + try: + uri_arguments = parse_qs_bytes(native_str(body), keep_blank_values=True) + except Exception as e: + gen_log.warning('Invalid x-www-form-urlencoded body: %s', e) + uri_arguments = {} for name, values in uri_arguments.items(): if values: arguments.setdefault(name, []).extend(values) diff --git a/libs/tornado/ioloop.py b/libs/tornado/ioloop.py index 91ee2c5..a36ab7a 100755 --- a/libs/tornado/ioloop.py +++ b/libs/tornado/ioloop.py @@ -676,8 +676,7 @@ class PollIOLoop(IOLoop): while self._events: fd, events = self._events.popitem() try: - if self._handlers.has_key(fd): - self._handlers[fd](fd, events) + self._handlers[fd](fd, events) except (OSError, IOError) as e: if e.args[0] == errno.EPIPE: # Happens when the client closes the connection diff --git a/libs/tornado/iostream.py b/libs/tornado/iostream.py index 6bdc639..08430ce 100755 --- a/libs/tornado/iostream.py +++ b/libs/tornado/iostream.py @@ -774,7 +774,7 @@ class IOStream(BaseIOStream): # Sometimes setsockopt will fail if the socket is closed # at the wrong time. This can happen with HTTPServer # resetting the value to false between requests. 
- if e.errno != errno.EINVAL: + if e.errno not in (errno.EINVAL, errno.ECONNRESET): raise diff --git a/libs/tornado/log.py b/libs/tornado/log.py index fa11f37..648db5c 100755 --- a/libs/tornado/log.py +++ b/libs/tornado/log.py @@ -51,7 +51,7 @@ gen_log = logging.getLogger("tornado.general") def _stderr_supports_color(): color = False - if curses and sys.stderr.isatty(): + if curses and hasattr(sys.stderr, 'isatty') and sys.stderr.isatty(): try: curses.setupterm() if curses.tigetnum("colors") > 0: diff --git a/libs/tornado/netutil.py b/libs/tornado/netutil.py index 9dc8506..21db475 100755 --- a/libs/tornado/netutil.py +++ b/libs/tornado/netutil.py @@ -20,7 +20,6 @@ from __future__ import absolute_import, division, print_function, with_statement import errno import os -import re import socket import ssl import stat @@ -30,6 +29,13 @@ from tornado.ioloop import IOLoop from tornado.platform.auto import set_close_exec from tornado.util import Configurable +if hasattr(ssl, 'match_hostname') and hasattr(ssl, 'CertificateError'): # python 3.2+ + ssl_match_hostname = ssl.match_hostname + SSLCertificateError = ssl.CertificateError +else: + import backports.ssl_match_hostname + ssl_match_hostname = backports.ssl_match_hostname.match_hostname + SSLCertificateError = backports.ssl_match_hostname.CertificateError def bind_sockets(port, address=None, family=socket.AF_UNSPEC, backlog=128, flags=None): """Creates listening sockets bound to the given port and address. @@ -391,73 +397,3 @@ def ssl_wrap_socket(socket, ssl_options, server_hostname=None, **kwargs): return context.wrap_socket(socket, **kwargs) else: return ssl.wrap_socket(socket, **dict(context, **kwargs)) - -if hasattr(ssl, 'match_hostname') and hasattr(ssl, 'CertificateError'): # python 3.2+ - ssl_match_hostname = ssl.match_hostname - SSLCertificateError = ssl.CertificateError -else: - # match_hostname was added to the standard library ssl module in python 3.2. 
- # The following code was backported for older releases and copied from - # https://bitbucket.org/brandon/backports.ssl_match_hostname - class SSLCertificateError(ValueError): - pass - - def _dnsname_to_pat(dn, max_wildcards=1): - pats = [] - for frag in dn.split(r'.'): - if frag.count('*') > max_wildcards: - # Issue #17980: avoid denials of service by refusing more - # than one wildcard per fragment. A survery of established - # policy among SSL implementations showed it to be a - # reasonable choice. - raise SSLCertificateError( - "too many wildcards in certificate DNS name: " + repr(dn)) - if frag == '*': - # When '*' is a fragment by itself, it matches a non-empty dotless - # fragment. - pats.append('[^.]+') - else: - # Otherwise, '*' matches any dotless fragment. - frag = re.escape(frag) - pats.append(frag.replace(r'\*', '[^.]*')) - return re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) - - def ssl_match_hostname(cert, hostname): - """Verify that *cert* (in decoded format as returned by - SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 rules - are mostly followed, but IP addresses are not accepted for *hostname*. - - CertificateError is raised on failure. On success, the function - returns nothing. - """ - if not cert: - raise ValueError("empty or no certificate") - dnsnames = [] - san = cert.get('subjectAltName', ()) - for key, value in san: - if key == 'DNS': - if _dnsname_to_pat(value).match(hostname): - return - dnsnames.append(value) - if not dnsnames: - # The subject is only checked when there is no dNSName entry - # in subjectAltName - for sub in cert.get('subject', ()): - for key, value in sub: - # XXX according to RFC 2818, the most specific Common Name - # must be used. 
- if key == 'commonName': - if _dnsname_to_pat(value).match(hostname): - return - dnsnames.append(value) - if len(dnsnames) > 1: - raise SSLCertificateError("hostname %r " - "doesn't match either of %s" - % (hostname, ', '.join(map(repr, dnsnames)))) - elif len(dnsnames) == 1: - raise SSLCertificateError("hostname %r " - "doesn't match %r" - % (hostname, dnsnames[0])) - else: - raise SSLCertificateError("no appropriate commonName or " - "subjectAltName fields were found") diff --git a/libs/tornado/platform/asyncio.py b/libs/tornado/platform/asyncio.py new file mode 100644 index 0000000..a8f5bad --- /dev/null +++ b/libs/tornado/platform/asyncio.py @@ -0,0 +1,134 @@ +"""Bridges between the `asyncio` module and Tornado IOLoop. + +This is a work in progress and interfaces are subject to change. + +To test: +python3.4 -m tornado.test.runtests --ioloop=tornado.platform.asyncio.AsyncIOLoop +python3.4 -m tornado.test.runtests --ioloop=tornado.platform.asyncio.AsyncIOMainLoop +(the tests log a few warnings with AsyncIOMainLoop because they leave some +unfinished callbacks on the event loop that fail when it resumes) +""" +import asyncio +import datetime +import functools +import os + +from tornado.ioloop import IOLoop +from tornado import stack_context + +class BaseAsyncIOLoop(IOLoop): + def initialize(self, asyncio_loop, close_loop=False): + self.asyncio_loop = asyncio_loop + self.close_loop = close_loop + self.asyncio_loop.call_soon(self.make_current) + # Maps fd to handler function (as in IOLoop.add_handler) + self.handlers = {} + # Set of fds listening for reads/writes + self.readers = set() + self.writers = set() + self.closing = False + + def close(self, all_fds=False): + self.closing = True + for fd in list(self.handlers): + self.remove_handler(fd) + if all_fds: + os.close(fd) + if self.close_loop: + self.asyncio_loop.close() + + def add_handler(self, fd, handler, events): + if fd in self.handlers: + raise ValueError("fd %d added twice" % fd) + self.handlers[fd] = 
stack_context.wrap(handler) + if events & IOLoop.READ: + self.asyncio_loop.add_reader( + fd, self._handle_events, fd, IOLoop.READ) + self.readers.add(fd) + if events & IOLoop.WRITE: + self.asyncio_loop.add_writer( + fd, self._handle_events, fd, IOLoop.WRITE) + self.writers.add(fd) + + def update_handler(self, fd, events): + if events & IOLoop.READ: + if fd not in self.readers: + self.asyncio_loop.add_reader( + fd, self._handle_events, fd, IOLoop.READ) + self.readers.add(fd) + else: + if fd in self.readers: + self.asyncio_loop.remove_reader(fd) + self.readers.remove(fd) + if events & IOLoop.WRITE: + if fd not in self.writers: + self.asyncio_loop.add_writer( + fd, self._handle_events, fd, IOLoop.WRITE) + self.writers.add(fd) + else: + if fd in self.writers: + self.asyncio_loop.remove_writer(fd) + self.writers.remove(fd) + + def remove_handler(self, fd): + if fd not in self.handlers: + return + if fd in self.readers: + self.asyncio_loop.remove_reader(fd) + self.readers.remove(fd) + if fd in self.writers: + self.asyncio_loop.remove_writer(fd) + self.writers.remove(fd) + del self.handlers[fd] + + def _handle_events(self, fd, events): + self.handlers[fd](fd, events) + + def start(self): + self.asyncio_loop.run_forever() + + def stop(self): + self.asyncio_loop.stop() + + def _run_callback(self, callback, *args, **kwargs): + try: + callback(*args, **kwargs) + except Exception: + self.handle_callback_exception(callback) + + def add_timeout(self, deadline, callback): + if isinstance(deadline, (int, float)): + delay = max(deadline - self.time(), 0) + elif isinstance(deadline, datetime.timedelta): + delay = deadline.total_seconds() + else: + raise TypeError("Unsupported deadline %r", deadline) + return self.asyncio_loop.call_later(delay, self._run_callback, + stack_context.wrap(callback)) + + def remove_timeout(self, timeout): + timeout.cancel() + + def add_callback(self, callback, *args, **kwargs): + if self.closing: + raise RuntimeError("IOLoop is closing") + if kwargs: + 
self.asyncio_loop.call_soon_threadsafe(functools.partial( + self._run_callback, stack_context.wrap(callback), + *args, **kwargs)) + else: + self.asyncio_loop.call_soon_threadsafe( + self._run_callback, stack_context.wrap(callback), *args) + + add_callback_from_signal = add_callback + + +class AsyncIOMainLoop(BaseAsyncIOLoop): + def initialize(self): + super(AsyncIOMainLoop, self).initialize(asyncio.get_event_loop(), + close_loop=False) + +class AsyncIOLoop(BaseAsyncIOLoop): + def initialize(self): + super(AsyncIOLoop, self).initialize(asyncio.new_event_loop(), + close_loop=True) diff --git a/libs/tornado/process.py b/libs/tornado/process.py index ffd2d29..942c5c3 100755 --- a/libs/tornado/process.py +++ b/libs/tornado/process.py @@ -92,7 +92,8 @@ def fork_processes(num_processes, max_restarts=100): between any server code. Note that multiple processes are not compatible with the autoreload - module (or the debug=True option to `tornado.web.Application`). + module (or the ``autoreload=True`` option to `tornado.web.Application` + which defaults to True when ``debug=True``). When using multiple processes, no IOLoops can be created or referenced until after the call to ``fork_processes``. 
diff --git a/libs/tornado/simple_httpclient.py b/libs/tornado/simple_httpclient.py index d8dbb27..2558ada 100755 --- a/libs/tornado/simple_httpclient.py +++ b/libs/tornado/simple_httpclient.py @@ -72,6 +72,7 @@ class SimpleAsyncHTTPClient(AsyncHTTPClient): self.max_clients = max_clients self.queue = collections.deque() self.active = {} + self.waiting = {} self.max_buffer_size = max_buffer_size if resolver: self.resolver = resolver @@ -89,7 +90,16 @@ class SimpleAsyncHTTPClient(AsyncHTTPClient): self.resolver.close() def fetch_impl(self, request, callback): - self.queue.append((request, callback)) + key = object() + self.queue.append((key, request, callback)) + if not len(self.active) < self.max_clients: + timeout_handle = self.io_loop.add_timeout( + self.io_loop.time() + min(request.connect_timeout, + request.request_timeout), + functools.partial(self._on_timeout, key)) + else: + timeout_handle = None + self.waiting[key] = (request, callback, timeout_handle) self._process_queue() if self.queue: gen_log.debug("max_clients limit reached, request queued. 
" @@ -99,8 +109,10 @@ class SimpleAsyncHTTPClient(AsyncHTTPClient): def _process_queue(self): with stack_context.NullContext(): while self.queue and len(self.active) < self.max_clients: - request, callback = self.queue.popleft() - key = object() + key, request, callback = self.queue.popleft() + if key not in self.waiting: + continue + self._remove_timeout(key) self.active[key] = (request, callback) release_callback = functools.partial(self._release_fetch, key) self._handle_request(request, release_callback, callback) @@ -113,6 +125,22 @@ class SimpleAsyncHTTPClient(AsyncHTTPClient): del self.active[key] self._process_queue() + def _remove_timeout(self, key): + if key in self.waiting: + request, callback, timeout_handle = self.waiting[key] + if timeout_handle is not None: + self.io_loop.remove_timeout(timeout_handle) + del self.waiting[key] + + def _on_timeout(self, key): + request, callback, timeout_handle = self.waiting[key] + self.queue.remove((key, request, callback)) + timeout_response = HTTPResponse( + request, 599, error=HTTPError(599, "Timeout"), + request_time=self.io_loop.time() - request.start_time) + self.io_loop.add_callback(callback, timeout_response) + del self.waiting[key] + class _HTTPConnection(object): _SUPPORTED_METHODS = set(["GET", "HEAD", "POST", "PUT", "DELETE", "PATCH", "OPTIONS"]) @@ -162,15 +190,18 @@ class _HTTPConnection(object): # so restrict to ipv4 by default. 
af = socket.AF_INET + timeout = min(self.request.connect_timeout, self.request.request_timeout) + if timeout: + self._timeout = self.io_loop.add_timeout( + self.start_time + timeout, + stack_context.wrap(self._on_timeout)) self.resolver.resolve(host, port, af, callback=self._on_resolve) def _on_resolve(self, addrinfo): + if self.final_callback is None: + # final_callback is cleared if we've hit our timeout + return self.stream = self._create_stream(addrinfo) - timeout = min(self.request.connect_timeout, self.request.request_timeout) - if timeout: - self._timeout = self.io_loop.add_timeout( - self.start_time + timeout, - stack_context.wrap(self._on_timeout)) self.stream.set_close_callback(self._on_close) # ipv6 addresses are broken (in self.parsed.hostname) until # 2.7, here is correctly parsed value calculated in __init__ @@ -199,10 +230,10 @@ class _HTTPConnection(object): # the SSL_OP_NO_SSLv2, but that wasn't exposed to python # until 3.2. Python 2.7 adds the ciphers argument, which # can also be used to disable SSLv2. As a last resort - # on python 2.6, we set ssl_version to SSLv3. This is + # on python 2.6, we set ssl_version to TLSv1. This is # more narrow than we'd like since it also breaks - # compatibility with servers configured for TLSv1 only, - # but nearly all servers support SSLv3: + # compatibility with servers configured for SSLv3 only, + # but nearly all servers support both SSLv3 and TLSv1: # http://blog.ivanristic.com/2011/09/ssl-survey-protocol-support.html if sys.version_info >= (2, 7): ssl_options["ciphers"] = "DEFAULT:!SSLv2" @@ -210,7 +241,7 @@ class _HTTPConnection(object): # This is really only necessary for pre-1.0 versions # of openssl, but python 2.6 doesn't expose version # information. 
- ssl_options["ssl_version"] = ssl.PROTOCOL_SSLv3 + ssl_options["ssl_version"] = ssl.PROTOCOL_TLSv1 return SSLIOStream(socket.socket(af), io_loop=self.io_loop, @@ -233,6 +264,8 @@ class _HTTPConnection(object): def _on_connect(self): self._remove_timeout() + if self.final_callback is None: + return if self.request.request_timeout: self._timeout = self.io_loop.add_timeout( self.start_time + self.request.request_timeout, @@ -269,9 +302,15 @@ class _HTTPConnection(object): self.request.headers["User-Agent"] = self.request.user_agent if not self.request.allow_nonstandard_methods: if self.request.method in ("POST", "PATCH", "PUT"): - assert self.request.body is not None + if self.request.body is None: + raise AssertionError( + 'Body must not be empty for "%s" request' + % self.request.method) else: - assert self.request.body is None + if self.request.body is not None: + raise AssertionError( + 'Body must be empty for "%s" request' + % self.request.method) if self.request.body is not None: self.request.headers["Content-Length"] = str(len( self.request.body)) diff --git a/libs/tornado/speedups.c b/libs/tornado/speedups.c new file mode 100644 index 0000000..8a316c5 --- /dev/null +++ b/libs/tornado/speedups.c @@ -0,0 +1,49 @@ +#include <Python.h> + +static PyObject* websocket_mask(PyObject* self, PyObject* args) { + const char* mask; + int mask_len; + const char* data; + int data_len; + int i; + + if (!PyArg_ParseTuple(args, "s#s#", &mask, &mask_len, &data, &data_len)) { + return NULL; + } + + PyObject* result = PyBytes_FromStringAndSize(NULL, data_len); + if (!result) { + return NULL; + } + char* buf = PyBytes_AsString(result); + for (i = 0; i < data_len; i++) { + buf[i] = data[i] ^ mask[i % 4]; + } + + return result; +} + +static PyMethodDef methods[] = { + {"websocket_mask", websocket_mask, METH_VARARGS, ""}, + {NULL, NULL, 0, NULL} +}; + +#if PY_MAJOR_VERSION >= 3 +static struct PyModuleDef speedupsmodule = { + PyModuleDef_HEAD_INIT, + "speedups", + NULL, + -1, + 
methods +}; + +PyMODINIT_FUNC +PyInit_speedups() { + return PyModule_Create(&speedupsmodule); +} +#else // Python 2.x +PyMODINIT_FUNC +initspeedups() { + Py_InitModule("tornado.speedups", methods); +} +#endif diff --git a/libs/tornado/tcpserver.py b/libs/tornado/tcpserver.py index 8473a21..c077373 100755 --- a/libs/tornado/tcpserver.py +++ b/libs/tornado/tcpserver.py @@ -180,7 +180,8 @@ class TCPServer(object): between any server code. Note that multiple processes are not compatible with the autoreload - module (or the ``debug=True`` option to `tornado.web.Application`). + module (or the ``autoreload=True`` option to `tornado.web.Application` + which defaults to True when ``debug=True``). When using multiple processes, no IOLoops can be created or referenced until after the call to ``TCPServer.start(n)``. """ diff --git a/libs/tornado/web.py b/libs/tornado/web.py index 5f8d609..b6d7e97 100755 --- a/libs/tornado/web.py +++ b/libs/tornado/web.py @@ -250,7 +250,7 @@ class RequestHandler(object): not self.request.connection.no_keep_alive): conn_header = self.request.headers.get("Connection") if conn_header and (conn_header.lower() == "keep-alive"): - self.set_header("Connection", "Keep-Alive") + self._headers["Connection"] = "Keep-Alive" self._write_buffer = [] self._status_code = 200 self._reason = httputil.responses[200] @@ -348,12 +348,7 @@ class RequestHandler(object): The returned value is always unicode. """ - args = self.get_arguments(name, strip=strip) - if not args: - if default is self._ARG_DEFAULT: - raise MissingArgumentError(name) - return default - return args[-1] + return self._get_argument(name, default, self.request.arguments, strip) def get_arguments(self, name, strip=True): """Returns a list of the arguments with the given name. @@ -362,9 +357,73 @@ class RequestHandler(object): The returned values are always unicode. 
""" + return self._get_arguments(name, self.request.arguments, strip) + + def get_body_argument(self, name, default=_ARG_DEFAULT, strip=True): + """Returns the value of the argument with the given name + from the request body. + + If default is not provided, the argument is considered to be + required, and we raise a `MissingArgumentError` if it is missing. + + If the argument appears in the url more than once, we return the + last value. + + The returned value is always unicode. + + .. versionadded:: 3.2 + """ + return self._get_argument(name, default, self.request.body_arguments, strip) + + def get_body_arguments(self, name, strip=True): + """Returns a list of the body arguments with the given name. + + If the argument is not present, returns an empty list. + + The returned values are always unicode. + + .. versionadded:: 3.2 + """ + return self._get_arguments(name, self.request.body_arguments, strip) + + def get_query_argument(self, name, default=_ARG_DEFAULT, strip=True): + """Returns the value of the argument with the given name + from the request query string. + + If default is not provided, the argument is considered to be + required, and we raise a `MissingArgumentError` if it is missing. + + If the argument appears in the url more than once, we return the + last value. + + The returned value is always unicode. + + .. versionadded:: 3.2 + """ + return self._get_argument(name, default, self.request.query_arguments, strip) + + def get_query_arguments(self, name, strip=True): + """Returns a list of the query arguments with the given name. + + If the argument is not present, returns an empty list. + + The returned values are always unicode. + + .. 
versionadded:: 3.2 + """ + return self._get_arguments(name, self.request.query_arguments, strip) + + def _get_argument(self, name, default, source, strip=True): + args = self._get_arguments(name, source, strip=strip) + if not args: + if default is self._ARG_DEFAULT: + raise MissingArgumentError(name) + return default + return args[-1] + def _get_arguments(self, name, source, strip=True): values = [] - for v in self.request.arguments.get(name, []): + for v in source.get(name, []): v = self.decode_argument(v, name=name) if isinstance(v, unicode_type): # Get rid of any weird control chars (unless decoding gave @@ -838,7 +897,7 @@ class RequestHandler(object): else: self.finish(self.get_error_html(status_code, **kwargs)) return - if self.settings.get("debug") and "exc_info" in kwargs: + if self.settings.get("serve_traceback") and "exc_info" in kwargs: # in debug mode, try to send a traceback self.set_header('Content-Type', 'text/plain') for line in traceback.format_exception(*kwargs["exc_info"]): @@ -1318,6 +1377,12 @@ def asynchronous(method): if not self._finished: self.finish() IOLoop.current().add_future(result, future_complete) + # Once we have done this, hide the Future from our + # caller (i.e. RequestHandler._when_complete), which + # would otherwise set up its own callback and + # exception handler (resulting in exceptions being + # logged twice). + return None return result return wrapper @@ -1383,10 +1448,16 @@ class Application(object): or (regexp, request_class) tuples. When we receive requests, we iterate over the list in order and instantiate an instance of the first request class whose regexp matches the request path. + The request class can be specified as either a class object or a + (fully-qualified) name. + + Each tuple can contain additional elements, which correspond to the + arguments to the `URLSpec` constructor. (Prior to Tornado 3.2, this + only tuples of two or three elements were allowed). 
- Each tuple can contain an optional third element, which should be - a dictionary if it is present. That dictionary is passed as - keyword arguments to the contructor of the handler. This pattern + A dictionary may be passed as the third element of the tuple, + which will be used as keyword arguments to the handler's + constructor and `~RequestHandler.initialize` method. This pattern is used for the `StaticFileHandler` in this example (note that a `StaticFileHandler` can be installed automatically with the static_path setting described below):: @@ -1409,6 +1480,7 @@ class Application(object): and ``/robots.txt`` from the same directory. A custom subclass of `StaticFileHandler` can be specified with the ``static_handler_class`` setting. + """ def __init__(self, handlers=None, default_host="", transforms=None, wsgi=False, **settings): @@ -1447,8 +1519,14 @@ class Application(object): if handlers: self.add_handlers(".*$", handlers) + if self.settings.get('debug'): + self.settings.setdefault('autoreload', True) + self.settings.setdefault('compiled_template_cache', False) + self.settings.setdefault('static_hash_cache', False) + self.settings.setdefault('serve_traceback', True) + # Automatically reload modified modules - if self.settings.get("debug") and not wsgi: + if self.settings.get('autoreload') and not wsgi: from tornado import autoreload autoreload.start() @@ -1493,20 +1571,8 @@ class Application(object): for spec in host_handlers: if isinstance(spec, (tuple, list)): - assert len(spec) in (2, 3) - pattern = spec[0] - handler = spec[1] - - if isinstance(handler, str): - # import the Module and instantiate the class - # Must be a fully qualified name (module.ClassName) - handler = import_object(handler) - - if len(spec) == 3: - kwargs = spec[2] - else: - kwargs = {} - spec = URLSpec(pattern, handler, kwargs) + assert len(spec) in (2, 3, 4) + spec = URLSpec(*spec) handlers.append(spec) if spec.name: if spec.name in self.named_handlers: @@ -1597,14 +1663,23 @@ class 
Application(object): args = [unquote(s) for s in match.groups()] break if not handler: - handler = ErrorHandler(self, request, status_code=404) + if self.settings.get('default_handler_class'): + handler_class = self.settings['default_handler_class'] + handler_args = self.settings.get( + 'default_handler_args', {}) + else: + handler_class = ErrorHandler + handler_args = dict(status_code=404) + handler = handler_class(self, request, **handler_args) - # In debug mode, re-compile templates and reload static files on every + # If template cache is disabled (usually in the debug mode), + # re-compile templates and reload static files on every # request so you don't need to restart to see changes - if self.settings.get("debug"): + if not self.settings.get("compiled_template_cache", True): with RequestHandler._template_loader_lock: for loader in RequestHandler._template_loaders.values(): loader.reset() + if not self.settings.get('static_hash_cache', True): StaticFileHandler.reset() handler._execute(transforms, *args, **kwargs) @@ -2454,7 +2529,7 @@ class _UIModuleNamespace(object): class URLSpec(object): """Specifies mappings between URLs and handlers.""" - def __init__(self, pattern, handler_class, kwargs=None, name=None): + def __init__(self, pattern, handler, kwargs=None, name=None): """Parameters: * ``pattern``: Regular expression to be matched. 
Any groups @@ -2475,7 +2550,13 @@ class URLSpec(object): assert len(self.regex.groupindex) in (0, self.regex.groups), \ ("groups in url regexes must either be all named or all " "positional: %r" % self.regex.pattern) - self.handler_class = handler_class + + if isinstance(handler, str): + # import the Module and instantiate the class + # Must be a fully qualified name (module.ClassName) + handler = import_object(handler) + + self.handler_class = handler self.kwargs = kwargs or {} self.name = name self._path, self._group_count = self._find_groups() diff --git a/libs/tornado/websocket.py b/libs/tornado/websocket.py index 676d21b..8c2f5a6 100755 --- a/libs/tornado/websocket.py +++ b/libs/tornado/websocket.py @@ -33,7 +33,7 @@ import tornado.web from tornado.concurrent import TracebackFuture from tornado.escape import utf8, native_str -from tornado import httpclient +from tornado import httpclient, httputil from tornado.ioloop import IOLoop from tornado.iostream import StreamClosedError from tornado.log import gen_log, app_log @@ -52,6 +52,10 @@ class WebSocketError(Exception): class WebSocketClosedError(WebSocketError): + """Raised by operations on a closed connection. + + .. versionadded:: 3.2 + """ pass @@ -163,6 +167,12 @@ class WebSocketHandler(tornado.web.RequestHandler): encoded as json). If the ``binary`` argument is false, the message will be sent as utf8; in binary mode any byte string is allowed. + + If the connection is already closed, raises `WebSocketClosedError`. + + .. 
versionchanged:: 3.2 + `WebSocketClosedError` was added (previously a closed connection + would raise an `AttributeError`) """ if self.ws_connection is None: raise WebSocketClosedError() @@ -586,7 +596,7 @@ class WebSocketProtocol13(WebSocketProtocol): frame += struct.pack("!BQ", 127 | mask_bit, l) if self.mask_outgoing: mask = os.urandom(4) - data = mask + self._apply_mask(mask, data) + data = mask + _websocket_mask(mask, data) frame += data self.stream.write(frame) @@ -671,21 +681,8 @@ class WebSocketProtocol13(WebSocketProtocol): except StreamClosedError: self._abort() - def _apply_mask(self, mask, data): - mask = array.array("B", mask) - unmasked = array.array("B", data) - for i in xrange(len(data)): - unmasked[i] = unmasked[i] ^ mask[i % 4] - if hasattr(unmasked, 'tobytes'): - # tostring was deprecated in py32. It hasn't been removed, - # but since we turn on deprecation warnings in our tests - # we need to use the right one. - return unmasked.tobytes() - else: - return unmasked.tostring() - def _on_masked_frame_data(self, data): - self._on_frame_data(self._apply_mask(self._frame_mask, data)) + self._on_frame_data(_websocket_mask(self._frame_mask, data)) def _on_frame_data(self, data): if self._frame_opcode_is_control: @@ -771,7 +768,11 @@ class WebSocketProtocol13(WebSocketProtocol): class WebSocketClientConnection(simple_httpclient._HTTPConnection): - """WebSocket client connection.""" + """WebSocket client connection. + + This class should not be instantiated directly; use the + `websocket_connect` function instead. + """ def __init__(self, io_loop, request): self.connect_future = TracebackFuture() self.read_future = None @@ -793,9 +794,19 @@ class WebSocketClientConnection(simple_httpclient._HTTPConnection): io_loop, None, request, lambda: None, self._on_http_response, 104857600, self.resolver) + def close(self): + """Closes the websocket connection. + + .. 
versionadded:: 3.2 + """ + if self.protocol is not None: + self.protocol.close() + self.protocol = None + def _on_close(self): self.on_message(None) self.resolver.close() + super(WebSocketClientConnection, self)._on_close() def _on_http_response(self, response): if not self.connect_future.done(): @@ -859,13 +870,54 @@ def websocket_connect(url, io_loop=None, callback=None, connect_timeout=None): Takes a url and returns a Future whose result is a `WebSocketClientConnection`. + + .. versionchanged:: 3.2 + Also accepts ``HTTPRequest`` objects in place of urls. """ if io_loop is None: io_loop = IOLoop.current() - request = httpclient.HTTPRequest(url, connect_timeout=connect_timeout) + if isinstance(url, httpclient.HTTPRequest): + assert connect_timeout is None + request = url + # Copy and convert the headers dict/object (see comments in + # AsyncHTTPClient.fetch) + request.headers = httputil.HTTPHeaders(request.headers) + else: + request = httpclient.HTTPRequest(url, connect_timeout=connect_timeout) request = httpclient._RequestProxy( request, httpclient.HTTPRequest._DEFAULTS) conn = WebSocketClientConnection(io_loop, request) if callback is not None: io_loop.add_future(conn.connect_future, callback) return conn.connect_future + +def _websocket_mask_python(mask, data): + """Websocket masking function. + + `mask` is a `bytes` object of length 4; `data` is a `bytes` object of any length. + Returns a `bytes` object of the same length as `data` with the mask applied + as specified in section 5.3 of RFC 6455. + + This pure-python implementation may be replaced by an optimized version when available. + """ + mask = array.array("B", mask) + unmasked = array.array("B", data) + for i in xrange(len(data)): + unmasked[i] = unmasked[i] ^ mask[i % 4] + if hasattr(unmasked, 'tobytes'): + # tostring was deprecated in py32. It hasn't been removed, + # but since we turn on deprecation warnings in our tests + # we need to use the right one. 
+ return unmasked.tobytes() + else: + return unmasked.tostring() + +if os.environ.get('TORNADO_NO_EXTENSION'): + # This environment variable exists to make it easier to do performance comparisons; + # it's not guaranteed to remain supported in the future. + _websocket_mask = _websocket_mask_python +else: + try: + from tornado.speedups import websocket_mask as _websocket_mask + except ImportError: + _websocket_mask = _websocket_mask_python diff --git a/libs/tornado/wsgi.py b/libs/tornado/wsgi.py index 5e25a56..8e5dded 100755 --- a/libs/tornado/wsgi.py +++ b/libs/tornado/wsgi.py @@ -33,6 +33,7 @@ from __future__ import absolute_import, division, print_function, with_statement import sys import time +import copy import tornado from tornado import escape @@ -142,11 +143,14 @@ class HTTPRequest(object): self.path += urllib_parse.quote(from_wsgi_str(environ.get("PATH_INFO", ""))) self.uri = self.path self.arguments = {} + self.query_arguments = {} + self.body_arguments = {} self.query = environ.get("QUERY_STRING", "") if self.query: self.uri += "?" + self.query self.arguments = parse_qs_bytes(native_str(self.query), keep_blank_values=True) + self.query_arguments = copy.deepcopy(self.arguments) self.version = "HTTP/1.1" self.headers = httputil.HTTPHeaders() if environ.get("CONTENT_TYPE"): @@ -171,7 +175,10 @@ class HTTPRequest(object): # Parse request body self.files = {} httputil.parse_body_arguments(self.headers.get("Content-Type", ""), - self.body, self.arguments, self.files) + self.body, self.body_arguments, self.files) + + for k, v in self.body_arguments.items(): + self.arguments.setdefault(k, []).extend(v) self._start_time = time.time() self._finish_time = None diff --git a/libs/xmpp/__init__.py b/libs/xmpp/__init__.py new file mode 100644 index 0000000..ad03b28 --- /dev/null +++ b/libs/xmpp/__init__.py @@ -0,0 +1,31 @@ +# $Id: __init__.py,v 1.9 2005/03/07 09:34:51 snakeru Exp $ + +""" +All features of xmpppy library contained within separate modules. 
+At present there are modules: +simplexml - XML handling routines +protocol - jabber-objects (I.e. JID and different stanzas and sub-stanzas) handling routines. +debug - Jacob Lundquist's debugging module. Very handy if you like colored debug. +auth - Non-SASL and SASL stuff. You will need it to auth as a client or transport. +transports - low level connection handling. TCP and TLS currently. HTTP support planned. +roster - simple roster for use in clients. +dispatcher - decision-making logic. Handles all hooks. The first who takes control over fresh stanzas. +features - different stuff that didn't worths separating into modules +browser - DISCO server framework. Allows to build dynamic disco tree. +filetransfer - Currently contains only IBB stuff. Can be used for bot-to-bot transfers. + +Most of the classes that is defined in all these modules is an ancestors of +class PlugIn so they share a single set of methods allowing you to compile +a featured XMPP client. For every instance of PlugIn class the 'owner' is the class +in what the plug was plugged. While plugging in such instance usually sets some +methods of owner to it's own ones for easy access. All session specific info stored +either in instance of PlugIn or in owner's instance. This is considered unhandy +and there are plans to port 'Session' class from xmppd.py project for storing all +session-related info. Though if you are not accessing instances variables directly +and use only methods for access all values you should not have any problems. 
+ +""" + +import simplexml,protocol,debug,auth,transports,roster,dispatcher,features,browser,filetransfer,commands +from client import * +from protocol import * diff --git a/libs/xmpp/auth.py b/libs/xmpp/auth.py new file mode 100644 index 0000000..6e51d72 --- /dev/null +++ b/libs/xmpp/auth.py @@ -0,0 +1,326 @@ +## auth.py +## +## Copyright (C) 2003-2005 Alexey "Snake" Nezhdanov +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. + +# $Id: auth.py,v 1.41 2008/09/13 21:45:21 normanr Exp $ + +""" +Provides library with all Non-SASL and SASL authentication mechanisms. +Can be used both for client and transport authentication. +""" + +from protocol import * +from client import PlugIn +import sha,base64,random,dispatcher,re + +import md5 +def HH(some): return md5.new(some).hexdigest() +def H(some): return md5.new(some).digest() +def C(some): return ':'.join(some) + +class NonSASL(PlugIn): + """ Implements old Non-SASL (JEP-0078) authentication used in jabberd1.4 and transport authentication.""" + def __init__(self,user,password,resource): + """ Caches username, password and resource for auth. """ + PlugIn.__init__(self) + self.DBG_LINE='gen_auth' + self.user=user + self.password=password + self.resource=resource + + def plugin(self,owner): + """ Determine the best auth method (digest/0k/plain) and use it for auth. + Returns used method name on success. Used internally. 
""" + if not self.resource: return self.authComponent(owner) + self.DEBUG('Querying server about possible auth methods','start') + resp=owner.Dispatcher.SendAndWaitForResponse(Iq('get',NS_AUTH,payload=[Node('username',payload=[self.user])])) + if not isResultNode(resp): + self.DEBUG('No result node arrived! Aborting...','error') + return + iq=Iq(typ='set',node=resp) + query=iq.getTag('query') + query.setTagData('username',self.user) + query.setTagData('resource',self.resource) + + if query.getTag('digest'): + self.DEBUG("Performing digest authentication",'ok') + query.setTagData('digest',sha.new(owner.Dispatcher.Stream._document_attrs['id']+self.password).hexdigest()) + if query.getTag('password'): query.delChild('password') + method='digest' + elif query.getTag('token'): + token=query.getTagData('token') + seq=query.getTagData('sequence') + self.DEBUG("Performing zero-k authentication",'ok') + hash = sha.new(sha.new(self.password).hexdigest()+token).hexdigest() + for foo in xrange(int(seq)): hash = sha.new(hash).hexdigest() + query.setTagData('hash',hash) + method='0k' + else: + self.DEBUG("Sequre methods unsupported, performing plain text authentication",'warn') + query.setTagData('password',self.password) + method='plain' + resp=owner.Dispatcher.SendAndWaitForResponse(iq) + if isResultNode(resp): + self.DEBUG('Sucessfully authenticated with remove host.','ok') + owner.User=self.user + owner.Resource=self.resource + owner._registered_name=owner.User+'@'+owner.Server+'/'+owner.Resource + return method + self.DEBUG('Authentication failed!','error') + + def authComponent(self,owner): + """ Authenticate component. Send handshake stanza and wait for result. Returns "ok" on success. 
""" + self.handshake=0 + owner.send(Node(NS_COMPONENT_ACCEPT+' handshake',payload=[sha.new(owner.Dispatcher.Stream._document_attrs['id']+self.password).hexdigest()])) + owner.RegisterHandler('handshake',self.handshakeHandler,xmlns=NS_COMPONENT_ACCEPT) + while not self.handshake: + self.DEBUG("waiting on handshake",'notify') + owner.Process(1) + owner._registered_name=self.user + if self.handshake+1: return 'ok' + + def handshakeHandler(self,disp,stanza): + """ Handler for registering in dispatcher for accepting transport authentication. """ + if stanza.getName()=='handshake': self.handshake=1 + else: self.handshake=-1 + +class SASL(PlugIn): + """ Implements SASL authentication. """ + def __init__(self,username,password): + PlugIn.__init__(self) + self.username=username + self.password=password + + def plugin(self,owner): + if not self._owner.Dispatcher.Stream._document_attrs.has_key('version'): self.startsasl='not-supported' + elif self._owner.Dispatcher.Stream.features: + try: self.FeaturesHandler(self._owner.Dispatcher,self._owner.Dispatcher.Stream.features) + except NodeProcessed: pass + else: self.startsasl=None + + def auth(self): + """ Start authentication. Result can be obtained via "SASL.startsasl" attribute and will be + either "success" or "failure". Note that successfull auth will take at least + two Dispatcher.Process() calls. """ + if self.startsasl: pass + elif self._owner.Dispatcher.Stream.features: + try: self.FeaturesHandler(self._owner.Dispatcher,self._owner.Dispatcher.Stream.features) + except NodeProcessed: pass + else: self._owner.RegisterHandler('features',self.FeaturesHandler,xmlns=NS_STREAMS) + + def plugout(self): + """ Remove SASL handlers from owner's dispatcher. Used internally. 
""" + if self._owner.__dict__.has_key('features'): self._owner.UnregisterHandler('features',self.FeaturesHandler,xmlns=NS_STREAMS) + if self._owner.__dict__.has_key('challenge'): self._owner.UnregisterHandler('challenge',self.SASLHandler,xmlns=NS_SASL) + if self._owner.__dict__.has_key('failure'): self._owner.UnregisterHandler('failure',self.SASLHandler,xmlns=NS_SASL) + if self._owner.__dict__.has_key('success'): self._owner.UnregisterHandler('success',self.SASLHandler,xmlns=NS_SASL) + + def FeaturesHandler(self,conn,feats): + """ Used to determine if server supports SASL auth. Used internally. """ + if not feats.getTag('mechanisms',namespace=NS_SASL): + self.startsasl='not-supported' + self.DEBUG('SASL not supported by server','error') + return + mecs=[] + for mec in feats.getTag('mechanisms',namespace=NS_SASL).getTags('mechanism'): + mecs.append(mec.getData()) + self._owner.RegisterHandler('challenge',self.SASLHandler,xmlns=NS_SASL) + self._owner.RegisterHandler('failure',self.SASLHandler,xmlns=NS_SASL) + self._owner.RegisterHandler('success',self.SASLHandler,xmlns=NS_SASL) + if "ANONYMOUS" in mecs and self.username == None: + node=Node('auth',attrs={'xmlns':NS_SASL,'mechanism':'ANONYMOUS'}) + elif "DIGEST-MD5" in mecs: + node=Node('auth',attrs={'xmlns':NS_SASL,'mechanism':'DIGEST-MD5'}) + elif "PLAIN" in mecs: + sasl_data='%s\x00%s\x00%s'%(self.username+'@'+self._owner.Server,self.username,self.password) + node=Node('auth',attrs={'xmlns':NS_SASL,'mechanism':'PLAIN'},payload=[base64.encodestring(sasl_data).replace('\r','').replace('\n','')]) + else: + self.startsasl='failure' + self.DEBUG('I can only use DIGEST-MD5 and PLAIN mecanisms.','error') + return + self.startsasl='in-process' + self._owner.send(node.__str__()) + raise NodeProcessed + + def SASLHandler(self,conn,challenge): + """ Perform next SASL auth step. Used internally. 
""" + if challenge.getNamespace()<>NS_SASL: return + if challenge.getName()=='failure': + self.startsasl='failure' + try: reason=challenge.getChildren()[0] + except: reason=challenge + self.DEBUG('Failed SASL authentification: %s'%reason,'error') + raise NodeProcessed + elif challenge.getName()=='success': + self.startsasl='success' + self.DEBUG('Successfully authenticated with remote server.','ok') + handlers=self._owner.Dispatcher.dumpHandlers() + self._owner.Dispatcher.PlugOut() + dispatcher.Dispatcher().PlugIn(self._owner) + self._owner.Dispatcher.restoreHandlers(handlers) + self._owner.User=self.username + raise NodeProcessed +########################################3333 + incoming_data=challenge.getData() + chal={} + data=base64.decodestring(incoming_data) + self.DEBUG('Got challenge:'+data,'ok') + for pair in re.findall('(\w+\s*=\s*(?:(?:"[^"]+")|(?:[^,]+)))',data): + key,value=[x.strip() for x in pair.split('=', 1)] + if value[:1]=='"' and value[-1:]=='"': value=value[1:-1] + chal[key]=value + if chal.has_key('qop') and 'auth' in [x.strip() for x in chal['qop'].split(',')]: + resp={} + resp['username']=self.username + resp['realm']=self._owner.Server + resp['nonce']=chal['nonce'] + cnonce='' + for i in range(7): + cnonce+=hex(int(random.random()*65536*4096))[2:] + resp['cnonce']=cnonce + resp['nc']=('00000001') + resp['qop']='auth' + resp['digest-uri']='xmpp/'+self._owner.Server + A1=C([H(C([resp['username'],resp['realm'],self.password])),resp['nonce'],resp['cnonce']]) + A2=C(['AUTHENTICATE',resp['digest-uri']]) + response= HH(C([HH(A1),resp['nonce'],resp['nc'],resp['cnonce'],resp['qop'],HH(A2)])) + resp['response']=response + resp['charset']='utf-8' + sasl_data='' + for key in ['charset','username','realm','nonce','nc','cnonce','digest-uri','response','qop']: + if key in ['nc','qop','response','charset']: sasl_data+="%s=%s,"%(key,resp[key]) + else: sasl_data+='%s="%s",'%(key,resp[key]) +########################################3333 + 
node=Node('response',attrs={'xmlns':NS_SASL},payload=[base64.encodestring(sasl_data[:-1]).replace('\r','').replace('\n','')]) + self._owner.send(node.__str__()) + elif chal.has_key('rspauth'): self._owner.send(Node('response',attrs={'xmlns':NS_SASL}).__str__()) + else: + self.startsasl='failure' + self.DEBUG('Failed SASL authentification: unknown challenge','error') + raise NodeProcessed + +class Bind(PlugIn): + """ Bind some JID to the current connection to allow router know of our location.""" + def __init__(self): + PlugIn.__init__(self) + self.DBG_LINE='bind' + self.bound=None + + def plugin(self,owner): + """ Start resource binding, if allowed at this time. Used internally. """ + if self._owner.Dispatcher.Stream.features: + try: self.FeaturesHandler(self._owner.Dispatcher,self._owner.Dispatcher.Stream.features) + except NodeProcessed: pass + else: self._owner.RegisterHandler('features',self.FeaturesHandler,xmlns=NS_STREAMS) + + def plugout(self): + """ Remove Bind handler from owner's dispatcher. Used internally. """ + self._owner.UnregisterHandler('features',self.FeaturesHandler,xmlns=NS_STREAMS) + + def FeaturesHandler(self,conn,feats): + """ Determine if server supports resource binding and set some internal attributes accordingly. """ + if not feats.getTag('bind',namespace=NS_BIND): + self.bound='failure' + self.DEBUG('Server does not requested binding.','error') + return + if feats.getTag('session',namespace=NS_SESSION): self.session=1 + else: self.session=-1 + self.bound=[] + + def Bind(self,resource=None): + """ Perform binding. Use provided resource name or random (if not provided). 
""" + while self.bound is None and self._owner.Process(1): pass + if resource: resource=[Node('resource',payload=[resource])] + else: resource=[] + resp=self._owner.SendAndWaitForResponse(Protocol('iq',typ='set',payload=[Node('bind',attrs={'xmlns':NS_BIND},payload=resource)])) + if isResultNode(resp): + self.bound.append(resp.getTag('bind').getTagData('jid')) + self.DEBUG('Successfully bound %s.'%self.bound[-1],'ok') + jid=JID(resp.getTag('bind').getTagData('jid')) + self._owner.User=jid.getNode() + self._owner.Resource=jid.getResource() + resp=self._owner.SendAndWaitForResponse(Protocol('iq',typ='set',payload=[Node('session',attrs={'xmlns':NS_SESSION})])) + if isResultNode(resp): + self.DEBUG('Successfully opened session.','ok') + self.session=1 + return 'ok' + else: + self.DEBUG('Session open failed.','error') + self.session=0 + elif resp: self.DEBUG('Binding failed: %s.'%resp.getTag('error'),'error') + else: + self.DEBUG('Binding failed: timeout expired.','error') + return '' + +class ComponentBind(PlugIn): + """ ComponentBind some JID to the current connection to allow router know of our location.""" + def __init__(self, sasl): + PlugIn.__init__(self) + self.DBG_LINE='bind' + self.bound=None + self.needsUnregister=None + self.sasl = sasl + + def plugin(self,owner): + """ Start resource binding, if allowed at this time. Used internally. """ + if not self.sasl: + self.bound=[] + return + if self._owner.Dispatcher.Stream.features: + try: self.FeaturesHandler(self._owner.Dispatcher,self._owner.Dispatcher.Stream.features) + except NodeProcessed: pass + else: + self._owner.RegisterHandler('features',self.FeaturesHandler,xmlns=NS_STREAMS) + self.needsUnregister=1 + + def plugout(self): + """ Remove ComponentBind handler from owner's dispatcher. Used internally. 
""" + if self.needsUnregister: + self._owner.UnregisterHandler('features',self.FeaturesHandler,xmlns=NS_STREAMS) + + def FeaturesHandler(self,conn,feats): + """ Determine if server supports resource binding and set some internal attributes accordingly. """ + if not feats.getTag('bind',namespace=NS_BIND): + self.bound='failure' + self.DEBUG('Server does not requested binding.','error') + return + if feats.getTag('session',namespace=NS_SESSION): self.session=1 + else: self.session=-1 + self.bound=[] + + def Bind(self,domain=None): + """ Perform binding. Use provided domain name (if not provided). """ + while self.bound is None and self._owner.Process(1): pass + if self.sasl: + xmlns = NS_COMPONENT_1 + else: + xmlns = None + self.bindresponse = None + ttl = dispatcher.DefaultTimeout + self._owner.RegisterHandler('bind',self.BindHandler,xmlns=xmlns) + self._owner.send(Protocol('bind',attrs={'name':domain},xmlns=NS_COMPONENT_1)) + while self.bindresponse is None and self._owner.Process(1) and ttl > 0: ttl-=1 + self._owner.UnregisterHandler('bind',self.BindHandler,xmlns=xmlns) + resp=self.bindresponse + if resp and resp.getAttr('error'): + self.DEBUG('Binding failed: %s.'%resp.getAttr('error'),'error') + elif resp: + self.DEBUG('Successfully bound.','ok') + return 'ok' + else: + self.DEBUG('Binding failed: timeout expired.','error') + return '' + + def BindHandler(self,conn,bind): + self.bindresponse = bind + pass diff --git a/libs/xmpp/browser.py b/libs/xmpp/browser.py new file mode 100644 index 0000000..8848ea4 --- /dev/null +++ b/libs/xmpp/browser.py @@ -0,0 +1,221 @@ +## browser.py +## +## Copyright (C) 2004 Alexey "Snake" Nezhdanov +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. 
+## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. + +# $Id: browser.py,v 1.12 2007/05/13 17:55:14 normanr Exp $ + +"""Browser module provides DISCO server framework for your application. +This functionality can be used for very different purposes - from publishing +software version and supported features to building of "jabber site" that users +can navigate with their disco browsers and interact with active content. + +Such functionality is achieved via registering "DISCO handlers" that are +automatically called when user requests some node of your disco tree. +""" + +from dispatcher import * +from client import PlugIn + +class Browser(PlugIn): + """ WARNING! This class is for components only. It will not work in client mode! + + Standart xmpppy class that is ancestor of PlugIn and can be attached + to your application. + All processing will be performed in the handlers registered in the browser + instance. You can register any number of handlers ensuring that for each + node/jid combination only one (or none) handler registered. + You can register static information or the fully-blown function that will + calculate the answer dynamically. + Example of static info (see JEP-0030, examples 13-14): + # cl - your xmpppy connection instance. 
+ b=xmpp.browser.Browser() + b.PlugIn(cl) + items=[] + item={} + item['jid']='catalog.shakespeare.lit' + item['node']='books' + item['name']='Books by and about Shakespeare' + items.append(item) + item={} + item['jid']='catalog.shakespeare.lit' + item['node']='clothing' + item['name']='Wear your literary taste with pride' + items.append(item) + item={} + item['jid']='catalog.shakespeare.lit' + item['node']='music' + item['name']='Music from the time of Shakespeare' + items.append(item) + info={'ids':[], 'features':[]} + b.setDiscoHandler({'items':items,'info':info}) + + items should be a list of item elements. + every item element can have any of these four keys: 'jid', 'node', 'name', 'action' + info should be a dicionary and must have keys 'ids' and 'features'. + Both of them should be lists: + ids is a list of dictionaries and features is a list of text strings. + Example (see JEP-0030, examples 1-2) + # cl - your xmpppy connection instance. + b=xmpp.browser.Browser() + b.PlugIn(cl) + items=[] + ids=[] + ids.append({'category':'conference','type':'text','name':'Play-Specific Chatrooms'}) + ids.append({'category':'directory','type':'chatroom','name':'Play-Specific Chatrooms'}) + features=[NS_DISCO_INFO,NS_DISCO_ITEMS,NS_MUC,NS_REGISTER,NS_SEARCH,NS_TIME,NS_VERSION] + info={'ids':ids,'features':features} + # info['xdata']=xmpp.protocol.DataForm() # JEP-0128 + b.setDiscoHandler({'items':[],'info':info}) + """ + def __init__(self): + """Initialises internal variables. Used internally.""" + PlugIn.__init__(self) + DBG_LINE='browser' + self._exported_methods=[] + self._handlers={'':{}} + + def plugin(self, owner): + """ Registers it's own iq handlers in your application dispatcher instance. 
+ Used internally.""" + owner.RegisterHandler('iq',self._DiscoveryHandler,typ='get',ns=NS_DISCO_INFO) + owner.RegisterHandler('iq',self._DiscoveryHandler,typ='get',ns=NS_DISCO_ITEMS) + + def plugout(self): + """ Unregisters browser's iq handlers from your application dispatcher instance. + Used internally.""" + self._owner.UnregisterHandler('iq',self._DiscoveryHandler,typ='get',ns=NS_DISCO_INFO) + self._owner.UnregisterHandler('iq',self._DiscoveryHandler,typ='get',ns=NS_DISCO_ITEMS) + + def _traversePath(self,node,jid,set=0): + """ Returns dictionary and key or None,None + None - root node (w/o "node" attribute) + /a/b/c - node + /a/b/ - branch + Set returns '' or None as the key + get returns '' or None as the key or None as the dict. + Used internally.""" + if self._handlers.has_key(jid): cur=self._handlers[jid] + elif set: + self._handlers[jid]={} + cur=self._handlers[jid] + else: cur=self._handlers[''] + if node is None: node=[None] + else: node=node.replace('/',' /').split('/') + for i in node: + if i<>'' and cur.has_key(i): cur=cur[i] + elif set and i<>'': cur[i]={dict:cur,str:i}; cur=cur[i] + elif set or cur.has_key(''): return cur,'' + else: return None,None + if cur.has_key(1) or set: return cur,1 + raise "Corrupted data" + + def setDiscoHandler(self,handler,node='',jid=''): + """ This is the main method that you will use in this class. + It is used to register supplied DISCO handler (or dictionary with static info) + as handler of some disco tree branch. + If you do not specify the node this handler will be used for all queried nodes. + If you do not specify the jid this handler will be used for all queried JIDs. 
+ + Usage: + cl.Browser.setDiscoHandler(someDict,node,jid) + or + cl.Browser.setDiscoHandler(someDISCOHandler,node,jid) + where + + someDict={ + 'items':[ + {'jid':'jid1','action':'action1','node':'node1','name':'name1'}, + {'jid':'jid2','action':'action2','node':'node2','name':'name2'}, + {'jid':'jid3','node':'node3','name':'name3'}, + {'jid':'jid4','node':'node4'} + ], + 'info' :{ + 'ids':[ + {'category':'category1','type':'type1','name':'name1'}, + {'category':'category2','type':'type2','name':'name2'}, + {'category':'category3','type':'type3','name':'name3'}, + ], + 'features':['feature1','feature2','feature3','feature4'], + 'xdata':DataForm + } + } + + and/or + + def someDISCOHandler(session,request,TYR): + # if TYR=='items': # returns items list of the same format as shown above + # elif TYR=='info': # returns info dictionary of the same format as shown above + # else: # this case is impossible for now. + """ + self.DEBUG('Registering handler %s for "%s" node->%s'%(handler,jid,node), 'info') + node,key=self._traversePath(node,jid,1) + node[key]=handler + + def getDiscoHandler(self,node='',jid=''): + """ Returns the previously registered DISCO handler + that is resonsible for this node/jid combination. + Used internally.""" + node,key=self._traversePath(node,jid) + if node: return node[key] + + def delDiscoHandler(self,node='',jid=''): + """ Unregisters DISCO handler that is resonsible for this + node/jid combination. When handler is unregistered the branch + is handled in the same way that it's parent branch from this moment. + """ + node,key=self._traversePath(node,jid) + if node: + handler=node[key] + del node[dict][node[str]] + return handler + + def _DiscoveryHandler(self,conn,request): + """ Servers DISCO iq request from the remote client. + Automatically determines the best handler to use and calls it + to handle the request. Used internally. 
+ """ + node=request.getQuerynode() + if node: + nodestr=node + else: + nodestr='None' + handler=self.getDiscoHandler(node,request.getTo()) + if not handler: + self.DEBUG("No Handler for request with jid->%s node->%s ns->%s"%(request.getTo().__str__().encode('utf8'),nodestr.encode('utf8'),request.getQueryNS().encode('utf8')),'error') + conn.send(Error(request,ERR_ITEM_NOT_FOUND)) + raise NodeProcessed + self.DEBUG("Handling request with jid->%s node->%s ns->%s"%(request.getTo().__str__().encode('utf8'),nodestr.encode('utf8'),request.getQueryNS().encode('utf8')),'ok') + rep=request.buildReply('result') + if node: rep.setQuerynode(node) + q=rep.getTag('query') + if request.getQueryNS()==NS_DISCO_ITEMS: + # handler must return list: [{jid,action,node,name}] + if type(handler)==dict: lst=handler['items'] + else: lst=handler(conn,request,'items') + if lst==None: + conn.send(Error(request,ERR_ITEM_NOT_FOUND)) + raise NodeProcessed + for item in lst: q.addChild('item',item) + elif request.getQueryNS()==NS_DISCO_INFO: + if type(handler)==dict: dt=handler['info'] + else: dt=handler(conn,request,'info') + if dt==None: + conn.send(Error(request,ERR_ITEM_NOT_FOUND)) + raise NodeProcessed + # handler must return dictionary: + # {'ids':[{},{},{},{}], 'features':[fe,at,ur,es], 'xdata':DataForm} + for id in dt['ids']: q.addChild('identity',id) + for feature in dt['features']: q.addChild('feature',{'var':feature}) + if dt.has_key('xdata'): q.addChild(node=dt['xdata']) + conn.send(rep) + raise NodeProcessed diff --git a/libs/xmpp/client.py b/libs/xmpp/client.py new file mode 100644 index 0000000..4d93211 --- /dev/null +++ b/libs/xmpp/client.py @@ -0,0 +1,325 @@ +## client.py +## +## Copyright (C) 2003-2005 Alexey "Snake" Nezhdanov +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. 
+## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. + +# $Id: client.py,v 1.61 2009/04/07 06:19:42 snakeru Exp $ + +""" +Provides PlugIn class functionality to develop extentions for xmpppy. +Also provides Client and Component classes implementations as the +examples of xmpppy structures usage. +These classes can be used for simple applications "AS IS" though. +""" + +import socket +import debug +Debug=debug +Debug.DEBUGGING_IS_ON=1 +Debug.Debug.colors['socket']=debug.color_dark_gray +Debug.Debug.colors['CONNECTproxy']=debug.color_dark_gray +Debug.Debug.colors['nodebuilder']=debug.color_brown +Debug.Debug.colors['client']=debug.color_cyan +Debug.Debug.colors['component']=debug.color_cyan +Debug.Debug.colors['dispatcher']=debug.color_green +Debug.Debug.colors['browser']=debug.color_blue +Debug.Debug.colors['auth']=debug.color_yellow +Debug.Debug.colors['roster']=debug.color_magenta +Debug.Debug.colors['ibb']=debug.color_yellow + +Debug.Debug.colors['down']=debug.color_brown +Debug.Debug.colors['up']=debug.color_brown +Debug.Debug.colors['data']=debug.color_brown +Debug.Debug.colors['ok']=debug.color_green +Debug.Debug.colors['warn']=debug.color_yellow +Debug.Debug.colors['error']=debug.color_red +Debug.Debug.colors['start']=debug.color_dark_gray +Debug.Debug.colors['stop']=debug.color_dark_gray +Debug.Debug.colors['sent']=debug.color_yellow +Debug.Debug.colors['got']=debug.color_bright_cyan + +DBG_CLIENT='client' +DBG_COMPONENT='component' + +class PlugIn: + """ Common xmpppy plugins infrastructure: plugging in/out, debugging. """ + def __init__(self): + self._exported_methods=[] + self.DBG_LINE=self.__class__.__name__.lower() + + def PlugIn(self,owner): + """ Attach to main instance and register ourself and all our staff in it. 
""" + self._owner=owner + if self.DBG_LINE not in owner.debug_flags: + owner.debug_flags.append(self.DBG_LINE) + self.DEBUG('Plugging %s into %s'%(self,self._owner),'start') + if owner.__dict__.has_key(self.__class__.__name__): + return self.DEBUG('Plugging ignored: another instance already plugged.','error') + self._old_owners_methods=[] + for method in self._exported_methods: + if owner.__dict__.has_key(method.__name__): + self._old_owners_methods.append(owner.__dict__[method.__name__]) + owner.__dict__[method.__name__]=method + owner.__dict__[self.__class__.__name__]=self + if self.__class__.__dict__.has_key('plugin'): return self.plugin(owner) + + def PlugOut(self): + """ Unregister all our staff from main instance and detach from it. """ + self.DEBUG('Plugging %s out of %s.'%(self,self._owner),'stop') + ret = None + if self.__class__.__dict__.has_key('plugout'): ret = self.plugout() + self._owner.debug_flags.remove(self.DBG_LINE) + for method in self._exported_methods: del self._owner.__dict__[method.__name__] + for method in self._old_owners_methods: self._owner.__dict__[method.__name__]=method + del self._owner.__dict__[self.__class__.__name__] + return ret + + def DEBUG(self,text,severity='info'): + """ Feed a provided debug line to main instance's debug facility along with our ID string. """ + self._owner.DEBUG(self.DBG_LINE,text,severity) + +import transports,dispatcher,auth,roster +class CommonClient: + """ Base for Client and Component classes.""" + def __init__(self,server,port=5222,debug=['always', 'nodebuilder']): + """ Caches server name and (optionally) port to connect to. "debug" parameter specifies + the debug IDs that will go into debug output. You can either specifiy an "include" + or "exclude" list. The latter is done via adding "always" pseudo-ID to the list. + Full list: ['nodebuilder', 'dispatcher', 'gen_auth', 'SASL_auth', 'bind', 'socket', + 'CONNECTproxy', 'TLS', 'roster', 'browser', 'ibb'] . 
""" + if self.__class__.__name__=='Client': self.Namespace,self.DBG='jabber:client',DBG_CLIENT + elif self.__class__.__name__=='Component': self.Namespace,self.DBG=dispatcher.NS_COMPONENT_ACCEPT,DBG_COMPONENT + self.defaultNamespace=self.Namespace + self.disconnect_handlers=[] + self.Server=server + self.Port=port + if debug and type(debug)<>list: debug=['always', 'nodebuilder'] + self._DEBUG=Debug.Debug(debug) + self.DEBUG=self._DEBUG.Show + self.debug_flags=self._DEBUG.debug_flags + self.debug_flags.append(self.DBG) + self._owner=self + self._registered_name=None + self.RegisterDisconnectHandler(self.DisconnectHandler) + self.connected='' + self._route=0 + + def RegisterDisconnectHandler(self,handler): + """ Register handler that will be called on disconnect.""" + self.disconnect_handlers.append(handler) + + def UnregisterDisconnectHandler(self,handler): + """ Unregister handler that is called on disconnect.""" + self.disconnect_handlers.remove(handler) + + def disconnected(self): + """ Called on disconnection. Calls disconnect handlers and cleans things up. """ + self.connected='' + self.DEBUG(self.DBG,'Disconnect detected','stop') + self.disconnect_handlers.reverse() + for i in self.disconnect_handlers: i() + self.disconnect_handlers.reverse() + if self.__dict__.has_key('TLS'): self.TLS.PlugOut() + + def DisconnectHandler(self): + """ Default disconnect handler. Just raises an IOError. + If you choosed to use this class in your production client, + override this method or at least unregister it. """ + raise IOError('Disconnected from server.') + + def event(self,eventName,args={}): + """ Default event handler. To be overriden. """ + print "Event: ",(eventName,args) + + def isConnected(self): + """ Returns connection state. F.e.: None / 'tls' / 'tcp+non_sasl' . """ + return self.connected + + def reconnectAndReauth(self): + """ Example of reconnection method. In fact, it can be used to batch connection and auth as well. 
""" + handlerssave=self.Dispatcher.dumpHandlers() + if self.__dict__.has_key('ComponentBind'): self.ComponentBind.PlugOut() + if self.__dict__.has_key('Bind'): self.Bind.PlugOut() + self._route=0 + if self.__dict__.has_key('NonSASL'): self.NonSASL.PlugOut() + if self.__dict__.has_key('SASL'): self.SASL.PlugOut() + if self.__dict__.has_key('TLS'): self.TLS.PlugOut() + self.Dispatcher.PlugOut() + if self.__dict__.has_key('HTTPPROXYsocket'): self.HTTPPROXYsocket.PlugOut() + if self.__dict__.has_key('TCPsocket'): self.TCPsocket.PlugOut() + if not self.connect(server=self._Server,proxy=self._Proxy): return + if not self.auth(self._User,self._Password,self._Resource): return + self.Dispatcher.restoreHandlers(handlerssave) + return self.connected + + def connect(self,server=None,proxy=None,ssl=None,use_srv=None): + """ Make a tcp/ip connection, protect it with tls/ssl if possible and start XMPP stream. + Returns None or 'tcp' or 'tls', depending on the result.""" + if not server: server=(self.Server,self.Port) + if proxy: sock=transports.HTTPPROXYsocket(proxy,server,use_srv) + else: sock=transports.TCPsocket(server,use_srv) + connected=sock.PlugIn(self) + if not connected: + sock.PlugOut() + return + self._Server,self._Proxy=server,proxy + self.connected='tcp' + if (ssl is None and self.Connection.getPort() in (5223, 443)) or ssl: + try: # FIXME. This should be done in transports.py + transports.TLS().PlugIn(self,now=1) + self.connected='ssl' + except socket.sslerror: + return + dispatcher.Dispatcher().PlugIn(self) + while self.Dispatcher.Stream._document_attrs is None: + if not self.Process(1): return + if self.Dispatcher.Stream._document_attrs.has_key('version') and self.Dispatcher.Stream._document_attrs['version']=='1.0': + while not self.Dispatcher.Stream.features and self.Process(1): pass # If we get version 1.0 stream the features tag MUST BE presented + return self.connected + +class Client(CommonClient): + """ Example client class, based on CommonClient. 
""" + def connect(self,server=None,proxy=None,secure=None,use_srv=True): + """ Connect to jabber server. If you want to specify different ip/port to connect to you can + pass it as tuple as first parameter. If there is HTTP proxy between you and server + specify it's address and credentials (if needed) in the second argument. + If you want ssl/tls support to be discovered and enable automatically - leave third argument as None. (ssl will be autodetected only if port is 5223 or 443) + If you want to force SSL start (i.e. if port 5223 or 443 is remapped to some non-standard port) then set it to 1. + If you want to disable tls/ssl support completely, set it to 0. + Example: connect(('192.168.5.5',5222),{'host':'proxy.my.net','port':8080,'user':'me','password':'secret'}) + Returns '' or 'tcp' or 'tls', depending on the result.""" + if not CommonClient.connect(self,server,proxy,secure,use_srv) or secure<>None and not secure: return self.connected + transports.TLS().PlugIn(self) + if not self.Dispatcher.Stream._document_attrs.has_key('version') or not self.Dispatcher.Stream._document_attrs['version']=='1.0': return self.connected + while not self.Dispatcher.Stream.features and self.Process(1): pass # If we get version 1.0 stream the features tag MUST BE presented + if not self.Dispatcher.Stream.features.getTag('starttls'): return self.connected # TLS not supported by server + while not self.TLS.starttls and self.Process(1): pass + if not hasattr(self, 'TLS') or self.TLS.starttls!='success': self.event('tls_failed'); return self.connected + self.connected='tls' + return self.connected + + def auth(self,user,password,resource='',sasl=1): + """ Authenticate connnection and bind resource. If resource is not provided + random one or library name used. 
""" + self._User,self._Password,self._Resource=user,password,resource + while not self.Dispatcher.Stream._document_attrs and self.Process(1): pass + if self.Dispatcher.Stream._document_attrs.has_key('version') and self.Dispatcher.Stream._document_attrs['version']=='1.0': + while not self.Dispatcher.Stream.features and self.Process(1): pass # If we get version 1.0 stream the features tag MUST BE presented + if sasl: auth.SASL(user,password).PlugIn(self) + if not sasl or self.SASL.startsasl=='not-supported': + if not resource: resource='xmpppy' + if auth.NonSASL(user,password,resource).PlugIn(self): + self.connected+='+old_auth' + return 'old_auth' + return + self.SASL.auth() + while self.SASL.startsasl=='in-process' and self.Process(1): pass + if self.SASL.startsasl=='success': + auth.Bind().PlugIn(self) + while self.Bind.bound is None and self.Process(1): pass + if self.Bind.Bind(resource): + self.connected+='+sasl' + return 'sasl' + else: + if self.__dict__.has_key('SASL'): self.SASL.PlugOut() + + def getRoster(self): + """ Return the Roster instance, previously plugging it in and + requesting roster from server if needed. """ + if not self.__dict__.has_key('Roster'): roster.Roster().PlugIn(self) + return self.Roster.getRoster() + + def sendInitPresence(self,requestRoster=1): + """ Send roster request and initial <presence/>. + You can disable the first by setting requestRoster argument to 0. """ + self.sendPresence(requestRoster=requestRoster) + + def sendPresence(self,jid=None,typ=None,requestRoster=0): + """ Send some specific presence state. + Can also request roster from server if according agrument is set.""" + if requestRoster: roster.Roster().PlugIn(self) + self.send(dispatcher.Presence(to=jid, typ=typ)) + +class Component(CommonClient): + """ Component class. The only difference from CommonClient is ability to perform component authentication. 
""" + def __init__(self,transport,port=5347,typ=None,debug=['always', 'nodebuilder'],domains=None,sasl=0,bind=0,route=0,xcp=0): + """ Init function for Components. + As components use a different auth mechanism which includes the namespace of the component. + Jabberd1.4 and Ejabberd use the default namespace then for all client messages. + Jabberd2 uses jabber:client. + 'transport' argument is a transport name that you are going to serve (f.e. "irc.localhost"). + 'port' can be specified if 'transport' resolves to correct IP. If it is not then you'll have to specify IP + and port while calling "connect()". + If you are going to serve several different domains with single Component instance - you must list them ALL + in the 'domains' argument. + For jabberd2 servers you should set typ='jabberd2' argument. + """ + CommonClient.__init__(self,transport,port=port,debug=debug) + self.typ=typ + self.sasl=sasl + self.bind=bind + self.route=route + self.xcp=xcp + if domains: + self.domains=domains + else: + self.domains=[transport] + + def connect(self,server=None,proxy=None): + """ This will connect to the server, and if the features tag is found then set + the namespace to be jabber:client as that is required for jabberd2. 
+ 'server' and 'proxy' arguments have the same meaning as in xmpp.Client.connect() """ + if self.sasl: + self.Namespace=auth.NS_COMPONENT_1 + self.Server=server[0] + CommonClient.connect(self,server=server,proxy=proxy) + if self.connected and (self.typ=='jabberd2' or not self.typ and self.Dispatcher.Stream.features != None) and (not self.xcp): + self.defaultNamespace=auth.NS_CLIENT + self.Dispatcher.RegisterNamespace(self.defaultNamespace) + self.Dispatcher.RegisterProtocol('iq',dispatcher.Iq) + self.Dispatcher.RegisterProtocol('message',dispatcher.Message) + self.Dispatcher.RegisterProtocol('presence',dispatcher.Presence) + return self.connected + + def dobind(self, sasl): + # This has to be done before binding, because we can receive a route stanza before binding finishes + self._route = self.route + if self.bind: + for domain in self.domains: + auth.ComponentBind(sasl).PlugIn(self) + while self.ComponentBind.bound is None: self.Process(1) + if (not self.ComponentBind.Bind(domain)): + self.ComponentBind.PlugOut() + return + self.ComponentBind.PlugOut() + + def auth(self,name,password,dup=None): + """ Authenticate component "name" with password "password".""" + self._User,self._Password,self._Resource=name,password,'' + try: + if self.sasl: auth.SASL(name,password).PlugIn(self) + if not self.sasl or self.SASL.startsasl=='not-supported': + if auth.NonSASL(name,password,'').PlugIn(self): + self.dobind(sasl=False) + self.connected+='+old_auth' + return 'old_auth' + return + self.SASL.auth() + while self.SASL.startsasl=='in-process' and self.Process(1): pass + if self.SASL.startsasl=='success': + self.dobind(sasl=True) + self.connected+='+sasl' + return 'sasl' + else: + raise auth.NotAuthorized(self.SASL.startsasl) + except: + self.DEBUG(self.DBG,"Failed to authenticate %s"%name,'error') diff --git a/libs/xmpp/commands.py b/libs/xmpp/commands.py new file mode 100644 index 0000000..cdebf8f --- /dev/null +++ b/libs/xmpp/commands.py @@ -0,0 +1,328 @@ +## $Id: 
commands.py,v 1.17 2007/08/28 09:54:15 normanr Exp $ + +## Ad-Hoc Command manager +## Mike Albon (c) 5th January 2005 + +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. + + +"""This module is a ad-hoc command processor for xmpppy. It uses the plug-in mechanism like most of the core library. It depends on a DISCO browser manager. + +There are 3 classes here, a command processor Commands like the Browser, and a command template plugin Command, and an example command. + +To use this module: + + Instansiate the module with the parent transport and disco browser manager as parameters. + 'Plug in' commands using the command template. + The command feature must be added to existing disco replies where neccessary. + +What it supplies: + + Automatic command registration with the disco browser manager. + Automatic listing of commands in the public command list. + A means of handling requests, by redirection though the command manager. +""" + +from protocol import * +from client import PlugIn + +class Commands(PlugIn): + """Commands is an ancestor of PlugIn and can be attached to any session. + + The commands class provides a lookup and browse mechnism. It follows the same priciple of the Browser class, for Service Discovery to provide the list of commands, it adds the 'list' disco type to your existing disco handler function. + + How it works: + The commands are added into the existing Browser on the correct nodes. When the command list is built the supplied discovery handler function needs to have a 'list' option in type. 
This then gets enumerated, all results returned as None are ignored. + The command executed is then called using it's Execute method. All session management is handled by the command itself. + """ + def __init__(self, browser): + """Initialises class and sets up local variables""" + PlugIn.__init__(self) + DBG_LINE='commands' + self._exported_methods=[] + self._handlers={'':{}} + self._browser = browser + + def plugin(self, owner): + """Makes handlers within the session""" + # Plug into the session and the disco manager + # We only need get and set, results are not needed by a service provider, only a service user. + owner.RegisterHandler('iq',self._CommandHandler,typ='set',ns=NS_COMMANDS) + owner.RegisterHandler('iq',self._CommandHandler,typ='get',ns=NS_COMMANDS) + self._browser.setDiscoHandler(self._DiscoHandler,node=NS_COMMANDS,jid='') + + def plugout(self): + """Removes handlers from the session""" + # unPlug from the session and the disco manager + self._owner.UnregisterHandler('iq',self._CommandHandler,ns=NS_COMMANDS) + for jid in self._handlers: + self._browser.delDiscoHandler(self._DiscoHandler,node=NS_COMMANDS) + + def _CommandHandler(self,conn,request): + """The internal method to process the routing of command execution requests""" + # This is the command handler itself. + # We must: + # Pass on command execution to command handler + # (Do we need to keep session details here, or can that be done in the command?) 
+ jid = str(request.getTo()) + try: + node = request.getTagAttr('command','node') + except: + conn.send(Error(request,ERR_BAD_REQUEST)) + raise NodeProcessed + if self._handlers.has_key(jid): + if self._handlers[jid].has_key(node): + self._handlers[jid][node]['execute'](conn,request) + else: + conn.send(Error(request,ERR_ITEM_NOT_FOUND)) + raise NodeProcessed + elif self._handlers[''].has_key(node): + self._handlers[''][node]['execute'](conn,request) + else: + conn.send(Error(request,ERR_ITEM_NOT_FOUND)) + raise NodeProcessed + + def _DiscoHandler(self,conn,request,typ): + """The internal method to process service discovery requests""" + # This is the disco manager handler. + if typ == 'items': + # We must: + # Generate a list of commands and return the list + # * This handler does not handle individual commands disco requests. + # Pseudo: + # Enumerate the 'item' disco of each command for the specified jid + # Build responce and send + # To make this code easy to write we add an 'list' disco type, it returns a tuple or 'none' if not advertised + list = [] + items = [] + jid = str(request.getTo()) + # Get specific jid based results + if self._handlers.has_key(jid): + for each in self._handlers[jid].keys(): + items.append((jid,each)) + else: + # Get generic results + for each in self._handlers[''].keys(): + items.append(('',each)) + if items != []: + for each in items: + i = self._handlers[each[0]][each[1]]['disco'](conn,request,'list') + if i != None: + list.append(Node(tag='item',attrs={'jid':i[0],'node':i[1],'name':i[2]})) + iq = request.buildReply('result') + if request.getQuerynode(): iq.setQuerynode(request.getQuerynode()) + iq.setQueryPayload(list) + conn.send(iq) + else: + conn.send(Error(request,ERR_ITEM_NOT_FOUND)) + raise NodeProcessed + elif typ == 'info': + return {'ids':[{'category':'automation','type':'command-list'}],'features':[]} + + def addCommand(self,name,cmddisco,cmdexecute,jid=''): + """The method to call if adding a new command to the 
session, the requred parameters of cmddisco and cmdexecute are the methods to enable that command to be executed""" + # This command takes a command object and the name of the command for registration + # We must: + # Add item into disco + # Add item into command list + if not self._handlers.has_key(jid): + self._handlers[jid]={} + self._browser.setDiscoHandler(self._DiscoHandler,node=NS_COMMANDS,jid=jid) + if self._handlers[jid].has_key(name): + raise NameError,'Command Exists' + else: + self._handlers[jid][name]={'disco':cmddisco,'execute':cmdexecute} + # Need to add disco stuff here + self._browser.setDiscoHandler(cmddisco,node=name,jid=jid) + + def delCommand(self,name,jid=''): + """Removed command from the session""" + # This command takes a command object and the name used for registration + # We must: + # Remove item from disco + # Remove item from command list + if not self._handlers.has_key(jid): + raise NameError,'Jid not found' + if not self._handlers[jid].has_key(name): + raise NameError, 'Command not found' + else: + #Do disco removal here + command = self.getCommand(name,jid)['disco'] + del self._handlers[jid][name] + self._browser.delDiscoHandler(command,node=name,jid=jid) + + def getCommand(self,name,jid=''): + """Returns the command tuple""" + # This gets the command object with name + # We must: + # Return item that matches this name + if not self._handlers.has_key(jid): + raise NameError,'Jid not found' + elif not self._handlers[jid].has_key(name): + raise NameError,'Command not found' + else: + return self._handlers[jid][name] + +class Command_Handler_Prototype(PlugIn): + """This is a prototype command handler, as each command uses a disco method + and execute method you can implement it any way you like, however this is + my first attempt at making a generic handler that you can hang process + stages on too. There is an example command below. 
+ + The parameters are as follows: + name : the name of the command within the jabber environment + description : the natural language description + discofeatures : the features supported by the command + initial : the initial command in the from of {'execute':commandname} + + All stages set the 'actions' dictionary for each session to represent the possible options available. + """ + name = 'examplecommand' + count = 0 + description = 'an example command' + discofeatures = [NS_COMMANDS,NS_DATA] + # This is the command template + def __init__(self,jid=''): + """Set up the class""" + PlugIn.__init__(self) + DBG_LINE='command' + self.sessioncount = 0 + self.sessions = {} + # Disco information for command list pre-formatted as a tuple + self.discoinfo = {'ids':[{'category':'automation','type':'command-node','name':self.description}],'features': self.discofeatures} + self._jid = jid + + def plugin(self,owner): + """Plug command into the commands class""" + # The owner in this instance is the Command Processor + self._commands = owner + self._owner = owner._owner + self._commands.addCommand(self.name,self._DiscoHandler,self.Execute,jid=self._jid) + + def plugout(self): + """Remove command from the commands class""" + self._commands.delCommand(self.name,self._jid) + + def getSessionID(self): + """Returns an id for the command session""" + self.count = self.count+1 + return 'cmd-%s-%d'%(self.name,self.count) + + def Execute(self,conn,request): + """The method that handles all the commands, and routes them to the correct method for that stage.""" + # New request or old? 
+ try: + session = request.getTagAttr('command','sessionid') + except: + session = None + try: + action = request.getTagAttr('command','action') + except: + action = None + if action == None: action = 'execute' + # Check session is in session list + if self.sessions.has_key(session): + if self.sessions[session]['jid']==request.getFrom(): + # Check action is vaild + if self.sessions[session]['actions'].has_key(action): + # Execute next action + self.sessions[session]['actions'][action](conn,request) + else: + # Stage not presented as an option + self._owner.send(Error(request,ERR_BAD_REQUEST)) + raise NodeProcessed + else: + # Jid and session don't match. Go away imposter + self._owner.send(Error(request,ERR_BAD_REQUEST)) + raise NodeProcessed + elif session != None: + # Not on this sessionid you won't. + self._owner.send(Error(request,ERR_BAD_REQUEST)) + raise NodeProcessed + else: + # New session + self.initial[action](conn,request) + + def _DiscoHandler(self,conn,request,type): + """The handler for discovery events""" + if type == 'list': + return (request.getTo(),self.name,self.description) + elif type == 'items': + return [] + elif type == 'info': + return self.discoinfo + +class TestCommand(Command_Handler_Prototype): + """ Example class. You should read source if you wish to understate how it works. + Generally, it presents a "master" that giudes user through to calculate something. + """ + name = 'testcommand' + description = 'a noddy example command' + def __init__(self,jid=''): + """ Init internal constants. 
""" + Command_Handler_Prototype.__init__(self,jid) + self.initial = {'execute':self.cmdFirstStage} + + def cmdFirstStage(self,conn,request): + """ Determine """ + # This is the only place this should be repeated as all other stages should have SessionIDs + try: + session = request.getTagAttr('command','sessionid') + except: + session = None + if session == None: + session = self.getSessionID() + self.sessions[session]={'jid':request.getFrom(),'actions':{'cancel':self.cmdCancel,'next':self.cmdSecondStage,'execute':self.cmdSecondStage},'data':{'type':None}} + # As this is the first stage we only send a form + reply = request.buildReply('result') + form = DataForm(title='Select type of operation',data=['Use the combobox to select the type of calculation you would like to do, then click Next',DataField(name='calctype',desc='Calculation Type',value=self.sessions[session]['data']['type'],options=[['circlediameter','Calculate the Diameter of a circle'],['circlearea','Calculate the area of a circle']],typ='list-single',required=1)]) + replypayload = [Node('actions',attrs={'execute':'next'},payload=[Node('next')]),form] + reply.addChild(name='command',namespace=NS_COMMANDS,attrs={'node':request.getTagAttr('command','node'),'sessionid':session,'status':'executing'},payload=replypayload) + self._owner.send(reply) + raise NodeProcessed + + def cmdSecondStage(self,conn,request): + form = DataForm(node = request.getTag(name='command').getTag(name='x',namespace=NS_DATA)) + self.sessions[request.getTagAttr('command','sessionid')]['data']['type']=form.getField('calctype').getValue() + self.sessions[request.getTagAttr('command','sessionid')]['actions']={'cancel':self.cmdCancel,None:self.cmdThirdStage,'previous':self.cmdFirstStage,'execute':self.cmdThirdStage,'next':self.cmdThirdStage} + # The form generation is split out to another method as it may be called by cmdThirdStage + self.cmdSecondStageReply(conn,request) + + def cmdSecondStageReply(self,conn,request): + reply = 
request.buildReply('result') + form = DataForm(title = 'Enter the radius', data=['Enter the radius of the circle (numbers only)',DataField(desc='Radius',name='radius',typ='text-single')]) + replypayload = [Node('actions',attrs={'execute':'complete'},payload=[Node('complete'),Node('prev')]),form] + reply.addChild(name='command',namespace=NS_COMMANDS,attrs={'node':request.getTagAttr('command','node'),'sessionid':request.getTagAttr('command','sessionid'),'status':'executing'},payload=replypayload) + self._owner.send(reply) + raise NodeProcessed + + def cmdThirdStage(self,conn,request): + form = DataForm(node = request.getTag(name='command').getTag(name='x',namespace=NS_DATA)) + try: + num = float(form.getField('radius').getValue()) + except: + self.cmdSecondStageReply(conn,request) + from math import pi + if self.sessions[request.getTagAttr('command','sessionid')]['data']['type'] == 'circlearea': + result = (num**2)*pi + else: + result = num*2*pi + reply = request.buildReply('result') + form = DataForm(typ='result',data=[DataField(desc='result',name='result',value=result)]) + reply.addChild(name='command',namespace=NS_COMMANDS,attrs={'node':request.getTagAttr('command','node'),'sessionid':request.getTagAttr('command','sessionid'),'status':'completed'},payload=[form]) + self._owner.send(reply) + raise NodeProcessed + + def cmdCancel(self,conn,request): + reply = request.buildReply('result') + reply.addChild(name='command',namespace=NS_COMMANDS,attrs={'node':request.getTagAttr('command','node'),'sessionid':request.getTagAttr('command','sessionid'),'status':'cancelled'}) + self._owner.send(reply) + del self.sessions[request.getTagAttr('command','sessionid')] diff --git a/libs/xmpp/debug.py b/libs/xmpp/debug.py new file mode 100644 index 0000000..34ade88 --- /dev/null +++ b/libs/xmpp/debug.py @@ -0,0 +1,423 @@ +## debug.py +## +## Copyright (C) 2003 Jacob Lundqvist +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the 
GNU Lesser General Public License as published +## by the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU Lesser General Public License for more details. + +_version_ = '1.4.0' + +"""\ + +Generic debug class + +Other modules can always define extra debug flags for local usage, as long as +they make sure they append them to debug_flags + +Also its always a good thing to prefix local flags with something, to reduce risk +of coliding flags. Nothing breaks if two flags would be identical, but it might +activate unintended debugging. + +flags can be numeric, but that makes analysing harder, on creation its +not obvious what is activated, and when flag_show is given, output isnt +really meaningfull. + +This Debug class can either be initialized and used on app level, or used independantly +by the individual classes. 
+ +For samples of usage, see samples subdir in distro source, and selftest +in this code + +""" + + + +import sys +import traceback +import time +import os + +import types + +if os.environ.has_key('TERM'): + colors_enabled=True +else: + colors_enabled=False + +color_none = chr(27) + "[0m" +color_black = chr(27) + "[30m" +color_red = chr(27) + "[31m" +color_green = chr(27) + "[32m" +color_brown = chr(27) + "[33m" +color_blue = chr(27) + "[34m" +color_magenta = chr(27) + "[35m" +color_cyan = chr(27) + "[36m" +color_light_gray = chr(27) + "[37m" +color_dark_gray = chr(27) + "[30;1m" +color_bright_red = chr(27) + "[31;1m" +color_bright_green = chr(27) + "[32;1m" +color_yellow = chr(27) + "[33;1m" +color_bright_blue = chr(27) + "[34;1m" +color_purple = chr(27) + "[35;1m" +color_bright_cyan = chr(27) + "[36;1m" +color_white = chr(27) + "[37;1m" + + +""" +Define your flags in yor modules like this: + +from debug import * + +DBG_INIT = 'init' ; debug_flags.append( DBG_INIT ) +DBG_CONNECTION = 'connection' ; debug_flags.append( DBG_CONNECTION ) + + The reason for having a double statement wis so we can validate params + and catch all undefined debug flags + + This gives us control over all used flags, and makes it easier to allow + global debugging in your code, just do something like + + foo = Debug( debug_flags ) + + group flags, that is a flag in it self containing multiple flags should be + defined without the debug_flags.append() sequence, since the parts are already + in the list, also they must of course be defined after the flags they depend on ;) + example: + +DBG_MULTI = [ DBG_INIT, DBG_CONNECTION ] + + + + NoDebug + ------- + To speed code up, typically for product releases or such + use this class instead if you globaly want to disable debugging +""" + + +class NoDebug: + def __init__( self, *args, **kwargs ): + self.debug_flags = [] + def show( self, *args, **kwargs): + pass + def Show( self, *args, **kwargs): + pass + def is_active( self, flag ): + pass + 
colors={} + def active_set( self, active_flags = None ): + return 0 + + +LINE_FEED = '\n' + + +class Debug: + def __init__( self, + # + # active_flags are those that will trigger output + # + active_flags = None, + # + # Log file should be file object or file namne + # + log_file = sys.stderr, + # + # prefix and sufix can either be set globaly or per call. + # personally I use this to color code debug statements + # with prefix = chr(27) + '[34m' + # sufix = chr(27) + '[37;1m\n' + # + prefix = 'DEBUG: ', + sufix = '\n', + # + # If you want unix style timestamps, + # 0 disables timestamps + # 1 before prefix, good when prefix is a string + # 2 after prefix, good when prefix is a color + # + time_stamp = 0, + # + # flag_show should normaly be of, but can be turned on to get a + # good view of what flags are actually used for calls, + # if it is not None, it should be a string + # flags for current call will be displayed + # with flag_show as separator + # recomended values vould be '-' or ':', but any string goes + # + flag_show = None, + # + # If you dont want to validate flags on each call to + # show(), set this to 0 + # + validate_flags = 1, + # + # If you dont want the welcome message, set to 0 + # default is to show welcome if any flags are active + welcome = -1 + ): + + self.debug_flags = [] + if welcome == -1: + if active_flags and len(active_flags): + welcome = 1 + else: + welcome = 0 + + self._remove_dupe_flags() + if log_file: + if type( log_file ) is type(''): + try: + self._fh = open(log_file,'w') + except: + print 'ERROR: can open %s for writing' + sys.exit(0) + else: ## assume its a stream type object + self._fh = log_file + else: + self._fh = sys.stdout + + if time_stamp not in (0,1,2): + msg2 = '%s' % time_stamp + raise 'Invalid time_stamp param', msg2 + self.prefix = prefix + self.sufix = sufix + self.time_stamp = time_stamp + self.flag_show = None # must be initialised after possible welcome + self.validate_flags = validate_flags + + 
self.active_set( active_flags ) + if welcome: + self.show('') + caller = sys._getframe(1) # used to get name of caller + try: + mod_name= ":%s" % caller.f_locals['__name__'] + except: + mod_name = "" + self.show('Debug created for %s%s' % (caller.f_code.co_filename, + mod_name )) + self.show(' flags defined: %s' % ','.join( self.active )) + + if type(flag_show) in (type(''), type(None)): + self.flag_show = flag_show + else: + msg2 = '%s' % type(flag_show ) + raise 'Invalid type for flag_show!', msg2 + + + + + + def show( self, msg, flag = None, prefix = None, sufix = None, + lf = 0 ): + """ + flag can be of folowing types: + None - this msg will always be shown if any debugging is on + flag - will be shown if flag is active + (flag1,flag2,,,) - will be shown if any of the given flags + are active + + if prefix / sufix are not given, default ones from init will be used + + lf = -1 means strip linefeed if pressent + lf = 1 means add linefeed if not pressent + """ + + if self.validate_flags: + self._validate_flag( flag ) + + if not self.is_active(flag): + return + if prefix: + pre = prefix + else: + pre = self.prefix + if sufix: + suf = sufix + else: + suf = self.sufix + + if self.time_stamp == 2: + output = '%s%s ' % ( pre, + time.strftime('%b %d %H:%M:%S', + time.localtime(time.time() )), + ) + elif self.time_stamp == 1: + output = '%s %s' % ( time.strftime('%b %d %H:%M:%S', + time.localtime(time.time() )), + pre, + ) + else: + output = pre + + if self.flag_show: + if flag: + output = '%s%s%s' % ( output, flag, self.flag_show ) + else: + # this call uses the global default, + # dont print "None", just show the separator + output = '%s %s' % ( output, self.flag_show ) + + output = '%s%s%s' % ( output, msg, suf ) + if lf: + # strip/add lf if needed + last_char = output[-1] + if lf == 1 and last_char != LINE_FEED: + output = output + LINE_FEED + elif lf == -1 and last_char == LINE_FEED: + output = output[:-1] + try: + self._fh.write( output ) + except: + # unicode 
strikes again ;) + s=u'' + for i in range(len(output)): + if ord(output[i]) < 128: + c = output[i] + else: + c = '?' + s=s+c + self._fh.write( '%s%s%s' % ( pre, s, suf )) + self._fh.flush() + + + def is_active( self, flag ): + 'If given flag(s) should generate output.' + + # try to abort early to quicken code + if not self.active: + return 0 + if not flag or flag in self.active: + return 1 + else: + # check for multi flag type: + if type( flag ) in ( type(()), type([]) ): + for s in flag: + if s in self.active: + return 1 + return 0 + + + def active_set( self, active_flags = None ): + "returns 1 if any flags where actually set, otherwise 0." + r = 0 + ok_flags = [] + if not active_flags: + #no debuging at all + self.active = [] + elif type( active_flags ) in ( types.TupleType, types.ListType ): + flags = self._as_one_list( active_flags ) + for t in flags: + if t not in self.debug_flags: + sys.stderr.write('Invalid debugflag given: %s\n' % t ) + ok_flags.append( t ) + + self.active = ok_flags + r = 1 + else: + # assume comma string + try: + flags = active_flags.split(',') + except: + self.show( '***' ) + self.show( '*** Invalid debug param given: %s' % active_flags ) + self.show( '*** please correct your param!' ) + self.show( '*** due to this, full debuging is enabled' ) + self.active = self.debug_flags + + for f in flags: + s = f.strip() + ok_flags.append( s ) + self.active = ok_flags + + self._remove_dupe_flags() + return r + + def active_get( self ): + "returns currently active flags." + return self.active + + + def _as_one_list( self, items ): + """ init param might contain nested lists, typically from group flags. 
+ + This code organises lst and remves dupes + """ + if type( items ) <> type( [] ) and type( items ) <> type( () ): + return [ items ] + r = [] + for l in items: + if type( l ) == type([]): + lst2 = self._as_one_list( l ) + for l2 in lst2: + self._append_unique_str(r, l2 ) + elif l == None: + continue + else: + self._append_unique_str(r, l ) + return r + + + def _append_unique_str( self, lst, item ): + """filter out any dupes.""" + if type(item) <> type(''): + msg2 = '%s' % item + raise 'Invalid item type (should be string)',msg2 + if item not in lst: + lst.append( item ) + return lst + + + def _validate_flag( self, flags ): + 'verify that flag is defined.' + if flags: + for f in self._as_one_list( flags ): + if not f in self.debug_flags: + msg2 = '%s' % f + raise 'Invalid debugflag given', msg2 + + def _remove_dupe_flags( self ): + """ + if multiple instances of Debug is used in same app, + some flags might be created multiple time, filter out dupes + """ + unique_flags = [] + for f in self.debug_flags: + if f not in unique_flags: + unique_flags.append(f) + self.debug_flags = unique_flags + + colors={} + def Show(self, flag, msg, prefix=''): + msg=msg.replace('\r','\\r').replace('\n','\\n').replace('><','>\n <') + if not colors_enabled: pass + elif self.colors.has_key(prefix): msg=self.colors[prefix]+msg+color_none + else: msg=color_none+msg + if not colors_enabled: prefixcolor='' + elif self.colors.has_key(flag): prefixcolor=self.colors[flag] + else: prefixcolor=color_none + + if prefix=='error': + _exception = sys.exc_info() + if _exception[0]: + msg=msg+'\n'+''.join(traceback.format_exception(_exception[0], _exception[1], _exception[2])).rstrip() + + prefix= self.prefix+prefixcolor+(flag+' '*12)[:12]+' '+(prefix+' '*6)[:6] + self.show(msg, flag, prefix) + + def is_active( self, flag ): + if not self.active: return 0 + if not flag or flag in self.active and DBG_ALWAYS not in self.active or flag not in self.active and DBG_ALWAYS in self.active : return 1 + 
return 0 + +DBG_ALWAYS='always' + +##Uncomment this to effectively disable all debugging and all debugging overhead. +#Debug=NoDebug diff --git a/libs/xmpp/dispatcher.py b/libs/xmpp/dispatcher.py new file mode 100644 index 0000000..cc94ee0 --- /dev/null +++ b/libs/xmpp/dispatcher.py @@ -0,0 +1,373 @@ +## transports.py +## +## Copyright (C) 2003-2005 Alexey "Snake" Nezhdanov +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. + +# $Id: dispatcher.py,v 1.42 2007/05/18 23:18:36 normanr Exp $ + +""" +Main xmpppy mechanism. Provides library with methods to assign different handlers +to different XMPP stanzas. +Contains one tunable attribute: DefaultTimeout (25 seconds by default). It defines time that +Dispatcher.SendAndWaitForResponce method will wait for reply stanza before giving up. +""" + +import simplexml,time,sys +from protocol import * +from client import PlugIn + +DefaultTimeout=25 +ID=0 + +class Dispatcher(PlugIn): + """ Ancestor of PlugIn class. Handles XMPP stream, i.e. aware of stream headers. + Can be plugged out/in to restart these headers (used for SASL f.e.). 
""" + def __init__(self): + PlugIn.__init__(self) + DBG_LINE='dispatcher' + self.handlers={} + self._expected={} + self._defaultHandler=None + self._pendingExceptions=[] + self._eventHandler=None + self._cycleHandlers=[] + self._exported_methods=[self.Process,self.RegisterHandler,self.RegisterDefaultHandler,\ + self.RegisterEventHandler,self.UnregisterCycleHandler,self.RegisterCycleHandler,\ + self.RegisterHandlerOnce,self.UnregisterHandler,self.RegisterProtocol,\ + self.WaitForResponse,self.SendAndWaitForResponse,self.send,self.disconnect,\ + self.SendAndCallForResponse, ] + + def dumpHandlers(self): + """ Return set of user-registered callbacks in it's internal format. + Used within the library to carry user handlers set over Dispatcher replugins. """ + return self.handlers + def restoreHandlers(self,handlers): + """ Restores user-registered callbacks structure from dump previously obtained via dumpHandlers. + Used within the library to carry user handlers set over Dispatcher replugins. """ + self.handlers=handlers + + def _init(self): + """ Registers default namespaces/protocols/handlers. Used internally. """ + self.RegisterNamespace('unknown') + self.RegisterNamespace(NS_STREAMS) + self.RegisterNamespace(self._owner.defaultNamespace) + self.RegisterProtocol('iq',Iq) + self.RegisterProtocol('presence',Presence) + self.RegisterProtocol('message',Message) + self.RegisterDefaultHandler(self.returnStanzaHandler) + self.RegisterHandler('error',self.streamErrorHandler,xmlns=NS_STREAMS) + + def plugin(self, owner): + """ Plug the Dispatcher instance into Client class instance and send initial stream header. Used internally.""" + self._init() + for method in self._old_owners_methods: + if method.__name__=='send': self._owner_send=method; break + self._owner.lastErrNode=None + self._owner.lastErr=None + self._owner.lastErrCode=None + self.StreamInit() + + def plugout(self): + """ Prepares instance to be destructed. 
""" + self.Stream.dispatch=None + self.Stream.DEBUG=None + self.Stream.features=None + self.Stream.destroy() + + def StreamInit(self): + """ Send an initial stream header. """ + self.Stream=simplexml.NodeBuilder() + self.Stream._dispatch_depth=2 + self.Stream.dispatch=self.dispatch + self.Stream.stream_header_received=self._check_stream_start + self._owner.debug_flags.append(simplexml.DBG_NODEBUILDER) + self.Stream.DEBUG=self._owner.DEBUG + self.Stream.features=None + self._metastream=Node('stream:stream') + self._metastream.setNamespace(self._owner.Namespace) + self._metastream.setAttr('version','1.0') + self._metastream.setAttr('xmlns:stream',NS_STREAMS) + self._metastream.setAttr('to',self._owner.Server) + self._owner.send("<?xml version='1.0'?>%s>"%str(self._metastream)[:-2]) + + def _check_stream_start(self,ns,tag,attrs): + if ns<>NS_STREAMS or tag<>'stream': + raise ValueError('Incorrect stream start: (%s,%s). Terminating.'%(tag,ns)) + + def Process(self, timeout=0): + """ Check incoming stream for data waiting. If "timeout" is positive - block for as max. this time. + Returns: + 1) length of processed data if some data were processed; + 2) '0' string if no data were processed but link is alive; + 3) 0 (zero) if underlying connection is closed. + Take note that in case of disconnection detect during Process() call + disconnect handlers are called automatically. 
+ """ + for handler in self._cycleHandlers: handler(self) + if len(self._pendingExceptions) > 0: + _pendingException = self._pendingExceptions.pop() + raise _pendingException[0], _pendingException[1], _pendingException[2] + if self._owner.Connection.pending_data(timeout): + try: data=self._owner.Connection.receive() + except IOError: return + self.Stream.Parse(data) + if len(self._pendingExceptions) > 0: + _pendingException = self._pendingExceptions.pop() + raise _pendingException[0], _pendingException[1], _pendingException[2] + if data: return len(data) + return '0' # It means that nothing is received but link is alive. + + def RegisterNamespace(self,xmlns,order='info'): + """ Creates internal structures for newly registered namespace. + You can register handlers for this namespace afterwards. By default one namespace + already registered (jabber:client or jabber:component:accept depending on context. """ + self.DEBUG('Registering namespace "%s"'%xmlns,order) + self.handlers[xmlns]={} + self.RegisterProtocol('unknown',Protocol,xmlns=xmlns) + self.RegisterProtocol('default',Protocol,xmlns=xmlns) + + def RegisterProtocol(self,tag_name,Proto,xmlns=None,order='info'): + """ Used to declare some top-level stanza name to dispatcher. + Needed to start registering handlers for such stanzas. + Iq, message and presence protocols are registered by default. """ + if not xmlns: xmlns=self._owner.defaultNamespace + self.DEBUG('Registering protocol "%s" as %s(%s)'%(tag_name,Proto,xmlns), order) + self.handlers[xmlns][tag_name]={type:Proto, 'default':[]} + + def RegisterNamespaceHandler(self,xmlns,handler,typ='',ns='', makefirst=0, system=0): + """ Register handler for processing all stanzas for specified namespace. """ + self.RegisterHandler('default', handler, typ, ns, xmlns, makefirst, system) + + def RegisterHandler(self,name,handler,typ='',ns='',xmlns=None, makefirst=0, system=0): + """Register user callback as stanzas handler of declared type. 
Callback must take + (if chained, see later) arguments: dispatcher instance (for replying), incomed + return of previous handlers. + The callback must raise xmpp.NodeProcessed just before return if it want preven + callbacks to be called with the same stanza as argument _and_, more importantly + library from returning stanza to sender with error set (to be enabled in 0.2 ve + Arguments: + "name" - name of stanza. F.e. "iq". + "handler" - user callback. + "typ" - value of stanza's "type" attribute. If not specified any value match + "ns" - namespace of child that stanza must contain. + "chained" - chain together output of several handlers. + "makefirst" - insert handler in the beginning of handlers list instead of + adding it to the end. Note that more common handlers (i.e. w/o "typ" and " + will be called first nevertheless. + "system" - call handler even if NodeProcessed Exception were raised already. + """ + if not xmlns: xmlns=self._owner.defaultNamespace + self.DEBUG('Registering handler %s for "%s" type->%s ns->%s(%s)'%(handler,name,typ,ns,xmlns), 'info') + if not typ and not ns: typ='default' + if not self.handlers.has_key(xmlns): self.RegisterNamespace(xmlns,'warn') + if not self.handlers[xmlns].has_key(name): self.RegisterProtocol(name,Protocol,xmlns,'warn') + if not self.handlers[xmlns][name].has_key(typ+ns): self.handlers[xmlns][name][typ+ns]=[] + if makefirst: self.handlers[xmlns][name][typ+ns].insert(0,{'func':handler,'system':system}) + else: self.handlers[xmlns][name][typ+ns].append({'func':handler,'system':system}) + + def RegisterHandlerOnce(self,name,handler,typ='',ns='',xmlns=None,makefirst=0, system=0): + """ Unregister handler after first call (not implemented yet). """ + if not xmlns: xmlns=self._owner.defaultNamespace + self.RegisterHandler(name, handler, typ, ns, xmlns, makefirst, system) + + def UnregisterHandler(self,name,handler,typ='',ns='',xmlns=None): + """ Unregister handler. 
"typ" and "ns" must be specified exactly the same as with registering.""" + if not xmlns: xmlns=self._owner.defaultNamespace + if not self.handlers.has_key(xmlns): return + if not typ and not ns: typ='default' + for pack in self.handlers[xmlns][name][typ+ns]: + if handler==pack['func']: break + else: pack=None + try: self.handlers[xmlns][name][typ+ns].remove(pack) + except ValueError: pass + + def RegisterDefaultHandler(self,handler): + """ Specify the handler that will be used if no NodeProcessed exception were raised. + This is returnStanzaHandler by default. """ + self._defaultHandler=handler + + def RegisterEventHandler(self,handler): + """ Register handler that will process events. F.e. "FILERECEIVED" event. """ + self._eventHandler=handler + + def returnStanzaHandler(self,conn,stanza): + """ Return stanza back to the sender with <feature-not-implemennted/> error set. """ + if stanza.getType() in ['get','set']: + conn.send(Error(stanza,ERR_FEATURE_NOT_IMPLEMENTED)) + + def streamErrorHandler(self,conn,error): + name,text='error',error.getData() + for tag in error.getChildren(): + if tag.getNamespace()==NS_XMPP_STREAMS: + if tag.getName()=='text': text=tag.getData() + else: name=tag.getName() + if name in stream_exceptions.keys(): exc=stream_exceptions[name] + else: exc=StreamError + raise exc((name,text)) + + def RegisterCycleHandler(self,handler): + """ Register handler that will be called on every Dispatcher.Process() call. """ + if handler not in self._cycleHandlers: self._cycleHandlers.append(handler) + + def UnregisterCycleHandler(self,handler): + """ Unregister handler that will is called on every Dispatcher.Process() call.""" + if handler in self._cycleHandlers: self._cycleHandlers.remove(handler) + + def Event(self,realm,event,data): + """ Raise some event. Takes three arguments: + 1) "realm" - scope of event. Usually a namespace. + 2) "event" - the event itself. F.e. "SUCESSFULL SEND". + 3) data that comes along with event. 
Depends on event.""" + if self._eventHandler: self._eventHandler(realm,event,data) + + def dispatch(self,stanza,session=None,direct=0): + """ Main procedure that performs XMPP stanza recognition and calling apppropriate handlers for it. + Called internally. """ + if not session: session=self + session.Stream._mini_dom=None + name=stanza.getName() + + if not direct and self._owner._route: + if name == 'route': + if stanza.getAttr('error') == None: + if len(stanza.getChildren()) == 1: + stanza = stanza.getChildren()[0] + name=stanza.getName() + else: + for each in stanza.getChildren(): + self.dispatch(each,session,direct=1) + return + elif name == 'presence': + return + elif name in ('features','bind'): + pass + else: + raise UnsupportedStanzaType(name) + + if name=='features': session.Stream.features=stanza + + xmlns=stanza.getNamespace() + if not self.handlers.has_key(xmlns): + self.DEBUG("Unknown namespace: " + xmlns,'warn') + xmlns='unknown' + if not self.handlers[xmlns].has_key(name): + self.DEBUG("Unknown stanza: " + name,'warn') + name='unknown' + else: + self.DEBUG("Got %s/%s stanza"%(xmlns,name), 'ok') + + if stanza.__class__.__name__=='Node': stanza=self.handlers[xmlns][name][type](node=stanza) + + typ=stanza.getType() + if not typ: typ='' + stanza.props=stanza.getProperties() + ID=stanza.getID() + + session.DEBUG("Dispatching %s stanza with type->%s props->%s id->%s"%(name,typ,stanza.props,ID),'ok') + + list=['default'] # we will use all handlers: + if self.handlers[xmlns][name].has_key(typ): list.append(typ) # from very common... 
+ for prop in stanza.props: + if self.handlers[xmlns][name].has_key(prop): list.append(prop) + if typ and self.handlers[xmlns][name].has_key(typ+prop): list.append(typ+prop) # ...to very particular + + chain=self.handlers[xmlns]['default']['default'] + for key in list: + if key: chain = chain + self.handlers[xmlns][name][key] + + output='' + if session._expected.has_key(ID): + user=0 + if type(session._expected[ID])==type(()): + cb,args=session._expected[ID] + session.DEBUG("Expected stanza arrived. Callback %s(%s) found!"%(cb,args),'ok') + try: cb(session,stanza,**args) + except Exception, typ: + if typ.__class__.__name__<>'NodeProcessed': raise + else: + session.DEBUG("Expected stanza arrived!",'ok') + session._expected[ID]=stanza + else: user=1 + for handler in chain: + if user or handler['system']: + try: + handler['func'](session,stanza) + except Exception, typ: + if typ.__class__.__name__<>'NodeProcessed': + self._pendingExceptions.insert(0, sys.exc_info()) + return + user=0 + if user and self._defaultHandler: self._defaultHandler(session,stanza) + + def WaitForResponse(self, ID, timeout=DefaultTimeout): + """ Block and wait until stanza with specific "id" attribute will come. + If no such stanza is arrived within timeout, return None. + If operation failed for some reason then owner's attributes + lastErrNode, lastErr and lastErrCode are set accordingly. """ + self._expected[ID]=None + has_timed_out=0 + abort_time=time.time() + timeout + self.DEBUG("Waiting for ID:%s with timeout %s..." 
% (ID,timeout),'wait') + while not self._expected[ID]: + if not self.Process(0.04): + self._owner.lastErr="Disconnect" + return None + if time.time() > abort_time: + self._owner.lastErr="Timeout" + return None + response=self._expected[ID] + del self._expected[ID] + if response.getErrorCode(): + self._owner.lastErrNode=response + self._owner.lastErr=response.getError() + self._owner.lastErrCode=response.getErrorCode() + return response + + def SendAndWaitForResponse(self, stanza, timeout=DefaultTimeout): + """ Put stanza on the wire and wait for recipient's response to it. """ + return self.WaitForResponse(self.send(stanza),timeout) + + def SendAndCallForResponse(self, stanza, func, args={}): + """ Put stanza on the wire and call back when recipient replies. + Additional callback arguments can be specified in args. """ + self._expected[self.send(stanza)]=(func,args) + + def send(self,stanza): + """ Serialise stanza and put it on the wire. Assign an unique ID to it before send. + Returns assigned ID.""" + if type(stanza) in [type(''), type(u'')]: return self._owner_send(stanza) + if not isinstance(stanza,Protocol): _ID=None + elif not stanza.getID(): + global ID + ID+=1 + _ID=`ID` + stanza.setID(_ID) + else: _ID=stanza.getID() + if self._owner._registered_name and not stanza.getAttr('from'): stanza.setAttr('from',self._owner._registered_name) + if self._owner._route and stanza.getName()!='bind': + to=self._owner.Server + if stanza.getTo() and stanza.getTo().getDomain(): + to=stanza.getTo().getDomain() + frm=stanza.getFrom() + if frm.getDomain(): + frm=frm.getDomain() + route=Protocol('route',to=to,frm=frm,payload=[stanza]) + stanza=route + stanza.setNamespace(self._owner.Namespace) + stanza.setParent(self._metastream) + self._owner_send(stanza) + return _ID + + def disconnect(self): + """ Send a stream terminator and and handle all incoming stanzas before stream closure. 
""" + self._owner_send('</stream:stream>') + while self.Process(1): pass diff --git a/libs/xmpp/features.py b/libs/xmpp/features.py new file mode 100644 index 0000000..c7993c2 --- /dev/null +++ b/libs/xmpp/features.py @@ -0,0 +1,182 @@ +## features.py +## +## Copyright (C) 2003-2004 Alexey "Snake" Nezhdanov +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. + +# $Id: features.py,v 1.25 2009/04/07 07:11:48 snakeru Exp $ + +""" +This module contains variable stuff that is not worth splitting into separate modules. +Here is: + DISCO client and agents-to-DISCO and browse-to-DISCO emulators. + IBR and password manager. + jabber:iq:privacy methods +All these methods takes 'disp' first argument that should be already connected +(and in most cases already authorised) dispatcher instance. +""" + +from protocol import * + +REGISTER_DATA_RECEIVED='REGISTER DATA RECEIVED' + +### DISCO ### http://jabber.org/protocol/disco ### JEP-0030 #################### +### Browse ### jabber:iq:browse ### JEP-0030 ################################### +### Agents ### jabber:iq:agents ### JEP-0030 ################################### +def _discover(disp,ns,jid,node=None,fb2b=0,fb2a=1): + """ Try to obtain info from the remote object. + If remote object doesn't support disco fall back to browse (if fb2b is true) + and if it doesnt support browse (or fb2b is not true) fall back to agents protocol + (if gb2a is true). Returns obtained info. Used internally. 
""" + iq=Iq(to=jid,typ='get',queryNS=ns) + if node: iq.setQuerynode(node) + rep=disp.SendAndWaitForResponse(iq) + if fb2b and not isResultNode(rep): rep=disp.SendAndWaitForResponse(Iq(to=jid,typ='get',queryNS=NS_BROWSE)) # Fallback to browse + if fb2a and not isResultNode(rep): rep=disp.SendAndWaitForResponse(Iq(to=jid,typ='get',queryNS=NS_AGENTS)) # Fallback to agents + if isResultNode(rep): return [n for n in rep.getQueryPayload() if isinstance(n, Node)] + return [] + +def discoverItems(disp,jid,node=None): + """ Query remote object about any items that it contains. Return items list. """ + """ According to JEP-0030: + query MAY have node attribute + item: MUST HAVE jid attribute and MAY HAVE name, node, action attributes. + action attribute of item can be either of remove or update value.""" + ret=[] + for i in _discover(disp,NS_DISCO_ITEMS,jid,node): + if i.getName()=='agent' and i.getTag('name'): i.setAttr('name',i.getTagData('name')) + ret.append(i.attrs) + return ret + +def discoverInfo(disp,jid,node=None): + """ Query remote object about info that it publishes. Returns identities and features lists.""" + """ According to JEP-0030: + query MAY have node attribute + identity: MUST HAVE category and name attributes and MAY HAVE type attribute. 
+ feature: MUST HAVE var attribute""" + identities , features = [] , [] + for i in _discover(disp,NS_DISCO_INFO,jid,node): + if i.getName()=='identity': identities.append(i.attrs) + elif i.getName()=='feature': features.append(i.getAttr('var')) + elif i.getName()=='agent': + if i.getTag('name'): i.setAttr('name',i.getTagData('name')) + if i.getTag('description'): i.setAttr('name',i.getTagData('description')) + identities.append(i.attrs) + if i.getTag('groupchat'): features.append(NS_GROUPCHAT) + if i.getTag('register'): features.append(NS_REGISTER) + if i.getTag('search'): features.append(NS_SEARCH) + return identities , features + +### Registration ### jabber:iq:register ### JEP-0077 ########################### +def getRegInfo(disp,host,info={},sync=True): + """ Gets registration form from remote host. + You can pre-fill the info dictionary. + F.e. if you are requesting info on registering user joey than specify + info as {'username':'joey'}. See JEP-0077 for details. + 'disp' must be connected dispatcher instance.""" + iq=Iq('get',NS_REGISTER,to=host) + for i in info.keys(): iq.setTagData(i,info[i]) + if sync: + resp=disp.SendAndWaitForResponse(iq) + _ReceivedRegInfo(disp.Dispatcher,resp, host) + return resp + else: disp.SendAndCallForResponse(iq,_ReceivedRegInfo, {'agent': host}) + +def _ReceivedRegInfo(con, resp, agent): + iq=Iq('get',NS_REGISTER,to=agent) + if not isResultNode(resp): return + df=resp.getTag('query',namespace=NS_REGISTER).getTag('x',namespace=NS_DATA) + if df: + con.Event(NS_REGISTER,REGISTER_DATA_RECEIVED,(agent, DataForm(node=df))) + return + df=DataForm(typ='form') + for i in resp.getQueryPayload(): + if type(i)<>type(iq): pass + elif i.getName()=='instructions': df.addInstructions(i.getData()) + else: df.setField(i.getName()).setValue(i.getData()) + con.Event(NS_REGISTER,REGISTER_DATA_RECEIVED,(agent, df)) + +def register(disp,host,info): + """ Perform registration on remote server with provided info. 
+ disp must be connected dispatcher instance. + Returns true or false depending on registration result. + If registration fails you can get additional info from the dispatcher's owner + attributes lastErrNode, lastErr and lastErrCode. + """ + iq=Iq('set',NS_REGISTER,to=host) + if type(info)<>type({}): info=info.asDict() + for i in info.keys(): iq.setTag('query').setTagData(i,info[i]) + resp=disp.SendAndWaitForResponse(iq) + if isResultNode(resp): return 1 + +def unregister(disp,host): + """ Unregisters with host (permanently removes account). + disp must be connected and authorized dispatcher instance. + Returns true on success.""" + resp=disp.SendAndWaitForResponse(Iq('set',NS_REGISTER,to=host,payload=[Node('remove')])) + if isResultNode(resp): return 1 + +def changePasswordTo(disp,newpassword,host=None): + """ Changes password on specified or current (if not specified) server. + disp must be connected and authorized dispatcher instance. + Returns true on success.""" + if not host: host=disp._owner.Server + resp=disp.SendAndWaitForResponse(Iq('set',NS_REGISTER,to=host,payload=[Node('username',payload=[disp._owner.Server]),Node('password',payload=[newpassword])])) + if isResultNode(resp): return 1 + +### Privacy ### jabber:iq:privacy ### draft-ietf-xmpp-im-19 #################### +#type=[jid|group|subscription] +#action=[allow|deny] + +def getPrivacyLists(disp): + """ Requests privacy lists from connected server. + Returns dictionary of existing lists on success.""" + try: + dict={'lists':[]} + resp=disp.SendAndWaitForResponse(Iq('get',NS_PRIVACY)) + if not isResultNode(resp): return + for list in resp.getQueryPayload(): + if list.getName()=='list': dict['lists'].append(list.getAttr('name')) + else: dict[list.getName()]=list.getAttr('name') + return dict + except: pass + +def getPrivacyList(disp,listname): + """ Requests specific privacy list listname. 
Returns list of XML nodes (rules) + taken from the server responce.""" + try: + resp=disp.SendAndWaitForResponse(Iq('get',NS_PRIVACY,payload=[Node('list',{'name':listname})])) + if isResultNode(resp): return resp.getQueryPayload()[0] + except: pass + +def setActivePrivacyList(disp,listname=None,typ='active'): + """ Switches privacy list 'listname' to specified type. + By default the type is 'active'. Returns true on success.""" + if listname: attrs={'name':listname} + else: attrs={} + resp=disp.SendAndWaitForResponse(Iq('set',NS_PRIVACY,payload=[Node(typ,attrs)])) + if isResultNode(resp): return 1 + +def setDefaultPrivacyList(disp,listname=None): + """ Sets the default privacy list as 'listname'. Returns true on success.""" + return setActivePrivacyList(disp,listname,'default') + +def setPrivacyList(disp,list): + """ Set the ruleset. 'list' should be the simpleXML node formatted + according to RFC 3921 (XMPP-IM) (I.e. Node('list',{'name':listname},payload=[...]) ) + Returns true on success.""" + resp=disp.SendAndWaitForResponse(Iq('set',NS_PRIVACY,payload=[list])) + if isResultNode(resp): return 1 + +def delPrivacyList(disp,listname): + """ Deletes privacy list 'listname'. Returns true on success.""" + resp=disp.SendAndWaitForResponse(Iq('set',NS_PRIVACY,payload=[Node('list',{'name':listname})])) + if isResultNode(resp): return 1 diff --git a/libs/xmpp/filetransfer.py b/libs/xmpp/filetransfer.py new file mode 100644 index 0000000..87ddc21 --- /dev/null +++ b/libs/xmpp/filetransfer.py @@ -0,0 +1,199 @@ +## filetransfer.py +## +## Copyright (C) 2004 Alexey "Snake" Nezhdanov +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. 
+## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. + +# $Id: filetransfer.py,v 1.6 2004/12/25 20:06:59 snakeru Exp $ + +""" +This module contains IBB class that is the simple implementation of JEP-0047. +Note that this is just a transport for data. You have to negotiate data transfer before +(via StreamInitiation most probably). Unfortunately SI is not implemented yet. +""" + +from protocol import * +from dispatcher import PlugIn +import base64 + +class IBB(PlugIn): + """ IBB used to transfer small-sized data chunk over estabilished xmpp connection. + Data is split into small blocks (by default 3000 bytes each), encoded as base 64 + and sent to another entity that compiles these blocks back into the data chunk. + This is very inefficiend but should work under any circumstances. Note that + using IBB normally should be the last resort. + """ + def __init__(self): + """ Initialise internal variables. """ + PlugIn.__init__(self) + self.DBG_LINE='ibb' + self._exported_methods=[self.OpenStream] + self._streams={} + self._ampnode=Node(NS_AMP+' amp',payload=[Node('rule',{'condition':'deliver-at','value':'stored','action':'error'}),Node('rule',{'condition':'match-resource','value':'exact','action':'error'})]) + + def plugin(self,owner): + """ Register handlers for receiving incoming datastreams. Used internally. """ + self._owner.RegisterHandlerOnce('iq',self.StreamOpenReplyHandler) # Move to StreamOpen and specify stanza id + self._owner.RegisterHandler('iq',self.IqHandler,ns=NS_IBB) + self._owner.RegisterHandler('message',self.ReceiveHandler,ns=NS_IBB) + + def IqHandler(self,conn,stanza): + """ Handles streams state change. Used internally. 
""" + typ=stanza.getType() + self.DEBUG('IqHandler called typ->%s'%typ,'info') + if typ=='set' and stanza.getTag('open',namespace=NS_IBB): self.StreamOpenHandler(conn,stanza) + elif typ=='set' and stanza.getTag('close',namespace=NS_IBB): self.StreamCloseHandler(conn,stanza) + elif typ=='result': self.StreamCommitHandler(conn,stanza) + elif typ=='error': self.StreamOpenReplyHandler(conn,stanza) + else: conn.send(Error(stanza,ERR_BAD_REQUEST)) + raise NodeProcessed + + def StreamOpenHandler(self,conn,stanza): + """ Handles opening of new incoming stream. Used internally. """ + """ +<iq type='set' + from='romeo@montague.net/orchard' + to='juliet@capulet.com/balcony' + id='inband_1'> + <open sid='mySID' + block-size='4096' + xmlns='http://jabber.org/protocol/ibb'/> +</iq> +""" + err=None + sid,blocksize=stanza.getTagAttr('open','sid'),stanza.getTagAttr('open','block-size') + self.DEBUG('StreamOpenHandler called sid->%s blocksize->%s'%(sid,blocksize),'info') + try: blocksize=int(blocksize) + except: err=ERR_BAD_REQUEST + if not sid or not blocksize: err=ERR_BAD_REQUEST + elif sid in self._streams.keys(): err=ERR_UNEXPECTED_REQUEST + if err: rep=Error(stanza,err) + else: + self.DEBUG("Opening stream: id %s, block-size %s"%(sid,blocksize),'info') + rep=Protocol('iq',stanza.getFrom(),'result',stanza.getTo(),{'id':stanza.getID()}) + self._streams[sid]={'direction':'<'+str(stanza.getFrom()),'block-size':blocksize,'fp':open('/tmp/xmpp_file_'+sid,'w'),'seq':0,'syn_id':stanza.getID()} + conn.send(rep) + + def OpenStream(self,sid,to,fp,blocksize=3000): + """ Start new stream. You should provide stream id 'sid', the endpoind jid 'to', + the file object containing info for send 'fp'. Also the desired blocksize can be specified. 
+ Take into account that recommended stanza size is 4k and IBB uses base64 encoding + that increases size of data by 1/3.""" + if sid in self._streams.keys(): return + if not JID(to).getResource(): return + self._streams[sid]={'direction':'|>'+to,'block-size':blocksize,'fp':fp,'seq':0} + self._owner.RegisterCycleHandler(self.SendHandler) + syn=Protocol('iq',to,'set',payload=[Node(NS_IBB+' open',{'sid':sid,'block-size':blocksize})]) + self._owner.send(syn) + self._streams[sid]['syn_id']=syn.getID() + return self._streams[sid] + + def SendHandler(self,conn): + """ Send next portion of data if it is time to do it. Used internally. """ + self.DEBUG('SendHandler called','info') + for sid in self._streams.keys(): + stream=self._streams[sid] + if stream['direction'][:2]=='|>': cont=1 + elif stream['direction'][0]=='>': + chunk=stream['fp'].read(stream['block-size']) + if chunk: + datanode=Node(NS_IBB+' data',{'sid':sid,'seq':stream['seq']},base64.encodestring(chunk)) + stream['seq']+=1 + if stream['seq']==65536: stream['seq']=0 + conn.send(Protocol('message',stream['direction'][1:],payload=[datanode,self._ampnode])) + else: + """ notify the other side about stream closing + notify the local user about sucessfull send + delete the local stream""" + conn.send(Protocol('iq',stream['direction'][1:],'set',payload=[Node(NS_IBB+' close',{'sid':sid})])) + conn.Event(self.DBG_LINE,'SUCCESSFULL SEND',stream) + del self._streams[sid] + self._owner.UnregisterCycleHandler(self.SendHandler) + + """ +<message from='romeo@montague.net/orchard' to='juliet@capulet.com/balcony' id='msg1'> + <data xmlns='http://jabber.org/protocol/ibb' sid='mySID' seq='0'> + qANQR1DBwU4DX7jmYZnncmUQB/9KuKBddzQH+tZ1ZywKK0yHKnq57kWq+RFtQdCJ + WpdWpR0uQsuJe7+vh3NWn59/gTc5MDlX8dS9p0ovStmNcyLhxVgmqS8ZKhsblVeu + IpQ0JgavABqibJolc3BKrVtVV1igKiX/N7Pi8RtY1K18toaMDhdEfhBRzO/XB0+P + AQhYlRjNacGcslkhXqNjK5Va4tuOAPy2n1Q8UUrHbUd0g+xJ9Bm0G0LZXyvCWyKH + kuNEHFQiLuCY6Iv0myq6iX6tjuHehZlFSh80b5BVV9tNLwNR5Eqz1klxMhoghJOA + 
</data> + <amp xmlns='http://jabber.org/protocol/amp'> + <rule condition='deliver-at' value='stored' action='error'/> + <rule condition='match-resource' value='exact' action='error'/> + </amp> +</message> +""" + + def ReceiveHandler(self,conn,stanza): + """ Receive next portion of incoming datastream and store it write + it to temporary file. Used internally. + """ + sid,seq,data=stanza.getTagAttr('data','sid'),stanza.getTagAttr('data','seq'),stanza.getTagData('data') + self.DEBUG('ReceiveHandler called sid->%s seq->%s'%(sid,seq),'info') + try: seq=int(seq); data=base64.decodestring(data) + except: seq=''; data='' + err=None + if not sid in self._streams.keys(): err=ERR_ITEM_NOT_FOUND + else: + stream=self._streams[sid] + if not data: err=ERR_BAD_REQUEST + elif seq<>stream['seq']: err=ERR_UNEXPECTED_REQUEST + else: + self.DEBUG('Successfull receive sid->%s %s+%s bytes'%(sid,stream['fp'].tell(),len(data)),'ok') + stream['seq']+=1 + stream['fp'].write(data) + if err: + self.DEBUG('Error on receive: %s'%err,'error') + conn.send(Error(Iq(to=stanza.getFrom(),frm=stanza.getTo(),payload=[Node(NS_IBB+' close')]),err,reply=0)) + + def StreamCloseHandler(self,conn,stanza): + """ Handle stream closure due to all data transmitted. + Raise xmpppy event specifying successfull data receive. """ + sid=stanza.getTagAttr('close','sid') + self.DEBUG('StreamCloseHandler called sid->%s'%sid,'info') + if sid in self._streams.keys(): + conn.send(stanza.buildReply('result')) + conn.Event(self.DBG_LINE,'SUCCESSFULL RECEIVE',self._streams[sid]) + del self._streams[sid] + else: conn.send(Error(stanza,ERR_ITEM_NOT_FOUND)) + + def StreamBrokenHandler(self,conn,stanza): + """ Handle stream closure due to all some error while receiving data. + Raise xmpppy event specifying unsuccessfull data receive. 
""" + syn_id=stanza.getID() + self.DEBUG('StreamBrokenHandler called syn_id->%s'%syn_id,'info') + for sid in self._streams.keys(): + stream=self._streams[sid] + if stream['syn_id']==syn_id: + if stream['direction'][0]=='<': conn.Event(self.DBG_LINE,'ERROR ON RECEIVE',stream) + else: conn.Event(self.DBG_LINE,'ERROR ON SEND',stream) + del self._streams[sid] + + def StreamOpenReplyHandler(self,conn,stanza): + """ Handle remote side reply about is it agree or not to receive our datastream. + Used internally. Raises xmpppy event specfiying if the data transfer + is agreed upon.""" + syn_id=stanza.getID() + self.DEBUG('StreamOpenReplyHandler called syn_id->%s'%syn_id,'info') + for sid in self._streams.keys(): + stream=self._streams[sid] + if stream['syn_id']==syn_id: + if stanza.getType()=='error': + if stream['direction'][0]=='<': conn.Event(self.DBG_LINE,'ERROR ON RECEIVE',stream) + else: conn.Event(self.DBG_LINE,'ERROR ON SEND',stream) + del self._streams[sid] + elif stanza.getType()=='result': + if stream['direction'][0]=='|': + stream['direction']=stream['direction'][1:] + conn.Event(self.DBG_LINE,'STREAM COMMITTED',stream) + else: conn.send(Error(stanza,ERR_UNEXPECTED_REQUEST)) diff --git a/libs/xmpp/jep0106.py b/libs/xmpp/jep0106.py new file mode 100644 index 0000000..fcf1114 --- /dev/null +++ b/libs/xmpp/jep0106.py @@ -0,0 +1,57 @@ + +# JID Escaping XEP-0106 for the xmpppy based transports written by Norman Rasmussen + +"""This file is the XEP-0106 commands. 
+ +Implemented commands as follows: + +4.2 Encode : Encoding Transformation +4.3 Decode : Decoding Transformation + + +""" + +xep0106mapping = [ + [' ' ,'20'], + ['"' ,'22'], + ['&' ,'26'], + ['\'','27'], + ['/' ,'2f'], + [':' ,'3a'], + ['<' ,'3c'], + ['>' ,'3e'], + ['@' ,'40']] + +def JIDEncode(str): + str = str.replace('\\5c', '\\5c5c') + for each in xep0106mapping: + str = str.replace('\\' + each[1], '\\5c' + each[1]) + for each in xep0106mapping: + str = str.replace(each[0], '\\' + each[1]) + return str + +def JIDDecode(str): + for each in xep0106mapping: + str = str.replace('\\' + each[1], each[0]) + return str.replace('\\5c', '\\') + +if __name__ == "__main__": + def test(before,valid): + during = JIDEncode(before) + after = JIDDecode(during) + if during == valid and after == before: + print 'PASS Before: ' + before + print 'PASS During: ' + during + else: + print 'FAIL Before: ' + before + print 'FAIL During: ' + during + print 'FAIL After : ' + after + print + + test('jid escaping',r'jid\20escaping') + test(r'\3and\2is\5@example.com',r'\5c3and\2is\5\40example.com') + test(r'\3catsand\2catsis\5cats@example.com',r'\5c3catsand\2catsis\5c5cats\40example.com') + test(r'\2plus\2is\4',r'\2plus\2is\4') + test(r'foo\bar',r'foo\bar') + test(r'foob\41r',r'foob\41r') + test('here\'s_a wild_&_/cr%zy/_address@example.com',r'here\27s_a\20wild_\26_\2fcr%zy\2f_address\40example.com') diff --git a/libs/xmpp/protocol.py b/libs/xmpp/protocol.py new file mode 100644 index 0000000..3e49b8d --- /dev/null +++ b/libs/xmpp/protocol.py @@ -0,0 +1,860 @@ +## protocol.py +## +## Copyright (C) 2003-2005 Alexey "Snake" Nezhdanov +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. 
+## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. + +# $Id: protocol.py,v 1.60 2009/04/07 11:14:28 snakeru Exp $ + +""" +Protocol module contains tools that is needed for processing of +xmpp-related data structures. +""" + +from simplexml import Node,ustr +import time +NS_ACTIVITY ='http://jabber.org/protocol/activity' # XEP-0108 +NS_ADDRESS ='http://jabber.org/protocol/address' # XEP-0033 +NS_ADMIN ='http://jabber.org/protocol/admin' # XEP-0133 +NS_ADMIN_ADD_USER =NS_ADMIN+'#add-user' # XEP-0133 +NS_ADMIN_DELETE_USER =NS_ADMIN+'#delete-user' # XEP-0133 +NS_ADMIN_DISABLE_USER =NS_ADMIN+'#disable-user' # XEP-0133 +NS_ADMIN_REENABLE_USER =NS_ADMIN+'#reenable-user' # XEP-0133 +NS_ADMIN_END_USER_SESSION =NS_ADMIN+'#end-user-session' # XEP-0133 +NS_ADMIN_GET_USER_PASSWORD =NS_ADMIN+'#get-user-password' # XEP-0133 +NS_ADMIN_CHANGE_USER_PASSWORD =NS_ADMIN+'#change-user-password' # XEP-0133 +NS_ADMIN_GET_USER_ROSTER =NS_ADMIN+'#get-user-roster' # XEP-0133 +NS_ADMIN_GET_USER_LASTLOGIN =NS_ADMIN+'#get-user-lastlogin' # XEP-0133 +NS_ADMIN_USER_STATS =NS_ADMIN+'#user-stats' # XEP-0133 +NS_ADMIN_EDIT_BLACKLIST =NS_ADMIN+'#edit-blacklist' # XEP-0133 +NS_ADMIN_EDIT_WHITELIST =NS_ADMIN+'#edit-whitelist' # XEP-0133 +NS_ADMIN_REGISTERED_USERS_NUM =NS_ADMIN+'#get-registered-users-num' # XEP-0133 +NS_ADMIN_DISABLED_USERS_NUM =NS_ADMIN+'#get-disabled-users-num' # XEP-0133 +NS_ADMIN_ONLINE_USERS_NUM =NS_ADMIN+'#get-online-users-num' # XEP-0133 +NS_ADMIN_ACTIVE_USERS_NUM =NS_ADMIN+'#get-active-users-num' # XEP-0133 +NS_ADMIN_IDLE_USERS_NUM =NS_ADMIN+'#get-idle-users-num' # XEP-0133 +NS_ADMIN_REGISTERED_USERS_LIST =NS_ADMIN+'#get-registered-users-list' # XEP-0133 +NS_ADMIN_DISABLED_USERS_LIST =NS_ADMIN+'#get-disabled-users-list' # XEP-0133 +NS_ADMIN_ONLINE_USERS_LIST 
=NS_ADMIN+'#get-online-users-list' # XEP-0133 +NS_ADMIN_ACTIVE_USERS_LIST =NS_ADMIN+'#get-active-users-list' # XEP-0133 +NS_ADMIN_IDLE_USERS_LIST =NS_ADMIN+'#get-idle-users-list' # XEP-0133 +NS_ADMIN_ANNOUNCE =NS_ADMIN+'#announce' # XEP-0133 +NS_ADMIN_SET_MOTD =NS_ADMIN+'#set-motd' # XEP-0133 +NS_ADMIN_EDIT_MOTD =NS_ADMIN+'#edit-motd' # XEP-0133 +NS_ADMIN_DELETE_MOTD =NS_ADMIN+'#delete-motd' # XEP-0133 +NS_ADMIN_SET_WELCOME =NS_ADMIN+'#set-welcome' # XEP-0133 +NS_ADMIN_DELETE_WELCOME =NS_ADMIN+'#delete-welcome' # XEP-0133 +NS_ADMIN_EDIT_ADMIN =NS_ADMIN+'#edit-admin' # XEP-0133 +NS_ADMIN_RESTART =NS_ADMIN+'#restart' # XEP-0133 +NS_ADMIN_SHUTDOWN =NS_ADMIN+'#shutdown' # XEP-0133 +NS_AGENTS ='jabber:iq:agents' # XEP-0094 (historical) +NS_AMP ='http://jabber.org/protocol/amp' # XEP-0079 +NS_AMP_ERRORS =NS_AMP+'#errors' # XEP-0079 +NS_AUTH ='jabber:iq:auth' # XEP-0078 +NS_AVATAR ='jabber:iq:avatar' # XEP-0008 (historical) +NS_BIND ='urn:ietf:params:xml:ns:xmpp-bind' # RFC 3920 +NS_BROWSE ='jabber:iq:browse' # XEP-0011 (historical) +NS_BYTESTREAM ='http://jabber.org/protocol/bytestreams' # XEP-0065 +NS_CAPS ='http://jabber.org/protocol/caps' # XEP-0115 +NS_CHATSTATES ='http://jabber.org/protocol/chatstates' # XEP-0085 +NS_CLIENT ='jabber:client' # RFC 3921 +NS_COMMANDS ='http://jabber.org/protocol/commands' # XEP-0050 +NS_COMPONENT_ACCEPT ='jabber:component:accept' # XEP-0114 +NS_COMPONENT_1 ='http://jabberd.jabberstudio.org/ns/component/1.0' # Jabberd2 +NS_COMPRESS ='http://jabber.org/protocol/compress' # XEP-0138 +NS_DATA ='jabber:x:data' # XEP-0004 +NS_DATA_LAYOUT ='http://jabber.org/protocol/xdata-layout' # XEP-0141 +NS_DATA_VALIDATE ='http://jabber.org/protocol/xdata-validate' # XEP-0122 +NS_DELAY ='jabber:x:delay' # XEP-0091 (deprecated) +NS_DIALBACK ='jabber:server:dialback' # RFC 3921 +NS_DISCO ='http://jabber.org/protocol/disco' # XEP-0030 +NS_DISCO_INFO =NS_DISCO+'#info' # XEP-0030 +NS_DISCO_ITEMS =NS_DISCO+'#items' # XEP-0030 +NS_ENCRYPTED 
='jabber:x:encrypted' # XEP-0027 +NS_EVENT ='jabber:x:event' # XEP-0022 (deprecated) +NS_FEATURE ='http://jabber.org/protocol/feature-neg' # XEP-0020 +NS_FILE ='http://jabber.org/protocol/si/profile/file-transfer' # XEP-0096 +NS_GATEWAY ='jabber:iq:gateway' # XEP-0100 +NS_GEOLOC ='http://jabber.org/protocol/geoloc' # XEP-0080 +NS_GROUPCHAT ='gc-1.0' # XEP-0045 +NS_HTTP_BIND ='http://jabber.org/protocol/httpbind' # XEP-0124 +NS_IBB ='http://jabber.org/protocol/ibb' # XEP-0047 +NS_INVISIBLE ='presence-invisible' # Jabberd2 +NS_IQ ='iq' # Jabberd2 +NS_LAST ='jabber:iq:last' # XEP-0012 +NS_MESSAGE ='message' # Jabberd2 +NS_MOOD ='http://jabber.org/protocol/mood' # XEP-0107 +NS_MUC ='http://jabber.org/protocol/muc' # XEP-0045 +NS_MUC_ADMIN =NS_MUC+'#admin' # XEP-0045 +NS_MUC_OWNER =NS_MUC+'#owner' # XEP-0045 +NS_MUC_UNIQUE =NS_MUC+'#unique' # XEP-0045 +NS_MUC_USER =NS_MUC+'#user' # XEP-0045 +NS_MUC_REGISTER =NS_MUC+'#register' # XEP-0045 +NS_MUC_REQUEST =NS_MUC+'#request' # XEP-0045 +NS_MUC_ROOMCONFIG =NS_MUC+'#roomconfig' # XEP-0045 +NS_MUC_ROOMINFO =NS_MUC+'#roominfo' # XEP-0045 +NS_MUC_ROOMS =NS_MUC+'#rooms' # XEP-0045 +NS_MUC_TRAFIC =NS_MUC+'#traffic' # XEP-0045 +NS_NICK ='http://jabber.org/protocol/nick' # XEP-0172 +NS_OFFLINE ='http://jabber.org/protocol/offline' # XEP-0013 +NS_PHYSLOC ='http://jabber.org/protocol/physloc' # XEP-0112 +NS_PRESENCE ='presence' # Jabberd2 +NS_PRIVACY ='jabber:iq:privacy' # RFC 3921 +NS_PRIVATE ='jabber:iq:private' # XEP-0049 +NS_PUBSUB ='http://jabber.org/protocol/pubsub' # XEP-0060 +NS_REGISTER ='jabber:iq:register' # XEP-0077 +NS_RC ='http://jabber.org/protocol/rc' # XEP-0146 +NS_ROSTER ='jabber:iq:roster' # RFC 3921 +NS_ROSTERX ='http://jabber.org/protocol/rosterx' # XEP-0144 +NS_RPC ='jabber:iq:rpc' # XEP-0009 +NS_SASL ='urn:ietf:params:xml:ns:xmpp-sasl' # RFC 3920 +NS_SEARCH ='jabber:iq:search' # XEP-0055 +NS_SERVER ='jabber:server' # RFC 3921 +NS_SESSION ='urn:ietf:params:xml:ns:xmpp-session' # RFC 3921 +NS_SI 
='http://jabber.org/protocol/si' # XEP-0096 +NS_SI_PUB ='http://jabber.org/protocol/sipub' # XEP-0137 +NS_SIGNED ='jabber:x:signed' # XEP-0027 +NS_STANZAS ='urn:ietf:params:xml:ns:xmpp-stanzas' # RFC 3920 +NS_STREAMS ='http://etherx.jabber.org/streams' # RFC 3920 +NS_TIME ='jabber:iq:time' # XEP-0090 (deprecated) +NS_TLS ='urn:ietf:params:xml:ns:xmpp-tls' # RFC 3920 +NS_VACATION ='http://jabber.org/protocol/vacation' # XEP-0109 +NS_VCARD ='vcard-temp' # XEP-0054 +NS_VCARD_UPDATE ='vcard-temp:x:update' # XEP-0153 +NS_VERSION ='jabber:iq:version' # XEP-0092 +NS_WAITINGLIST ='http://jabber.org/protocol/waitinglist' # XEP-0130 +NS_XHTML_IM ='http://jabber.org/protocol/xhtml-im' # XEP-0071 +NS_XMPP_STREAMS ='urn:ietf:params:xml:ns:xmpp-streams' # RFC 3920 + +xmpp_stream_error_conditions=""" +bad-format -- -- -- The entity has sent XML that cannot be processed. +bad-namespace-prefix -- -- -- The entity has sent a namespace prefix that is unsupported, or has sent no namespace prefix on an element that requires such a prefix. +conflict -- -- -- The server is closing the active stream for this entity because a new stream has been initiated that conflicts with the existing stream. +connection-timeout -- -- -- The entity has not generated any traffic over the stream for some period of time. +host-gone -- -- -- The value of the 'to' attribute provided by the initiating entity in the stream header corresponds to a hostname that is no longer hosted by the server. +host-unknown -- -- -- The value of the 'to' attribute provided by the initiating entity in the stream header does not correspond to a hostname that is hosted by the server. +improper-addressing -- -- -- A stanza sent between two servers lacks a 'to' or 'from' attribute (or the attribute has no value). +internal-server-error -- -- -- The server has experienced a misconfiguration or an otherwise-undefined internal error that prevents it from servicing the stream. 
+invalid-from -- cancel -- -- The JID or hostname provided in a 'from' address does not match an authorized JID or validated domain negotiated between servers via SASL or dialback, or between a client and a server via authentication and resource authorization. +invalid-id -- -- -- The stream ID or dialback ID is invalid or does not match an ID previously provided. +invalid-namespace -- -- -- The streams namespace name is something other than "http://etherx.jabber.org/streams" or the dialback namespace name is something other than "jabber:server:dialback". +invalid-xml -- -- -- The entity has sent invalid XML over the stream to a server that performs validation. +not-authorized -- -- -- The entity has attempted to send data before the stream has been authenticated, or otherwise is not authorized to perform an action related to stream negotiation. +policy-violation -- -- -- The entity has violated some local service policy. +remote-connection-failed -- -- -- The server is unable to properly connect to a remote resource that is required for authentication or authorization. +resource-constraint -- -- -- The server lacks the system resources necessary to service the stream. +restricted-xml -- -- -- The entity has attempted to send restricted XML features such as a comment, processing instruction, DTD, entity reference, or unescaped character. +see-other-host -- -- -- The server will not provide service to the initiating entity but is redirecting traffic to another host. +system-shutdown -- -- -- The server is being shut down and all active streams are being closed. +undefined-condition -- -- -- The error condition is not one of those defined by the other conditions in this list. +unsupported-encoding -- -- -- The initiating entity has encoded the stream in an encoding that is not supported by the server. +unsupported-stanza-type -- -- -- The initiating entity has sent a first-level child of the stream that is not supported by the server. 
+unsupported-version -- -- -- The value of the 'version' attribute provided by the initiating entity in the stream header specifies a version of XMPP that is not supported by the server. +xml-not-well-formed -- -- -- The initiating entity has sent XML that is not well-formed.""" +xmpp_stanza_error_conditions=""" +bad-request -- 400 -- modify -- The sender has sent XML that is malformed or that cannot be processed. +conflict -- 409 -- cancel -- Access cannot be granted because an existing resource or session exists with the same name or address. +feature-not-implemented -- 501 -- cancel -- The feature requested is not implemented by the recipient or server and therefore cannot be processed. +forbidden -- 403 -- auth -- The requesting entity does not possess the required permissions to perform the action. +gone -- 302 -- modify -- The recipient or server can no longer be contacted at this address. +internal-server-error -- 500 -- wait -- The server could not process the stanza because of a misconfiguration or an otherwise-undefined internal server error. +item-not-found -- 404 -- cancel -- The addressed JID or item requested cannot be found. +jid-malformed -- 400 -- modify -- The value of the 'to' attribute in the sender's stanza does not adhere to the syntax defined in Addressing Scheme. +not-acceptable -- 406 -- cancel -- The recipient or server understands the request but is refusing to process it because it does not meet criteria defined by the recipient or server. +not-allowed -- 405 -- cancel -- The recipient or server does not allow any entity to perform the action. +not-authorized -- 401 -- auth -- The sender must provide proper credentials before being allowed to perform the action, or has provided improper credentials. +payment-required -- 402 -- auth -- The requesting entity is not authorized to access the requested service because payment is required. +recipient-unavailable -- 404 -- wait -- The intended recipient is temporarily unavailable. 
+redirect -- 302 -- modify -- The recipient or server is redirecting requests for this information to another entity. +registration-required -- 407 -- auth -- The requesting entity is not authorized to access the requested service because registration is required. +remote-server-not-found -- 404 -- cancel -- A remote server or service specified as part or all of the JID of the intended recipient does not exist. +remote-server-timeout -- 504 -- wait -- A remote server or service specified as part or all of the JID of the intended recipient could not be contacted within a reasonable amount of time. +resource-constraint -- 500 -- wait -- The server or recipient lacks the system resources necessary to service the request. +service-unavailable -- 503 -- cancel -- The server or recipient does not currently provide the requested service. +subscription-required -- 407 -- auth -- The requesting entity is not authorized to access the requested service because a subscription is required. +undefined-condition -- 500 -- -- +unexpected-request -- 400 -- wait -- The recipient or server understood the request but was not expecting it at this time (e.g., the request was out of order).""" +sasl_error_conditions=""" +aborted -- -- -- The receiving entity acknowledges an <abort/> element sent by the initiating entity; sent in reply to the <abort/> element. +incorrect-encoding -- -- -- The data provided by the initiating entity could not be processed because the [BASE64]Josefsson, S., The Base16, Base32, and Base64 Data Encodings, July 2003. encoding is incorrect (e.g., because the encoding does not adhere to the definition in Section 3 of [BASE64]Josefsson, S., The Base16, Base32, and Base64 Data Encodings, July 2003.); sent in reply to a <response/> element or an <auth/> element with initial response data. 
+invalid-authzid -- -- -- The authzid provided by the initiating entity is invalid, either because it is incorrectly formatted or because the initiating entity does not have permissions to authorize that ID; sent in reply to a <response/> element or an <auth/> element with initial response data. +invalid-mechanism -- -- -- The initiating entity did not provide a mechanism or requested a mechanism that is not supported by the receiving entity; sent in reply to an <auth/> element. +mechanism-too-weak -- -- -- The mechanism requested by the initiating entity is weaker than server policy permits for that initiating entity; sent in reply to a <response/> element or an <auth/> element with initial response data. +not-authorized -- -- -- The authentication failed because the initiating entity did not provide valid credentials (this includes but is not limited to the case of an unknown username); sent in reply to a <response/> element or an <auth/> element with initial response data. +temporary-auth-failure -- -- -- The authentication failed because of a temporary error condition within the receiving entity; sent in reply to an <auth/> element or <response/> element.""" + +ERRORS,_errorcodes={},{} +for ns,errname,errpool in [(NS_XMPP_STREAMS,'STREAM',xmpp_stream_error_conditions), + (NS_STANZAS ,'ERR' ,xmpp_stanza_error_conditions), + (NS_SASL ,'SASL' ,sasl_error_conditions)]: + for err in errpool.split('\n')[1:]: + cond,code,typ,text=err.split(' -- ') + name=errname+'_'+cond.upper().replace('-','_') + locals()[name]=ns+' '+cond + ERRORS[ns+' '+cond]=[code,typ,text] + if code: _errorcodes[code]=cond +del ns,errname,errpool,err,cond,code,typ,text + +def isResultNode(node): + """ Returns true if the node is a positive reply. """ + return node and node.getType()=='result' +def isErrorNode(node): + """ Returns true if the node is a negative reply. 
""" + return node and node.getType()=='error' + +class NodeProcessed(Exception): + """ Exception that should be raised by handler when the handling should be stopped. """ +class StreamError(Exception): + """ Base exception class for stream errors.""" +class BadFormat(StreamError): pass +class BadNamespacePrefix(StreamError): pass +class Conflict(StreamError): pass +class ConnectionTimeout(StreamError): pass +class HostGone(StreamError): pass +class HostUnknown(StreamError): pass +class ImproperAddressing(StreamError): pass +class InternalServerError(StreamError): pass +class InvalidFrom(StreamError): pass +class InvalidID(StreamError): pass +class InvalidNamespace(StreamError): pass +class InvalidXML(StreamError): pass +class NotAuthorized(StreamError): pass +class PolicyViolation(StreamError): pass +class RemoteConnectionFailed(StreamError): pass +class ResourceConstraint(StreamError): pass +class RestrictedXML(StreamError): pass +class SeeOtherHost(StreamError): pass +class SystemShutdown(StreamError): pass +class UndefinedCondition(StreamError): pass +class UnsupportedEncoding(StreamError): pass +class UnsupportedStanzaType(StreamError): pass +class UnsupportedVersion(StreamError): pass +class XMLNotWellFormed(StreamError): pass + +stream_exceptions = {'bad-format': BadFormat, + 'bad-namespace-prefix': BadNamespacePrefix, + 'conflict': Conflict, + 'connection-timeout': ConnectionTimeout, + 'host-gone': HostGone, + 'host-unknown': HostUnknown, + 'improper-addressing': ImproperAddressing, + 'internal-server-error': InternalServerError, + 'invalid-from': InvalidFrom, + 'invalid-id': InvalidID, + 'invalid-namespace': InvalidNamespace, + 'invalid-xml': InvalidXML, + 'not-authorized': NotAuthorized, + 'policy-violation': PolicyViolation, + 'remote-connection-failed': RemoteConnectionFailed, + 'resource-constraint': ResourceConstraint, + 'restricted-xml': RestrictedXML, + 'see-other-host': SeeOtherHost, + 'system-shutdown': SystemShutdown, + 'undefined-condition': 
UndefinedCondition, + 'unsupported-encoding': UnsupportedEncoding, + 'unsupported-stanza-type': UnsupportedStanzaType, + 'unsupported-version': UnsupportedVersion, + 'xml-not-well-formed': XMLNotWellFormed} + +class JID: + """ JID object. JID can be built from string, modified, compared, serialised into string. """ + def __init__(self, jid=None, node='', domain='', resource=''): + """ Constructor. JID can be specified as string (jid argument) or as separate parts. + Examples: + JID('node@domain/resource') + JID(node='node',domain='domain.org') + """ + if not jid and not domain: raise ValueError('JID must contain at least domain name') + elif type(jid)==type(self): self.node,self.domain,self.resource=jid.node,jid.domain,jid.resource + elif domain: self.node,self.domain,self.resource=node,domain,resource + else: + if jid.find('@')+1: self.node,jid=jid.split('@',1) + else: self.node='' + if jid.find('/')+1: self.domain,self.resource=jid.split('/',1) + else: self.domain,self.resource=jid,'' + def getNode(self): + """ Return the node part of the JID """ + return self.node + def setNode(self,node): + """ Set the node part of the JID to new value. Specify None to remove the node part.""" + self.node=node.lower() + def getDomain(self): + """ Return the domain part of the JID """ + return self.domain + def setDomain(self,domain): + """ Set the domain part of the JID to new value.""" + self.domain=domain.lower() + def getResource(self): + """ Return the resource part of the JID """ + return self.resource + def setResource(self,resource): + """ Set the resource part of the JID to new value. Specify None to remove the resource part.""" + self.resource=resource + def getStripped(self): + """ Return the bare representation of JID. I.e. string value w/o resource. """ + return self.__str__(0) + def __eq__(self, other): + """ Compare the JID to another instance or to string for equality. 
""" + try: other=JID(other) + except ValueError: return 0 + return self.resource==other.resource and self.__str__(0) == other.__str__(0) + def __ne__(self, other): + """ Compare the JID to another instance or to string for non-equality. """ + return not self.__eq__(other) + def bareMatch(self, other): + """ Compare the node and domain parts of the JID's for equality. """ + return self.__str__(0) == JID(other).__str__(0) + def __str__(self,wresource=1): + """ Serialise JID into string. """ + if self.node: jid=self.node+'@'+self.domain + else: jid=self.domain + if wresource and self.resource: return jid+'/'+self.resource + return jid + def __hash__(self): + """ Produce hash of the JID, Allows to use JID objects as keys of the dictionary. """ + return hash(self.__str__()) + +class Protocol(Node): + """ A "stanza" object class. Contains methods that are common for presences, iqs and messages. """ + def __init__(self, name=None, to=None, typ=None, frm=None, attrs={}, payload=[], timestamp=None, xmlns=None, node=None): + """ Constructor, name is the name of the stanza i.e. 'message' or 'presence' or 'iq'. + to is the value of 'to' attribure, 'typ' - 'type' attribute + frn - from attribure, attrs - other attributes mapping, payload - same meaning as for simplexml payload definition + timestamp - the time value that needs to be stamped over stanza + xmlns - namespace of top stanza node + node - parsed or unparsed stana to be taken as prototype. 
+ """ + if not attrs: attrs={} + if to: attrs['to']=to + if frm: attrs['from']=frm + if typ: attrs['type']=typ + Node.__init__(self, tag=name, attrs=attrs, payload=payload, node=node) + if not node and xmlns: self.setNamespace(xmlns) + if self['to']: self.setTo(self['to']) + if self['from']: self.setFrom(self['from']) + if node and type(self)==type(node) and self.__class__==node.__class__ and self.attrs.has_key('id'): del self.attrs['id'] + self.timestamp=None + for x in self.getTags('x',namespace=NS_DELAY): + try: + if not self.getTimestamp() or x.getAttr('stamp')<self.getTimestamp(): self.setTimestamp(x.getAttr('stamp')) + except: pass + if timestamp is not None: self.setTimestamp(timestamp) # To auto-timestamp stanza just pass timestamp='' + def getTo(self): + """ Return value of the 'to' attribute. """ + try: return self['to'] + except: return None + def getFrom(self): + """ Return value of the 'from' attribute. """ + try: return self['from'] + except: return None + def getTimestamp(self): + """ Return the timestamp in the 'yyyymmddThhmmss' format. """ + return self.timestamp + def getID(self): + """ Return the value of the 'id' attribute. """ + return self.getAttr('id') + def setTo(self,val): + """ Set the value of the 'to' attribute. """ + self.setAttr('to', JID(val)) + def getType(self): + """ Return the value of the 'type' attribute. """ + return self.getAttr('type') + def setFrom(self,val): + """ Set the value of the 'from' attribute. """ + self.setAttr('from', JID(val)) + def setType(self,val): + """ Set the value of the 'type' attribute. """ + self.setAttr('type', val) + def setID(self,val): + """ Set the value of the 'id' attribute. """ + self.setAttr('id', val) + def getError(self): + """ Return the error-condition (if present) or the textual description of the error (otherwise). 
""" + errtag=self.getTag('error') + if errtag: + for tag in errtag.getChildren(): + if tag.getName()<>'text': return tag.getName() + return errtag.getData() + def getErrorCode(self): + """ Return the error code. Obsolette. """ + return self.getTagAttr('error','code') + def setError(self,error,code=None): + """ Set the error code. Obsolette. Use error-conditions instead. """ + if code: + if str(code) in _errorcodes.keys(): error=ErrorNode(_errorcodes[str(code)],text=error) + else: error=ErrorNode(ERR_UNDEFINED_CONDITION,code=code,typ='cancel',text=error) + elif type(error) in [type(''),type(u'')]: error=ErrorNode(error) + self.setType('error') + self.addChild(node=error) + def setTimestamp(self,val=None): + """Set the timestamp. timestamp should be the yyyymmddThhmmss string.""" + if not val: val=time.strftime('%Y%m%dT%H:%M:%S', time.gmtime()) + self.timestamp=val + self.setTag('x',{'stamp':self.timestamp},namespace=NS_DELAY) + def getProperties(self): + """ Return the list of namespaces to which belongs the direct childs of element""" + props=[] + for child in self.getChildren(): + prop=child.getNamespace() + if prop not in props: props.append(prop) + return props + def __setitem__(self,item,val): + """ Set the item 'item' to the value 'val'.""" + if item in ['to','from']: val=JID(val) + return self.setAttr(item,val) + +class Message(Protocol): + """ XMPP Message stanza - "push" mechanism.""" + def __init__(self, to=None, body=None, typ=None, subject=None, attrs={}, frm=None, payload=[], timestamp=None, xmlns=NS_CLIENT, node=None): + """ Create message object. You can specify recipient, text of message, type of message + any additional attributes, sender of the message, any additional payload (f.e. jabber:x:delay element) and namespace in one go. + Alternatively you can pass in the other XML object as the 'node' parameted to replicate it as message. 
""" + Protocol.__init__(self, 'message', to=to, typ=typ, attrs=attrs, frm=frm, payload=payload, timestamp=timestamp, xmlns=xmlns, node=node) + if body: self.setBody(body) + if subject: self.setSubject(subject) + def getBody(self): + """ Returns text of the message. """ + return self.getTagData('body') + def getSubject(self): + """ Returns subject of the message. """ + return self.getTagData('subject') + def getThread(self): + """ Returns thread of the message. """ + return self.getTagData('thread') + def setBody(self,val): + """ Sets the text of the message. """ + self.setTagData('body',val) + def setSubject(self,val): + """ Sets the subject of the message. """ + self.setTagData('subject',val) + def setThread(self,val): + """ Sets the thread of the message. """ + self.setTagData('thread',val) + def buildReply(self,text=None): + """ Builds and returns another message object with specified text. + The to, from and thread properties of new message are pre-set as reply to this message. """ + m=Message(to=self.getFrom(),frm=self.getTo(),body=text) + th=self.getThread() + if th: m.setThread(th) + return m + +class Presence(Protocol): + """ XMPP Presence object.""" + def __init__(self, to=None, typ=None, priority=None, show=None, status=None, attrs={}, frm=None, timestamp=None, payload=[], xmlns=NS_CLIENT, node=None): + """ Create presence object. You can specify recipient, type of message, priority, show and status values + any additional attributes, sender of the presence, timestamp, any additional payload (f.e. jabber:x:delay element) and namespace in one go. + Alternatively you can pass in the other XML object as the 'node' parameted to replicate it as presence. 
""" + Protocol.__init__(self, 'presence', to=to, typ=typ, attrs=attrs, frm=frm, payload=payload, timestamp=timestamp, xmlns=xmlns, node=node) + if priority: self.setPriority(priority) + if show: self.setShow(show) + if status: self.setStatus(status) + def getPriority(self): + """ Returns the priority of the message. """ + return self.getTagData('priority') + def getShow(self): + """ Returns the show value of the message. """ + return self.getTagData('show') + def getStatus(self): + """ Returns the status string of the message. """ + return self.getTagData('status') + def setPriority(self,val): + """ Sets the priority of the message. """ + self.setTagData('priority',val) + def setShow(self,val): + """ Sets the show value of the message. """ + self.setTagData('show',val) + def setStatus(self,val): + """ Sets the status string of the message. """ + self.setTagData('status',val) + + def _muc_getItemAttr(self,tag,attr): + for xtag in self.getTags('x'): + for child in xtag.getTags(tag): + return child.getAttr(attr) + def _muc_getSubTagDataAttr(self,tag,attr): + for xtag in self.getTags('x'): + for child in xtag.getTags('item'): + for cchild in child.getTags(tag): + return cchild.getData(),cchild.getAttr(attr) + return None,None + def getRole(self): + """Returns the presence role (for groupchat)""" + return self._muc_getItemAttr('item','role') + def getAffiliation(self): + """Returns the presence affiliation (for groupchat)""" + return self._muc_getItemAttr('item','affiliation') + def getNick(self): + """Returns the nick value (for nick change in groupchat)""" + return self._muc_getItemAttr('item','nick') + def getJid(self): + """Returns the presence jid (for groupchat)""" + return self._muc_getItemAttr('item','jid') + def getReason(self): + """Returns the reason of the presence (for groupchat)""" + return self._muc_getSubTagDataAttr('reason','')[0] + def getActor(self): + """Returns the reason of the presence (for groupchat)""" + return 
self._muc_getSubTagDataAttr('actor','jid')[1] + def getStatusCode(self): + """Returns the status code of the presence (for groupchat)""" + return self._muc_getItemAttr('status','code') + +class Iq(Protocol): + """ XMPP Iq object - get/set dialog mechanism. """ + def __init__(self, typ=None, queryNS=None, attrs={}, to=None, frm=None, payload=[], xmlns=NS_CLIENT, node=None): + """ Create Iq object. You can specify type, query namespace + any additional attributes, recipient of the iq, sender of the iq, any additional payload (f.e. jabber:x:data node) and namespace in one go. + Alternatively you can pass in the other XML object as the 'node' parameted to replicate it as an iq. """ + Protocol.__init__(self, 'iq', to=to, typ=typ, attrs=attrs, frm=frm, xmlns=xmlns, node=node) + if payload: self.setQueryPayload(payload) + if queryNS: self.setQueryNS(queryNS) + def getQueryNS(self): + """ Return the namespace of the 'query' child element.""" + tag=self.getTag('query') + if tag: return tag.getNamespace() + def getQuerynode(self): + """ Return the 'node' attribute value of the 'query' child element.""" + return self.getTagAttr('query','node') + def getQueryPayload(self): + """ Return the 'query' child element payload.""" + tag=self.getTag('query') + if tag: return tag.getPayload() + def getQueryChildren(self): + """ Return the 'query' child element child nodes.""" + tag=self.getTag('query') + if tag: return tag.getChildren() + def setQueryNS(self,namespace): + """ Set the namespace of the 'query' child element.""" + self.setTag('query').setNamespace(namespace) + def setQueryPayload(self,payload): + """ Set the 'query' child element payload.""" + self.setTag('query').setPayload(payload) + def setQuerynode(self,node): + """ Set the 'node' attribute value of the 'query' child element.""" + self.setTagAttr('query','node',node) + def buildReply(self,typ): + """ Builds and returns another Iq object of specified type. 
+ The to, from and query child node of new Iq are pre-set as reply to this Iq. """ + iq=Iq(typ,to=self.getFrom(),frm=self.getTo(),attrs={'id':self.getID()}) + if self.getTag('query'): iq.setQueryNS(self.getQueryNS()) + return iq + +class ErrorNode(Node): + """ XMPP-style error element. + In the case of stanza error should be attached to XMPP stanza. + In the case of stream-level errors should be used separately. """ + def __init__(self,name,code=None,typ=None,text=None): + """ Create new error node object. + Mandatory parameter: name - name of error condition. + Optional parameters: code, typ, text. Used for backwards compartibility with older jabber protocol.""" + if ERRORS.has_key(name): + cod,type,txt=ERRORS[name] + ns=name.split()[0] + else: cod,ns,type,txt='500',NS_STANZAS,'cancel','' + if typ: type=typ + if code: cod=code + if text: txt=text + Node.__init__(self,'error',{},[Node(name)]) + if type: self.setAttr('type',type) + if not cod: self.setName('stream:error') + if txt: self.addChild(node=Node(ns+' text',{},[txt])) + if cod: self.setAttr('code',cod) + +class Error(Protocol): + """ Used to quickly transform received stanza into error reply.""" + def __init__(self,node,error,reply=1): + """ Create error reply basing on the received 'node' stanza and the 'error' error condition. + If the 'node' is not the received stanza but locally created ('to' and 'from' fields needs not swapping) + specify the 'reply' argument as false.""" + if reply: Protocol.__init__(self,to=node.getFrom(),frm=node.getTo(),node=node) + else: Protocol.__init__(self,node=node) + self.setError(error) + if node.getType()=='error': self.__str__=self.__dupstr__ + def __dupstr__(self,dup1=None,dup2=None): + """ Dummy function used as preventor of creating error node in reply to error node. + I.e. you will not be able to serialise "double" error into string. + """ + return '' + +class DataField(Node): + """ This class is used in the DataForm class to describe the single data item. 
+ If you are working with jabber:x:data (XEP-0004, XEP-0068, XEP-0122) + then you will need to work with instances of this class. """ + def __init__(self,name=None,value=None,typ=None,required=0,label=None,desc=None,options=[],node=None): + """ Create new data field of specified name,value and type. + Also 'required','desc' and 'options' fields can be set. + Alternatively other XML object can be passed in as the 'node' parameted to replicate it as a new datafiled. + """ + Node.__init__(self,'field',node=node) + if name: self.setVar(name) + if type(value) in [list,tuple]: self.setValues(value) + elif value: self.setValue(value) + if typ: self.setType(typ) + elif not typ and not node: self.setType('text-single') + if required: self.setRequired(required) + if label: self.setLabel(label) + if desc: self.setDesc(desc) + if options: self.setOptions(options) + def setRequired(self,req=1): + """ Change the state of the 'required' flag. """ + if req: self.setTag('required') + else: + try: self.delChild('required') + except ValueError: return + def isRequired(self): + """ Returns in this field a required one. """ + return self.getTag('required') + def setLabel(self,label): + """ Set the label of this field. """ + self.setAttr('label',label) + def getLabel(self): + """ Return the label of this field. """ + return self.getAttr('label') + def setDesc(self,desc): + """ Set the description of this field. """ + self.setTagData('desc',desc) + def getDesc(self): + """ Return the description of this field. """ + return self.getTagData('desc') + def setValue(self,val): + """ Set the value of this field. """ + self.setTagData('value',val) + def getValue(self): + return self.getTagData('value') + def setValues(self,lst): + """ Set the values of this field as values-list. + Replaces all previous filed values! 
If you need to just add a value - use addValue method.""" + while self.getTag('value'): self.delChild('value') + for val in lst: self.addValue(val) + def addValue(self,val): + """ Add one more value to this field. Used in 'get' iq's or such.""" + self.addChild('value',{},[val]) + def getValues(self): + """ Return the list of values associated with this field.""" + ret=[] + for tag in self.getTags('value'): ret.append(tag.getData()) + return ret + def getOptions(self): + """ Return label-option pairs list associated with this field.""" + ret=[] + for tag in self.getTags('option'): ret.append([tag.getAttr('label'),tag.getTagData('value')]) + return ret + def setOptions(self,lst): + """ Set label-option pairs list associated with this field.""" + while self.getTag('option'): self.delChild('option') + for opt in lst: self.addOption(opt) + def addOption(self,opt): + """ Add one more label-option pair to this field.""" + if type(opt) in [str,unicode]: self.addChild('option').setTagData('value',opt) + else: self.addChild('option',{'label':opt[0]}).setTagData('value',opt[1]) + def getType(self): + """ Get type of this field. """ + return self.getAttr('type') + def setType(self,val): + """ Set type of this field. """ + return self.setAttr('type',val) + def getVar(self): + """ Get 'var' attribute value of this field. """ + return self.getAttr('var') + def setVar(self,val): + """ Set 'var' attribute value of this field. """ + return self.setAttr('var',val) + +class DataReported(Node): + """ This class is used in the DataForm class to describe the 'reported data field' data items which are used in + 'multiple item form results' (as described in XEP-0004). + Represents the fields that will be returned from a search. This information is useful when + you try to use the jabber:iq:search namespace to return dynamic form information. + """ + def __init__(self,node=None): + """ Create new empty 'reported data' field. 
However, note that, according XEP-0004: + * It MUST contain one or more DataFields. + * Contained DataFields SHOULD possess a 'type' and 'label' attribute in addition to 'var' attribute + * Contained DataFields SHOULD NOT contain a <value/> element. + Alternatively other XML object can be passed in as the 'node' parameted to replicate it as a new + dataitem. + """ + Node.__init__(self,'reported',node=node) + if node: + newkids=[] + for n in self.getChildren(): + if n.getName()=='field': newkids.append(DataField(node=n)) + else: newkids.append(n) + self.kids=newkids + def getField(self,name): + """ Return the datafield object with name 'name' (if exists). """ + return self.getTag('field',attrs={'var':name}) + def setField(self,name,typ=None,label=None): + """ Create if nessessary or get the existing datafield object with name 'name' and return it. + If created, attributes 'type' and 'label' are applied to new datafield.""" + f=self.getField(name) + if f: return f + return self.addChild(node=DataField(name,None,typ,0,label)) + def asDict(self): + """ Represent dataitem as simple dictionary mapping of datafield names to their values.""" + ret={} + for field in self.getTags('field'): + name=field.getAttr('var') + typ=field.getType() + if isinstance(typ,(str,unicode)) and typ[-6:]=='-multi': + val=[] + for i in field.getTags('value'): val.append(i.getData()) + else: val=field.getTagData('value') + ret[name]=val + if self.getTag('instructions'): ret['instructions']=self.getInstructions() + return ret + def __getitem__(self,name): + """ Simple dictionary interface for getting datafields values by their names.""" + item=self.getField(name) + if item: return item.getValue() + raise IndexError('No such field') + def __setitem__(self,name,val): + """ Simple dictionary interface for setting datafields values by their names.""" + return self.setField(name).setValue(val) + +class DataItem(Node): + """ This class is used in the DataForm class to describe data items which are used 
in 'multiple + item form results' (as described in XEP-0004). + """ + def __init__(self,node=None): + """ Create new empty data item. However, note that, according XEP-0004, DataItem MUST contain ALL + DataFields described in DataReported. + Alternatively other XML object can be passed in as the 'node' parameted to replicate it as a new + dataitem. + """ + Node.__init__(self,'item',node=node) + if node: + newkids=[] + for n in self.getChildren(): + if n.getName()=='field': newkids.append(DataField(node=n)) + else: newkids.append(n) + self.kids=newkids + def getField(self,name): + """ Return the datafield object with name 'name' (if exists). """ + return self.getTag('field',attrs={'var':name}) + def setField(self,name): + """ Create if nessessary or get the existing datafield object with name 'name' and return it. """ + f=self.getField(name) + if f: return f + return self.addChild(node=DataField(name)) + def asDict(self): + """ Represent dataitem as simple dictionary mapping of datafield names to their values.""" + ret={} + for field in self.getTags('field'): + name=field.getAttr('var') + typ=field.getType() + if isinstance(typ,(str,unicode)) and typ[-6:]=='-multi': + val=[] + for i in field.getTags('value'): val.append(i.getData()) + else: val=field.getTagData('value') + ret[name]=val + if self.getTag('instructions'): ret['instructions']=self.getInstructions() + return ret + def __getitem__(self,name): + """ Simple dictionary interface for getting datafields values by their names.""" + item=self.getField(name) + if item: return item.getValue() + raise IndexError('No such field') + def __setitem__(self,name,val): + """ Simple dictionary interface for setting datafields values by their names.""" + return self.setField(name).setValue(val) + +class DataForm(Node): + """ DataForm class. Used for manipulating dataforms in XMPP. + Relevant XEPs: 0004, 0068, 0122. 
+ Can be used in disco, pub-sub and many other applications.""" + def __init__(self, typ=None, data=[], title=None, node=None): + """ + Create new dataform of type 'typ'; 'data' is the list of DataReported, + DataItem and DataField instances that this dataform contains; 'title' + is the title string. + You can specify the 'node' argument as the other node to be used as + base for constructing this dataform. + + title and instructions is optional and SHOULD NOT contain newlines. + Several instructions MAY be present. + 'typ' can be one of ('form' | 'submit' | 'cancel' | 'result' ) + 'typ' of reply iq can be ( 'result' | 'set' | 'set' | 'result' ) respectively. + 'cancel' form can not contain any fields. All other forms contains AT LEAST one field. + 'title' MAY be included in forms of type "form" and "result" + """ + Node.__init__(self,'x',node=node) + if node: + newkids=[] + for n in self.getChildren(): + if n.getName()=='field': newkids.append(DataField(node=n)) + elif n.getName()=='item': newkids.append(DataItem(node=n)) + elif n.getName()=='reported': newkids.append(DataReported(node=n)) + else: newkids.append(n) + self.kids=newkids + if typ: self.setType(typ) + self.setNamespace(NS_DATA) + if title: self.setTitle(title) + if type(data)==type({}): + newdata=[] + for name in data.keys(): newdata.append(DataField(name,data[name])) + data=newdata + for child in data: + if type(child) in [type(''),type(u'')]: self.addInstructions(child) + elif child.__class__.__name__=='DataField': self.kids.append(child) + elif child.__class__.__name__=='DataItem': self.kids.append(child) + elif child.__class__.__name__=='DataReported': self.kids.append(child) + else: self.kids.append(DataField(node=child)) + def getType(self): + """ Return the type of dataform. """ + return self.getAttr('type') + def setType(self,typ): + """ Set the type of dataform. """ + self.setAttr('type',typ) + def getTitle(self): + """ Return the title of dataform. 
""" + return self.getTagData('title') + def setTitle(self,text): + """ Set the title of dataform. """ + self.setTagData('title',text) + def getInstructions(self): + """ Return the instructions of dataform. """ + return self.getTagData('instructions') + def setInstructions(self,text): + """ Set the instructions of dataform. """ + self.setTagData('instructions',text) + def addInstructions(self,text): + """ Add one more instruction to the dataform. """ + self.addChild('instructions',{},[text]) + def getField(self,name): + """ Return the datafield object with name 'name' (if exists). """ + return self.getTag('field',attrs={'var':name}) + def setField(self,name): + """ Create if nessessary or get the existing datafield object with name 'name' and return it. """ + f=self.getField(name) + if f: return f + return self.addChild(node=DataField(name)) + def asDict(self): + """ Represent dataform as simple dictionary mapping of datafield names to their values.""" + ret={} + for field in self.getTags('field'): + name=field.getAttr('var') + typ=field.getType() + if isinstance(typ,(str,unicode)) and typ[-6:]=='-multi': + val=[] + for i in field.getTags('value'): val.append(i.getData()) + else: val=field.getTagData('value') + ret[name]=val + if self.getTag('instructions'): ret['instructions']=self.getInstructions() + return ret + def __getitem__(self,name): + """ Simple dictionary interface for getting datafields values by their names.""" + item=self.getField(name) + if item: return item.getValue() + raise IndexError('No such field') + def __setitem__(self,name,val): + """ Simple dictionary interface for setting datafields values by their names.""" + return self.setField(name).setValue(val) diff --git a/libs/xmpp/roster.py b/libs/xmpp/roster.py new file mode 100644 index 0000000..676a4c9 --- /dev/null +++ b/libs/xmpp/roster.py @@ -0,0 +1,184 @@ +## roster.py +## +## Copyright (C) 2003-2005 Alexey "Snake" Nezhdanov +## +## This program is free software; you can redistribute it 
and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. + +# $Id: roster.py,v 1.20 2005/07/13 13:22:52 snakeru Exp $ + +""" +Simple roster implementation. Can be used though for different tasks like +mass-renaming of contacts. +""" + +from protocol import * +from client import PlugIn + +class Roster(PlugIn): + """ Defines a plenty of methods that will allow you to manage roster. + Also automatically track presences from remote JIDs taking into + account that every JID can have multiple resources connected. Does not + currently support 'error' presences. + You can also use mapping interface for access to the internal representation of + contacts in roster. + """ + def __init__(self): + """ Init internal variables. """ + PlugIn.__init__(self) + self.DBG_LINE='roster' + self._data = {} + self.set=None + self._exported_methods=[self.getRoster] + + def plugin(self,owner,request=1): + """ Register presence and subscription trackers in the owner's dispatcher. + Also request roster from server if the 'request' argument is set. + Used internally.""" + self._owner.RegisterHandler('iq',self.RosterIqHandler,'result',NS_ROSTER) + self._owner.RegisterHandler('iq',self.RosterIqHandler,'set',NS_ROSTER) + self._owner.RegisterHandler('presence',self.PresenceHandler) + if request: self.Request() + + def Request(self,force=0): + """ Request roster from server if it were not yet requested + (or if the 'force' argument is set). 
""" + if self.set is None: self.set=0 + elif not force: return + self._owner.send(Iq('get',NS_ROSTER)) + self.DEBUG('Roster requested from server','start') + + def getRoster(self): + """ Requests roster from server if neccessary and returns self.""" + if not self.set: self.Request() + while not self.set: self._owner.Process(10) + return self + + def RosterIqHandler(self,dis,stanza): + """ Subscription tracker. Used internally for setting items state in + internal roster representation. """ + for item in stanza.getTag('query').getTags('item'): + jid=item.getAttr('jid') + if item.getAttr('subscription')=='remove': + if self._data.has_key(jid): del self._data[jid] + raise NodeProcessed # a MUST + self.DEBUG('Setting roster item %s...'%jid,'ok') + if not self._data.has_key(jid): self._data[jid]={} + self._data[jid]['name']=item.getAttr('name') + self._data[jid]['ask']=item.getAttr('ask') + self._data[jid]['subscription']=item.getAttr('subscription') + self._data[jid]['groups']=[] + if not self._data[jid].has_key('resources'): self._data[jid]['resources']={} + for group in item.getTags('group'): self._data[jid]['groups'].append(group.getData()) + self._data[self._owner.User+'@'+self._owner.Server]={'resources':{},'name':None,'ask':None,'subscription':None,'groups':None,} + self.set=1 + raise NodeProcessed # a MUST. Otherwise you'll get back an <iq type='error'/> + + def PresenceHandler(self,dis,pres): + """ Presence tracker. Used internally for setting items' resources state in + internal roster representation. 
""" + jid=JID(pres.getFrom()) + if not self._data.has_key(jid.getStripped()): self._data[jid.getStripped()]={'name':None,'ask':None,'subscription':'none','groups':['Not in roster'],'resources':{}} + + item=self._data[jid.getStripped()] + typ=pres.getType() + + if not typ: + self.DEBUG('Setting roster item %s for resource %s...'%(jid.getStripped(),jid.getResource()),'ok') + item['resources'][jid.getResource()]=res={'show':None,'status':None,'priority':'0','timestamp':None} + if pres.getTag('show'): res['show']=pres.getShow() + if pres.getTag('status'): res['status']=pres.getStatus() + if pres.getTag('priority'): res['priority']=pres.getPriority() + if not pres.getTimestamp(): pres.setTimestamp() + res['timestamp']=pres.getTimestamp() + elif typ=='unavailable' and item['resources'].has_key(jid.getResource()): del item['resources'][jid.getResource()] + # Need to handle type='error' also + + def _getItemData(self,jid,dataname): + """ Return specific jid's representation in internal format. Used internally. """ + jid=jid[:(jid+'/').find('/')] + return self._data[jid][dataname] + def _getResourceData(self,jid,dataname): + """ Return specific jid's resource representation in internal format. Used internally. 
""" + if jid.find('/')+1: + jid,resource=jid.split('/',1) + if self._data[jid]['resources'].has_key(resource): return self._data[jid]['resources'][resource][dataname] + elif self._data[jid]['resources'].keys(): + lastpri=-129 + for r in self._data[jid]['resources'].keys(): + if int(self._data[jid]['resources'][r]['priority'])>lastpri: resource,lastpri=r,int(self._data[jid]['resources'][r]['priority']) + return self._data[jid]['resources'][resource][dataname] + def delItem(self,jid): + """ Delete contact 'jid' from roster.""" + self._owner.send(Iq('set',NS_ROSTER,payload=[Node('item',{'jid':jid,'subscription':'remove'})])) + def getAsk(self,jid): + """ Returns 'ask' value of contact 'jid'.""" + return self._getItemData(jid,'ask') + def getGroups(self,jid): + """ Returns groups list that contact 'jid' belongs to.""" + return self._getItemData(jid,'groups') + def getName(self,jid): + """ Returns name of contact 'jid'.""" + return self._getItemData(jid,'name') + def getPriority(self,jid): + """ Returns priority of contact 'jid'. 'jid' should be a full (not bare) JID.""" + return self._getResourceData(jid,'priority') + def getRawRoster(self): + """ Returns roster representation in internal format. """ + return self._data + def getRawItem(self,jid): + """ Returns roster item 'jid' representation in internal format. """ + return self._data[jid[:(jid+'/').find('/')]] + def getShow(self, jid): + """ Returns 'show' value of contact 'jid'. 'jid' should be a full (not bare) JID.""" + return self._getResourceData(jid,'show') + def getStatus(self, jid): + """ Returns 'status' value of contact 'jid'. 
'jid' should be a full (not bare) JID.""" + return self._getResourceData(jid,'status') + def getSubscription(self,jid): + """ Returns 'subscription' value of contact 'jid'.""" + return self._getItemData(jid,'subscription') + def getResources(self,jid): + """ Returns list of connected resources of contact 'jid'.""" + return self._data[jid[:(jid+'/').find('/')]]['resources'].keys() + def setItem(self,jid,name=None,groups=[]): + """ Creates/renames contact 'jid' and sets the groups list that it now belongs to.""" + iq=Iq('set',NS_ROSTER) + query=iq.getTag('query') + attrs={'jid':jid} + if name: attrs['name']=name + item=query.setTag('item',attrs) + for group in groups: item.addChild(node=Node('group',payload=[group])) + self._owner.send(iq) + def getItems(self): + """ Return list of all [bare] JIDs that the roster is currently tracks.""" + return self._data.keys() + def keys(self): + """ Same as getItems. Provided for the sake of dictionary interface.""" + return self._data.keys() + def __getitem__(self,item): + """ Get the contact in the internal format. Raises KeyError if JID 'item' is not in roster.""" + return self._data[item] + def getItem(self,item): + """ Get the contact in the internal format (or None if JID 'item' is not in roster).""" + if self._data.has_key(item): return self._data[item] + def Subscribe(self,jid): + """ Send subscription request to JID 'jid'.""" + self._owner.send(Presence(jid,'subscribe')) + def Unsubscribe(self,jid): + """ Ask for removing our subscription for JID 'jid'.""" + self._owner.send(Presence(jid,'unsubscribe')) + def Authorize(self,jid): + """ Authorise JID 'jid'. Works only if these JID requested auth previously. """ + self._owner.send(Presence(jid,'subscribed')) + def Unauthorize(self,jid): + """ Unauthorise JID 'jid'. Use for declining authorisation request + or for removing existing authorization. 
""" + self._owner.send(Presence(jid,'unsubscribed')) diff --git a/libs/xmpp/session.py b/libs/xmpp/session.py new file mode 100644 index 0000000..24066b3 --- /dev/null +++ b/libs/xmpp/session.py @@ -0,0 +1,349 @@ +## +## XMPP server +## +## Copyright (C) 2004 Alexey "Snake" Nezhdanov +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. + +__version__="$Id" + +""" +When your handler is called it is getting the session instance as the first argument. +This is the difference from xmpppy 0.1 where you got the "Client" instance. +With Session class you can have "multi-session" client instead of having +one client for each connection. Is is specifically important when you are +writing the server. +""" + +from protocol import * + +# Transport-level flags +SOCKET_UNCONNECTED =0 +SOCKET_ALIVE =1 +SOCKET_DEAD =2 +# XML-level flags +STREAM__NOT_OPENED =1 +STREAM__OPENED =2 +STREAM__CLOSING =3 +STREAM__CLOSED =4 +# XMPP-session flags +SESSION_NOT_AUTHED =1 +SESSION_AUTHED =2 +SESSION_BOUND =3 +SESSION_OPENED =4 +SESSION_CLOSED =5 + +class Session: + """ + The Session class instance is used for storing all session-related info like + credentials, socket/xml stream/session state flags, roster items (in case of + client type connection) etc. + Session object have no means of discovering is any info is ready to be read. + Instead you should use poll() (recomended) or select() methods for this purpose. + Session can be one of two types: 'server' and 'client'. 
'server' session handles + inbound connection and 'client' one used to create an outbound one. + Session instance have multitude of internal attributes. The most imporant is the 'peer' one. + It is set once the peer is authenticated (client). + """ + def __init__(self,socket,owner,xmlns=None,peer=None): + """ When the session is created it's type (client/server) is determined from the beginning. + socket argument is the pre-created socket-like object. + It must have the following methods: send, recv, fileno, close. + owner is the 'master' instance that have Dispatcher plugged into it and generally + will take care about all session events. + xmlns is the stream namespace that will be used. Client must set this argument + If server sets this argument than stream will be dropped if opened with some another namespace. + peer is the name of peer instance. This is the flag that differentiates client session from + server session. Client must set it to the name of the server that will be connected, server must + leave this argument alone. 
+ """ + self.xmlns=xmlns + if peer: + self.TYP='client' + self.peer=peer + self._socket_state=SOCKET_UNCONNECTED + else: + self.TYP='server' + self.peer=None + self._socket_state=SOCKET_ALIVE + self._sock=socket + self._send=socket.send + self._recv=socket.recv + self.fileno=socket.fileno + self._registered=0 + + self.Dispatcher=owner.Dispatcher + self.DBG_LINE='session' + self.DEBUG=owner.Dispatcher.DEBUG + self._expected={} + self._owner=owner + if self.TYP=='server': self.ID=`random.random()`[2:] + else: self.ID=None + + self.sendbuffer='' + self._stream_pos_queued=None + self._stream_pos_sent=0 + self.deliver_key_queue=[] + self.deliver_queue_map={} + self.stanza_queue=[] + + self._session_state=SESSION_NOT_AUTHED + self.waiting_features=[] + for feature in [NS_TLS,NS_SASL,NS_BIND,NS_SESSION]: + if feature in owner.features: self.waiting_features.append(feature) + self.features=[] + self.feature_in_process=None + self.slave_session=None + self.StartStream() + + def StartStream(self): + """ This method is used to initialise the internal xml expat parser + and to send initial stream header (in case of client connection). + Should be used after initial connection and after every stream restart.""" + self._stream_state=STREAM__NOT_OPENED + self.Stream=simplexml.NodeBuilder() + self.Stream._dispatch_depth=2 + self.Stream.dispatch=self._dispatch + self.Parse=self.Stream.Parse + self.Stream.stream_footer_received=self._stream_close + if self.TYP=='client': + self.Stream.stream_header_received=self._catch_stream_id + self._stream_open() + else: + self.Stream.stream_header_received=self._stream_open + + def receive(self): + """ Reads all pending incoming data. + Raises IOError on disconnection. 
+ Blocks until at least one byte is read.""" + try: received = self._recv(10240) + except: received = '' + + if len(received): # length of 0 means disconnect + self.DEBUG(`self.fileno()`+' '+received,'got') + else: + self.DEBUG('Socket error while receiving data','error') + self.set_socket_state(SOCKET_DEAD) + raise IOError("Peer disconnected") + return received + + def sendnow(self,chunk): + """ Put chunk into "immidiatedly send" queue. + Should only be used for auth/TLS stuff and like. + If you just want to shedule regular stanza for delivery use enqueue method. + """ + if isinstance(chunk,Node): chunk = chunk.__str__().encode('utf-8') + elif type(chunk)==type(u''): chunk = chunk.encode('utf-8') + self.enqueue(chunk) + + def enqueue(self,stanza): + """ Takes Protocol instance as argument. + Puts stanza into "send" fifo queue. Items into the send queue are hold until + stream authenticated. After that this method is effectively the same as "sendnow" method.""" + if isinstance(stanza,Protocol): + self.stanza_queue.append(stanza) + else: self.sendbuffer+=stanza + if self._socket_state>=SOCKET_ALIVE: self.push_queue() + + def push_queue(self,failreason=ERR_RECIPIENT_UNAVAILABLE): + """ If stream is authenticated than move items from "send" queue to "immidiatedly send" queue. + Else if the stream is failed then return all queued stanzas with error passed as argument. + Otherwise do nothing.""" + # If the stream authed - convert stanza_queue into sendbuffer and set the checkpoints + + if self._stream_state>=STREAM__CLOSED or self._socket_state>=SOCKET_DEAD: # the stream failed. Return all stanzas that are still waiting for delivery. + self._owner.deactivatesession(self) + for key in self.deliver_key_queue: # Not sure. May be I + self._dispatch(Error(self.deliver_queue_map[key],failreason),trusted=1) # should simply re-dispatch it? 
+ for stanza in self.stanza_queue: # But such action can invoke + self._dispatch(Error(stanza,failreason),trusted=1) # Infinite loops in case of S2S connection... + self.deliver_queue_map,self.deliver_key_queue,self.stanza_queue={},[],[] + return + elif self._session_state>=SESSION_AUTHED: # FIXME! Должен быть какой-то другой флаг. + #### LOCK_QUEUE + for stanza in self.stanza_queue: + txt=stanza.__str__().encode('utf-8') + self.sendbuffer+=txt + self._stream_pos_queued+=len(txt) # should be re-evaluated for SSL connection. + self.deliver_queue_map[self._stream_pos_queued]=stanza # position of the stream when stanza will be successfully and fully sent + self.deliver_key_queue.append(self._stream_pos_queued) + self.stanza_queue=[] + #### UNLOCK_QUEUE + + def flush_queue(self): + """ Put the "immidiatedly send" queue content on the wire. Blocks until at least one byte sent.""" + if self.sendbuffer: + try: + # LOCK_QUEUE + sent=self._send(self.sendbuffer) # Блокирующая штучка! + except: + # UNLOCK_QUEUE + self.set_socket_state(SOCKET_DEAD) + self.DEBUG("Socket error while sending data",'error') + return self.terminate_stream() + self.DEBUG(`self.fileno()`+' '+self.sendbuffer[:sent],'sent') + self._stream_pos_sent+=sent + self.sendbuffer=self.sendbuffer[sent:] + self._stream_pos_delivered=self._stream_pos_sent # Should be acquired from socket somehow. Take SSL into account. + while self.deliver_key_queue and self._stream_pos_delivered>self.deliver_key_queue[0]: + del self.deliver_queue_map[self.deliver_key_queue[0]] + self.deliver_key_queue.remove(self.deliver_key_queue[0]) + # UNLOCK_QUEUE + + def _dispatch(self,stanza,trusted=0): + """ This is callback that is used to pass the received stanza forth to owner's dispatcher + _if_ the stream is authorised. Otherwise the stanza is just dropped. + The 'trusted' argument is used to emulate stanza receive. + This method is used internally. 
+ """ + self._owner.packets+=1 + if self._stream_state==STREAM__OPENED or trusted: # if the server really should reject all stanzas after he is closed stream (himeself)? + self.DEBUG(stanza.__str__(),'dispatch') + stanza.trusted=trusted + return self.Dispatcher.dispatch(stanza,self) + + def _catch_stream_id(self,ns=None,tag='stream',attrs={}): + """ This callback is used to detect the stream namespace of incoming stream. Used internally. """ + if not attrs.has_key('id') or not attrs['id']: + return self.terminate_stream(STREAM_INVALID_XML) + self.ID=attrs['id'] + if not attrs.has_key('version'): self._owner.Dialback(self) + + def _stream_open(self,ns=None,tag='stream',attrs={}): + """ This callback is used to handle opening stream tag of the incoming stream. + In the case of client session it just make some validation. + Server session also sends server headers and if the stream valid the features node. + Used internally. """ + text='<?xml version="1.0" encoding="utf-8"?>\n<stream:stream' + if self.TYP=='client': + text+=' to="%s"'%self.peer + else: + text+=' id="%s"'%self.ID + if not attrs.has_key('to'): text+=' from="%s"'%self._owner.servernames[0] + else: text+=' from="%s"'%attrs['to'] + if attrs.has_key('xml:lang'): text+=' xml:lang="%s"'%attrs['xml:lang'] + if self.xmlns: xmlns=self.xmlns + else: xmlns=NS_SERVER + text+=' xmlns:db="%s" xmlns:stream="%s" xmlns="%s"'%(NS_DIALBACK,NS_STREAMS,xmlns) + if attrs.has_key('version') or self.TYP=='client': text+=' version="1.0"' + self.sendnow(text+'>') + self.set_stream_state(STREAM__OPENED) + if self.TYP=='client': return + if tag<>'stream': return self.terminate_stream(STREAM_INVALID_XML) + if ns<>NS_STREAMS: return self.terminate_stream(STREAM_INVALID_NAMESPACE) + if self.Stream.xmlns<>self.xmlns: return self.terminate_stream(STREAM_BAD_NAMESPACE_PREFIX) + if not attrs.has_key('to'): return self.terminate_stream(STREAM_IMPROPER_ADDRESSING) + if attrs['to'] not in self._owner.servernames: return 
self.terminate_stream(STREAM_HOST_UNKNOWN) + self.ourname=attrs['to'].lower() + if self.TYP=='server' and attrs.has_key('version'): + # send features + features=Node('stream:features') + if NS_TLS in self.waiting_features: + features.NT.starttls.setNamespace(NS_TLS) + features.T.starttls.NT.required + if NS_SASL in self.waiting_features: + features.NT.mechanisms.setNamespace(NS_SASL) + for mec in self._owner.SASL.mechanisms: + features.T.mechanisms.NT.mechanism=mec + else: + if NS_BIND in self.waiting_features: features.NT.bind.setNamespace(NS_BIND) + if NS_SESSION in self.waiting_features: features.NT.session.setNamespace(NS_SESSION) + self.sendnow(features) + + def feature(self,feature): + """ Declare some stream feature as activated one. """ + if feature not in self.features: self.features.append(feature) + self.unfeature(feature) + + def unfeature(self,feature): + """ Declare some feature as illegal. Illegal features can not be used. + Example: BIND feature becomes illegal after Non-SASL auth. """ + if feature in self.waiting_features: self.waiting_features.remove(feature) + + def _stream_close(self,unregister=1): + """ Write the closing stream tag and destroy the underlaying socket. Used internally. """ + if self._stream_state>=STREAM__CLOSED: return + self.set_stream_state(STREAM__CLOSING) + self.sendnow('</stream:stream>') + self.set_stream_state(STREAM__CLOSED) + self.push_queue() # decompose queue really since STREAM__CLOSED + self._owner.flush_queues() + if unregister: self._owner.unregistersession(self) + self._destroy_socket() + + def terminate_stream(self,error=None,unregister=1): + """ Notify the peer about stream closure. + Ensure that xmlstream is not brokes - i.e. if the stream isn't opened yet - + open it before closure. + If the error condition is specified than create a stream error and send it along with + closing stream tag. + Emulate receiving 'unavailable' type presence just before stream closure. 
+ """ + if self._stream_state>=STREAM__CLOSING: return + if self._stream_state<STREAM__OPENED: + self.set_stream_state(STREAM__CLOSING) + self._stream_open() + else: + self.set_stream_state(STREAM__CLOSING) + p=Presence(typ='unavailable') + p.setNamespace(NS_CLIENT) + self._dispatch(p,trusted=1) + if error: + if isinstance(error,Node): self.sendnow(error) + else: self.sendnow(ErrorNode(error)) + self._stream_close(unregister=unregister) + if self.slave_session: + self.slave_session.terminate_stream(STREAM_REMOTE_CONNECTION_FAILED) + + def _destroy_socket(self): + """ Break cyclic dependancies to let python's GC free memory right now.""" + self.Stream.dispatch=None + self.Stream.stream_footer_received=None + self.Stream.stream_header_received=None + self.Stream.destroy() + self._sock.close() + self.set_socket_state(SOCKET_DEAD) + + def start_feature(self,f): + """ Declare some feature as "negotiating now" to prevent other features from start negotiating. """ + if self.feature_in_process: raise "Starting feature %s over %s !"%(f,self.feature_in_process) + self.feature_in_process=f + + def stop_feature(self,f): + """ Declare some feature as "negotiated" to allow other features start negotiating. """ + if self.feature_in_process<>f: raise "Stopping feature %s instead of %s !"%(f,self.feature_in_process) + self.feature_in_process=None + + def set_socket_state(self,newstate): + """ Change the underlaying socket state. + Socket starts with SOCKET_UNCONNECTED state + and then proceeds (possibly) to SOCKET_ALIVE + and then to SOCKET_DEAD """ + if self._socket_state<newstate: self._socket_state=newstate + + def set_session_state(self,newstate): + """ Change the session state. + Session starts with SESSION_NOT_AUTHED state + and then comes through + SESSION_AUTHED, SESSION_BOUND, SESSION_OPENED and SESSION_CLOSED states. 
+ """ + if self._session_state<newstate: + if self._session_state<SESSION_AUTHED and \ + newstate>=SESSION_AUTHED: self._stream_pos_queued=self._stream_pos_sent + self._session_state=newstate + + def set_stream_state(self,newstate): + """ Change the underlaying XML stream state + Stream starts with STREAM__NOT_OPENED and then proceeds with + STREAM__OPENED, STREAM__CLOSING and STREAM__CLOSED states. + Note that some features (like TLS and SASL) + requires stream re-start so this state can have non-linear changes. """ + if self._stream_state<newstate: self._stream_state=newstate diff --git a/libs/xmpp/simplexml.py b/libs/xmpp/simplexml.py new file mode 100644 index 0000000..99383de --- /dev/null +++ b/libs/xmpp/simplexml.py @@ -0,0 +1,485 @@ +## simplexml.py based on Mattew Allum's xmlstream.py +## +## Copyright (C) 2003-2005 Alexey "Snake" Nezhdanov +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. + +# $Id: simplexml.py,v 1.34 2009/03/03 10:24:02 normanr Exp $ + +"""Simplexml module provides xmpppy library with all needed tools to handle XML nodes and XML streams. +I'm personally using it in many other separate projects. 
It is designed to be as standalone as possible.""" + +import xml.parsers.expat + +def XMLescape(txt): + """Returns provided string with symbols & < > " replaced by their respective XML entities.""" + # replace also FORM FEED and ESC, because they are not valid XML chars + return txt.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;").replace('"', "&quot;").replace(u'\x0C', "").replace(u'\x1B', "") + +ENCODING='utf-8' +def ustr(what): + """Converts object "what" to unicode string using it's own __str__ method if accessible or unicode method otherwise.""" + if isinstance(what, unicode): return what + try: r=what.__str__() + except AttributeError: r=str(what) + if not isinstance(r, unicode): return unicode(r,ENCODING) + return r + +class Node(object): + """ Node class describes syntax of separate XML Node. It have a constructor that permits node creation + from set of "namespace name", attributes and payload of text strings and other nodes. + It does not natively support building node from text string and uses NodeBuilder class for that purpose. + After creation node can be mangled in many ways so it can be completely changed. + Also node can be serialised into string in one of two modes: default (where the textual representation + of node describes it exactly) and "fancy" - with whitespace added to make indentation and thus make + result more readable by human. + + Node class have attribute FORCE_NODE_RECREATION that is defaults to False thus enabling fast node + replication from the some other node. The drawback of the fast way is that new node shares some + info with the "original" node that is changing the one node may influence the other. Though it is + rarely needed (in xmpppy it is never needed at all since I'm usually never using original node after + replication (and using replication only to move upwards on the classes tree). 
+ """ + FORCE_NODE_RECREATION=0 + def __init__(self, tag=None, attrs={}, payload=[], parent=None, nsp=None, node_built=False, node=None): + """ Takes "tag" argument as the name of node (prepended by namespace, if needed and separated from it + by a space), attrs dictionary as the set of arguments, payload list as the set of textual strings + and child nodes that this node carries within itself and "parent" argument that is another node + that this one will be the child of. Also the __init__ can be provided with "node" argument that is + either a text string containing exactly one node or another Node instance to begin with. If both + "node" and other arguments is provided then the node initially created as replica of "node" + provided and then modified to be compliant with other arguments.""" + if node: + if self.FORCE_NODE_RECREATION and isinstance(node, Node): + node=str(node) + if not isinstance(node, Node): + node=NodeBuilder(node,self) + node_built = True + else: + self.name,self.namespace,self.attrs,self.data,self.kids,self.parent,self.nsd = node.name,node.namespace,{},[],[],node.parent,{} + for key in node.attrs.keys(): self.attrs[key]=node.attrs[key] + for data in node.data: self.data.append(data) + for kid in node.kids: self.kids.append(kid) + for k,v in node.nsd.items(): self.nsd[k] = v + else: self.name,self.namespace,self.attrs,self.data,self.kids,self.parent,self.nsd = 'tag','',{},[],[],None,{} + if parent: + self.parent = parent + self.nsp_cache = {} + if nsp: + for k,v in nsp.items(): self.nsp_cache[k] = v + for attr,val in attrs.items(): + if attr == 'xmlns': + self.nsd[u''] = val + elif attr.startswith('xmlns:'): + self.nsd[attr[6:]] = val + self.attrs[attr]=attrs[attr] + if tag: + if node_built: + pfx,self.name = (['']+tag.split(':'))[-2:] + self.namespace = self.lookup_nsp(pfx) + else: + if ' ' in tag: + self.namespace,self.name = tag.split() + else: + self.name = tag + if isinstance(payload, basestring): payload=[payload] + for i in payload: + 
if isinstance(i, Node): self.addChild(node=i) + else: self.data.append(ustr(i)) + + def lookup_nsp(self,pfx=''): + ns = self.nsd.get(pfx,None) + if ns is None: + ns = self.nsp_cache.get(pfx,None) + if ns is None: + if self.parent: + ns = self.parent.lookup_nsp(pfx) + self.nsp_cache[pfx] = ns + else: + return 'http://www.gajim.org/xmlns/undeclared' + return ns + + def __str__(self,fancy=0): + """ Method used to dump node into textual representation. + if "fancy" argument is set to True produces indented output for readability.""" + s = (fancy-1) * 2 * ' ' + "<" + self.name + if self.namespace: + if not self.parent or self.parent.namespace!=self.namespace: + if 'xmlns' not in self.attrs: + s = s + ' xmlns="%s"'%self.namespace + for key in self.attrs.keys(): + val = ustr(self.attrs[key]) + s = s + ' %s="%s"' % ( key, XMLescape(val) ) + s = s + ">" + cnt = 0 + if self.kids: + if fancy: s = s + "\n" + for a in self.kids: + if not fancy and (len(self.data)-1)>=cnt: s=s+XMLescape(self.data[cnt]) + elif (len(self.data)-1)>=cnt: s=s+XMLescape(self.data[cnt].strip()) + if isinstance(a, Node): + s = s + a.__str__(fancy and fancy+1) + elif a: + s = s + a.__str__() + cnt=cnt+1 + if not fancy and (len(self.data)-1) >= cnt: s = s + XMLescape(self.data[cnt]) + elif (len(self.data)-1) >= cnt: s = s + XMLescape(self.data[cnt].strip()) + if not self.kids and s.endswith('>'): + s=s[:-1]+' />' + if fancy: s = s + "\n" + else: + if fancy and not self.data: s = s + (fancy-1) * 2 * ' ' + s = s + "</" + self.name + ">" + if fancy: s = s + "\n" + return s + def getCDATA(self): + """ Serialise node, dropping all tags and leaving CDATA intact. + That is effectively kills all formatiing, leaving only text were contained in XML. 
+ """ + s = "" + cnt = 0 + if self.kids: + for a in self.kids: + s=s+self.data[cnt] + if a: s = s + a.getCDATA() + cnt=cnt+1 + if (len(self.data)-1) >= cnt: s = s + self.data[cnt] + return s + def addChild(self, name=None, attrs={}, payload=[], namespace=None, node=None): + """ If "node" argument is provided, adds it as child node. Else creates new node from + the other arguments' values and adds it as well.""" + if 'xmlns' in attrs: + raise AttributeError("Use namespace=x instead of attrs={'xmlns':x}") + if node: + newnode=node + node.parent = self + else: newnode=Node(tag=name, parent=self, attrs=attrs, payload=payload) + if namespace: + newnode.setNamespace(namespace) + self.kids.append(newnode) + self.data.append(u'') + return newnode + def addData(self, data): + """ Adds some CDATA to node. """ + self.data.append(ustr(data)) + self.kids.append(None) + def clearData(self): + """ Removes all CDATA from the node. """ + self.data=[] + def delAttr(self, key): + """ Deletes an attribute "key" """ + del self.attrs[key] + def delChild(self, node, attrs={}): + """ Deletes the "node" from the node's childs list, if "node" is an instance. + Else deletes the first node that have specified name and (optionally) attributes. """ + if not isinstance(node, Node): node=self.getTag(node,attrs) + self.kids[self.kids.index(node)]=None + return node + def getAttrs(self): + """ Returns all node's attributes as dictionary. """ + return self.attrs + def getAttr(self, key): + """ Returns value of specified attribute. """ + try: return self.attrs[key] + except: return None + def getChildren(self): + """ Returns all node's child nodes as list. """ + return self.kids + def getData(self): + """ Returns all node CDATA as string (concatenated). 
""" + return ''.join(self.data) + def getName(self): + """ Returns the name of node """ + return self.name + def getNamespace(self): + """ Returns the namespace of node """ + return self.namespace + def getParent(self): + """ Returns the parent of node (if present). """ + return self.parent + def getPayload(self): + """ Return the payload of node i.e. list of child nodes and CDATA entries. + F.e. for "<node>text1<nodea/><nodeb/> text2</node>" will be returned list: + ['text1', <nodea instance>, <nodeb instance>, ' text2']. """ + ret=[] + for i in range(max(len(self.data),len(self.kids))): + if i < len(self.data) and self.data[i]: ret.append(self.data[i]) + if i < len(self.kids) and self.kids[i]: ret.append(self.kids[i]) + return ret + def getTag(self, name, attrs={}, namespace=None): + """ Filters all child nodes using specified arguments as filter. + Returns the first found or None if not found. """ + return self.getTags(name, attrs, namespace, one=1) + def getTagAttr(self,tag,attr): + """ Returns attribute value of the child with specified name (or None if no such attribute).""" + try: return self.getTag(tag).attrs[attr] + except: return None + def getTagData(self,tag): + """ Returns cocatenated CDATA of the child with specified name.""" + try: return self.getTag(tag).getData() + except: return None + def getTags(self, name, attrs={}, namespace=None, one=0): + """ Filters all child nodes using specified arguments as filter. + Returns the list of nodes found. """ + nodes=[] + for node in self.kids: + if not node: continue + if namespace and namespace!=node.getNamespace(): continue + if node.getName() == name: + for key in attrs.keys(): + if key not in node.attrs or node.attrs[key]!=attrs[key]: break + else: nodes.append(node) + if one and nodes: return nodes[0] + if not one: return nodes + + def iterTags(self, name, attrs={}, namespace=None): + """ Iterate over all children using specified arguments as filter. 
""" + for node in self.kids: + if not node: continue + if namespace is not None and namespace!=node.getNamespace(): continue + if node.getName() == name: + for key in attrs.keys(): + if key not in node.attrs or \ + node.attrs[key]!=attrs[key]: break + else: + yield node + + def setAttr(self, key, val): + """ Sets attribute "key" with the value "val". """ + self.attrs[key]=val + def setData(self, data): + """ Sets node's CDATA to provided string. Resets all previous CDATA!""" + self.data=[ustr(data)] + def setName(self,val): + """ Changes the node name. """ + self.name = val + def setNamespace(self, namespace): + """ Changes the node namespace. """ + self.namespace=namespace + def setParent(self, node): + """ Sets node's parent to "node". WARNING: do not checks if the parent already present + and not removes the node from the list of childs of previous parent. """ + self.parent = node + def setPayload(self,payload,add=0): + """ Sets node payload according to the list specified. WARNING: completely replaces all node's + previous content. If you wish just to add child or CDATA - use addData or addChild methods. """ + if isinstance(payload, basestring): payload=[payload] + if add: self.kids+=payload + else: self.kids=payload + def setTag(self, name, attrs={}, namespace=None): + """ Same as getTag but if the node with specified namespace/attributes not found, creates such + node and returns it. """ + node=self.getTags(name, attrs, namespace=namespace, one=1) + if node: return node + else: return self.addChild(name, attrs, namespace=namespace) + def setTagAttr(self,tag,attr,val): + """ Creates new node (if not already present) with name "tag" + and sets it's attribute "attr" to value "val". """ + try: self.getTag(tag).attrs[attr]=val + except: self.addChild(tag,attrs={attr:val}) + def setTagData(self,tag,val,attrs={}): + """ Creates new node (if not already present) with name "tag" and (optionally) attributes "attrs" + and sets it's CDATA to string "val". 
""" + try: self.getTag(tag,attrs).setData(ustr(val)) + except: self.addChild(tag,attrs,payload=[ustr(val)]) + def has_attr(self,key): + """ Checks if node have attribute "key".""" + return key in self.attrs + def __getitem__(self,item): + """ Returns node's attribute "item" value. """ + return self.getAttr(item) + def __setitem__(self,item,val): + """ Sets node's attribute "item" value. """ + return self.setAttr(item,val) + def __delitem__(self,item): + """ Deletes node's attribute "item". """ + return self.delAttr(item) + def __getattr__(self,attr): + """ Reduce memory usage caused by T/NT classes - use memory only when needed. """ + if attr=='T': + self.T=T(self) + return self.T + if attr=='NT': + self.NT=NT(self) + return self.NT + raise AttributeError + +class T: + """ Auxiliary class used to quick access to node's child nodes. """ + def __init__(self,node): self.__dict__['node']=node + def __getattr__(self,attr): return self.node.getTag(attr) + def __setattr__(self,attr,val): + if isinstance(val,Node): Node.__init__(self.node.setTag(attr),node=val) + else: return self.node.setTagData(attr,val) + def __delattr__(self,attr): return self.node.delChild(attr) + +class NT(T): + """ Auxiliary class used to quick create node's child nodes. """ + def __getattr__(self,attr): return self.node.addChild(attr) + def __setattr__(self,attr,val): + if isinstance(val,Node): self.node.addChild(attr,node=val) + else: return self.node.addChild(attr,payload=[val]) + +DBG_NODEBUILDER = 'nodebuilder' +class NodeBuilder: + """ Builds a Node class minidom from data parsed to it. This class used for two purposes: + 1. Creation an XML Node from a textual representation. F.e. reading a config file. See an XML2Node method. + 2. Handling an incoming XML stream. This is done by mangling + the __dispatch_depth parameter and redefining the dispatch method. 
+ You do not need to use this class directly if you do not designing your own XML handler.""" + def __init__(self,data=None,initial_node=None): + """ Takes two optional parameters: "data" and "initial_node". + By default class initialised with empty Node class instance. + Though, if "initial_node" is provided it used as "starting point". + You can think about it as of "node upgrade". + "data" (if provided) feeded to parser immidiatedly after instance init. + """ + self.DEBUG(DBG_NODEBUILDER, "Preparing to handle incoming XML stream.", 'start') + self._parser = xml.parsers.expat.ParserCreate() + self._parser.StartElementHandler = self.starttag + self._parser.EndElementHandler = self.endtag + self._parser.CharacterDataHandler = self.handle_cdata + self._parser.StartNamespaceDeclHandler = self.handle_namespace_start + self._parser.buffer_text = True + self.Parse = self._parser.Parse + + self.__depth = 0 + self.__last_depth = 0 + self.__max_depth = 0 + self._dispatch_depth = 1 + self._document_attrs = None + self._document_nsp = None + self._mini_dom=initial_node + self.last_is_data = 1 + self._ptr=None + self.data_buffer = None + self.streamError = '' + if data: + self._parser.Parse(data,1) + + def check_data_buffer(self): + if self.data_buffer: + self._ptr.data.append(''.join(self.data_buffer)) + del self.data_buffer[:] + self.data_buffer = None + + def destroy(self): + """ Method used to allow class instance to be garbage-collected. """ + self.check_data_buffer() + self._parser.StartElementHandler = None + self._parser.EndElementHandler = None + self._parser.CharacterDataHandler = None + self._parser.StartNamespaceDeclHandler = None + + def starttag(self, tag, attrs): + """XML Parser callback. 
Used internally""" + self.check_data_buffer() + self._inc_depth() + self.DEBUG(DBG_NODEBUILDER, "DEPTH -> %i , tag -> %s, attrs -> %s" % (self.__depth, tag, `attrs`), 'down') + if self.__depth == self._dispatch_depth: + if not self._mini_dom : + self._mini_dom = Node(tag=tag, attrs=attrs, nsp = self._document_nsp, node_built=True) + else: + Node.__init__(self._mini_dom,tag=tag, attrs=attrs, nsp = self._document_nsp, node_built=True) + self._ptr = self._mini_dom + elif self.__depth > self._dispatch_depth: + self._ptr.kids.append(Node(tag=tag,parent=self._ptr,attrs=attrs, node_built=True)) + self._ptr = self._ptr.kids[-1] + if self.__depth == 1: + self._document_attrs = {} + self._document_nsp = {} + nsp, name = (['']+tag.split(':'))[-2:] + for attr,val in attrs.items(): + if attr == 'xmlns': + self._document_nsp[u''] = val + elif attr.startswith('xmlns:'): + self._document_nsp[attr[6:]] = val + else: + self._document_attrs[attr] = val + ns = self._document_nsp.get(nsp, 'http://www.gajim.org/xmlns/undeclared-root') + try: + self.stream_header_received(ns, name, attrs) + except ValueError, e: + self._document_attrs = None + raise ValueError(str(e)) + if not self.last_is_data and self._ptr.parent: + self._ptr.parent.data.append('') + self.last_is_data = 0 + + def endtag(self, tag ): + """XML Parser callback. Used internally""" + self.DEBUG(DBG_NODEBUILDER, "DEPTH -> %i , tag -> %s" % (self.__depth, tag), 'up') + self.check_data_buffer() + if self.__depth == self._dispatch_depth: + if self._mini_dom.getName() == 'error': + self.streamError = self._mini_dom.getChildren()[0].getName() + self.dispatch(self._mini_dom) + elif self.__depth > self._dispatch_depth: + self._ptr = self._ptr.parent + else: + self.DEBUG(DBG_NODEBUILDER, "Got higher than dispatch level. Stream terminated?", 'stop') + self._dec_depth() + self.last_is_data = 0 + if self.__depth == 0: self.stream_footer_received() + + def handle_cdata(self, data): + """XML Parser callback. 
Used internally""" + self.DEBUG(DBG_NODEBUILDER, data, 'data') + if self.last_is_data: + if self.data_buffer: + self.data_buffer.append(data) + elif self._ptr: + self.data_buffer = [data] + self.last_is_data = 1 + + def handle_namespace_start(self, prefix, uri): + """XML Parser callback. Used internally""" + self.check_data_buffer() + + def DEBUG(self, level, text, comment=None): + """ Gets all NodeBuilder walking events. Can be used for debugging if redefined.""" + def getDom(self): + """ Returns just built Node. """ + self.check_data_buffer() + return self._mini_dom + def dispatch(self,stanza): + """ Gets called when the NodeBuilder reaches some level of depth on it's way up with the built + node as argument. Can be redefined to convert incoming XML stanzas to program events. """ + def stream_header_received(self,ns,tag,attrs): + """ Method called when stream just opened. """ + self.check_data_buffer() + def stream_footer_received(self): + """ Method called when stream just closed. """ + self.check_data_buffer() + + def has_received_endtag(self, level=0): + """ Return True if at least one end tag was seen (at level) """ + return self.__depth <= level and self.__max_depth > level + + def _inc_depth(self): + self.__last_depth = self.__depth + self.__depth += 1 + self.__max_depth = max(self.__depth, self.__max_depth) + + def _dec_depth(self): + self.__last_depth = self.__depth + self.__depth -= 1 + +def XML2Node(xml): + """ Converts supplied textual string into XML node. Handy f.e. for reading configuration file. + Raises xml.parser.expat.parsererror if provided string is not well-formed XML. """ + return NodeBuilder(xml).getDom() + +def BadXML2Node(xml): + """ Converts supplied textual string into XML node. Survives if xml data is cutted half way round. + I.e. "<html>some text <br>some more text". Will raise xml.parser.expat.parsererror on misplaced + tags though. F.e. 
"<b>some text <br>some more text</b>" will not work.""" + return NodeBuilder(xml).getDom() diff --git a/libs/xmpp/transports.py b/libs/xmpp/transports.py new file mode 100644 index 0000000..f3d1316 --- /dev/null +++ b/libs/xmpp/transports.py @@ -0,0 +1,339 @@ +## transports.py +## +## Copyright (C) 2003-2004 Alexey "Snake" Nezhdanov +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2, or (at your option) +## any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. + +# $Id: transports.py,v 1.35 2009/04/07 08:34:09 snakeru Exp $ + +""" +This module contains the low-level implementations of xmpppy connect methods or +(in other words) transports for xmpp-stanzas. +Currently here is three transports: +direct TCP connect - TCPsocket class +proxied TCP connect - HTTPPROXYsocket class (CONNECT proxies) +TLS connection - TLS class. Can be used for SSL connections also. + +Transports are stackable so you - f.e. TLS use HTPPROXYsocket or TCPsocket as more low-level transport. + +Also exception 'error' is defined to allow capture of this module specific exceptions. 
+""" + +import socket, select, base64, dispatcher, sys +from simplexml import ustr +from client import PlugIn +from protocol import * + +# determine which DNS resolution library is available +HAVE_DNSPYTHON = False +HAVE_PYDNS = False +try: + import dns.resolver # http://dnspython.org/ + HAVE_DNSPYTHON = True +except ImportError: + try: + import DNS # http://pydns.sf.net/ + HAVE_PYDNS = True + except ImportError: + pass + +DATA_RECEIVED = 'DATA RECEIVED' +DATA_SENT = 'DATA SENT' + +class error: + """An exception to be raised in case of low-level errors in methods of 'transports' module.""" + def __init__(self, comment): + """Cache the descriptive string""" + self._comment = comment + + def __str__(self): + """Serialise exception into pre-cached descriptive string.""" + return self._comment + +BUFLEN = 1024 +class TCPsocket(PlugIn): + """ This class defines direct TCP connection method. """ + def __init__(self, server = None, use_srv = True): + """ Cache connection point 'server'. 'server' is the tuple of (host, port) + absolutely the same as standard tcp socket uses. However library will lookup for + ('_xmpp-client._tcp.' + host) SRV record in DNS and connect to the found (if it is) + server instead + """ + PlugIn.__init__(self) + self.DBG_LINE = 'socket' + self._exported_methods = [self.send, self.disconnect] + self._server, self.use_srv = server, use_srv + + def srv_lookup(self, server): + " SRV resolver. Takes server=(host, port) as argument. Returns new (host, port) pair " + if HAVE_DNSPYTHON or HAVE_PYDNS: + host, port = server + possible_queries = ['_xmpp-client._tcp.' 
+ host] + + for query in possible_queries: + try: + if HAVE_DNSPYTHON: + answers = [x for x in dns.resolver.query(query, 'SRV')] + if answers: + host = str(answers[0].target) + port = int(answers[0].port) + break + elif HAVE_PYDNS: + # ensure we haven't cached an old configuration + DNS.DiscoverNameServers() + response = DNS.Request().req(query, qtype = 'SRV') + answers = response.answers + if len(answers) > 0: + # ignore the priority and weight for now + _, _, port, host = answers[0]['data'] + del _ + port = int(port) + break + except: + self.DEBUG('An error occurred while looking up %s' % query, 'warn') + server = (host, port) + else: + self.DEBUG("Could not load one of the supported DNS libraries (dnspython or pydns). SRV records will not be queried and you may need to set custom hostname/port for some servers to be accessible.\n", 'warn') + # end of SRV resolver + return server + + def plugin(self, owner): + """ Fire up connection. Return non-empty string on success. + Also registers self.disconnected method in the owner's dispatcher. + Called internally. """ + if not self._server: self._server = (self._owner.Server, 5222) + if self.use_srv: server = self.srv_lookup(self._server) + else: server = self._server + if not self.connect(server): return + self._owner.Connection = self + self._owner.RegisterDisconnectHandler(self.disconnected) + return 'ok' + + def getHost(self): + """ Return the 'host' value that is connection is [will be] made to.""" + return self._server[0] + def getPort(self): + """ Return the 'port' value that is connection is [will be] made to.""" + return self._server[1] + + def connect(self, server = None): + """ Try to connect to the given host/port. Does not lookup for SRV record. + Returns non-empty string on success. 
""" + try: + if not server: server = self._server + self._sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self._sock.connect((server[0], int(server[1]))) + self._send = self._sock.sendall + self._recv = self._sock.recv + self.DEBUG("Successfully connected to remote host %s" % `server`, 'start') + return 'ok' + except socket.error, (errno, strerror): + self.DEBUG("Failed to connect to remote host %s: %s (%s)" % (`server`, strerror, errno), 'error') + except: pass + + def plugout(self): + """ Disconnect from the remote server and unregister self.disconnected method from + the owner's dispatcher. """ + self._sock.close() + if self._owner.__dict__.has_key('Connection'): + del self._owner.Connection + self._owner.UnregisterDisconnectHandler(self.disconnected) + + def receive(self): + """ Reads all pending incoming data. + In case of disconnection calls owner's disconnected() method and then raises IOError exception.""" + try: received = self._recv(BUFLEN) + except socket.sslerror, e: + self._seen_data = 0 + if e[0] == socket.SSL_ERROR_WANT_READ: return '' + if e[0] == socket.SSL_ERROR_WANT_WRITE: return '' + self.DEBUG('Socket error while receiving data', 'error') + sys.exc_clear() + self._owner.disconnected() + raise IOError("Disconnected from server") + except: received = '' + + while self.pending_data(0): + try: add = self._recv(BUFLEN) + except: add = '' + received += add + if not add: break + + if len(received): # length of 0 means disconnect + self._seen_data = 1 + self.DEBUG(received, 'got') + if hasattr(self._owner, 'Dispatcher'): + self._owner.Dispatcher.Event('', DATA_RECEIVED, received) + else: + self.DEBUG('Socket error while receiving data', 'error') + self._owner.disconnected() + raise IOError("Disconnected from server") + return received + + def send(self, raw_data): + """ Writes raw outgoing data. Blocks until done. 
+ If supplied data is unicode string, encodes it to utf-8 before send.""" + if type(raw_data) == type(u''): raw_data = raw_data.encode('utf-8') + elif type(raw_data) <> type(''): raw_data = ustr(raw_data).encode('utf-8') + try: + self._send(raw_data) + # Avoid printing messages that are empty keepalive packets. + if raw_data.strip(): + self.DEBUG(raw_data, 'sent') + if hasattr(self._owner, 'Dispatcher'): # HTTPPROXYsocket will send data before we have a Dispatcher + self._owner.Dispatcher.Event('', DATA_SENT, raw_data) + except: + self.DEBUG("Socket error while sending data", 'error') + self._owner.disconnected() + + def pending_data(self, timeout = 0): + """ Returns true if there is a data ready to be read. """ + return select.select([self._sock], [], [], timeout)[0] + + def disconnect(self): + """ Closes the socket. """ + self.DEBUG("Closing socket", 'stop') + self._sock.close() + + def disconnected(self): + """ Called when a Network Error or disconnection occurs. + Designed to be overidden. """ + self.DEBUG("Socket operation failed", 'error') + +DBG_CONNECT_PROXY = 'CONNECTproxy' +class HTTPPROXYsocket(TCPsocket): + """ HTTP (CONNECT) proxy connection class. Uses TCPsocket as the base class + redefines only connect method. Allows to use HTTP proxies like squid with + (optionally) simple authentication (using login and password). """ + def __init__(self, proxy, server, use_srv = True): + """ Caches proxy and target addresses. + 'proxy' argument is a dictionary with mandatory keys 'host' and 'port' (proxy address) + and optional keys 'user' and 'password' to use for authentication. + 'server' argument is a tuple of host and port - just like TCPsocket uses. """ + TCPsocket.__init__(self, server, use_srv) + self.DBG_LINE = DBG_CONNECT_PROXY + self._proxy = proxy + + def plugin(self, owner): + """ Starts connection. Used interally. 
Returns non-empty string on success.""" + owner.debug_flags.append(DBG_CONNECT_PROXY) + return TCPsocket.plugin(self, owner) + + def connect(self, dupe = None): + """ Starts connection. Connects to proxy, supplies login and password to it + (if were specified while creating instance). Instructs proxy to make + connection to the target server. Returns non-empty sting on success. """ + if not TCPsocket.connect(self, (self._proxy['host'], self._proxy['port'])): return + self.DEBUG("Proxy server contacted, performing authentification", 'start') + connector = ['CONNECT %s:%s HTTP/1.0' % self._server, + 'Proxy-Connection: Keep-Alive', + 'Pragma: no-cache', + 'Host: %s:%s' % self._server, + 'User-Agent: HTTPPROXYsocket/v0.1'] + if self._proxy.has_key('user') and self._proxy.has_key('password'): + credentials = '%s:%s' % (self._proxy['user'], self._proxy['password']) + credentials = base64.encodestring(credentials).strip() + connector.append('Proxy-Authorization: Basic ' + credentials) + connector.append('\r\n') + self.send('\r\n'.join(connector)) + try: reply = self.receive().replace('\r', '') + except IOError: + self.DEBUG('Proxy suddenly disconnected', 'error') + self._owner.disconnected() + return + try: proto, code, desc = reply.split('\n')[0].split(' ', 2) + except: raise error('Invalid proxy reply') + if code <> '200': + self.DEBUG('Invalid proxy reply: %s %s %s' % (proto, code, desc), 'error') + self._owner.disconnected() + return + while reply.find('\n\n') == -1: + try: reply += self.receive().replace('\r', '') + except IOError: + self.DEBUG('Proxy suddenly disconnected', 'error') + self._owner.disconnected() + return + self.DEBUG("Authentification successfull. 
Jabber server contacted.", 'ok') + return 'ok' + + def DEBUG(self, text, severity): + """Overwrites DEBUG tag to allow debug output be presented as "CONNECTproxy".""" + return self._owner.DEBUG(DBG_CONNECT_PROXY, text, severity) + +class TLS(PlugIn): + """ TLS connection used to encrypts already estabilished tcp connection.""" + def PlugIn(self, owner, now = 0): + """ If the 'now' argument is true then starts using encryption immidiatedly. + If 'now' in false then starts encryption as soon as TLS feature is + declared by the server (if it were already declared - it is ok). + """ + if owner.__dict__.has_key('TLS'): return # Already enabled. + PlugIn.PlugIn(self, owner) + DBG_LINE = 'TLS' + if now: return self._startSSL() + if self._owner.Dispatcher.Stream.features: + try: self.FeaturesHandler(self._owner.Dispatcher, self._owner.Dispatcher.Stream.features) + except NodeProcessed: pass + else: self._owner.RegisterHandlerOnce('features', self.FeaturesHandler, xmlns = NS_STREAMS) + self.starttls = None + + def plugout(self, now = 0): + """ Unregisters TLS handler's from owner's dispatcher. Take note that encription + can not be stopped once started. You can only break the connection and start over.""" + self._owner.UnregisterHandler('features', self.FeaturesHandler, xmlns = NS_STREAMS) + self._owner.UnregisterHandler('proceed', self.StartTLSHandler, xmlns = NS_TLS) + self._owner.UnregisterHandler('failure', self.StartTLSHandler, xmlns = NS_TLS) + + def FeaturesHandler(self, conn, feats): + """ Used to analyse server <features/> tag for TLS support. + If TLS is supported starts the encryption negotiation. Used internally""" + if not feats.getTag('starttls', namespace = NS_TLS): + self.DEBUG("TLS unsupported by remote server.", 'warn') + return + self.DEBUG("TLS supported by remote server. 
Requesting TLS start.", 'ok') + self._owner.RegisterHandlerOnce('proceed', self.StartTLSHandler, xmlns = NS_TLS) + self._owner.RegisterHandlerOnce('failure', self.StartTLSHandler, xmlns = NS_TLS) + self._owner.Connection.send('<starttls xmlns="%s"/>' % NS_TLS) + raise NodeProcessed + + def pending_data(self, timeout = 0): + """ Returns true if there possible is a data ready to be read. """ + return self._tcpsock._seen_data or select.select([self._tcpsock._sock], [], [], timeout)[0] + + def _startSSL(self): + """ Immidiatedly switch socket to TLS mode. Used internally.""" + """ Here we should switch pending_data to hint mode.""" + tcpsock = self._owner.Connection + tcpsock._sslObj = socket.ssl(tcpsock._sock, None, None) + tcpsock._sslIssuer = tcpsock._sslObj.issuer() + tcpsock._sslServer = tcpsock._sslObj.server() + tcpsock._recv = tcpsock._sslObj.read + tcpsock._send = tcpsock._sslObj.write + + tcpsock._seen_data = 1 + self._tcpsock = tcpsock + tcpsock.pending_data = self.pending_data + tcpsock._sock.setblocking(0) + + self.starttls = 'success' + + def StartTLSHandler(self, conn, starttls): + """ Handle server reply if TLS is allowed to process. Behaves accordingly. + Used internally.""" + if starttls.getNamespace() <> NS_TLS: return + self.starttls = starttls.getName() + if self.starttls == 'failure': + self.DEBUG("Got starttls response: " + self.starttls, 'error') + return + self.DEBUG("Got starttls proceed response. Switching to TLS/SSL...", 'ok') + self._startSSL() + self._owner.Dispatcher.PlugOut() + dispatcher.Dispatcher().PlugIn(self._owner)