From 9979bc6d47f16858585ead0027bddf3fc47892ac Mon Sep 17 00:00:00 2001 From: Jacob Coffee Date: Mon, 22 Jul 2024 17:32:31 -0500 Subject: [PATCH] feat: initial py2 to 3 work --- code/planet-cache.py | 93 ++++++++-------- code/planet.py | 31 +++--- code/planet/__init__.py | 32 +++--- code/planet/atomstyler.py | 12 +-- code/planet/cache.py | 21 ++-- code/planet/compat_logging/__init__.py | 45 ++++---- code/planet/compat_logging/config.py | 27 +++-- code/planet/compat_logging/handlers.py | 19 ++-- code/planet/feedparser.py | 142 +++++++++++++------------ code/planet/htmltmpl.py | 139 ++++++++++++------------ code/planet/sanitize.py | 25 +++-- config/sort-ini.py | 4 +- 12 files changed, 300 insertions(+), 290 deletions(-) diff --git a/code/planet-cache.py b/code/planet-cache.py index 9334583a..31cedd08 100755 --- a/code/planet-cache.py +++ b/code/planet-cache.py @@ -1,5 +1,4 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- +#!/usr/bin/env python3 """Planet cache tool. """ @@ -12,34 +11,34 @@ import os import sys import time -import dbhash -import ConfigParser +import dbm +import configparser import planet def usage(): - print "Usage: planet-cache [options] CACHEFILE [ITEMID]..." - print - print "Examine and modify information in the Planet cache." - print - print "Channel Commands:" - print " -C, --channel Display known information on the channel" - print " -L, --list List items in the channel" - print " -K, --keys List all keys found in channel items" - print - print "Item Commands (need ITEMID):" - print " -I, --item Display known information about the item(s)" - print " -H, --hide Mark the item(s) as hidden" - print " -U, --unhide Mark the item(s) as not hidden" - print - print "Other Options:" - print " -h, --help Display this help message and exit" + print("Usage: planet-cache [options] CACHEFILE [ITEMID]...") + print() + print("Examine and modify information in the Planet cache.") + print() + print("Channel Commands:") + print(" -C, --channel Display known information on the channel") + print(" -L, --list List items in the channel") + print(" -K, --keys List all keys found in channel items") + print() + print("Item Commands (need ITEMID):") + print(" -I, --item Display known information about the item(s)") + print(" -H, --hide Mark the item(s) as hidden") + print(" -U, --unhide Mark the item(s) as not hidden") + print() + print("Other Options:") + print(" -h, --help Display this help message and exit") sys.exit(0) def usage_error(msg, *args): - print >>sys.stderr, msg, " ".join(args) - print >>sys.stderr, "Perhaps you need --help ?" 
+ print(msg, " ".join(args), file=sys.stderr) + print("Perhaps you need --help ?", file=sys.stderr) sys.exit(1) def print_keys(item, title): @@ -47,13 +46,13 @@ def print_keys(item, title): keys.sort() key_len = max([ len(k) for k in keys ]) - print title + ":" + print(title + ":") for key in keys: if item.key_type(key) == item.DATE: value = time.strftime(planet.TIMEFMT_ISO, item[key]) else: value = str(item[key]) - print " %-*s %s" % (key_len, key, fit_str(value, 74 - key_len)) + print(" %-*s %s" % (key_len, key, fit_str(value, 74 - key_len))) def fit_str(string, length): if len(string) <= length: @@ -116,24 +115,23 @@ def fit_str(string, length): # Open the cache file directly to get the URL it represents try: - db = dbhash.open(cache_file) - url = db["url"] - db.close() - except dbhash.bsddb._db.DBError, e: - print >>sys.stderr, cache_file + ":", e.args[1] + with dbm.open(cache_file, 'r') as db: + url = db[b"url"].decode('utf-8') + except dbm.error as e: + print(f"{cache_file}: {str(e)}", file=sys.stderr) sys.exit(1) except KeyError: - print >>sys.stderr, cache_file + ": Probably not a cache file" + print(f"{cache_file}: Probably not a cache file", file=sys.stderr) sys.exit(1) # Now do it the right way :-) - my_planet = planet.Planet(ConfigParser.ConfigParser()) + my_planet = planet.Planet(configparser.ConfigParser()) my_planet.cache_directory = os.path.dirname(cache_file) channel = planet.Channel(my_planet, url) for item_id in ids: if not channel.has_item(item_id): - print >>sys.stderr, item_id + ": Not in channel" + print(item_id + ": Not in channel", file=sys.stderr) sys.exit(1) # Do the user's bidding @@ -146,14 +144,14 @@ def fit_str(string, length): print_keys(item, "Item Keys for %s" % item_id) elif command == "list": - print "Items in Channel:" + print("Items in Channel:") for item in channel.items(hidden=1, sorted=1): - print " " + item.id - print " " + time.strftime(planet.TIMEFMT_ISO, item.date) + print(" " + item.id) + print(" " + time.strftime(planet.TIMEFMT_ISO, item.date)) if hasattr(item, "title"): - print " " + fit_str(item.title, 70) + print(" " + fit_str(item.title, 70)) if hasattr(item, "hidden"): - print " (hidden)" + print(" (hidden)") elif command == "keys": keys = {} @@ -161,34 +159,33 @@ def fit_str(string, length): for key in item.keys(): keys[key] = 1 - keys = keys.keys() - keys.sort() + keys = sorted(keys.keys()) - print "Keys used in Channel:" + print("Keys used in Channel:") for key in keys: - print " " + key - print + print(" " + key) + print() - print "Use --item to output values of particular items." + print("Use --item to output values of particular items.") elif command == "hide": for item_id in ids: item = channel.get_item(item_id) if hasattr(item, "hidden"): - print item_id + ": Already hidden." + print(item_id + ": Already hidden.") else: item.hidden = "yes" channel.cache_write() - print "Done." + print("Done.") elif command == "unhide": for item_id in ids: item = channel.get_item(item_id) if hasattr(item, "hidden"): - del(item.hidden) + del item.hidden else: - print item_id + ": Not hidden." + print(item_id + ": Not hidden.") channel.cache_write() - print "Done." + print("Done.") diff --git a/code/planet.py b/code/planet.py index 41141b67..c59b5928 100755 --- a/code/planet.py +++ b/code/planet.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 """The Planet aggregator. A flexible and easy-to-use aggregator for generating websites. 
@@ -16,14 +16,13 @@
 import os
 import sys
-import time
 import locale
 import socket
-import urlparse
+import configparser
+from urllib.parse import urljoin
 
 import planet
-from ConfigParser import ConfigParser
 
 # Default configuration file path
 CONFIG_FILE = "config.ini"
@@ -56,29 +55,29 @@ def main():
     for arg in sys.argv[1:]:
         if arg == "-h" or arg == "--help":
-            print "Usage: planet [options] [CONFIGFILE]"
-            print
-            print "Options:"
-            print " -v, --verbose       DEBUG level logging during update"
-            print " -o, --offline       Update the Planet from the cache only"
-            print " -h, --help          Display this help message and exit"
-            print
+            print("Usage: planet [options] [CONFIGFILE]")
+            print()
+            print("Options:")
+            print(" -v, --verbose       DEBUG level logging during update")
+            print(" -o, --offline       Update the Planet from the cache only")
+            print(" -h, --help          Display this help message and exit")
+            print()
             sys.exit(0)
         elif arg == "-v" or arg == "--verbose":
             verbose = 1
         elif arg == "-o" or arg == "--offline":
             offline = 1
         elif arg.startswith("-"):
-            print >>sys.stderr, "Unknown option:", arg
+            print("Unknown option:", arg, file=sys.stderr)
             sys.exit(1)
         else:
             config_file = arg
 
     # Read the configuration file
-    config = ConfigParser()
+    config = configparser.ConfigParser()
     config.read(config_file)
     if not config.has_section("Planet"):
-        print >>sys.stderr, "Configuration missing [Planet] section."
+        print("Configuration missing [Planet] section.", file=sys.stderr)
         sys.exit(1)
 
     # Read the [Planet] config section
@@ -100,7 +99,7 @@ def main():
     for template_file in template_files:
         name = os.path.splitext(os.path.basename(template_file))[0]
         if name.find('atom')>=0 or name.find('rss')>=0:
-            planet_feed = urlparse.urljoin(planet_link, name)
+            planet_feed = urljoin(planet_link, name)
             break
 
     # Define locale
@@ -118,7 +117,7 @@ def main():
             locale_ok = True
             break
     if not locale_ok:
-        print >>sys.stderr, "Unsupported locale setting."
+        print("Unsupported locale setting.", file=sys.stderr)
         sys.exit(1)
 
     # Activate logging
diff --git a/code/planet/__init__.py b/code/planet/__init__.py
index 929920b0..ed3fe784 100644
--- a/code/planet/__init__.py
+++ b/code/planet/__init__.py
@@ -1,5 +1,4 @@
-#!/usr/bin/env python
-# -*- coding: UTF-8 -*-
+#!/usr/bin/env python3
 """Planet aggregator library.
This package is a library for developing web sites or software that @@ -18,7 +17,6 @@ import feedparser import sanitize import htmltmpl -import sgmllib try: import logging except: @@ -29,10 +27,11 @@ "Planet", "Channel", "NewsItem") +from html.parser import HTMLParser import os -import md5 +from hashlib import md5 import time -import dbhash +import dbm import re try: @@ -74,15 +73,18 @@ def escape(data): NEW_DATE_FORMAT = "%B %d, %Y" ACTIVITY_THRESHOLD = 0 -class stripHtml(sgmllib.SGMLParser): + +class stripHtml(HTMLParser): "remove all tags from the data" - def __init__(self, data): - sgmllib.SGMLParser.__init__(self) - self.result='' - self.feed(data) - self.close() + def __init__(self): + super().__init__() + self.result = [] + def handle_data(self, data): - if data: self.result+=data + self.result.append(data) + + def get_data(self): + return "".join(self.result) def template_info(item, date_format): """Produce a dictionary of template information.""" @@ -504,7 +506,7 @@ def __init__(self, planet, url): if not os.path.isdir(planet.cache_directory): os.makedirs(planet.cache_directory) cache_filename = cache.filename(planet.cache_directory, url) - cache_file = dbhash.open(cache_filename, "c", 0666) + cache_file = dbm.open(cache_filename, "c", 0o666) cache.CachedInfo.__init__(self, cache_file, url, root=1) @@ -695,7 +697,7 @@ def update_info(self, feed): self.set_as_string(key + "_width", str(feed[key].width)) if feed[key].has_key("height"): self.set_as_string(key + "_height", str(feed[key].height)) - elif isinstance(feed[key], (str, unicode)): + elif isinstance(feed[key], str): # String fields try: detail = key + '_detail' @@ -890,7 +892,7 @@ def update(self, entry): self.set_as_string(key + "_language", item.language) value += cache.utf8(item.value) self.set_as_string(key, value) - elif isinstance(entry[key], (str, unicode)): + elif isinstance(entry[key], str): # String fields try: detail = key + '_detail' diff --git a/code/planet/atomstyler.py b/code/planet/atomstyler.py index 9220702c..645cc405 100644 --- a/code/planet/atomstyler.py +++ b/code/planet/atomstyler.py @@ -1,7 +1,7 @@ from xml.dom import minidom, Node -from urlparse import urlparse, urlunparse +from urllib.parse import urlparse, urlunparse from xml.parsers.expat import ExpatError -from htmlentitydefs import name2codepoint +from html.entities import name2codepoint import re # select and apply an xml:base for this entry @@ -75,20 +75,20 @@ def retype(parent): elif len(node.childNodes)==1: # replace html entity defs with utf-8 - chunks=re.split('&(\w+);', node.childNodes[0].nodeValue) + chunks=re.split(r'&(\w+);', node.childNodes[0].nodeValue) for i in range(1,len(chunks),2): if chunks[i] in ['amp', 'lt', 'gt', 'apos', 'quot']: chunks[i] ='&' + chunks[i] +';' elif chunks[i] in name2codepoint: - chunks[i]=unichr(name2codepoint[chunks[i]]) + chunks[i] = chr(name2codepoint[chunks[i]]) else: chunks[i]='&' + chunks[i] + ';' - text = u"".join(chunks) + text = "".join(chunks) try: # see if the resulting text is a well-formed XML fragment div = '
<div xmlns="http://www.w3.org/1999/xhtml">%s</div>
' - data = minidom.parseString((div % text.encode('utf-8'))) + data = minidom.parseString(div % text.encode('utf-8')) if text.find('<') < 0: # plain text diff --git a/code/planet/cache.py b/code/planet/cache.py index dfc529b7..84727423 100644 --- a/code/planet/cache.py +++ b/code/planet/cache.py @@ -1,5 +1,4 @@ -#!/usr/bin/env python -# -*- coding: UTF-8 -*- +#!/usr/bin/env python3 """Item cache. Between runs of Planet we need somewhere to store the feed information @@ -198,7 +197,7 @@ def get_as_string(self, key): """Return the key as a string value.""" key = key.replace(" ", "_") if not self.has_key(key): - raise KeyError, key + raise KeyError(key) return self._value[key] @@ -218,7 +217,7 @@ def get_as_date(self, key): """Return the key as a date value.""" key = key.replace(" ", "_") if not self.has_key(key): - raise KeyError, key + raise KeyError(key) value = self._value[key] return tuple([ int(i) for i in value.split(" ") ]) @@ -237,7 +236,7 @@ def get_as_null(self, key): """Return the key as the null value.""" key = key.replace(" ", "_") if not self.has_key(key): - raise KeyError, key + raise KeyError(key) return None @@ -245,7 +244,7 @@ def del_key(self, key): """Delete the given key.""" key = key.replace(" ", "_") if not self.has_key(key): - raise KeyError, key + raise KeyError(key) del(self._value[key]) del(self._type[key]) @@ -276,7 +275,7 @@ def __getattr__(self, key): if self.has_key(key): return self.get(key) else: - raise AttributeError, key + raise AttributeError(key) def filename(directory, filename): @@ -294,13 +293,13 @@ def filename(directory, filename): def utf8(value): """Return the value as a UTF-8 string.""" - if type(value) == type(u''): + if type(value) == type(''): return value.encode("utf-8") else: try: - return unicode(value, "utf-8").encode("utf-8") + return str(value, "utf-8").encode("utf-8") except UnicodeError: try: - return unicode(value, "iso-8859-1").encode("utf-8") + return str(value, "iso-8859-1").encode("utf-8") except UnicodeError: - return unicode(value, "ascii", "replace").encode("utf-8") + return str(value, "ascii", "replace").encode("utf-8") diff --git a/code/planet/compat_logging/__init__.py b/code/planet/compat_logging/__init__.py index 3bd0c6d7..6a751b84 100644 --- a/code/planet/compat_logging/__init__.py +++ b/code/planet/compat_logging/__init__.py @@ -26,7 +26,12 @@ To use, simply 'import logging' and log away! """ -import sys, os, types, time, string, cStringIO +import sys +import os +import types +import time +import string +import io try: import thread @@ -200,7 +205,7 @@ def __init__(self, name, level, pathname, lineno, msg, args, exc_info): self.exc_info = exc_info self.lineno = lineno self.created = ct - self.msecs = (ct - long(ct)) * 1000 + self.msecs = (ct - int(ct)) * 1000 self.relativeCreated = (self.created - _startTime) * 1000 if thread: self.thread = thread.get_ident() @@ -338,7 +343,7 @@ def formatException(self, ei): traceback.print_exception() """ import traceback - sio = cStringIO.StringIO() + sio = io.StringIO() traceback.print_exception(ei[0], ei[1], ei[2], None, sio) s = sio.getvalue() sio.close() @@ -573,8 +578,7 @@ def emit(self, record): This version is intended to be implemented by subclasses and so raises a NotImplementedError. 
""" - raise NotImplementedError, 'emit must be implemented '\ - 'by Handler subclasses' + raise NotImplementedError('emit must be implemented by Handler subclasses') def handle(self, record): """ @@ -737,8 +741,7 @@ def setLoggerClass(klass): """ if klass != Logger: if not issubclass(klass, Logger): - raise TypeError, "logger not derived from logging.Logger: " + \ - klass.__name__ + raise TypeError(f"logger not derived from logging.Logger: {klass.__name__}") global _loggerClass _loggerClass = klass @@ -817,7 +820,7 @@ def _fixupChildren(self, ph, alogger): specified logger. """ for c in ph.loggers: - if string.find(c.parent.name, alogger.name) <> 0: + if string.find(c.parent.name, alogger.name) != 0: alogger.parent = c.parent c.parent = alogger @@ -876,7 +879,7 @@ def debug(self, msg, *args, **kwargs): if self.manager.disable >= DEBUG: return if DEBUG >= self.getEffectiveLevel(): - apply(self._log, (DEBUG, msg, args), kwargs) + self._log(DEBUG, msg, *args, **kwargs) def info(self, msg, *args, **kwargs): """ @@ -890,7 +893,7 @@ def info(self, msg, *args, **kwargs): if self.manager.disable >= INFO: return if INFO >= self.getEffectiveLevel(): - apply(self._log, (INFO, msg, args), kwargs) + self._log(INFO, msg, args, **kwargs) def warning(self, msg, *args, **kwargs): """ @@ -904,7 +907,7 @@ def warning(self, msg, *args, **kwargs): if self.manager.disable >= WARNING: return if self.isEnabledFor(WARNING): - apply(self._log, (WARNING, msg, args), kwargs) + self._log(WARNING, msg, args, **kwargs) warn = warning @@ -920,13 +923,13 @@ def error(self, msg, *args, **kwargs): if self.manager.disable >= ERROR: return if self.isEnabledFor(ERROR): - apply(self._log, (ERROR, msg, args), kwargs) + self._log(ERROR, msg, args, **kwargs) def exception(self, msg, *args): """ Convenience method for logging an ERROR with exception information. """ - apply(self.error, (msg,) + args, {'exc_info': 1}) + self.error(msg, *args, exc_info=True) def critical(self, msg, *args, **kwargs): """ @@ -940,7 +943,7 @@ def critical(self, msg, *args, **kwargs): if self.manager.disable >= CRITICAL: return if CRITICAL >= self.getEffectiveLevel(): - apply(self._log, (CRITICAL, msg, args), kwargs) + self._log(CRITICAL, msg, *args, **kwargs) fatal = critical @@ -956,7 +959,7 @@ def log(self, level, msg, *args, **kwargs): if self.manager.disable >= level: return if self.isEnabledFor(level): - apply(self._log, (level, msg, args), kwargs) + self._log(level, msg, args, **kwargs) def findCaller(self): """ @@ -1133,7 +1136,7 @@ def critical(msg, *args, **kwargs): """ if len(root.handlers) == 0: basicConfig() - apply(root.critical, (msg,)+args, kwargs) + root.critical(msg, *args, **kwargs) fatal = critical @@ -1143,14 +1146,14 @@ def error(msg, *args, **kwargs): """ if len(root.handlers) == 0: basicConfig() - apply(root.error, (msg,)+args, kwargs) + root.error(msg, *args, **kwargs) def exception(msg, *args): """ Log a message with severity 'ERROR' on the root logger, with exception information. 
""" - apply(error, (msg,)+args, {'exc_info': 1}) + error(msg, *args, exc_info=True) def warning(msg, *args, **kwargs): """ @@ -1158,7 +1161,7 @@ def warning(msg, *args, **kwargs): """ if len(root.handlers) == 0: basicConfig() - apply(root.warning, (msg,)+args, kwargs) + root.warning(msg, *args, **kwargs) warn = warning @@ -1168,7 +1171,7 @@ def info(msg, *args, **kwargs): """ if len(root.handlers) == 0: basicConfig() - apply(root.info, (msg,)+args, kwargs) + root.info(msg, *args, **kwargs) def debug(msg, *args, **kwargs): """ @@ -1176,7 +1179,7 @@ def debug(msg, *args, **kwargs): """ if len(root.handlers) == 0: basicConfig() - apply(root.debug, (msg,)+args, kwargs) + root.debug(msg, *args, **kwargs) def disable(level): """ diff --git a/code/planet/compat_logging/config.py b/code/planet/compat_logging/config.py index d4d08f01..60579065 100644 --- a/code/planet/compat_logging/config.py +++ b/code/planet/compat_logging/config.py @@ -26,10 +26,15 @@ To use, simply 'import logging' and log away! """ -import sys, logging, logging.handlers, string, thread, threading, socket, struct, os - -from SocketServer import ThreadingTCPServer, StreamRequestHandler - +import logging +import logging.handlers +import os +import socket +import string +import struct +import sys +from socketserver import ThreadingTCPServer, StreamRequestHandler +import threading DEFAULT_LOGGING_CONFIG_PORT = 9030 if sys.platform == "win32": @@ -57,9 +62,9 @@ def fileConfig(fname, defaults=None): rather than a filename, in which case the file-like object will be read using readfp. """ - import ConfigParser + import configparser - cp = ConfigParser.ConfigParser(defaults) + cp = configparser.ConfigParser(defaults) if hasattr(cp, 'readfp') and hasattr(fname, 'readline'): cp.readfp(fname) else: @@ -106,7 +111,7 @@ def fileConfig(fname, defaults=None): klass = eval(klass, vars(logging)) args = cp.get(sectname, "args") args = eval(args, vars(logging)) - h = apply(klass, args) + h = klass(*args) if "level" in opts: level = cp.get(sectname, "level") h.setLevel(logging._levelNames[level]) @@ -201,8 +206,8 @@ def listen(port=DEFAULT_LOGGING_CONFIG_PORT): and which you can join() when appropriate. To stop the server, call stopListening(). """ - if not thread: - raise NotImplementedError, "listen() needs threading to work" + if not threading: + raise NotImplementedError("listen() needs threading to work") class ConfigStreamHandler(StreamRequestHandler): """ @@ -239,8 +244,8 @@ def handle(self): f.close() fileConfig(file) os.remove(file) - except socket.error, e: - if type(e.args) != types.TupleType: + except socket.error as e: + if type(e.args) != tuple: raise else: errcode = e.args[0] diff --git a/code/planet/compat_logging/handlers.py b/code/planet/compat_logging/handlers.py index 26ca8adc..6b2f3b92 100644 --- a/code/planet/compat_logging/handlers.py +++ b/code/planet/compat_logging/handlers.py @@ -26,9 +26,14 @@ To use, simply 'import logging' and log away! """ -import sys, logging, socket, types, os, string, cPickle, struct, time - -from SocketServer import ThreadingTCPServer, StreamRequestHandler +import logging +import socket +import types +import os +import string +import struct +import time +import pickle # # Some constants... @@ -164,7 +169,7 @@ def makePickle(self, record): Pickles the record in binary format with a length prefix, and returns it ready for transmission across the socket. 
""" - s = cPickle.dumps(record.__dict__, 1) + s = pickle.dumps(record.__dict__, 1) #n = len(s) #slen = "%c%c" % ((n >> 8) & 0xFF, n & 0xFF) slen = struct.pack(">L", len(s)) @@ -516,8 +521,8 @@ def __init__(self, appname, dllname=None, logtype="Application"): logging.CRITICAL: win32evtlog.EVENTLOG_ERROR_TYPE, } except ImportError: - print "The Python Win32 extensions for NT (service, event "\ - "logging) appear not to be available." + print("The Python Win32 extensions for NT (service, event "\ + "logging) appear not to be available.") self._welu = None def getMessageID(self, record): @@ -595,7 +600,7 @@ def __init__(self, host, url, method="GET"): logging.Handler.__init__(self) method = string.upper(method) if method not in ["GET", "POST"]: - raise ValueError, "method must be GET or POST" + raise ValueError("method must be GET or POST") self.host = host self.url = url self.method = method diff --git a/code/planet/feedparser.py b/code/planet/feedparser.py index cd7ac83d..76ea44ef 100644 --- a/code/planet/feedparser.py +++ b/code/planet/feedparser.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 """Universal feed parser Handles RSS 0.9x, RSS 1.0, RSS 2.0, CDF, Atom 0.3, and Atom 1.0 feeds @@ -9,6 +9,8 @@ Required: Python 2.1 or later Recommended: Python 2.3 or later Recommended: CJKCodecs and iconv_codec + +TODO: py2->3 conversion """ __version__ = "4.1"# + "$Revision: 1.92 $"[11:15] + "-cvs" @@ -696,7 +698,7 @@ def pop(self, element, stripWhitespace=1): if element in self.can_contain_dangerous_markup: output = _sanitizeHTML(output, self.encoding) - if self.encoding and type(output) != type(u''): + if self.encoding and type(output) != type(''): try: output = unicode(output, self.encoding) except: @@ -704,15 +706,15 @@ def pop(self, element, stripWhitespace=1): # address common error where people take data that is already # utf-8, presume that it is iso-8859-1, and re-encode it. 
- if self.encoding=='utf-8' and type(output) == type(u''): + if self.encoding=='utf-8' and type(output) == type(''): try: output = unicode(output.encode('iso-8859-1'), 'utf-8') except: pass # map win-1252 extensions to the proper code points - if type(output) == type(u''): - output = u''.join([c in cp1252 and cp1252[c] or c for c in output]) + if type(output) == type(''): + output = ''.join([c in cp1252 and cp1252[c] or c for c in output]) # categories/tags/keywords/whatever are handled in _end_category if element == 'category': @@ -1505,7 +1507,7 @@ def feed(self, data): data = re.sub(r'<([^<\s]+?)\s*/>', self._shorttag_replace, data) data = data.replace(''', "'") data = data.replace('"', '"') - if self.encoding and type(data) == type(u''): + if self.encoding and type(data) == type(''): data = data.encode(self.encoding) sgmllib.SGMLParser.feed(self, data) sgmllib.SGMLParser.close(self) @@ -1524,10 +1526,10 @@ def unknown_starttag(self, tag, attrs): uattrs = [] # thanks to Kevin Marks for this breathtaking hack to deal with (valid) high-bit attribute values in UTF-8 feeds for key, value in attrs: - if type(value) != type(u''): + if type(value) != type(''): value = unicode(value, self.encoding) uattrs.append((unicode(key, self.encoding), value)) - strattrs = u''.join([u' %s="%s"' % (key, value) for key, value in uattrs]).encode(self.encoding) + strattrs = ''.join([' %s="%s"' % (key, value) for key, value in uattrs]).encode(self.encoding) if tag in self.elements_no_end_tag: self.pieces.append('<%(tag)s%(strattrs)s />' % locals()) else: @@ -1750,7 +1752,7 @@ def _tidy(data, **kwargs): except: pass if _tidy: - utf8 = type(data) == type(u'') + utf8 = type(data) == type('') if utf8: data = data.encode('utf-8') data = _tidy(data, output_xhtml=1, numeric_entities=1, wrap=0, char_encoding="utf8") @@ -2025,17 +2027,17 @@ def _parse_date_iso8601(dateString): registerDateHandler(_parse_date_iso8601) # 8-bit date handling routines written by ytrewq1. 
-_korean_year = u'\ub144' # b3e2 in euc-kr -_korean_month = u'\uc6d4' # bff9 in euc-kr -_korean_day = u'\uc77c' # c0cf in euc-kr -_korean_am = u'\uc624\uc804' # bfc0 c0fc in euc-kr -_korean_pm = u'\uc624\ud6c4' # bfc0 c8c4 in euc-kr +_korean_year = '\ub144' # b3e2 in euc-kr +_korean_month = '\uc6d4' # bff9 in euc-kr +_korean_day = '\uc77c' # c0cf in euc-kr +_korean_am = '\uc624\uc804' # bfc0 c0fc in euc-kr +_korean_pm = '\uc624\ud6c4' # bfc0 c8c4 in euc-kr _korean_onblog_date_re = \ - re.compile('(\d{4})%s\s+(\d{2})%s\s+(\d{2})%s\s+(\d{2}):(\d{2}):(\d{2})' % \ + re.compile(r'(\d{4})%s\s+(\d{2})%s\s+(\d{2})%s\s+(\d{2}):(\d{2}):(\d{2})' % \ (_korean_year, _korean_month, _korean_day)) _korean_nate_date_re = \ - re.compile(u'(\d{4})-(\d{2})-(\d{2})\s+(%s|%s)\s+(\d{,2}):(\d{,2}):(\d{,2})' % \ + re.compile(r'(\d{4})-(\d{2})-(\d{2})\s+(%s|%s)\s+(\d{,2}):(\d{,2}):(\d{,2})' % \ (_korean_am, _korean_pm)) def _parse_date_onblog(dateString): '''Parse a string according to the OnBlog 8-bit date format''' @@ -2069,7 +2071,7 @@ def _parse_date_nate(dateString): registerDateHandler(_parse_date_nate) _mssql_date_re = \ - re.compile('(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2}):(\d{2})(\.\d+)?') + re.compile(r'(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2}):(\d{2})(\.\d+)?') def _parse_date_mssql(dateString): '''Parse a string according to the MS SQL date format''' m = _mssql_date_re.match(dateString) @@ -2085,40 +2087,40 @@ def _parse_date_mssql(dateString): # Unicode strings for Greek date strings _greek_months = \ { \ - u'\u0399\u03b1\u03bd': u'Jan', # c9e1ed in iso-8859-7 - u'\u03a6\u03b5\u03b2': u'Feb', # d6e5e2 in iso-8859-7 - u'\u039c\u03ac\u03ce': u'Mar', # ccdcfe in iso-8859-7 - u'\u039c\u03b1\u03ce': u'Mar', # cce1fe in iso-8859-7 - u'\u0391\u03c0\u03c1': u'Apr', # c1f0f1 in iso-8859-7 - u'\u039c\u03ac\u03b9': u'May', # ccdce9 in iso-8859-7 - u'\u039c\u03b1\u03ca': u'May', # cce1fa in iso-8859-7 - u'\u039c\u03b1\u03b9': u'May', # cce1e9 in iso-8859-7 - u'\u0399\u03bf\u03cd\u03bd': u'Jun', # c9effded in iso-8859-7 - u'\u0399\u03bf\u03bd': u'Jun', # c9efed in iso-8859-7 - u'\u0399\u03bf\u03cd\u03bb': u'Jul', # c9effdeb in iso-8859-7 - u'\u0399\u03bf\u03bb': u'Jul', # c9f9eb in iso-8859-7 - u'\u0391\u03cd\u03b3': u'Aug', # c1fde3 in iso-8859-7 - u'\u0391\u03c5\u03b3': u'Aug', # c1f5e3 in iso-8859-7 - u'\u03a3\u03b5\u03c0': u'Sep', # d3e5f0 in iso-8859-7 - u'\u039f\u03ba\u03c4': u'Oct', # cfeaf4 in iso-8859-7 - u'\u039d\u03bf\u03ad': u'Nov', # cdefdd in iso-8859-7 - u'\u039d\u03bf\u03b5': u'Nov', # cdefe5 in iso-8859-7 - u'\u0394\u03b5\u03ba': u'Dec', # c4e5ea in iso-8859-7 + '\u0399\u03b1\u03bd': 'Jan', # c9e1ed in iso-8859-7 + '\u03a6\u03b5\u03b2': 'Feb', # d6e5e2 in iso-8859-7 + '\u039c\u03ac\u03ce': 'Mar', # ccdcfe in iso-8859-7 + '\u039c\u03b1\u03ce': 'Mar', # cce1fe in iso-8859-7 + '\u0391\u03c0\u03c1': 'Apr', # c1f0f1 in iso-8859-7 + '\u039c\u03ac\u03b9': 'May', # ccdce9 in iso-8859-7 + '\u039c\u03b1\u03ca': 'May', # cce1fa in iso-8859-7 + '\u039c\u03b1\u03b9': 'May', # cce1e9 in iso-8859-7 + '\u0399\u03bf\u03cd\u03bd': 'Jun', # c9effded in iso-8859-7 + '\u0399\u03bf\u03bd': 'Jun', # c9efed in iso-8859-7 + '\u0399\u03bf\u03cd\u03bb': 'Jul', # c9effdeb in iso-8859-7 + '\u0399\u03bf\u03bb': 'Jul', # c9f9eb in iso-8859-7 + '\u0391\u03cd\u03b3': 'Aug', # c1fde3 in iso-8859-7 + '\u0391\u03c5\u03b3': 'Aug', # c1f5e3 in iso-8859-7 + '\u03a3\u03b5\u03c0': 'Sep', # d3e5f0 in iso-8859-7 + '\u039f\u03ba\u03c4': 'Oct', # cfeaf4 in iso-8859-7 + '\u039d\u03bf\u03ad': 'Nov', # cdefdd in iso-8859-7 + 
'\u039d\u03bf\u03b5': 'Nov', # cdefe5 in iso-8859-7 + '\u0394\u03b5\u03ba': 'Dec', # c4e5ea in iso-8859-7 } _greek_wdays = \ { \ - u'\u039a\u03c5\u03c1': u'Sun', # caf5f1 in iso-8859-7 - u'\u0394\u03b5\u03c5': u'Mon', # c4e5f5 in iso-8859-7 - u'\u03a4\u03c1\u03b9': u'Tue', # d4f1e9 in iso-8859-7 - u'\u03a4\u03b5\u03c4': u'Wed', # d4e5f4 in iso-8859-7 - u'\u03a0\u03b5\u03bc': u'Thu', # d0e5ec in iso-8859-7 - u'\u03a0\u03b1\u03c1': u'Fri', # d0e1f1 in iso-8859-7 - u'\u03a3\u03b1\u03b2': u'Sat', # d3e1e2 in iso-8859-7 + '\u039a\u03c5\u03c1': 'Sun', # caf5f1 in iso-8859-7 + '\u0394\u03b5\u03c5': 'Mon', # c4e5f5 in iso-8859-7 + '\u03a4\u03c1\u03b9': 'Tue', # d4f1e9 in iso-8859-7 + '\u03a4\u03b5\u03c4': 'Wed', # d4e5f4 in iso-8859-7 + '\u03a0\u03b5\u03bc': 'Thu', # d0e5ec in iso-8859-7 + '\u03a0\u03b1\u03c1': 'Fri', # d0e1f1 in iso-8859-7 + '\u03a3\u03b1\u03b2': 'Sat', # d3e1e2 in iso-8859-7 } _greek_date_format_re = \ - re.compile(u'([^,]+),\s+(\d{2})\s+([^\s]+)\s+(\d{4})\s+(\d{2}):(\d{2}):(\d{2})\s+([^\s]+)') + re.compile(r'([^,]+),\s+(\d{2})\s+([^\s]+)\s+(\d{4})\s+(\d{2}):(\d{2}):(\d{2})\s+([^\s]+)') def _parse_date_greek(dateString): '''Parse a string according to a Greek 8-bit date format.''' @@ -2140,22 +2142,22 @@ def _parse_date_greek(dateString): # Unicode strings for Hungarian date strings _hungarian_months = \ { \ - u'janu\u00e1r': u'01', # e1 in iso-8859-2 - u'febru\u00e1ri': u'02', # e1 in iso-8859-2 - u'm\u00e1rcius': u'03', # e1 in iso-8859-2 - u'\u00e1prilis': u'04', # e1 in iso-8859-2 - u'm\u00e1ujus': u'05', # e1 in iso-8859-2 - u'j\u00fanius': u'06', # fa in iso-8859-2 - u'j\u00falius': u'07', # fa in iso-8859-2 - u'augusztus': u'08', - u'szeptember': u'09', - u'okt\u00f3ber': u'10', # f3 in iso-8859-2 - u'november': u'11', - u'december': u'12', + 'janu\u00e1r': '01', # e1 in iso-8859-2 + 'febru\u00e1ri': '02', # e1 in iso-8859-2 + 'm\u00e1rcius': '03', # e1 in iso-8859-2 + '\u00e1prilis': '04', # e1 in iso-8859-2 + 'm\u00e1ujus': '05', # e1 in iso-8859-2 + 'j\u00fanius': '06', # fa in iso-8859-2 + 'j\u00falius': '07', # fa in iso-8859-2 + 'augusztus': '08', + 'szeptember': '09', + 'okt\u00f3ber': '10', # f3 in iso-8859-2 + 'november': '11', + 'december': '12', } _hungarian_date_format_re = \ - re.compile(u'(\d{4})-([^-]+)-(\d{,2})T(\d{,2}):(\d{2})((\+|-)(\d{,2}:\d{2}))') + re.compile(r'(\d{4})-([^-]+)-(\d{,2})T(\d{,2}):(\d{2})((\+|-)(\d{,2}:\d{2}))') def _parse_date_hungarian(dateString): '''Parse a string according to a Hungarian 8-bit date format.''' @@ -2260,14 +2262,14 @@ def __extract_tzd(m): return -offset return offset - __date_re = ('(?P\d\d\d\d)' + __date_re = (r'(?P\d\d\d\d)' '(?:(?P-|)' - '(?:(?P\d\d\d)' - '|(?P\d\d)(?:(?P=dsep)(?P\d\d))?))?') - __tzd_re = '(?P[-+](?P\d\d)(?::?(?P\d\d))|Z)' + r'(?:(?P\d\d\d)' + r'|(?P\d\d)(?:(?P=dsep)(?P\d\d))?))?') + __tzd_re = r'(?P[-+](?P\d\d)(?::?(?P\d\d))|Z)' __tzd_rx = re.compile(__tzd_re) - __time_re = ('(?P\d\d)(?P:|)(?P\d\d)' - '(?:(?P=tsep)(?P\d\d(?:[.,]\d+)?))?' + __time_re = (r'(?P\d\d)(?P:|)(?P\d\d)' + r'(?:(?P=tsep)(?P\d\d(?:[.,]\d+)?))?' + __tzd_re) __datetime_re = '%s(?:T%s)?' 
% (__date_re, __time_re) __datetime_rx = re.compile(__datetime_re) @@ -2428,7 +2430,7 @@ def _parseHTTPContentType(content_type): else: # ASCII-compatible pass - xml_encoding_match = re.compile('^<\?.*encoding=[\'"](.*?)[\'"].*\?>').match(xml_data) + xml_encoding_match = re.compile('^<\\?.*encoding=[\'"](.*?)[\'"].*\\?>').match(xml_data) except: xml_encoding_match = None if xml_encoding_match: @@ -2499,12 +2501,12 @@ def _toUTF8(data, encoding): data = data[4:] newdata = unicode(data, encoding) if _debug: sys.stderr.write('successfully converted %s data to unicode\n' % encoding) - declmatch = re.compile('^<\?xml[^>]*?>') + declmatch = re.compile(r'^<\?xml[^>]*?>') newdecl = '''''' if declmatch.search(newdata): newdata = declmatch.sub(newdecl, newdata) else: - newdata = newdecl + u'\n' + newdata + newdata = newdecl + '\n' + newdata return newdata.encode('utf-8') def _stripDoctype(data): @@ -2708,18 +2710,18 @@ def parse(url_file_stream_or_string, etag=None, modified=None, agent=None, refer if __name__ == '__main__': if not sys.argv[1:]: - print __doc__ + print(__doc__) sys.exit(0) else: urls = sys.argv[1:] zopeCompatibilityHack() from pprint import pprint for url in urls: - print url - print + print(url) + print() result = parse(url) pprint(result) - print + print() #REVISION HISTORY #1.0 - 9/27/2002 - MAP - fixed namespace processing on prefixed RSS 2.0 elements, diff --git a/code/planet/htmltmpl.py b/code/planet/htmltmpl.py index be6e41bb..52f5d326 100644 --- a/code/planet/htmltmpl.py +++ b/code/planet/htmltmpl.py @@ -1,4 +1,3 @@ - """ A templating engine for separation of code and HTML. The documentation of this templating engine is separated to two parts: @@ -31,9 +30,9 @@ __version__ = 1.22 __author__ = "Tomas Styblo (tripie@cpan.org)" +import pickle # All imported modules are part of the standard Python library. -from types import * import re import os import os.path @@ -42,7 +41,6 @@ import copy import cgi # for HTML escaping of variables import urllib # for URL escaping of variables -import cPickle # for template compilation import gettext INCLUDE_DIR = "inc" @@ -164,8 +162,8 @@ def __init__(self, include=1, max_include=5, precompile=1, comments=1, # multitask/thread safe. Currently it works only on UNIX # and Windows. Anyone willing to implement it on Mac ? if precompile and not LOCKTYPE: - raise TemplateError, "Template precompilation is not "\ - "available on this platform." + raise TemplateError("Template precompilation is not " \ + "available on this platform.") self.DEB("INIT DONE") def prepare(self, file): @@ -202,9 +200,9 @@ def prepare(self, file): if self.is_precompiled(file): try: precompiled = self.load_precompiled(file) - except PrecompiledError, template: - print >> sys.stderr, "Htmltmpl: bad precompiled "\ - "template '%s' removed" % template + except PrecompiledError as template: + print("Htmltmpl: bad precompiled "\ + "template '%s' removed" % template, file=sys.stderr) compiled = self.compile(file) self.save_precompiled(compiled) else: @@ -258,7 +256,7 @@ def DEB(self, str): """ Print debugging message to stderr if debugging is enabled. @hidden """ - if self._debug: print >> sys.stderr, str + if self._debug: print(str, file=sys.stderr) def lock_file(self, file, lock): """ Provide platform independent file locking. 
@@ -273,7 +271,7 @@ def lock_file(self, file, lock):
             elif lock == LOCK_UN:
                 fcntl.flock(fd, fcntl.LOCK_UN)
             else:
-                raise TemplateError, "BUG: bad lock in lock_file"
+                raise TemplateError("BUG: bad lock in lock_file")
         elif LOCKTYPE == LOCKTYPE_MSVCRT:
             if lock == LOCK_SH:
                 # msvcrt does not support shared locks :-(
@@ -283,9 +281,9 @@ def lock_file(self, file, lock):
             elif lock == LOCK_UN:
                 msvcrt.locking(fd, msvcrt.LK_UNLCK, 1)
             else:
-                raise TemplateError, "BUG: bad lock in lock_file"
+                raise TemplateError("BUG: bad lock in lock_file")
         else:
-            raise TemplateError, "BUG: bad locktype in lock_file"
+            raise TemplateError("BUG: bad locktype in lock_file")
 
     def compile(self, file):
         """ Compile the template.
@@ -323,14 +321,14 @@ def load_precompiled(self, file):
         try:
             file = open(filename, "rb")
             self.lock_file(file, LOCK_SH)
-            precompiled = cPickle.load(file)
-        except IOError, (errno, errstr):
-            raise TemplateError, "IO error in load precompiled "\
-                                 "template '%s': (%d) %s"\
-                                 % (filename, errno, errstr)
-        except cPickle.UnpicklingError:
+            precompiled = pickle.load(file)
+        except IOError as e:
+            raise TemplateError("IO error in load precompiled " \
+                                "template '%s': (%d) %s" \
+                                % (filename, e.errno, e.strerror))
+        except pickle.UnpicklingError:
             remove_bad = 1
-            raise PrecompiledError, filename
+            raise PrecompiledError(filename)
         except:
             remove_bad = 1
             raise
@@ -361,9 +359,9 @@ def save_precompiled(self, template):
         # Check if we have write permission to the template's directory.
         template_dir = os.path.dirname(os.path.abspath(filename))
         if not os.access(template_dir, os.W_OK):
-            raise TemplateError, "Cannot save precompiled templates "\
-                                 "to '%s': write permission denied."\
-                                 % template_dir
+            raise TemplateError("Cannot save precompiled templates " \
+                                "to '%s': write permission denied." \
+                                % template_dir)
         try:
             remove_bad = 0
             file = None
@@ -373,19 +371,19 @@ def save_precompiled(self, template):
             BINARY = 1
             READABLE = 0
             if self._debug:
-                cPickle.dump(template, file, READABLE)
+                pickle.dump(template, file, READABLE)
             else:
-                cPickle.dump(template, file, BINARY)
-        except IOError, (errno, errstr):
+                pickle.dump(template, file, BINARY)
+        except IOError as e:
             remove_bad = 1
-            raise TemplateError, "IO error while saving precompiled "\
-                                 "template '%s': (%d) %s"\
-                                 % (filename, errno, errstr)
-        except cPickle.PicklingError, error:
+            raise TemplateError("IO error while saving precompiled " \
+                                "template '%s': (%d) %s" \
+                                % (filename, e.errno, e.strerror))
+        except pickle.PicklingError as error:
             remove_bad = 1
-            raise TemplateError, "Pickling error while saving "\
-                                 "precompiled template '%s': %s"\
-                                 % (filename, error)
+            raise TemplateError("Pickling error while saving " \
+                                "precompiled template '%s': %s" \
+                                % (filename, error))
         except:
             remove_bad = 1
             raise
@@ -490,14 +488,14 @@ def set(self, var, value):
         if self.is_ordinary_var(value):
             # template top-level ordinary variable
             if not var.islower():
-                raise TemplateError, "Invalid variable name '%s'." % var
-        elif type(value) == ListType:
+                raise TemplateError("Invalid variable name '%s'." % var)
+        elif type(value) == list:
             # template top-level loop
             if var != var.capitalize():
-                raise TemplateError, "Invalid loop name '%s'." % var
+                raise TemplateError("Invalid loop name '%s'." % var)
         else:
-            raise TemplateError, "Value of toplevel variable '%s' must "\
-                  "be either a scalar or a list." % var
+            raise TemplateError("Value of toplevel variable '%s' must " \
+                                "be either a scalar or a list."
% var) self._vars[var] = value self.DEB("VALUE SET: " + str(var)) @@ -552,7 +550,7 @@ def process(self, template, part=None): self.DEB("APP INPUT:") if self._debug: pprint.pprint(self._vars, sys.stderr) if part != None and (part == 0 or part < self._current_part): - raise TemplateError, "process() - invalid part number" + raise TemplateError("process() - invalid part number") # This flag means "jump behind the end of current statement" or # "skip the parameters of current statement". @@ -595,7 +593,7 @@ def process(self, template, part=None): # TMPL_VARs should be first. They are the most common. var = tokens[i + PARAM_NAME] if not var: - raise TemplateError, "No identifier in ." + raise TemplateError("No identifier in .") escape = tokens[i + PARAM_ESCAPE] globalp = tokens[i + PARAM_GLOBAL] skip_params = 1 @@ -611,7 +609,7 @@ def process(self, template, part=None): elif token == "." + raise TemplateError("No identifier in .") skip_params = 1 # Find total number of passes in this loop. @@ -638,7 +636,7 @@ def process(self, template, part=None): elif token == "." + raise TemplateError("No identifier in .") globalp = tokens[i + PARAM_GLOBAL] skip_params = 1 if self.find_value(var, loop_name, loop_pass, @@ -652,7 +650,7 @@ def process(self, template, part=None): elif token == "." + raise TemplateError("No identifier in .") globalp = tokens[i + PARAM_GLOBAL] skip_params = 1 if self.find_value(var, loop_name, loop_pass, @@ -666,7 +664,7 @@ def process(self, template, part=None): elif token == "." + raise TemplateError("Unmatched .") # If this loop was not disabled, then record the pass. if loop_total[-1] > 0: loop_pass[-1] += 1 @@ -689,21 +687,21 @@ def process(self, template, part=None): elif token == "." + raise TemplateError("Unmatched .") output_control.pop() self.DEB("IF: END") elif token == "." + raise TemplateError("Unmatched .") output_control.pop() self.DEB("UNLESS: END") elif token == "." + raise TemplateError("Unmatched .") if output_control[-1] == DISABLE_OUTPUT: # Condition was false, activate the ELSE block. output_control[-1] = ENABLE_OUTPUT @@ -713,7 +711,7 @@ def process(self, template, part=None): output_control[-1] = DISABLE_OUTPUT self.DEB("ELSE: DISABLE") else: - raise TemplateError, "BUG: ELSE: INVALID FLAG" + raise TemplateError("BUG: ELSE: INVALID FLAG") elif token == "." % token + raise TemplateError("Invalid statement %s>." % token) elif DISABLE_OUTPUT not in output_control: # Raw textual template data. @@ -762,8 +760,8 @@ def process(self, template, part=None): # end of the big while loop # Check whether all opening statements were closed. - if loop_name: raise TemplateError, "Missing ." - if output_control: raise TemplateError, "Missing or " + if loop_name: raise TemplateError("Missing .") + if output_control: raise TemplateError("Missing or ") return out ############################################## @@ -774,7 +772,7 @@ def DEB(self, str): """ Print debugging message to stderr if debugging is enabled. @hidden """ - if self._debug: print >> sys.stderr, str + if self._debug: print(str, file=sys.stderr) def find_value(self, var, loop_name, loop_pass, loop_total, global_override=None): @@ -816,7 +814,7 @@ def find_value(self, var, loop_name, loop_pass, loop_total, if scope.has_key(var): # Value exists in current loop. - if type(scope[var]) == ListType: + if type(scope[var]) == list: # The requested value is a loop. # Return total number of its passes. 
return len(scope[var]) @@ -882,12 +880,12 @@ def magic_var(self, var, loop_pass, loop_total): try: every = int(var[9:]) # nine is length of "__EVERY__" except ValueError: - raise TemplateError, "Magic variable __EVERY__x: "\ - "Invalid pass number." + raise TemplateError("Magic variable __EVERY__x: " \ + "Invalid pass number.") else: if not every: - raise TemplateError, "Magic variable __EVERY__x: "\ - "Pass number cannot be zero." + raise TemplateError("Magic variable __EVERY__x: " \ + "Pass number cannot be zero.") elif (loop_pass + 1) % every == 0: self.DEB("MAGIC: EVERY: " + str(every)) return 1 @@ -896,7 +894,7 @@ def magic_var(self, var, loop_pass, loop_total): else: return 0 else: - raise TemplateError, "Invalid magic variable '%s'." % var + raise TemplateError("Invalid magic variable '%s'." % var) def escape(self, str, override=""): """ Escape a string either by HTML escaping or by URL escaping. @@ -915,12 +913,7 @@ def is_ordinary_var(self, var): """ Return true if var is a scalar. (not a reference to loop) @hidden """ - if type(var) == StringType or type(var) == IntType or \ - type(var) == LongType or type(var) == FloatType: - return 1 - else: - return 0 - + return isinstance(var, (str, int, float)) ############################################## # CLASS: TemplateCompiler # @@ -1020,7 +1013,7 @@ def DEB(self, str): """ Print debugging message to stderr if debugging is enabled. @hidden """ - if self._debug: print >> sys.stderr, str + if self._debug: print(str, file=sys.stderr) def read(self, filename): """ Read content of file and return it. Raise an error if a problem @@ -1033,9 +1026,9 @@ def read(self, filename): try: f = open(filename, "r") data = f.read() - except IOError, (errno, errstr): - raise TemplateError, "IO error while reading template '%s': "\ - "(%d) %s" % (filename, errno, errstr) + except IOError as (errno, errstr): + raise TemplateError("IO error while reading template '%s': " \ + "(%d) %s" % (filename, errno, errstr)) else: return data finally: @@ -1086,7 +1079,7 @@ def include_templates(self, tokens): if token == "." + raise TemplateError("No filename in .") self._include_level += 1 if self._include_level > self._max_include: # Do not include the template. @@ -1274,7 +1267,7 @@ def find_param(self, param, params): for pair in params: name, value = pair.split("=") if not name or not value: - raise TemplateError, "Syntax error in template." + raise TemplateError("Syntax error in template.") if name == param: if value[0] == '"': # The value is in double quotes. @@ -1329,15 +1322,15 @@ def __init__(self, version, file, include_files, tokens, compile_params, if os.path.isfile(file): self._mtime = os.path.getmtime(file) else: - raise TemplateError, "Template: file does not exist: '%s'" % file + raise TemplateError("Template: file does not exist: '%s'" % file) # Save modificaton times of all included template files. for inc_file in include_files: if os.path.isfile(inc_file): self._include_mtimes[inc_file] = os.path.getmtime(inc_file) else: - raise TemplateError, "Template: file does not exist: '%s'"\ - % inc_file + raise TemplateError("Template: file does not exist: '%s'" \ + % inc_file) self.DEB("NEW TEMPLATE CREATED") @@ -1435,7 +1428,7 @@ def DEB(self, str): """ Print debugging message to stderr. 
@hidden """ - if self._debug: print >> sys.stderr, str + if self._debug: print(str, file=sys.stderr) ############################################## diff --git a/code/planet/sanitize.py b/code/planet/sanitize.py index c98b14de..d5b8c976 100644 --- a/code/planet/sanitize.py +++ b/code/planet/sanitize.py @@ -1,5 +1,7 @@ """ sanitize: bringing sanitiy to world of messed-up data + +TODO: py2->3 """ __author__ = ["Mark Pilgrim ", @@ -8,6 +10,8 @@ __license__ = "BSD" __version__ = "0.25" +import sys + _debug = 0 # If you want sanitize to automatically run HTML markup through HTML Tidy, set @@ -19,7 +23,8 @@ # if TIDY_MARKUP = 1 PREFERRED_TIDY_INTERFACES = ["uTidy", "mxTidy"] -import sgmllib, re +import re +from html.parser import HTMLParser # chardet library auto-detects character encodings # Download from http://chardet.feedparser.org/ @@ -39,7 +44,7 @@ class _BaseHTMLProcessor(sgmllib.SGMLParser): 'img', 'input', 'isindex', 'link', 'meta', 'param'] _r_barebang = re.compile(r'') def __init__(self, encoding): @@ -62,7 +67,7 @@ def feed(self, data): data = self._r_barebang.sub(r'<!\1', data) data = self._r_bareamp.sub("&", data) data = self._r_shorttag.sub(self._shorttag_replace, data) - if self.encoding and type(data) == type(u''): + if self.encoding and type(data) == str: data = data.encode(self.encoding) sgmllib.SGMLParser.feed(self, data) @@ -80,10 +85,10 @@ def unknown_starttag(self, tag, attrs): uattrs = [] # thanks to Kevin Marks for this breathtaking hack to deal with (valid) high-bit attribute values in UTF-8 feeds for key, value in attrs: - if type(value) != type(u''): - value = unicode(value, self.encoding) - uattrs.append((unicode(key, self.encoding), value)) - strattrs = u''.join([u' %s="%s"' % (key, value) for key, value in uattrs]).encode(self.encoding) + if type(value) != str: + value = str(value, self.encoding) + uattrs.append((str(key, self.encoding), value)) + strattrs = ''.join([f' {key}="{value}"' for key, value in uattrs]).encode(self.encoding) if tag in self.elements_no_end_tag: self.pieces.append('<%(tag)s%(strattrs)s />' % locals()) else: @@ -254,12 +259,12 @@ def _tidy(data, **kwargs): except: pass if _tidy: - utf8 = type(data) == type(u'') + utf8 = type(data) == str if utf8: data = data.encode('utf-8') data = _tidy(data, output_xhtml=1, numeric_entities=1, wrap=0, char_encoding="utf8") if utf8: - data = unicode(data, 'utf-8') + data = str(data, 'utf-8') if data.count(''): @@ -339,7 +344,7 @@ def tryEncoding(encoding): if encoding == 'ebcdic': return _ebcdic_to_ascii(text) try: - return unicode(text, encoding) + return str(text, encoding) except UnicodeDecodeError: pass _triedEncodings.append(encoding) diff --git a/config/sort-ini.py b/config/sort-ini.py index 98976d3b..ec040bd8 100755 --- a/config/sort-ini.py +++ b/config/sort-ini.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import sys import ConfigParser @@ -19,7 +19,7 @@ if oconfig._defaults: fd.write("[%s]\n" % DEFAULTSECT) for (key, value) in oconfig._defaults.items(): - fd.write("%s = %s\n" % (key, str(value).replace('\n', '\n\t'))) + fd.write("{} = {}\n".format(key, str(value).replace('\n', '\n\t'))) fd.write("\n") result = {}
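
Note on the remaining work: sanitize.py and feedparser.py are still flagged "TODO: py2->3" and keep their sgmllib-based _BaseHTMLProcessor, but sgmllib no longer exists in Python 3. Below is a minimal sketch of how that class could be ported to html.parser, following the same pattern this patch already uses for stripHtml in planet/__init__.py. It is not part of the patch; the method mapping and attribute handling are assumptions, not the final implementation.

from html.parser import HTMLParser

class _BaseHTMLProcessor(HTMLParser):
    # void elements copied from the existing sanitize.py list
    elements_no_end_tag = ['area', 'base', 'basefont', 'br', 'col', 'frame',
                           'hr', 'img', 'input', 'isindex', 'link', 'meta', 'param']

    def __init__(self, encoding):
        # convert_charrefs=False keeps the separate charref/entityref callbacks,
        # which is the closest match to the old SGMLParser behaviour
        super().__init__(convert_charrefs=False)
        self.encoding = encoding

    def reset(self):
        # called by HTMLParser.__init__ as well as by callers between feeds
        self.pieces = []
        super().reset()

    def handle_starttag(self, tag, attrs):
        # attribute values are already str in Python 3, so the old
        # unicode() re-decoding hack is no longer needed
        strattrs = ''.join(' %s="%s"' % (k, v if v is not None else k)
                           for k, v in attrs)
        if tag in self.elements_no_end_tag:
            self.pieces.append('<%s%s />' % (tag, strattrs))
        else:
            self.pieces.append('<%s%s>' % (tag, strattrs))

    def handle_endtag(self, tag):
        if tag not in self.elements_no_end_tag:
            self.pieces.append('</%s>' % tag)

    def handle_charref(self, ref):
        self.pieces.append('&#%s;' % ref)

    def handle_entityref(self, ref):
        self.pieces.append('&%s;' % ref)

    def handle_data(self, text):
        self.pieces.append(text)

    def handle_comment(self, text):
        self.pieces.append('<!--%s-->' % text)

    def output(self):
        """Return processed HTML as a single string."""
        return ''.join(self.pieces)

# Illustrative usage, assuming the class above:
#   p = _BaseHTMLProcessor('utf-8')
#   p.feed('<p>hello &amp; goodbye<br>')
#   p.close()
#   p.output()   # -> '<p>hello &amp; goodbye<br />'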