Create a unicode-friendly logging mechanism that should help eliminate unicode errors on non-English OSes.

This commit is contained in:
Kovid Goyal 2008-05-04 05:50:45 -07:00
parent 5b99ec33c3
commit 12676bceff
8 changed files with 157 additions and 146 deletions

View File

@ -7,7 +7,7 @@ __author__ = "Kovid Goyal <kovid at kovidgoyal.net>"
__appname__ = 'calibre'
import sys, os, logging, mechanize, locale, copy, cStringIO, re, subprocess, \
textwrap, atexit, cPickle
textwrap, atexit, cPickle, codecs
from gettext import GNUTranslations
from math import floor
from optparse import OptionParser as _OptionParser
@ -31,6 +31,13 @@ try:
except:
pass
# Determine the user's preferred character encoding for console/log
# output, falling back to UTF-8 when the locale reports a missing or
# unknown codec (common on misconfigured non-English OSes).
try:
    preferred_encoding = locale.getpreferredencoding()
    codecs.lookup(preferred_encoding)
# Narrowed from a bare except: a bare clause would also swallow
# SystemExit/KeyboardInterrupt. locale.Error and LookupError are the
# expected failures here.
except Exception:
    preferred_encoding = 'utf-8'
def osx_version():
if isosx:
import platform
@ -486,3 +493,37 @@ def english_sort(x, y):
Compare two English phrases, ignoring starting prepositions.
'''
return cmp(_spat.sub('', x), _spat.sub('', y))
class LoggingInterface:
    """Mixin that wraps a ``logging.Logger`` and encodes every unicode
    positional argument (including the format string) into the user's
    ``preferred_encoding`` before logging, so handlers on non-English
    OSes never choke on raw unicode."""

    def __init__(self, logger):
        # Private (name-mangled) reference to the wrapped logger.
        self.__logger = logger

    def ___log(self, func, msg, args, kwargs):
        # Convert the message plus all positional args in one pass;
        # 'replace' guarantees encoding can never raise.
        def safe(arg):
            if isinstance(arg, unicode):
                return arg.encode(preferred_encoding, 'replace')
            return arg

        encoded = [safe(a) for a in [msg] + list(args)]
        func(*encoded, **kwargs)

    def log_debug(self, msg, *args, **kwargs):
        self.___log(self.__logger.debug, msg, args, kwargs)

    def log_info(self, msg, *args, **kwargs):
        self.___log(self.__logger.info, msg, args, kwargs)

    def log_warning(self, msg, *args, **kwargs):
        self.___log(self.__logger.warning, msg, args, kwargs)

    def log_warn(self, msg, *args, **kwargs):
        # Alias kept for callers that use the shorter name.
        self.___log(self.__logger.warning, msg, args, kwargs)

    def log_error(self, msg, *args, **kwargs):
        self.___log(self.__logger.error, msg, args, kwargs)

    def log_critical(self, msg, *args, **kwargs):
        self.___log(self.__logger.critical, msg, args, kwargs)

    def log_exception(self, msg, *args):
        self.___log(self.__logger.exception, msg, args, {})

View File

@ -29,7 +29,8 @@ from calibre.ebooks.lrf import Book, entity_to_unicode
from calibre.ebooks.lrf import option_parser as lrf_option_parser
from calibre.ebooks import ConversionError
from calibre.ebooks.lrf.html.table import Table
from calibre import filename_to_utf8, setup_cli_handlers, __appname__, fit_image
from calibre import filename_to_utf8, setup_cli_handlers, __appname__, \
fit_image, LoggingInterface
from calibre.ptempfile import PersistentTemporaryFile
from calibre.ebooks.metadata.opf import OPFReader
from calibre.devices.interface import Device
@ -77,7 +78,7 @@ def tag_regex(tagname):
return dict(open=r'(?:<\s*%(t)s\s+[^<>]*?>|<\s*%(t)s\s*>)'%dict(t=tagname), \
close=r'</\s*%(t)s\s*>'%dict(t=tagname))
class HTMLConverter(object):
class HTMLConverter(object, LoggingInterface):
SELECTOR_PAT = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}")
PAGE_BREAK_PAT = re.compile(r'page-break-(?:after|before)\s*:\s*(\w+)', re.IGNORECASE)
IGNORED_TAGS = (Comment, Declaration, ProcessingInstruction)
@ -159,7 +160,7 @@ class HTMLConverter(object):
def __getattr__(self, attr):
if hasattr(self.options, attr):
return getattr(self.options, attr)
return object.__getattr__(self, attr)
return object.__getattribute__(self, attr)
def __setattr__(self, attr, val):
if hasattr(self.options, attr):
@ -202,7 +203,7 @@ class HTMLConverter(object):
'''
# Defaults for various formatting tags
object.__setattr__(self, 'options', options)
self.logger = logger
LoggingInterface.__init__(self, logger)
self.fonts = fonts #: dict specifying font families to use
# Memory
self.scaled_images = {} #: Temporary files with scaled version of images
@ -273,9 +274,9 @@ class HTMLConverter(object):
if link['path'] == path:
self.links.remove(link)
break
self.logger.warn('Could not process '+path)
self.log_warn('Could not process '+path)
if self.verbose:
self.logger.exception(' ')
self.log_exception(' ')
self.links = self.process_links()
self.link_level += 1
paths = [link['path'] for link in self.links]
@ -288,7 +289,7 @@ class HTMLConverter(object):
self.book.addTocEntry(ascii_text, tb)
if self.base_font_size > 0:
self.logger.info('\tRationalizing font sizes...')
self.log_info('\tRationalizing font sizes...')
self.book.rationalize_font_sizes(self.base_font_size)
def is_baen(self, soup):
@ -304,9 +305,9 @@ class HTMLConverter(object):
if not self.book_designer and self.is_book_designer(raw):
self.book_designer = True
self.logger.info(_('\tBook Designer file detected.'))
self.log_info(_('\tBook Designer file detected.'))
self.logger.info(_('\tParsing HTML...'))
self.log_info(_('\tParsing HTML...'))
if self.baen:
nmassage.extend(HTMLConverter.BAEN)
@ -328,7 +329,7 @@ class HTMLConverter(object):
if not self.baen and self.is_baen(soup):
self.baen = True
self.logger.info(_('\tBaen file detected. Re-parsing...'))
self.log_info(_('\tBaen file detected. Re-parsing...'))
return self.preprocess(raw)
if self.book_designer:
t = soup.find(id='BookTitle')
@ -344,7 +345,7 @@ class HTMLConverter(object):
try:
dump = open(os.path.join(tdir, 'html2lrf-verbose.html'), 'wb')
dump.write(unicode(soup).encode('utf-8'))
self.logger.info(_('Written preprocessed HTML to ')+dump.name)
self.log_info(_('Written preprocessed HTML to ')+dump.name)
dump.close()
except:
pass
@ -357,7 +358,7 @@ class HTMLConverter(object):
self.css.update(self.override_css)
self.file_name = os.path.basename(path)
self.logger.info(_('Processing %s'), path if self.verbose else self.file_name)
self.log_info(_('Processing %s'), path if self.verbose else self.file_name)
upath = path.encode('utf-8') if isinstance(path, unicode) else path
if not os.path.exists(upath):
upath = upath.replace('&', '%26') #convertlit replaces & with %26 in file names
@ -371,7 +372,7 @@ class HTMLConverter(object):
raw = xml_to_unicode(raw, self.verbose)[0]
f.close()
soup = self.preprocess(raw)
self.logger.info(_('\tConverting to BBeB...'))
self.log_info(_('\tConverting to BBeB...'))
self.current_style = {}
self.page_break_found = False
self.target_prefix = path
@ -571,7 +572,7 @@ class HTMLConverter(object):
hasattr(target.parent, 'objId'):
self.book.addTocEntry(ascii_text, tb)
else:
self.logger.debug(_("Cannot add link %s to TOC"), ascii_text)
self.log_debug(_("Cannot add link %s to TOC"), ascii_text)
def get_target_block(fragment, targets):
@ -891,7 +892,7 @@ class HTMLConverter(object):
try:
im = PILImage.open(path)
except IOError, err:
self.logger.warning('Unable to process image: %s\n%s', original_path, err)
self.log_warning('Unable to process image: %s\n%s', original_path, err)
return
encoding = detect_encoding(im)
@ -913,7 +914,7 @@ class HTMLConverter(object):
self.scaled_images[path] = pt
return pt.name
except (IOError, SystemError), err: # PIL chokes on interlaced PNG images as well a some GIF images
self.logger.warning(_('Unable to process image %s. Error: %s')%(path, err))
self.log_warning(_('Unable to process image %s. Error: %s')%(path, err))
return None
pheight = int(self.current_page.pageStyle.attrs['textheight'])
@ -951,7 +952,7 @@ class HTMLConverter(object):
self.rotated_images[path] = pt
width, height = im.size
except IOError: # PIL chokes on interlaced PNG files and since auto-rotation is not critical we ignore the error
self.logger.debug(_('Unable to process interlaced PNG %s'), original_path)
self.log_debug(_('Unable to process interlaced PNG %s'), original_path)
finally:
pt.close()
@ -966,7 +967,7 @@ class HTMLConverter(object):
try:
self.images[path] = ImageStream(path, encoding=encoding)
except LrsError, err:
self.logger.warning(_('Could not process image: %s\n%s'), original_path, err)
self.log_warning(_('Could not process image: %s\n%s'), original_path, err)
return
im = Image(self.images[path], x0=0, y0=0, x1=width, y1=height,\
@ -1026,7 +1027,7 @@ class HTMLConverter(object):
if number_of_paragraphs > 2:
self.end_page()
self.logger.debug('Forcing page break at %s', tagname)
self.log_debug('Forcing page break at %s', tagname)
return end_page
def block_properties(self, tag_css):
@ -1434,7 +1435,7 @@ class HTMLConverter(object):
self.targets[self.target_prefix+tag[key]] = self.current_block
self.current_block.must_append = True
else:
self.logger.debug('Could not follow link to '+tag['href'])
self.log_debug('Could not follow link to '+tag['href'])
self.process_children(tag, tag_css, tag_pseudo_css)
elif tag.has_key('name') or tag.has_key('id'):
self.process_anchor(tag, tag_css, tag_pseudo_css)
@ -1453,9 +1454,9 @@ class HTMLConverter(object):
dropcaps = tag.has_key('class') and tag['class'] == 'libprs500_dropcaps'
self.process_image(path, tag_css, width, height, dropcaps=dropcaps)
elif not urlparse(tag['src'])[0]:
self.logger.warn('Could not find image: '+tag['src'])
self.log_warn('Could not find image: '+tag['src'])
else:
self.logger.debug("Failed to process: %s", str(tag))
self.log_debug("Failed to process: %s", str(tag))
elif tagname in ['style', 'link']:
ncss, npcss = {}, {}
if tagname == 'style':
@ -1475,7 +1476,7 @@ class HTMLConverter(object):
self.page_break_found = True
ncss, npcss = self.parse_css(src)
except IOError:
self.logger.warn('Could not read stylesheet: '+tag['href'])
self.log_warn('Could not read stylesheet: '+tag['href'])
if ncss:
update_css(ncss, self.css)
self.css.update(self.override_css)
@ -1605,7 +1606,7 @@ class HTMLConverter(object):
if not self.disable_chapter_detection and tagname.startswith('h'):
if self.chapter_regex.search(src):
self.logger.debug('Detected chapter %s', src)
self.log_debug('Detected chapter %s', src)
self.end_page()
self.page_break_found = True
@ -1656,9 +1657,9 @@ class HTMLConverter(object):
try:
self.process_table(tag, tag_css)
except Exception, err:
self.logger.warning(_('An error occurred while processing a table: %s. Ignoring table markup.'), str(err))
self.logger.debug('', exc_info=True)
self.logger.debug(_('Bad table:\n%s'), str(tag)[:300])
self.log_warning(_('An error occurred while processing a table: %s. Ignoring table markup.'), str(err))
self.log_debug('', exc_info=True)
self.log_debug(_('Bad table:\n%s'), str(tag)[:300])
self.in_table = False
self.process_children(tag, tag_css, tag_pseudo_css)
finally:

View File

@ -11,17 +11,11 @@ from PyQt4.QtGui import QTableView, QProgressDialog, QAbstractItemView, QColor,
from PyQt4.QtCore import QAbstractTableModel, QVariant, Qt, QString, \
QCoreApplication, SIGNAL, QObject, QSize, QModelIndex
from calibre import Settings
from calibre import Settings, preferred_encoding
from calibre.ptempfile import PersistentTemporaryFile
from calibre.library.database import LibraryDatabase, SearchToken
from calibre.gui2 import NONE, TableView, qstring_to_unicode
try:
pe = locale.getpreferredencoding()
codecs.lookup(pe)
except:
pe = 'utf-8'
class LibraryDelegate(QItemDelegate):
COLOR = QColor("blue")
SIZE = 16
@ -304,7 +298,7 @@ class BooksModel(QAbstractTableModel):
dt = self.db.timestamp(row)
if dt:
dt = dt - timedelta(seconds=time.timezone) + timedelta(hours=time.daylight)
return QVariant(dt.strftime(BooksView.TIME_FMT).decode(pe, 'replace'))
return QVariant(dt.strftime(BooksView.TIME_FMT).decode(preferred_encoding, 'replace'))
elif col == 4:
r = self.db.rating(row)
r = r/2 if r else 0

View File

@ -4,7 +4,7 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
Manage translation of user visible strings.
'''
import sys, os, cStringIO, tempfile, subprocess, functools
import sys, os, cStringIO, tempfile, subprocess, functools, tarfile
check_call = functools.partial(subprocess.check_call, shell=True)
from calibre.translations.pygettext import main as pygettext
@ -35,6 +35,8 @@ def main(args=sys.argv):
print 'Creating translations template'
pygettext(buf, ['-p', tdir]+files)
src = buf.getvalue()
tempdir = tempfile.mkdtemp()
tf = tarfile.open(os.path.join(tempdir, 'translations.tar.bz2'), 'w:bz2')
fd, fname = tempfile.mkstemp(suffix='.pot')
os.write(fd,src)
@ -46,13 +48,17 @@ def main(args=sys.argv):
else:
print 'Merging', os.path.basename(po)
check_call('msgmerge -v -U -N --backup=none '+po + ' ' + fname)
tf.add(po, os.path.basename(po))
buf = cStringIO.StringIO()
print 'Compiling translations'
msgfmt(buf, [po])
translations[tr] = buf.getvalue()
open(os.path.join(tdir, 'data.py'), 'wb').write('translations = '+repr(translations))
os.close(fd)
tf.add(fname, 'strings.pot')
tf.close()
os.unlink(fname)
print 'Translations tarball is in', os.path.join(tempdir, 'translations.tar.bz2')
return 0
if __name__ == '__main__':

File diff suppressed because one or more lines are too long

View File

@ -8,7 +8,7 @@ msgid ""
msgstr ""
"Project-Id-Version: it\n"
"POT-Creation-Date: 2008-05-02 17:09+PDT\n"
"PO-Revision-Date: 2008-04-05 14:25+0200\n"
"PO-Revision-Date: 2008-05-03 22:51+0200\n"
"Last-Translator: Iacopo Benesperi <iacchi@iacchi.org>\n"
"Language-Team: italiano\n"
"MIME-Version: 1.0\n"
@ -79,8 +79,7 @@ msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/lrf/__init__.py:108
msgid "Output file name. Default is derived from input filename"
msgstr ""
"Nome del file in uscita. Il nome predefinito è preso dal file in ingresso"
msgstr "Nome del file in uscita. Il nome predefinito è preso dal file in ingresso"
#: /home/kovid/work/calibre/src/calibre/ebooks/lrf/__init__.py:110
msgid ""
@ -110,8 +109,7 @@ msgstr ""
#: /home/kovid/work/calibre/src/calibre/ebooks/lrf/__init__.py:118
msgid "Set the space between words in pts. Default is %default"
msgstr ""
"Imposta lo spazio tra le parole in punti. Il valore predefinito è %default"
msgstr "Imposta lo spazio tra le parole in punti. Il valore predefinito è %default"
#: /home/kovid/work/calibre/src/calibre/ebooks/lrf/__init__.py:120
msgid "Separate paragraphs by blank lines."
@ -119,8 +117,7 @@ msgstr "Separa i paragrafi con linee bianche"
#: /home/kovid/work/calibre/src/calibre/ebooks/lrf/__init__.py:122
msgid "Add a header to all the pages with title and author."
msgstr ""
"Aggiunge a tutte le pagine un'intestazione contenente il titolo e l'autore"
msgstr "Aggiunge a tutte le pagine un'intestazione contenente il titolo e l'autore"
#: /home/kovid/work/calibre/src/calibre/ebooks/lrf/__init__.py:124
msgid ""
@ -219,8 +216,7 @@ msgstr ""
"ignorati. Predefinita: %default"
#: /home/kovid/work/calibre/src/calibre/ebooks/lrf/__init__.py:169
msgid ""
"Prevent the automatic insertion of page breaks before detected chapters."
msgid "Prevent the automatic insertion of page breaks before detected chapters."
msgstr ""
"Previene l'inserimento automatico di interruzioni di pagina prima dei "
"capitoli individuati"
@ -253,8 +249,7 @@ msgstr ""
"perciò ignorata se la pagina corrente ha solo pochi elementi"
#: /home/kovid/work/calibre/src/calibre/ebooks/lrf/__init__.py:186
msgid ""
"Force a page break before tags whoose names match this regular expression."
msgid "Force a page break before tags whoose names match this regular expression."
msgstr ""
"Forza un'interruzione di pagina prima dei tag i cui nomi corrispondono a "
"questa espressione regolare"
@ -384,6 +379,10 @@ msgid ""
"\n"
"%prog converts mybook.fb2 to mybook.lrf"
msgstr ""
"%prog [opzioni] miolibro.fb2\n"
"\n"
"\n"
"%prog converte miolibro.fb2 in miolibro.lrf"
#: /home/kovid/work/calibre/src/calibre/ebooks/lrf/fb2/convert_from.py:23
#: /home/kovid/work/calibre/src/calibre/ebooks/lrf/txt/convert_from.py:22
@ -582,13 +581,11 @@ msgstr "La categoria a cui questo libro appartiene. Es: Storia"
#: /home/kovid/work/calibre/src/calibre/ebooks/lrf/meta.py:559
msgid "Path to a graphic that will be set as this files' thumbnail"
msgstr ""
"Percorso a un'immagine che verrà impostata come miniatura di questo file"
msgstr "Percorso a un'immagine che verrà impostata come miniatura di questo file"
#: /home/kovid/work/calibre/src/calibre/ebooks/lrf/meta.py:562
msgid "Path to a txt file containing the comment to be stored in the lrf file."
msgstr ""
"Percorso a un file TXT contenente il commento che verrà incluso nel file LRF"
msgstr "Percorso a un file TXT contenente il commento che verrà incluso nel file LRF"
#: /home/kovid/work/calibre/src/calibre/ebooks/lrf/meta.py:566
msgid "Extract thumbnail from LRF file"
@ -1004,8 +1001,7 @@ msgstr "Nessun formato disponibile"
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/lrf_single.py:82
msgid "Cannot convert %s as this book has no supported formats"
msgstr ""
"Impossibile convertire %s perché questo libro non ha formati supportati"
msgstr "Impossibile convertire %s perché questo libro non ha formati supportati"
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/lrf_single.py:86
msgid "Choose the format to convert into LRF"
@ -1149,8 +1145,7 @@ msgstr "Ca&mbia l'immagine di copertina:"
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/lrf_single_ui.py:609
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single_ui.py:334
msgid "Browse for an image to use as the cover of this book."
msgstr ""
"Sfoglia per trovare un'immagine da usare come copertina per questo libro"
msgstr "Sfoglia per trovare un'immagine da usare come copertina per questo libro"
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/lrf_single_ui.py:611
msgid "Use cover from &source file"
@ -1517,10 +1512,8 @@ msgid "Fetch cover image from server"
msgstr "Scarica immagine di copertina dal server"
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single_ui.py:337
msgid ""
"Change the username and/or password for your account at LibraryThing.com"
msgstr ""
"Cambia il nome utente e/o password del proprio account su LibraryThing.com"
msgid "Change the username and/or password for your account at LibraryThing.com"
msgstr "Cambia il nome utente e/o password del proprio account su LibraryThing.com"
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/metadata_single_ui.py:338
msgid "Change password"
@ -1811,6 +1804,8 @@ msgid ""
"For help with writing advanced news recipes, please visit <a href=\"http://"
"calibre.kovidgoyal.net/user_manual/news.html\">User Recipes</a>"
msgstr ""
"Per un aiuto su come scrivere formule di notizie avanzate, visitare <a href=\"http://"
"calibre.kovidgoyal.net/user_manual/news.html\">Formule utente</a>"
#: /home/kovid/work/calibre/src/calibre/gui2/dialogs/user_profiles_ui.py:242
msgid "Recipe source code (python)"
@ -2287,7 +2282,7 @@ msgstr ""
#: /home/kovid/work/calibre/src/calibre/gui2/main_ui.py:266
msgid "calibre"
msgstr ""
msgstr "calibre"
#: /home/kovid/work/calibre/src/calibre/gui2/main_ui.py:267
msgid ""
@ -2303,6 +2298,17 @@ msgid ""
"a><br /><br /><span style=\" font-weight:600;\">calibre</span>: %1 by <span "
"style=\" font-weight:600;\">Kovid Goyal</span> %2<br />%3</p></body></html>"
msgstr ""
"<html><head><meta name=\"qrichtext\" content=\"1\" /><style type=\"text/css"
"\">\n"
"p, li { white-space: pre-wrap; }\n"
"</style></head><body style=\" font-family:'Sans Serif'; font-size:9pt; font-"
"weight:400; font-style:normal;\">\n"
"<p style=\" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-"
"right:0px; -qt-block-indent:0; text-indent:0px;\">Visitare <a href=\"http://calibre.kovidgoyal.net/user_manual\"><span style=\" text-decoration: "
"underline; color:#0000ff;\">calibre.kovidgoyal.net</span></a> per la "
"guida<br /><br /><span style=\" font-weight:600;\">calibre</span>: %1 di "
"<span style=\" font-weight:600;\">Kovid Goyal</span> %2<br />%3</p></body></"
"html>"
#: /home/kovid/work/calibre/src/calibre/gui2/main_ui.py:271
msgid "Advanced search"
@ -2535,8 +2541,7 @@ msgstr ""
msgid ""
"The directory in which to store the downloaded feeds. Defaults to the "
"current directory."
msgstr ""
"La cartella in cui salvare i feed scaricati. Perdefinita: cartella corrente"
msgstr "La cartella in cui salvare i feed scaricati. Perdefinita: cartella corrente"
#: /home/kovid/work/calibre/src/calibre/web/feeds/main.py:55
msgid "Dont show the progress bar"
@ -2587,8 +2592,7 @@ msgstr "\tLink falliti:"
#: /home/kovid/work/calibre/src/calibre/web/feeds/news.py:549
msgid "Could not fetch article. Run with --debug to see the reason"
msgstr ""
"Impossibile scaricare l'articolo. Eseguire con --debug per vedere la ragione"
msgstr "Impossibile scaricare l'articolo. Eseguire con --debug per vedere la ragione"
#: /home/kovid/work/calibre/src/calibre/web/feeds/news.py:571
msgid "Got feeds from index page"
@ -2732,40 +2736,3 @@ msgstr "Non scaricare i fogli di stile CSS"
msgid "Show detailed output information. Useful for debugging"
msgstr "Mostra un output dettagliato. Utile per il debugging"
#~ msgid ""
#~ "For help with writing advanced news recipes, please visit <a href="
#~ "\"http://libprs500.kovidgoyal.net/user_manual/news.html\">User Recipes</a>"
#~ msgstr ""
#~ "Per un aiuto su come scrivere formule di notizie avanzate, visitare <a "
#~ "href=\"http://libprs500.kovidgoyal.net/user_manual/news.html\">Formule "
#~ "utente</a>"
#~ msgid "libprs500"
#~ msgstr "libprs500"
#~ msgid ""
#~ "<html><head><meta name=\"qrichtext\" content=\"1\" /><style type=\"text/"
#~ "css\">\n"
#~ "p, li { white-space: pre-wrap; }\n"
#~ "</style></head><body style=\" font-family:'Sans Serif'; font-size:9pt; "
#~ "font-weight:400; font-style:normal;\">\n"
#~ "<p style=\" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-"
#~ "right:0px; -qt-block-indent:0; text-indent:0px;\">For help visit <a href="
#~ "\"http://libprs500.kovidgoyal.net/user_manual\"><span style=\" text-"
#~ "decoration: underline; color:#0000ff;\">libprs500.kovidgoyal.net</span></"
#~ "a><br /><br /><span style=\" font-weight:600;\">libprs500</span>: %1 by "
#~ "<span style=\" font-weight:600;\">Kovid Goyal</span> %2<br />%3</p></"
#~ "body></html>"
#~ msgstr ""
#~ "<html><head><meta name=\"qrichtext\" content=\"1\" /><style type=\"text/"
#~ "css\">\n"
#~ "p, li { white-space: pre-wrap; }\n"
#~ "</style></head><body style=\" font-family:'Sans Serif'; font-size:9pt; "
#~ "font-weight:400; font-style:normal;\">\n"
#~ "<p style=\" margin-top:0px; margin-bottom:0px; margin-left:0px; margin-"
#~ "right:0px; -qt-block-indent:0; text-indent:0px;\">Visitare <a href="
#~ "\"http://libprs500.kovidgoyal.net/user_manual\"><span style=\" text-"
#~ "decoration: underline; color:#0000ff;\">libprs500.kovidgoyal.net</span></"
#~ "a> per la guida<br /><br /><span style=\" font-weight:600;\">libprs500</"
#~ "span>: %1 di <span style=\" font-weight:600;\">Kovid Goyal</span> %2<br />"
#~ "%3</p></body></html>"

View File

@ -10,7 +10,7 @@ __docformat__ = "restructuredtext en"
import logging, os, cStringIO, time, traceback, re, urlparse
from collections import defaultdict
from calibre import browser, __appname__, iswindows
from calibre import browser, __appname__, iswindows, LoggingInterface
from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Tag
from calibre.ebooks.metadata.opf import OPFCreator
from calibre.ebooks.lrf import entity_to_unicode
@ -24,7 +24,7 @@ from calibre.ebooks.lrf.web.profiles import FullContentProfile
from calibre.ptempfile import PersistentTemporaryFile
class BasicNewsRecipe(object):
class BasicNewsRecipe(object, LoggingInterface):
'''
Abstract base class that contains logic needed in all feed fetchers.
'''
@ -363,17 +363,18 @@ class BasicNewsRecipe(object):
@param parser: Command line option parser. Used to intelligently merge options.
@param progress_reporter: A Callable that takes two arguments: progress (a number between 0 and 1) and a string message. The message should be optional.
'''
LoggingInterface.__init__(self, logging.getLogger('feeds2disk'))
for attr in ('username', 'password', 'lrf', 'output_dir', 'verbose', 'debug', 'test'):
setattr(self, attr, getattr(options, attr))
self.output_dir = os.path.abspath(self.output_dir)
if options.test:
self.max_articles_per_feed = 2
self.simultaneous_downloads = min(4, self.simultaneous_downloads)
self.logger = logging.getLogger('feeds2disk')
if self.debug:
self.logger.setLevel(logging.DEBUG)
logging.getLogger('feeds2disk').setLevel(logging.DEBUG)
self.verbose = True
self.report_progress = progress_reporter
@ -467,20 +468,20 @@ class BasicNewsRecipe(object):
self.cleanup()
self.report_progress(1, _('Download finished'))
if self.failed_downloads:
self.logger.warning(_('Failed to download the following articles:'))
self.log_warning(_('Failed to download the following articles:'))
for feed, article, debug in self.failed_downloads:
self.logger.warning(article.title+_(' from ')+feed.title)
self.logger.debug(article.url)
self.logger.debug(debug)
self.log_warning(article.title+_(' from ')+feed.title)
self.log_debug(article.url)
self.log_debug(debug)
if self.partial_failures:
self.logger.warning(_('Failed to download parts of the following articles:'))
self.log_warning(_('Failed to download parts of the following articles:'))
for feed, atitle, aurl, debug in self.partial_failures:
self.logger.warning(atitle + _(' from ') + feed)
self.logger.debug(aurl)
self.logger.warning(_('\tFailed links:'))
self.log_warning(atitle + _(' from ') + feed)
self.log_debug(aurl)
self.log_warning(_('\tFailed links:'))
for l, tb in debug:
self.logger.warning(l)
self.logger.debug(tb)
self.log_warning(l)
self.log_debug(tb)
return res
def feeds2index(self, feeds):
@ -645,8 +646,8 @@ class BasicNewsRecipe(object):
cu = self.get_cover_url()
except Exception, err:
cu = None
self.logger.error(_('Could not download cover: %s')%str(err))
self.logger.debug(traceback.format_exc())
self.log_error(_('Could not download cover: %s')%str(err))
self.log_debug(traceback.format_exc())
if cu is not None:
ext = cu.rpartition('.')[-1]
ext = ext.lower() if ext else 'jpg'
@ -726,7 +727,7 @@ class BasicNewsRecipe(object):
a = request.requestID[1]
article = request.article
self.logger.debug(_('\nDownloaded article %s from %s\n%s')%(article.title, article.url, request.stream.getvalue().decode('utf-8', 'ignore')))
self.log_debug(_('\nDownloaded article %s from %s\n%s')%(article.title, article.url, request.stream.getvalue().decode('utf-8', 'ignore')))
article.orig_url = article.url
article.url = 'article_%d/index.html'%a
article.downloaded = True
@ -738,11 +739,11 @@ class BasicNewsRecipe(object):
def error_in_article_download(self, request, traceback):
self.jobs_done += 1
self.logger.error(_('Failed to download article: %s from %s\n')%(request.article.title, request.article.url))
self.log_error(_('Failed to download article: %s from %s\n')%(request.article.title, request.article.url))
debug = request.stream.getvalue().decode('utf-8', 'ignore')
self.logger.debug(debug)
self.logger.debug(traceback)
self.logger.debug('\n')
self.log_debug(debug)
self.log_debug(traceback)
self.log_debug('\n')
self.report_progress(float(self.jobs_done)/len(self.jobs), _('Article download failed: %s')%request.article.title)
self.failed_downloads.append((request.feed, request.article, debug))

View File

@ -11,7 +11,8 @@ import sys, socket, os, urlparse, codecs, logging, re, time, copy, urllib2, thre
from urllib import url2pathname
from httplib import responses
from calibre import setup_cli_handlers, browser, sanitize_file_name, OptionParser, relpath
from calibre import setup_cli_handlers, browser, sanitize_file_name, \
OptionParser, relpath, LoggingInterface
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
from calibre.ebooks.chardet import xml_to_unicode
@ -47,7 +48,7 @@ def save_soup(soup, target):
f.close()
class RecursiveFetcher(object):
class RecursiveFetcher(object, LoggingInterface):
LINK_FILTER = tuple(re.compile(i, re.IGNORECASE) for i in
('.exe\s*$', '.mp3\s*$', '.ogg\s*$', '^\s*mailto:', '^\s*$'))
#ADBLOCK_FILTER = tuple(re.compile(i, re.IGNORECASE) for it in
@ -58,7 +59,7 @@ class RecursiveFetcher(object):
CSS_IMPORT_PATTERN = re.compile(r'\@import\s+url\((.*?)\)', re.IGNORECASE)
def __init__(self, options, logger, image_map={}, css_map={}, job_info=None):
self.logger = logger
LoggingInterface.__init__(self, logger)
self.base_dir = os.path.abspath(os.path.expanduser(options.dir))
if not os.path.exists(self.base_dir):
os.makedirs(self.base_dir)
@ -130,7 +131,7 @@ class RecursiveFetcher(object):
def fetch_url(self, url):
f = None
self.logger.debug('Fetching %s', url)
self.log_debug('Fetching %s', url)
delta = time.time() - self.last_fetch_at
if delta < self.delay:
time.sleep(delta)
@ -140,7 +141,7 @@ class RecursiveFetcher(object):
if hasattr(err, 'code') and responses.has_key(err.code):
raise FetchError, responses[err.code]
if err.reason[0] == 104: # Connection reset by peer
self.logger.debug('Connection reset by peer retrying in 1 second.')
self.log_debug('Connection reset by peer retrying in 1 second.')
time.sleep(1)
f = self.browser.open(url)
else:
@ -152,9 +153,9 @@ class RecursiveFetcher(object):
def start_fetch(self, url):
soup = BeautifulSoup(u'<a href="'+url+'" />')
self.logger.info('Downloading')
self.log_info('Downloading')
res = self.process_links(soup, url, 0, into_dir='')
self.logger.info('%s saved to %s', url, res)
self.log_info('%s saved to %s', url, res)
return res
def is_link_ok(self, url):
@ -191,8 +192,8 @@ class RecursiveFetcher(object):
try:
f = self.fetch_url(iurl)
except Exception, err:
self.logger.warning('Could not fetch stylesheet %s', iurl)
self.logger.debug('Error: %s', str(err), exc_info=True)
self.log_warning('Could not fetch stylesheet %s', iurl)
self.log_debug('Error: %s', str(err), exc_info=True)
continue
stylepath = os.path.join(diskpath, 'style'+str(c)+'.css')
with self.stylemap_lock:
@ -214,8 +215,8 @@ class RecursiveFetcher(object):
try:
f = self.fetch_url(iurl)
except Exception, err:
self.logger.warning('Could not fetch stylesheet %s', iurl)
self.logger.debug('Error: %s', str(err), exc_info=True)
self.log_warning('Could not fetch stylesheet %s', iurl)
self.log_debug('Error: %s', str(err), exc_info=True)
continue
c += 1
stylepath = os.path.join(diskpath, 'style'+str(c)+'.css')
@ -234,7 +235,7 @@ class RecursiveFetcher(object):
for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
iurl, ext = tag['src'], os.path.splitext(tag['src'])[1]
#if not ext:
# self.logger.debug('Skipping extensionless image %s', iurl)
# self.log_debug('Skipping extensionless image %s', iurl)
# continue
if not urlparse.urlsplit(iurl).scheme:
iurl = urlparse.urljoin(baseurl, iurl, False)
@ -245,8 +246,8 @@ class RecursiveFetcher(object):
try:
f = self.fetch_url(iurl)
except Exception, err:
self.logger.warning('Could not fetch image %s', iurl)
self.logger.debug('Error: %s', str(err), exc_info=True)
self.log_warning('Could not fetch image %s', iurl)
self.log_debug('Error: %s', str(err), exc_info=True)
continue
c += 1
imgpath = os.path.join(diskpath, sanitize_file_name('img'+str(c)+ext))
@ -263,10 +264,10 @@ class RecursiveFetcher(object):
if not parts.scheme:
iurl = urlparse.urljoin(baseurl, iurl, False)
if not self.is_link_ok(iurl):
self.logger.debug('Skipping invalid link: %s', iurl)
self.log_debug('Skipping invalid link: %s', iurl)
return None
if filter and not self.is_link_wanted(iurl):
self.logger.debug('Filtered link: '+iurl)
self.log_debug('Filtered link: '+iurl)
return None
return iurl
@ -330,7 +331,7 @@ class RecursiveFetcher(object):
dsrc = xml_to_unicode(dsrc, self.verbose)[0]
soup = self.get_soup(dsrc)
self.logger.debug('Processing images...')
self.log_debug('Processing images...')
self.process_images(soup, f.geturl())
if self.download_stylesheets:
self.process_stylesheets(soup, f.geturl())
@ -339,11 +340,11 @@ class RecursiveFetcher(object):
self.downloaded_paths.append(res)
self.filemap[nurl] = res
if recursion_level < self.max_recursions:
self.logger.debug('Processing links...')
self.log_debug('Processing links...')
self.process_links(soup, iurl, recursion_level+1)
else:
self.process_return_links(soup, iurl)
self.logger.debug('Recursion limit reached. Skipping links in %s', iurl)
self.log_debug('Recursion limit reached. Skipping links in %s', iurl)
if callable(self.postprocess_html_ext):
soup = self.postprocess_html_ext(soup,
@ -356,8 +357,8 @@ class RecursiveFetcher(object):
self.localize_link(tag, 'href', res)
except Exception, err:
self.failed_links.append((iurl, traceback.format_exc()))
self.logger.warning('Could not fetch link %s', iurl)
self.logger.debug('Error: %s', str(err), exc_info=True)
self.log_warning('Could not fetch link %s', iurl)
self.log_debug('Error: %s', str(err), exc_info=True)
finally:
self.current_dir = diskpath
self.files += 1