Merge from trunk

This commit is contained in:
Charles Haley 2011-09-29 13:28:21 +02:00
commit 96337de289
10 changed files with 148 additions and 93 deletions

View File

@ -8,21 +8,25 @@ from calibre.web.feeds.news import BasicNewsRecipe
class ChicagoTribune(BasicNewsRecipe): class ChicagoTribune(BasicNewsRecipe):
title = 'Chicago Tribune' title = 'Chicago Tribune'
__author__ = 'Kovid Goyal and Sujata Raman' __author__ = 'Kovid Goyal and Sujata Raman, a.peter'
description = 'Politics, local and business news from Chicago' description = 'Politics, local and business news from Chicago'
language = 'en' language = 'en'
version = 2
use_embedded_content = False use_embedded_content = False
no_stylesheets = True no_stylesheets = True
remove_javascript = True remove_javascript = True
recursions = 1
keep_only_tags = [dict(name='div', attrs={'class':["story","entry-asset asset hentry"]}), keep_only_tags = [dict(name='div', attrs={'class':["story","entry-asset asset hentry"]}),
dict(name='div', attrs={'id':["pagebody","story","maincontentcontainer"]}), dict(name='div', attrs={'id':["pagebody","story","maincontentcontainer"]}),
] ]
remove_tags_after = [ {'class':['photo_article',]} ] remove_tags_after = [{'class':['photo_article',]}]
remove_tags = [{'id':["moduleArticleTools","content-bottom","rail","articleRelates module","toolSet","relatedrailcontent","div-wrapper","beta","atp-comments","footer"]}, match_regexps = [r'page=[0-9]+']
{'class':["clearfix","relatedTitle","articleRelates module","asset-footer","tools","comments","featurePromo","featurePromo fp-topjobs brownBackground","clearfix fullSpan brownBackground","curvedContent"]},
remove_tags = [{'id':["moduleArticleTools","content-bottom","rail","articleRelates module","toolSet","relatedrailcontent","div-wrapper","beta","atp-comments","footer",'gallery-subcontent','subFooter']},
{'class':["clearfix","relatedTitle","articleRelates module","asset-footer","tools","comments","featurePromo","featurePromo fp-topjobs brownBackground","clearfix fullSpan brownBackground","curvedContent",'nextgen-share-tools','outbrainTools', 'google-ad-story-bottom']},
dict(name='font',attrs={'id':["cr-other-headlines"]})] dict(name='font',attrs={'id':["cr-other-headlines"]})]
extra_css = ''' extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
@ -37,7 +41,7 @@ class ChicagoTribune(BasicNewsRecipe):
.maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;} .maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;} .story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;} body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
''' '''
feeds = [ feeds = [
('Latest news', 'http://feeds.chicagotribune.com/chicagotribune/news/'), ('Latest news', 'http://feeds.chicagotribune.com/chicagotribune/news/'),
('Local news', 'http://feeds.chicagotribune.com/chicagotribune/news/local/'), ('Local news', 'http://feeds.chicagotribune.com/chicagotribune/news/local/'),
@ -76,8 +80,12 @@ class ChicagoTribune(BasicNewsRecipe):
print article.get('feedburner_origlink', article.get('guid', article.get('link'))) print article.get('feedburner_origlink', article.get('guid', article.get('link')))
return article.get('feedburner_origlink', article.get('guid', article.get('link'))) return article.get('feedburner_origlink', article.get('guid', article.get('link')))
def postprocess_html(self, soup, first_fetch): def postprocess_html(self, soup, first_fetch):
# Remove the navigation bar. It was kept until now to be able to follow
# the links to further pages. But now we don't need them anymore.
for nav in soup.findAll(attrs={'class':['toppaginate','article-nav clearfix']}):
nav.extract()
for t in soup.findAll(['table', 'tr', 'td']): for t in soup.findAll(['table', 'tr', 'td']):
t.name = 'div' t.name = 'div'
@ -88,4 +96,3 @@ class ChicagoTribune(BasicNewsRecipe):
return soup return soup

View File

@ -1,19 +1,21 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
import re
class FSP(BasicNewsRecipe): class FSP(BasicNewsRecipe):
title = u'Folha de S\xE3o Paulo - Jornal' title = u'Folha de S\xE3o Paulo'
__author__ = 'fluzao' __author__ = 'fluzao'
description = u'Printed edition contents. UOL subscription required (Folha subscription currently not supported).' + \ description = u'Printed edition contents. UOL subscription required (Folha subscription currently not supported).' + \
u' [Conte\xfado completo da edi\xe7\xe3o impressa. Somente para assinantes UOL.]' u' [Conte\xfado completo da edi\xe7\xe3o impressa. Somente para assinantes UOL.]'
INDEX = 'http://www1.folha.uol.com.br/fsp/indices/' INDEX = 'http://www1.folha.uol.com.br/fsp/indices/'
language = 'pt' language = 'pt'
no_stylesheets = True no_stylesheets = True
max_articles_per_feed = 30 max_articles_per_feed = 40
remove_javascript = True remove_javascript = True
needs_subscription = True needs_subscription = True
remove_tags_before = dict(name='b') remove_tags_before = dict(name='b')
remove_tags_after = dict(name='!--/NOTICIA--') remove_tags = [dict(name='td', attrs={'align':'center'})]
remove_attributes = ['height','width'] remove_attributes = ['height','width']
masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif' masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'
@ -26,6 +28,13 @@ class FSP(BasicNewsRecipe):
# this solves the problem with truncated content in Kindle # this solves the problem with truncated content in Kindle
conversion_options = {'linearize_tables' : True} conversion_options = {'linearize_tables' : True}
# this bit removes the footer where there are links for Proximo Texto, Texto Anterior,
# Indice e Comunicar Erros
preprocess_regexps = [(re.compile(r'<BR><BR>Texto Anterior:.*<!--/NOTICIA-->',
re.DOTALL|re.IGNORECASE), lambda match: r''),
(re.compile(r'<BR><BR>Pr&oacute;ximo Texto:.*<!--/NOTICIA-->',
re.DOTALL|re.IGNORECASE), lambda match: r'')]
def get_browser(self): def get_browser(self):
br = BasicNewsRecipe.get_browser() br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None: if self.username is not None and self.password is not None:

View File

@ -0,0 +1,34 @@
'''Calibre recipe to convert the RSS feeds of the Leipziger Volkszeitung to an ebook.'''

from calibre.web.feeds.recipes import BasicNewsRecipe


# NOTE(review): the class name does not match the publication and looks like
# a leftover from a recipe used as a template. It is kept unchanged here so
# that nothing referring to the class by name breaks -- confirm before renaming.
class SportsIllustratedRecipe(BasicNewsRecipe):
    '''
    Recipe for the online edition of the Leipziger Volkszeitung, built from
    the RSS feeds listed in ``feeds``.
    '''

    # Recipe metadata
    __author__ = 'a.peter'
    __copyright__ = 'a.peter'
    __license__ = 'GPL v3'
    language = 'de'
    description = 'Leipziger Volkszeitung Online RSS'
    version = 1
    title = u'Leipziger Volkszeitung Online RSS'
    timefmt = ' [%d.%m.%Y]'

    # Download/cleanup options
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False
    publication_type = 'newspaper'

    # Only the article container is kept; the "more articles" box and the
    # float-clearing helper divs are dropped from it.
    keep_only_tags = [dict(name='div', attrs={'class':'article'})]
    remove_tags = [dict(name='div', attrs={'class':['ARTICLE_MORE', 'clearfloat']})]

    # (section title, feed URL) pairs
    feeds = [
        (u'Leipzig', u'http://nachrichten.lvz-online.de/rss/leipzig-rss.xml'),
        (u'Mitteldeutschland', u'http://nachrichten.lvz-online.de/rss/mitteldeutschland-rss.xml'),
        (u'Brennpunkte', u'http://nachrichten.lvz-online.de/rss/brennpunkte-rss.xml'),
        (u'Polizeiticker', u'http://nachrichten.lvz-online.de/rss/polizeiticker-rss.xml'),
        (u'Boulevard', u'http://nachrichten.lvz-online.de/rss/boulevard-rss.xml'),
        (u'Kultur', u'http://nachrichten.lvz-online.de/rss/kultur-rss.xml'),
        (u'Sport', u'http://nachrichten.lvz-online.de/rss/sport-rss.xml'),
        (u'Regionalsport', u'http://nachrichten.lvz-online.de/rss/regionalsport-rss.xml'),
        (u'Knipser', u'http://nachrichten.lvz-online.de/rss/knipser-rss.xml'),
    ]

    def get_masthead_url(self):
        '''Return the URL of the newspaper logo used as the masthead image.'''
        return 'http://www.lvz-online.de/resources/themes/standard/images/global/logo.gif'

View File

@ -64,4 +64,6 @@ Various things that require other things before they can be migrated:
columns/categories/searches info into columns/categories/searches info into
self.field_metadata. Finally, implement metadata dirtied self.field_metadata. Finally, implement metadata dirtied
functionality. functionality.
2. Catching DatabaseException and sqlite.Error when creating new
libraries/switching/on calibre startup.
''' '''

View File

@ -315,7 +315,7 @@ class HeuristicProcessor(object):
supports a range of html markup and text files supports a range of html markup and text files
''' '''
# define the pieces of the regex # define the pieces of the regex
lookahead = "(?<=.{"+str(length)+u"}([a-zäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:“”)\IA\u00DF]|(?<!\&\w{4});))" # (?<!\&\w{4});) is a semicolon not part of an entity lookahead = "(?<=.{"+str(length)+u"}([a-zäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:“”)\IA\u00DF]|(?<!\&\w{4});))" # (?<!\&\w{4});) is a semicolon not part of an entity
em_en_lookahead = "(?<=.{"+str(length)+u"}[\u2013\u2014])" em_en_lookahead = "(?<=.{"+str(length)+u"}[\u2013\u2014])"
soft_hyphen = u"\xad" soft_hyphen = u"\xad"
@ -770,6 +770,7 @@ class HeuristicProcessor(object):
# Multiple sequential blank paragraphs are merged with appropriate margins # Multiple sequential blank paragraphs are merged with appropriate margins
# If non-blank scene breaks exist they are center aligned and styled with appropriate margins. # If non-blank scene breaks exist they are center aligned and styled with appropriate margins.
if getattr(self.extra_opts, 'format_scene_breaks', False): if getattr(self.extra_opts, 'format_scene_breaks', False):
self.log.debug('Formatting scene breaks')
html = re.sub('(?i)<div[^>]*>\s*<br(\s?/)?>\s*</div>', '<p></p>', html) html = re.sub('(?i)<div[^>]*>\s*<br(\s?/)?>\s*</div>', '<p></p>', html)
html = self.detect_whitespace(html) html = self.detect_whitespace(html)
html = self.detect_soft_breaks(html) html = self.detect_soft_breaks(html)

View File

@ -405,13 +405,12 @@ class ChooseLibraryAction(InterfaceAction):
else: else:
return return
prefs['library_path'] = loc
#from calibre.utils.mem import memory #from calibre.utils.mem import memory
#import weakref #import weakref
#from PyQt4.Qt import QTimer #from PyQt4.Qt import QTimer
#self.dbref = weakref.ref(self.gui.library_view.model().db) #self.dbref = weakref.ref(self.gui.library_view.model().db)
#self.before_mem = memory()/1024**2 #self.before_mem = memory()/1024**2
self.gui.library_moved(loc) self.gui.library_moved(loc, allow_rebuild=True)
#QTimer.singleShot(5000, self.debug_leak) #QTimer.singleShot(5000, self.debug_leak)
def debug_leak(self): def debug_leak(self):
@ -455,7 +454,8 @@ class ChooseLibraryAction(InterfaceAction):
self.choose_dialog_library_renamed = getattr(c, 'library_renamed', False) self.choose_dialog_library_renamed = getattr(c, 'library_renamed', False)
def choose_library_callback(self, newloc, copy_structure=False): def choose_library_callback(self, newloc, copy_structure=False):
self.gui.library_moved(newloc, copy_structure=copy_structure) self.gui.library_moved(newloc, copy_structure=copy_structure,
allow_rebuild=True)
if getattr(self, 'choose_dialog_library_renamed', False): if getattr(self, 'choose_dialog_library_renamed', False):
self.stats.rename(self.pre_choose_dialog_location, prefs['library_path']) self.stats.rename(self.pre_choose_dialog_location, prefs['library_path'])
self.build_menus() self.build_menus()

View File

@ -13,7 +13,6 @@ from calibre.gui2.dialogs.choose_library_ui import Ui_Dialog
from calibre.gui2 import error_dialog, choose_dir from calibre.gui2 import error_dialog, choose_dir
from calibre.constants import filesystem_encoding, iswindows from calibre.constants import filesystem_encoding, iswindows
from calibre import isbytestring, patheq from calibre import isbytestring, patheq
from calibre.utils.config import prefs
from calibre.gui2.wizard import move_library from calibre.gui2.wizard import move_library
from calibre.library.database2 import LibraryDatabase2 from calibre.library.database2 import LibraryDatabase2
@ -77,7 +76,6 @@ class ChooseLibrary(QDialog, Ui_Dialog):
def perform_action(self, ac, loc): def perform_action(self, ac, loc):
if ac in ('new', 'existing'): if ac in ('new', 'existing'):
prefs['library_path'] = loc
self.callback(loc, copy_structure=self.copy_structure.isChecked()) self.callback(loc, copy_structure=self.copy_structure.isChecked())
else: else:
self.db.prefs.disable_setting = True self.db.prefs.disable_setting = True

View File

@ -5,12 +5,14 @@ __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
from PyQt4.Qt import QDialog, QLabel, QVBoxLayout, QDialogButtonBox, \ from PyQt4.Qt import (QDialog, QLabel, QVBoxLayout, QDialogButtonBox,
QProgressBar, QSize, QTimer, pyqtSignal, Qt QProgressBar, QSize, QTimer, pyqtSignal, Qt)
from calibre.library.restore import Restore from calibre.library.restore import Restore
from calibre.gui2 import error_dialog, question_dialog, warning_dialog, \ from calibre.gui2 import (error_dialog, question_dialog, warning_dialog,
info_dialog info_dialog)
from calibre import force_unicode
from calibre.constants import filesystem_encoding
class DBRestore(QDialog): class DBRestore(QDialog):
@ -73,6 +75,19 @@ class DBRestore(QDialog):
self.msg.setText(msg) self.msg.setText(msg)
self.pb.setValue(step) self.pb.setValue(step)
def _show_success_msg(restorer, parent=None):
    '''
    Tell the user that a database restore finished.

    The message always includes the location the old database was saved to
    (``restorer.olddb``, decoded with the filesystem encoding).

    :param restorer: Finished restorer object; ``olddb``, ``errors_occurred``
                     and ``report`` are read from it.
    :param parent: Parent widget passed through to the dialog.

    If ``restorer.errors_occurred`` is set, a warning dialog carrying
    ``restorer.report`` as its details is shown; otherwise a plain
    information dialog without the copy button is shown.
    '''
    saved_as = _('The old database was saved as: %s')%force_unicode(
            restorer.olddb, filesystem_encoding)

    if not restorer.errors_occurred:
        # Clean run: nothing to report beyond the backup location.
        info_dialog(parent, _('Success'),
            _('Restoring database was successful. %s')%saved_as,
            show=True, show_copy_button=False)
        return

    # Restore finished, but the restorer collected problems; expose its
    # report through the dialog's details pane.
    warning_dialog(parent, _('Success'),
        _('Restoring the database succeeded with some warnings'
            ' click Show details to see the details. %s')%saved_as,
        det_msg=restorer.report, show=True)
def restore_database(db, parent=None): def restore_database(db, parent=None):
if not question_dialog(parent, _('Are you sure?'), '<p>'+ if not question_dialog(parent, _('Are you sure?'), '<p>'+
@ -102,14 +117,21 @@ def restore_database(db, parent=None):
_('Restoring database failed, click Show details to see details'), _('Restoring database failed, click Show details to see details'),
det_msg=r.tb, show=True) det_msg=r.tb, show=True)
else: else:
if r.errors_occurred: _show_success_msg(r, parent=parent)
warning_dialog(parent, _('Success'),
_('Restoring the database succeeded with some warnings'
' click Show details to see the details.'),
det_msg=r.report, show=True)
else:
info_dialog(parent, _('Success'),
_('Restoring database was successful'), show=True,
show_copy_button=False)
return True return True
def repair_library_at(library_path, parent=None):
    '''
    Run the restore dialog for the library at ``library_path`` and report
    the outcome to the user.

    :param library_path: Path of the library whose database is to be rebuilt.
    :param parent: Parent widget for the restore dialog and message boxes.
    :return: ``True`` if the restore completed (possibly with warnings),
             ``False`` if the dialog was rejected or the restorer recorded
             a traceback.
    '''
    dialog = DBRestore(parent, library_path)
    dialog.exec_()
    if dialog.rejected:
        return False

    restorer = dialog.restorer
    failure_tb = restorer.tb
    if failure_tb is None:
        _show_success_msg(restorer, parent=parent)
        return True

    # The restorer stored a traceback: the rebuild failed outright.
    error_dialog(parent, _('Failed'),
        _('Restoring database failed, click Show details to see details'),
        det_msg=failure_tb, show=True)
    return False

View File

@ -4,16 +4,15 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import sys, os, time, socket, traceback import sys, os, time, socket, traceback
from functools import partial from functools import partial
from PyQt4.Qt import QCoreApplication, QIcon, QObject, QTimer, \ from PyQt4.Qt import (QCoreApplication, QIcon, QObject, QTimer,
QThread, pyqtSignal, Qt, QProgressDialog, QString, QPixmap, \ QPixmap, QSplashScreen, QApplication)
QSplashScreen, QApplication
from calibre import prints, plugins from calibre import prints, plugins, force_unicode
from calibre.constants import iswindows, __appname__, isosx, DEBUG, \ from calibre.constants import (iswindows, __appname__, isosx, DEBUG,
filesystem_encoding filesystem_encoding)
from calibre.utils.ipc import ADDRESS, RC from calibre.utils.ipc import ADDRESS, RC
from calibre.gui2 import ORG_NAME, APP_UID, initialize_file_icon_provider, \ from calibre.gui2 import (ORG_NAME, APP_UID, initialize_file_icon_provider,
Application, choose_dir, error_dialog, question_dialog, gprefs Application, choose_dir, error_dialog, question_dialog, gprefs)
from calibre.gui2.main_window import option_parser as _option_parser from calibre.gui2.main_window import option_parser as _option_parser
from calibre.utils.config import prefs, dynamic from calibre.utils.config import prefs, dynamic
from calibre.library.database2 import LibraryDatabase2 from calibre.library.database2 import LibraryDatabase2
@ -110,36 +109,9 @@ def get_library_path(parent=None):
default_dir=get_default_library_path()) default_dir=get_default_library_path())
return library_path return library_path
class DBRepair(QThread): def repair_library(library_path):
from calibre.gui2.dialogs.restore_library import repair_library_at
repair_done = pyqtSignal(object, object) return repair_library_at(library_path)
progress = pyqtSignal(object, object)
def __init__(self, library_path, parent, pd):
QThread.__init__(self, parent)
self.library_path = library_path
self.pd = pd
self.progress.connect(self._callback, type=Qt.QueuedConnection)
def _callback(self, num, is_length):
if is_length:
self.pd.setRange(0, num-1)
num = 0
self.pd.setValue(num)
def callback(self, num, is_length):
self.progress.emit(num, is_length)
def run(self):
from calibre.debug import reinit_db
try:
reinit_db(os.path.join(self.library_path, 'metadata.db'),
self.callback)
db = LibraryDatabase2(self.library_path)
tb = None
except:
db, tb = None, traceback.format_exc()
self.repair_done.emit(db, tb)
class GuiRunner(QObject): class GuiRunner(QObject):
'''Make sure an event loop is running before starting the main work of '''Make sure an event loop is running before starting the main work of
@ -184,9 +156,6 @@ class GuiRunner(QObject):
raise SystemExit(1) raise SystemExit(1)
def initialize_db_stage2(self, db, tb): def initialize_db_stage2(self, db, tb):
repair_pd = getattr(self, 'repair_pd', None)
if repair_pd is not None:
repair_pd.cancel()
if db is None and tb is not None: if db is None and tb is not None:
# DB Repair failed # DB Repair failed
@ -219,23 +188,16 @@ class GuiRunner(QObject):
db = LibraryDatabase2(self.library_path) db = LibraryDatabase2(self.library_path)
except (sqlite.Error, DatabaseException): except (sqlite.Error, DatabaseException):
repair = question_dialog(self.splash_screen, _('Corrupted database'), repair = question_dialog(self.splash_screen, _('Corrupted database'),
_('Your calibre database appears to be corrupted. Do ' _('The library database at %s appears to be corrupted. Do '
'you want calibre to try and repair it automatically? ' 'you want calibre to try and rebuild it automatically? '
'If you say No, a new empty calibre library will be created.'), 'The rebuild may not be completely successful. '
'If you say No, a new empty calibre library will be created.')
% force_unicode(self.library_path, filesystem_encoding),
det_msg=traceback.format_exc() det_msg=traceback.format_exc()
) )
if repair: if repair:
self.repair_pd = QProgressDialog(_('Repairing database. This ' if repair_library(self.library_path):
'can take a very long time for a large collection'), QString(), db = LibraryDatabase2(self.library_path)
0, 0)
self.repair_pd.setWindowModality(Qt.WindowModal)
self.repair_pd.show()
self.repair = DBRepair(self.library_path, self, self.repair_pd)
self.repair.repair_done.connect(self.initialize_db_stage2,
type=Qt.QueuedConnection)
self.repair.start()
return
except: except:
error_dialog(self.splash_screen, _('Bad database location'), error_dialog(self.splash_screen, _('Bad database location'),
_('Bad database location %r. Will start with ' _('Bad database location %r. Will start with '

View File

@ -18,8 +18,8 @@ from PyQt4.Qt import (Qt, SIGNAL, QTimer, QHelpEvent, QAction,
QMenu, QIcon, pyqtSignal, QUrl, QMenu, QIcon, pyqtSignal, QUrl,
QDialog, QSystemTrayIcon, QApplication) QDialog, QSystemTrayIcon, QApplication)
from calibre import prints from calibre import prints, force_unicode
from calibre.constants import __appname__, isosx from calibre.constants import __appname__, isosx, filesystem_encoding
from calibre.utils.config import prefs, dynamic from calibre.utils.config import prefs, dynamic
from calibre.utils.ipc.server import Server from calibre.utils.ipc.server import Server
from calibre.library.database2 import LibraryDatabase2 from calibre.library.database2 import LibraryDatabase2
@ -41,7 +41,7 @@ from calibre.gui2.search_box import SearchBoxMixin, SavedSearchBoxMixin
from calibre.gui2.search_restriction_mixin import SearchRestrictionMixin from calibre.gui2.search_restriction_mixin import SearchRestrictionMixin
from calibre.gui2.tag_browser.ui import TagBrowserMixin from calibre.gui2.tag_browser.ui import TagBrowserMixin
from calibre.gui2.keyboard import Manager from calibre.gui2.keyboard import Manager
from calibre.library.sqlite import sqlite, DatabaseException
class Listener(Thread): # {{{ class Listener(Thread): # {{{
@ -475,7 +475,8 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
def booklists(self): def booklists(self):
return self.memory_view.model().db, self.card_a_view.model().db, self.card_b_view.model().db return self.memory_view.model().db, self.card_a_view.model().db, self.card_b_view.model().db
def library_moved(self, newloc, copy_structure=False, call_close=True): def library_moved(self, newloc, copy_structure=False, call_close=True,
allow_rebuild=False):
if newloc is None: return if newloc is None: return
default_prefs = None default_prefs = None
try: try:
@ -484,7 +485,26 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
default_prefs = olddb.prefs default_prefs = olddb.prefs
except: except:
olddb = None olddb = None
db = LibraryDatabase2(newloc, default_prefs=default_prefs) try:
db = LibraryDatabase2(newloc, default_prefs=default_prefs)
except (DatabaseException, sqlite.Error):
if not allow_rebuild: raise
import traceback
repair = question_dialog(self, _('Corrupted database'),
_('The library database at %s appears to be corrupted. Do '
'you want calibre to try and rebuild it automatically? '
'The rebuild may not be completely successful.')
% force_unicode(newloc, filesystem_encoding),
det_msg=traceback.format_exc()
)
if repair:
from calibre.gui2.dialogs.restore_library import repair_library_at
if repair_library_at(newloc, parent=self):
db = LibraryDatabase2(newloc, default_prefs=default_prefs)
else:
return
else:
return
if self.content_server is not None: if self.content_server is not None:
self.content_server.set_database(db) self.content_server.set_database(db)
self.library_path = newloc self.library_path = newloc