Merge from trunk

This commit is contained in:
Charles Haley 2011-09-29 13:28:21 +02:00
commit 96337de289
10 changed files with 148 additions and 93 deletions

View File

@ -8,21 +8,25 @@ from calibre.web.feeds.news import BasicNewsRecipe
class ChicagoTribune(BasicNewsRecipe):
title = 'Chicago Tribune'
__author__ = 'Kovid Goyal and Sujata Raman'
__author__ = 'Kovid Goyal and Sujata Raman, a.peter'
description = 'Politics, local and business news from Chicago'
language = 'en'
language = 'en'
version = 2
use_embedded_content = False
no_stylesheets = True
remove_javascript = True
use_embedded_content = False
no_stylesheets = True
remove_javascript = True
recursions = 1
keep_only_tags = [dict(name='div', attrs={'class':["story","entry-asset asset hentry"]}),
dict(name='div', attrs={'id':["pagebody","story","maincontentcontainer"]}),
]
remove_tags_after = [ {'class':['photo_article',]} ]
remove_tags_after = [{'class':['photo_article',]}]
remove_tags = [{'id':["moduleArticleTools","content-bottom","rail","articleRelates module","toolSet","relatedrailcontent","div-wrapper","beta","atp-comments","footer"]},
{'class':["clearfix","relatedTitle","articleRelates module","asset-footer","tools","comments","featurePromo","featurePromo fp-topjobs brownBackground","clearfix fullSpan brownBackground","curvedContent"]},
match_regexps = [r'page=[0-9]+']
remove_tags = [{'id':["moduleArticleTools","content-bottom","rail","articleRelates module","toolSet","relatedrailcontent","div-wrapper","beta","atp-comments","footer",'gallery-subcontent','subFooter']},
{'class':["clearfix","relatedTitle","articleRelates module","asset-footer","tools","comments","featurePromo","featurePromo fp-topjobs brownBackground","clearfix fullSpan brownBackground","curvedContent",'nextgen-share-tools','outbrainTools', 'google-ad-story-bottom']},
dict(name='font',attrs={'id':["cr-other-headlines"]})]
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
@ -37,7 +41,7 @@ class ChicagoTribune(BasicNewsRecipe):
.maincontentcontainer{font-family:Arial,Helvetica,sans-serif;font-size:small;}
.story-body{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
'''
feeds = [
('Latest news', 'http://feeds.chicagotribune.com/chicagotribune/news/'),
('Local news', 'http://feeds.chicagotribune.com/chicagotribune/news/local/'),
@ -76,8 +80,12 @@ class ChicagoTribune(BasicNewsRecipe):
print article.get('feedburner_origlink', article.get('guid', article.get('link')))
return article.get('feedburner_origlink', article.get('guid', article.get('link')))
def postprocess_html(self, soup, first_fetch):
# Remove the navigation bar. It was kept until now to be able to follow
# the links to further pages. But now we don't need them anymore.
for nav in soup.findAll(attrs={'class':['toppaginate','article-nav clearfix']}):
nav.extract()
for t in soup.findAll(['table', 'tr', 'td']):
t.name = 'div'
@ -88,4 +96,3 @@ class ChicagoTribune(BasicNewsRecipe):
return soup

View File

@ -1,19 +1,21 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class FSP(BasicNewsRecipe):
title = u'Folha de S\xE3o Paulo - Jornal'
title = u'Folha de S\xE3o Paulo'
__author__ = 'fluzao'
description = u'Printed edition contents. UOL subscription required (Folha subscription currently not supported).' + \
u' [Conte\xfado completo da edi\xe7\xe3o impressa. Somente para assinantes UOL.]'
INDEX = 'http://www1.folha.uol.com.br/fsp/indices/'
language = 'pt'
no_stylesheets = True
max_articles_per_feed = 30
max_articles_per_feed = 40
remove_javascript = True
needs_subscription = True
remove_tags_before = dict(name='b')
remove_tags_after = dict(name='!--/NOTICIA--')
remove_tags = [dict(name='td', attrs={'align':'center'})]
remove_attributes = ['height','width']
masthead_url = 'http://f.i.uol.com.br/fsp/furniture/images/lgo-fsp-430x50-ffffff.gif'
@ -26,6 +28,13 @@ class FSP(BasicNewsRecipe):
# this solves the problem with truncated content in Kindle
conversion_options = {'linearize_tables' : True}
# These patterns strip the article footer, which holds the links for
# "Proximo Texto", "Texto Anterior", "Indice" and "Comunicar Erros".
preprocess_regexps = [(re.compile(r'<BR><BR>Texto Anterior:.*<!--/NOTICIA-->',
re.DOTALL|re.IGNORECASE), lambda match: r''),
(re.compile(r'<BR><BR>Pr&oacute;ximo Texto:.*<!--/NOTICIA-->',
re.DOTALL|re.IGNORECASE), lambda match: r'')]
def get_browser(self):
br = BasicNewsRecipe.get_browser()
if self.username is not None and self.password is not None:

View File

@ -0,0 +1,34 @@
from calibre.web.feeds.recipes import BasicNewsRecipe
'''Calibre recipe to convert the RSS feeds of the Leipziger Volkszeitung to an ebook.'''
# Recipe for the RSS feeds of the Leipziger Volkszeitung (lvz-online.de).
# NOTE(review): the class name mentions Sports Illustrated although every
# setting below targets lvz-online.de -- presumably a copy-paste leftover.
# The name is left untouched here so existing references keep working.
class SportsIllustratedRecipe(BasicNewsRecipe):

    # --- authorship / licensing ---
    __author__ = 'a.peter'
    __copyright__ = 'a.peter'
    __license__ = 'GPL v3'

    # --- publication metadata ---
    title = u'Leipziger Volkszeitung Online RSS'
    description = 'Leipziger Volkszeitung Online RSS'
    language = 'de'
    publication_type = 'newspaper'
    version = 1
    # strftime-style pattern using day.month.year ordering
    timefmt = ' [%d.%m.%Y]'

    # --- download / cleanup switches ---
    use_embedded_content = False
    no_stylesheets = True
    remove_javascript = True

    # Keep only the <div class="article"> container ...
    keep_only_tags = [
        dict(name='div', attrs={'class': 'article'}),
    ]
    # ... and drop the <div> elements carrying these classes.
    remove_tags = [
        dict(name='div', attrs={'class': ['ARTICLE_MORE', 'clearfloat']}),
    ]

    # (section title, feed URL) pairs, one per section of the site.
    feeds = [
        (u'Leipzig',
         u'http://nachrichten.lvz-online.de/rss/leipzig-rss.xml'),
        (u'Mitteldeutschland',
         u'http://nachrichten.lvz-online.de/rss/mitteldeutschland-rss.xml'),
        (u'Brennpunkte',
         u'http://nachrichten.lvz-online.de/rss/brennpunkte-rss.xml'),
        (u'Polizeiticker',
         u'http://nachrichten.lvz-online.de/rss/polizeiticker-rss.xml'),
        (u'Boulevard',
         u'http://nachrichten.lvz-online.de/rss/boulevard-rss.xml'),
        (u'Kultur',
         u'http://nachrichten.lvz-online.de/rss/kultur-rss.xml'),
        (u'Sport',
         u'http://nachrichten.lvz-online.de/rss/sport-rss.xml'),
        (u'Regionalsport',
         u'http://nachrichten.lvz-online.de/rss/regionalsport-rss.xml'),
        (u'Knipser',
         u'http://nachrichten.lvz-online.de/rss/knipser-rss.xml'),
    ]

    def get_masthead_url(self):
        # Fixed URL of the logo image used as the masthead.
        masthead = 'http://www.lvz-online.de/resources/themes/standard/images/global/logo.gif'
        return masthead

View File

@ -64,4 +64,6 @@ Various things that require other things before they can be migrated:
columns/categories/searches info into
self.field_metadata. Finally, implement metadata dirtied
functionality.
2. Catching DatabaseException and sqlite.Error when creating new
libraries, when switching libraries, and on calibre startup.
'''

View File

@ -315,7 +315,7 @@ class HeuristicProcessor(object):
supports a range of html markup and text files
'''
# define the pieces of the regex
lookahead = "(?<=.{"+str(length)+u"}([a-zäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:“”)\IA\u00DF]|(?<!\&\w{4});))" # (?<!\&\w{4});) is a semicolon not part of an entity
em_en_lookahead = "(?<=.{"+str(length)+u"}[\u2013\u2014])"
soft_hyphen = u"\xad"
@ -770,6 +770,7 @@ class HeuristicProcessor(object):
# Multiple sequential blank paragraphs are merged with appropriate margins
# If non-blank scene breaks exist they are center aligned and styled with appropriate margins.
if getattr(self.extra_opts, 'format_scene_breaks', False):
self.log.debug('Formatting scene breaks')
html = re.sub('(?i)<div[^>]*>\s*<br(\s?/)?>\s*</div>', '<p></p>', html)
html = self.detect_whitespace(html)
html = self.detect_soft_breaks(html)

View File

@ -405,13 +405,12 @@ class ChooseLibraryAction(InterfaceAction):
else:
return
prefs['library_path'] = loc
#from calibre.utils.mem import memory
#import weakref
#from PyQt4.Qt import QTimer
#self.dbref = weakref.ref(self.gui.library_view.model().db)
#self.before_mem = memory()/1024**2
self.gui.library_moved(loc)
self.gui.library_moved(loc, allow_rebuild=True)
#QTimer.singleShot(5000, self.debug_leak)
def debug_leak(self):
@ -455,7 +454,8 @@ class ChooseLibraryAction(InterfaceAction):
self.choose_dialog_library_renamed = getattr(c, 'library_renamed', False)
def choose_library_callback(self, newloc, copy_structure=False):
self.gui.library_moved(newloc, copy_structure=copy_structure)
self.gui.library_moved(newloc, copy_structure=copy_structure,
allow_rebuild=True)
if getattr(self, 'choose_dialog_library_renamed', False):
self.stats.rename(self.pre_choose_dialog_location, prefs['library_path'])
self.build_menus()

View File

@ -13,7 +13,6 @@ from calibre.gui2.dialogs.choose_library_ui import Ui_Dialog
from calibre.gui2 import error_dialog, choose_dir
from calibre.constants import filesystem_encoding, iswindows
from calibre import isbytestring, patheq
from calibre.utils.config import prefs
from calibre.gui2.wizard import move_library
from calibre.library.database2 import LibraryDatabase2
@ -77,7 +76,6 @@ class ChooseLibrary(QDialog, Ui_Dialog):
def perform_action(self, ac, loc):
if ac in ('new', 'existing'):
prefs['library_path'] = loc
self.callback(loc, copy_structure=self.copy_structure.isChecked())
else:
self.db.prefs.disable_setting = True

View File

@ -5,12 +5,14 @@ __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from PyQt4.Qt import QDialog, QLabel, QVBoxLayout, QDialogButtonBox, \
QProgressBar, QSize, QTimer, pyqtSignal, Qt
from PyQt4.Qt import (QDialog, QLabel, QVBoxLayout, QDialogButtonBox,
QProgressBar, QSize, QTimer, pyqtSignal, Qt)
from calibre.library.restore import Restore
from calibre.gui2 import error_dialog, question_dialog, warning_dialog, \
info_dialog
from calibre.gui2 import (error_dialog, question_dialog, warning_dialog,
info_dialog)
from calibre import force_unicode
from calibre.constants import filesystem_encoding
class DBRestore(QDialog):
@ -73,6 +75,19 @@ class DBRestore(QDialog):
self.msg.setText(msg)
self.pb.setValue(step)
def _show_success_msg(restorer, parent=None):
    '''
    Report a finished restore to the user.

    The text always ends with the location the old database was saved to
    (``restorer.olddb``). When ``restorer.errors_occurred`` is set a warning
    dialog is shown with ``restorer.report`` as its details; otherwise a
    plain information dialog is shown.

    :param restorer: object exposing ``olddb``, ``errors_occurred`` and
                     ``report``
    :param parent: passed to the dialog helper as its first argument
    '''
    saved_as = _('The old database was saved as: %s') % force_unicode(
            restorer.olddb, filesystem_encoding)

    if not restorer.errors_occurred:
        info_dialog(parent, _('Success'),
                _('Restoring database was successful. %s') % saved_as,
                show=True, show_copy_button=False)
        return

    warning_dialog(parent, _('Success'),
            _('Restoring the database succeeded with some warnings'
              ' click Show details to see the details. %s') % saved_as,
            det_msg=restorer.report, show=True)
def restore_database(db, parent=None):
if not question_dialog(parent, _('Are you sure?'), '<p>'+
@ -102,14 +117,21 @@ def restore_database(db, parent=None):
_('Restoring database failed, click Show details to see details'),
det_msg=r.tb, show=True)
else:
if r.errors_occurred:
warning_dialog(parent, _('Success'),
_('Restoring the database succeeded with some warnings'
' click Show details to see the details.'),
det_msg=r.report, show=True)
else:
info_dialog(parent, _('Success'),
_('Restoring database was successful'), show=True,
show_copy_button=False)
_show_success_msg(r, parent=parent)
return True
def repair_library_at(library_path, parent=None):
    '''
    Run the DBRestore dialog for ``library_path`` and report the outcome.

    :param library_path: passed to DBRestore as the library to restore
    :param parent: parent for the DBRestore dialog and the result dialogs
    :return: ``False`` if the dialog was rejected or the restorer recorded a
             traceback (``tb`` is not None); ``True`` otherwise
    '''
    dialog = DBRestore(parent, library_path)
    dialog.exec_()
    if dialog.rejected:
        return False

    restorer = dialog.restorer
    failed = restorer.tb is not None
    if failed:
        # The restorer stored a traceback: show it as the error details.
        error_dialog(parent, _('Failed'),
                _('Restoring database failed, click Show details to see details'),
                det_msg=restorer.tb, show=True)
    else:
        _show_success_msg(restorer, parent=parent)
    return not failed

View File

@ -4,16 +4,15 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import sys, os, time, socket, traceback
from functools import partial
from PyQt4.Qt import QCoreApplication, QIcon, QObject, QTimer, \
QThread, pyqtSignal, Qt, QProgressDialog, QString, QPixmap, \
QSplashScreen, QApplication
from PyQt4.Qt import (QCoreApplication, QIcon, QObject, QTimer,
QPixmap, QSplashScreen, QApplication)
from calibre import prints, plugins
from calibre.constants import iswindows, __appname__, isosx, DEBUG, \
filesystem_encoding
from calibre import prints, plugins, force_unicode
from calibre.constants import (iswindows, __appname__, isosx, DEBUG,
filesystem_encoding)
from calibre.utils.ipc import ADDRESS, RC
from calibre.gui2 import ORG_NAME, APP_UID, initialize_file_icon_provider, \
Application, choose_dir, error_dialog, question_dialog, gprefs
from calibre.gui2 import (ORG_NAME, APP_UID, initialize_file_icon_provider,
Application, choose_dir, error_dialog, question_dialog, gprefs)
from calibre.gui2.main_window import option_parser as _option_parser
from calibre.utils.config import prefs, dynamic
from calibre.library.database2 import LibraryDatabase2
@ -110,36 +109,9 @@ def get_library_path(parent=None):
default_dir=get_default_library_path())
return library_path
class DBRepair(QThread):
repair_done = pyqtSignal(object, object)
progress = pyqtSignal(object, object)
def __init__(self, library_path, parent, pd):
QThread.__init__(self, parent)
self.library_path = library_path
self.pd = pd
self.progress.connect(self._callback, type=Qt.QueuedConnection)
def _callback(self, num, is_length):
if is_length:
self.pd.setRange(0, num-1)
num = 0
self.pd.setValue(num)
def callback(self, num, is_length):
self.progress.emit(num, is_length)
def run(self):
from calibre.debug import reinit_db
try:
reinit_db(os.path.join(self.library_path, 'metadata.db'),
self.callback)
db = LibraryDatabase2(self.library_path)
tb = None
except:
db, tb = None, traceback.format_exc()
self.repair_done.emit(db, tb)
def repair_library(library_path):
    '''
    Delegate to ``repair_library_at`` from the restore_library dialog module
    and return its result unchanged.

    :param library_path: forwarded as the only argument
    '''
    # The import happens inside the function, as in the original code.
    from calibre.gui2.dialogs import restore_library
    return restore_library.repair_library_at(library_path)
class GuiRunner(QObject):
'''Make sure an event loop is running before starting the main work of
@ -184,9 +156,6 @@ class GuiRunner(QObject):
raise SystemExit(1)
def initialize_db_stage2(self, db, tb):
repair_pd = getattr(self, 'repair_pd', None)
if repair_pd is not None:
repair_pd.cancel()
if db is None and tb is not None:
# DB Repair failed
@ -219,23 +188,16 @@ class GuiRunner(QObject):
db = LibraryDatabase2(self.library_path)
except (sqlite.Error, DatabaseException):
repair = question_dialog(self.splash_screen, _('Corrupted database'),
_('Your calibre database appears to be corrupted. Do '
'you want calibre to try and repair it automatically? '
'If you say No, a new empty calibre library will be created.'),
_('The library database at %s appears to be corrupted. Do '
'you want calibre to try and rebuild it automatically? '
'The rebuild may not be completely successful. '
'If you say No, a new empty calibre library will be created.')
% force_unicode(self.library_path, filesystem_encoding),
det_msg=traceback.format_exc()
)
if repair:
self.repair_pd = QProgressDialog(_('Repairing database. This '
'can take a very long time for a large collection'), QString(),
0, 0)
self.repair_pd.setWindowModality(Qt.WindowModal)
self.repair_pd.show()
self.repair = DBRepair(self.library_path, self, self.repair_pd)
self.repair.repair_done.connect(self.initialize_db_stage2,
type=Qt.QueuedConnection)
self.repair.start()
return
if repair_library(self.library_path):
db = LibraryDatabase2(self.library_path)
except:
error_dialog(self.splash_screen, _('Bad database location'),
_('Bad database location %r. Will start with '

View File

@ -18,8 +18,8 @@ from PyQt4.Qt import (Qt, SIGNAL, QTimer, QHelpEvent, QAction,
QMenu, QIcon, pyqtSignal, QUrl,
QDialog, QSystemTrayIcon, QApplication)
from calibre import prints
from calibre.constants import __appname__, isosx
from calibre import prints, force_unicode
from calibre.constants import __appname__, isosx, filesystem_encoding
from calibre.utils.config import prefs, dynamic
from calibre.utils.ipc.server import Server
from calibre.library.database2 import LibraryDatabase2
@ -41,7 +41,7 @@ from calibre.gui2.search_box import SearchBoxMixin, SavedSearchBoxMixin
from calibre.gui2.search_restriction_mixin import SearchRestrictionMixin
from calibre.gui2.tag_browser.ui import TagBrowserMixin
from calibre.gui2.keyboard import Manager
from calibre.library.sqlite import sqlite, DatabaseException
class Listener(Thread): # {{{
@ -475,7 +475,8 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
def booklists(self):
return self.memory_view.model().db, self.card_a_view.model().db, self.card_b_view.model().db
def library_moved(self, newloc, copy_structure=False, call_close=True):
def library_moved(self, newloc, copy_structure=False, call_close=True,
allow_rebuild=False):
if newloc is None: return
default_prefs = None
try:
@ -484,7 +485,26 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
default_prefs = olddb.prefs
except:
olddb = None
db = LibraryDatabase2(newloc, default_prefs=default_prefs)
try:
db = LibraryDatabase2(newloc, default_prefs=default_prefs)
except (DatabaseException, sqlite.Error):
if not allow_rebuild: raise
import traceback
repair = question_dialog(self, _('Corrupted database'),
_('The library database at %s appears to be corrupted. Do '
'you want calibre to try and rebuild it automatically? '
'The rebuild may not be completely successful.')
% force_unicode(newloc, filesystem_encoding),
det_msg=traceback.format_exc()
)
if repair:
from calibre.gui2.dialogs.restore_library import repair_library_at
if repair_library_at(newloc, parent=self):
db = LibraryDatabase2(newloc, default_prefs=default_prefs)
else:
return
else:
return
if self.content_server is not None:
self.content_server.set_database(db)
self.library_path = newloc