Merge from trunk

This commit is contained in:
Charles Haley 2011-01-24 20:21:07 +00:00
commit 5acaed2683
18 changed files with 228 additions and 51 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 635 B

View File

@ -0,0 +1,66 @@
__license__ = 'GPL v3'
__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
'''
daily.tportal.hr
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Pagina12(BasicNewsRecipe):
    """Calibre news recipe for the English edition of tportal.hr.

    Articles are pulled from the single RSS feed listed in ``feeds``.
    Only the element carrying the ``articleDetails`` class is kept from
    each page, and ``preprocess_html`` then strips inline styles and
    hyperlinks from what remains.
    """

    # --- Metadata -----------------------------------------------------
    title = 'Daily tportal.hr'
    __author__ = 'Darko Miletic'
    description = 'News from Croatia'
    publisher = 'tportal.hr'
    category = 'news, politics, Croatia'
    language = 'en_HR'
    publication_type = 'newsportal'

    # --- Download behaviour -------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    remove_empty_feeds = True

    # Stylesheet applied to the generated book.
    extra_css = """
        body{font-family: Verdana,sans-serif }
        img{margin-bottom: 0.4em; display:block}
        h1,h2{color: #2D648A; font-family: Georgia,serif}
        .artAbstract{font-size: 1.2em; font-family: Georgia,serif}
    """

    # Mirror the recipe metadata into the conversion pipeline.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # --- Page clean-up ------------------------------------------------
    remove_tags = [
        dict(name=['meta', 'link', 'embed', 'object', 'iframe', 'base']),
        dict(name='div', attrs={'class': 'artInfo'}),
    ]
    remove_attributes = ['lang']
    # A list of match specs, the same shape as remove_tags above.
    keep_only_tags = [dict(attrs={'class': 'articleDetails'})]

    feeds = [(u'News', u'http://daily.tportal.hr/rss/dailynaslovnicarss.xml')]

    def preprocess_html(self, soup):
        """Strip inline styles and hyperlinks, and give every image an alt.

        :param soup: parsed article page (BeautifulSoup tree), modified
            in place.
        :return: the same ``soup`` object.
        """
        # Inline style attributes would override extra_css.
        for styled in soup.findAll(style=True):
            del styled['style']

        for link in soup.findAll('a'):
            if link.string is not None:
                # Link wraps a single text node: keep just the text.
                link.replaceWith(link.string)
            elif link.find('img'):
                # Link wraps an image: keep the content, drop the link by
                # turning the anchor into an attribute-less <div>.
                link.name = 'div'
                link.attrs = []
            else:
                # Mixed content without an image: flatten to plain text.
                link.replaceWith(self.tag_to_string(link))

        for image in soup.findAll('img'):
            # has_key() tests tag attributes here (BeautifulSoup 3 API).
            if not image.has_key('alt'):
                image['alt'] = 'image'
        return soup

View File

@ -52,6 +52,7 @@ class heiseDe(BasicNewsRecipe):
dict(id='navi_login'), dict(id='navi_login'),
dict(id='navigation'), dict(id='navigation'),
dict(id='breadcrumb'), dict(id='breadcrumb'),
dict(id='adblockerwarnung'),
dict(id=''), dict(id=''),
dict(id='sitemap'), dict(id='sitemap'),
dict(id='bannerzone'), dict(id='bannerzone'),
@ -67,3 +68,4 @@ class heiseDe(BasicNewsRecipe):

View File

@ -21,7 +21,7 @@ class hnaDe(BasicNewsRecipe):
max_articles_per_feed = 40 max_articles_per_feed = 40
no_stylesheets = True no_stylesheets = True
remove_javascript = True remove_javascript = True
encoding = 'iso-8859-1' encoding = 'utf-8'
remove_tags = [dict(id='topnav'), remove_tags = [dict(id='topnav'),
dict(id='nav_main'), dict(id='nav_main'),
@ -60,3 +60,4 @@ class hnaDe(BasicNewsRecipe):
feeds = [ ('hna_soehre', 'http://feeds2.feedburner.com/hna/soehre'), feeds = [ ('hna_soehre', 'http://feeds2.feedburner.com/hna/soehre'),
('hna_kassel', 'http://feeds2.feedburner.com/hna/kassel') ] ('hna_kassel', 'http://feeds2.feedburner.com/hna/kassel') ]

View File

@ -27,12 +27,34 @@ class cdnet(BasicNewsRecipe):
dict(id='header'), dict(id='header'),
dict(id='search'), dict(id='search'),
dict(id='nav'), dict(id='nav'),
dict(id='blog-author-info'),
dict(id='post-tags'),
dict(id='bio-naraine'),
dict(id='bio-kennedy'),
dict(id='author-short-disclosure-kennedy'),
dict(id=''), dict(id=''),
dict(name='div', attrs={'class':'banner'}), dict(name='div', attrs={'class':'banner'}),
dict(name='div', attrs={'class':'int'}),
dict(name='div', attrs={'class':'talkback clear space-2'}),
dict(name='div', attrs={'class':'content-1 clear'}),
dict(name='div', attrs={'class':'space-2'}),
dict(name='div', attrs={'class':'space-3'}),
dict(name='div', attrs={'class':'thumb-2 left'}),
dict(name='div', attrs={'class':'hotspot'}),
dict(name='div', attrs={'class':'hed hed-1 space-1'}),
dict(name='div', attrs={'class':'view-1 clear content-3 space-2'}),
dict(name='div', attrs={'class':'hed hed-1 space-1'}),
dict(name='div', attrs={'class':'hed hed-1'}),
dict(name='div', attrs={'class':'post-header'}),
dict(name='div', attrs={'class':'lvl-nav clear'}),
dict(name='div', attrs={'class':'t-share-overlay overlay-pop contain-overlay-4'}),
dict(name='p', attrs={'class':'tags'}), dict(name='p', attrs={'class':'tags'}),
dict(name='span', attrs={'class':'follow'}),
dict(name='span', attrs={'class':'int'}),
dict(name='h4', attrs={'class':'h s-4'}),
dict(name='a', attrs={'href':'http://www.twitter.com/ryanaraine'}), dict(name='a', attrs={'href':'http://www.twitter.com/ryanaraine'}),
dict(name='div', attrs={'class':'special1'})] dict(name='div', attrs={'class':'special1'})]
remove_tags_after = [dict(name='div', attrs={'class':'bloggerDesc clear'})] remove_tags_after = [dict(name='div', attrs={'class':'clear'})]
feeds = [ ('zdnet', 'http://feeds.feedburner.com/zdnet/security') ] feeds = [ ('zdnet', 'http://feeds.feedburner.com/zdnet/security') ]
@ -43,3 +65,4 @@ class cdnet(BasicNewsRecipe):
return soup return soup

View File

@ -54,7 +54,7 @@ class ANDROID(USBMS):
0x1004 : { 0x61cc : [0x100] }, 0x1004 : { 0x61cc : [0x100] },
# Archos # Archos
0x0e79 : { 0x1419: [0x0216], 0x1420 : [0x0216]}, 0x0e79 : { 0x1419: [0x0216], 0x1420 : [0x0216], 0x1422 : [0x0216]},
} }
EBOOK_DIR_MAIN = ['eBooks/import', 'wordplayer/calibretransfer', 'Books'] EBOOK_DIR_MAIN = ['eBooks/import', 'wordplayer/calibretransfer', 'Books']
@ -70,7 +70,7 @@ class ANDROID(USBMS):
'__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897', '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE', 'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE',
'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT'] 'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H']
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897', WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
'A70S', 'A101IT'] 'A70S', 'A101IT']

View File

@ -25,13 +25,15 @@ class HeuristicProcessor(object):
self.chapters_with_title = 0 self.chapters_with_title = 0
self.blanks_deleted = False self.blanks_deleted = False
self.linereg = re.compile('(?<=<p).*?(?=</p>)', re.IGNORECASE|re.DOTALL) self.linereg = re.compile('(?<=<p).*?(?=</p>)', re.IGNORECASE|re.DOTALL)
self.blankreg = re.compile(r'\s*(?P<openline><p(?!\sid=\"softbreak\")[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE) self.blankreg = re.compile(r'\s*(?P<openline><p(?!\sclass=\"softbreak\")[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
self.softbreak = re.compile(r'\s*(?P<openline><p(?=\sclass=\"softbreak\")[^>]*>)\s*(?P<closeline></p>)', re.IGNORECASE)
self.multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>){2,}', re.IGNORECASE) self.multi_blank = re.compile(r'(\s*<p[^>]*>\s*</p>){2,}', re.IGNORECASE)
def is_pdftohtml(self, src): def is_pdftohtml(self, src):
return '<!-- created by calibre\'s pdftohtml -->' in src[:1000] return '<!-- created by calibre\'s pdftohtml -->' in src[:1000]
def chapter_head(self, match): def chapter_head(self, match):
from calibre.utils.html2text import html2text
chap = match.group('chap') chap = match.group('chap')
title = match.group('title') title = match.group('title')
if not title: if not title:
@ -40,10 +42,12 @@ class HeuristicProcessor(object):
" chapters. - " + unicode(chap)) " chapters. - " + unicode(chap))
return '<h2>'+chap+'</h2>\n' return '<h2>'+chap+'</h2>\n'
else: else:
txt_chap = html2text(chap)
txt_title = html2text(title)
self.html_preprocess_sections = self.html_preprocess_sections + 1 self.html_preprocess_sections = self.html_preprocess_sections + 1
self.log.debug("marked " + unicode(self.html_preprocess_sections) + self.log.debug("marked " + unicode(self.html_preprocess_sections) +
" chapters & titles. - " + unicode(chap) + ", " + unicode(title)) " chapters & titles. - " + unicode(chap) + ", " + unicode(title))
return '<h2>'+chap+'</h2>\n<h3>'+title+'</h3>\n' return '<h2 title="'+txt_chap+', '+txt_title+'">'+chap+'</h2>\n<h3 class="sigilNotInTOC">'+title+'</h3>\n'
def chapter_break(self, match): def chapter_break(self, match):
chap = match.group('section') chap = match.group('section')
@ -203,8 +207,8 @@ class HeuristicProcessor(object):
blank_lines = "" blank_lines = ""
opt_title_open = "(" opt_title_open = "("
opt_title_close = ")?" opt_title_close = ")?"
n_lookahead_open = "\s+(?!" n_lookahead_open = "(?!\s*"
n_lookahead_close = ")" n_lookahead_close = ")\s*"
default_title = r"(<[ibu][^>]*>)?\s{0,3}(?!Chapter)([\w\:\'\"-]+\s{0,3}){1,5}?(</[ibu][^>]*>)?(?=<)" default_title = r"(<[ibu][^>]*>)?\s{0,3}(?!Chapter)([\w\:\'\"-]+\s{0,3}){1,5}?(</[ibu][^>]*>)?(?=<)"
simple_title = r"(<[ibu][^>]*>)?\s{0,3}(?!(Chapter|\s+<)).{0,65}?(</[ibu][^>]*>)?(?=<)" simple_title = r"(<[ibu][^>]*>)?\s{0,3}(?!(Chapter|\s+<)).{0,65}?(</[ibu][^>]*>)?(?=<)"
@ -215,7 +219,7 @@ class HeuristicProcessor(object):
[r"[^'\"]?(Introduction|Synopsis|Acknowledgements|Epilogue|CHAPTER|Kapitel|Volume\b|Prologue|Book\b|Part\b|Dedication|Preface)\s*([\d\w-]+\:?\'?\s*){0,5}", True, True, True, False, "Searching for common section headings", 'common'], [r"[^'\"]?(Introduction|Synopsis|Acknowledgements|Epilogue|CHAPTER|Kapitel|Volume\b|Prologue|Book\b|Part\b|Dedication|Preface)\s*([\d\w-]+\:?\'?\s*){0,5}", True, True, True, False, "Searching for common section headings", 'common'],
[r"[^'\"]?(CHAPTER|Kapitel)\s*([\dA-Z\-\'\"\?!#,]+\s*){0,7}\s*", True, True, True, False, "Searching for most common chapter headings", 'chapter'], # Highest frequency headings which include titles [r"[^'\"]?(CHAPTER|Kapitel)\s*([\dA-Z\-\'\"\?!#,]+\s*){0,7}\s*", True, True, True, False, "Searching for most common chapter headings", 'chapter'], # Highest frequency headings which include titles
[r"<b[^>]*>\s*(<span[^>]*>)?\s*(?!([*#•=]+\s*)+)(\s*(?=[\d.\w#\-*\s]+<)([\d.\w#-*]+\s*){1,5}\s*)(?!\.)(</span>)?\s*</b>", True, True, True, False, "Searching for emphasized lines", 'emphasized'], # Emphasized lines [r"<b[^>]*>\s*(<span[^>]*>)?\s*(?!([*#•=]+\s*)+)(\s*(?=[\d.\w#\-*\s]+<)([\d.\w#-*]+\s*){1,5}\s*)(?!\.)(</span>)?\s*</b>", True, True, True, False, "Searching for emphasized lines", 'emphasized'], # Emphasized lines
[r"[^'\"]?(\d+(\.|:))\s*([\dA-Z\-\'\"#,]+\s*){0,7}\s*", True, True, True, False, "Searching for numeric chapter headings", 'numeric'], # Numeric Chapters [r"[^'\"]?(\d+(\.|:))\s*([\w\-\'\"#,]+\s*){0,7}\s*", True, True, True, False, "Searching for numeric chapter headings", 'numeric'], # Numeric Chapters
[r"([A-Z]\s+){3,}\s*([\d\w-]+\s*){0,3}\s*", True, True, True, False, "Searching for letter spaced headings", 'letter_spaced'], # Spaced Lettering [r"([A-Z]\s+){3,}\s*([\d\w-]+\s*){0,3}\s*", True, True, True, False, "Searching for letter spaced headings", 'letter_spaced'], # Spaced Lettering
[r"[^'\"]?(\d+\.?\s+([\d\w-]+\:?\'?-?\s?){0,5})\s*", True, True, True, False, "Searching for numeric chapters with titles", 'numeric_title'], # Numeric Titles [r"[^'\"]?(\d+\.?\s+([\d\w-]+\:?\'?-?\s?){0,5})\s*", True, True, True, False, "Searching for numeric chapters with titles", 'numeric_title'], # Numeric Titles
[r"[^'\"]?(\d+)\s*([\dA-Z\-\'\"\?!#,]+\s*){0,7}\s*", True, True, True, False, "Searching for simple numeric headings", 'plain_number'], # Numeric Chapters, no dot or colon [r"[^'\"]?(\d+)\s*([\dA-Z\-\'\"\?!#,]+\s*){0,7}\s*", True, True, True, False, "Searching for simple numeric headings", 'plain_number'], # Numeric Chapters, no dot or colon
@ -275,7 +279,7 @@ class HeuristicProcessor(object):
self.log.debug(unicode(type_name)+" had "+unicode(hits)+" hits - "+unicode(self.chapters_no_title)+" chapters with no title, "+unicode(self.chapters_with_title)+" chapters with titles, "+unicode(float(self.chapters_with_title) / float(hits))+" percent. ") self.log.debug(unicode(type_name)+" had "+unicode(hits)+" hits - "+unicode(self.chapters_no_title)+" chapters with no title, "+unicode(self.chapters_with_title)+" chapters with titles, "+unicode(float(self.chapters_with_title) / float(hits))+" percent. ")
if type_name == 'common': if type_name == 'common':
analysis_result.append([chapter_type, n_lookahead_req, strict_title, ignorecase, title_req, log_message, type_name]) analysis_result.append([chapter_type, n_lookahead_req, strict_title, ignorecase, title_req, log_message, type_name])
elif self.min_chapters <= hits < max_chapters: elif self.min_chapters <= hits < max_chapters or self.min_chapters < 3 > hits:
analysis_result.append([chapter_type, n_lookahead_req, strict_title, ignorecase, title_req, log_message, type_name]) analysis_result.append([chapter_type, n_lookahead_req, strict_title, ignorecase, title_req, log_message, type_name])
break break
else: else:
@ -367,6 +371,8 @@ class HeuristicProcessor(object):
html = re.sub(ur'\s*<o:p>\s*</o:p>', ' ', html) html = re.sub(ur'\s*<o:p>\s*</o:p>', ' ', html)
# Delete microsoft 'smart' tags # Delete microsoft 'smart' tags
html = re.sub('(?i)</?st1:\w+>', '', html) html = re.sub('(?i)</?st1:\w+>', '', html)
# Delete self closing paragraph tags
html = re.sub('<p\s?/>', '', html)
# Get rid of empty span, bold, font, em, & italics tags # Get rid of empty span, bold, font, em, & italics tags
html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]*>\s*</span>){0,2}\s*</span>\s*", " ", html) html = re.sub(r"\s*<span[^>]*>\s*(<span[^>]*>\s*</span>){0,2}\s*</span>\s*", " ", html)
html = re.sub(r"\s*<(font|[ibu]|em)[^>]*>\s*(<(font|[ibu]|em)[^>]*>\s*</(font|[ibu]|em)>\s*){0,2}\s*</(font|[ibu]|em)>", " ", html) html = re.sub(r"\s*<(font|[ibu]|em)[^>]*>\s*(<(font|[ibu]|em)[^>]*>\s*</(font|[ibu]|em)>\s*){0,2}\s*</(font|[ibu]|em)>", " ", html)
@ -467,7 +473,7 @@ class HeuristicProcessor(object):
if blanks_between_paragraphs and getattr(self.extra_opts, 'delete_blank_paragraphs', False): if blanks_between_paragraphs and getattr(self.extra_opts, 'delete_blank_paragraphs', False):
self.log.debug("deleting blank lines") self.log.debug("deleting blank lines")
self.blanks_deleted = True self.blanks_deleted = True
html = self.multi_blank.sub('\n<p id="softbreak" style="margin-top:1.5em; margin-bottom:1.5em"> </p>', html) html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:1.5em; margin-bottom:1.5em"> </p>', html)
html = self.blankreg.sub('', html) html = self.blankreg.sub('', html)
# Determine line ending type # Determine line ending type
@ -522,11 +528,11 @@ class HeuristicProcessor(object):
# Center separator lines # Center separator lines
html = re.sub(u'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*(?P<break>([*#•=✦]+\s*)+)\s*(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>', '<p style="text-align:center; margin-top:1.25em; margin-bottom:1.25em">' + '\g<break>' + '</p>', html) html = re.sub(u'<(?P<outer>p|div)[^>]*>\s*(<(?P<inner1>font|span|[ibu])[^>]*>)?\s*(<(?P<inner2>font|span|[ibu])[^>]*>)?\s*(<(?P<inner3>font|span|[ibu])[^>]*>)?\s*(?P<break>([*#•=✦]+\s*)+)\s*(</(?P=inner3)>)?\s*(</(?P=inner2)>)?\s*(</(?P=inner1)>)?\s*</(?P=outer)>', '<p style="text-align:center; margin-top:1.25em; margin-bottom:1.25em">' + '\g<break>' + '</p>', html)
if not self.blanks_deleted: if not self.blanks_deleted:
html = self.multi_blank.sub('\n<p id="softbreak" style="margin-top:1.5em; margin-bottom:1.5em"> </p>', html) html = self.multi_blank.sub('\n<p class="softbreak" style="margin-top:1.5em; margin-bottom:1.5em"> </p>', html)
html = re.sub('<p\s+id="softbreak"[^>]*>\s*</p>', '<div id="softbreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em"><hr style="height: 3px; background:#505050" /></div>', html) html = re.sub('<p\s+class="softbreak"[^>]*>\s*</p>', '<div id="softbreak" style="margin-left: 45%; margin-right: 45%; margin-top:1.5em; margin-bottom:1.5em"><hr style="height: 3px; background:#505050" /></div>', html)
if self.deleted_nbsps: if self.deleted_nbsps:
# put back non-breaking spaces in empty paragraphs to preserve original formatting # put back non-breaking spaces in empty paragraphs to preserve original formatting
html = self.blankreg.sub('\n'+r'\g<openline>'+u'\u00a0'+r'\g<closeline>', html) html = self.blankreg.sub('\n'+r'\g<openline>'+u'\u00a0'+r'\g<closeline>', html)
html = self.softbreak.sub('\n'+r'\g<openline>'+u'\u00a0'+r'\g<closeline>', html)
return html return html

View File

@ -411,7 +411,7 @@ def search(title=None, author=None, publisher=None, isbn=None, isbndb_key=None,
r.pubdate = pubdate r.pubdate = pubdate
def fix_case(x): def fix_case(x):
if x and x.isupper(): if x:
x = titlecase(x) x = titlecase(x)
return x return x

View File

@ -6,7 +6,7 @@ __docformat__ = 'restructuredtext en'
import re import re
from PyQt4.QtCore import SIGNAL, Qt from PyQt4.QtCore import SIGNAL, Qt, pyqtSignal
from PyQt4.QtGui import QDialog, QWidget, QDialogButtonBox, \ from PyQt4.QtGui import QDialog, QWidget, QDialogButtonBox, \
QBrush, QTextCursor, QTextEdit QBrush, QTextCursor, QTextEdit
@ -19,8 +19,8 @@ from calibre.gui2.dialogs.choose_format import ChooseFormatDialog
class RegexBuilder(QDialog, Ui_RegexBuilder): class RegexBuilder(QDialog, Ui_RegexBuilder):
def __init__(self, db, book_id, regex, *args): def __init__(self, db, book_id, regex, doc=None, parent=None):
QDialog.__init__(self, *args) QDialog.__init__(self, parent)
self.setupUi(self) self.setupUi(self)
self.regex.setText(regex) self.regex.setText(regex)
@ -28,9 +28,13 @@ class RegexBuilder(QDialog, Ui_RegexBuilder):
if not db or not book_id: if not db or not book_id:
self.button_box.addButton(QDialogButtonBox.Open) self.button_box.addButton(QDialogButtonBox.Open)
elif not self.select_format(db, book_id): elif not doc and not self.select_format(db, book_id):
self.cancelled = True self.cancelled = True
return return
if doc:
self.preview.setPlainText(doc)
self.cancelled = False self.cancelled = False
self.connect(self.button_box, SIGNAL('clicked(QAbstractButton*)'), self.button_clicked) self.connect(self.button_box, SIGNAL('clicked(QAbstractButton*)'), self.button_clicked)
self.connect(self.regex, SIGNAL('textChanged(QString)'), self.regex_valid) self.connect(self.regex, SIGNAL('textChanged(QString)'), self.regex_valid)
@ -153,24 +157,36 @@ class RegexBuilder(QDialog, Ui_RegexBuilder):
if button == self.button_box.button(QDialogButtonBox.Ok): if button == self.button_box.button(QDialogButtonBox.Ok):
self.accept() self.accept()
def doc(self):
return unicode(self.preview.toPlainText())
class RegexEdit(QWidget, Ui_Edit): class RegexEdit(QWidget, Ui_Edit):
doc_update = pyqtSignal(unicode)
def __init__(self, parent=None): def __init__(self, parent=None):
QWidget.__init__(self, parent) QWidget.__init__(self, parent)
self.setupUi(self) self.setupUi(self)
self.book_id = None self.book_id = None
self.db = None self.db = None
self.doc_cache = None
self.connect(self.button, SIGNAL('clicked()'), self.builder) self.connect(self.button, SIGNAL('clicked()'), self.builder)
def builder(self): def builder(self):
bld = RegexBuilder(self.db, self.book_id, self.edit.text(), self) bld = RegexBuilder(self.db, self.book_id, self.edit.text(), self.doc_cache, self)
if bld.cancelled: if bld.cancelled:
return return
if not self.doc_cache:
self.doc_cache = bld.doc()
self.doc_update.emit(self.doc_cache)
if bld.exec_() == bld.Accepted: if bld.exec_() == bld.Accepted:
self.edit.setText(bld.regex.text()) self.edit.setText(bld.regex.text())
def doc(self):
return self.doc_cache
def setObjectName(self, *args): def setObjectName(self, *args):
QWidget.setObjectName(self, *args) QWidget.setObjectName(self, *args)
if hasattr(self, 'edit'): if hasattr(self, 'edit'):
@ -185,8 +201,11 @@ class RegexEdit(QWidget, Ui_Edit):
def set_db(self, db): def set_db(self, db):
self.db = db self.db = db
def set_doc(self, doc):
self.doc_cache = doc
def break_cycles(self): def break_cycles(self):
self.db = None self.db = self.doc_cache = None
@property @property
def text(self): def text(self):

View File

@ -35,13 +35,26 @@ class SearchAndReplaceWidget(Widget, Ui_Form):
self.opt_sr3_search.set_book_id(book_id) self.opt_sr3_search.set_book_id(book_id)
self.opt_sr3_search.set_db(db) self.opt_sr3_search.set_db(db)
self.opt_sr1_search.doc_update.connect(self.update_doc)
self.opt_sr2_search.doc_update.connect(self.update_doc)
self.opt_sr3_search.doc_update.connect(self.update_doc)
def break_cycles(self): def break_cycles(self):
Widget.break_cycles(self) Widget.break_cycles(self)
self.opt_sr1_search.doc_update.disconnect()
self.opt_sr2_search.doc_update.disconnect()
self.opt_sr3_search.doc_update.disconnect()
self.opt_sr1_search.break_cycles() self.opt_sr1_search.break_cycles()
self.opt_sr2_search.break_cycles() self.opt_sr2_search.break_cycles()
self.opt_sr3_search.break_cycles() self.opt_sr3_search.break_cycles()
def update_doc(self, doc):
self.opt_sr1_search.set_doc(doc)
self.opt_sr2_search.set_doc(doc)
self.opt_sr3_search.set_doc(doc)
def pre_commit_check(self): def pre_commit_check(self):
for x in ('sr1_search', 'sr2_search', 'sr3_search'): for x in ('sr1_search', 'sr2_search', 'sr3_search'):
x = getattr(self, 'opt_'+x) x = getattr(self, 'opt_'+x)

View File

@ -7,7 +7,7 @@ import re, os
from PyQt4.Qt import Qt, QDialog, QGridLayout, QVBoxLayout, QFont, QLabel, \ from PyQt4.Qt import Qt, QDialog, QGridLayout, QVBoxLayout, QFont, QLabel, \
pyqtSignal, QDialogButtonBox, QInputDialog, QLineEdit, \ pyqtSignal, QDialogButtonBox, QInputDialog, QLineEdit, \
QMessageBox, QDate, QLineEdit QMessageBox, QDate
from calibre.gui2.dialogs.metadata_bulk_ui import Ui_MetadataBulkDialog from calibre.gui2.dialogs.metadata_bulk_ui import Ui_MetadataBulkDialog
from calibre.gui2.dialogs.tag_editor import TagEditor from calibre.gui2.dialogs.tag_editor import TagEditor
@ -15,7 +15,7 @@ from calibre.ebooks.metadata import string_to_authors, authors_to_string
from calibre.ebooks.metadata.book.base import composite_formatter from calibre.ebooks.metadata.book.base import composite_formatter
from calibre.ebooks.metadata.meta import get_metadata from calibre.ebooks.metadata.meta import get_metadata
from calibre.gui2.custom_column_widgets import populate_metadata_page from calibre.gui2.custom_column_widgets import populate_metadata_page
from calibre.gui2 import error_dialog, ResizableDialog, UNDEFINED_QDATE from calibre.gui2 import error_dialog, ResizableDialog, UNDEFINED_QDATE, gprefs
from calibre.gui2.progress_indicator import ProgressIndicator from calibre.gui2.progress_indicator import ProgressIndicator
from calibre.utils.config import dynamic, JSONConfig from calibre.utils.config import dynamic, JSONConfig
from calibre.utils.titlecase import titlecase from calibre.utils.titlecase import titlecase
@ -321,8 +321,15 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
'This operation cannot be canceled or undone')) 'This operation cannot be canceled or undone'))
self.do_again = False self.do_again = False
self.central_widget.setCurrentIndex(tab) self.central_widget.setCurrentIndex(tab)
geom = gprefs.get('bulk_metadata_window_geometry', None)
if geom is not None:
self.restoreGeometry(bytes(geom))
self.exec_() self.exec_()
def save_state(self, *args):
gprefs['bulk_metadata_window_geometry'] = \
bytearray(self.saveGeometry())
def do_apply_pubdate(self, *args): def do_apply_pubdate(self, *args):
self.apply_pubdate.setChecked(True) self.apply_pubdate.setChecked(True)
@ -790,7 +797,12 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
self.series_start_number.setEnabled(False) self.series_start_number.setEnabled(False)
self.series_start_number.setValue(1) self.series_start_number.setValue(1)
def reject(self):
self.save_state()
ResizableDialog.reject(self)
def accept(self): def accept(self):
self.save_state()
if len(self.ids) < 1: if len(self.ids) < 1:
return QDialog.accept(self) return QDialog.accept(self)

View File

@ -6,8 +6,8 @@
<rect> <rect>
<x>0</x> <x>0</x>
<y>0</y> <y>0</y>
<width>850</width> <width>962</width>
<height>650</height> <height>727</height>
</rect> </rect>
</property> </property>
<property name="windowTitle"> <property name="windowTitle">
@ -44,8 +44,8 @@
<rect> <rect>
<x>0</x> <x>0</x>
<y>0</y> <y>0</y>
<width>832</width> <width>954</width>
<height>574</height> <height>666</height>
</rect> </rect>
</property> </property>
<layout class="QVBoxLayout" name="verticalLayout_2"> <layout class="QVBoxLayout" name="verticalLayout_2">
@ -55,7 +55,7 @@
<item> <item>
<widget class="QTabWidget" name="central_widget"> <widget class="QTabWidget" name="central_widget">
<property name="currentIndex"> <property name="currentIndex">
<number>2</number> <number>0</number>
</property> </property>
<widget class="QWidget" name="tabWidgetPage1"> <widget class="QWidget" name="tabWidgetPage1">
<attribute name="title"> <attribute name="title">
@ -996,8 +996,8 @@ not multiple and the destination field is multiple</string>
<rect> <rect>
<x>0</x> <x>0</x>
<y>0</y> <y>0</y>
<width>810</width> <width>197</width>
<height>264</height> <height>60</height>
</rect> </rect>
</property> </property>
<layout class="QGridLayout" name="testgrid"> <layout class="QGridLayout" name="testgrid">

View File

@ -16,7 +16,7 @@ class TagEditor(QDialog, Ui_TagEditor):
self.setupUi(self) self.setupUi(self)
self.db = db self.db = db
self.index = db.row(id_) self.index = db.row(id_) if id_ is not None else None
if self.index is not None: if self.index is not None:
tags = self.db.tags(self.index) tags = self.db.tags(self.index)
else: else:

View File

@ -43,7 +43,17 @@ p, li { white-space: pre-wrap; }
</property> </property>
<layout class="QVBoxLayout"> <layout class="QVBoxLayout">
<item> <item>
<widget class="QLineEdit" name="re"/> <widget class="QComboBox" name="re">
<property name="editable">
<bool>true</bool>
</property>
<property name="maxCount">
<number>10</number>
</property>
<property name="insertPolicy">
<enum>QComboBox::InsertAtTop</enum>
</property>
</widget>
</item> </item>
</layout> </layout>
</widget> </widget>
@ -94,8 +104,8 @@ p, li { white-space: pre-wrap; }
<rect> <rect>
<x>0</x> <x>0</x>
<y>0</y> <y>0</y>
<width>301</width> <width>277</width>
<height>234</height> <height>276</height>
</rect> </rect>
</property> </property>
<layout class="QGridLayout" name="gridLayout_2"> <layout class="QGridLayout" name="gridLayout_2">

View File

@ -16,7 +16,6 @@ from PyQt4.Qt import QIcon, QFont, QLabel, QListWidget, QAction, \
QTimer, QRect QTimer, QRect
from calibre.gui2 import NONE, error_dialog, pixmap_to_data, gprefs from calibre.gui2 import NONE, error_dialog, pixmap_to_data, gprefs
from calibre.constants import isosx
from calibre.gui2.filename_pattern_ui import Ui_Form from calibre.gui2.filename_pattern_ui import Ui_Form
from calibre import fit_image from calibre import fit_image
from calibre.ebooks import BOOK_EXTENSIONS from calibre.ebooks import BOOK_EXTENSIONS
@ -67,17 +66,31 @@ class FilenamePattern(QWidget, Ui_Form):
self.setupUi(self) self.setupUi(self)
self.connect(self.test_button, SIGNAL('clicked()'), self.do_test) self.connect(self.test_button, SIGNAL('clicked()'), self.do_test)
self.connect(self.re, SIGNAL('returnPressed()'), self.do_test) self.connect(self.re.lineEdit(), SIGNAL('returnPressed()'), self.do_test)
self.initialize() self.re.lineEdit().textChanged.connect(lambda x: self.changed_signal.emit())
self.re.textChanged.connect(lambda x: self.changed_signal.emit())
def initialize(self, defaults=False): def initialize(self, defaults=False):
# Get all items in the combobox. If we are resetting
# to defaults we don't want to lose what the user
# has added.
val_hist = [unicode(self.re.lineEdit().text())] + [unicode(self.re.itemText(i)) for i in xrange(self.re.count())]
self.re.clear()
if defaults: if defaults:
val = prefs.defaults['filename_pattern'] val = prefs.defaults['filename_pattern']
else: else:
val = prefs['filename_pattern'] val = prefs['filename_pattern']
self.re.setText(val) self.re.lineEdit().setText(val)
val_hist += gprefs.get('filename_pattern_history', ['(?P<title>.+)', '(?P<author>[^_-]+) -?\s*(?P<series>[^_0-9-]*)(?P<series_index>[0-9]*)\s*-\s*(?P<title>[^_].+) ?'])
if val in val_hist:
del val_hist[val_hist.index(val)]
val_hist.insert(0, val)
for v in val_hist:
# Ensure we don't have duplicate items.
if v and self.re.findText(v) == -1:
self.re.addItem(v)
self.re.setCurrentIndex(0)
def do_test(self): def do_test(self):
try: try:
@ -110,12 +123,21 @@ class FilenamePattern(QWidget, Ui_Form):
def pattern(self): def pattern(self):
pat = unicode(self.re.text()) pat = unicode(self.re.lineEdit().text())
return re.compile(pat) return re.compile(pat)
def commit(self): def commit(self):
pat = self.pattern().pattern pat = self.pattern().pattern
prefs['filename_pattern'] = pat prefs['filename_pattern'] = pat
history = []
history_pats = [unicode(self.re.lineEdit().text())] + [unicode(self.re.itemText(i)) for i in xrange(self.re.count())]
for p in history_pats[:14]:
# Ensure we don't have duplicate items.
if p and p not in history:
history.append(p)
gprefs['filename_pattern_history'] = history
return pat return pat
@ -304,8 +326,9 @@ class FontFamilyModel(QAbstractListModel):
return NONE return NONE
if role == Qt.DisplayRole: if role == Qt.DisplayRole:
return QVariant(family) return QVariant(family)
if not isosx and role == Qt.FontRole: if False and role == Qt.FontRole:
# Causes a Qt crash with some fonts on OS X # Causes a Qt crash with some fonts
# so disabled.
return QVariant(QFont(family)) return QVariant(QFont(family))
return NONE return NONE

View File

@ -135,7 +135,7 @@ def _match(query, value, matchkind):
pass pass
return False return False
class CacheRow(list): class CacheRow(list): # {{{
def __init__(self, db, composites, val): def __init__(self, db, composites, val):
self.db = db self.db = db
@ -166,6 +166,7 @@ class CacheRow(list):
def __getslice__(self, i, j): def __getslice__(self, i, j):
return self.__getitem__(slice(i, j)) return self.__getitem__(slice(i, j))
# }}}
class ResultCache(SearchQueryParser): # {{{ class ResultCache(SearchQueryParser): # {{{
@ -192,7 +193,7 @@ class ResultCache(SearchQueryParser): # {{{
def break_cycles(self): def break_cycles(self):
self._data = self.field_metadata = self.FIELD_MAP = \ self._data = self.field_metadata = self.FIELD_MAP = \
self.numeric_search_relops = self.date_search_relops = \ self.numeric_search_relops = self.date_search_relops = \
self.all_search_locations = None self.all_search_locations = self.db_prefs = None
def __getitem__(self, row): def __getitem__(self, row):
@ -410,7 +411,7 @@ class ResultCache(SearchQueryParser): # {{{
res = set([]) res = set([])
if self.db_prefs is None: if self.db_prefs is None:
return res return res
user_cats = self.db_prefs['user_categories'] user_cats = self.db_prefs.get('user_categories', [])
if location not in user_cats: if location not in user_cats:
return res return res
c = set(candidates) c = set(candidates)

View File

@ -603,7 +603,7 @@ TXT input supports a number of options to differentiate how paragraphs are detec
formatting will be applied. formatting will be applied.
:guilabel:`Formatting Style: Heuristic` :guilabel:`Formatting Style: Heuristic`
Analyses the document for common chapter headings, scene breaks, and italicized words and applies the Analyzes the document for common chapter headings, scene breaks, and italicized words and applies the
appropriate html markup during conversion. appropriate html markup during conversion.
:guilabel:`Formatting Style: Markdown` :guilabel:`Formatting Style: Markdown`

View File

@ -105,6 +105,7 @@ _extra_lang_codes = {
'en_TH' : _('English (Thailand)'), 'en_TH' : _('English (Thailand)'),
'en_CY' : _('English (Cyprus)'), 'en_CY' : _('English (Cyprus)'),
'en_PK' : _('English (Pakistan)'), 'en_PK' : _('English (Pakistan)'),
'en_HR' : _('English (Croatia)'),
'en_IL' : _('English (Israel)'), 'en_IL' : _('English (Israel)'),
'en_SG' : _('English (Singapore)'), 'en_SG' : _('English (Singapore)'),
'en_YE' : _('English (Yemen)'), 'en_YE' : _('English (Yemen)'),