diff --git a/imgsrc/rating.svg b/imgsrc/rating.svg new file mode 100644 index 0000000000..d289c71b99 --- /dev/null +++ b/imgsrc/rating.svg @@ -0,0 +1,589 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + diff --git a/resources/content_server/gui.js b/resources/content_server/gui.js index 631fb8b617..d0fb49cc8e 100644 --- a/resources/content_server/gui.js +++ b/resources/content_server/gui.js @@ -26,7 +26,7 @@ var current_library_request = null; ////////////////////////////// GET BOOK LIST ////////////////////////////// -var LIBRARY_FETCH_TIMEOUT = 30000; // milliseconds +var LIBRARY_FETCH_TIMEOUT = 5*60000; // milliseconds function create_table_headers() { var thead = $('table#book_list thead tr'); diff --git a/resources/images/rating.png b/resources/images/rating.png new file mode 100644 index 0000000000..81eba505b9 Binary files /dev/null and b/resources/images/rating.png differ diff --git a/resources/images/star.png b/resources/images/star.png deleted file mode 100644 index 6eb1fb890f..0000000000 Binary files a/resources/images/star.png and /dev/null differ diff --git a/resources/jacket/stylesheet.css b/resources/jacket/stylesheet.css new file mode 100644 index 0000000000..8dee8edc3c --- /dev/null +++ b/resources/jacket/stylesheet.css @@ -0,0 +1,116 @@ +/* +** Book Jacket generation +** +** The template for Book Jackets is template.xhtml +** This CSS is inserted into the generated HTML at conversion time +** +** Users can control parts of the presentation of a generated book jacket by +** editing this file and template.xhtml +** +** The general form of a generated Book Jacket: +** +** Title +** Series: series [series_index] +** Published: year_of_publication +** Rating: #_of_stars +** Tags: tag1, tag2, tag3 ... 
+** +** Comments +** +** If a book does not have Series information, a date of publication, a rating or tags +** the corresponding row is automatically removed from the generated book jacket. +*/ + +/* +** Banner +** Only affects EPUB, kindle ignores this type of formatting +*/ +.cbj_banner { + background: #eee; + border: thin solid black; + margin: 1em; + padding: 1em; + -webkit-border-radius:8px; + } + +/* +** Title +*/ +.cbj_title { + font-size: x-large; + text-align: center; + } + +/* +** Table containing Series, Publication Year, Rating and Tags +*/ +table.cbj_header { + width: 100%; + } + +/* +** General formatting for banner labels +*/ +table.cbj_header td.cbj_label { + font-family: sans-serif; + font-weight: bold; + text-align: right; + width: 40%; + } + +/* +** General formatting for banner content +*/ +table.cbj_header td.cbj_content { + font-family: sans-serif; + text-align: left; + width:60%; + } + +/* +** To skip a banner item (Series|Published|Rating|Tags), +** edit the appropriate CSS rule below. 
+*/ +table.cbj_header tr.cbj_series { + /* Uncomment the next line to remove 'Series' from banner section */ + /* display:none; */ + } + +table.cbj_header tr.cbj_pubdate { + /* Uncomment the next line to remove 'Published' from banner section */ + /* display:none; */ + } + +table.cbj_header tr.cbj_rating { + /* Uncomment the next line to remove 'Rating' from banner section */ + /* display:none; */ + } + +table.cbj_header tr.cbj_tags { + /* Uncomment the next line to remove 'Tags' from banner section */ + /* display:none; */ + } + +hr { + /* This rule controls formatting for any hr elements contained in the jacket */ + border-top: 0px solid white; + border-right: 0px solid white; + border-bottom: 2px solid black; + border-left: 0px solid white; + margin-left: 10%; + width: 80%; + } + +.cbj_footer { + font-family: sans-serif; + font-size: small; + margin-top: 8px; + text-align: center; + } +.cbj_smallcaps { + font-size: 90%; + } + +.cbj_comments { + font-family: sans-serif; + } diff --git a/resources/jacket/template.xhtml b/resources/jacket/template.xhtml new file mode 100644 index 0000000000..93e12983e8 --- /dev/null +++ b/resources/jacket/template.xhtml @@ -0,0 +1,34 @@ + + + {title_str} + + + + +
+
{title}
+ + + + + + + + + + + + + + + + + +
{series_label}:{series}
{pubdate_label}:{pubdate}
{rating_label}:{rating}
{tags_label}:{tags}
+ +
+
+
{comments}
+ + + diff --git a/resources/recipes/infobae.recipe b/resources/recipes/infobae.recipe index cda9bf83d2..b7f9cd3c6c 100644 --- a/resources/recipes/infobae.recipe +++ b/resources/recipes/infobae.recipe @@ -1,12 +1,8 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' -__copyright__ = '2008-2009, Darko Miletic ' +__copyright__ = '2008-2010, Darko Miletic ' ''' infobae.com ''' -import re -import urllib, urlparse from calibre.web.feeds.news import BasicNewsRecipe @@ -20,35 +16,24 @@ class Infobae(BasicNewsRecipe): max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False - language = 'es' - lang = 'es-AR' - + language = 'es' encoding = 'cp1252' - cover_url = 'http://www.infobae.com/imgs/header/header.gif' + masthead_url = 'http://www.infobae.com/imgs/header/header.gif' remove_javascript = True - preprocess_regexps = [(re.compile( - r''), lambda m:'')] - - - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True' - - extra_css = ''' - .col-center{font-family:Arial,Helvetica,sans-serif;} - h1{font-family:Arial,Helvetica,sans-serif; color:#0D4261;} - .fuenteIntNota{font-family:Arial,Helvetica,sans-serif; color:#1D1D1D; font-size:x-small;} - ''' - - keep_only_tags = [dict(name='div', attrs={'class':['content']})] - - - remove_tags = [ - dict(name='div', attrs={'class':['options','col-right','controles', 'bannerLibre','tiulo-masleidas','masleidas-h']}), - dict(name='a', attrs={'name' : 'comentario',}), - dict(name='iframe'), - dict(name='img', alt = "Ver galerias de imagenes"), - - ] - + remove_empty_feeds = True + extra_css = ''' + body{font-family:Arial,Helvetica,sans-serif;} + .popUpTitulo{color:#0D4261; font-size: xx-large} + ''' + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + , 'linearize_tables' : True + } + feeds = [ (u'Noticias' , u'http://www.infobae.com/adjuntos/html/RSS/hoy.xml' ) @@ 
-57,39 +42,14 @@ class Infobae(BasicNewsRecipe): ,(u'Deportes' , u'http://www.infobae.com/adjuntos/html/RSS/deportes.xml' ) ] -# def print_version(self, url): -# main, sep, article_part = url.partition('contenidos/') -# article_id, rsep, rrest = article_part.partition('-') -# return u'http://www.infobae.com/notas/nota_imprimir.php?Idx=' + article_id - - def get_article_url(self, article): - ans = article.get('link').encode('utf-8') - parts = list(urlparse.urlparse(ans)) - parts[2] = urllib.quote(parts[2]) - ans = urlparse.urlunparse(parts) - return ans.decode('utf-8') - - - def preprocess_html(self, soup): - - for tag in soup.head.findAll('strong'): - tag.extract() - for tag in soup.findAll('meta'): - del tag['content'] - tag.extract() - - mtag = '\n\n' - soup.head.insert(0,mtag) - for item in soup.findAll(style=True): - del item['style'] - - return soup + def print_version(self, url): + article_part = url.rpartition('/')[2] + article_id= article_part.partition('-')[0] + return 'http://www.infobae.com/notas/nota_imprimir.php?Idx=' + article_id def postprocess_html(self, soup, first): - for tag in soup.findAll(name='strong'): tag.name = 'b' - return soup diff --git a/resources/recipes/nspm.recipe b/resources/recipes/nspm.recipe index 13ff42b277..58b782415b 100644 --- a/resources/recipes/nspm.recipe +++ b/resources/recipes/nspm.recipe @@ -6,6 +6,7 @@ nspm.rs import re from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import NavigableString class Nspm(BasicNewsRecipe): title = 'Nova srpska politicka misao' @@ -21,6 +22,7 @@ class Nspm(BasicNewsRecipe): encoding = 'utf-8' language = 'sr' delay = 2 + remove_empty_feeds = True publication_type = 'magazine' masthead_url = 'http://www.nspm.rs/templates/jsn_epic_pro/images/logol.jpg' extra_css = """ @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @@ -45,8 +47,9 @@ class Nspm(BasicNewsRecipe): 
dict(name=['link','object','embed','script','meta','base','iframe']) ,dict(attrs={'class':'buttonheading'}) ] - remove_tags_after = dict(attrs={'class':'article_separator'}) - remove_attributes = ['width','height'] + remove_tags_before = dict(attrs={'class':'contentheading'}) + remove_tags_after = dict(attrs={'class':'article_separator'}) + remove_attributes = ['width','height'] def get_browser(self): br = BasicNewsRecipe.get_browser() @@ -67,4 +70,8 @@ class Nspm(BasicNewsRecipe): def preprocess_html(self, soup): for item in soup.body.findAll(style=True): del item['style'] + for item in soup.body.findAll('h1'): + nh = NavigableString(item.a.string) + item.a.extract() + item.insert(0,nh) return self.adeify_images(soup) diff --git a/resources/recipes/slate.recipe b/resources/recipes/slate.recipe index c03255d2df..9da1c4da78 100644 --- a/resources/recipes/slate.recipe +++ b/resources/recipes/slate.recipe @@ -27,9 +27,6 @@ class PeriodicalNameHere(BasicNewsRecipe): encoding = None language = 'en' - - - # Method variables for customizing feed parsing summary_length = 250 use_embedded_content = None @@ -45,13 +42,26 @@ class PeriodicalNameHere(BasicNewsRecipe): match_regexps = [] # The second entry is for 'Big Money', which comes from a different site, uses different markup - keep_only_tags = [dict(attrs={ 'id':['article_top', 'article_body']}), + keep_only_tags = [dict(attrs={ 'id':['article_top', 'article_body', 'story']}), dict(attrs={ 'id':['content']}) ] # The second entry is for 'Big Money', which comes from a different site, uses different markup - remove_tags = [dict(attrs={ 'id':['toolbox','recommend_tab','insider_ad_wrapper', - 'article_bottom_tools_cntr','fray_article_discussion', 'fray_article_links','bottom_sponsored_links','author_bio', - 'bizbox_links_bottom','ris_links_wrapper','BOXXLE']}), + remove_tags = [dict(attrs={ 'id':[ + 'add_comments_button', + 'article_bottom_tools', + 'article_bottom_tools_cntr', + 'bizbox_links_bottom', + 'BOXXLE', + 
'comments_button', + 'comments-to-fray', + 'fbog_article_bottom_cntr', + 'fray_article_discussion', 'fray_article_links','bottom_sponsored_links','author_bio', + 'insider_ad_wrapper', + 'js_kit_cntr', + 'recommend_tab', + 'ris_links_wrapper', + 'toolbox', + ]}), dict(attrs={ 'id':['content-top','service-links-bottom','hed']}) ] excludedDescriptionKeywords = ['Slate V','Twitter feed','podcast'] @@ -339,8 +349,8 @@ class PeriodicalNameHere(BasicNewsRecipe): # Change

to

headline = soup.find("h1") - tag = headline.find("span") - tag.name = 'div' + #tag = headline.find("span") + #tag.name = 'div' if headline is not None : h2tag = Tag(soup, "h2") diff --git a/resources/recipes/xkcd.recipe b/resources/recipes/xkcd.recipe index 312027004e..ad0d420deb 100644 --- a/resources/recipes/xkcd.recipe +++ b/resources/recipes/xkcd.recipe @@ -24,18 +24,18 @@ class XkcdCom(BasicNewsRecipe): (re.compile(r'()'), lambda m: '%s%s

%s

' % (m.group(1), m.group(3), m.group(2))) ] - + def parse_index(self): INDEX = 'http://xkcd.com/archive/' - soup = self.index_to_soup(INDEX) + soup = self.index_to_soup(INDEX) articles = [] for item in soup.findAll('a', title=True): articles.append({ 'date': item['title'], 'timestamp': time.mktime(time.strptime(item['title'], '%Y-%m-%d'))+1, 'url': 'http://xkcd.com' + item['href'], - 'title': self.tag_to_string(item).encode('UTF-8'), + 'title': self.tag_to_string(item), 'description': '', 'content': '', }) diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py index 4c87236e71..68df832048 100644 --- a/src/calibre/customize/builtins.py +++ b/src/calibre/customize/builtins.py @@ -459,7 +459,7 @@ from calibre.devices.iriver.driver import IRIVER_STORY from calibre.devices.binatone.driver import README from calibre.devices.hanvon.driver import N516, EB511, ALEX, AZBOOKA, THEBOOK from calibre.devices.edge.driver import EDGE -from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS +from calibre.devices.teclast.driver import TECLAST_K3, NEWSMY, IPAPYRUS, SOVOS from calibre.devices.sne.driver import SNE from calibre.devices.misc import PALMPRE, AVANT, SWEEX, PDNOVEL, KOGAN, GEMEI from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG @@ -557,6 +557,7 @@ plugins += [ TECLAST_K3, NEWSMY, IPAPYRUS, + SOVOS, EDGE, SNE, ALEX, diff --git a/src/calibre/customize/profiles.py b/src/calibre/customize/profiles.py index 1563f764ca..2b5eb5011e 100644 --- a/src/calibre/customize/profiles.py +++ b/src/calibre/customize/profiles.py @@ -248,6 +248,9 @@ class OutputProfile(Plugin): #: If True, the date is appended to the title of downloaded news periodical_date_in_title = True + #: The character used to represent a star in ratings + ratings_char = u'*' + @classmethod def tags_to_string(cls, tags): return escape(', '.join(tags)) @@ -273,6 +276,7 @@ class iPadOutput(OutputProfile): 'macros': {'border-width': 
'{length}|medium|thick|thin'} } ] + ratings_char = u'\u2605' touchscreen = True # touchscreen_news_css {{{ touchscreen_news_css = u''' @@ -553,10 +557,11 @@ class KindleOutput(OutputProfile): fsizes = [12, 12, 14, 16, 18, 20, 22, 24] supports_mobi_indexing = True periodical_date_in_title = False + ratings_char = u'\u2605' @classmethod def tags_to_string(cls, tags): - return u'%s
%s' % (', '.join(tags), + return u'%s
%s' % (', '.join(tags), 'ttt '.join(tags)+'ttt ') class KindleDXOutput(OutputProfile): diff --git a/src/calibre/devices/apple/driver.py b/src/calibre/devices/apple/driver.py index e318d368ff..5fe36faf75 100644 --- a/src/calibre/devices/apple/driver.py +++ b/src/calibre/devices/apple/driver.py @@ -207,8 +207,8 @@ class ITUNES(DriverBase): for (j,p_book) in enumerate(self.update_list): if False: if isosx: - self.log.info(" looking for %s" % - str(p_book['lib_book'])[-9:]) + self.log.info(" looking for '%s' by %s uuid:%s" % + (p_book['title'],p_book['author'], p_book['uuid'])) elif iswindows: self.log.info(" looking for '%s' by %s (%s)" % (p_book['title'],p_book['author'], p_book['uuid'])) @@ -303,7 +303,7 @@ class ITUNES(DriverBase): this_book.device_collections = [] this_book.library_id = library_books[this_book.path] if this_book.path in library_books else None this_book.size = book.size() - this_book.uuid = book.album() + this_book.uuid = book.composer() # Hack to discover if we're running in GUI environment if self.report_progress is not None: this_book.thumbnail = self._generate_thumbnail(this_book.path, book) @@ -732,15 +732,15 @@ class ITUNES(DriverBase): for path in paths: if DEBUG: self._dump_cached_book(self.cached_books[path], indent=2) - self.log.info(" looking for '%s' by '%s' (%s)" % + self.log.info(" looking for '%s' by '%s' uuid:%s" % (self.cached_books[path]['title'], self.cached_books[path]['author'], self.cached_books[path]['uuid'])) # Purge the booklist, self.cached_books, thumb cache for i,bl_book in enumerate(booklists[0]): - if False: - self.log.info(" evaluating '%s' by '%s' (%s)" % + if DEBUG: + self.log.info(" evaluating '%s' by '%s' uuid:%s" % (bl_book.title, bl_book.author,bl_book.uuid)) found = False @@ -781,10 +781,10 @@ class ITUNES(DriverBase): zf.close() break -# else: -# if DEBUG: -# self.log.error(" unable to find '%s' by '%s' (%s)" % -# (bl_book.title, bl_book.author,bl_book.uuid)) + else: + if DEBUG: + self.log.error(" unable to 
find '%s' by '%s' (%s)" % + (bl_book.title, bl_book.author,bl_book.uuid)) if False: self._dump_booklist(booklists[0], indent = 2) @@ -905,7 +905,8 @@ class ITUNES(DriverBase): # Add new_book to self.cached_books if DEBUG: - self.log.info(" adding '%s' by '%s' ['%s'] to self.cached_books" % + self.log.info("ITUNES.upload_books()") + self.log.info(" adding '%s' by '%s' uuid:%s to self.cached_books" % ( metadata[i].title, metadata[i].author, metadata[i].uuid)) self.cached_books[this_book.path] = { 'author': metadata[i].author, @@ -943,7 +944,11 @@ class ITUNES(DriverBase): new_booklist.append(this_book) self._update_iTunes_metadata(metadata[i], db_added, lb_added, this_book) - # Add new_book to self.cached_paths + # Add new_book to self.cached_books + if DEBUG: + self.log.info("ITUNES.upload_books()") + self.log.info(" adding '%s' by '%s' uuid:%s to self.cached_books" % + ( metadata[i].title, metadata[i].author, metadata[i].uuid)) self.cached_books[this_book.path] = { 'author': metadata[i].author[0], 'dev_book': db_added, @@ -1406,8 +1411,8 @@ class ITUNES(DriverBase): for book in booklist: if isosx: - self.log.info("%s%-40.40s %-30.30s %-10.10s" % - (' '*indent,book.title, book.author, str(book.library_id)[-9:])) + self.log.info("%s%-40.40s %-30.30s %-10.10s %s" % + (' '*indent,book.title, book.author, str(book.library_id)[-9:], book.uuid)) elif iswindows: self.log.info("%s%-40.40s %-30.30s" % (' '*indent,book.title, book.author)) @@ -1547,11 +1552,12 @@ class ITUNES(DriverBase): if isosx: for ub in self.update_list: - self.log.info("%s%-40.40s %-30.30s %-10.10s" % + self.log.info("%s%-40.40s %-30.30s %-10.10s %s" % (' '*indent, ub['title'], ub['author'], - str(ub['lib_book'])[-9:])) + str(ub['lib_book'])[-9:], + ub['uuid'])) elif iswindows: for ub in self.update_list: self.log.info("%s%-40.40s %-30.30s" % @@ -2342,8 +2348,10 @@ class ITUNES(DriverBase): if isosx: if DEBUG: self.log.info(" deleting '%s' from iDevice" % cached_book['title']) - 
cached_book['dev_book'].delete() - + try: + cached_book['dev_book'].delete() + except: + self.log.error(" error deleting '%s'" % cached_book['title']) elif iswindows: hit = self._find_device_book(cached_book) if hit: @@ -2802,7 +2810,7 @@ class ITUNES_ASYNC(ITUNES): #this_book.library_id = library_books[this_book.path] if this_book.path in library_books else None this_book.library_id = library_books[book] this_book.size = library_books[book].size() - this_book.uuid = library_books[book].album() + this_book.uuid = library_books[book].composer() # Hack to discover if we're running in GUI environment if self.report_progress is not None: this_book.thumbnail = self._generate_thumbnail(this_book.path, library_books[book]) @@ -2842,6 +2850,7 @@ class ITUNES_ASYNC(ITUNES): this_book.device_collections = [] this_book.library_id = library_books[book] this_book.size = library_books[book].Size + this_book.uuid = library_books[book].Composer # Hack to discover if we're running in GUI environment if self.report_progress is not None: this_book.thumbnail = self._generate_thumbnail(this_book.path, library_books[book]) diff --git a/src/calibre/devices/prs505/driver.py b/src/calibre/devices/prs505/driver.py index c55936be2d..094c12cf0c 100644 --- a/src/calibre/devices/prs505/driver.py +++ b/src/calibre/devices/prs505/driver.py @@ -35,16 +35,16 @@ class PRS505(USBMS): VENDOR_NAME = 'SONY' WINDOWS_MAIN_MEM = re.compile( - r'(PRS-(505|300|500))|' - r'(PRS-((700[#/])|((6|9)00&)))' + r'(PRS-(505|500))|' + r'(PRS-((700[#/])|((6|9|3)(0|5)0&)))' ) WINDOWS_CARD_A_MEM = re.compile( r'(PRS-(505|500)[#/]\S+:MS)|' - r'(PRS-((700[/#]\S+:)|((6|9)00[#_]))MS)' + r'(PRS-((700[/#]\S+:)|((6|9)(0|5)0[#_]))MS)' ) WINDOWS_CARD_B_MEM = re.compile( r'(PRS-(505|500)[#/]\S+:SD)|' - r'(PRS-((700[/#]\S+:)|((6|9)00[#_]))SD)' + r'(PRS-((700[/#]\S+:)|((6|9)(0|5)0[#_]))SD)' ) diff --git a/src/calibre/devices/teclast/driver.py b/src/calibre/devices/teclast/driver.py index 0c60a367cf..2055ff9306 100644 --- 
a/src/calibre/devices/teclast/driver.py +++ b/src/calibre/devices/teclast/driver.py @@ -52,3 +52,14 @@ class IPAPYRUS(TECLAST_K3): VENDOR_NAME = 'E_READER' WINDOWS_MAIN_MEM = '' +class SOVOS(TECLAST_K3): + + name = 'Sovos device interface' + gui_name = 'Sovos' + description = _('Communicate with the Sovos reader.') + + FORMATS = ['epub', 'fb2', 'pdf', 'txt'] + + VENDOR_NAME = 'RK28XX' + WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = 'USB-MSC' + diff --git a/src/calibre/ebooks/chm/reader.py b/src/calibre/ebooks/chm/reader.py index 67a2d36607..831c16bf6a 100644 --- a/src/calibre/ebooks/chm/reader.py +++ b/src/calibre/ebooks/chm/reader.py @@ -132,7 +132,11 @@ class CHMReader(CHMFile): for path in self.Contents(): lpath = os.path.join(output_dir, path) self._ensure_dir(lpath) - data = self.GetFile(path) + try: + data = self.GetFile(path) + except: + self.log.exception('Failed to extract %s from CHM, ignoring'%path) + continue if lpath.find(';') != -1: # fix file names with ";" at the end, see _reformat() lpath = lpath.split(';')[0] diff --git a/src/calibre/ebooks/conversion/cli.py b/src/calibre/ebooks/conversion/cli.py index 7439718cf6..2ef633d0bb 100644 --- a/src/calibre/ebooks/conversion/cli.py +++ b/src/calibre/ebooks/conversion/cli.py @@ -122,7 +122,7 @@ def add_pipeline_options(parser, plumber): 'font_size_mapping', 'line_height', 'linearize_tables', - 'extra_css', + 'extra_css', 'smarten_punctuation', 'margin_top', 'margin_left', 'margin_right', 'margin_bottom', 'change_justification', 'insert_blank_line', 'remove_paragraph_spacing','remove_paragraph_spacing_indent_size', diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index 24b35f804f..16282dd28d 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -362,6 +362,14 @@ OptionRecommendation(name='preprocess_html', ) ), +OptionRecommendation(name='smarten_punctuation', + recommended_value=False, 
level=OptionRecommendation.LOW, + help=_('Convert plain quotes, dashes and ellipsis to their ' + 'typographically correct equivalents. For details, see ' + 'http://daringfireball.net/projects/smartypants' + ) + ), + OptionRecommendation(name='remove_header', recommended_value=False, level=OptionRecommendation.LOW, help=_('Use a regular expression to try and remove the header.' diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 256bcce6fc..4538af96c4 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -75,6 +75,8 @@ def line_length(format, raw, percent): linere = re.compile('(?<=)', re.DOTALL) elif format == 'pdf': linere = re.compile('(?<=
).*?(?=
)', re.DOTALL) + elif format == 'spanned_html': + linere = re.compile('(?<=)', re.DOTALL) lines = linere.findall(raw) lengths = [] @@ -219,35 +221,34 @@ class HTMLPreProcessor(object): (re.compile(u'˛\s*()*\s*A', re.UNICODE), lambda match: u'Ą'), (re.compile(u'˛\s*()*\s*e', re.UNICODE), lambda match: u'ę'), (re.compile(u'˛\s*()*\s*E', re.UNICODE), lambda match: u'Ę'), - + # ˙ (re.compile(u'˙\s*()*\s*z', re.UNICODE), lambda match: u'ż'), (re.compile(u'˙\s*()*\s*Z', re.UNICODE), lambda match: u'Ż'), - + + # If pdf printed from a browser then the header/footer has a reliable pattern + (re.compile(r'((?<=)\s*file:////?[A-Z].*
|file:////?[A-Z].*
(?=\s*
))', re.IGNORECASE), lambda match: ''), + + # Center separator lines + (re.compile(u'
\s*(?P([*#•]+\s*)+)\s*
'), lambda match: '

\n

' + match.group(1) + '

'), # Remove page links (re.compile(r'', re.IGNORECASE), lambda match: ''), # Remove
tags - (re.compile(r'', re.IGNORECASE), lambda match: '
'), - # Replace

with

- (re.compile(r'\s*', re.IGNORECASE), lambda match: '

'), - - # Remove hyphenation - (re.compile(r'-\n\r?'), lambda match: ''), + (re.compile(r'', re.IGNORECASE), lambda match: '
'), # Remove gray background (re.compile(r']+>'), lambda match : ''), # Detect Chapters to match default XPATH in GUI - (re.compile(r'(?=<(/?br|p))(<(/?br|p)[^>]*)?>\s*(?P(<(i|b)>(<(i|b)>)?)?(.?Chapter|Epilogue|Prologue|Book|Part)\s*([\d\w-]+(\s\w+)?)?(()?)?)]*>\s*(?P(<(i|b)>)?\s*\w+(\s*\w+)?\s*(</(i|b)>)?\s*(</?(br|p)[^>]*>))?', re.IGNORECASE), chap_head), - (re.compile(r'(?=<(/?br|p))(<(/?br|p)[^>]*)?>\s*(?P<chap>([A-Z \'"!]{5,})\s*(\d+|\w+)?)(</?p[^>]*>|<br[^>]*>)\n?((?=(<i>)?\s*\w+(\s+\w+)?(</i>)?(<br[^>]*>|</?p[^>]*>))((?P<title>.*)(<br[^>]*>|</?p[^>]*>)))?'), chap_head), + (re.compile(r'<br>\s*(?P<chap>(<[ibu]>){0,2}\s*.?(Introduction|Chapter|Epilogue|Prologue|Book|Part|Dedication|Volume|Preface|Acknowledgments)\s*([\d\w-]+\s*){0,3}\s*(</[ibu]>){0,2})\s*(<br>\s*){1,3}\s*(?P<title>(<[ibu]>){0,2}(\s*\w+){1,4}\s*(</[ibu]>){0,2}\s*<br>)?', re.IGNORECASE), chap_head), + # Cover the case where every letter in a chapter title is separated by a space + (re.compile(r'<br>\s*(?P<chap>([A-Z]\s+){4,}\s*([\d\w-]+\s*){0,3}\s*)\s*(<br>\s*){1,3}\s*(?P<title>(<[ibu]>){0,2}(\s*\w+){1,4}\s*(</[ibu]>){0,2}\s*(<br>))?'), chap_head), # Have paragraphs show better (re.compile(r'<br.*?>'), lambda match : '<p>'), # Clean up spaces (re.compile(u'(?<=[\.,;\?!”"\'])[\s^ ]*(?=<)'), lambda match: ' '), - # Connect paragraphs split by - - (re.compile(u'(?<=[^\s][-–])[\s]*(</p>)*[\s]*(<p>)*\s*(?=[^\s])'), lambda match: ''), # Add space before and after italics (re.compile(u'(?<!“)<i>'), lambda match: ' <i>'), (re.compile(r'</i>(?=\w)'), lambda match: '</i> '), @@ -328,12 +329,29 @@ class HTMLPreProcessor(object): print 'Failed to parse remove_footer regexp' traceback.print_exc() + # unwrap hyphenation - moved here so it's executed after header/footer removal + if is_pdftohtml: + # unwrap visible dashes and hyphens - don't delete they are often hyphens for + # for compound words, formatting, etc + end_rules.append((re.compile(u'(?<=[-–—])\s*<p>\s*(?=[[a-z\d])'), lambda match: '')) + # 
unwrap/delete soft hyphens + end_rules.append((re.compile(u'[­](\s*<p>)+\s*(?=[[a-z\d])'), lambda match: '')) + # unwrap/delete soft hyphens with formatting + end_rules.append((re.compile(u'[­]\s*(</(i|u|b)>)+(\s*<p>)+\s*(<(i|u|b)>)+\s*(?=[[a-z\d])'), lambda match: '')) + + # Make the more aggressive chapter marking regex optional with the preprocess option to + # reduce false positives and move after header/footer removal + if getattr(self.extra_opts, 'preprocess_html', None): + if is_pdftohtml: + end_rules.append((re.compile(r'<p>\s*(?P<chap>(<[ibu]>){0,2}\s*([A-Z \'"!]{3,})\s*([\dA-Z:]+\s){0,4}\s*(</[ibu]>){0,2})\s*<p>\s*(?P<title>(<[ibu]>){0,2}(\s*\w+){1,4}\s*(</[ibu]>){0,2}\s*<p>)?'), chap_head),) + if getattr(self.extra_opts, 'unwrap_factor', 0.0) > 0.01: length = line_length('pdf', html, getattr(self.extra_opts, 'unwrap_factor')) if length: + # print "The pdf line length returned is " + str(length) end_rules.append( # Un wrap using punctuation - (re.compile(r'(?<=.{%i}[a-z\.,;:)\-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>)\s*(?=(<(i|b|u)>)?\s*[\w\d(])' % length, re.UNICODE), wrap_lines), + (re.compile(r'(?<=.{%i}[a-z,;:)\-IA])\s*(?P<ital></(i|b|u)>)?\s*(<p.*?>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])' % length, re.UNICODE), wrap_lines), ) for rule in self.PREPROCESS + start_rules: @@ -383,5 +401,14 @@ class HTMLPreProcessor(object): if self.plugin_preprocess: html = self.input_plugin_preprocess(html) + if getattr(self.extra_opts, 'smarten_punctuation', False): + html = self.smarten_punctuation(html) + return html + def smarten_punctuation(self, html): + from calibre.utils.smartypants import smartyPants + from calibre.ebooks.chardet import substitute_entites + html = smartyPants(html) + return substitute_entites(html) + diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py new file mode 100644 index 0000000000..5301f70a16 --- /dev/null +++ b/src/calibre/ebooks/conversion/utils.py @@ -0,0 +1,173 @@ +#!/usr/bin/env python +# 
vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai + +__license__ = 'GPL v3' +__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>' +__docformat__ = 'restructuredtext en' + +import re +from calibre.ebooks.conversion.preprocess import line_length +from calibre.utils.logging import default_log + +class PreProcessor(object): + + def __init__(self, log=None): + self.log = default_log if log is None else log + self.html_preprocess_sections = 0 + self.found_indents = 0 + + def chapter_head(self, match): + chap = match.group('chap') + title = match.group('title') + if not title: + self.html_preprocess_sections = self.html_preprocess_sections + 1 + self.log("found " + str(self.html_preprocess_sections) + " chapters. - " + str(chap)) + return '<h2>'+chap+'</h2>\n' + else: + self.html_preprocess_sections = self.html_preprocess_sections + 1 + self.log("found " + str(self.html_preprocess_sections) + " chapters & titles. - " + str(chap) + ", " + str(title)) + return '<h2>'+chap+'</h2>\n<h3>'+title+'</h3>\n' + + def chapter_break(self, match): + chap = match.group('section') + styles = match.group('styles') + self.html_preprocess_sections = self.html_preprocess_sections + 1 + self.log("marked " + str(self.html_preprocess_sections) + " section markers based on punctuation. - " + str(chap)) + return '<'+styles+' style="page-break-before:always">'+chap + + def insert_indent(self, match): + pstyle = match.group('formatting') + span = match.group('span') + self.found_indents = self.found_indents + 1 + if pstyle: + if not span: + return '<p '+pstyle+' style="text-indent:3%">' + else: + return '<p '+pstyle+' style="text-indent:3%">'+span + else: + if not span: + return '<p style="text-indent:3%">' + else: + return '<p style="text-indent:3%">'+span + + def no_markup(self, raw, percent): + ''' + Detects total marked up line endings in the file. raw is the text to + inspect. Percent is the minimum percent of line endings which should + be marked up to return true. 
+ ''' + htm_end_ere = re.compile('</p>', re.DOTALL) + line_end_ere = re.compile('(\n|\r|\r\n)', re.DOTALL) + htm_end = htm_end_ere.findall(raw) + line_end = line_end_ere.findall(raw) + tot_htm_ends = len(htm_end) + tot_ln_fds = len(line_end) + self.log("There are " + str(tot_ln_fds) + " total Line feeds, and " + str(tot_htm_ends) + " marked up endings") + + if percent > 1: + percent = 1 + if percent < 0: + percent = 0 + + min_lns = tot_ln_fds * percent + self.log("There must be fewer than " + str(min_lns) + " unmarked lines to add markup") + if min_lns > tot_htm_ends: + return True + + def __call__(self, html): + self.log("********* Preprocessing HTML *********") + # Replace series of non-breaking spaces with text-indent + txtindent = re.compile(ur'<p(?P<formatting>[^>]*)>\s*(?P<span>(<span[^>]*>\s*)+)?\s*(\u00a0){2,}', re.IGNORECASE) + html = txtindent.sub(self.insert_indent, html) + if self.found_indents > 1: + self.log("replaced "+str(self.found_indents)+ " nbsp indents with inline styles") + # remove remaining non-breaking spaces + html = re.sub(ur'\u00a0', ' ', html) + # Get rid of empty <o:p> tags to simplify other processing + html = re.sub(ur'\s*<o:p>\s*</o:p>', ' ', html) + # Get rid of empty span tags + html = re.sub(r"\s*<span[^>]*>\s*</span>", " ", html) + + # If more than 40% of the lines are empty paragraphs then delete them to clean up spacing + linereg = re.compile('(?<=<p).*?(?=</p>)', re.IGNORECASE|re.DOTALL) + blankreg = re.compile(r'\s*<p[^>]*>\s*(<(b|i|u)>)?\s*(</(b|i|u)>)?\s*</p>', re.IGNORECASE) + blanklines = blankreg.findall(html) + lines = linereg.findall(html) + if len(lines) > 1: + self.log("There are " + str(len(blanklines)) + " blank lines. 
" + str(float(len(blanklines)) / float(len(lines))) + " percent blank") + if float(len(blanklines)) / float(len(lines)) > 0.40: + self.log("deleting blank lines") + html = blankreg.sub('', html) + # Arrange line feeds and </p> tags so the line_length and no_markup functions work correctly + html = re.sub(r"\s*</p>", "</p>\n", html) + html = re.sub(r"\s*<p>\s*", "\n<p>", html) + + # some lit files don't have any <p> tags or equivalent (generally just plain text between + # <pre> tags), check and mark up line endings if required before proceeding + if self.no_markup(html, 0.1): + self.log("not enough paragraph markers, adding now") + add_markup = re.compile('(?<!>)(\n)') + html = add_markup.sub('</p>\n<p>', html) + + # detect chapters/sections to match xpath or splitting logic + heading = re.compile('<h[1-3][^>]*>', re.IGNORECASE) + self.html_preprocess_sections = len(heading.findall(html)) + self.log("found " + str(self.html_preprocess_sections) + " pre-existing headings") + # + # Start with most typical chapter headings, get more aggressive until one works + if self.html_preprocess_sections < 10: + chapdetect = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu]>){0,2}s*(<span[^>]*>)?\s*.?(Introduction|Synopsis|Acknowledgements|Chapter|Epilogue|Volume|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,8}\s*(</[ibu]>){0,2})\s*(</span>)?s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu]>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.IGNORECASE) + html = chapdetect.sub(self.chapter_head, html) + if self.html_preprocess_sections < 10: + self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying numeric chapters") + chapdetect2 = 
re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu]>){0,2}\s*.?(\d+\.?|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu]>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE) + html = chapdetect2.sub(self.chapter_head, html) + + if self.html_preprocess_sections < 10: + self.log("not enough chapters, only " + str(self.html_preprocess_sections) + ", trying with uppercase words") + chapdetect2 = re.compile(r'(?=</?(br|p))(<(/?br|p)[^>]*>)\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<chap>(<[ibu]>){0,2}\s*.?(([A-Z#-]+\s*){1,9})\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(p|/?br)>)\s*(<(/?br|p)[^>]*>\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(?P<title>(<[ibu]>){0,2}(\s*[\w\'\"-]+){1,5}\s*(</[ibu]>){0,2})\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</(br|p)>))?', re.UNICODE) + html = chapdetect2.sub(self.chapter_head, html) + + # Unwrap lines + # + self.log("Unwrapping Lines") + # Some OCR sourced files have line breaks in the html using a combination of span & p tags + # span are used for hard line breaks, p for new paragraphs. 
Determine which is used so + # that lines can be un-wrapped across page boundaries + paras_reg = re.compile('<p[^>]*>', re.IGNORECASE) + spans_reg = re.compile('<span[^>]*>', re.IGNORECASE) + paras = len(paras_reg.findall(html)) + spans = len(spans_reg.findall(html)) + if spans > 1: + if float(paras) / float(spans) < 0.75: + format = 'spanned_html' + else: + format = 'html' + else: + format = 'html' + + # Calculate Length + length = line_length(format, html, 0.4) + self.log("*** Median line length is " + str(length) + ",calculated with " + format + " format ***") + # + # Unwrap and/or delete soft-hyphens, hyphens + html = re.sub(u'­\s*(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*', '', html) + html = re.sub(u'(?<=[-–—])\s*(?=<)(</span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?=[[a-z\d])', '', html) + + # Unwrap lines using punctation if the median length of all lines is less than 200 + unwrap = re.compile(r"(?<=.{%i}[a-z,;:\IA])\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE) + html = unwrap.sub(' ', html) + + # If still no sections after unwrapping mark split points on lines with no punctuation + if self.html_preprocess_sections < 10: + self.log("Looking for more split points based on punctuation, currently have " + str(self.html_preprocess_sections)) + #self.log(html) + chapdetect3 = re.compile(r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*(<[ibu]>){0,2}\s*(<span[^>]*>)?\s*.?([a-z#-*]+\s*){1,5}\s*\s*(</span>)?(</[ibu]>){0,2}\s*(</span>)?\s*(</[ibu]>){0,2}\s*(</span>)?\s*</(p|div)>)', re.IGNORECASE) + html = chapdetect3.sub(self.chapter_break, html) + # search for places where a first or second level heading is immediately followed by another + # top level heading. 
demote the second heading to h3 to prevent splitting between chapter + # headings and titles, images, etc + doubleheading = re.compile(r'(?P<firsthead><h(1|2)[^>]*>.+?</h(1|2)>\s*(<(?!h\d)[^>]*>\s*)*)<h(1|2)(?P<secondhead>[^>]*>.+?)</h(1|2)>', re.IGNORECASE) + html = doubleheading.sub('\g<firsthead>'+'<h3'+'\g<secondhead>'+'</h3>', html) + + return html diff --git a/src/calibre/ebooks/fb2/output.py b/src/calibre/ebooks/fb2/output.py index d0125afe89..d6c7a25a90 100644 --- a/src/calibre/ebooks/fb2/output.py +++ b/src/calibre/ebooks/fb2/output.py @@ -28,6 +28,9 @@ class FB2Output(OutputFormatPlugin): ]) def convert(self, oeb_book, output_path, input_plugin, opts, log): + from calibre.ebooks.oeb.transforms.jacket import linearize_jacket + linearize_jacket(oeb_book) + fb2mlizer = FB2MLizer(log) fb2_content = fb2mlizer.extract_content(oeb_book, opts) diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py index d57bfddd3e..084d48e54b 100644 --- a/src/calibre/ebooks/html/input.py +++ b/src/calibre/ebooks/html/input.py @@ -24,7 +24,7 @@ from calibre.constants import islinux, isfreebsd, iswindows from calibre import unicode_path from calibre.utils.localization import get_lang from calibre.utils.filenames import ascii_filename -from calibre.ebooks.conversion.preprocess import line_length +from calibre.ebooks.conversion.utils import PreProcessor class Link(object): ''' @@ -491,20 +491,6 @@ class HTMLInput(InputFormatPlugin): return (None, raw) def preprocess_html(self, html): - if not hasattr(self, 'log'): - from calibre.utils.logging import default_log - self.log = default_log - self.log("********* Preprocessing HTML *********") - # Detect Chapters to match the xpath in the GUI - chapdetect = re.compile(r'(?=</?(br|p|span))(</?(br|p|span)[^>]*>)?\s*(?P<chap>(<(i|b)><(i|b)>|<(i|b)>)?(.?Chapter|Epilogue|Prologue|Book|Part|Dedication)\s*([\d\w-]+(\s\w+)?)?(</(i|b)></(i|b)>|</(i|b)>)?)(</?(p|br|span)[^>]*>)', re.IGNORECASE) - html = 
chapdetect.sub('<h2>'+'\g<chap>'+'</h2>\n', html) - # Unwrap lines using punctation if the median length of all lines is less than 150 - # - # Insert extra line feeds so the line length regex functions properly - html = re.sub(r"</p>", "</p>\n", html) - length = line_length('html', html, 0.4) - self.log.debug("*** Median length is " + str(length) + " ***") - unwrap = re.compile(r"(?<=.{%i}[a-z,;:\IA])\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE) - if length < 150: - html = unwrap.sub(' ', html) - return html + preprocessor = PreProcessor(log=getattr(self, 'log', None)) + return preprocessor(html) + diff --git a/src/calibre/ebooks/lit/input.py b/src/calibre/ebooks/lit/input.py index 9bf20fb1d4..65f5c607a2 100644 --- a/src/calibre/ebooks/lit/input.py +++ b/src/calibre/ebooks/lit/input.py @@ -6,10 +6,9 @@ __license__ = 'GPL v3' __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' __docformat__ = 'restructuredtext en' -import re - from calibre.customize.conversion import InputFormatPlugin -from calibre.ebooks.conversion.preprocess import line_length +from calibre.ebooks.conversion.utils import PreProcessor + class LITInput(InputFormatPlugin): @@ -55,18 +54,6 @@ class LITInput(InputFormatPlugin): def preprocess_html(self, html): - self.log("********* Preprocessing HTML *********") - # Detect Chapters to match the xpath in the GUI - chapdetect = re.compile(r'(?=</?(br|p|span))(</?(br|p|span)[^>]*>)?\s*(?P<chap>(<(i|b)><(i|b)>|<(i|b)>)?(.?Chapter|Epilogue|Prologue|Book|Part|Dedication)\s*([\d\w-]+(\s\w+)?)?(</(i|b)></(i|b)>|</(i|b)>)?)(</?(p|br|span)[^>]*>)', re.IGNORECASE) - html = chapdetect.sub('<h2>'+'\g<chap>'+'</h2>\n', html) - # Unwrap lines using punctation if the median length of all lines is less than 150 - # - # Insert extra line feeds so the line length regex functions properly - 
html = re.sub(r"</p>", "</p>\n", html) - length = line_length('html', html, 0.4) - self.log("*** Median length is " + str(length) + " ***") - unwrap = re.compile(r"(?<=.{%i}[a-z,;:\IA])\s*</(span|p|div)>\s*(</(p|span|div)>)?\s*(?P<up2threeblanks><(p|span|div)[^>]*>\s*(<(p|span|div)[^>]*>\s*</(span|p|div)>\s*)</(span|p|div)>\s*){0,3}\s*<(span|div|p)[^>]*>\s*(<(span|div|p)[^>]*>)?\s*" % length, re.UNICODE) - if length < 150: - html = unwrap.sub(' ', html) - return html + preprocessor = PreProcessor(log=getattr(self, 'log', None)) + return preprocessor(html) diff --git a/src/calibre/ebooks/mobi/input.py b/src/calibre/ebooks/mobi/input.py index 487e70c04f..b8dc7a9560 100644 --- a/src/calibre/ebooks/mobi/input.py +++ b/src/calibre/ebooks/mobi/input.py @@ -3,6 +3,7 @@ __license__ = 'GPL 3' __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' __docformat__ = 'restructuredtext en' +import re from calibre.customize.conversion import InputFormatPlugin class MOBIInput(InputFormatPlugin): @@ -37,3 +38,12 @@ class MOBIInput(InputFormatPlugin): include_meta_content_type=False)) accelerators['pagebreaks'] = '//h:div[@class="mbp_pagebreak"]' return mr.created_opf_path + + def preprocess_html(self, html): + # search for places where a first or second level heading is immediately followed by another + # top level heading. 
demote the second heading to h3 to prevent splitting between chapter + # headings and titles, images, etc + doubleheading = re.compile(r'(?P<firsthead><h(1|2)[^>]*>.+?</h(1|2)>\s*(<(?!h\d)[^>]*>\s*)*)<h(1|2)(?P<secondhead>[^>]*>.+?)</h(1|2)>', re.IGNORECASE) + html = doubleheading.sub('\g<firsthead>'+'<h3'+'\g<secondhead>'+'</h3>', html) + return html + diff --git a/src/calibre/ebooks/oeb/transforms/cover.py b/src/calibre/ebooks/oeb/transforms/cover.py index 59b42df68a..532c9bbc03 100644 --- a/src/calibre/ebooks/oeb/transforms/cover.py +++ b/src/calibre/ebooks/oeb/transforms/cover.py @@ -99,7 +99,8 @@ class CoverManager(object): series_string = None if m.series and m.series_index: series_string = _('Book %s of %s')%( - fmt_sidx(m.series_index[0], use_roman=True), m.series[0]) + fmt_sidx(m.series_index[0], use_roman=True), + unicode(m.series[0])) try: from calibre.ebooks import calibre_cover diff --git a/src/calibre/ebooks/oeb/transforms/flatcss.py b/src/calibre/ebooks/oeb/transforms/flatcss.py index f48bdb9934..7212bd33c6 100644 --- a/src/calibre/ebooks/oeb/transforms/flatcss.py +++ b/src/calibre/ebooks/oeb/transforms/flatcss.py @@ -138,6 +138,7 @@ class CSSFlattener(object): float(self.context.margin_left)) bs.append('margin-right : %fpt'%\ float(self.context.margin_right)) + bs.extend(['padding-left: 0pt', 'padding-right: 0pt']) if self.context.change_justification != 'original': bs.append('text-align: '+ self.context.change_justification) body.set('style', '; '.join(bs)) @@ -146,7 +147,6 @@ class CSSFlattener(object): extra_css=css) self.stylizers[item] = stylizer - def baseline_node(self, node, stylizer, sizes, csize): csize = stylizer.style(node)['font-size'] if node.text: @@ -194,7 +194,7 @@ class CSSFlattener(object): value = 0.0 cssdict[property] = "%0.5fem" % (value / fsize) - def flatten_node(self, node, stylizer, names, styles, psize, left=0): + def flatten_node(self, node, stylizer, names, styles, psize, item_id, left=0): if not isinstance(node.tag, 
basestring) \ or namespace(node.tag) != XHTML_NS: return @@ -286,15 +286,18 @@ class CSSFlattener(object): if self.lineh and 'line-height' not in cssdict: lineh = self.lineh / psize cssdict['line-height'] = "%0.5fem" % lineh + if (self.context.remove_paragraph_spacing or self.context.insert_blank_line) and tag in ('p', 'div'): - for prop in ('margin', 'padding', 'border'): - for edge in ('top', 'bottom'): - cssdict['%s-%s'%(prop, edge)] = '0pt' + if item_id != 'calibre_jacket' or self.context.output_profile.name == 'Kindle': + for prop in ('margin', 'padding', 'border'): + for edge in ('top', 'bottom'): + cssdict['%s-%s'%(prop, edge)] = '0pt' if self.context.insert_blank_line: cssdict['margin-top'] = cssdict['margin-bottom'] = '0.5em' if self.context.remove_paragraph_spacing: cssdict['text-indent'] = "%1.1fem" % self.context.remove_paragraph_spacing_indent_size + if cssdict: items = cssdict.items() items.sort() @@ -313,7 +316,7 @@ class CSSFlattener(object): if 'style' in node.attrib: del node.attrib['style'] for child in node: - self.flatten_node(child, stylizer, names, styles, psize, left) + self.flatten_node(child, stylizer, names, styles, psize, item_id, left) def flatten_head(self, item, stylizer, href): html = item.data @@ -360,7 +363,7 @@ class CSSFlattener(object): stylizer = self.stylizers[item] body = html.find(XHTML('body')) fsize = self.context.dest.fbase - self.flatten_node(body, stylizer, names, styles, fsize) + self.flatten_node(body, stylizer, names, styles, fsize, item.id) items = [(key, val) for (val, key) in styles.items()] items.sort() css = ''.join(".%s {\n%s;\n}\n\n" % (key, val) for key, val in items) diff --git a/src/calibre/ebooks/oeb/transforms/jacket.py b/src/calibre/ebooks/oeb/transforms/jacket.py index fec4d230c3..88c7a4ff0e 100644 --- a/src/calibre/ebooks/oeb/transforms/jacket.py +++ b/src/calibre/ebooks/oeb/transforms/jacket.py @@ -6,139 +6,200 @@ __license__ = 'GPL v3' __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' 
__docformat__ = 'restructuredtext en' -import textwrap +import sys from xml.sax.saxutils import escape -from itertools import repeat from lxml import etree -from calibre.ebooks.oeb.base import XPath, XPNSMAP -from calibre import guess_type +from calibre import guess_type, strftime +from calibre.ebooks.BeautifulSoup import BeautifulSoup +from calibre.ebooks.oeb.base import XPath, XHTML_NS, XHTML from calibre.library.comments import comments_to_html + +JACKET_XPATH = '//h:meta[@name="calibre-content" and @content="jacket"]' + class Jacket(object): ''' Book jacket manipulation. Remove first image and insert comments at start of book. ''' - JACKET_TEMPLATE = textwrap.dedent(u'''\ - <html xmlns="%(xmlns)s"> - <head> - <title>%(title)s - - - -

-
-

%(title)s

-

%(jacket)s

-
%(series)s
-
%(rating)s
-
%(tags)s
-
-
- %(comments)s -
-
- - - ''') + def remove_images(self, item, limit=1): + path = XPath('//h:img[@src]') + removed = 0 + for img in path(item.data): + if removed >= limit: + break + href = item.abshref(img.get('src')) + image = self.oeb.manifest.hrefs.get(href, None) + if image is not None: + self.oeb.manifest.remove(image) + img.getparent().remove(img) + removed += 1 + return removed def remove_first_image(self): - path = XPath('//h:img[@src]') - for i, item in enumerate(self.oeb.spine): - if i > 2: break - for img in path(item.data): - href = item.abshref(img.get('src')) - image = self.oeb.manifest.hrefs.get(href, None) - if image is not None: - self.log('Removing first image', img.get('src')) - self.oeb.manifest.remove(image) - img.getparent().remove(img) - return - - def get_rating(self, rating): - ans = '' - if rating is None: - return - try: - num = float(rating)/2 - except: - return ans - num = max(0, num) - num = min(num, 5) - if num < 1: - return ans - id, href = self.oeb.manifest.generate('star', 'star.png') - self.oeb.manifest.add(id, href, 'image/png', data=I('star.png', data=True)) - ans = 'Rating: ' + ''.join(repeat('star'%href, num)) - return ans + for item in self.oeb.spine: + removed = self.remove_images(item) + if removed > 0: + self.log('Removed first image') + break def insert_metadata(self, mi): self.log('Inserting metadata into book...') - comments = mi.comments - if not comments: - try: - comments = unicode(self.oeb.metadata.description[0]) - except: - comments = '' - if not comments.strip(): - comments = '' - orig_comments = comments - if comments: - comments = comments_to_html(comments) - series = 'Series: ' + escape(mi.series if mi.series else '') - if mi.series and mi.series_index is not None: - series += escape(' [%s]'%mi.format_series_index()) - if not mi.series: - series = '' - tags = mi.tags - if not tags: - try: - tags = map(unicode, self.oeb.metadata.subject) - except: - tags = [] - if tags: - tags = 'Tags: ' + self.opts.dest.tags_to_string(tags) - 
else: - tags = '' + try: - title = mi.title if mi.title else unicode(self.oeb.metadata.title[0]) + tags = map(unicode, self.oeb.metadata.subject) + except: + tags = [] + + try: + comments = unicode(self.oeb.metadata.description[0]) + except: + comments = '' + + try: + title = unicode(self.oeb.metadata.title[0]) except: title = _('Unknown') - def generate_html(comments): - return self.JACKET_TEMPLATE%dict(xmlns=XPNSMAP['h'], - title=escape(title), comments=comments, - jacket=escape(_('Book Jacket')), series=series, - tags=tags, rating=self.get_rating(mi.rating)) - id, href = self.oeb.manifest.generate('jacket', 'jacket.xhtml') - from calibre.ebooks.oeb.base import RECOVER_PARSER, XPath - try: - root = etree.fromstring(generate_html(comments), parser=RECOVER_PARSER) - except: - root = etree.fromstring(generate_html(escape(orig_comments)), - parser=RECOVER_PARSER) - jacket = XPath('//h:meta[@name="calibre-content" and @content="jacket"]') - found = None - for item in list(self.oeb.spine)[:4]: - try: - if jacket(item.data): - found = item - break - except: - continue - if found is None: - item = self.oeb.manifest.add(id, href, guess_type(href)[0], data=root) - self.oeb.spine.insert(0, item, True) - else: - self.log('Found existing book jacket, replacing...') - found.data = root + root = render_jacket(mi, self.opts.output_profile, + alt_title=title, alt_tags=tags, + alt_comments=comments) + id, href = self.oeb.manifest.generate('calibre_jacket', 'jacket.xhtml') + item = self.oeb.manifest.add(id, href, guess_type(href)[0], data=root) + self.oeb.spine.insert(0, item, True) + + def remove_existing_jacket(self): + for x in self.oeb.spine[:4]: + if XPath(JACKET_XPATH)(x.data): + self.remove_images(x, limit=sys.maxint) + self.oeb.manifest.remove(x) + self.log('Removed existing jacket') + break def __call__(self, oeb, opts, metadata): + ''' + Add metadata in jacket.xhtml if specified in opts + If not specified, remove previous jacket instance + ''' self.oeb, self.opts, 
self.log = oeb, opts, oeb.log + self.remove_existing_jacket() if opts.remove_first_image: self.remove_first_image() if opts.insert_metadata: self.insert_metadata(metadata) + +# Render Jacket {{{ + +def get_rating(rating, rchar): + ans = '' + try: + num = float(rating)/2 + except: + return ans + num = max(0, num) + num = min(num, 5) + if num < 1: + return ans + + ans = rchar * int(num) + return ans + + +def render_jacket(mi, output_profile, + alt_title=_('Unknown'), alt_tags=[], alt_comments=''): + css = P('jacket/stylesheet.css', data=True).decode('utf-8') + + try: + title_str = mi.title if mi.title else alt_title + except: + title_str = _('Unknown') + title = '%s' % (escape(title_str)) + + series = escape(mi.series if mi.series else '') + if mi.series and mi.series_index is not None: + series += escape(' [%s]'%mi.format_series_index()) + if not mi.series: + series = '' + + try: + pubdate = strftime(u'%Y', mi.pubdate.timetuple()) + except: + pubdate = '' + + rating = get_rating(mi.rating, output_profile.ratings_char) + + tags = mi.tags if mi.tags else alt_tags + if tags: + tags = output_profile.tags_to_string(tags) + else: + tags = '' + + comments = mi.comments if mi.comments else alt_comments + comments = comments.strip() + orig_comments = comments + if comments: + comments = comments_to_html(comments) + + def generate_html(comments): + args = dict(xmlns=XHTML_NS, + title_str=title_str, + css=css, + title=title, + pubdate_label=_('Published'), pubdate=pubdate, + series_label=_('Series'), series=series, + rating_label=_('Rating'), rating=rating, + tags_label=_('Tags'), tags=tags, + comments=comments, + footer='' + ) + + generated_html = P('jacket/template.xhtml', + data=True).decode('utf-8').format(**args) + + # Post-process the generated html to strip out empty header items + soup = BeautifulSoup(generated_html) + if not series: + series_tag = soup.find('tr', attrs={'class':'cbj_series'}) + series_tag.extract() + if not rating: + rating_tag = soup.find('tr', 
attrs={'class':'cbj_rating'}) + rating_tag.extract() + if not tags: + tags_tag = soup.find('tr', attrs={'class':'cbj_tags'}) + tags_tag.extract() + if not pubdate: + pubdate_tag = soup.find('tr', attrs={'class':'cbj_pubdate'}) + pubdate_tag.extract() + if output_profile.short_name != 'kindle': + hr_tag = soup.find('hr', attrs={'class':'cbj_kindle_banner_hr'}) + hr_tag.extract() + + return soup.renderContents(None) + + from calibre.ebooks.oeb.base import RECOVER_PARSER + + try: + root = etree.fromstring(generate_html(comments), parser=RECOVER_PARSER) + except: + try: + root = etree.fromstring(generate_html(escape(orig_comments)), + parser=RECOVER_PARSER) + except: + root = etree.fromstring(generate_html(''), + parser=RECOVER_PARSER) + return root + +# }}} + +def linearize_jacket(oeb): + for x in oeb.spine[:4]: + if XPath(JACKET_XPATH)(x.data): + for e in XPath('//h:table|//h:tr|//h:th')(x.data): + e.tag = XHTML('div') + for e in XPath('//h:td')(x.data): + e.tag = XHTML('span') + break + diff --git a/src/calibre/ebooks/pdb/pdf/reader.py b/src/calibre/ebooks/pdb/pdf/reader.py index 3ae9f8ccca..c151551866 100644 --- a/src/calibre/ebooks/pdb/pdf/reader.py +++ b/src/calibre/ebooks/pdb/pdf/reader.py @@ -21,7 +21,7 @@ class Reader(FormatReader): self.options = options setattr(self.options, 'new_pdf_engine', False) setattr(self.options, 'no_images', False) - setattr(self.options, 'unwrap_factor', 0.5) + setattr(self.options, 'unwrap_factor', 0.45) def extract_content(self, output_dir): self.log.info('Extracting PDF...') diff --git a/src/calibre/ebooks/pdf/input.py b/src/calibre/ebooks/pdf/input.py index 64a089281e..14b3552b04 100644 --- a/src/calibre/ebooks/pdf/input.py +++ b/src/calibre/ebooks/pdf/input.py @@ -22,10 +22,10 @@ class PDFInput(InputFormatPlugin): options = set([ OptionRecommendation(name='no_images', recommended_value=False, help=_('Do not extract images from the document')), - OptionRecommendation(name='unwrap_factor', recommended_value=0.5, + 
OptionRecommendation(name='unwrap_factor', recommended_value=0.45, help=_('Scale used to determine the length at which a line should ' 'be unwrapped. Valid values are a decimal between 0 and 1. The ' - 'default is 0.5, this is the median line length.')), + 'default is 0.45, just below the median line length.')), OptionRecommendation(name='new_pdf_engine', recommended_value=False, help=_('Use the new PDF conversion engine.')) ]) diff --git a/src/calibre/ebooks/rtf/input.py b/src/calibre/ebooks/rtf/input.py index adda8794ca..000c603c1c 100644 --- a/src/calibre/ebooks/rtf/input.py +++ b/src/calibre/ebooks/rtf/input.py @@ -7,7 +7,7 @@ import os, glob, re, textwrap from lxml import etree from calibre.customize.conversion import InputFormatPlugin -from calibre.ebooks.conversion.preprocess import line_length +from calibre.ebooks.conversion.utils import PreProcessor class InlineClass(etree.XSLTExtension): @@ -229,16 +229,8 @@ class RTFInput(InputFormatPlugin): res = transform.tostring(result) res = res[:100].replace('xmlns:html', 'xmlns') + res[100:] if self.options.preprocess_html: - self.log("********* Preprocessing HTML *********") - # Detect Chapters to match the xpath in the GUI - chapdetect = re.compile(r']*>\s*]*>\s*(?P(<(i|b)><(i|b)>|<(i|b)>)?(.?Chapter|Epilogue|Prologue|Book|Part|Dedication)\s*([\d\w-]+(\s\w+)?)?(|)?)\s*\s*

', re.IGNORECASE) - res = chapdetect.sub('

'+'\g'+'

\n', res) - # Unwrap lines using punctation if the median length of all lines is less than 150 - length = line_length('html', res, 0.4) - self.log("*** Median length is " + str(length) + " ***") - unwrap = re.compile(r"(?<=.{%i}[a-z,;:\IA])\s*\s*(

)?\s*(?P]*>\s*(]*>\s*\s*)

\s*){0,3}\s*]*>\s*(]*>)?\s*" % length, re.UNICODE) - if length < 150: - res = unwrap.sub(' ', res) + preprocessor = PreProcessor(log=getattr(self, 'log', None)) + res = preprocessor(res) f.write(res) self.write_inline_css(inline_class) stream.seek(0) diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py index 1b61404589..e58dce5559 100644 --- a/src/calibre/gui2/__init__.py +++ b/src/calibre/gui2/__init__.py @@ -50,6 +50,7 @@ gprefs.defaults['action-layout-context-menu-device'] = ( gprefs.defaults['show_splash_screen'] = True gprefs.defaults['toolbar_icon_size'] = 'medium' gprefs.defaults['toolbar_text'] = 'auto' +gprefs.defaults['show_child_bar'] = False # }}} diff --git a/src/calibre/gui2/actions/__init__.py b/src/calibre/gui2/actions/__init__.py index 57ad900fba..b2d1656367 100644 --- a/src/calibre/gui2/actions/__init__.py +++ b/src/calibre/gui2/actions/__init__.py @@ -71,6 +71,12 @@ class InterfaceAction(QObject): all_locations = frozenset(['toolbar', 'toolbar-device', 'context-menu', 'context-menu-device']) + #: Type of action + #: 'current' means acts on the current view + #: 'global' means an action that does not act on the current view, but rather + #: on calibre as a whole + action_type = 'global' + def __init__(self, parent, site_customization): QObject.__init__(self, parent) self.setObjectName(self.name) diff --git a/src/calibre/gui2/actions/add.py b/src/calibre/gui2/actions/add.py index f0ff794fab..add7bf1d5b 100644 --- a/src/calibre/gui2/actions/add.py +++ b/src/calibre/gui2/actions/add.py @@ -25,6 +25,7 @@ class AddAction(InterfaceAction): action_spec = (_('Add books'), 'add_book.png', _('Add books to the calibre library/device from files on your computer') , _('A')) + action_type = 'current' def genesis(self): self._add_filesystem_book = self.Dispatcher(self.__add_filesystem_book) diff --git a/src/calibre/gui2/actions/add_to_library.py b/src/calibre/gui2/actions/add_to_library.py index 6fc0d5fb1f..05aea8f1dd 100644 --- 
a/src/calibre/gui2/actions/add_to_library.py +++ b/src/calibre/gui2/actions/add_to_library.py @@ -13,6 +13,7 @@ class AddToLibraryAction(InterfaceAction): action_spec = (_('Add books to library'), 'add_book.png', _('Add books to your calibre library from the connected device'), None) dont_add_to = frozenset(['toolbar', 'context-menu']) + action_type = 'current' def genesis(self): self.qaction.triggered.connect(self.add_books_to_library) diff --git a/src/calibre/gui2/actions/annotate.py b/src/calibre/gui2/actions/annotate.py index 5356d63e98..dfafcd1a39 100644 --- a/src/calibre/gui2/actions/annotate.py +++ b/src/calibre/gui2/actions/annotate.py @@ -18,6 +18,7 @@ class FetchAnnotationsAction(InterfaceAction): name = 'Fetch Annotations' action_spec = (_('Fetch annotations (experimental)'), None, None, None) + action_type = 'current' def genesis(self): pass diff --git a/src/calibre/gui2/actions/convert.py b/src/calibre/gui2/actions/convert.py index ee0f06ab71..29acfc52b1 100644 --- a/src/calibre/gui2/actions/convert.py +++ b/src/calibre/gui2/actions/convert.py @@ -21,6 +21,7 @@ class ConvertAction(InterfaceAction): name = 'Convert Books' action_spec = (_('Convert books'), 'convert.png', None, _('C')) dont_add_to = frozenset(['toolbar-device', 'context-menu-device']) + action_type = 'current' def genesis(self): cm = QMenu() diff --git a/src/calibre/gui2/actions/copy_to_library.py b/src/calibre/gui2/actions/copy_to_library.py index 7127c91e8c..6b7654f644 100644 --- a/src/calibre/gui2/actions/copy_to_library.py +++ b/src/calibre/gui2/actions/copy_to_library.py @@ -80,6 +80,7 @@ class CopyToLibraryAction(InterfaceAction): _('Copy selected books to the specified library'), None) popup_type = QToolButton.InstantPopup dont_add_to = frozenset(['toolbar-device', 'context-menu-device']) + action_type = 'current' def genesis(self): self.menu = QMenu(self.gui) diff --git a/src/calibre/gui2/actions/delete.py b/src/calibre/gui2/actions/delete.py index 0343c6df84..406860e4ec 100644 
--- a/src/calibre/gui2/actions/delete.py +++ b/src/calibre/gui2/actions/delete.py @@ -16,6 +16,7 @@ class DeleteAction(InterfaceAction): name = 'Remove Books' action_spec = (_('Remove books'), 'trash.png', None, _('Del')) + action_type = 'current' def genesis(self): self.qaction.triggered.connect(self.delete_books) diff --git a/src/calibre/gui2/actions/edit_collections.py b/src/calibre/gui2/actions/edit_collections.py index e45d36fc62..7f5dd76538 100644 --- a/src/calibre/gui2/actions/edit_collections.py +++ b/src/calibre/gui2/actions/edit_collections.py @@ -13,6 +13,7 @@ class EditCollectionsAction(InterfaceAction): action_spec = (_('Manage collections'), None, _('Manage the collections on this device'), None) dont_add_to = frozenset(['toolbar', 'context-menu']) + action_type = 'current' def genesis(self): self.qaction.triggered.connect(self.edit_collections) diff --git a/src/calibre/gui2/actions/edit_metadata.py b/src/calibre/gui2/actions/edit_metadata.py index f0232d9859..ac04652efa 100644 --- a/src/calibre/gui2/actions/edit_metadata.py +++ b/src/calibre/gui2/actions/edit_metadata.py @@ -22,6 +22,7 @@ class EditMetadataAction(InterfaceAction): name = 'Edit Metadata' action_spec = (_('Edit metadata'), 'edit_input.png', None, _('E')) + action_type = 'current' def genesis(self): self.create_action(spec=(_('Merge book records'), 'merge_books.png', @@ -209,8 +210,9 @@ class EditMetadataAction(InterfaceAction): dest_id, src_books, src_ids = self.books_to_merge(rows) if safe_merge: if not confirm('

'+_( - 'All book formats and metadata from the selected books ' - 'will be added to the first selected book.

' + 'Book formats and metadata from the selected books ' + 'will be added to the first selected book. ' + 'ISBN will not be merged.

' 'The second and subsequently selected books will not ' 'be deleted or changed.

' 'Please confirm you want to proceed.') @@ -220,8 +222,9 @@ class EditMetadataAction(InterfaceAction): self.merge_metadata(dest_id, src_ids) else: if not confirm('

'+_( - 'All book formats and metadata from the selected books will be merged ' - 'into the first selected book.

' + 'Book formats and metadata from the selected books will be merged ' + 'into the first selected book. ' + 'ISBN will not be merged.

' 'After merger the second and ' 'subsequently selected books will be deleted.

' 'All book formats of the first selected book will be kept ' diff --git a/src/calibre/gui2/actions/open.py b/src/calibre/gui2/actions/open.py index 106bfa24f6..141ff01a66 100644 --- a/src/calibre/gui2/actions/open.py +++ b/src/calibre/gui2/actions/open.py @@ -14,6 +14,7 @@ class OpenFolderAction(InterfaceAction): action_spec = (_('Open containing folder'), 'document_open.png', None, _('O')) dont_add_to = frozenset(['toolbar-device', 'context-menu-device']) + action_type = 'current' def genesis(self): self.qaction.triggered.connect(self.gui.iactions['View'].view_folder) diff --git a/src/calibre/gui2/actions/save_to_disk.py b/src/calibre/gui2/actions/save_to_disk.py index bfcc02e130..e9664b9980 100644 --- a/src/calibre/gui2/actions/save_to_disk.py +++ b/src/calibre/gui2/actions/save_to_disk.py @@ -38,6 +38,7 @@ class SaveToDiskAction(InterfaceAction): name = "Save To Disk" action_spec = (_('Save to disk'), 'save.png', None, _('S')) + action_type = 'current' def genesis(self): self.qaction.triggered.connect(self.save_to_disk) diff --git a/src/calibre/gui2/actions/show_book_details.py b/src/calibre/gui2/actions/show_book_details.py index d17d0998f1..18b0a694bf 100644 --- a/src/calibre/gui2/actions/show_book_details.py +++ b/src/calibre/gui2/actions/show_book_details.py @@ -16,6 +16,7 @@ class ShowBookDetailsAction(InterfaceAction): action_spec = (_('Show book details'), 'dialog_information.png', None, _('I')) dont_add_to = frozenset(['toolbar-device', 'context-menu-device']) + action_type = 'current' def genesis(self): self.qaction.triggered.connect(self.show_book_info) diff --git a/src/calibre/gui2/actions/similar_books.py b/src/calibre/gui2/actions/similar_books.py index 1a14869a9c..644cd3160a 100644 --- a/src/calibre/gui2/actions/similar_books.py +++ b/src/calibre/gui2/actions/similar_books.py @@ -16,6 +16,7 @@ class SimilarBooksAction(InterfaceAction): name = 'Similar Books' action_spec = (_('Similar books...'), None, None, None) popup_type = 
QToolButton.InstantPopup + action_type = 'current' def genesis(self): m = QMenu(self.gui) diff --git a/src/calibre/gui2/actions/view.py b/src/calibre/gui2/actions/view.py index 2f6be24e5b..0fbf86c567 100644 --- a/src/calibre/gui2/actions/view.py +++ b/src/calibre/gui2/actions/view.py @@ -22,6 +22,7 @@ class ViewAction(InterfaceAction): name = 'View' action_spec = (_('View'), 'view.png', None, _('V')) + action_type = 'current' def genesis(self): self.persistent_files = [] diff --git a/src/calibre/gui2/convert/look_and_feel.py b/src/calibre/gui2/convert/look_and_feel.py index b0403bf1dd..ec3f0b944d 100644 --- a/src/calibre/gui2/convert/look_and_feel.py +++ b/src/calibre/gui2/convert/look_and_feel.py @@ -22,7 +22,7 @@ class LookAndFeelWidget(Widget, Ui_Form): Widget.__init__(self, parent, ['change_justification', 'extra_css', 'base_font_size', 'font_size_mapping', 'line_height', - 'linearize_tables', + 'linearize_tables', 'smarten_punctuation', 'disable_font_rescaling', 'insert_blank_line', 'remove_paragraph_spacing', 'remove_paragraph_spacing_indent_size','input_encoding', 'asciiize', 'keep_ligatures'] diff --git a/src/calibre/gui2/convert/look_and_feel.ui b/src/calibre/gui2/convert/look_and_feel.ui index de48e7caf9..c683300854 100644 --- a/src/calibre/gui2/convert/look_and_feel.ui +++ b/src/calibre/gui2/convert/look_and_feel.ui @@ -178,7 +178,7 @@ - + Extra &CSS @@ -214,6 +214,13 @@ + + + + Smarten &punctuation + + + diff --git a/src/calibre/gui2/convert/pdf_input.ui b/src/calibre/gui2/convert/pdf_input.ui index 626c68ea63..b2ee421922 100644 --- a/src/calibre/gui2/convert/pdf_input.ui +++ b/src/calibre/gui2/convert/pdf_input.ui @@ -46,7 +46,7 @@ 0.010000000000000 - 0.500000000000000 + 0.450000000000000 diff --git a/src/calibre/gui2/convert/structure_detection.ui b/src/calibre/gui2/convert/structure_detection.ui index eb2892a07a..c0b3de3bd9 100644 --- a/src/calibre/gui2/convert/structure_detection.ui +++ b/src/calibre/gui2/convert/structure_detection.ui @@ -41,24 
+41,17 @@ - + Insert &metadata as page at start of book - - - - &Preprocess input file to possibly improve structure detection - - - - + - + Qt::Vertical @@ -71,26 +64,33 @@ - + Remove F&ooter - + Remove H&eader - + - + + + + + &Preprocess input file to possibly improve structure detection + + + diff --git a/src/calibre/gui2/cover_flow.py b/src/calibre/gui2/cover_flow.py index 88bbae6c41..cb951b09be 100644 --- a/src/calibre/gui2/cover_flow.py +++ b/src/calibre/gui2/cover_flow.py @@ -155,6 +155,7 @@ class CoverFlowMixin(object): self.cb_splitter.action_toggle.triggered.connect(self.toggle_cover_browser) if CoverFlow is not None: self.cover_flow.stop.connect(self.hide_cover_browser) + self.cover_flow.setVisible(False) else: self.cb_splitter.insertWidget(self.cb_splitter.side_index, self.cover_flow) if CoverFlow is not None: diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py index 45c78ce6da..f839e1d519 100644 --- a/src/calibre/gui2/device.py +++ b/src/calibre/gui2/device.py @@ -627,12 +627,11 @@ class DeviceMixin(object): # {{{ def connect_to_folder(self): dir = choose_dir(self, 'Select Device Folder', _('Select folder to open as device')) - kls = FOLDER_DEVICE - self.device_manager.mount_device(kls=kls, kind='folder', path=dir) + if dir is not None: + self.device_manager.mount_device(kls=FOLDER_DEVICE, kind='folder', path=dir) def connect_to_itunes(self): - kls = ITUNES_ASYNC - self.device_manager.mount_device(kls=kls, kind='itunes', path=None) + self.device_manager.mount_device(kls=ITUNES_ASYNC, kind='itunes', path=None) # disconnect from both folder and itunes devices def disconnect_mounted_device(self): diff --git a/src/calibre/gui2/layout.py b/src/calibre/gui2/layout.py index 58d5267c8e..ec7e023dc1 100644 --- a/src/calibre/gui2/layout.py +++ b/src/calibre/gui2/layout.py @@ -61,7 +61,7 @@ class LocationManager(QObject): # {{{ ac('library', _('Library'), 'lt.png', _('Show books in calibre library')) - ac('main', _('Reader'), 'reader.png', + 
ac('main', _('Device'), 'reader.png', _('Show books in the main memory of the device')) ac('carda', _('Card A'), 'sd.png', _('Show books in storage card A')) @@ -197,11 +197,21 @@ class SearchBar(QWidget): # {{{ # }}} +class Spacer(QWidget): + + def __init__(self, parent): + QWidget.__init__(self, parent) + self.l = QHBoxLayout() + self.setLayout(self.l) + self.l.addStretch(10) + + class ToolBar(QToolBar): # {{{ - def __init__(self, donate, location_manager, parent): + def __init__(self, donate, location_manager, child_bar, parent): QToolBar.__init__(self, parent) self.gui = parent + self.child_bar = child_bar self.setContextMenuPolicy(Qt.PreventContextMenu) self.setMovable(False) self.setFloatable(False) @@ -223,16 +233,19 @@ class ToolBar(QToolBar): # {{{ sz = gprefs['toolbar_icon_size'] sz = {'small':24, 'medium':48, 'large':64}[sz] self.setIconSize(QSize(sz, sz)) + self.child_bar.setIconSize(QSize(sz, sz)) style = Qt.ToolButtonTextUnderIcon if gprefs['toolbar_text'] == 'never': style = Qt.ToolButtonIconOnly self.setToolButtonStyle(style) + self.child_bar.setToolButtonStyle(style) self.donate_button.set_normal_icon_size(sz, sz) def contextMenuEvent(self, *args): pass def build_bar(self): + self.child_bar.setVisible(gprefs['show_child_bar']) self.showing_donate = False showing_device = self.location_manager.has_device actions = '-device' if showing_device else '' @@ -244,10 +257,16 @@ class ToolBar(QToolBar): # {{{ m.setVisible(False) self.clear() + self.child_bar.clear() self.added_actions = [] + self.spacers = [Spacer(self.child_bar), Spacer(self.child_bar), + Spacer(self), Spacer(self)] + self.child_bar.addWidget(self.spacers[0]) + if gprefs['show_child_bar']: + self.addWidget(self.spacers[2]) for what in actions: - if what is None: + if what is None and not gprefs['show_child_bar']: self.addSeparator() elif what == 'Location Manager': for ac in self.location_manager.available_actions: @@ -262,12 +281,21 @@ class ToolBar(QToolBar): # {{{ self.showing_donate = 
True elif what in self.gui.iactions: action = self.gui.iactions[what] - self.addAction(action.qaction) + bar = self + if action.action_type == 'current' and gprefs['show_child_bar']: + bar = self.child_bar + bar.addAction(action.qaction) self.added_actions.append(action.qaction) self.setup_tool_button(action.qaction, action.popup_type) + self.child_bar.addWidget(self.spacers[1]) + if gprefs['show_child_bar']: + self.addWidget(self.spacers[3]) + def setup_tool_button(self, ac, menu_mode=None): ch = self.widgetForAction(ac) + if ch is None: + ch = self.child_bar.widgetForAction(ac) ch.setCursor(Qt.PointingHandCursor) ch.setAutoRaise(True) if ac.menu() is not None and menu_mode is not None: @@ -280,7 +308,8 @@ class ToolBar(QToolBar): # {{{ if p == 'never': style = Qt.ToolButtonIconOnly - if p == 'auto' and self.preferred_width > self.width()+35: + if p == 'auto' and self.preferred_width > self.width()+35 and \ + not gprefs['show_child_bar']: style = Qt.ToolButtonIconOnly self.setToolButtonStyle(style) @@ -309,9 +338,11 @@ class MainWindowMixin(object): # {{{ self.iactions['Fetch News'].init_scheduler(db) self.search_bar = SearchBar(self) + self.child_bar = QToolBar(self) self.tool_bar = ToolBar(self.donate_button, - self.location_manager, self) + self.location_manager, self.child_bar, self) self.addToolBar(Qt.TopToolBarArea, self.tool_bar) + self.addToolBar(Qt.BottomToolBarArea, self.child_bar) l = self.centralwidget.layout() l.addWidget(self.search_bar) diff --git a/src/calibre/gui2/preferences/look_feel.py b/src/calibre/gui2/preferences/look_feel.py index f30b2fddbb..10c2fcfe95 100644 --- a/src/calibre/gui2/preferences/look_feel.py +++ b/src/calibre/gui2/preferences/look_feel.py @@ -46,6 +46,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): r('use_roman_numerals_for_series_number', config) r('separate_cover_flow', config, restart_required=True) r('search_as_you_type', config) + r('show_child_bar', gprefs) choices = [(_('Small'), 'small'), (_('Medium'), 'medium'), 
(_('Large'), 'large')] diff --git a/src/calibre/gui2/preferences/look_feel.ui b/src/calibre/gui2/preferences/look_feel.ui index 7c6c736b24..1de55d51ef 100644 --- a/src/calibre/gui2/preferences/look_feel.ui +++ b/src/calibre/gui2/preferences/look_feel.ui @@ -173,6 +173,13 @@ + + + + &Split the toolbar into two toolbars + + + diff --git a/src/calibre/gui2/tag_view.py b/src/calibre/gui2/tag_view.py index 519d533ff6..6c50a71b92 100644 --- a/src/calibre/gui2/tag_view.py +++ b/src/calibre/gui2/tag_view.py @@ -376,7 +376,7 @@ class TagsModel(QAbstractItemModel): # {{{ 'series' : QIcon(I('series.png')), 'formats' : QIcon(I('book.png')), 'publisher' : QIcon(I('publisher.png')), - 'rating' : QIcon(I('star.png')), + 'rating' : QIcon(I('rating.png')), 'news' : QIcon(I('news.png')), 'tags' : QIcon(I('tags.png')), ':custom' : QIcon(I('column.png')), diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py index dfd7086076..4f795ab733 100644 --- a/src/calibre/library/caches.py +++ b/src/calibre/library/caches.py @@ -6,7 +6,7 @@ __license__ = 'GPL v3' __copyright__ = '2010, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import re, itertools, functools +import re, itertools from itertools import repeat from datetime import timedelta from threading import Thread, RLock @@ -584,39 +584,7 @@ class ResultCache(SearchQueryParser): # Sorting functions {{{ - def seriescmp(self, sidx, siidx, x, y, library_order=None): - try: - if library_order: - ans = cmp(title_sort(self._data[x][sidx].lower()), - title_sort(self._data[y][sidx].lower())) - else: - ans = cmp(self._data[x][sidx].lower(), - self._data[y][sidx].lower()) - except AttributeError: # Some entries may be None - ans = cmp(self._data[x][sidx], self._data[y][sidx]) - if ans != 0: return ans - return cmp(self._data[x][siidx], self._data[y][siidx]) - - def cmp(self, loc, x, y, asstr=True, subsort=False): - try: - ans = cmp(self._data[x][loc].lower(), self._data[y][loc].lower()) if \ - asstr else 
cmp(self._data[x][loc], self._data[y][loc]) - except AttributeError: # Some entries may be None - ans = cmp(self._data[x][loc], self._data[y][loc]) - except TypeError: ## raised when a datetime is None - x = self._data[x][loc] - if x is None: - x = UNDEFINED_DATE - y = self._data[y][loc] - if y is None: - y = UNDEFINED_DATE - return cmp(x, y) - if subsort and ans == 0: - idx = self.FIELD_MAP['sort'] - return cmp(self._data[x][idx].lower(), self._data[y][idx].lower()) - return ans - - def sanitize_field_name(self, field): + def sanitize_sort_field_name(self, field): field = field.lower().strip() if field not in self.field_metadata.iterkeys(): if field in ('author', 'tag', 'comment'): @@ -627,38 +595,10 @@ class ResultCache(SearchQueryParser): return field def sort(self, field, ascending, subsort=False): - field = self.sanitize_field_name(field) - as_string = field not in ('size', 'rating', 'timestamp') - - if self.first_sort: - subsort = True - self.first_sort = False - if self.field_metadata[field]['is_custom']: - if self.field_metadata[field]['datatype'] == 'series': - fcmp = functools.partial(self.seriescmp, - self.field_metadata[field]['rec_index'], - self.field_metadata.cc_series_index_column_for(field), - library_order=tweaks['title_series_sorting'] == 'library_order') - else: - as_string = self.field_metadata[field]['datatype'] in ('comments', 'text') - field = self.field_metadata[field]['colnum'] - fcmp = functools.partial(self.cmp, self.FIELD_MAP[field], - subsort=subsort, asstr=as_string) - elif field == 'series': - fcmp = functools.partial(self.seriescmp, self.FIELD_MAP['series'], - self.FIELD_MAP['series_index'], - library_order=tweaks['title_series_sorting'] == 'library_order') - else: - fcmp = functools.partial(self.cmp, self.field_metadata[field]['rec_index'], - subsort=subsort, asstr=as_string) - self._map.sort(cmp=fcmp, reverse=not ascending) - tmap = list(itertools.repeat(False, len(self._data))) - for x in self._map_filtered: - tmap[x] = True - 
self._map_filtered = [x for x in self._map if tmap[x]] + self.multisort([(field, ascending)]) def multisort(self, fields=[], subsort=False): - fields = [(self.sanitize_field_name(x), bool(y)) for x, y in fields] + fields = [(self.sanitize_sort_field_name(x), bool(y)) for x, y in fields] keys = self.field_metadata.field_keys() fields = [x for x in fields if x[0] in keys] if subsort and 'sort' not in [x[0] for x in fields]: @@ -671,6 +611,7 @@ class ResultCache(SearchQueryParser): self._map.sort(key=keyg, reverse=not fields[0][1]) else: self._map.sort(key=keyg) + tmap = list(itertools.repeat(False, len(self._data))) for x in self._map_filtered: tmap[x] = True @@ -733,87 +674,3 @@ class SortKeyGenerator(object): # }}} -if __name__ == '__main__': - # Testing.timing for new multi-sort {{{ - import time - - from calibre.library import db - db = db() - - db.refresh() - - fields = db.field_metadata.field_keys() - - print fields - - - def do_single_sort(meth, field, order): - if meth == 'old': - db.data.sort(field, order) - else: - db.data.multisort([(field, order)]) - - def test_single_sort(field): - for meth in ('old', 'new'): - ttime = 0 - NUM = 10 - asc = desc = None - for i in range(NUM): - db.data.sort('id', False) - st = time.time() - do_single_sort(meth, field, True) - asc = db.data._map - do_single_sort(meth, field, False) - desc = db.data._map - ttime += time.time() - st - yield (ttime/NUM, asc, desc) - - - print 'Running single sort differentials' - for field in fields: - if field in ('search', 'id', 'news', 'flags'): continue - print '\t', field, db.field_metadata[field]['datatype'] - old, new = test_single_sort(field) - if old[1] != new[1] or old[2] != new[2]: - print '\t\t', 'Sort failure!' 
- raise SystemExit(1) - print '\t\t', 'Old:', old[0], 'New:', new[0], 'Ratio: %.2f'%(new[0]/old[0]) - - def do_multi_sort(meth, ms): - if meth == 'new': - db.data.multisort(ms) - else: - for s in reversed(ms): - db.data.sort(*s) - - def test_multi_sort(ms): - for meth in ('old', 'new'): - ttime = 0 - NUM = 10 - for i in range(NUM): - db.data.sort('id', False) - st = time.time() - do_multi_sort(meth, ms) - ttime += time.time() - st - yield (ttime/NUM, db.data._map) - - print 'Running multi-sort differentials' - - for ms in [ - [('timestamp', False), ('author', True), ('title', False)], - [('size', True), ('tags', True), ('author', False)], - [('series', False), ('title', True)], - [('size', True), ('tags', True), ('author', False), ('pubdate', - True), ('tags', False), ('formats', False), ('uuid', True)], - - ]: - print '\t', ms - db.data.sort('id', False) - old, new = test_multi_sort(ms) - if old[1] != new[1]: - print '\t\t', 'Sort failure!' - raise SystemExit() - print '\t\t', 'Old:', old[0], 'New:', new[0], 'Ratio: %.2f'%(new[0]/old[0]) - - # }}} - diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py index bd2160aff1..e14d092727 100644 --- a/src/calibre/library/catalog.py +++ b/src/calibre/library/catalog.py @@ -2523,6 +2523,10 @@ class EPUB_MOBI(CatalogPlugin): # Fetch the database as a dictionary self.booksBySeries = self.plugin.search_sort_db(self.db, self.opts) + if not self.booksBySeries: + self.opts.generate_series = False + self.opts.log(" no series found in selected books, cancelling series generation") + return friendly_name = "Series" @@ -2586,7 +2590,7 @@ class EPUB_MOBI(CatalogPlugin): aTag = Tag(soup, 'a') aTag['name'] = "%s_series" % re.sub('\W','',book['series']).lower() pSeriesTag.insert(0,aTag) - pSeriesTag.insert(1,NavigableString(self.NOT_READ_SYMBOL + '%s' % book['series'])) + pSeriesTag.insert(1,NavigableString('%s' % book['series'])) divTag.insert(dtc,pSeriesTag) dtc += 1 @@ -2595,7 +2599,14 @@ class 
EPUB_MOBI(CatalogPlugin): ptc = 0 # book with read/reading/unread symbol - if 'read' in book and book['read']: + for tag in book['tags']: + if tag == self.opts.read_tag: + book['read'] = True + break + else: + book['read'] = False + + if book['read']: # check mark pBookTag.insert(ptc,NavigableString(self.READ_SYMBOL)) pBookTag['class'] = "read_book" diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 8a5ab75c3c..f5f0f724ba 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -597,8 +597,12 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): return identical_book_ids def has_cover(self, index, index_is_id=False): - id = index if index_is_id else self.id(index) - path = os.path.join(self.library_path, self.path(id, index_is_id=True), 'cover.jpg') + id = index if index_is_id else self.id(index) + try: + path = os.path.join(self.abspath(id, index_is_id=True), 'cover.jpg') + except: + # Can happen if path has not yet been set + return False return os.access(path, os.R_OK) def remove_cover(self, id, notify=True): @@ -609,6 +613,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): except (IOError, OSError): time.sleep(0.2) os.remove(path) + self.data.set(id, self.FIELD_MAP['cover'], False, row_is_id=True) if notify: self.notify('cover', [id]) @@ -629,6 +634,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): except (IOError, OSError): time.sleep(0.2) save_cover_data_to(data, path) + self.data.set(id, self.FIELD_MAP['cover'], True, row_is_id=True) if notify: self.notify('cover', [id]) @@ -1087,8 +1093,11 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): self.set_path(id, True) self.notify('metadata', [id]) - # Given a book, return the list of author sort strings for the book's authors def authors_sort_strings(self, id, index_is_id=False): + ''' + Given a book, return the list of author sort strings + for the 
book's authors + ''' id = id if index_is_id else self.id(id) aut_strings = self.conn.get(''' SELECT sort @@ -1744,10 +1753,10 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): series_index = 1.0 if mi.series_index is None else mi.series_index aus = mi.author_sort if mi.author_sort else self.author_sort_from_authors(mi.authors) title = mi.title - if isinstance(aus, str): + if isbytestring(aus): aus = aus.decode(preferred_encoding, 'replace') - if isinstance(title, str): - title = title.decode(preferred_encoding) + if isbytestring(title): + title = title.decode(preferred_encoding, 'replace') obj = self.conn.execute('INSERT INTO books(title, series_index, author_sort) VALUES (?, ?, ?)', (title, series_index, aus)) id = obj.lastrowid diff --git a/src/calibre/library/server/content.py b/src/calibre/library/server/content.py index 6784abd8f4..ecb467b4c2 100644 --- a/src/calibre/library/server/content.py +++ b/src/calibre/library/server/content.py @@ -5,7 +5,7 @@ __license__ = 'GPL v3' __copyright__ = '2010, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import re, os, cStringIO, operator +import re, os, cStringIO import cherrypy try: @@ -16,7 +16,15 @@ except ImportError: from calibre import fit_image, guess_type from calibre.utils.date import fromtimestamp -from calibre.ebooks.metadata import title_sort +from calibre.library.caches import SortKeyGenerator + +class CSSortKeyGenerator(SortKeyGenerator): + + def __init__(self, fields, fm): + SortKeyGenerator.__init__(self, fields, fm, None) + + def __call__(self, record): + return self.itervals(record).next() class ContentServer(object): @@ -47,32 +55,12 @@ class ContentServer(object): def sort(self, items, field, order): - field = field.lower().strip() - if field == 'author': - field = 'authors' - if field == 'date': - field = 'timestamp' + field = self.db.data.sanitize_sort_field_name(field) if field not in ('title', 'authors', 'rating', 'timestamp', 'tags', 'size', 'series'): raise 
cherrypy.HTTPError(400, '%s is not a valid sort field'%field) - cmpf = cmp if field in ('rating', 'size', 'timestamp') else \ - lambda x, y: cmp(x.lower() if x else '', y.lower() if y else '') - if field == 'series': - items.sort(cmp=self.seriescmp, reverse=not order) - else: - lookup = 'sort' if field == 'title' else field - lookup = 'author_sort' if field == 'authors' else field - field = self.db.FIELD_MAP[lookup] - getter = operator.itemgetter(field) - items.sort(cmp=lambda x, y: cmpf(getter(x), getter(y)), reverse=not order) + keyg = CSSortKeyGenerator([(field, order)], self.db.field_metadata) + items.sort(key=keyg, reverse=not order) - def seriescmp(self, x, y): - si = self.db.FIELD_MAP['series'] - try: - ans = cmp(title_sort(x[si].lower()), title_sort(y[si].lower())) - except AttributeError: # Some entries may be None - ans = cmp(x[si], y[si]) - if ans != 0: return ans - return cmp(x[self.db.FIELD_MAP['series_index']], y[self.db.FIELD_MAP['series_index']]) # }}} diff --git a/src/calibre/utils/magick/__init__.py b/src/calibre/utils/magick/__init__.py index 073a030361..2707430c67 100644 --- a/src/calibre/utils/magick/__init__.py +++ b/src/calibre/utils/magick/__init__.py @@ -194,7 +194,7 @@ class Image(_magick.Image): # {{{ # }}} -def create_canvas(width, height, bgcolor='white'): +def create_canvas(width, height, bgcolor='#ffffff'): canvas = Image() canvas.create_canvas(int(width), int(height), str(bgcolor)) return canvas diff --git a/src/calibre/utils/magick/draw.py b/src/calibre/utils/magick/draw.py index 82a0237b8d..ed9e3d3d83 100644 --- a/src/calibre/utils/magick/draw.py +++ b/src/calibre/utils/magick/draw.py @@ -5,12 +5,14 @@ __license__ = 'GPL v3' __copyright__ = '2010, Kovid Goyal ' __docformat__ = 'restructuredtext en' +import os from calibre.utils.magick import Image, DrawingWand, create_canvas from calibre.constants import __appname__, __version__ from calibre import fit_image -def save_cover_data_to(data, path, bgcolor='white', resize_to=None): +def 
save_cover_data_to(data, path, bgcolor='#ffffff', resize_to=None, + return_data=False): ''' Saves image in data to path, in the format specified by the path extension. Composes the image onto a blank canvas so as to @@ -22,9 +24,11 @@ def save_cover_data_to(data, path, bgcolor='white', resize_to=None): img.size = (resize_to[0], resize_to[1]) canvas = create_canvas(img.size[0], img.size[1], bgcolor) canvas.compose(img) + if return_data: + return canvas.export(os.path.splitext(path)[1][1:]) canvas.save(path) -def thumbnail(data, width=120, height=120, bgcolor='white', fmt='jpg'): +def thumbnail(data, width=120, height=120, bgcolor='#ffffff', fmt='jpg'): img = Image() img.load(data) owidth, oheight = img.size @@ -57,7 +61,7 @@ def identify(path): return identify_data(data) def add_borders_to_image(path_to_image, left=0, top=0, right=0, bottom=0, - border_color='white'): + border_color='#ffffff'): img = Image() img.open(path_to_image) lwidth, lheight = img.size @@ -76,7 +80,7 @@ def create_text_wand(font_size, font_path=None): ans.text_alias = True return ans -def create_text_arc(text, font_size, font=None, bgcolor='white'): +def create_text_arc(text, font_size, font=None, bgcolor='#ffffff'): if isinstance(text, unicode): text = text.encode('utf-8') @@ -144,7 +148,7 @@ class TextLine(object): def create_cover_page(top_lines, logo_path, width=590, height=750, - bgcolor='white', output_format='jpg'): + bgcolor='#ffffff', output_format='jpg'): ''' Create the standard calibre cover page and return it as a byte string in the specified output_format. 
diff --git a/src/calibre/utils/smartypants.py b/src/calibre/utils/smartypants.py new file mode 100755 index 0000000000..44aac4de8c --- /dev/null +++ b/src/calibre/utils/smartypants.py @@ -0,0 +1,899 @@ +#!/usr/bin/python + +r""" +============== +smartypants.py +============== + +---------------------------- +SmartyPants ported to Python +---------------------------- + +Ported by `Chad Miller`_ +Copyright (c) 2004, 2007 Chad Miller + +original `SmartyPants`_ by `John Gruber`_ +Copyright (c) 2003 John Gruber + + +Synopsis +======== + +A smart-quotes plugin for Pyblosxom_. + +The original "SmartyPants" is a free web publishing plug-in for Movable Type, +Blosxom, and BBEdit that easily translates plain ASCII punctuation characters +into "smart" typographic punctuation HTML entities. + +This software, *smartypants.py*, endeavours to be a functional port of +SmartyPants to Python, for use with Pyblosxom_. + + +Description +=========== + +SmartyPants can perform the following transformations: + +- Straight quotes ( " and ' ) into "curly" quote HTML entities +- Backticks-style quotes (\`\`like this'') into "curly" quote HTML entities +- Dashes (``--`` and ``---``) into en- and em-dash entities +- Three consecutive dots (``...`` or ``. . .``) into an ellipsis entity + +This means you can write, edit, and save your posts using plain old +ASCII straight quotes, plain dashes, and plain dots, but your published +posts (and final HTML output) will appear with smart quotes, em-dashes, +and proper ellipses. + +SmartyPants does not modify characters within ``

``, ````, ````,
+```` or ``

He said, "'Quoted' words in a larger quote."

+ str = re.sub(r""""'(?=\w)""", """“‘""", str) + str = re.sub(r"""'"(?=\w)""", """‘“""", str) + + # Special case for decade abbreviations (the '80s): + str = re.sub(r"""\b'(?=\d{2}s)""", r"""’""", str) + + close_class = r"""[^\ \t\r\n\[\{\(\-]""" + dec_dashes = r"""–|—""" + + # Get most opening single quotes: + opening_single_quotes_regex = re.compile(r""" + ( + \s | # a whitespace char, or +   | # a non-breaking space entity, or + -- | # dashes, or + &[mn]dash; | # named dash entities + %s | # or decimal entities + &\#x201[34]; # or hex + ) + ' # the quote + (?=\w) # followed by a word character + """ % (dec_dashes,), re.VERBOSE) + str = opening_single_quotes_regex.sub(r"""\1‘""", str) + + closing_single_quotes_regex = re.compile(r""" + (%s) + ' + (?!\s | s\b | \d) + """ % (close_class,), re.VERBOSE) + str = closing_single_quotes_regex.sub(r"""\1’""", str) + + closing_single_quotes_regex = re.compile(r""" + (%s) + ' + (\s | s\b) + """ % (close_class,), re.VERBOSE) + str = closing_single_quotes_regex.sub(r"""\1’\2""", str) + + # Any remaining single quotes should be opening ones: + str = re.sub(r"""'""", r"""‘""", str) + + # Get most opening double quotes: + opening_double_quotes_regex = re.compile(r""" + ( + \s | # a whitespace char, or +   | # a non-breaking space entity, or + -- | # dashes, or + &[mn]dash; | # named dash entities + %s | # or decimal entities + &\#x201[34]; # or hex + ) + " # the quote + (?=\w) # followed by a word character + """ % (dec_dashes,), re.VERBOSE) + str = opening_double_quotes_regex.sub(r"""\1“""", str) + + # Double closing quotes: + closing_double_quotes_regex = re.compile(r""" + #(%s)? 
# character that indicates the quote should be closing + " + (?=\s) + """ % (close_class,), re.VERBOSE) + str = closing_double_quotes_regex.sub(r"""”""", str) + + closing_double_quotes_regex = re.compile(r""" + (%s) # character that indicates the quote should be closing + " + """ % (close_class,), re.VERBOSE) + str = closing_double_quotes_regex.sub(r"""\1”""", str) + + # Any remaining quotes should be opening ones. + str = re.sub(r'"', r"""“""", str) + + return str + + +def educateBackticks(str): + """ + Parameter: String. + Returns: The string, with ``backticks'' -style double quotes + translated into HTML curly quote entities. + Example input: ``Isn't this fun?'' + Example output: “Isn't this fun?” + """ + + str = re.sub(r"""``""", r"""“""", str) + str = re.sub(r"""''""", r"""”""", str) + return str + + +def educateSingleBackticks(str): + """ + Parameter: String. + Returns: The string, with `backticks' -style single quotes + translated into HTML curly quote entities. + + Example input: `Isn't this fun?' + Example output: ‘Isn’t this fun?’ + """ + + str = re.sub(r"""`""", r"""‘""", str) + str = re.sub(r"""'""", r"""’""", str) + return str + + +def educateDashes(str): + """ + Parameter: String. + + Returns: The string, with each instance of "--" translated to + an em-dash HTML entity. + """ + + str = re.sub(r"""---""", r"""–""", str) # en (yes, backwards) + str = re.sub(r"""--""", r"""—""", str) # em (yes, backwards) + return str + + +def educateDashesOldSchool(str): + """ + Parameter: String. + + Returns: The string, with each instance of "--" translated to + an en-dash HTML entity, and each "---" translated to + an em-dash HTML entity. + """ + + str = re.sub(r"""---""", r"""—""", str) # em (yes, backwards) + str = re.sub(r"""--""", r"""–""", str) # en (yes, backwards) + return str + + +def educateDashesOldSchoolInverted(str): + """ + Parameter: String. 
+ + Returns: The string, with each instance of "--" translated to + an em-dash HTML entity, and each "---" translated to + an en-dash HTML entity. Two reasons why: First, unlike the + en- and em-dash syntax supported by + EducateDashesOldSchool(), it's compatible with existing + entries written before SmartyPants 1.1, back when "--" was + only used for em-dashes. Second, em-dashes are more + common than en-dashes, and so it sort of makes sense that + the shortcut should be shorter to type. (Thanks to Aaron + Swartz for the idea.) + """ + str = re.sub(r"""---""", r"""–""", str) # em + str = re.sub(r"""--""", r"""—""", str) # en + return str + + + +def educateEllipses(str): + """ + Parameter: String. + Returns: The string, with each instance of "..." translated to + an ellipsis HTML entity. + + Example input: Huh...? + Example output: Huh…? + """ + + str = re.sub(r"""\.\.\.""", r"""…""", str) + str = re.sub(r"""\. \. \.""", r"""…""", str) + return str + + +def stupefyEntities(str): + """ + Parameter: String. + Returns: The string, with each SmartyPants HTML entity translated to + its ASCII counterpart. + + Example input: “Hello — world.” + Example output: "Hello -- world." + """ + + str = re.sub(r"""–""", r"""-""", str) # en-dash + str = re.sub(r"""—""", r"""--""", str) # em-dash + + str = re.sub(r"""‘""", r"""'""", str) # open single quote + str = re.sub(r"""’""", r"""'""", str) # close single quote + + str = re.sub(r"""“""", r'''"''', str) # open double quote + str = re.sub(r"""”""", r'''"''', str) # close double quote + + str = re.sub(r"""…""", r"""...""", str)# ellipsis + + return str + + +def processEscapes(str): + r""" + Parameter: String. + Returns: The string, with after processing the following backslash + escape sequences. This is useful if you want to force a "dumb" + quote or other character to appear. + + Escape Value + ------ ----- + \\ \ + \" " + \' ' + \. . 
+ \- - + \` ` + """ + str = re.sub(r"""\\\\""", r"""\""", str) + str = re.sub(r'''\\"''', r""""""", str) + str = re.sub(r"""\\'""", r"""'""", str) + str = re.sub(r"""\\\.""", r""".""", str) + str = re.sub(r"""\\-""", r"""-""", str) + str = re.sub(r"""\\`""", r"""`""", str) + + return str + + +def _tokenize(str): + """ + Parameter: String containing HTML markup. + Returns: Reference to an array of the tokens comprising the input + string. Each token is either a tag (possibly with nested, + tags contained therein, such as , or a + run of text between tags. Each element of the array is a + two-element array; the first is either 'tag' or 'text'; + the second is the actual value. + + Based on the _tokenize() subroutine from Brad Choate's MTRegex plugin. + + """ + + tokens = [] + + #depth = 6 + #nested_tags = "|".join(['(?:<(?:[^<>]',] * depth) + (')*>)' * depth) + #match = r"""(?: ) | # comments + # (?: <\? .*? \?> ) | # directives + # %s # nested tags """ % (nested_tags,) + tag_soup = re.compile(r"""([^<]*)(<[^>]*>)""") + + token_match = tag_soup.search(str) + + previous_end = 0 + while token_match is not None: + if token_match.group(1): + tokens.append(['text', token_match.group(1)]) + + tokens.append(['tag', token_match.group(2)]) + + previous_end = token_match.end() + token_match = tag_soup.search(str, token_match.end()) + + if previous_end < len(str): + tokens.append(['text', str[previous_end:]]) + + return tokens + + + +if __name__ == "__main__": + + import locale + + try: + locale.setlocale(locale.LC_ALL, '') + except: + pass + + from docutils.core import publish_string + docstring_html = publish_string(__doc__, writer_name='html') + + print docstring_html + + + # Unit test output goes out stderr. No worries. + import unittest + sp = smartyPants + + class TestSmartypantsAllAttributes(unittest.TestCase): + # the default attribute is "1", which means "all". 
+ + def test_dates(self): + self.assertEqual(sp("1440-80's"), "1440-80’s") + self.assertEqual(sp("1440-'80s"), "1440-‘80s") + self.assertEqual(sp("1440---'80s"), "1440–‘80s") + self.assertEqual(sp("1960s"), "1960s") # no effect. + self.assertEqual(sp("1960's"), "1960’s") + self.assertEqual(sp("one two '60s"), "one two ‘60s") + self.assertEqual(sp("'60s"), "‘60s") + + def test_skip_tags(self): + self.assertEqual( + sp(""""""), + """""") + self.assertEqual( + sp("""

He said "Let's write some code." This code here if True:\n\tprint "Okay" is python code.

"""), + """

He said “Let’s write some code.” This code here if True:\n\tprint "Okay" is python code.

""") + + + def test_ordinal_numbers(self): + self.assertEqual(sp("21st century"), "21st century") # no effect. + self.assertEqual(sp("3rd"), "3rd") # no effect. + + def test_educated_quotes(self): + self.assertEqual(sp('''"Isn't this fun?"'''), '''“Isn’t this fun?”''') + + unittest.main() + + + + +__author__ = "Chad Miller " +__version__ = "1.5_1.6: Fri, 27 Jul 2007 07:06:40 -0400" +__url__ = "http://wiki.chad.org/SmartyPantsPy" +__description__ = "Smart-quotes, smart-ellipses, and smart-dashes for weblog entries in pyblosxom" diff --git a/src/calibre/web/feeds/__init__.py b/src/calibre/web/feeds/__init__.py index a70cf8b664..8aef350498 100644 --- a/src/calibre/web/feeds/__init__.py +++ b/src/calibre/web/feeds/__init__.py @@ -165,7 +165,9 @@ class Feed(object): if delta.days*24*3600 + delta.seconds <= 24*3600*self.oldest_article: self.articles.append(article) else: - self.logger.debug('Skipping article %s (%s) from feed %s as it is too old.'%(title, article.localtime.strftime('%a, %d %b, %Y %H:%M'), self.title)) + t = strftime(u'%a, %d %b, %Y %H:%M', article.localtime.timetuple()) + self.logger.debug('Skipping article %s (%s) from feed %s as it is too old.'% + (title, t, self.title)) d = item.get('date', '') article.formatted_date = d diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 9ba9583c73..a140dfbf05 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -290,10 +290,12 @@ class BasicNewsRecipe(Recipe): #: the cover for the periodical. Overriding this in your recipe instructs #: calibre to render the downloaded cover into a frame whose width and height #: are expressed as a percentage of the downloaded cover. - #: cover_margins = (10,15,'white') pads the cover with a white margin + #: cover_margins = (10, 15, '#ffffff') pads the cover with a white margin #: 10px on the left and right, 15px on the top and bottom. 
- #: Colors name defined at http://www.imagemagick.org/script/color.php - cover_margins = (0,0,'white') + #: Color names defined at http://www.imagemagick.org/script/color.php + #: Note that for some reason, white does not always work on windows. Use + #: #ffffff instead + cover_margins = (0, 0, '#ffffff') #: Set to a non empty string to disable this recipe #: The string will be used as the disabled message