diff --git a/imgsrc/rating.svg b/imgsrc/rating.svg new file mode 100644 index 0000000000..d289c71b99 --- /dev/null +++ b/imgsrc/rating.svg @@ -0,0 +1,589 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + diff --git a/resources/images/news/ajc.png b/resources/images/news/ajc.png new file mode 100644 index 0000000000..110f40e039 Binary files /dev/null and b/resources/images/news/ajc.png differ diff --git a/resources/images/rating.png b/resources/images/rating.png new file mode 100644 index 0000000000..81eba505b9 Binary files /dev/null and b/resources/images/rating.png differ diff --git a/resources/images/star.png b/resources/images/star.png deleted file mode 100644 index 6eb1fb890f..0000000000 Binary files a/resources/images/star.png and /dev/null differ diff --git a/resources/jacket/stylesheet.css b/resources/jacket/stylesheet.css new file mode 100644 index 0000000000..8dee8edc3c --- /dev/null +++ b/resources/jacket/stylesheet.css @@ -0,0 +1,116 @@ +/* +** Book Jacket generation +** +** The template for Book Jackets is template.xhtml +** This CSS is inserted into the generated HTML at conversion time +** +** Users can control parts of the presentation of a generated book jacket by +** editing this file and template.xhtml +** +** The general form of a generated Book Jacket: +** +** Title +** Series: series [series_index] +** Published: year_of_publication +** Rating: #_of_stars +** Tags: tag1, tag2, tag3 ... +** +** Comments +** +** If a book does not have Series information, a date of publication, a rating or tags +** the corresponding row is automatically removed from the generated book jacket. 
+*/ + +/* +** Banner +** Only affects EPUB, kindle ignores this type of formatting +*/ +.cbj_banner { + background: #eee; + border: thin solid black; + margin: 1em; + padding: 1em; + -webkit-border-radius:8px; + } + +/* +** Title +*/ +.cbj_title { + font-size: x-large; + text-align: center; + } + +/* +** Table containing Series, Publication Year, Rating and Tags +*/ +table.cbj_header { + width: 100%; + } + +/* +** General formatting for banner labels +*/ +table.cbj_header td.cbj_label { + font-family: sans-serif; + font-weight: bold; + text-align: right; + width: 40%; + } + +/* +** General formatting for banner content +*/ +table.cbj_header td.cbj_content { + font-family: sans-serif; + text-align: left; + width:60%; + } + +/* +** To skip a banner item (Series|Published|Rating|Tags), +** edit the appropriate CSS rule below. +*/ +table.cbj_header tr.cbj_series { + /* Uncomment the next line to remove 'Series' from banner section */ + /* display:none; */ + } + +table.cbj_header tr.cbj_pubdate { + /* Uncomment the next line to remove 'Published' from banner section */ + /* display:none; */ + } + +table.cbj_header tr.cbj_rating { + /* Uncomment the next line to remove 'Rating' from banner section */ + /* display:none; */ + } + +table.cbj_header tr.cbj_tags { + /* Uncomment the next line to remove 'Tags' from banner section */ + /* display:none; */ + } + +hr { + /* This rule controls formatting for any hr elements contained in the jacket */ + border-top: 0px solid white; + border-right: 0px solid white; + border-bottom: 2px solid black; + border-left: 0px solid white; + margin-left: 10%; + width: 80%; + } + +.cbj_footer { + font-family: sans-serif; + font-size: small; + margin-top: 8px; + text-align: center; + } +.cbj_smallcaps { + font-size: 90%; + } + +.cbj_comments { + font-family: sans-serif; + } diff --git a/resources/jacket/template.xhtml b/resources/jacket/template.xhtml new file mode 100644 index 0000000000..93e12983e8 --- /dev/null +++ 
b/resources/jacket/template.xhtml @@ -0,0 +1,34 @@ + + + {title_str} + + + + +
+
{title}
+ + + + + + + + + + + + + + + + + +
{series_label}:{series}
{pubdate_label}:{pubdate}
{rating_label}:{rating}
{tags_label}:{tags}
+ +
+
+
{comments}
+ + + diff --git a/resources/recipes/adventuregamers.recipe b/resources/recipes/adventuregamers.recipe index 1cde045953..d08eca1723 100644 --- a/resources/recipes/adventuregamers.recipe +++ b/resources/recipes/adventuregamers.recipe @@ -1,7 +1,5 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' +__copyright__ = '2009-2010, Darko Miletic ' ''' www.adventuregamers.com ''' @@ -10,14 +8,11 @@ from calibre.web.feeds.news import BasicNewsRecipe class AdventureGamers(BasicNewsRecipe): title = u'Adventure Gamers' - language = 'en' - + language = 'en' __author__ = 'Darko Miletic' - description = 'Adventure games portal' + description = 'Adventure games portal' publisher = 'Adventure Gamers' - category = 'news, games, adventure, technology' - language = 'en' - + category = 'news, games, adventure, technology' oldest_article = 10 delay = 10 max_articles_per_feed = 100 @@ -26,14 +21,25 @@ class AdventureGamers(BasicNewsRecipe): remove_javascript = True use_embedded_content = False INDEX = u'http://www.adventuregamers.com' - - html2lrf_options = [ - '--comment', description - , '--category', category - , '--publisher', publisher - ] - - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + extra_css = """ + .pageheader_type{font-size: x-large; font-weight: bold; color: #828D74} + .pageheader_title{font-size: xx-large; color: #394128} + .pageheader_byline{font-size: small; font-weight: bold; color: #394128} + .score_bg {display: inline; width: 100%; margin-bottom: 2em} + .score_column_1{ padding-left: 10px; font-size: small; width: 50%} + .score_column_2{ padding-left: 10px; font-size: small; width: 50%} + .score_column_3{ padding-left: 10px; font-size: small; width: 50%} + .score_header{font-size: large; color: #50544A} + .bodytext{display: block} + body{font-family: Helvetica,Arial,sans-serif} + """ + + conversion_options = { + 'comment' : description + , 'tags' : category + , 
'publisher' : publisher + , 'language' : language + } keep_only_tags = [ dict(name='div', attrs={'class':'content_middle'}) @@ -43,14 +49,15 @@ class AdventureGamers(BasicNewsRecipe): dict(name=['object','link','embed','form']) ,dict(name='div', attrs={'class':['related-stories','article_leadout','prev','next','both']}) ] - + remove_tags_after = [dict(name='div', attrs={'class':'toolbar_fat'})] - + remove_attributes = ['width','height'] + feeds = [(u'Articles', u'http://feeds2.feedburner.com/AdventureGamers')] - + def get_article_url(self, article): return article.get('guid', None) - + def append_page(self, soup, appendtag, position): pager = soup.find('div',attrs={'class':'toolbar_fat_next'}) if pager: @@ -59,19 +66,19 @@ class AdventureGamers(BasicNewsRecipe): texttag = soup2.find('div', attrs={'class':'bodytext'}) for it in texttag.findAll(style=True): del it['style'] - newpos = len(texttag.contents) + newpos = len(texttag.contents) self.append_page(soup2,texttag,newpos) texttag.extract() appendtag.insert(position,texttag) - - + + def preprocess_html(self, soup): - mtag = '\n' - soup.head.insert(0,mtag) for item in soup.findAll(style=True): del item['style'] + for item in soup.findAll('div', attrs={'class':'floatright'}): + item.extract() self.append_page(soup, soup.body, 3) pager = soup.find('div',attrs={'class':'toolbar_fat'}) if pager: - pager.extract() - return soup + pager.extract() + return self.adeify_images(soup) diff --git a/resources/recipes/ajc.recipe b/resources/recipes/ajc.recipe index 4315101a63..82809f626c 100644 --- a/resources/recipes/ajc.recipe +++ b/resources/recipes/ajc.recipe @@ -10,12 +10,31 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe): oldest_article = 1 max_articles_per_feed = 100 no_stylesheets = True - extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }' + masthead_url = 'http://gawand.org/wp-content/uploads/2010/06/ajc-logo.gif' + extra_css = ''' + h1{font-family:Arial,Helvetica,sans-serif; 
font-weight:bold;font-size:large;} + h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} + p{font-family:Arial,Helvetica,sans-serif;font-size:small;} + body{font-family:Helvetica,Arial,sans-serif;font-size:small;} + ''' + + keep_only_tags = [ - dict(name='div', attrs={'id':['cxArticleContent']}) - ,dict(attrs={'id':['cxArticleText','cxArticleBodyText']}) + dict(name='div', attrs={'class':['cxArticleHeader']}) + ,dict(attrs={'id':['cxArticleText']}) ] + + + remove_tags = [ + dict(name='div' , attrs={'class':'cxArticleList' }) + ,dict(name='div' , attrs={'class':'cxFeedTease' }) + ,dict(name='div' , attrs={'class':'cxElementEnlarge' }) + ,dict(name='div' , attrs={'id':'cxArticleTools' }) + ] + + + feeds = [ ('Breaking News', 'http://www.ajc.com/genericList-rss.do?source=61499'), # ------------------------------------------------------------------- @@ -23,7 +42,7 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe): # read by simply removing the pound sign from it. 
I currently have it # set to only get the Cobb area # -------------------------------------------------------------------- - ('Atlanta & Fulton', 'http://www.ajc.com/section-rss.do?source=atlanta'), + #('Atlanta & Fulton', 'http://www.ajc.com/section-rss.do?source=atlanta'), #('Clayton', 'http://www.ajc.com/section-rss.do?source=clayton'), #('DeKalb', 'http://www.ajc.com/section-rss.do?source=dekalb'), #('Gwinnett', 'http://www.ajc.com/section-rss.do?source=gwinnett'), @@ -41,7 +60,7 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe): # but again # You can enable which ever team you like by removing the pound sign # ------------------------------------------------------------------------ - ('Sports News', 'http://www.ajc.com/genericList-rss.do?source=61510'), + #('Sports News', 'http://www.ajc.com/genericList-rss.do?source=61510'), #('Braves', 'http://www.ajc.com/genericList-rss.do?source=61457'), ('Falcons', 'http://www.ajc.com/genericList-rss.do?source=61458'), #('Hawks', 'http://www.ajc.com/genericList-rss.do?source=61522'), @@ -52,11 +71,16 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe): ('Music', 'http://www.accessatlanta.com/section-rss.do?source=music'), ] + def postprocess_html(self, soup, first): + for credit_tag in soup.findAll('span', attrs={'class':['imageCredit rightFloat']}): + credit_tag.name ='p' + + return soup + + #def print_version(self, url): + # return url.partition('?')[0] +'?printArticle=y' - - def print_version(self, url): - return url.partition('?')[0] +'?printArticle=y' diff --git a/resources/recipes/brand_eins.recipe b/resources/recipes/brand_eins.recipe new file mode 100644 index 0000000000..be5b98ffe6 --- /dev/null +++ b/resources/recipes/brand_eins.recipe @@ -0,0 +1,125 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +__license__ = 'GPL v3' +__copyright__ = '2010, Constantin Hofstetter ' +__version__ = '0.95' + +''' http://brandeins.de - Wirtschaftsmagazin ''' +import re +import string +from calibre.web.feeds.recipes 
import BasicNewsRecipe + +class BrandEins(BasicNewsRecipe): + + title = u'Brand Eins' + __author__ = 'Constantin Hofstetter' + description = u'Wirtschaftsmagazin' + publisher ='brandeins.de' + category = 'politics, business, wirtschaft, Germany' + use_embedded_content = False + lang = 'de-DE' + no_stylesheets = True + encoding = 'utf-8' + language = 'de' + + # 2 is the last full magazine (default) + # 1 is the newest (but not full) + # 3 is one before 2 etc. + which_ausgabe = 2 + + keep_only_tags = [dict(name='div', attrs={'id':'theContent'}), dict(name='div', attrs={'id':'sidebar'}), dict(name='div', attrs={'class':'intro'}), dict(name='p', attrs={'class':'bodytext'}), dict(name='div', attrs={'class':'single_image'})] + + ''' + brandeins.de + ''' + + def postprocess_html(self, soup,first): + + # Move the image of the sidebar right below the h3 + first_h3 = soup.find(name='div', attrs={'id':'theContent'}).find('h3') + for imgdiv in soup.findAll(name='div', attrs={'class':'single_image'}): + if len(first_h3.findNextSiblings('div', {'class':'intro'})) >= 1: + # first_h3.parent.insert(2, imgdiv) + first_h3.findNextSiblings('div', {'class':'intro'})[0].parent.insert(4, imgdiv) + else: + first_h3.parent.insert(2, imgdiv) + + # Now, remove the sidebar + soup.find(name='div', attrs={'id':'sidebar'}).extract() + + # Remove the rating-image (stars) from the h3 + for img in first_h3.findAll(name='img'): + img.extract() + + # Mark the intro texts as italic + for div in soup.findAll(name='div', attrs={'class':'intro'}): + for p in div.findAll('p'): + content = self.tag_to_string(p) + new_p = "

"+ content +"

" + p.replaceWith(new_p) + + return soup + + def parse_index(self): + feeds = [] + + archive = "http://www.brandeins.de/archiv.html" + + soup = self.index_to_soup(archive) + latest_jahrgang = soup.findAll('div', attrs={'class': re.compile(r'\bjahrgang-latest\b') })[0].findAll('ul')[0] + pre_latest_issue = latest_jahrgang.findAll('a')[len(latest_jahrgang.findAll('a'))-self.which_ausgabe] + url = pre_latest_issue.get('href', False) + # Get the title for the magazin - build it out of the title of the cover - take the issue and year; + self.title = "Brand Eins "+ re.search(r"(?P\d\d\/\d\d\d\d+)", pre_latest_issue.find('img').get('title', False)).group('date') + url = 'http://brandeins.de/'+url + + # url = "http://www.brandeins.de/archiv/magazin/tierisch.html" + titles_and_articles = self.brand_eins_parse_latest_issue(url) + if titles_and_articles: + for title, articles in titles_and_articles: + feeds.append((title, articles)) + return feeds + + def brand_eins_parse_latest_issue(self, url): + soup = self.index_to_soup(url) + article_lists = [soup.find('div', attrs={'class':'subColumnLeft articleList'}), soup.find('div', attrs={'class':'subColumnRight articleList'})] + + titles_and_articles = [] + current_articles = [] + chapter_title = "Editorial" + self.log('Found Chapter:', chapter_title) + + # Remove last list of links (thats just the impressum and the 'gewinnspiel') + article_lists[1].findAll('ul')[len(article_lists[1].findAll('ul'))-1].extract() + + for article_list in article_lists: + for chapter in article_list.findAll('ul'): + if len(chapter.findPreviousSiblings('h3')) >= 1: + new_chapter_title = string.capwords(self.tag_to_string(chapter.findPreviousSiblings('h3')[0])) + if new_chapter_title != chapter_title: + titles_and_articles.append([chapter_title, current_articles]) + current_articles = [] + self.log('Found Chapter:', new_chapter_title) + chapter_title = new_chapter_title + for li in chapter.findAll('li'): + a = li.find('a', href = True) + if a is None: + 
continue + title = self.tag_to_string(a) + url = a.get('href', False) + if not url or not title: + continue + url = 'http://brandeins.de/'+url + if len(a.parent.findNextSiblings('p')) >= 1: + description = self.tag_to_string(a.parent.findNextSiblings('p')[0]) + else: + description = '' + + self.log('\t\tFound article:', title) + self.log('\t\t\t', url) + self.log('\t\t\t', description) + + current_articles.append({'title': title, 'url': url, 'description': description, 'date':''}) + titles_and_articles.append([chapter_title, current_articles]) + return titles_and_articles diff --git a/resources/recipes/danas.recipe b/resources/recipes/danas.recipe index 159553370a..6d6042b5c9 100644 --- a/resources/recipes/danas.recipe +++ b/resources/recipes/danas.recipe @@ -20,6 +20,7 @@ class Danas(BasicNewsRecipe): encoding = 'utf-8' masthead_url = 'http://www.danas.rs/images/basic/danas.gif' language = 'sr' + remove_javascript = True publication_type = 'newspaper' remove_empty_feeds = True extra_css = """ @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @@ -29,7 +30,8 @@ class Danas(BasicNewsRecipe): .antrfileText{border-left: 2px solid #999999; margin-left: 0.8em; padding-left: 1.2em; margin-bottom: 0; margin-top: 0} h2,.datum,.lokacija,.autor{font-size: small} .antrfileNaslov{border-left: 2px solid #999999; margin-left: 0.8em; padding-left: 1.2em; - font-weight:bold; margin-bottom: 0; margin-top: 0} img{margin-bottom: 0.8em} """ + font-weight:bold; margin-bottom: 0; margin-top: 0} img{margin-bottom: 0.8em} + """ conversion_options = { 'comment' : description @@ -38,14 +40,26 @@ class Danas(BasicNewsRecipe): , 'language' : language } - preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] + preprocess_regexps = [ + (re.compile(u'\u0110'), lambda match: u'\u00D0') + ,(re.compile(r'',re.DOTALL|re.IGNORECASE), lambda match: r'') + ,(re.compile(r'',re.DOTALL|re.IGNORECASE), lambda match: r'') + 
,(re.compile(r'',re.DOTALL|re.IGNORECASE), lambda match: r'') + ,(re.compile(r'',re.DOTALL|re.IGNORECASE), lambda match: r'') + ,(re.compile(r'',re.DOTALL|re.IGNORECASE), lambda match: r'') + ,(re.compile(r'',re.DOTALL|re.IGNORECASE), lambda match: r'') + ,(re.compile(r'',re.DOTALL|re.IGNORECASE), lambda match: r'') + ,(re.compile(r'',re.DOTALL|re.IGNORECASE), lambda match: r'') + ,(re.compile(r'',re.DOTALL|re.IGNORECASE), lambda match: r'') + ] keep_only_tags = [dict(name='div', attrs={'id':'left'})] remove_tags = [ dict(name='div', attrs={'class':['width_1_4','metaClanka','baner']}) ,dict(name='div', attrs={'id':'comments'}) - ,dict(name=['object','link','iframe']) + ,dict(name=['object','link','iframe','meta']) ] + remove_attributes = ['st'] feeds = [ (u'Politika' , u'http://www.danas.rs/rss/rss.asp?column_id=27') @@ -79,7 +93,13 @@ class Danas(BasicNewsRecipe): def preprocess_html(self, soup): for item in soup.findAll(style=True): del item['style'] - return self.adeify_images(soup) + for item in soup.findAll('a'): + if item.has_key('name'): + item.extract() + for item in soup.findAll('img'): + if not item.has_key('alt'): + item['alt'] = 'image' + return soup def print_version(self, url): return url + '&action=print' diff --git a/resources/recipes/slate.recipe b/resources/recipes/slate.recipe index c03255d2df..f2a5b71e3c 100644 --- a/resources/recipes/slate.recipe +++ b/resources/recipes/slate.recipe @@ -1,7 +1,8 @@ #!/usr/bin/env python +__copyright__ = '2008, Kovid Goyal ' __license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal ' + ''' calibre recipe for slate.com ''' @@ -10,13 +11,12 @@ import re from calibre.web.feeds.recipes import BasicNewsRecipe from calibre.ebooks.BeautifulSoup import BeautifulSoup, NavigableString, CData, Comment, Tag -class PeriodicalNameHere(BasicNewsRecipe): +class Slate(BasicNewsRecipe): # Method variables for customizing downloads - title = 'Slate' description = 'A general-interest publication offering analysis and commentary 
about politics, news and culture.' - __author__ = 'GRiker and Sujata Raman' - max_articles_per_feed = 20 - oldest_article = 7.0 + __author__ = 'GRiker, Sujata Raman and Nick Redding' + max_articles_per_feed = 100 + oldest_article = 14 recursions = 0 delay = 0 simultaneous_downloads = 5 @@ -27,8 +27,11 @@ class PeriodicalNameHere(BasicNewsRecipe): encoding = None language = 'en' - - + slate_complete = True + if slate_complete: + title = 'Slate (complete)' + else: + title = 'Slate (weekly)' # Method variables for customizing feed parsing summary_length = 250 @@ -50,8 +53,10 @@ class PeriodicalNameHere(BasicNewsRecipe): # The second entry is for 'Big Money', which comes from a different site, uses different markup remove_tags = [dict(attrs={ 'id':['toolbox','recommend_tab','insider_ad_wrapper', - 'article_bottom_tools_cntr','fray_article_discussion', 'fray_article_links','bottom_sponsored_links','author_bio', - 'bizbox_links_bottom','ris_links_wrapper','BOXXLE']}), + 'article_bottom_tools_cntr','fray_article_discussion','fray_article_links','bottom_sponsored_links','author_bio', + 'bizbox_links_bottom','ris_links_wrapper','BOXXLE', + 'comments_button','add_comments_button','comments-to-fray','marriott_ad', + 'article_bottom_tools','recommend_tab2','fbog_article_bottom_cntr']}), dict(attrs={ 'id':['content-top','service-links-bottom','hed']}) ] excludedDescriptionKeywords = ['Slate V','Twitter feed','podcast'] @@ -62,16 +67,15 @@ class PeriodicalNameHere(BasicNewsRecipe): extra_css = ''' .h1_subhead{font-family:Arial; font-size:small; } h1{font-family:Verdana; font-size:large; } - .byline {font-family:Georgia; margin-bottom: 0px; color: #660033;} - .dateline {font-family:Arial; font-size: smaller; height: 0pt; color:#666666;} + .byline {font-family:Georgia; margin-bottom: 0px; } + .dateline {font-family:Arial; font-size: smaller; height: 0pt;} .imagewrapper {font-family:Verdana;font-size:x-small; } .source {font-family:Verdana; font-size:x-small;} .credit 
{font-family:Verdana; font-size: smaller;} #article_body {font-family:Verdana; } #content {font-family:Arial; } .caption{font-family:Verdana;font-style:italic; font-size:x-small;} - h3{font-family:Arial; color:#666666; font-size:small} - a{color:#0066CC;} + h3{font-family:Arial; font-size:small} ''' # Local variables to extend class @@ -89,32 +93,59 @@ class PeriodicalNameHere(BasicNewsRecipe): if isinstance(item, (NavigableString, CData)): strings.append(item.string) elif isinstance(item, Tag): - res = self.tag_to_string(item) + res = self.tag_to_string(item,use_alt=False) if res: strings.append(res) return strings - - def extract_sections(self): + def extract_named_sections(self): soup = self.index_to_soup( self.baseURL ) - soup_top_stories = soup.find(True, attrs={'class':'tap2_topic entry-content'}) + soup_nav_bar = soup.find(True, attrs={'id':'nav'}) + briefing_nav = soup.find('li') + briefing_url = briefing_nav.a['href'] + for section_nav in soup_nav_bar.findAll('li'): + section_name = self.tag_to_string(section_nav,use_alt=False) + self.section_dates.append(section_name) + + soup = self.index_to_soup(briefing_url) + + self.log("Briefing url = %s " % briefing_url) + section_lists = soup.findAll('ul','view_links_list') + + sections = [] + for section in section_lists : + sections.append(section) + return sections + + + def extract_dated_sections(self): + soup = self.index_to_soup( self.baseURL ) + soup_top_stories = soup.find(True, attrs={'id':'tap3_cntr'}) + if soup_top_stories: + self.section_dates.append("Top Stories") + self.log("SELECTION TOP STORIES %s" % "Top Stories") + soup = soup.find(True, attrs={'id':'toc_links_container'}) todays_section = soup.find(True, attrs={'class':'todaydateline'}) self.section_dates.append(self.tag_to_string(todays_section,use_alt=False)) + self.log("SELECTION DATE %s" % self.tag_to_string(todays_section,use_alt=False)) older_section_dates = soup.findAll(True, attrs={'class':'maindateline'}) for older_section in 
older_section_dates : self.section_dates.append(self.tag_to_string(older_section,use_alt=False)) + self.log("SELECTION DATE %s" % self.tag_to_string(older_section,use_alt=False)) if soup_top_stories: - headline_stories = soup_top_stories.find('ul') + headline_stories = soup_top_stories + self.log("HAVE top_stories") else: headline_stories = None + self.log("NO top_stories") section_lists = soup.findAll('ul') # Prepend the headlines to the first section if headline_stories: - section_lists[0].insert(0,headline_stories) + section_lists.insert(0,headline_stories) sections = [] for section in section_lists : @@ -123,9 +154,8 @@ class PeriodicalNameHere(BasicNewsRecipe): def extract_section_articles(self, sections_html) : - # Find the containers with section content - soup = self.index_to_soup(str(sections_html)) - sections = soup.findAll('ul') + # Find the containers with section content + sections = sections_html articles = {} key = None @@ -135,10 +165,25 @@ class PeriodicalNameHere(BasicNewsRecipe): # Get the section name if section.has_key('id') : + self.log("PROCESSING SECTION id = %s" % section['id']) key = self.section_dates[i] + if key.startswith("Pod"): + continue + if key.startswith("Blog"): + continue + articles[key] = [] + ans.append(key) + elif self.slate_complete: + key = self.section_dates[i] + if key.startswith("Pod"): + continue + if key.startswith("Blog"): + continue + self.log("PROCESSING SECTION name = %s" % key) articles[key] = [] ans.append(key) else : + self.log("SECTION %d HAS NO id" % i); continue # Get the section article_list @@ -149,8 +194,10 @@ class PeriodicalNameHere(BasicNewsRecipe): bylines = self.tag_to_strings(article) url = article.a['href'] title = bylines[0] - full_title = self.tag_to_string(article) - + full_title = self.tag_to_string(article,use_alt=False) + #self.log("ARTICLE TITLE%s" % title) + #self.log("ARTICLE FULL_TITLE%s" % full_title) + #self.log("URL %s" % url) author = None description = None pubdate = None @@ -181,7 
+228,7 @@ class PeriodicalNameHere(BasicNewsRecipe): excluded = re.compile('|'.join(self.excludedDescriptionKeywords)) found_excluded = excluded.search(description) if found_excluded : - if self.verbose : self.log(" >>> skipping %s (description keyword exclusion: %s) <<<\n" % (title, found_excluded.group(0))) + self.log(" >>> skipping %s (description keyword exclusion: %s) <<<\n" % (title, found_excluded.group(0))) continue # Skip articles whose title contain excluded keywords @@ -190,7 +237,7 @@ class PeriodicalNameHere(BasicNewsRecipe): #self.log("evaluating full_title: %s" % full_title) found_excluded = excluded.search(full_title) if found_excluded : - if self.verbose : self.log(" >>> skipping %s (title keyword exclusion: %s) <<<\n" % (title, found_excluded.group(0))) + self.log(" >>> skipping %s (title keyword exclusion: %s) <<<\n" % (title, found_excluded.group(0))) continue # Skip articles whose author contain excluded keywords @@ -198,7 +245,7 @@ class PeriodicalNameHere(BasicNewsRecipe): excluded = re.compile('|'.join(self.excludedAuthorKeywords)) found_excluded = excluded.search(author) if found_excluded : - if self.verbose : self.log(" >>> skipping %s (author keyword exclusion: %s) <<<\n" % (title, found_excluded.group(0))) + self.log(" >>> skipping %s (author keyword exclusion: %s) <<<\n" % (title, found_excluded.group(0))) continue skip_this_article = False @@ -206,6 +253,7 @@ class PeriodicalNameHere(BasicNewsRecipe): for article in articles[key] : if article['url'] == url : skip_this_article = True + self.log("SKIPPING DUP %s" % url) break if skip_this_article : @@ -217,6 +265,8 @@ class PeriodicalNameHere(BasicNewsRecipe): articles[feed] = [] articles[feed].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content='')) + #self.log("KEY %s" % feed) + #self.log("APPENDED %s" % url) # Promote 'newspapers' to top for (i,article) in enumerate(articles[feed]) : if article['description'] is not None : @@ -225,32 +275,6 
@@ class PeriodicalNameHere(BasicNewsRecipe): ans = [(key, articles[key]) for key in ans if articles.has_key(key)] - ans = self.remove_duplicates(ans) - return ans - - def flatten_document(self, ans): - flat_articles = [] - for (i,section) in enumerate(ans) : - #self.log("flattening section %s: " % section[0]) - for article in section[1] : - #self.log("moving %s to flat_articles[]" % article['title']) - flat_articles.append(article) - flat_section = ['All Articles', flat_articles] - flat_ans = [flat_section] - return flat_ans - - def remove_duplicates(self, ans): - # Return a stripped ans - for (i,section) in enumerate(ans) : - #self.log("section %s: " % section[0]) - for article in section[1] : - #self.log("\t%s" % article['title']) - #self.log("\looking for %s" % article['url']) - for (j,subsequent_section) in enumerate(ans[i+1:]) : - for (k,subsequent_article) in enumerate(subsequent_section[1]) : - if article['url'] == subsequent_article['url'] : - #self.log( "removing %s (%s) from %s" % (subsequent_article['title'], subsequent_article['url'], subsequent_section[0]) ) - del subsequent_section[1][k] return ans def print_version(self, url) : @@ -258,13 +282,22 @@ class PeriodicalNameHere(BasicNewsRecipe): # Class methods def parse_index(self) : - sections = self.extract_sections() + if self.slate_complete: + sections = self.extract_named_sections() + else: + sections = self.extract_dated_sections() section_list = self.extract_section_articles(sections) - section_list = self.flatten_document(section_list) return section_list - def get_browser(self) : - return BasicNewsRecipe.get_browser() + def get_masthead_url(self): + masthead = 'http://img.slate.com/images/redesign2008/slate_logo.gif' + br = BasicNewsRecipe.get_browser() + try: + br.open(masthead) + except: + self.log("\nMasthead unavailable") + masthead = None + return masthead def stripAnchors(self,soup): body = soup.find('div',attrs={'id':['article_body','content']}) @@ -294,8 +327,8 @@ class 
PeriodicalNameHere(BasicNewsRecipe): excluded = re.compile('|'.join(self.excludedContentKeywords)) found_excluded = excluded.search(str(soup)) if found_excluded : - print "no allowed content found, removing article" - raise Exception('String error') + print "No allowed content found, removing article" + raise Exception('Rejected article') # Articles from www.thebigmoney.com use different tagging for byline, dateline and body head = soup.find('head') @@ -328,7 +361,6 @@ class PeriodicalNameHere(BasicNewsRecipe): dept_kicker = soup.find('div', attrs={'class':'department_kicker'}) if dept_kicker is not None : kicker_strings = self.tag_to_strings(dept_kicker) - #kicker = kicker_strings[2] + kicker_strings[3] kicker = ''.join(kicker_strings[2:]) kicker = re.sub('\.','',kicker) h3Tag = Tag(soup, "h3") @@ -336,25 +368,11 @@ class PeriodicalNameHere(BasicNewsRecipe): emTag.insert(0,NavigableString(kicker)) h3Tag.insert(0, emTag) dept_kicker.replaceWith(h3Tag) + else: + self.log("No kicker--return null") + return None - # Change

to

- headline = soup.find("h1") - tag = headline.find("span") - tag.name = 'div' - - if headline is not None : - h2tag = Tag(soup, "h2") - h2tag['class'] = "headline" - strs = self.tag_to_strings(headline) - result = '' - for (i,substr) in enumerate(strs) : - result += substr - if i < len(strs) -1 : - result += '
' - #h2tag.insert(0, result) - #headline.replaceWith(h2tag) - - # Fix up the concatenated byline and dateline + # Fix up the concatenated byline and dateline byline = soup.find(True,attrs={'class':'byline'}) if byline is not None : bylineTag = Tag(soup,'div') diff --git a/src/calibre/customize/profiles.py b/src/calibre/customize/profiles.py index 1563f764ca..2b5eb5011e 100644 --- a/src/calibre/customize/profiles.py +++ b/src/calibre/customize/profiles.py @@ -248,6 +248,9 @@ class OutputProfile(Plugin): #: If True, the date is appended to the title of downloaded news periodical_date_in_title = True + #: The character used to represent a star in ratings + ratings_char = u'*' + @classmethod def tags_to_string(cls, tags): return escape(', '.join(tags)) @@ -273,6 +276,7 @@ class iPadOutput(OutputProfile): 'macros': {'border-width': '{length}|medium|thick|thin'} } ] + ratings_char = u'\u2605' touchscreen = True # touchscreen_news_css {{{ touchscreen_news_css = u''' @@ -553,10 +557,11 @@ class KindleOutput(OutputProfile): fsizes = [12, 12, 14, 16, 18, 20, 22, 24] supports_mobi_indexing = True periodical_date_in_title = False + ratings_char = u'\u2605' @classmethod def tags_to_string(cls, tags): - return u'%s
%s' % (', '.join(tags), + return u'%s
%s' % (', '.join(tags), 'ttt '.join(tags)+'ttt ') class KindleDXOutput(OutputProfile): diff --git a/src/calibre/devices/apple/driver.py b/src/calibre/devices/apple/driver.py index e318d368ff..5fe36faf75 100644 --- a/src/calibre/devices/apple/driver.py +++ b/src/calibre/devices/apple/driver.py @@ -207,8 +207,8 @@ class ITUNES(DriverBase): for (j,p_book) in enumerate(self.update_list): if False: if isosx: - self.log.info(" looking for %s" % - str(p_book['lib_book'])[-9:]) + self.log.info(" looking for '%s' by %s uuid:%s" % + (p_book['title'],p_book['author'], p_book['uuid'])) elif iswindows: self.log.info(" looking for '%s' by %s (%s)" % (p_book['title'],p_book['author'], p_book['uuid'])) @@ -303,7 +303,7 @@ class ITUNES(DriverBase): this_book.device_collections = [] this_book.library_id = library_books[this_book.path] if this_book.path in library_books else None this_book.size = book.size() - this_book.uuid = book.album() + this_book.uuid = book.composer() # Hack to discover if we're running in GUI environment if self.report_progress is not None: this_book.thumbnail = self._generate_thumbnail(this_book.path, book) @@ -732,15 +732,15 @@ class ITUNES(DriverBase): for path in paths: if DEBUG: self._dump_cached_book(self.cached_books[path], indent=2) - self.log.info(" looking for '%s' by '%s' (%s)" % + self.log.info(" looking for '%s' by '%s' uuid:%s" % (self.cached_books[path]['title'], self.cached_books[path]['author'], self.cached_books[path]['uuid'])) # Purge the booklist, self.cached_books, thumb cache for i,bl_book in enumerate(booklists[0]): - if False: - self.log.info(" evaluating '%s' by '%s' (%s)" % + if DEBUG: + self.log.info(" evaluating '%s' by '%s' uuid:%s" % (bl_book.title, bl_book.author,bl_book.uuid)) found = False @@ -781,10 +781,10 @@ class ITUNES(DriverBase): zf.close() break -# else: -# if DEBUG: -# self.log.error(" unable to find '%s' by '%s' (%s)" % -# (bl_book.title, bl_book.author,bl_book.uuid)) + else: + if DEBUG: + self.log.error(" unable to 
find '%s' by '%s' (%s)" % + (bl_book.title, bl_book.author,bl_book.uuid)) if False: self._dump_booklist(booklists[0], indent = 2) @@ -905,7 +905,8 @@ class ITUNES(DriverBase): # Add new_book to self.cached_books if DEBUG: - self.log.info(" adding '%s' by '%s' ['%s'] to self.cached_books" % + self.log.info("ITUNES.upload_books()") + self.log.info(" adding '%s' by '%s' uuid:%s to self.cached_books" % ( metadata[i].title, metadata[i].author, metadata[i].uuid)) self.cached_books[this_book.path] = { 'author': metadata[i].author, @@ -943,7 +944,11 @@ class ITUNES(DriverBase): new_booklist.append(this_book) self._update_iTunes_metadata(metadata[i], db_added, lb_added, this_book) - # Add new_book to self.cached_paths + # Add new_book to self.cached_books + if DEBUG: + self.log.info("ITUNES.upload_books()") + self.log.info(" adding '%s' by '%s' uuid:%s to self.cached_books" % + ( metadata[i].title, metadata[i].author, metadata[i].uuid)) self.cached_books[this_book.path] = { 'author': metadata[i].author[0], 'dev_book': db_added, @@ -1406,8 +1411,8 @@ class ITUNES(DriverBase): for book in booklist: if isosx: - self.log.info("%s%-40.40s %-30.30s %-10.10s" % - (' '*indent,book.title, book.author, str(book.library_id)[-9:])) + self.log.info("%s%-40.40s %-30.30s %-10.10s %s" % + (' '*indent,book.title, book.author, str(book.library_id)[-9:], book.uuid)) elif iswindows: self.log.info("%s%-40.40s %-30.30s" % (' '*indent,book.title, book.author)) @@ -1547,11 +1552,12 @@ class ITUNES(DriverBase): if isosx: for ub in self.update_list: - self.log.info("%s%-40.40s %-30.30s %-10.10s" % + self.log.info("%s%-40.40s %-30.30s %-10.10s %s" % (' '*indent, ub['title'], ub['author'], - str(ub['lib_book'])[-9:])) + str(ub['lib_book'])[-9:], + ub['uuid'])) elif iswindows: for ub in self.update_list: self.log.info("%s%-40.40s %-30.30s" % @@ -2342,8 +2348,10 @@ class ITUNES(DriverBase): if isosx: if DEBUG: self.log.info(" deleting '%s' from iDevice" % cached_book['title']) - 
cached_book['dev_book'].delete() - + try: + cached_book['dev_book'].delete() + except: + self.log.error(" error deleting '%s'" % cached_book['title']) elif iswindows: hit = self._find_device_book(cached_book) if hit: @@ -2802,7 +2810,7 @@ class ITUNES_ASYNC(ITUNES): #this_book.library_id = library_books[this_book.path] if this_book.path in library_books else None this_book.library_id = library_books[book] this_book.size = library_books[book].size() - this_book.uuid = library_books[book].album() + this_book.uuid = library_books[book].composer() # Hack to discover if we're running in GUI environment if self.report_progress is not None: this_book.thumbnail = self._generate_thumbnail(this_book.path, library_books[book]) @@ -2842,6 +2850,7 @@ class ITUNES_ASYNC(ITUNES): this_book.device_collections = [] this_book.library_id = library_books[book] this_book.size = library_books[book].Size + this_book.uuid = library_books[book].Composer # Hack to discover if we're running in GUI environment if self.report_progress is not None: this_book.thumbnail = self._generate_thumbnail(this_book.path, library_books[book]) diff --git a/src/calibre/devices/kobo/driver.py b/src/calibre/devices/kobo/driver.py index f24e00143b..a2be629449 100644 --- a/src/calibre/devices/kobo/driver.py +++ b/src/calibre/devices/kobo/driver.py @@ -5,15 +5,16 @@ __license__ = 'GPL v3' __copyright__ = '2010, Timothy Legge and Kovid Goyal ' __docformat__ = 'restructuredtext en' -import os +import os, time import sqlite3 as sqlite from calibre.devices.usbms.books import BookList from calibre.devices.kobo.books import Book from calibre.devices.kobo.books import ImageWrapper from calibre.devices.mime import mime_type_ext -from calibre.devices.usbms.driver import USBMS +from calibre.devices.usbms.driver import USBMS, debug_print from calibre import prints +from calibre.devices.usbms.books import CollectionsBookList class KOBO(USBMS): @@ -21,12 +22,15 @@ class KOBO(USBMS): gui_name = 'Kobo Reader' description = 
_('Communicate with the Kobo Reader') author = 'Timothy Legge and Kovid Goyal' - version = (1, 0, 4) + version = (1, 0, 6) supported_platforms = ['windows', 'osx', 'linux'] + booklist_class = CollectionsBookList + # Ordered list of supported formats FORMATS = ['epub', 'pdf'] + CAN_SET_METADATA = True VENDOR_ID = [0x2237] PRODUCT_ID = [0x4161] @@ -40,6 +44,12 @@ class KOBO(USBMS): VIRTUAL_BOOK_EXTENSIONS = frozenset(['kobo']) + EXTRA_CUSTOMIZATION_MESSAGE = _('The Kobo supports only one collection ' + 'currently: the \"Im_Reading\" list. Create a tag called \"Im_Reading\" ')+\ + 'for automatic management' + + EXTRA_CUSTOMIZATION_DEFAULT = ', '.join(['tags']) + def initialize(self): USBMS.initialize(self) self.book_class = Book @@ -63,6 +73,8 @@ class KOBO(USBMS): self._card_b_prefix if oncard == 'cardb' \ else self._main_prefix + self.booklist_class.rebuild_collections = self.rebuild_collections + # get the metadata cache bl = self.booklist_class(oncard, prefix, self.settings) need_sync = self.parse_metadata_cache(bl, prefix, self.METADATA_CACHE) @@ -85,9 +97,7 @@ class KOBO(USBMS): playlist_map = {} if readstatus == 1: - if lpath not in playlist_map: - playlist_map[lpath] = [] - playlist_map[lpath].append("I\'m Reading") + playlist_map[lpath]= "Im_Reading" path = self.normalize_path(path) # print "Normalized FileName: " + path @@ -104,14 +114,17 @@ class KOBO(USBMS): if self.update_metadata_item(bl[idx]): # print 'update_metadata_item returned true' changed = True - bl[idx].device_collections = playlist_map.get(lpath, []) + if lpath in playlist_map and \ + playlist_map[lpath] not in bl[idx].device_collections: + bl[idx].device_collections.append(playlist_map[lpath]) else: if ContentType == '6': book = Book(prefix, lpath, title, authors, mime, date, ContentType, ImageID, size=1048576) else: book = self.book_from_path(prefix, lpath, title, authors, mime, date, ContentType, ImageID) # print 'Update booklist' - book.device_collections = playlist_map.get(book.lpath, []) 
+ book.device_collections = [playlist_map[lpath]] if lpath in playlist_map else [] + if bl.add_book(book, replace_metadata=False): changed = True except: # Probably a path encoding error @@ -398,3 +411,95 @@ class KOBO(USBMS): size = os.stat(cls.normalize_path(os.path.join(prefix, lpath))).st_size book = Book(prefix, lpath, title, authors, mime, date, ContentType, ImageID, size=size, other=mi) return book + + def get_device_paths(self): + paths, prefixes = {}, {} + for prefix, path, source_id in [ + ('main', 'metadata.calibre', 0), + ('card_a', 'metadata.calibre', 1), + ('card_b', 'metadata.calibre', 2) + ]: + prefix = getattr(self, '_%s_prefix'%prefix) + if prefix is not None and os.path.exists(prefix): + paths[source_id] = os.path.join(prefix, *(path.split('/'))) + return paths + + def update_device_database_collections(self, booklists, collections_attributes): +# debug_print('Starting update_device_database_collections', collections_attributes) + + # Force collections_attributes to be 'tags' as no other is currently supported +# debug_print('KOBO: overriding the provided collections_attributes:', collections_attributes) + collections_attributes = ['tags'] + + collections = booklists.get_collections(collections_attributes) +# debug_print('Collections', collections) + for category, books in collections.items(): + if category == 'Im_Reading': + # Create a connection to the sqlite database + connection = sqlite.connect(self._main_prefix + '.kobo/KoboReader.sqlite') + cursor = connection.cursor() + + # Reset Im_Reading list in the database + query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null' + try: + cursor.execute (query) + except: + debug_print('Database Exception: Unable to reset Im_Reading list') + raise + else: +# debug_print('Commit: Reset Im_Reading list') + connection.commit() + + for book in books: +# debug_print('Title:', book.title, 'lpath:', book.path) + book.device_collections = ['Im_Reading'] + + extension = 
os.path.splitext(book.path)[1] + ContentType = self.get_content_type_from_extension(extension) + + ContentID = self.contentid_from_path(book.path, ContentType) + datelastread = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime()) + + t = (datelastread,ContentID,) + + try: + cursor.execute('update content set ReadStatus=1,FirstTimeReading=\'false\',DateLastRead=? where BookID is Null and ContentID = ?', t) + except: + debug_print('Database Exception: Unable create Im_Reading list') + raise + else: + connection.commit() + # debug_print('Database: Commit create Im_Reading list') + + cursor.close() + connection.close() + +# debug_print('Finished update_device_database_collections', collections_attributes) + + def sync_booklists(self, booklists, end_session=True): +# debug_print('KOBO: started sync_booklists') + paths = self.get_device_paths() + + blists = {} + for i in paths: + if booklists[i] is not None: + #debug_print('Booklist: ', i) + blists[i] = booklists[i] + opts = self.settings() + if opts.extra_customization: + collections = [x.lower().strip() for x in + opts.extra_customization.split(',')] + else: + collections = [] + + #debug_print('KOBO: collection fields:', collections) + for i, blist in blists.items(): + self.update_device_database_collections(blist, collections) + + USBMS.sync_booklists(self, booklists, end_session=end_session) + #debug_print('KOBO: finished sync_booklists') + + def rebuild_collections(self, booklist, oncard): + collections_attributes = [] + self.update_device_database_collections(booklist, collections_attributes) + diff --git a/src/calibre/devices/prs505/driver.py b/src/calibre/devices/prs505/driver.py index c55936be2d..f90a8ab263 100644 --- a/src/calibre/devices/prs505/driver.py +++ b/src/calibre/devices/prs505/driver.py @@ -35,16 +35,16 @@ class PRS505(USBMS): VENDOR_NAME = 'SONY' WINDOWS_MAIN_MEM = re.compile( - r'(PRS-(505|300|500))|' - r'(PRS-((700[#/])|((6|9)00&)))' + r'(PRS-(505|500|300))|' + r'(PRS-((700[#/])|((6|9|3)(0|5)0&)))' ) 
WINDOWS_CARD_A_MEM = re.compile( r'(PRS-(505|500)[#/]\S+:MS)|' - r'(PRS-((700[/#]\S+:)|((6|9)00[#_]))MS)' + r'(PRS-((700[/#]\S+:)|((6|9)(0|5)0[#_]))MS)' ) WINDOWS_CARD_B_MEM = re.compile( r'(PRS-(505|500)[#/]\S+:SD)|' - r'(PRS-((700[/#]\S+:)|((6|9)00[#_]))SD)' + r'(PRS-((700[/#]\S+:)|((6|9)(0|5)0[#_]))SD)' ) diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index c8803fb922..3ea2926461 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -366,7 +366,7 @@ OptionRecommendation(name='html_unwrap_factor', recommended_value=0.40, level=OptionRecommendation.LOW, help=_('Scale used to determine the length at which a line should ' 'be unwrapped if preprocess is enabled. Valid values are a decimal between 0 and 1. The ' - 'default is 0.40, just below the median line length. This will unwrap typical books ' + 'default is 0.40, just below the median line length. This will unwrap typical books ' ' with hard line breaks, but should be reduced if the line length is variable.' ) ), diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py index f9d16b428c..3fe6ce0ed4 100644 --- a/src/calibre/ebooks/conversion/utils.py +++ b/src/calibre/ebooks/conversion/utils.py @@ -92,7 +92,7 @@ class PreProcessor(object): # If more than 40% of the lines are empty paragraphs then delete them to clean up spacing linereg = re.compile('(?<=)', re.IGNORECASE|re.DOTALL) blankreg = re.compile(r'\s*]*>\s*(<(b|i|u)>)?\s*()?\s*

', re.IGNORECASE) - multi_blank = re.compile(r'(\s*]*>\s*(<(b|i|u)>)?\s*()?\s*

){2,}', re.IGNORECASE) + #multi_blank = re.compile(r'(\s*]*>\s*(<(b|i|u)>)?\s*()?\s*

){2,}', re.IGNORECASE) blanklines = blankreg.findall(html) lines = linereg.findall(html) if len(lines) > 1: @@ -148,12 +148,9 @@ class PreProcessor(object): else: format = 'html' - # Calculate Length - #if getattr(self.extra_opts, 'html_unwrap_factor', 0.0) > 0.01: - length = line_length('pdf', html, getattr(self.extra_opts, 'html_unwrap_factor')) - #else: - # length = line_length(format, html, 0.4) - # self.log("#@#%!$@#$ - didn't find unwrap_factor") + # Calculate Length + length = line_length('pdf', html, getattr(self.extra_opts, + 'html_unwrap_factor', 0.4)) self.log("*** Median line length is " + str(length) + ",calculated with " + format + " format ***") # # Unwrap and/or delete soft-hyphens, hyphens diff --git a/src/calibre/ebooks/fb2/output.py b/src/calibre/ebooks/fb2/output.py index d0125afe89..d6c7a25a90 100644 --- a/src/calibre/ebooks/fb2/output.py +++ b/src/calibre/ebooks/fb2/output.py @@ -28,6 +28,9 @@ class FB2Output(OutputFormatPlugin): ]) def convert(self, oeb_book, output_path, input_plugin, opts, log): + from calibre.ebooks.oeb.transforms.jacket import linearize_jacket + linearize_jacket(oeb_book) + fb2mlizer = FB2MLizer(log) fb2_content = fb2mlizer.extract_content(oeb_book, opts) diff --git a/src/calibre/ebooks/lrf/input.py b/src/calibre/ebooks/lrf/input.py index b5591176d1..c54f3b071f 100644 --- a/src/calibre/ebooks/lrf/input.py +++ b/src/calibre/ebooks/lrf/input.py @@ -420,7 +420,7 @@ class LRFInput(InputFormatPlugin): styles.write() return os.path.abspath('content.opf') - def preprocess_html(self, html): + def preprocess_html(self, html): preprocessor = PreProcessor(log=getattr(self, 'log', None)) return preprocessor(html) diff --git a/src/calibre/ebooks/oeb/transforms/cover.py b/src/calibre/ebooks/oeb/transforms/cover.py index 59b42df68a..532c9bbc03 100644 --- a/src/calibre/ebooks/oeb/transforms/cover.py +++ b/src/calibre/ebooks/oeb/transforms/cover.py @@ -99,7 +99,8 @@ class CoverManager(object): series_string = None if m.series and 
m.series_index: series_string = _('Book %s of %s')%( - fmt_sidx(m.series_index[0], use_roman=True), m.series[0]) + fmt_sidx(m.series_index[0], use_roman=True), + unicode(m.series[0])) try: from calibre.ebooks import calibre_cover diff --git a/src/calibre/ebooks/oeb/transforms/flatcss.py b/src/calibre/ebooks/oeb/transforms/flatcss.py index ffdc641d1e..7212bd33c6 100644 --- a/src/calibre/ebooks/oeb/transforms/flatcss.py +++ b/src/calibre/ebooks/oeb/transforms/flatcss.py @@ -147,7 +147,6 @@ class CSSFlattener(object): extra_css=css) self.stylizers[item] = stylizer - def baseline_node(self, node, stylizer, sizes, csize): csize = stylizer.style(node)['font-size'] if node.text: @@ -195,7 +194,7 @@ class CSSFlattener(object): value = 0.0 cssdict[property] = "%0.5fem" % (value / fsize) - def flatten_node(self, node, stylizer, names, styles, psize, left=0): + def flatten_node(self, node, stylizer, names, styles, psize, item_id, left=0): if not isinstance(node.tag, basestring) \ or namespace(node.tag) != XHTML_NS: return @@ -287,15 +286,18 @@ class CSSFlattener(object): if self.lineh and 'line-height' not in cssdict: lineh = self.lineh / psize cssdict['line-height'] = "%0.5fem" % lineh + if (self.context.remove_paragraph_spacing or self.context.insert_blank_line) and tag in ('p', 'div'): - for prop in ('margin', 'padding', 'border'): - for edge in ('top', 'bottom'): - cssdict['%s-%s'%(prop, edge)] = '0pt' + if item_id != 'calibre_jacket' or self.context.output_profile.name == 'Kindle': + for prop in ('margin', 'padding', 'border'): + for edge in ('top', 'bottom'): + cssdict['%s-%s'%(prop, edge)] = '0pt' if self.context.insert_blank_line: cssdict['margin-top'] = cssdict['margin-bottom'] = '0.5em' if self.context.remove_paragraph_spacing: cssdict['text-indent'] = "%1.1fem" % self.context.remove_paragraph_spacing_indent_size + if cssdict: items = cssdict.items() items.sort() @@ -314,7 +316,7 @@ class CSSFlattener(object): if 'style' in node.attrib: del node.attrib['style'] for 
child in node: - self.flatten_node(child, stylizer, names, styles, psize, left) + self.flatten_node(child, stylizer, names, styles, psize, item_id, left) def flatten_head(self, item, stylizer, href): html = item.data @@ -361,7 +363,7 @@ class CSSFlattener(object): stylizer = self.stylizers[item] body = html.find(XHTML('body')) fsize = self.context.dest.fbase - self.flatten_node(body, stylizer, names, styles, fsize) + self.flatten_node(body, stylizer, names, styles, fsize, item.id) items = [(key, val) for (val, key) in styles.items()] items.sort() css = ''.join(".%s {\n%s;\n}\n\n" % (key, val) for key, val in items) diff --git a/src/calibre/ebooks/oeb/transforms/jacket.py b/src/calibre/ebooks/oeb/transforms/jacket.py index fec4d230c3..88c7a4ff0e 100644 --- a/src/calibre/ebooks/oeb/transforms/jacket.py +++ b/src/calibre/ebooks/oeb/transforms/jacket.py @@ -6,139 +6,200 @@ __license__ = 'GPL v3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import textwrap +import sys from xml.sax.saxutils import escape -from itertools import repeat from lxml import etree -from calibre.ebooks.oeb.base import XPath, XPNSMAP -from calibre import guess_type +from calibre import guess_type, strftime +from calibre.ebooks.BeautifulSoup import BeautifulSoup +from calibre.ebooks.oeb.base import XPath, XHTML_NS, XHTML from calibre.library.comments import comments_to_html + +JACKET_XPATH = '//h:meta[@name="calibre-content" and @content="jacket"]' + class Jacket(object): ''' Book jacket manipulation. Remove first image and insert comments at start of book. ''' - JACKET_TEMPLATE = textwrap.dedent(u'''\ - - - %(title)s - - - -
-
-

%(title)s

-

%(jacket)s

-
%(series)s
-
%(rating)s
-
%(tags)s
-
-
- %(comments)s -
-
- - - ''') + def remove_images(self, item, limit=1): + path = XPath('//h:img[@src]') + removed = 0 + for img in path(item.data): + if removed >= limit: + break + href = item.abshref(img.get('src')) + image = self.oeb.manifest.hrefs.get(href, None) + if image is not None: + self.oeb.manifest.remove(image) + img.getparent().remove(img) + removed += 1 + return removed def remove_first_image(self): - path = XPath('//h:img[@src]') - for i, item in enumerate(self.oeb.spine): - if i > 2: break - for img in path(item.data): - href = item.abshref(img.get('src')) - image = self.oeb.manifest.hrefs.get(href, None) - if image is not None: - self.log('Removing first image', img.get('src')) - self.oeb.manifest.remove(image) - img.getparent().remove(img) - return - - def get_rating(self, rating): - ans = '' - if rating is None: - return - try: - num = float(rating)/2 - except: - return ans - num = max(0, num) - num = min(num, 5) - if num < 1: - return ans - id, href = self.oeb.manifest.generate('star', 'star.png') - self.oeb.manifest.add(id, href, 'image/png', data=I('star.png', data=True)) - ans = 'Rating: ' + ''.join(repeat('star'%href, num)) - return ans + for item in self.oeb.spine: + removed = self.remove_images(item) + if removed > 0: + self.log('Removed first image') + break def insert_metadata(self, mi): self.log('Inserting metadata into book...') - comments = mi.comments - if not comments: - try: - comments = unicode(self.oeb.metadata.description[0]) - except: - comments = '' - if not comments.strip(): - comments = '' - orig_comments = comments - if comments: - comments = comments_to_html(comments) - series = 'Series: ' + escape(mi.series if mi.series else '') - if mi.series and mi.series_index is not None: - series += escape(' [%s]'%mi.format_series_index()) - if not mi.series: - series = '' - tags = mi.tags - if not tags: - try: - tags = map(unicode, self.oeb.metadata.subject) - except: - tags = [] - if tags: - tags = 'Tags: ' + self.opts.dest.tags_to_string(tags) - 
else: - tags = '' + try: - title = mi.title if mi.title else unicode(self.oeb.metadata.title[0]) + tags = map(unicode, self.oeb.metadata.subject) + except: + tags = [] + + try: + comments = unicode(self.oeb.metadata.description[0]) + except: + comments = '' + + try: + title = unicode(self.oeb.metadata.title[0]) except: title = _('Unknown') - def generate_html(comments): - return self.JACKET_TEMPLATE%dict(xmlns=XPNSMAP['h'], - title=escape(title), comments=comments, - jacket=escape(_('Book Jacket')), series=series, - tags=tags, rating=self.get_rating(mi.rating)) - id, href = self.oeb.manifest.generate('jacket', 'jacket.xhtml') - from calibre.ebooks.oeb.base import RECOVER_PARSER, XPath - try: - root = etree.fromstring(generate_html(comments), parser=RECOVER_PARSER) - except: - root = etree.fromstring(generate_html(escape(orig_comments)), - parser=RECOVER_PARSER) - jacket = XPath('//h:meta[@name="calibre-content" and @content="jacket"]') - found = None - for item in list(self.oeb.spine)[:4]: - try: - if jacket(item.data): - found = item - break - except: - continue - if found is None: - item = self.oeb.manifest.add(id, href, guess_type(href)[0], data=root) - self.oeb.spine.insert(0, item, True) - else: - self.log('Found existing book jacket, replacing...') - found.data = root + root = render_jacket(mi, self.opts.output_profile, + alt_title=title, alt_tags=tags, + alt_comments=comments) + id, href = self.oeb.manifest.generate('calibre_jacket', 'jacket.xhtml') + item = self.oeb.manifest.add(id, href, guess_type(href)[0], data=root) + self.oeb.spine.insert(0, item, True) + + def remove_existing_jacket(self): + for x in self.oeb.spine[:4]: + if XPath(JACKET_XPATH)(x.data): + self.remove_images(x, limit=sys.maxint) + self.oeb.manifest.remove(x) + self.log('Removed existing jacket') + break def __call__(self, oeb, opts, metadata): + ''' + Add metadata in jacket.xhtml if specified in opts + If not specified, remove previous jacket instance + ''' self.oeb, self.opts, 
self.log = oeb, opts, oeb.log + self.remove_existing_jacket() if opts.remove_first_image: self.remove_first_image() if opts.insert_metadata: self.insert_metadata(metadata) + +# Render Jacket {{{ + +def get_rating(rating, rchar): + ans = '' + try: + num = float(rating)/2 + except: + return ans + num = max(0, num) + num = min(num, 5) + if num < 1: + return ans + + ans = rchar * int(num) + return ans + + +def render_jacket(mi, output_profile, + alt_title=_('Unknown'), alt_tags=[], alt_comments=''): + css = P('jacket/stylesheet.css', data=True).decode('utf-8') + + try: + title_str = mi.title if mi.title else alt_title + except: + title_str = _('Unknown') + title = '%s' % (escape(title_str)) + + series = escape(mi.series if mi.series else '') + if mi.series and mi.series_index is not None: + series += escape(' [%s]'%mi.format_series_index()) + if not mi.series: + series = '' + + try: + pubdate = strftime(u'%Y', mi.pubdate.timetuple()) + except: + pubdate = '' + + rating = get_rating(mi.rating, output_profile.ratings_char) + + tags = mi.tags if mi.tags else alt_tags + if tags: + tags = output_profile.tags_to_string(tags) + else: + tags = '' + + comments = mi.comments if mi.comments else alt_comments + comments = comments.strip() + orig_comments = comments + if comments: + comments = comments_to_html(comments) + + def generate_html(comments): + args = dict(xmlns=XHTML_NS, + title_str=title_str, + css=css, + title=title, + pubdate_label=_('Published'), pubdate=pubdate, + series_label=_('Series'), series=series, + rating_label=_('Rating'), rating=rating, + tags_label=_('Tags'), tags=tags, + comments=comments, + footer='' + ) + + generated_html = P('jacket/template.xhtml', + data=True).decode('utf-8').format(**args) + + # Post-process the generated html to strip out empty header items + soup = BeautifulSoup(generated_html) + if not series: + series_tag = soup.find('tr', attrs={'class':'cbj_series'}) + series_tag.extract() + if not rating: + rating_tag = soup.find('tr', 
attrs={'class':'cbj_rating'}) + rating_tag.extract() + if not tags: + tags_tag = soup.find('tr', attrs={'class':'cbj_tags'}) + tags_tag.extract() + if not pubdate: + pubdate_tag = soup.find('tr', attrs={'class':'cbj_pubdate'}) + pubdate_tag.extract() + if output_profile.short_name != 'kindle': + hr_tag = soup.find('hr', attrs={'class':'cbj_kindle_banner_hr'}) + hr_tag.extract() + + return soup.renderContents(None) + + from calibre.ebooks.oeb.base import RECOVER_PARSER + + try: + root = etree.fromstring(generate_html(comments), parser=RECOVER_PARSER) + except: + try: + root = etree.fromstring(generate_html(escape(orig_comments)), + parser=RECOVER_PARSER) + except: + root = etree.fromstring(generate_html(''), + parser=RECOVER_PARSER) + return root + +# }}} + +def linearize_jacket(oeb): + for x in oeb.spine[:4]: + if XPath(JACKET_XPATH)(x.data): + for e in XPath('//h:table|//h:tr|//h:th')(x.data): + e.tag = XHTML('div') + for e in XPath('//h:td')(x.data): + e.tag = XHTML('span') + break + diff --git a/src/calibre/ebooks/oeb/transforms/rescale.py b/src/calibre/ebooks/oeb/transforms/rescale.py index 55aafded5c..c3b4d6d40c 100644 --- a/src/calibre/ebooks/oeb/transforms/rescale.py +++ b/src/calibre/ebooks/oeb/transforms/rescale.py @@ -72,10 +72,13 @@ class RescaleImages(object): Qt.IgnoreAspectRatio, Qt.SmoothTransformation) data = pixmap_to_data(img, format=ext) else: - im = im.resize((int(new_width), int(new_height)), PILImage.ANTIALIAS) - of = cStringIO.StringIO() - im.convert('RGB').save(of, ext) - data = of.getvalue() + try: + im = im.resize((int(new_width), int(new_height)), PILImage.ANTIALIAS) + of = cStringIO.StringIO() + im.convert('RGB').save(of, ext) + data = of.getvalue() + except: + self.log.exception('Failed to rescale image') if data is not None: item.data = data item.unload_data_from_memory() diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py index 1b61404589..e58dce5559 100644 --- a/src/calibre/gui2/__init__.py +++ 
b/src/calibre/gui2/__init__.py @@ -50,6 +50,7 @@ gprefs.defaults['action-layout-context-menu-device'] = ( gprefs.defaults['show_splash_screen'] = True gprefs.defaults['toolbar_icon_size'] = 'medium' gprefs.defaults['toolbar_text'] = 'auto' +gprefs.defaults['show_child_bar'] = False # }}} diff --git a/src/calibre/gui2/actions/__init__.py b/src/calibre/gui2/actions/__init__.py index 57ad900fba..b2d1656367 100644 --- a/src/calibre/gui2/actions/__init__.py +++ b/src/calibre/gui2/actions/__init__.py @@ -71,6 +71,12 @@ class InterfaceAction(QObject): all_locations = frozenset(['toolbar', 'toolbar-device', 'context-menu', 'context-menu-device']) + #: Type of action + #: 'current' means acts on the current view + #: 'global' means an action that does not act on the current view, but rather + #: on calibre as a whole + action_type = 'global' + def __init__(self, parent, site_customization): QObject.__init__(self, parent) self.setObjectName(self.name) diff --git a/src/calibre/gui2/actions/add.py b/src/calibre/gui2/actions/add.py index f0ff794fab..add7bf1d5b 100644 --- a/src/calibre/gui2/actions/add.py +++ b/src/calibre/gui2/actions/add.py @@ -25,6 +25,7 @@ class AddAction(InterfaceAction): action_spec = (_('Add books'), 'add_book.png', _('Add books to the calibre library/device from files on your computer') , _('A')) + action_type = 'current' def genesis(self): self._add_filesystem_book = self.Dispatcher(self.__add_filesystem_book) diff --git a/src/calibre/gui2/actions/add_to_library.py b/src/calibre/gui2/actions/add_to_library.py index 6fc0d5fb1f..05aea8f1dd 100644 --- a/src/calibre/gui2/actions/add_to_library.py +++ b/src/calibre/gui2/actions/add_to_library.py @@ -13,6 +13,7 @@ class AddToLibraryAction(InterfaceAction): action_spec = (_('Add books to library'), 'add_book.png', _('Add books to your calibre library from the connected device'), None) dont_add_to = frozenset(['toolbar', 'context-menu']) + action_type = 'current' def genesis(self): 
self.qaction.triggered.connect(self.add_books_to_library) diff --git a/src/calibre/gui2/actions/annotate.py b/src/calibre/gui2/actions/annotate.py index 5356d63e98..dfafcd1a39 100644 --- a/src/calibre/gui2/actions/annotate.py +++ b/src/calibre/gui2/actions/annotate.py @@ -18,6 +18,7 @@ class FetchAnnotationsAction(InterfaceAction): name = 'Fetch Annotations' action_spec = (_('Fetch annotations (experimental)'), None, None, None) + action_type = 'current' def genesis(self): pass diff --git a/src/calibre/gui2/actions/convert.py b/src/calibre/gui2/actions/convert.py index ee0f06ab71..29acfc52b1 100644 --- a/src/calibre/gui2/actions/convert.py +++ b/src/calibre/gui2/actions/convert.py @@ -21,6 +21,7 @@ class ConvertAction(InterfaceAction): name = 'Convert Books' action_spec = (_('Convert books'), 'convert.png', None, _('C')) dont_add_to = frozenset(['toolbar-device', 'context-menu-device']) + action_type = 'current' def genesis(self): cm = QMenu() diff --git a/src/calibre/gui2/actions/copy_to_library.py b/src/calibre/gui2/actions/copy_to_library.py index 7127c91e8c..6b7654f644 100644 --- a/src/calibre/gui2/actions/copy_to_library.py +++ b/src/calibre/gui2/actions/copy_to_library.py @@ -80,6 +80,7 @@ class CopyToLibraryAction(InterfaceAction): _('Copy selected books to the specified library'), None) popup_type = QToolButton.InstantPopup dont_add_to = frozenset(['toolbar-device', 'context-menu-device']) + action_type = 'current' def genesis(self): self.menu = QMenu(self.gui) diff --git a/src/calibre/gui2/actions/delete.py b/src/calibre/gui2/actions/delete.py index 0343c6df84..406860e4ec 100644 --- a/src/calibre/gui2/actions/delete.py +++ b/src/calibre/gui2/actions/delete.py @@ -16,6 +16,7 @@ class DeleteAction(InterfaceAction): name = 'Remove Books' action_spec = (_('Remove books'), 'trash.png', None, _('Del')) + action_type = 'current' def genesis(self): self.qaction.triggered.connect(self.delete_books) diff --git a/src/calibre/gui2/actions/edit_collections.py 
b/src/calibre/gui2/actions/edit_collections.py index e45d36fc62..7f5dd76538 100644 --- a/src/calibre/gui2/actions/edit_collections.py +++ b/src/calibre/gui2/actions/edit_collections.py @@ -13,6 +13,7 @@ class EditCollectionsAction(InterfaceAction): action_spec = (_('Manage collections'), None, _('Manage the collections on this device'), None) dont_add_to = frozenset(['toolbar', 'context-menu']) + action_type = 'current' def genesis(self): self.qaction.triggered.connect(self.edit_collections) diff --git a/src/calibre/gui2/actions/edit_metadata.py b/src/calibre/gui2/actions/edit_metadata.py index 878ba77a43..ac04652efa 100644 --- a/src/calibre/gui2/actions/edit_metadata.py +++ b/src/calibre/gui2/actions/edit_metadata.py @@ -22,6 +22,7 @@ class EditMetadataAction(InterfaceAction): name = 'Edit Metadata' action_spec = (_('Edit metadata'), 'edit_input.png', None, _('E')) + action_type = 'current' def genesis(self): self.create_action(spec=(_('Merge book records'), 'merge_books.png', diff --git a/src/calibre/gui2/actions/open.py b/src/calibre/gui2/actions/open.py index 106bfa24f6..141ff01a66 100644 --- a/src/calibre/gui2/actions/open.py +++ b/src/calibre/gui2/actions/open.py @@ -14,6 +14,7 @@ class OpenFolderAction(InterfaceAction): action_spec = (_('Open containing folder'), 'document_open.png', None, _('O')) dont_add_to = frozenset(['toolbar-device', 'context-menu-device']) + action_type = 'current' def genesis(self): self.qaction.triggered.connect(self.gui.iactions['View'].view_folder) diff --git a/src/calibre/gui2/actions/save_to_disk.py b/src/calibre/gui2/actions/save_to_disk.py index bfcc02e130..e9664b9980 100644 --- a/src/calibre/gui2/actions/save_to_disk.py +++ b/src/calibre/gui2/actions/save_to_disk.py @@ -38,6 +38,7 @@ class SaveToDiskAction(InterfaceAction): name = "Save To Disk" action_spec = (_('Save to disk'), 'save.png', None, _('S')) + action_type = 'current' def genesis(self): self.qaction.triggered.connect(self.save_to_disk) diff --git 
a/src/calibre/gui2/actions/show_book_details.py b/src/calibre/gui2/actions/show_book_details.py index d17d0998f1..18b0a694bf 100644 --- a/src/calibre/gui2/actions/show_book_details.py +++ b/src/calibre/gui2/actions/show_book_details.py @@ -16,6 +16,7 @@ class ShowBookDetailsAction(InterfaceAction): action_spec = (_('Show book details'), 'dialog_information.png', None, _('I')) dont_add_to = frozenset(['toolbar-device', 'context-menu-device']) + action_type = 'current' def genesis(self): self.qaction.triggered.connect(self.show_book_info) diff --git a/src/calibre/gui2/actions/similar_books.py b/src/calibre/gui2/actions/similar_books.py index 1a14869a9c..644cd3160a 100644 --- a/src/calibre/gui2/actions/similar_books.py +++ b/src/calibre/gui2/actions/similar_books.py @@ -16,6 +16,7 @@ class SimilarBooksAction(InterfaceAction): name = 'Similar Books' action_spec = (_('Similar books...'), None, None, None) popup_type = QToolButton.InstantPopup + action_type = 'current' def genesis(self): m = QMenu(self.gui) diff --git a/src/calibre/gui2/actions/view.py b/src/calibre/gui2/actions/view.py index 2f6be24e5b..0fbf86c567 100644 --- a/src/calibre/gui2/actions/view.py +++ b/src/calibre/gui2/actions/view.py @@ -22,6 +22,7 @@ class ViewAction(InterfaceAction): name = 'View' action_spec = (_('View'), 'view.png', None, _('V')) + action_type = 'current' def genesis(self): self.persistent_files = [] diff --git a/src/calibre/gui2/convert/structure_detection.py b/src/calibre/gui2/convert/structure_detection.py index 68f820bda4..3f350d4508 100644 --- a/src/calibre/gui2/convert/structure_detection.py +++ b/src/calibre/gui2/convert/structure_detection.py @@ -28,6 +28,8 @@ class StructureDetectionWidget(Widget, Ui_Form): 'preprocess_html', 'remove_header', 'header_regex', 'remove_footer', 'footer_regex','html_unwrap_factor'] ) + self.opt_html_unwrap_factor.setEnabled(False) + self.huf_label.setEnabled(False) self.db, self.book_id = db, book_id for x in ('pagebreak', 'rule', 'both', 'none'): 
self.opt_chapter_mark.addItem(x) @@ -66,6 +68,6 @@ class StructureDetectionWidget(Widget, Ui_Form): return True def set_value_handler(self, g, val): - if val is None and isinstance(g, QDoubleSpinBox): + if val is None and g is self.opt_html_unwrap_factor: g.setValue(0.0) - return True \ No newline at end of file + return True diff --git a/src/calibre/gui2/convert/structure_detection.ui b/src/calibre/gui2/convert/structure_detection.ui index 54534af950..21fe365e99 100644 --- a/src/calibre/gui2/convert/structure_detection.ui +++ b/src/calibre/gui2/convert/structure_detection.ui @@ -14,10 +14,10 @@ Form - + - + Chapter &mark: @@ -27,28 +27,31 @@ - + 20 - + Remove first &image - + Insert &metadata as page at start of book - + + + + Qt::Vertical @@ -61,52 +64,41 @@ - + Remove F&ooter - + Remove H&eader - + - - - opt_page_breaks_before - + + - - - opt_footer_regex - - - - + + - &Preprocess input file to possibly improve structure detection + Line &un-wrap factor during preprocess: + + + opt_html_unwrap_factor - - - - Qt::RightToLeft - - - Line Un-Wrapping Factor - - - - + + + + 1.000000000000000 @@ -118,6 +110,26 @@ + + + + Qt::Horizontal + + + + 40 + 20 + + + + + + + + &Preprocess input file to possibly improve structure detection + + + @@ -135,5 +147,38 @@ - + + + opt_preprocess_html + toggled(bool) + opt_html_unwrap_factor + setEnabled(bool) + + + 328 + 87 + + + 481 + 113 + + + + + opt_preprocess_html + toggled(bool) + huf_label + setEnabled(bool) + + + 295 + 88 + + + 291 + 105 + + + + diff --git a/src/calibre/gui2/device.py b/src/calibre/gui2/device.py index 45c78ce6da..b20cd7594f 100644 --- a/src/calibre/gui2/device.py +++ b/src/calibre/gui2/device.py @@ -627,12 +627,11 @@ class DeviceMixin(object): # {{{ def connect_to_folder(self): dir = choose_dir(self, 'Select Device Folder', _('Select folder to open as device')) - kls = FOLDER_DEVICE - self.device_manager.mount_device(kls=kls, kind='folder', path=dir) + if dir is not None: + 
self.device_manager.mount_device(kls=FOLDER_DEVICE, kind='folder', path=dir) def connect_to_itunes(self): - kls = ITUNES_ASYNC - self.device_manager.mount_device(kls=kls, kind='itunes', path=None) + self.device_manager.mount_device(kls=ITUNES_ASYNC, kind='itunes', path=None) # disconnect from both folder and itunes devices def disconnect_mounted_device(self): @@ -746,6 +745,7 @@ class DeviceMixin(object): # {{{ if job.failed: self.device_job_exception(job) return + # set_books_in_library might schedule a sync_booklists job self.set_books_in_library(job.result, reset=True) mainlist, cardalist, cardblist = job.result self.memory_view.set_database(mainlist) @@ -790,11 +790,12 @@ class DeviceMixin(object): # {{{ self.device_manager.remove_books_from_metadata(paths, self.booklists()) model.paths_deleted(paths) - self.upload_booklists() # Force recomputation the library's ondevice info. We need to call # set_books_in_library even though books were not added because - # the deleted book might have been an exact match. - self.set_books_in_library(self.booklists(), reset=True) + # the deleted book might have been an exact match. Upload the booklists + # if set_books_in_library did not. + if not self.set_books_in_library(self.booklists(), reset=True): + self.upload_booklists() self.book_on_device(None, None, reset=True) # We need to reset the ondevice flags in the library. Use a big hammer, # so we don't need to worry about whether some succeeded or not. @@ -1231,7 +1232,7 @@ class DeviceMixin(object): # {{{ self.location_manager.update_devices(cp, fs, self.device_manager.device.icon) # reset the views so that up-to-date info is shown. 
These need to be - # here because the sony driver updates collections in sync_booklists + # here because some drivers update collections in sync_booklists self.memory_view.reset() self.card_a_view.reset() self.card_b_view.reset() @@ -1281,8 +1282,6 @@ class DeviceMixin(object): # {{{ self.device_manager.add_books_to_metadata(job.result, metadata, self.booklists()) - self.upload_booklists() - books_to_be_deleted = [] if memory and memory[1]: books_to_be_deleted = memory[1] @@ -1292,12 +1291,15 @@ class DeviceMixin(object): # {{{ # book already there with a different book. This happens frequently in # news. When this happens, the book match indication will be wrong # because the UUID changed. Force both the device and the library view - # to refresh the flags. - self.set_books_in_library(self.booklists(), reset=True) + # to refresh the flags. Set_books_in_library could upload the booklists. + # If it does not, then do it here. + if not self.set_books_in_library(self.booklists(), reset=True): + self.upload_booklists() self.book_on_device(None, reset=True) self.refresh_ondevice_info(device_connected = True) - view = self.card_a_view if on_card == 'carda' else self.card_b_view if on_card == 'cardb' else self.memory_view + view = self.card_a_view if on_card == 'carda' else \ + self.card_b_view if on_card == 'cardb' else self.memory_view view.model().resort(reset=False) view.model().research() for f in files: @@ -1372,7 +1374,7 @@ class DeviceMixin(object): # {{{ try: db = self.library_view.model().db except: - return + return False # Build a cache (map) of the library, so the search isn't On**2 self.db_book_title_cache = {} self.db_book_uuid_cache = {} @@ -1467,10 +1469,13 @@ class DeviceMixin(object): # {{{ # Set author_sort if it isn't already asort = getattr(book, 'author_sort', None) if not asort and book.authors: - book.author_sort = self.library_view.model().db.author_sort_from_authors(book.authors) + book.author_sort = self.library_view.model().db.\ + 
author_sort_from_authors(book.authors) if update_metadata: if self.device_manager.is_device_connected: - self.device_manager.sync_booklists(None, booklists) + self.device_manager.sync_booklists( + Dispatcher(self.metadata_synced), booklists) + return update_metadata # }}} diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py index 3d79b01c14..d07eac7670 100644 --- a/src/calibre/gui2/dialogs/metadata_single.py +++ b/src/calibre/gui2/dialogs/metadata_single.py @@ -6,10 +6,7 @@ The dialog used to edit meta information for a book as well as add/remove formats ''' -import os -import re -import time -import traceback +import os, re, time, traceback, textwrap from PyQt4.Qt import SIGNAL, QObject, Qt, QTimer, QThread, QDate, \ QPixmap, QListWidgetItem, QDialog, pyqtSignal @@ -331,6 +328,14 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): ResizableDialog.__init__(self, window) self.bc_box.layout().setAlignment(self.cover, Qt.AlignCenter|Qt.AlignHCenter) self.cancel_all = False + base = unicode(self.author_sort.toolTip()) + self.ok_aus_tooltip = '

' + textwrap.fill(base+'

'+ + _(' The green color indicates that the current ' + 'author sort matches the current author')) + self.bad_aus_tooltip = '

'+textwrap.fill(base + '

'+ + _(' The red color indicates that the current ' + 'author sort does not match the current author')) + if cancel_all: self.__abort_button = self.button_box.addButton(self.button_box.Abort) self.__abort_button.setToolTip(_('Abort the editing of all remaining books')) @@ -375,6 +380,10 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): self.remove_unused_series) QObject.connect(self.auto_author_sort, SIGNAL('clicked()'), self.deduce_author_sort) + self.connect(self.author_sort, SIGNAL('textChanged(const QString&)'), + self.author_sort_box_changed) + self.connect(self.authors, SIGNAL('editTextChanged(const QString&)'), + self.authors_box_changed) self.connect(self.formats, SIGNAL('itemDoubleClicked(QListWidgetItem*)'), self.show_format) self.connect(self.formats, SIGNAL('delete_format()'), self.remove_format) @@ -467,6 +476,28 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): for c in range(2, len(ans[i].widgets), 2): w.setTabOrder(ans[i].widgets[c-1], ans[i].widgets[c+1]) + def authors_box_changed(self, txt): + aus = unicode(txt) + aus = re.sub(r'\s+et al\.$', '', aus) + aus = self.db.author_sort_from_authors(string_to_authors(aus)) + self.mark_author_sort(normal=(unicode(self.author_sort.text()) == aus)) + + def author_sort_box_changed(self, txt): + au = unicode(self.authors.text()) + au = re.sub(r'\s+et al\.$', '', au) + au = self.db.author_sort_from_authors(string_to_authors(au)) + self.mark_author_sort(normal=(au == txt)) + + def mark_author_sort(self, normal=True): + if normal: + col = 'rgb(0, 255, 0, 20%)' + else: + col = 'rgb(255, 0, 0, 20%)' + self.author_sort.setStyleSheet('QLineEdit { color: black; ' + 'background-color: %s; }'%col) + tt = self.ok_aus_tooltip if normal else self.bad_aus_tooltip + self.author_sort.setToolTip(tt) + def validate_isbn(self, isbn): isbn = unicode(isbn).strip() if not isbn: diff --git a/src/calibre/gui2/dialogs/metadata_single.ui b/src/calibre/gui2/dialogs/metadata_single.ui index 
7184192eba..74febf9c29 100644 --- a/src/calibre/gui2/dialogs/metadata_single.ui +++ b/src/calibre/gui2/dialogs/metadata_single.ui @@ -151,14 +151,16 @@ - Specify how the author(s) of this book should be sorted. For example Charles Dickens should be sorted as Dickens, Charles. + Specify how the author(s) of this book should be sorted. For example Charles Dickens should be sorted as Dickens, Charles. +If the box is colored green, then text matches the individual author's sort strings. If it is colored red, then the authors and this text do not match. - Automatically create the author sort entry based on the current author entry + Automatically create the author sort entry based on the current author entry. +Using this button to create author sort will change author sort from red to green. ... diff --git a/src/calibre/gui2/layout.py b/src/calibre/gui2/layout.py index 58d5267c8e..ec7e023dc1 100644 --- a/src/calibre/gui2/layout.py +++ b/src/calibre/gui2/layout.py @@ -61,7 +61,7 @@ class LocationManager(QObject): # {{{ ac('library', _('Library'), 'lt.png', _('Show books in calibre library')) - ac('main', _('Reader'), 'reader.png', + ac('main', _('Device'), 'reader.png', _('Show books in the main memory of the device')) ac('carda', _('Card A'), 'sd.png', _('Show books in storage card A')) @@ -197,11 +197,21 @@ class SearchBar(QWidget): # {{{ # }}} +class Spacer(QWidget): + + def __init__(self, parent): + QWidget.__init__(self, parent) + self.l = QHBoxLayout() + self.setLayout(self.l) + self.l.addStretch(10) + + class ToolBar(QToolBar): # {{{ - def __init__(self, donate, location_manager, parent): + def __init__(self, donate, location_manager, child_bar, parent): QToolBar.__init__(self, parent) self.gui = parent + self.child_bar = child_bar self.setContextMenuPolicy(Qt.PreventContextMenu) self.setMovable(False) self.setFloatable(False) @@ -223,16 +233,19 @@ class ToolBar(QToolBar): # {{{ sz = gprefs['toolbar_icon_size'] sz = {'small':24, 'medium':48, 'large':64}[sz] 
self.setIconSize(QSize(sz, sz)) + self.child_bar.setIconSize(QSize(sz, sz)) style = Qt.ToolButtonTextUnderIcon if gprefs['toolbar_text'] == 'never': style = Qt.ToolButtonIconOnly self.setToolButtonStyle(style) + self.child_bar.setToolButtonStyle(style) self.donate_button.set_normal_icon_size(sz, sz) def contextMenuEvent(self, *args): pass def build_bar(self): + self.child_bar.setVisible(gprefs['show_child_bar']) self.showing_donate = False showing_device = self.location_manager.has_device actions = '-device' if showing_device else '' @@ -244,10 +257,16 @@ class ToolBar(QToolBar): # {{{ m.setVisible(False) self.clear() + self.child_bar.clear() self.added_actions = [] + self.spacers = [Spacer(self.child_bar), Spacer(self.child_bar), + Spacer(self), Spacer(self)] + self.child_bar.addWidget(self.spacers[0]) + if gprefs['show_child_bar']: + self.addWidget(self.spacers[2]) for what in actions: - if what is None: + if what is None and not gprefs['show_child_bar']: self.addSeparator() elif what == 'Location Manager': for ac in self.location_manager.available_actions: @@ -262,12 +281,21 @@ class ToolBar(QToolBar): # {{{ self.showing_donate = True elif what in self.gui.iactions: action = self.gui.iactions[what] - self.addAction(action.qaction) + bar = self + if action.action_type == 'current' and gprefs['show_child_bar']: + bar = self.child_bar + bar.addAction(action.qaction) self.added_actions.append(action.qaction) self.setup_tool_button(action.qaction, action.popup_type) + self.child_bar.addWidget(self.spacers[1]) + if gprefs['show_child_bar']: + self.addWidget(self.spacers[3]) + def setup_tool_button(self, ac, menu_mode=None): ch = self.widgetForAction(ac) + if ch is None: + ch = self.child_bar.widgetForAction(ac) ch.setCursor(Qt.PointingHandCursor) ch.setAutoRaise(True) if ac.menu() is not None and menu_mode is not None: @@ -280,7 +308,8 @@ class ToolBar(QToolBar): # {{{ if p == 'never': style = Qt.ToolButtonIconOnly - if p == 'auto' and self.preferred_width > 
self.width()+35: + if p == 'auto' and self.preferred_width > self.width()+35 and \ + not gprefs['show_child_bar']: style = Qt.ToolButtonIconOnly self.setToolButtonStyle(style) @@ -309,9 +338,11 @@ class MainWindowMixin(object): # {{{ self.iactions['Fetch News'].init_scheduler(db) self.search_bar = SearchBar(self) + self.child_bar = QToolBar(self) self.tool_bar = ToolBar(self.donate_button, - self.location_manager, self) + self.location_manager, self.child_bar, self) self.addToolBar(Qt.TopToolBarArea, self.tool_bar) + self.addToolBar(Qt.BottomToolBarArea, self.child_bar) l = self.centralwidget.layout() l.addWidget(self.search_bar) diff --git a/src/calibre/gui2/library/models.py b/src/calibre/gui2/library/models.py index c746a5aa56..3370fd4b75 100644 --- a/src/calibre/gui2/library/models.py +++ b/src/calibre/gui2/library/models.py @@ -1027,7 +1027,9 @@ class DeviceBooksModel(BooksModel): # {{{ def resort(self, reset=True): if self.sorted_on: self.sort(self.column_map.index(self.sorted_on[0]), - self.sorted_on[1], reset=reset) + self.sorted_on[1], reset=False) + if reset: + self.reset() def columnCount(self, parent): if parent and parent.isValid(): diff --git a/src/calibre/gui2/preferences/look_feel.py b/src/calibre/gui2/preferences/look_feel.py index f30b2fddbb..10c2fcfe95 100644 --- a/src/calibre/gui2/preferences/look_feel.py +++ b/src/calibre/gui2/preferences/look_feel.py @@ -46,6 +46,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): r('use_roman_numerals_for_series_number', config) r('separate_cover_flow', config, restart_required=True) r('search_as_you_type', config) + r('show_child_bar', gprefs) choices = [(_('Small'), 'small'), (_('Medium'), 'medium'), (_('Large'), 'large')] diff --git a/src/calibre/gui2/preferences/look_feel.ui b/src/calibre/gui2/preferences/look_feel.ui index 7c6c736b24..1de55d51ef 100644 --- a/src/calibre/gui2/preferences/look_feel.ui +++ b/src/calibre/gui2/preferences/look_feel.ui @@ -173,6 +173,13 @@ + + + + &Split the toolbar into two 
toolbars + + + diff --git a/src/calibre/gui2/tag_view.py b/src/calibre/gui2/tag_view.py index 519d533ff6..6c50a71b92 100644 --- a/src/calibre/gui2/tag_view.py +++ b/src/calibre/gui2/tag_view.py @@ -376,7 +376,7 @@ class TagsModel(QAbstractItemModel): # {{{ 'series' : QIcon(I('series.png')), 'formats' : QIcon(I('book.png')), 'publisher' : QIcon(I('publisher.png')), - 'rating' : QIcon(I('star.png')), + 'rating' : QIcon(I('rating.png')), 'news' : QIcon(I('news.png')), 'tags' : QIcon(I('tags.png')), ':custom' : QIcon(I('column.png')), diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py index bd2160aff1..e14d092727 100644 --- a/src/calibre/library/catalog.py +++ b/src/calibre/library/catalog.py @@ -2523,6 +2523,10 @@ class EPUB_MOBI(CatalogPlugin): # Fetch the database as a dictionary self.booksBySeries = self.plugin.search_sort_db(self.db, self.opts) + if not self.booksBySeries: + self.opts.generate_series = False + self.opts.log(" no series found in selected books, cancelling series generation") + return friendly_name = "Series" @@ -2586,7 +2590,7 @@ class EPUB_MOBI(CatalogPlugin): aTag = Tag(soup, 'a') aTag['name'] = "%s_series" % re.sub('\W','',book['series']).lower() pSeriesTag.insert(0,aTag) - pSeriesTag.insert(1,NavigableString(self.NOT_READ_SYMBOL + '%s' % book['series'])) + pSeriesTag.insert(1,NavigableString('%s' % book['series'])) divTag.insert(dtc,pSeriesTag) dtc += 1 @@ -2595,7 +2599,14 @@ class EPUB_MOBI(CatalogPlugin): ptc = 0 # book with read/reading/unread symbol - if 'read' in book and book['read']: + for tag in book['tags']: + if tag == self.opts.read_tag: + book['read'] = True + break + else: + book['read'] = False + + if book['read']: # check mark pBookTag.insert(ptc,NavigableString(self.READ_SYMBOL)) pBookTag['class'] = "read_book" diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 8a5ab75c3c..f5f0f724ba 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py 
@@ -597,8 +597,12 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): return identical_book_ids def has_cover(self, index, index_is_id=False): - id = index if index_is_id else self.id(index) - path = os.path.join(self.library_path, self.path(id, index_is_id=True), 'cover.jpg') + id = index if index_is_id else self.id(index) + try: + path = os.path.join(self.abspath(id, index_is_id=True), 'cover.jpg') + except: + # Can happen if path has not yet been set + return False return os.access(path, os.R_OK) def remove_cover(self, id, notify=True): @@ -609,6 +613,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): except (IOError, OSError): time.sleep(0.2) os.remove(path) + self.data.set(id, self.FIELD_MAP['cover'], False, row_is_id=True) if notify: self.notify('cover', [id]) @@ -629,6 +634,7 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): except (IOError, OSError): time.sleep(0.2) save_cover_data_to(data, path) + self.data.set(id, self.FIELD_MAP['cover'], True, row_is_id=True) if notify: self.notify('cover', [id]) @@ -1087,8 +1093,11 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): self.set_path(id, True) self.notify('metadata', [id]) - # Given a book, return the list of author sort strings for the book's authors def authors_sort_strings(self, id, index_is_id=False): + ''' + Given a book, return the list of author sort strings + for the book's authors + ''' id = id if index_is_id else self.id(id) aut_strings = self.conn.get(''' SELECT sort @@ -1744,10 +1753,10 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): series_index = 1.0 if mi.series_index is None else mi.series_index aus = mi.author_sort if mi.author_sort else self.author_sort_from_authors(mi.authors) title = mi.title - if isinstance(aus, str): + if isbytestring(aus): aus = aus.decode(preferred_encoding, 'replace') - if isinstance(title, str): - title = title.decode(preferred_encoding) + if 
isbytestring(title): + title = title.decode(preferred_encoding, 'replace') obj = self.conn.execute('INSERT INTO books(title, series_index, author_sort) VALUES (?, ?, ?)', (title, series_index, aus)) id = obj.lastrowid diff --git a/src/calibre/manual/conversion.rst b/src/calibre/manual/conversion.rst index c8bc3ef665..cfc2871396 100644 --- a/src/calibre/manual/conversion.rst +++ b/src/calibre/manual/conversion.rst @@ -329,6 +329,17 @@ There are a few more options in this section. of as a separate cover. If you also specify a cover in |app|, then the converted book will have two covers. This option will simply remove the first image from the source document, thereby ensuring that the converted book has only one cover, the one specified in |app|. + +:guilabel:`Preprocess input` + This option activates various algorithms that try to detect and correct common cases of + badly formatted input documents. Things like hard line breaks, large blocks of text with no formatting, etc. + Turn this option on if your input document suffers from bad formatting. But be aware that in + some cases, this option can lead to worse results, so use with care. + +:guilabel:`Line-unwrap factor` + This option controls the algorithm |app| uses to remove hard line breaks. For example, if the value of this + option is 0.4, that means calibre will remove hard line breaks from the end of lines whose lengths are less + than the length of 40% of all lines in the document. 
Table of Contents ------------------ diff --git a/src/calibre/utils/magick/__init__.py b/src/calibre/utils/magick/__init__.py index 073a030361..2707430c67 100644 --- a/src/calibre/utils/magick/__init__.py +++ b/src/calibre/utils/magick/__init__.py @@ -194,7 +194,7 @@ class Image(_magick.Image): # {{{ # }}} -def create_canvas(width, height, bgcolor='white'): +def create_canvas(width, height, bgcolor='#ffffff'): canvas = Image() canvas.create_canvas(int(width), int(height), str(bgcolor)) return canvas diff --git a/src/calibre/utils/magick/draw.py b/src/calibre/utils/magick/draw.py index 82a0237b8d..ed9e3d3d83 100644 --- a/src/calibre/utils/magick/draw.py +++ b/src/calibre/utils/magick/draw.py @@ -5,12 +5,14 @@ __license__ = 'GPL v3' __copyright__ = '2010, Kovid Goyal ' __docformat__ = 'restructuredtext en' +import os from calibre.utils.magick import Image, DrawingWand, create_canvas from calibre.constants import __appname__, __version__ from calibre import fit_image -def save_cover_data_to(data, path, bgcolor='white', resize_to=None): +def save_cover_data_to(data, path, bgcolor='#ffffff', resize_to=None, + return_data=False): ''' Saves image in data to path, in the format specified by the path extension. 
Composes the image onto a blank canvas so as to @@ -22,9 +24,11 @@ def save_cover_data_to(data, path, bgcolor='white', resize_to=None): img.size = (resize_to[0], resize_to[1]) canvas = create_canvas(img.size[0], img.size[1], bgcolor) canvas.compose(img) + if return_data: + return canvas.export(os.path.splitext(path)[1][1:]) canvas.save(path) -def thumbnail(data, width=120, height=120, bgcolor='white', fmt='jpg'): +def thumbnail(data, width=120, height=120, bgcolor='#ffffff', fmt='jpg'): img = Image() img.load(data) owidth, oheight = img.size @@ -57,7 +61,7 @@ def identify(path): return identify_data(data) def add_borders_to_image(path_to_image, left=0, top=0, right=0, bottom=0, - border_color='white'): + border_color='#ffffff'): img = Image() img.open(path_to_image) lwidth, lheight = img.size @@ -76,7 +80,7 @@ def create_text_wand(font_size, font_path=None): ans.text_alias = True return ans -def create_text_arc(text, font_size, font=None, bgcolor='white'): +def create_text_arc(text, font_size, font=None, bgcolor='#ffffff'): if isinstance(text, unicode): text = text.encode('utf-8') @@ -144,7 +148,7 @@ class TextLine(object): def create_cover_page(top_lines, logo_path, width=590, height=750, - bgcolor='white', output_format='jpg'): + bgcolor='#ffffff', output_format='jpg'): ''' Create the standard calibre cover page and return it as a byte string in the specified output_format. diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 9ba9583c73..a140dfbf05 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -290,10 +290,12 @@ class BasicNewsRecipe(Recipe): #: the cover for the periodical. Overriding this in your recipe instructs #: calibre to render the downloaded cover into a frame whose width and height #: are expressed as a percentage of the downloaded cover. 
- #: cover_margins = (10,15,'white') pads the cover with a white margin + #: cover_margins = (10, 15, '#ffffff') pads the cover with a white margin #: 10px on the left and right, 15px on the top and bottom. - #: Colors name defined at http://www.imagemagick.org/script/color.php - cover_margins = (0,0,'white') + #: Color names defined at http://www.imagemagick.org/script/color.php + #: Note that for some reason, white does not always work on windows. Use + #: #ffffff instead + cover_margins = (0, 0, '#ffffff') #: Set to a non empty string to disable this recipe #: The string will be used as the disabled message