diff --git a/Changelog.yaml b/Changelog.yaml
index c2124aadd9..1ec61fa0b5 100644
--- a/Changelog.yaml
+++ b/Changelog.yaml
@@ -4,6 +4,107 @@
# for important features/bug fixes.
# Also, each release can have new and improved recipes.
+- version: 0.6.37
+ date: 2010-02-01
+
+ new features:
+ - title: "E-book viewer: Add support for viewing SVG images"
+ type: major
+
+ - title: "Add category of Recently added books when generating catalog in e-book format"
+
+ - title: "OS X: Allow adding of books to calibre via drag and drop on the calibre dock icon"
+
+ - title: "Add support for masthead images when downloading news for the Kindle"
+
+ - title: "MOBI metadata: Allow setting of metadata in old PRC files without EXTH headers as well"
+
+ bug fixes:
+ - title: Changing the date in Dutch
+ tickets: [4732]
+
+ - title: "Fix regression that broke sending files to unupdated PRS 500s"
+
+ - title: "MOBI Input: Ignore width and height percentage measures for <img> tags."
+ tickets: [4726]
+
+ - title: "EPUB Output: Remove <img> tags that point to the internet for their images as this causes the ever delicate ADE to crash."
+ tickets: [4692]
+
+ - title: "Comic Input: Handle UTF-8 BOM when converting a cbc file"
+ tickets: [4683]
+
+ - title: "Allow rating to be cleared via the Bulk metadata edit dialog"
+ tickets: [4693]
+
+ - title: "Add workaround for broken linux systems with multiply encoded file names"
+ tickets: [4721]
+
+ - title: Fix bug preventing the use of indices when setting save to disk templates
+ tickets: [4710]
+
+ - title: "Linux device mounting. Use filetype of auto to allow non vfat filesystems to be mounted"
+ tickets: [4707]
+
+ - title: "Catalog generation: Make sorting of numbers in title as text optional"
+
+ - title: "Fix error while sending book with non-ascii character in title/author to device on linux"
+ tickets: [4690]
+
+ - title: "Fix reset cover in edit meta information dialog does not actually remove cover"
+ tickets: [4731]
+
+ new recipes:
+ - title: Kamera Bild
+ author: Darko Miletic
+
+ - title: The Online Photographer
+ author: Darko Miletic
+
+ - title: The Luminous Landscape
+ author: Darko Miletic
+
+ - title: Slovo
+ author: Abelturd
+
+ - title: Various Danish newspapers
+ author: Darko Miletic
+
+ - title: Heraldo de Aragon
+ author: Lorenzo Vigentini
+
+ - title: Orange County Register
+ author: Lorenzo Vigentini
+
+ - title: Open Left
+ author: Xanthan Gum
+
+ - title: Michelle Malkin
+ author: Walt Anthony
+
+ - title: The Metro Montreal
+ author: Jerry Clapperton
+
+ - title: The Gazette
+ author: Jerry Clapperton
+
+ - title: Macleans Magazine
+ author: Nick Redding
+
+ - title: NY Times Sunday Book Review
+ author: Krittika Goyal
+
+ - title: Various Italian newspapers
+ author: Lorenzo Vigentini
+
+
+ improved recipes:
+ - The Irish Times
+ - Washington Post
+ - NIN
+ - The Discover Magazine
+ - Pagina 12
+
- version: 0.6.36
date: 2010-01-25
diff --git a/resources/images/news/kamerabild.png b/resources/images/news/kamerabild.png
new file mode 100644
index 0000000000..401d5a33a3
Binary files /dev/null and b/resources/images/news/kamerabild.png differ
diff --git a/resources/images/news/theluminouslandscape.png b/resources/images/news/theluminouslandscape.png
new file mode 100644
index 0000000000..e6d452f5d2
Binary files /dev/null and b/resources/images/news/theluminouslandscape.png differ
diff --git a/resources/recipes/discover_magazine.recipe b/resources/recipes/discover_magazine.recipe
index 0d2ee3ee74..cd4a078231 100644
--- a/resources/recipes/discover_magazine.recipe
+++ b/resources/recipes/discover_magazine.recipe
@@ -4,7 +4,7 @@ __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
-doscovermagazine.com
+discovermagazine.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
@@ -12,42 +12,36 @@ from calibre.web.feeds.news import BasicNewsRecipe
class DiscoverMagazine(BasicNewsRecipe):
title = u'Discover Magazine'
- description = u'Science, Technology and the Future'
- __author__ = 'Mike Diaz'
+ description = u'Science, Technology and the Future'
+ __author__ = 'Mike Diaz'
language = 'en'
oldest_article = 33
max_articles_per_feed = 20
no_stylesheets = True
- remove_javascript = True
+ remove_javascript = True
use_embedded_content = False
encoding = 'utf-8'
-
extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'
+
+ remove_tags = [dict(name='div', attrs={'id':['searchModule', 'mainMenu', 'tool-box']}),
+ dict(name='img', attrs={'src':'http://discovermagazine.com/onebyone.gif'})]
- remove_tags_before = dict(id='articlePage')
-
- keep_only_tags = [dict(name='div', attrs={'id':'articlePage'})]
-
- remove_tags = [dict(attrs={'id':['buttons', 'tool-box', 'teaser', 'already-subscriber', 'teaser-suite', 'related-articles', 'relatedItem', 'box-popular', 'box-blogs', 'box-news', 'footer']}),
- dict(attrs={'class':'popularNewsBox'}),
- dict(name=['img', 'style', 'head'])]
-
- remove_tags_after = dict(id='articlePage')
-
+ remove_tags_after = [dict(name='div', attrs={'class':'articlebody'})]
+
feeds = [
- (u'Technology', u'http://discovermagazine.com/topics/technology/rss.xml'),
- (u'Health - Medicine', u'http://discovermagazine.com/topics/health-medicine/rss.xml'),
- (u'Mind Brain', u'http://discovermagazine.com/topics/mind-brain/rss.xml'),
- (u'Space', u'http://discovermagazine.com/topics/space/rss.xml'),
- (u'Human Origins', u'http://discovermagazine.com/topics/human-origins/rss.xml'),
- (u'Living World', u'http://discovermagazine.com/topics/living-world/rss.xml'),
- (u'Environment', u'http://discovermagazine.com/topics/environment/rss.xml'),
- (u'Physics & Math', u'http://discovermagazine.com/topics/physics-math/rss.xml'),
- (u'Vital Signs', u'http://discovermagazine.com/columns/vital-signs/rss.xml'),
- (u"20 Things you didn't know about...", u'http://discovermagazine.com/columns/20-things-you-didnt-know/rss.xml'),
- (u'Fuzzy Math', u'http://discovermagazine.com/columns/fuzzy-math/rss.xml'),
- (u'The Brain', u'http://discovermagazine.com/columns/the-brain/rss.xml'),
- (u'Stupid Science Word of the Month', u'http://discovermagazine.com/columns/stupid-science-word-of-the-month/rss.xml'),
+ (u'Technology', u'http://discovermagazine.com/topics/technology/rss.xml'),
+ (u'Health - Medicine', u'http://discovermagazine.com/topics/health-medicine/rss.xml'),
+ (u'Mind Brain', u'http://discovermagazine.com/topics/mind-brain/rss.xml'),
+ (u'Space', u'http://discovermagazine.com/topics/space/rss.xml'),
+ (u'Human Origins', u'http://discovermagazine.com/topics/human-origins/rss.xml'),
+ (u'Living World', u'http://discovermagazine.com/topics/living-world/rss.xml'),
+ (u'Environment', u'http://discovermagazine.com/topics/environment/rss.xml'),
+ (u'Physics & Math', u'http://discovermagazine.com/topics/physics-math/rss.xml'),
+ (u'Vital Signs', u'http://discovermagazine.com/columns/vital-signs/rss.xml'),
+ (u"20 Things you didn't know about...", u'http://discovermagazine.com/columns/20-things-you-didnt-know/rss.xml'),
+ (u'Fuzzy Math', u'http://discovermagazine.com/columns/fuzzy-math/rss.xml'),
+ (u'The Brain', u'http://discovermagazine.com/columns/the-brain/rss.xml'),
+ (u'Stupid Science Word of the Month', u'http://discovermagazine.com/columns/stupid-science-word-of-the-month/rss.xml'),
(u'Science Not Fiction', u'http://blogs.discovermagazine.com/sciencenotfiction/wp-rss.php')
- ]
+ ]
\ No newline at end of file
diff --git a/resources/recipes/ilsole24ore.recipe b/resources/recipes/ilsole24ore.recipe
new file mode 100644
index 0000000000..8258bb563d
--- /dev/null
+++ b/resources/recipes/ilsole24ore.recipe
@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+__license__ = 'GPL v3'
+__author__ = 'Lorenzo Vigentini & Edwin van Maastrigt'
+__copyright__ = '2009, Lorenzo Vigentini and Edwin van Maastrigt '
+__description__ = 'Financial news daily paper - v1.02 (30, January 2010)'
+
+'''
+http://www.ilsole24ore.com/
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class ilsole(BasicNewsRecipe):
+ author = 'Lorenzo Vigentini & Edwin van Maastrigt'
+ description = 'Financial news daily paper'
+
+ cover_url = 'http://www.ilsole24ore.com/img2009/header/t_logosole.gif'
+ title = u'il Sole 24 Ore '
+ publisher = 'italiaNews'
+ category = 'News, finance, economy, politics'
+
+ language = 'it'
+ timefmt = '[%a, %d %b, %Y]'
+
+ oldest_article = 2
+ max_articles_per_feed = 50
+ use_embedded_content = False
+
+ remove_javascript = True
+ no_stylesheets = True
+
+ def get_article_url(self, article):
+ return article.get('id', article.get('guid', None))
+
+ def print_version(self, url):  # map an article URL to its '_PRN' variant, dropping any query string
+ link = url.rpartition('?')[0] or url  # rpartition gives '' as head when there is no '?'; keep the whole URL then
+ return link.replace('.shtml', '_PRN.shtml')
+
+ keep_only_tags = [
+ dict(name='div', attrs={'class':'txt'})
+ ]
+ remove_tags = [dict(name='br')]
+
+ feeds = [
+ (u'Prima pagina', u'http://www.ilsole24ore.com/rss/primapagina.xml'),
+ (u'Norme e tributi', u'http://www.ilsole24ore.com/rss/norme-tributi.xml'),
+ (u'Finanza e mercati', u'http://www.ilsole24ore.com/rss/finanza-mercati.xml'),
+ (u'Economia e lavoro', u'http://www.ilsole24ore.com/rss/economia-lavoro.xml'),
+ (u'Italia', u'http://www.ilsole24ore.com/rss/italia.xml'),
+ (u'Mondo', u'http://www.ilsole24ore.com/rss/mondo.xml'),
+ (u'Tecnologia e business', u'http://www.ilsole24ore.com/rss/tecnologia-business.xml'),
+ (u'Cultura e tempo libero', u'http://www.ilsole24ore.com/rss/tempolibero-cultura.xml'),
+ (u'Sport', u'http://www.ilsole24ore.com/rss/sport.xml'),
+ (u'Professionisti 24', u'http://www.ilsole24ore.com/rss/prof_home.xml')
+ ]
+
+ extra_css = '''
+ html, body, table, tr, td, h1, h2, h3, h4, h5, h6, p, a, span, br, img {margin:0;padding:0;border:0;font-size:12px;font-family:Arial;}
+ .linkHighlight {color:#0292c6;}
+ .txt {border-bottom:1px solid #7c7c7c;padding-bottom:20px;text-align:justify;}
+ .txt p {line-height:18px;}
+ .txt span {line-height:22px;}
+ .title h3 {color:#7b7b7b;}
+ .title h4 {color:#08526e;font-size:26px;font-family:"Times New Roman";font-weight:normal;}
+ '''
+
diff --git a/resources/recipes/irish_times.recipe b/resources/recipes/irish_times.recipe
index 7c5772eaa5..a5f1b70d0c 100644
--- a/resources/recipes/irish_times.recipe
+++ b/resources/recipes/irish_times.recipe
@@ -11,7 +11,7 @@ class IrishTimes(BasicNewsRecipe):
title = u'The Irish Times'
__author__ = "Derry FitzGerald, Ray Kinsella and David O'Callaghan"
language = 'en'
- timefmt = ' (%A, %B %e, %Y)'
+ timefmt = ' (%A, %B %d, %Y)'
oldest_article = 3
diff --git a/resources/recipes/kamerabild.recipe b/resources/recipes/kamerabild.recipe
new file mode 100644
index 0000000000..181f752612
--- /dev/null
+++ b/resources/recipes/kamerabild.recipe
@@ -0,0 +1,46 @@
+
+__license__ = 'GPL v3'
+__copyright__ = '2010, Darko Miletic '
+'''
+www.kamerabild.se
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class Kamerabild(BasicNewsRecipe):
+ title = 'Kamera & Bild'
+ __author__ = 'Darko Miletic'
+ description = 'Photo News from Sweden'
+ publisher = 'politiken.dk'
+ category = 'news, photograph, Sweden'
+ oldest_article = 7
+ max_articles_per_feed = 100
+ no_stylesheets = True
+ remove_empty_feeds = True
+ use_embedded_content = False
+ encoding = 'utf8'
+ language = 'sv'
+
+ extra_css = ' body{font-family: Verdana,Arial,Helvetica,sans-serif } .title{font-weight: bold} .pricerunnerAdContainer{border-bottom: 1px solid; border-top: 1px solid; margin-top: 0.5em; margin-bottom: 0.5em} .elementTeaserKicker{font-weight: bold; color: #AE0A10} '
+
+ conversion_options = {
+ 'comment' : description
+ , 'tags' : category
+ , 'publisher': publisher
+ , 'language' : language
+ }
+
+ feeds = [(u'Articles', u'http://www.kamerabild.se/cmlink/Nyheter-fran-KAMERA-BILD-1.43315.xml')]
+ keep_only_tags = [dict(name='div',attrs={'class':'container'})]
+ remove_tags_after = dict(name='div',attrs={'class':'editor'})
+ remove_tags = [
+ dict(name=['object','link','iframe'])
+ ,dict(name='div',attrs={'class':['pricerunner_head','sideBar','img']})
+ ]
+
+ def preprocess_html(self, soup):
+ for item in soup.findAll(style=True):
+ del item['style']
+ return self.adeify_images(soup)
+
+
diff --git a/resources/recipes/lescienze.recipe b/resources/recipes/lescienze.recipe
new file mode 100644
index 0000000000..13d7ea8ea2
--- /dev/null
+++ b/resources/recipes/lescienze.recipe
@@ -0,0 +1,89 @@
+#!/usr/bin/env python
+__license__ = 'GPL v3'
+__author__ = 'Lorenzo Vigentini'
+__copyright__ = '2009, Lorenzo Vigentini '
+__version__ = 'v1.01'
+__date__ = '10, January 2010'
+__description__ = 'Monthly Italian edition of Scientific American'
+
+'''
+http://lescienze.espresso.repubblica.it/
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class leScienze(BasicNewsRecipe):
+ author = 'Lorenzo Vigentini'
+ description = 'Monthly Italian edition of Scientific American'
+
+ cover_url = 'http://lescienze.espresso.repubblica.it/images/logo_lescienze.gif'
+ title = 'le Scienze'
+ publisher = 'Gruppo editoriale lEspresso'
+ category = 'Science, general interest'
+
+ language = 'it'
+ encoding = 'cp1252'
+ timefmt = '[%a, %d %b, %Y]'
+
+ oldest_article = 31
+ max_articles_per_feed = 20
+ use_embedded_content = False
+ recursion = 10
+
+ remove_javascript = True
+ no_stylesheets = True
+
+
+ keep_only_tags = [
+ dict(name='div', attrs={'class':'bigbox'})
+ ]
+
+ remove_tags = [
+ dict(name='span',attrs={'class':'linkindice'}),
+ dict(name='div',attrs={'class':'box-commenti'}),
+ dict(name='div',attrs={'id':['rssdiv','blocco']})
+ ]
+ remove_tags_after = [dict(name='div',attrs={'class':'box-commenti'})]
+
+ feeds = [
+ (u'Antropologia', u'http://data.kataweb.it/rss/scienze/antropologia'),
+ (u'Archeologia', u'http://data.kataweb.it/rss/scienze/archeologia'),
+ (u'Arte e Musica', u'http://data.kataweb.it/rss/scienze/arte_e_musica'),
+ (u'Astrofisica', u'http://data.kataweb.it/rss/scienze/astrofisica'),
+ (u'Astronautica', u'http://data.kataweb.it/rss/scienze/astronautica'),
+ (u'Astronomia', u'http://data.kataweb.it/rss/scienze/astronomia_e_cosmologia'),
+ (u'Biologia', u'http://data.kataweb.it/rss/scienze/biologia'),
+ (u'Chimica', u'http://data.kataweb.it/rss/scienze/chimica'),
+ (u'Ecologia & ambiente', u'http://data.kataweb.it/rss/scienze/ecologia_e_ambiente'),
+ (u'Economia', u'http://data.kataweb.it/rss/scienze/Economia'),
+ (u'Fisica', u'http://data.kataweb.it/rss/scienze/Fisica'),
+ (u'Informatica', u'http://data.kataweb.it/rss/scienze/informatica_e_telecomunicazioni'),
+ (u'Ingegneria', u'http://data.kataweb.it/rss/scienze/ingegneria_e_tecnologia'),
+ (u'Matematica', u'http://data.kataweb.it/rss/scienze/Matematica'),
+ (u'Medicina', u'http://data.kataweb.it/rss/scienze/Medicina'),
+ (u'Paleontologia', u'http://data.kataweb.it/rss/scienze/Paleontologia'),
+ (u'Recensioni', u'http://data.kataweb.it/rss/scienze/Recensioni'),
+ (u'Psicologia', u'http://data.kataweb.it/rss/scienze/psicologie_e_scienze_cognitive'),
+ (u'Scienze della Terra', u'http://data.kataweb.it/rss/scienze/scienze_della_terra'),
+ (u'Scienze dello spazio', u'http://data.kataweb.it/rss/scienze/scienze_dello_spazio'),
+ (u'Scienze naturali', u'http://data.kataweb.it/rss/scienze/scienze_naturali'),
+ (u'Scienze sociali', u'http://data.kataweb.it/rss/scienze/scienze_sociali'),
+ (u'Statistica', u'http://data.kataweb.it/rss/scienze/statistica'),
+ (u'Storia della scienza', u'http://data.kataweb.it/rss/scienze/storia_della_scienza')
+ ]
+
+ extra_css = '''
+ h1 {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:18px;}
+ h2 {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:18px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; }
+ h3 {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px;}
+ h4 {color:#333333; font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; }
+ h5 {color:#333333; font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:14px; text-transform:uppercase;}
+ .occhiello {color:#666666;display:block;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:13px;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:bold;line-height:15px;}
+ .titolo {font-weight:bold;}
+ .label {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:12px;font-size-adjust:none;font-stretch:normal;font-style:normal;font-variant:normal;font-weight:bold;height:15px;line-height:15px;text-transform:uppercase;}
+ .firma {color:#333333;font-family:"Trebuchet MS",Arial,Helvetica,sans-serif;font-size:12px; font-size-adjust:none; font-stretch:normal; font-style:italic; font-variant:normal; font-weight:bold; line-height:15px; text-decoration:none;}
+ .testo {font-family:"Trebuchet MS",Arial,Helvetica,sans-serif; font-size:10px;}
+ '''
+
+
+
diff --git a/resources/recipes/metro_montreal.recipe b/resources/recipes/metro_montreal.recipe
index 094f00316f..8272c760cc 100644
--- a/resources/recipes/metro_montreal.recipe
+++ b/resources/recipes/metro_montreal.recipe
@@ -4,21 +4,26 @@ class Metro_Montreal(BasicNewsRecipe):
title = u'M\xe9tro Montr\xe9al'
__author__ = 'Jerry Clapperton'
- description = u'Le quotidien le plus branch\xe9 sur le monde'
- language = 'fr'
+ description = u'Le quotidien le plus branch\xe9 sur le monde'
+ language = 'fr'
- oldest_article = 7
+ oldest_article = 7
max_articles_per_feed = 20
use_embedded_content = False
- remove_javascript = True
- no_stylesheets = True
- encoding = 'utf-8'
+ remove_javascript = True
+ no_stylesheets = True
+ encoding = 'utf-8'
+ extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'
- extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'
-
- remove_tags = [dict(attrs={'id':'buttons'}), dict(name=['img', 'style'])]
-
- feeds = [(u"L'info", u'http://journalmetro.com/linfo/rss'), (u'Monde', u'http://journalmetro.com/monde/rss'), (u'Culture', u'http://journalmetro.com/culture/rss'), (u'Sports', u'http://journalmetro.com/sports/rss'), (u'Paroles', u'http://journalmetro.com/paroles/rss')]
+ remove_tags = [dict(attrs={'id':'buttons'})]
+
+ feeds = [
+ (u"L'info", u'http://journalmetro.com/linfo/rss'),
+ (u'Monde', u'http://journalmetro.com/monde/rss'),
+ (u'Culture', u'http://journalmetro.com/culture/rss'),
+ (u'Sports', u'http://journalmetro.com/sports/rss'),
+ (u'Paroles', u'http://journalmetro.com/paroles/rss')
+ ]
def print_version(self, url):
- return url.replace('article', 'ArticlePrint') + '?language=fr'
+ return url.replace('article', 'ArticlePrint') + '?language=fr'
\ No newline at end of file
diff --git a/resources/recipes/nin.recipe b/resources/recipes/nin.recipe
index 0872467d2f..a349f0e11f 100644
--- a/resources/recipes/nin.recipe
+++ b/resources/recipes/nin.recipe
@@ -72,9 +72,8 @@ class Nin(BasicNewsRecipe):
section = self.tag_to_string(item)
feedlink = self.PREFIX + item['href']
feedpage = self.index_to_soup(feedlink)
- self.report_progress(0, _('Fetching feed')+' %s...'%(section))
+ self.report_progress(0, _('Fetching feed')+' %s...'%(section))
inarts = []
- count2 = 0
for art in feedpage.findAll('span',attrs={'class':'artTitle'}):
alink = art.parent
url = self.PREFIX + alink['href']
diff --git a/resources/recipes/nytimes.recipe b/resources/recipes/nytimes.recipe
index 8b9283a0af..32e5a4825e 100644
--- a/resources/recipes/nytimes.recipe
+++ b/resources/recipes/nytimes.recipe
@@ -5,7 +5,7 @@ __copyright__ = '2008, Kovid Goyal '
'''
nytimes.com
'''
-import re
+import re, time
from calibre import entity_to_unicode
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString, Comment
diff --git a/resources/recipes/nytimesbook.recipe b/resources/recipes/nytimesbook.recipe
new file mode 100644
index 0000000000..686f30b69a
--- /dev/null
+++ b/resources/recipes/nytimesbook.recipe
@@ -0,0 +1,56 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
+
+class NewYorkTimesBookReview(BasicNewsRecipe):
+ title = u'New York Times Book Review'
+ language = 'en'
+ __author__ = 'Krittika Goyal'
+ oldest_article = 8 #days
+ max_articles_per_feed = 1000
+ recursions = 2
+ #encoding = 'latin1'
+
+ remove_stylesheets = True
+ #remove_tags_before = dict(name='h1', attrs={'class':'heading'})
+ remove_tags_after = dict(name='div', attrs={'id':'authorId'})
+ remove_tags = [
+ dict(name='iframe'),
+ dict(name=['div', 'a'], attrs={'class':['enlargeThis', 'jumpLink']}),
+ dict(name='div', attrs={'id':['sidebarArticles', 'toolsRight']}),
+ #dict(name='ul', attrs={'class':'article-tools'}),
+ #dict(name='ul', attrs={'class':'articleTools'}),
+ ]
+ match_regexps = [
+ r'http://www.nytimes.com/.+pagewanted=[2-9]+'
+ ]
+
+ feeds = [
+('New York Times Sunday Book Review',
+ 'http://feeds.nytimes.com/nyt/rss/SundayBookReview'),
+]
+
+
+ def preprocess_html(self, soup):  # rebuild the page as an empty document holding only the div with id 'article'
+ story = soup.find(name='div', attrs={'id':'article'})
+ #td = heading.findParent(name='td')
+ #td.extract()
+ soup = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')  # skeleton must contain <body> for the lookup below
+ body = soup.find(name='body')
+ body.insert(0, story)
+ #for x in soup.findAll(name='p', text=lambda x:x and '-->' in x):
+ #p = x.findParent('p')
+ #if p is not None:
+ #p.extract()
+ return soup
+
+ def postprocess_html(self, soup, first):
+ for div in soup.findAll(id='pageLinks'):
+ div.extract()
+ if not first:
+ h1 = soup.find('h1')
+ if h1 is not None:
+ h1.extract()
+ t = soup.find(attrs={'class':'timestamp'})
+ if t is not None:
+ t.extract()
+ return soup
diff --git a/resources/recipes/slovo.recipe b/resources/recipes/slovo.recipe
new file mode 100644
index 0000000000..88babdb7b6
--- /dev/null
+++ b/resources/recipes/slovo.recipe
@@ -0,0 +1,41 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class SlovoRecipe(BasicNewsRecipe):
+ __license__ = 'GPL v3'
+ __author__ = 'Abelturd'
+ language = 'sk'
+ version = 1
+
+ title = u'SLOVO'
+ publisher = u''
+ category = u'News, Newspaper'
+ description = u'Politicko-spolo\u010densk\xfd t\xfd\u017edenn\xedk'
+ encoding = 'Windows-1250'
+
+ oldest_article = 1
+ max_articles_per_feed = 100
+ use_embedded_content = False
+ remove_empty_feeds = True
+
+ no_stylesheets = True
+ remove_javascript = True
+
+
+ feeds = []
+ feeds.append((u'V\u0161etky \u010dl\xe1nky', u'http://www.noveslovo.sk/rss.asp'))
+
+ keep_only_tags = []
+ remove_tags = []
+
+ preprocess_regexps = [
+ (re.compile(r'
', re.DOTALL|re.IGNORECASE),
+ lambda match: ''),
+ ]
+
+
+ def print_version(self, url):
+ m = re.search('(?<=id=)[0-9]*', url)
+
+
+ return u'http://www.noveslovo.sk/clanoktlac.asp?id=' + str(m.group(0))
diff --git a/resources/recipes/theluminouslandscape.recipe b/resources/recipes/theluminouslandscape.recipe
new file mode 100644
index 0000000000..ac046dd3ef
--- /dev/null
+++ b/resources/recipes/theluminouslandscape.recipe
@@ -0,0 +1,37 @@
+
+__license__ = 'GPL v3'
+__copyright__ = '2010, Darko Miletic '
+'''
+luminous-landscape.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class theluminouslandscape(BasicNewsRecipe):
+ title = 'The Luminous Landscape'
+ __author__ = 'Darko Miletic'
+ description = 'A photography news and information website in the form of a weblog with multiple authors who write on a variety of photography and art-photography related issues.'
+ publisher = 'The Luminous Landscape '
+ category = 'news, blog, photograph, international'
+ oldest_article = 15
+ max_articles_per_feed = 100
+ no_stylesheets = True
+ remove_empty_feeds = True
+ use_embedded_content = True
+ encoding = 'cp1252'
+ language = 'en'
+
+ conversion_options = {
+ 'comment' : description
+ , 'tags' : category
+ , 'publisher': publisher
+ , 'language' : language
+ }
+
+ feeds = [(u"What's new", u'http://www.luminous-landscape.com/whatsnew/rssfeed.php')]
+ remove_tags = [dict(name=['object','link','iframe'])]
+
+ def preprocess_html(self, soup):
+ return self.adeify_images(soup)
+
+
diff --git a/resources/recipes/theonlinephotographer.recipe b/resources/recipes/theonlinephotographer.recipe
new file mode 100644
index 0000000000..2ff8af518a
--- /dev/null
+++ b/resources/recipes/theonlinephotographer.recipe
@@ -0,0 +1,41 @@
+
+__license__ = 'GPL v3'
+__copyright__ = '2010, Darko Miletic '
+'''
+theonlinephotographer.typepad.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class theonlinephotographer(BasicNewsRecipe):
+ title = 'The Online Photographer'
+ __author__ = 'Darko Miletic'
+ description = 'A photography news and information website in the form of a weblog with multiple authors who write on a variety of photography and art-photography related issues.'
+ publisher = 'The Online Photographer'
+ category = 'news, blog, photograph, international'
+ oldest_article = 15
+ max_articles_per_feed = 100
+ no_stylesheets = True
+ remove_empty_feeds = True
+ use_embedded_content = False
+ encoding = 'utf8'
+ language = 'en'
+
+ extra_css = ' body{font-family: Georgia,"Times New Roman",serif } '
+
+ conversion_options = {
+ 'comment' : description
+ , 'tags' : category
+ , 'publisher': publisher
+ , 'language' : language
+ }
+
+ feeds = [(u'Articles', u'http://feeds.feedburner.com/typepad/ZSjz')]
+ remove_tags_before = dict(name='h3',attrs={'class':'entry-header'})
+ remove_tags_after = dict(name='div',attrs={'class':'entry-footer'})
+ remove_tags = [dict(name=['object','link','iframe'])]
+
+ def preprocess_html(self, soup):
+ return self.adeify_images(soup)
+
+
diff --git a/resources/recipes/tuttosport.recipe b/resources/recipes/tuttosport.recipe
new file mode 100644
index 0000000000..cc1f27e73a
--- /dev/null
+++ b/resources/recipes/tuttosport.recipe
@@ -0,0 +1,66 @@
+#!/usr/bin/env python
+__license__ = 'GPL v3'
+__author__ = 'Lorenzo Vigentini'
+__copyright__ = '2009, Lorenzo Vigentini '
+__version__ = 'v1.01'
+__date__ = '30, January 2010'
+__description__ = 'Sport daily news from Italy'
+
+'''www.tuttosport.com'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class tuttosport(BasicNewsRecipe):
+ author = 'Lorenzo Vigentini'
+ description = 'Sport daily news from Italy'
+
+ cover_url = 'http://www.tuttosport.com/res/imgs/logo_TuttoSport.png'
+ title = 'Tuttosport'
+ publisher = 'Nuova Editoriale Sportiva S.r.l'
+ category = 'Sport News'
+
+ language = 'it'
+ timefmt = '[%a, %d %b, %Y]'
+
+ oldest_article = 2
+ max_articles_per_feed = 20
+ use_embedded_content = False
+ recursion = 10
+
+ remove_javascript = True
+ no_stylesheets = True
+
+ def print_version(self,url):
+ segments = url.split('/')
+ printURL = '/'.join(segments[0:10]) + '?print'
+ return printURL
+
+ keep_only_tags = [
+ dict(name='h2', attrs={'class':'tit_Article'}),
+ dict(name='div', attrs={'class':['box_Img img_L ','txt_ArticleAbstract','txt_Article txtBox_cms']})
+ ]
+
+ feeds = [  # (section title, feed URL) pairs
+ (u'Primo piano',u'http://www.tuttosport.com/rss/primo_piano.xml'),
+ (u'Cronaca',u'http://www.tuttosport.com/rss/Cronaca-205.xml'),
+ (u'Lettere al direttore',u'http://blog.tuttosport.com/direttore/feed'),
+ (u'Calcio',u'http://www.tuttosport.com/rss/Calcio-3.xml'),
+ (u'Speciale Derby',u'http://www.tuttosport.com/rss/Speciale-derby-310.xml'),
+ (u'Formula 1',u'http://www.tuttosport.com/rss/Formula-1-7.xml'),
+ (u'Moto',u'http://www.tuttosport.com/rss/Moto-8.xml'),
+ (u'Basket',u'http://www.tuttosport.com/rss/Basket-9.xml'),
+ (u'Altri Sport',u'http://www.tuttosport.com/rss/Altri-Sport-2.xml'),
+ (u'Tuttosport League',u'http://www.tuttosport.com/rss/Tuttosport-League-245.xml'),
+ (u'Scommesse',u'http://www.tuttosport.com/rss/Scommesse-286.xml')
+ ]
+
+ extra_css = '''
+ body {font-family: Arial, Verdana, sans-serif; margin-bottom: 3em;}
+ h1 {color:#9C3A0B;font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:20px;}
+ h3 {color:#9C3A0B;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:15px;}
+ h2.tit_Article {color:#9C3A0B;margin: 15px 8px 0; margin-bottom: 1px; border-bottom: 3px solid;}
+ .txt_ArticleAbstract {color:#4080AE;clear: both; margin: 3px 8px;}
+ .txt_Article {clear: both; margin: 8px 8px 12px;}
+ .txt_Author {float: right;}
+ .txt_ArticleAuthor {clear: both; margin: 8px;}
+ '''
diff --git a/src/calibre/constants.py b/src/calibre/constants.py
index 67278efb25..c06876f692 100644
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@@ -2,7 +2,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
__appname__ = 'calibre'
-__version__ = '0.6.36'
+__version__ = '0.6.37'
__author__ = "Kovid Goyal "
import re
diff --git a/src/calibre/ebooks/oeb/transforms/rescale.py b/src/calibre/ebooks/oeb/transforms/rescale.py
index 7ce3b5a588..fbf0e9bc4f 100644
--- a/src/calibre/ebooks/oeb/transforms/rescale.py
+++ b/src/calibre/ebooks/oeb/transforms/rescale.py
@@ -35,7 +35,10 @@ class RescaleImages(object):
if not raw: continue
if qt:
img = QImage(10, 10, QImage.Format_ARGB32_Premultiplied)
- if not img.loadFromData(raw): continue
+ try:
+ if not img.loadFromData(raw): continue
+ except:
+ continue
width, height = img.width(), img.height()
else:
f = cStringIO.StringIO(raw)
diff --git a/src/calibre/ebooks/pdf/reflow.py b/src/calibre/ebooks/pdf/reflow.py
index 42c16225d2..9f98147032 100644
--- a/src/calibre/ebooks/pdf/reflow.py
+++ b/src/calibre/ebooks/pdf/reflow.py
@@ -439,7 +439,7 @@ class Page(object):
# closer to the avg number of cols in the set, if equal use larger
# region)
# merge contiguous regions that can contain each other
- absorbed = set([])
+ '''absorbed = set([])
found = True
while found:
found = False
@@ -454,6 +454,8 @@ class Page(object):
break
prev = None if i == 0 else i-1
next = j if self.regions[j] not in regions else None
+ '''
+ pass
diff --git a/src/calibre/gui2/add.py b/src/calibre/gui2/add.py
index ace2ac5c7e..0b37fe2515 100644
--- a/src/calibre/gui2/add.py
+++ b/src/calibre/gui2/add.py
@@ -43,28 +43,34 @@ class RecursiveFind(QThread):
self.single_book_per_directory = single
self.canceled = False
+ def walk(self, root):
+ self.books = []
+ for dirpath in os.walk(root):
+ if self.canceled:
+ return
+ self.emit(SIGNAL('update(PyQt_PyObject)'),
+ _('Searching in')+' '+dirpath[0])
+ self.books += list(self.db.find_books_in_directory(dirpath[0],
+ self.single_book_per_directory))
+
def run(self):
root = os.path.abspath(self.path)
- self.books = []
- if isinstance(root, unicode):
- root = root.encode(filesystem_encoding)
try:
- for dirpath in os.walk(root):
- if self.canceled:
- return
- self.emit(SIGNAL('update(PyQt_PyObject)'),
- _('Searching in')+' '+dirpath[0])
- self.books += list(self.db.find_books_in_directory(dirpath[0],
- self.single_book_per_directory))
- except Exception, err:
- import traceback
- traceback.print_exc()
+ self.walk(root)
+ except:
try:
- msg = unicode(err)
- except:
- msg = repr(err)
- self.emit(SIGNAL('found(PyQt_PyObject)'), msg)
- return
+ if isinstance(root, unicode):
+ root = root.encode(filesystem_encoding)
+ self.walk(root)
+ except Exception, err:
+ import traceback
+ traceback.print_exc()
+ try:
+ msg = unicode(err)
+ except:
+ msg = repr(err)
+ self.emit(SIGNAL('found(PyQt_PyObject)'), msg)
+ return
self.books = [formats for formats in self.books if formats]
diff --git a/src/calibre/gui2/catalog/catalog_epub_mobi.py b/src/calibre/gui2/catalog/catalog_epub_mobi.py
index 2037545bb4..12971528b2 100644
--- a/src/calibre/gui2/catalog/catalog_epub_mobi.py
+++ b/src/calibre/gui2/catalog/catalog_epub_mobi.py
@@ -18,10 +18,13 @@ class PluginWidget(QWidget,Ui_Form):
HELP = _('Options specific to')+' EPUB/MOBI '+_('output')
OPTION_FIELDS = [('exclude_genre','\[[\w ]*\]'),
('exclude_tags','~,'+_('Catalog')),
+ ('generate_titles', True),
+ ('generate_recently_added', True),
('note_tag','*'),
('numbers_as_text', False),
('read_tag','+')]
+
# Output synced to the connected device?
sync_enabled = True
@@ -37,7 +40,7 @@ class PluginWidget(QWidget,Ui_Form):
# Update dialog fields from stored options
for opt in self.OPTION_FIELDS:
opt_value = gprefs.get(self.name + '_' + opt[0], opt[1])
- if opt[0] == 'numbers_as_text':
+ if opt[0] in ['numbers_as_text','generate_titles','generate_recently_added']:
getattr(self, opt[0]).setChecked(opt_value)
else:
getattr(self, opt[0]).setText(opt_value)
@@ -45,19 +48,20 @@ class PluginWidget(QWidget,Ui_Form):
def options(self):
# Save/return the current options
# exclude_genre stores literally
- # numbers_as_text stores as True/False
+ # generate_titles, generate_recently_added, numbers_as_text store as True/False
# others store as lists
opts_dict = {}
for opt in self.OPTION_FIELDS:
- if opt[0] == 'numbers_as_text':
+ if opt[0] in ['numbers_as_text','generate_titles','generate_recently_added']:
opt_value = getattr(self,opt[0]).isChecked()
else:
opt_value = unicode(getattr(self, opt[0]).text())
gprefs.set(self.name + '_' + opt[0], opt_value)
- if opt[0] == 'exclude_genre' or 'numbers_as_text':
+
+ if opt[0] in ['exclude_genre','numbers_as_text','generate_titles','generate_recently_added']:
opts_dict[opt[0]] = opt_value
else:
- opt_value = opt_value.split(',')
+ opts_dict[opt[0]] = opt_value.split(',')
opts_dict['output_profile'] = [load_defaults('page_setup')['output_profile']]
diff --git a/src/calibre/gui2/catalog/catalog_epub_mobi.ui b/src/calibre/gui2/catalog/catalog_epub_mobi.ui
index 044ecdaaec..91fcbdc364 100644
--- a/src/calibre/gui2/catalog/catalog_epub_mobi.ui
+++ b/src/calibre/gui2/catalog/catalog_epub_mobi.ui
@@ -14,63 +14,56 @@
Form
- -
+
-
'Don't include this book' tag:
- -
+
-
- -
+
-
'Mark this book as read' tag:
- -
+
-
- -
+
-
Additional note tag prefix:
- -
+
-
- -
-
-
- Sort numbers as text
-
-
-
- -
+
-
- -
+
-
Regex pattern describing tags to exclude as genres:
@@ -83,36 +76,19 @@
- -
-
-
-
- 14
- 75
- true
-
-
-
- Special marker tags for catalog generation
-
-
- Qt::AlignCenter
-
-
-
- -
+
-
Regex tips:
-- The default regex of '\[[\w]*\]' ignores tags of the form '[tag]', e.g., '[Amazon Freebie]'
-- A regex of '.' ignores all tags, generating no genre categories in the catalog
+- The default regex - \[[\w]*\] - excludes genre tags of the form [tag], e.g., [Amazon Freebie]
+- A regex pattern of a single dot excludes all genre tags, generating no Genre Section
true
- -
+
-
Qt::Vertical
@@ -125,6 +101,27 @@
+ -
+
+
+ Include 'Titles' Section
+
+
+
+ -
+
+
+ Include 'Recently Added' Section
+
+
+
+ -
+
+
+ Sort numbers as text
+
+
+
diff --git a/src/calibre/gui2/convert/gui_conversion.py b/src/calibre/gui2/convert/gui_conversion.py
index 70321b049b..5f339bf91d 100644
--- a/src/calibre/gui2/convert/gui_conversion.py
+++ b/src/calibre/gui2/convert/gui_conversion.py
@@ -42,6 +42,7 @@ def gui_catalog(fmt, title, dbspec, ids, out_file_name, sync, fmt_options,
opts, args = parser.parse_args()
# Populate opts
+ # opts.gui_search_text = something
opts.catalog_title = title
opts.ids = ids
opts.search_text = None
diff --git a/src/calibre/gui2/convert/mobi_output.py b/src/calibre/gui2/convert/mobi_output.py
index 611ef96e11..57cc3a2ac1 100644
--- a/src/calibre/gui2/convert/mobi_output.py
+++ b/src/calibre/gui2/convert/mobi_output.py
@@ -6,9 +6,14 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal '
__docformat__ = 'restructuredtext en'
+from PyQt4.Qt import Qt
from calibre.gui2.convert.mobi_output_ui import Ui_Form
from calibre.gui2.convert import Widget
+from calibre.gui2.widgets import FontFamilyModel
+from calibre.utils.fonts import fontconfig
+
+font_family_model = None
class PluginWidget(Widget, Ui_Form):
@@ -19,8 +24,35 @@ class PluginWidget(Widget, Ui_Form):
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
Widget.__init__(self, parent, 'mobi_output',
['prefer_author_sort', 'rescale_images', 'toc_title',
- 'dont_compress', 'no_inline_toc']
+ 'dont_compress', 'no_inline_toc', 'masthead_font']
)
self.db, self.book_id = db, book_id
+
+ global font_family_model
+ if font_family_model is None:
+ font_family_model = FontFamilyModel()
+ try:
+ font_family_model.families = fontconfig.find_font_families(allowed_extensions=['ttf'])
+ except:
+ import traceback
+ font_family_model.families = []
+ print 'WARNING: Could not load fonts'
+ traceback.print_exc()
+ font_family_model.families.sort()
+ font_family_model.families[:0] = [_('Default')]
+
+ self.font_family_model = font_family_model
+ self.opt_masthead_font.setModel(self.font_family_model)
+
self.initialize_options(get_option, get_help, db, book_id)
+ def set_value_handler(self, g, val):
+ if unicode(g.objectName()) in 'opt_masthead_font':
+ idx = -1
+ if val:
+ idx = g.findText(val, Qt.MatchFixedString)
+ if idx < 0:
+ idx = 0
+ g.setCurrentIndex(idx)
+ return True
+ return False
diff --git a/src/calibre/gui2/convert/mobi_output.ui b/src/calibre/gui2/convert/mobi_output.ui
index a1bad48fb0..9c3ec9e68e 100644
--- a/src/calibre/gui2/convert/mobi_output.ui
+++ b/src/calibre/gui2/convert/mobi_output.ui
@@ -6,7 +6,7 @@
0
0
- 400
+ 421
300
@@ -41,19 +41,6 @@
- -
-
-
- Qt::Vertical
-
-
-
- 20
- 40
-
-
-
-
-
@@ -68,6 +55,51 @@
+ -
+
+
+ Kindle options
+
+
+
-
+
+
+ Masthead font:
+
+
+
+ -
+
+
+ -
+
+
+ Qt::Vertical
+
+
+
+ 20
+ 55
+
+
+
+
+
+
+
+ -
+
+
+ Qt::Vertical
+
+
+
+ 20
+ 40
+
+
+
+
diff --git a/src/calibre/gui2/device_drivers/configwidget.py b/src/calibre/gui2/device_drivers/configwidget.py
index 6eb6e1226d..d1cebcb81d 100644
--- a/src/calibre/gui2/device_drivers/configwidget.py
+++ b/src/calibre/gui2/device_drivers/configwidget.py
@@ -38,7 +38,7 @@ class ConfigWidget(QWidget, Ui_ConfigWidget):
self.opt_read_metadata.setChecked(self.settings.read_metadata)
else:
self.opt_read_metadata.hide()
- if extra_customization_message:
+ if extra_customization_message and settings.extra_customization:
self.extra_customization_label.setText(extra_customization_message)
self.opt_extra_customization.setText(settings.extra_customization)
else:
diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py
index 846851fd21..f385b76c4c 100644
--- a/src/calibre/gui2/dialogs/metadata_single.py
+++ b/src/calibre/gui2/dialogs/metadata_single.py
@@ -653,8 +653,11 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
d = d + self.local_timezone_offset
self.db.set_timestamp(self.id, d)
- if self.cover_changed and self.cover_data is not None:
- self.db.set_cover(self.id, self.cover_data)
+ if self.cover_changed:
+ if self.cover_data is not None:
+ self.db.set_cover(self.id, self.cover_data)
+ else:
+ self.db.remove_cover(self.id)
except IOError, err:
if err.errno == 13: # Permission denied
fname = err.filename if err.filename else 'file'
diff --git a/src/calibre/library/catalog.py b/src/calibre/library/catalog.py
index 67e360da68..78155326dc 100644
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
@@ -4,7 +4,7 @@ from collections import namedtuple
from datetime import date
from xml.sax.saxutils import escape
-from calibre import filesystem_encoding, prints, strftime
+from calibre import filesystem_encoding, prints, prepare_string_for_xml, strftime
from calibre.customize import CatalogPlugin
from calibre.customize.conversion import OptionRecommendation, DummyReporter
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag, NavigableString
@@ -274,6 +274,18 @@ class EPUB_MOBI(CatalogPlugin):
"--exclude-tags=skip will match 'skip this book' and 'Skip will like this'.\n"
"Default: '%default'\n"
"Applies to: ePub, MOBI output formats")),
+ Option('--generate-titles',
+ default=True,
+ dest='generate_titles',
+ help=_("Include 'Titles' section in catalog.\n"
+ "Default: '%default'\n"
+ "Applies to: ePub, MOBI output formats")),
+ Option('--generate-recently-added',
+ default=True,
+ dest='generate_recently_added',
+ help=_("Include 'Recently Added' section in catalog.\n"
+ "Default: '%default'\n"
+ "Applies to: ePub, MOBI output formats")),
Option('--note-tag',
default='*',
dest='note_tag',
@@ -306,17 +318,19 @@ class EPUB_MOBI(CatalogPlugin):
456 => four hundred fifty-six
4:56 => four fifty-six
'''
-
+ ORDINALS = ['zeroth','first','second','third','fourth','fifth','sixth','seventh','eighth','ninth']
lessThanTwenty = ["","one","two","three","four","five","six","seven","eight","nine",
"ten","eleven","twelve","thirteen","fourteen","fifteen","sixteen","seventeen",
"eighteen","nineteen"]
tens = ["","","twenty","thirty","forty","fifty","sixty","seventy","eighty","ninety"]
hundreds = ["","one","two","three","four","five","six","seven","eight","nine"]
- def __init__(self, number):
+ def __init__(self, number, verbose=False):
self.number = number
self.number_as_float = 0.0
self.text = ''
+ self.verbose = verbose
+ self.log = Log()
self.numberTranslate()
def stringFromInt(self, intToTranslate):
@@ -324,7 +338,6 @@ class EPUB_MOBI(CatalogPlugin):
# intToTranslate is a three-digit number
tensComponentString = ""
-
hundredsComponent = intToTranslate - (intToTranslate % 100)
tensComponent = intToTranslate % 100
@@ -336,8 +349,7 @@ class EPUB_MOBI(CatalogPlugin):
# Build the tens component
if tensComponent < 20:
- if tensComponent > 0:
- tensComponentString = self.lessThanTwenty[tensComponent]
+ tensComponentString = self.lessThanTwenty[tensComponent]
else:
tensPart = ""
onesPart = ""
@@ -369,9 +381,27 @@ class EPUB_MOBI(CatalogPlugin):
hundredsString = ""
thousandsString = ""
resultString = ""
+ self.suffix = ''
+
+ if self.verbose: self.log("numberTranslate(): %s" % self.number)
+
+ # Special case ordinals
+ if re.search('[st|nd|rd|th]',self.number):
+ self.number = re.sub(',','',self.number)
+ ordinal_suffix = re.search('[\D]', self.number)
+ ordinal_number = re.sub('\D','',re.sub(',','',self.number))
+ if self.verbose: self.log("Ordinal: %s" % ordinal_number)
+ self.number_as_float = ordinal_number
+ self.suffix = self.number[ordinal_suffix.start():]
+ if int(ordinal_number) > 9:
+ # Some typos (e.g., 'twentyth'), acceptable
+ self.text = '%s' % (EPUB_MOBI.NumberToText(ordinal_number).text)
+ else:
+ self.text = '%s' % (self.ORDINALS[int(ordinal_number)])
# Test for time
- if re.search(':',self.number):
+ elif re.search(':',self.number):
+ if self.verbose: self.log("Time: %s" % self.number)
self.number_as_float = re.sub(':','.',self.number)
time_strings = self.number.split(":")
hours = EPUB_MOBI.NumberToText(time_strings[0]).text
@@ -380,11 +410,13 @@ class EPUB_MOBI(CatalogPlugin):
# Test for %
elif re.search('%', self.number):
+ if self.verbose: self.log("Percent: %s" % self.number)
self.number_as_float = self.number.split('%')[0]
self.text = EPUB_MOBI.NumberToText(self.number.replace('%',' percent')).text
# Test for decimal
elif re.search('\.',self.number):
+ if self.verbose: self.log("Decimal: %s" % self.number)
self.number_as_float = self.number
decimal_strings = self.number.split(".")
left = EPUB_MOBI.NumberToText(decimal_strings[0]).text
@@ -393,6 +425,7 @@ class EPUB_MOBI(CatalogPlugin):
# Test for hypenated
elif re.search('-', self.number):
+ if self.verbose: self.log("Hyphenated: %s" % self.number)
self.number_as_float = self.number.split('-')[0]
strings = self.number.split('-')
if re.search('[0-9]+', strings[0]):
@@ -403,44 +436,54 @@ class EPUB_MOBI(CatalogPlugin):
right = EPUB_MOBI.NumberToText(strings[1]).text
self.text = '%s-%s' % (left, right)
- # Test for comma
- elif re.search(',', self.number):
+ # Test for only commas and numbers
+ elif re.search(',', self.number) and not re.search('[^0-9,]',self.number):
+ if self.verbose: self.log("Comma(s): %s" % self.number)
self.number_as_float = re.sub(',','',self.number)
- self.text = EPUB_MOBI.NumberToText(self.number.replace(',','')).text
+ self.text = EPUB_MOBI.NumberToText(self.number_as_float).text
- # Test for hybrid e.g., 'K2'
+ # Test for hybrid e.g., 'K2, 2nd, 10@10'
elif re.search('[\D]+', self.number):
- result = []
- for char in self.number:
- if re.search('[\d]+', char):
- result.append(EPUB_MOBI.NumberToText(char).text)
- else:
- result.append(char)
- self.text = ''.join(result)
+ if self.verbose: self.log("Hybrid: %s" % self.number)
+ # Split the token into number/text
+ number_position = re.search('\d',self.number).start()
+ text_position = re.search('\D',self.number).start()
+ if number_position < text_position:
+ number = self.number[:text_position]
+ text = self.number[text_position:]
+ self.text = '%s%s' % (EPUB_MOBI.NumberToText(number).text,text)
+ else:
+ text = self.number[:number_position]
+ number = self.number[number_position:]
+ self.text = '%s%s' % (text, EPUB_MOBI.NumberToText(number).text)
else:
+ if self.verbose: self.log("Clean: %s" % self.number)
try:
self.float_as_number = float(self.number)
number = int(self.number)
except:
return
- if number > 1000000:
+ if number > 10**9:
self.text = "%d out of range" % number
return
- if number == 1000000:
- self.text = "one million"
+ if number == 10**9:
+ self.text = "one billion"
else :
- # Strip out the three-digit number groups
- thousandsNumber = number/1000
- hundredsNumber = number - (thousandsNumber * 1000)
+ # Isolate the three-digit number groups
+ millionsNumber = number/10**6
+ thousandsNumber = (number - (millionsNumber * 10**6))/10**3
+ hundredsNumber = number - (millionsNumber * 10**6) - (thousandsNumber * 10**3)
+ if self.verbose:
+ print "Converting %s %s %s" % (millionsNumber, thousandsNumber, hundredsNumber)
- # Convert the lower 3 numbers - hundredsNumber
+ # Convert hundredsNumber
if hundredsNumber :
hundredsString = self.stringFromInt(hundredsNumber)
- # Convert the upper 3 numbers - thousandsNumber
+ # Convert thousandsNumber
if thousandsNumber:
if number > 1099 and number < 2000:
resultString = '%s %s' % (self.lessThanTwenty[number/100],
@@ -450,19 +493,26 @@ class EPUB_MOBI(CatalogPlugin):
else:
thousandsString = self.stringFromInt(thousandsNumber)
+ # Convert millionsNumber
+ if millionsNumber:
+ millionsString = self.stringFromInt(millionsNumber)
+
# Concatenate the strings
- if thousandsNumber and not hundredsNumber:
- resultString = "%s thousand" % thousandsString
+ resultString = ''
+ if millionsNumber:
+ resultString += "%s million " % millionsString
- if thousandsNumber and hundredsNumber:
- resultString = "%s thousand %s" % (thousandsString, hundredsString)
+ if thousandsNumber:
+ resultString += "%s thousand " % thousandsString
- if not thousandsNumber and hundredsNumber:
- resultString = "%s" % hundredsString
+ if hundredsNumber:
+ resultString += "%s" % hundredsString
- if not thousandsNumber and not hundredsNumber:
+ if not millionsNumber and not thousandsNumber and not hundredsNumber:
resultString = "zero"
+ if self.verbose:
+ self.log(u'resultString: %s' % resultString)
self.text = resultString.strip().capitalize()
class CatalogBuilder(object):
@@ -482,16 +532,11 @@ class EPUB_MOBI(CatalogPlugin):
catalog.createDirectoryStructure()
catalog.copyResources()
catalog.buildSources()
-
- - To do:
- *** generateThumbnails() creates a default book image from book.svg, but the background
- is black instead of white. This needs to be fixed (approx line #1418)
-
'''
# Number of discrete steps to catalog creation
- current_step = 0.0
- total_steps = 14.0
+# current_step = 0.0
+# total_steps = 10.0
THUMB_WIDTH = 75
THUMB_HEIGHT = 100
@@ -516,6 +561,7 @@ class EPUB_MOBI(CatalogPlugin):
self.__booksByTitle = None
self.__catalogPath = PersistentTemporaryDirectory("_epub_mobi_catalog", prefix='')
self.__contentDir = os.path.join(self.catalogPath, "content")
+ self.__currentStep = 0.0
self.__creator = opts.creator
self.__db = db
self.__descriptionClip = opts.descriptionClip
@@ -524,6 +570,7 @@ class EPUB_MOBI(CatalogPlugin):
self.opts.output_profile and \
self.opts.output_profile.startswith("kindle")) else False
self.__genres = None
+ self.__genre_tags_dict = None
self.__htmlFileList = []
self.__markerTags = self.getMarkerTags()
self.__ncxSoup = None
@@ -536,8 +583,15 @@ class EPUB_MOBI(CatalogPlugin):
self.__stylesheet = stylesheet
self.__thumbs = None
self.__title = opts.catalog_title
+ self.__totalSteps = 10.0
self.__verbose = opts.verbose
+ # Tweak build steps based on optional sections
+ if self.opts.generate_titles:
+ self.__totalSteps += 2
+ if self.opts.generate_recently_added:
+ self.__totalSteps += 2
+
# Accessors
'''
@dynamic_property
@@ -592,6 +646,13 @@ class EPUB_MOBI(CatalogPlugin):
self.__contentDir = val
return property(fget=fget, fset=fset)
@dynamic_property
+ def currentStep(self):
+ def fget(self):
+ return self.__currentStep
+ def fset(self, val):
+ self.__currentStep = val
+ return property(fget=fget, fset=fset)
+ @dynamic_property
def creator(self):
def fget(self):
return self.__creator
@@ -630,6 +691,13 @@ class EPUB_MOBI(CatalogPlugin):
self.__genres = val
return property(fget=fget, fset=fset)
@dynamic_property
+ def genre_tags_dict(self):
+ def fget(self):
+ return self.__genre_tags_dict
+ def fset(self, val):
+ self.__genre_tags_dict = val
+ return property(fget=fget, fset=fset)
+ @dynamic_property
def htmlFileList(self):
def fget(self):
return self.__htmlFileList
@@ -724,6 +792,11 @@ class EPUB_MOBI(CatalogPlugin):
self.__title = val
return property(fget=fget, fset=fset)
@dynamic_property
+ def totalSteps(self):
+ def fget(self):
+ return self.__totalSteps
+ return property(fget=fget)
+ @dynamic_property
def verbose(self):
def fget(self):
return self.__verbose
@@ -757,11 +830,15 @@ class EPUB_MOBI(CatalogPlugin):
# Methods
def buildSources(self):
self.fetchBooksByTitle()
+ if not self.booksByTitle:
+ return False
self.fetchBooksByAuthor()
self.generateHTMLDescriptions()
self.generateHTMLByAuthor()
- self.generateHTMLByTitle()
- self.generateHTMLByDateAdded()
+ if self.opts.generate_titles:
+ self.generateHTMLByTitle()
+ if self.opts.generate_recently_added:
+ self.generateHTMLByDateAdded()
self.generateHTMLByTags()
from calibre.utils.PythonMagickWand import ImageMagick
@@ -772,10 +849,13 @@ class EPUB_MOBI(CatalogPlugin):
self.generateNCXHeader()
self.generateNCXDescriptions("Descriptions")
self.generateNCXByAuthor("Authors")
- self.generateNCXByTitle("Titles")
- self.generateNCXByDateAdded("Recently Added")
+ if self.opts.generate_titles:
+ self.generateNCXByTitle("Titles")
+ if self.opts.generate_recently_added:
+ self.generateNCXByDateAdded("Recently Added")
self.generateNCXByGenre("Genres")
self.writeNCX()
+ return True
def cleanUp(self):
pass
@@ -797,10 +877,13 @@ class EPUB_MOBI(CatalogPlugin):
os.path.join(self.catalogPath, file[0]))
# Create the custom masthead image overwriting default
- try:
- self.generate_masthead_image(os.path.join(self.catalogPath, 'images/mastheadImage.gif'))
- except:
- pass
+ # If failure, default mastheadImage.gif should still be in place
+ if self.generateForKindle:
+ try:
+ self.generateMastheadImage(os.path.join(self.catalogPath,
+ 'images/mastheadImage.gif'))
+ except:
+ pass
def fetchBooksByTitle(self):
self.updateProgressFullStep("Fetching database")
@@ -817,11 +900,14 @@ class EPUB_MOBI(CatalogPlugin):
# Merge opts.exclude_tag with opts.search_text
# What if no exclude tags?
- exclude_tags = self.opts.exclude_tags.split(',')
- search_terms = []
- for tag in exclude_tags:
- search_terms.append("tag:%s" % tag)
- search_phrase = "not (%s)" % " or ".join(search_terms)
+ empty_exclude_tags = False if len(self.opts.exclude_tags) else True
+ search_phrase = ''
+ if not empty_exclude_tags:
+ exclude_tags = self.opts.exclude_tags.split(',')
+ search_terms = []
+ for tag in exclude_tags:
+ search_terms.append("tag:%s" % tag)
+ search_phrase = "not (%s)" % " or ".join(search_terms)
# If a list of ids are provided, don't use search_text
if self.opts.ids:
@@ -832,6 +918,7 @@ class EPUB_MOBI(CatalogPlugin):
else:
self.opts.search_text = search_phrase
+ #print "fetchBooksByTitle(): opts.search_text: %s" % self.opts.search_text
# Fetch the database as a dictionary
data = self.plugin.search_sort_db(self.db, self.opts)
@@ -842,7 +929,10 @@ class EPUB_MOBI(CatalogPlugin):
title = this_title['title'] = self.convertHTMLEntities(record['title'])
this_title['title_sort'] = self.generateSortTitle(title)
- this_title['author'] = " & ".join(record['authors'])
+ if 'authors' in record and len(record['authors']):
+ this_title['author'] = " & ".join(record['authors'])
+ else:
+ this_title['author'] = 'Unknown'
this_title['author_sort'] = record['author_sort'] if len(record['author_sort']) \
else self.author_to_author_sort(this_title['author'])
this_title['id'] = record['id']
@@ -853,8 +943,14 @@ class EPUB_MOBI(CatalogPlugin):
this_title['date'] = strftime(u'%B %Y', record['pubdate'].timetuple())
this_title['timestamp'] = record['timestamp']
if record['comments']:
- this_title['description'] = re.sub('&', '&', record['comments'])
- this_title['short_description'] = self.generateShortDescription(this_title['description'])
+ this_title['description'] = self.markdownComments(record['comments'])
+ paras = BeautifulSoup(this_title['description']).findAll('p')
+ tokens = []
+ for p in paras:
+ for token in p.contents:
+ if token.string is not None:
+ tokens.append(token.string)
+ this_title['short_description'] = self.generateShortDescription(' '.join(tokens))
else:
this_title['description'] = None
this_title['short_description'] = None
@@ -881,8 +977,10 @@ class EPUB_MOBI(CatalogPlugin):
key=lambda x:(x['title_sort'].upper(), x['title_sort'].upper()))
if False and self.verbose:
self.opts.log.info("fetchBooksByTitle(): %d books" % len(self.booksByTitle))
+ self.opts.log.info(" %-40s %-40s" % ('title', 'title_sort'))
for title in self.booksByTitle:
- self.opts.log.info((u" %-50s %-25s" % (title['title'][0:45], title['title_sort'][0:20])).encode('utf-8'))
+ self.opts.log.info((u" %-40s %-40s" % (title['title'][0:40],
+ title['title_sort'][0:40])).encode('utf-8'))
def fetchBooksByAuthor(self):
# Generate a list of titles sorted by author from the database
@@ -1003,7 +1101,8 @@ class EPUB_MOBI(CatalogPlugin):
for tag in title['tags']:
aTag = Tag(soup,'a')
- aTag['href'] = "Genre%s.html" % re.sub("\W","",self.convertHTMLEntities(tag))
+ #print "aTag: %s" % "Genre_%s.html" % re.sub("\W","",tag.lower())
+ aTag['href'] = "Genre_%s.html" % re.sub("\W","",tag.lower())
aTag.insert(0,escape(NavigableString(tag)))
emTag = Tag(soup, "em")
emTag.insert(0, aTag)
@@ -1432,75 +1531,108 @@ class EPUB_MOBI(CatalogPlugin):
def generateHTMLByTags(self):
# Generate individual HTML files for each tag, e.g. Fiction, Nonfiction ...
# Note that special tags - ~+*[] - have already been filtered from books[]
+ # There may be synonymous tags
self.updateProgressFullStep("'Genres'")
- # Filter out REMOVE_TAGS, sort
- filtered_tags = self.filterDbTags(self.db.all_tags())
+ self.genre_tags_dict = self.filterDbTags(self.db.all_tags())
# Extract books matching filtered_tags
genre_list = []
- for tag in filtered_tags:
+ for friendly_tag in sorted(self.genre_tags_dict):
+ #print "\ngenerateHTMLByTags(): looking for books with friendly_tag '%s'" % friendly_tag
+ # tag_list => { normalized_genre_tag : [{book},{},{}],
+ # normalized_genre_tag : [{book},{},{}] }
+
tag_list = {}
- tag_list['tag'] = tag
- tag_list['books'] = []
for book in self.booksByAuthor:
- if 'tags' in book and tag in book['tags']:
+ # Scan each book for tag matching friendly_tag
+ if 'tags' in book and friendly_tag in book['tags']:
this_book = {}
this_book['author'] = book['author']
this_book['title'] = book['title']
this_book['author_sort'] = book['author_sort']
this_book['read'] = book['read']
this_book['id'] = book['id']
- tag_list['books'].append(this_book)
+ normalized_tag = self.genre_tags_dict[friendly_tag]
+ genre_tag_list = [key for genre in genre_list for key in genre]
+ if normalized_tag in genre_tag_list:
+ for existing_genre in genre_list:
+ for key in existing_genre:
+ new_book = None
+ if key == normalized_tag:
+ for book in existing_genre[key]:
+ if book['title'] == this_book['title']:
+ new_book = False
+ break
+ else:
+ new_book = True
+ if new_book:
+ existing_genre[key].append(this_book)
+ else:
+ tag_list[normalized_tag] = [this_book]
+ genre_list.append(tag_list)
- if len(tag_list['books']):
- # Possible to have an empty tag list if the books were excluded
- genre_list.append(tag_list)
+ if self.opts.verbose:
+ self.opts.log.info(" Genre summary: %d active genre tags used in generating catalog with %d titles" %
+ (len(genre_list), len(self.booksByTitle)))
+
+ for genre in genre_list:
+ for key in genre:
+ self.opts.log.info(" %s: %d titles" % (key, len(genre[key])))
# Write the results
- # genre_list = [ [tag_list], [tag_list] ...]
+ # genre_list = [ {normalized_tag:[{book},{book}]}, {normalized_tag:[{book},{book}]}, ...]
master_genre_list = []
- for (index, genre) in enumerate(genre_list):
- # Create sorted_authors[0] = friendly, [1] = author_sort for NCX creation
- authors = []
- for book in genre['books']:
- authors.append((book['author'],book['author_sort']))
+ for genre_tag_set in genre_list:
+ for (index, genre) in enumerate(genre_tag_set):
+ #print "genre: %s \t genre_tag_set[genre]: %s" % (genre, genre_tag_set[genre])
- # authors[] contains a list of all book authors, with multiple entries for multiple books by author
- # Create unique_authors with a count of books per author as the third tuple element
- books_by_current_author = 1
- current_author = authors[0]
- unique_authors = []
- for (i,author) in enumerate(authors):
- if author != current_author and i:
- unique_authors.append((current_author[0], current_author[1], books_by_current_author))
- current_author = author
- books_by_current_author = 1
- elif i==0 and len(authors) == 1:
- # Allow for single-book lists
- unique_authors.append((current_author[0], current_author[1], books_by_current_author))
- else:
- books_by_current_author += 1
- '''
- # Extract the unique entries
- unique_authors = []
- for author in authors:
- if not author in unique_authors:
- unique_authors.append(author)
- '''
+ # Create sorted_authors[0] = friendly, [1] = author_sort for NCX creation
+ authors = []
+ for book in genre_tag_set[genre]:
+ authors.append((book['author'],book['author_sort']))
- # Write the genre book list as an article
- titles_spanned = self.generateHTMLByGenre(genre['tag'], True if index==0 else False, genre['books'],
- "%s/Genre%s.html" % (self.contentDir, re.sub("\W","", self.convertHTMLEntities(genre['tag']))))
+ # authors[] contains a list of all book authors, with multiple entries for multiple books by author
+ # Create unique_authors with a count of books per author as the third tuple element
+ books_by_current_author = 1
+ current_author = authors[0]
+ unique_authors = []
+ for (i,author) in enumerate(authors):
+ if author != current_author and i:
+ unique_authors.append((current_author[0], current_author[1], books_by_current_author))
+ current_author = author
+ books_by_current_author = 1
+ elif i==0 and len(authors) == 1:
+ # Allow for single-book lists
+ unique_authors.append((current_author[0], current_author[1], books_by_current_author))
+ else:
+ books_by_current_author += 1
+ '''
+ # Extract the unique entries
+ unique_authors = []
+ for author in authors:
+ if not author in unique_authors:
+ unique_authors.append(author)
+ '''
+ # Write the genre book list as an article
+ titles_spanned = self.generateHTMLByGenre(genre, True if index==0 else False,
+ genre_tag_set[genre],
+ "%s/Genre_%s.html" % (self.contentDir,
+ genre))
- tag_file = "content/Genre%s.html" % (re.sub("\W","", self.convertHTMLEntities(genre['tag'])))
- master_genre_list.append({'tag':genre['tag'],
- 'file':tag_file,
- 'authors':unique_authors,
- 'books':genre['books'],
- 'titles_spanned':titles_spanned})
+ tag_file = "content/Genre_%s.html" % genre
+ master_genre_list.append({'tag':genre,
+ 'file':tag_file,
+ 'authors':unique_authors,
+ 'books':genre_tag_set[genre],
+ 'titles_spanned':titles_spanned})
+ if False and self.opts.verbose:
+ for genre in master_genre_list:
+ print "genre['tag']: %s" % genre['tag']
+ for book in genre['books']:
+ print book['title']
self.genres = master_genre_list
def generateThumbnails(self):
@@ -1586,7 +1718,7 @@ class EPUB_MOBI(CatalogPlugin):
def generateOPF(self):
- self.updateProgressFullStep("Saving OPF")
+ self.updateProgressFullStep("Generating OPF")
header = '''
@@ -2107,9 +2239,6 @@ class EPUB_MOBI(CatalogPlugin):
self.updateProgressFullStep("NCX 'Genres'")
-
-
-
if not len(self.genres):
self.opts.log.warn(" No genres found in tags.\n"
" No Genre section added to Catalog")
@@ -2136,13 +2265,12 @@ class EPUB_MOBI(CatalogPlugin):
navPointTag.insert(nptc, navLabelTag)
nptc += 1
contentTag = Tag(ncx_soup,"content")
- contentTag['src'] = "content/Genre%s.html#section_start" % (re.sub("\W","", self.convertHTMLEntities(self.genres[0]['tag'])))
+ contentTag['src'] = "content/Genre_%s.html#section_start" % self.genres[0]['tag']
navPointTag.insert(nptc, contentTag)
nptc += 1
for genre in self.genres:
# Add an article for each genre
-
navPointVolumeTag = Tag(ncx_soup, 'navPoint')
navPointVolumeTag['class'] = "article"
navPointVolumeTag['id'] = "genre-%s-ID" % genre['tag']
@@ -2150,13 +2278,18 @@ class EPUB_MOBI(CatalogPlugin):
self.playOrder += 1
navLabelTag = Tag(ncx_soup, "navLabel")
textTag = Tag(ncx_soup, "text")
- textTag.insert(0, self.formatNCXText(NavigableString(genre['tag'])))
+
+ # GwR *** Can this be optimized?
+ normalized_tag = None
+ for friendly_tag in self.genre_tags_dict:
+ if self.genre_tags_dict[friendly_tag] == genre['tag']:
+ normalized_tag = self.genre_tags_dict[friendly_tag]
+ break
+ textTag.insert(0, self.formatNCXText(NavigableString(friendly_tag)))
navLabelTag.insert(0,textTag)
navPointVolumeTag.insert(0,navLabelTag)
-
contentTag = Tag(ncx_soup, "content")
- genre_name = re.sub("\W","", self.convertHTMLEntities(genre['tag']))
- contentTag['src'] = "content/Genre%s.html#Genre%s" % (genre_name, genre_name)
+ contentTag['src'] = "content/Genre_%s.html#Genre_%s" % (normalized_tag, normalized_tag)
navPointVolumeTag.insert(1, contentTag)
if self.generateForKindle:
@@ -2258,26 +2391,12 @@ class EPUB_MOBI(CatalogPlugin):
if not os.path.isdir(images_path):
os.makedirs(images_path)
- def getMarkerTags(self):
- ''' Return a list of special marker tags to be excluded from genre list '''
- markerTags = []
- markerTags.extend(self.opts.exclude_tags.split(','))
- markerTags.extend(self.opts.note_tag.split(','))
- markerTags.extend(self.opts.read_tag.split(','))
- return markerTags
-
def filterDbTags(self, tags):
# Remove the special marker tags from the database's tag list,
- # return sorted list of tags representing valid genres
+ # return a dict mapping each friendly tag to its normalized genre tag
- def next_tag(tags):
- for (i, tag) in enumerate(tags):
- if i < len(tags) - 1:
- yield tag + ", "
- else:
- yield tag
-
- filtered_tags = []
+ normalized_tags = []
+ friendly_tags = []
for tag in tags:
if tag[0] in self.markerTags:
continue
@@ -2286,32 +2405,38 @@ class EPUB_MOBI(CatalogPlugin):
if tag == ' ':
continue
- filtered_tags.append(tag)
+ normalized_tags.append(re.sub('\W','',tag).lower())
+ friendly_tags.append(tag)
- filtered_tags.sort()
+ genre_tags_dict = dict(zip(friendly_tags,normalized_tags))
- # Enable this code to force certain tags to the front of the genre list
- if False:
- for (i, tag) in enumerate(filtered_tags):
- if tag == 'Fiction':
- filtered_tags.insert(0, (filtered_tags.pop(i)))
- elif tag == 'Nonfiction':
- filtered_tags.insert(1, (filtered_tags.pop(i)))
- else:
- continue
+ # Test for multiple genres resolving to same normalized form
+ normalized_set = set(normalized_tags)
+ for normalized in normalized_set:
+ if normalized_tags.count(normalized) > 1:
+ self.opts.log.warn(" Warning: multiple tags resolving to genre '%s':" % normalized)
+ for key in genre_tags_dict:
+ if genre_tags_dict[key] == normalized:
+ self.opts.log.warn(" %s" % key)
if self.verbose:
- self.opts.log.info(u' %d Genre tags in database (exclude_genre: %s):' % \
- (len(filtered_tags), self.opts.exclude_genre))
- out_buf = ''
+ def next_tag(tags):
+ for (i, tag) in enumerate(tags):
+ if i < len(tags) - 1:
+ yield tag + ", "
+ else:
+ yield tag
- for tag in next_tag(filtered_tags):
- out_buf += tag
- if len(out_buf) > 72:
- self.opts.log(u' %s' % out_buf.rstrip())
- out_buf = ''
- self.opts.log(u' %s' % out_buf)
+ self.opts.log.info(u' %d available genre tags in database (exclude_genre: %s):' % \
+ (len(genre_tags_dict), self.opts.exclude_genre))
- return filtered_tags
+ # Display friendly/normalized genres
+ # friendly => normalized
+ sorted_tags = ['%s => %s' % (key, genre_tags_dict[key]) for key in sorted(genre_tags_dict.keys())]
+
+ for tag in next_tag(sorted_tags):
+ self.opts.log(u' %s' % tag)
+
+ return genre_tags_dict
def formatNCXText(self, description):
# Kindle TOC descriptions won't render certain characters
@@ -2343,15 +2468,19 @@ class EPUB_MOBI(CatalogPlugin):
body.insert(btc, aTag)
btc += 1
- # Insert the anchor with spaces stripped
+ # Create an anchor from the tag
aTag = Tag(soup, 'a')
- aTag['name'] = "Genre%s" % re.sub("\W","", genre)
+ aTag['name'] = "Genre_%s" % genre
body.insert(btc,aTag)
btc += 1
- # Insert the genre title
+ # Find the first instance of friendly_tag matching genre
+ for friendly_tag in self.genre_tags_dict:
+ if self.genre_tags_dict[friendly_tag] == genre:
+ break
+
titleTag = body.find(attrs={'class':'title'})
- titleTag.insert(0,NavigableString('%s' % escape(genre)))
+ titleTag.insert(0,NavigableString('%s' % escape(friendly_tag)))
# Insert the books by author list
divTag = body.find(attrs={'class':'authors'})
@@ -2457,9 +2586,7 @@ class EPUB_MOBI(CatalogPlugin):
-
-
-
+