Catalog generation: Make inclusion of titles and recently added optional. Also support <br> tags in description field

This commit is contained in:
Kovid Goyal 2010-02-02 15:20:03 -07:00
commit b1852e0e3b
4 changed files with 146 additions and 86 deletions

View File

@ -18,10 +18,13 @@ class PluginWidget(QWidget,Ui_Form):
HELP = _('Options specific to')+' EPUB/MOBI '+_('output') HELP = _('Options specific to')+' EPUB/MOBI '+_('output')
OPTION_FIELDS = [('exclude_genre','\[[\w ]*\]'), OPTION_FIELDS = [('exclude_genre','\[[\w ]*\]'),
('exclude_tags','~,'+_('Catalog')), ('exclude_tags','~,'+_('Catalog')),
('generate_titles', True),
('generate_recently_added', True),
('note_tag','*'), ('note_tag','*'),
('numbers_as_text', False), ('numbers_as_text', False),
('read_tag','+')] ('read_tag','+')]
# Output synced to the connected device? # Output synced to the connected device?
sync_enabled = True sync_enabled = True
@ -37,7 +40,7 @@ class PluginWidget(QWidget,Ui_Form):
# Update dialog fields from stored options # Update dialog fields from stored options
for opt in self.OPTION_FIELDS: for opt in self.OPTION_FIELDS:
opt_value = gprefs.get(self.name + '_' + opt[0], opt[1]) opt_value = gprefs.get(self.name + '_' + opt[0], opt[1])
if opt[0] == 'numbers_as_text': if opt[0] in ['numbers_as_text','generate_titles','generate_recently_added']:
getattr(self, opt[0]).setChecked(opt_value) getattr(self, opt[0]).setChecked(opt_value)
else: else:
getattr(self, opt[0]).setText(opt_value) getattr(self, opt[0]).setText(opt_value)
@ -45,19 +48,20 @@ class PluginWidget(QWidget,Ui_Form):
def options(self): def options(self):
# Save/return the current options # Save/return the current options
# exclude_genre stores literally # exclude_genre stores literally
# numbers_as_text stores as True/False # generate_titles, generate_recently_added, numbers_as_text stores as True/False
# others store as lists # others store as lists
opts_dict = {} opts_dict = {}
for opt in self.OPTION_FIELDS: for opt in self.OPTION_FIELDS:
if opt[0] == 'numbers_as_text': if opt[0] in ['numbers_as_text','generate_titles','generate_recently_added']:
opt_value = getattr(self,opt[0]).isChecked() opt_value = getattr(self,opt[0]).isChecked()
else: else:
opt_value = unicode(getattr(self, opt[0]).text()) opt_value = unicode(getattr(self, opt[0]).text())
gprefs.set(self.name + '_' + opt[0], opt_value) gprefs.set(self.name + '_' + opt[0], opt_value)
if opt[0] == 'exclude_genre' or 'numbers_as_text':
if opt[0] in ['exclude_genre','numbers_as_text','generate_titles','generate_recently_added']:
opts_dict[opt[0]] = opt_value opts_dict[opt[0]] = opt_value
else: else:
opt_value = opt_value.split(',') opts_dict[opt[0]] = opt_value.split(',')
opts_dict['output_profile'] = [load_defaults('page_setup')['output_profile']] opts_dict['output_profile'] = [load_defaults('page_setup')['output_profile']]

View File

@ -14,63 +14,56 @@
<string>Form</string> <string>Form</string>
</property> </property>
<layout class="QGridLayout" name="gridLayout"> <layout class="QGridLayout" name="gridLayout">
<item row="1" column="0"> <item row="0" column="0">
<widget class="QLabel" name="label_2"> <widget class="QLabel" name="label_2">
<property name="text"> <property name="text">
<string>'Don't include this book' tag:</string> <string>'Don't include this book' tag:</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="1" column="1"> <item row="0" column="1">
<widget class="QLineEdit" name="exclude_tags"> <widget class="QLineEdit" name="exclude_tags">
<property name="toolTip"> <property name="toolTip">
<string extracomment="Default: ~,Catalog"/> <string extracomment="Default: ~,Catalog"/>
</property> </property>
</widget> </widget>
</item> </item>
<item row="2" column="0"> <item row="1" column="0">
<widget class="QLabel" name="label_3"> <widget class="QLabel" name="label_3">
<property name="text"> <property name="text">
<string>'Mark this book as read' tag:</string> <string>'Mark this book as read' tag:</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="2" column="1"> <item row="1" column="1">
<widget class="QLineEdit" name="read_tag"> <widget class="QLineEdit" name="read_tag">
<property name="toolTip"> <property name="toolTip">
<string extracomment="Default: +"/> <string extracomment="Default: +"/>
</property> </property>
</widget> </widget>
</item> </item>
<item row="3" column="0"> <item row="2" column="0">
<widget class="QLabel" name="label_4"> <widget class="QLabel" name="label_4">
<property name="text"> <property name="text">
<string>Additional note tag prefix:</string> <string>Additional note tag prefix:</string>
</property> </property>
</widget> </widget>
</item> </item>
<item row="3" column="1"> <item row="2" column="1">
<widget class="QLineEdit" name="note_tag"> <widget class="QLineEdit" name="note_tag">
<property name="toolTip"> <property name="toolTip">
<string extracomment="Default: *"/> <string extracomment="Default: *"/>
</property> </property>
</widget> </widget>
</item> </item>
<item row="8" column="0"> <item row="4" column="1">
<widget class="QCheckBox" name="numbers_as_text">
<property name="text">
<string>Sort numbers as text</string>
</property>
</widget>
</item>
<item row="5" column="1">
<widget class="QLineEdit" name="exclude_genre"> <widget class="QLineEdit" name="exclude_genre">
<property name="toolTip"> <property name="toolTip">
<string extracomment="Default: \[[\w]*\]"/> <string extracomment="Default: \[[\w]*\]"/>
</property> </property>
</widget> </widget>
</item> </item>
<item row="5" column="0"> <item row="4" column="0">
<widget class="QLabel" name="label"> <widget class="QLabel" name="label">
<property name="text"> <property name="text">
<string>Regex pattern describing tags to exclude as genres:</string> <string>Regex pattern describing tags to exclude as genres:</string>
@ -83,36 +76,19 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="0" column="0" colspan="2"> <item row="5" column="1">
<widget class="QLabel" name="label_5">
<property name="font">
<font>
<pointsize>14</pointsize>
<weight>75</weight>
<bold>true</bold>
</font>
</property>
<property name="text">
<string>Special marker tags for catalog generation</string>
</property>
<property name="alignment">
<set>Qt::AlignCenter</set>
</property>
</widget>
</item>
<item row="6" column="1">
<widget class="QLabel" name="label_6"> <widget class="QLabel" name="label_6">
<property name="text"> <property name="text">
<string>Regex tips: <string>Regex tips:
- The default regex of '\[[\w]*\]' ignores tags of the form '[tag]', e.g., '[Amazon Freebie]' - The default regex - \[[\w]*\] - excludes genre tags of the form [tag], e.g., [Amazon Freebie]
- A regex of '.' ignores all tags, generating no genre categories in the catalog</string> - A regex pattern of a single dot excludes all genre tags, generating no Genre Section</string>
</property> </property>
<property name="wordWrap"> <property name="wordWrap">
<bool>true</bool> <bool>true</bool>
</property> </property>
</widget> </widget>
</item> </item>
<item row="7" column="0"> <item row="6" column="0">
<spacer name="verticalSpacer"> <spacer name="verticalSpacer">
<property name="orientation"> <property name="orientation">
<enum>Qt::Vertical</enum> <enum>Qt::Vertical</enum>
@ -125,6 +101,27 @@
</property> </property>
</spacer> </spacer>
</item> </item>
<item row="8" column="0">
<widget class="QCheckBox" name="generate_titles">
<property name="text">
<string>Include 'Titles' Section</string>
</property>
</widget>
</item>
<item row="9" column="0">
<widget class="QCheckBox" name="generate_recently_added">
<property name="text">
<string>Include 'Recently Added' Section</string>
</property>
</widget>
</item>
<item row="10" column="0">
<widget class="QCheckBox" name="numbers_as_text">
<property name="text">
<string>Sort numbers as text</string>
</property>
</widget>
</item>
</layout> </layout>
</widget> </widget>
<resources/> <resources/>

View File

@ -274,6 +274,18 @@ class EPUB_MOBI(CatalogPlugin):
"--exclude-tags=skip will match 'skip this book' and 'Skip will like this'.\n" "--exclude-tags=skip will match 'skip this book' and 'Skip will like this'.\n"
"Default: '%default'\n" "Default: '%default'\n"
"Applies to: ePub, MOBI output formats")), "Applies to: ePub, MOBI output formats")),
Option('--generate-titles',
default=True,
dest='generate_titles',
help=_("Include 'Titles' section in catalog.\n"
"Default: '%default'\n"
"Applies to: ePub, MOBI output formats")),
Option('--generate-recently-added',
default=True,
dest='generate_recently_added',
help=_("Include 'Recently Added' section in catalog.\n"
"Default: '%default'\n"
"Applies to: ePub, MOBI output formats")),
Option('--note-tag', Option('--note-tag',
default='*', default='*',
dest='note_tag', dest='note_tag',
@ -523,8 +535,8 @@ class EPUB_MOBI(CatalogPlugin):
''' '''
# Number of discrete steps to catalog creation # Number of discrete steps to catalog creation
current_step = 0.0 # current_step = 0.0
total_steps = 14.0 # total_steps = 10.0
THUMB_WIDTH = 75 THUMB_WIDTH = 75
THUMB_HEIGHT = 100 THUMB_HEIGHT = 100
@ -549,6 +561,7 @@ class EPUB_MOBI(CatalogPlugin):
self.__booksByTitle = None self.__booksByTitle = None
self.__catalogPath = PersistentTemporaryDirectory("_epub_mobi_catalog", prefix='') self.__catalogPath = PersistentTemporaryDirectory("_epub_mobi_catalog", prefix='')
self.__contentDir = os.path.join(self.catalogPath, "content") self.__contentDir = os.path.join(self.catalogPath, "content")
self.__currentStep = 0.0
self.__creator = opts.creator self.__creator = opts.creator
self.__db = db self.__db = db
self.__descriptionClip = opts.descriptionClip self.__descriptionClip = opts.descriptionClip
@ -570,8 +583,15 @@ class EPUB_MOBI(CatalogPlugin):
self.__stylesheet = stylesheet self.__stylesheet = stylesheet
self.__thumbs = None self.__thumbs = None
self.__title = opts.catalog_title self.__title = opts.catalog_title
self.__totalSteps = 10.0
self.__verbose = opts.verbose self.__verbose = opts.verbose
# Tweak build steps based on optional sections
if self.opts.generate_titles:
self.__totalSteps += 2
if self.opts.generate_recently_added:
self.__totalSteps += 2
# Accessors # Accessors
''' '''
@dynamic_property @dynamic_property
@ -626,6 +646,13 @@ class EPUB_MOBI(CatalogPlugin):
self.__contentDir = val self.__contentDir = val
return property(fget=fget, fset=fset) return property(fget=fget, fset=fset)
@dynamic_property @dynamic_property
def currentStep(self):
    ''' Property factory: read/write access to the private __currentStep value '''
    def getter(self):
        return self.__currentStep
    def setter(self, val):
        self.__currentStep = val
    return property(fget=getter, fset=setter)
@dynamic_property
def creator(self): def creator(self):
def fget(self): def fget(self):
return self.__creator return self.__creator
@ -765,6 +792,11 @@ class EPUB_MOBI(CatalogPlugin):
self.__title = val self.__title = val
return property(fget=fget, fset=fset) return property(fget=fget, fset=fset)
@dynamic_property @dynamic_property
def totalSteps(self):
    ''' Property factory: read-only access to the private __totalSteps value '''
    def getter(self):
        return self.__totalSteps
    # No setter is supplied, so the resulting property cannot be assigned to
    return property(fget=getter)
@dynamic_property
def verbose(self): def verbose(self):
def fget(self): def fget(self):
return self.__verbose return self.__verbose
@ -803,8 +835,10 @@ class EPUB_MOBI(CatalogPlugin):
self.fetchBooksByAuthor() self.fetchBooksByAuthor()
self.generateHTMLDescriptions() self.generateHTMLDescriptions()
self.generateHTMLByAuthor() self.generateHTMLByAuthor()
self.generateHTMLByTitle() if self.opts.generate_titles:
self.generateHTMLByDateAdded() self.generateHTMLByTitle()
if self.opts.generate_recently_added:
self.generateHTMLByDateAdded()
self.generateHTMLByTags() self.generateHTMLByTags()
from calibre.utils.PythonMagickWand import ImageMagick from calibre.utils.PythonMagickWand import ImageMagick
@ -815,8 +849,10 @@ class EPUB_MOBI(CatalogPlugin):
self.generateNCXHeader() self.generateNCXHeader()
self.generateNCXDescriptions("Descriptions") self.generateNCXDescriptions("Descriptions")
self.generateNCXByAuthor("Authors") self.generateNCXByAuthor("Authors")
self.generateNCXByTitle("Titles") if self.opts.generate_titles:
self.generateNCXByDateAdded("Recently Added") self.generateNCXByTitle("Titles")
if self.opts.generate_recently_added:
self.generateNCXByDateAdded("Recently Added")
self.generateNCXByGenre("Genres") self.generateNCXByGenre("Genres")
self.writeNCX() self.writeNCX()
return True return True
@ -907,16 +943,14 @@ class EPUB_MOBI(CatalogPlugin):
this_title['date'] = strftime(u'%B %Y', record['pubdate'].timetuple()) this_title['date'] = strftime(u'%B %Y', record['pubdate'].timetuple())
this_title['timestamp'] = record['timestamp'] this_title['timestamp'] = record['timestamp']
if record['comments']: if record['comments']:
#this_title['description'] = re.sub('&', '&amp;', record['comments']) this_title['description'] = self.markdownComments(record['comments'])
has_xml = re.search('<(?P<tag>.+)>.+</(?P=tag)>|<!--.+-->|<.+/>',record['comments']) paras = BeautifulSoup(this_title['description']).findAll('p')
if has_xml and not re.search('<br', record['comments']): tokens = []
self.opts.log.warning(" %d: %s (%s) contains suspect markup" % \ for p in paras:
(this_title['id'], this_title['title'],this_title['author'])) for token in p.contents:
this_title['description'] = prepare_string_for_xml(record['comments']) if token.string is not None:
else: tokens.append(token.string)
# If <br/> present, take a chance that the markup is valid this_title['short_description'] = self.generateShortDescription(' '.join(tokens))
this_title['description'] = record['comments']
this_title['short_description'] = self.generateShortDescription(this_title['description'])
else: else:
this_title['description'] = None this_title['description'] = None
this_title['short_description'] = None this_title['short_description'] = None
@ -2552,9 +2586,7 @@ class EPUB_MOBI(CatalogPlugin):
</tr> </tr>
</table> </table>
<blockquote><hr/></blockquote> <blockquote><hr/></blockquote>
<p class="description"></p> <div class="description"></div>
<!--blockquote><hr/></blockquote-->
<!--p class="instructions">&#9654; Press <span style="font-variant:small-caps"><b>back</b></span> to return to list &#9664;</p-->
</body> </body>
</html> </html>
'''.format(title_border) '''.format(title_border)
@ -2729,12 +2761,6 @@ class EPUB_MOBI(CatalogPlugin):
except RuntimeError: except RuntimeError:
self.opts.log.error("generateThumbnail(): RuntimeError with %s" % title['title']) self.opts.log.error("generateThumbnail(): RuntimeError with %s" % title['title'])
def letter_or_symbol(self,char):
if not re.search('[a-zA-Z]',char):
return 'Symbols'
else:
return char
def getMarkerTags(self): def getMarkerTags(self):
''' Return a list of special marker tags to be excluded from genre list ''' ''' Return a list of special marker tags to be excluded from genre list '''
markerTags = [] markerTags = []
@ -2743,6 +2769,33 @@ class EPUB_MOBI(CatalogPlugin):
markerTags.extend(self.opts.read_tag.split(',')) markerTags.extend(self.opts.read_tag.split(','))
return markerTags return markerTags
def letter_or_symbol(self,char):
    ''' Return char unchanged if it contains an ASCII letter, else 'Symbols' '''
    if re.search('[a-zA-Z]',char):
        return char
    # Digits, punctuation, non-ASCII characters and '' all land here
    return 'Symbols'
def markdownComments(self, comments):
    '''
    Convert random comment text to a normalized, xml-legal block of <p>s.

    comments: raw comments text for a book.

    The text is passed through prepare_string_for_xml(), escaped
    <br> / <br/> tags are restored as a literal <br/>, carriage returns
    are dropped, and every chunk delimited by a blank line is wrapped in
    its own <p>. Returns soup.renderContents(encoding=None) for the
    resulting sequence of <p> tags.
    '''
    # reformat illegal xml
    desc = prepare_string_for_xml(comments)

    # normalize <br/> tags (they were escaped along with everything else)
    desc = re.sub(r'&lt;br[/]{0,1}&gt;', '<br/>', desc)

    # Strip carriage returns so '\r\n\r\n' counts as a double line break.
    # Keep operating on desc: restarting from the raw comments here would
    # silently throw away the escaping and <br/> normalization done above.
    desc = desc.replace('\r', '')

    # tokenize double line breaks, one <p> per token, in original order
    soup = BeautifulSoup()
    for ptc, token in enumerate(desc.split('\n\n')):
        pTag = Tag(soup, 'p')
        pTag.insert(0, token)
        soup.insert(ptc, pTag)

    return soup.renderContents(encoding=None)
def processSpecialTags(self, tags, this_title, opts): def processSpecialTags(self, tags, this_title, opts):
tag_list = [] tag_list = []
for tag in tags: for tag in tags:
@ -2757,6 +2810,22 @@ class EPUB_MOBI(CatalogPlugin):
tag_list.append(tag) tag_list.append(tag)
return tag_list return tag_list
def updateProgressFullStep(self, description):
    ''' Advance to the next full build step and report coarse progress '''
    self.currentStep += 1
    self.progressString = description

    # Fraction of the build represented by the steps preceding this one
    fraction = float((self.currentStep-1)/self.totalSteps)
    self.progressInt = fraction
    self.reporter(fraction, description)

    # Percentage lines are only echoed to the log in a cli environment
    if not self.opts.cli_environment:
        return
    self.opts.log(u"%3.0f%% %s" % (fraction*100, description))
def updateProgressMicroStep(self, description, micro_step_pct):
    ''' Report fine-grained progress within the current full step '''
    steps = self.totalSteps
    self.progressString = description

    # Progress already banked by the steps before the current one
    coarse_progress = float((self.currentStep-1)/steps)

    # Share of one full step (expressed as a percentage) scaled by micro_step_pct
    step_range = 100/steps
    fine_progress = float((micro_step_pct*step_range)/100)

    self.progressInt = coarse_progress + fine_progress
    self.reporter(self.progressInt, self.progressString)
class NotImplementedError: class NotImplementedError:
def __init__(self, error): def __init__(self, error):
self.error = error self.error = error
@ -2764,22 +2833,6 @@ class EPUB_MOBI(CatalogPlugin):
def logerror(self): def logerror(self):
self.opts.log.info('%s not implemented' % self.error) self.opts.log.info('%s not implemented' % self.error)
def updateProgressFullStep(self, description):
self.current_step += 1
self.progressString = description
self.progressInt = float((self.current_step-1)/self.total_steps)
self.reporter(self.progressInt, self.progressString)
if self.opts.cli_environment:
self.opts.log(u"%3.0f%% %s" % (self.progressInt*100, self.progressString))
def updateProgressMicroStep(self, description, micro_step_pct):
step_range = 100/self.total_steps
self.progressString = description
coarse_progress = float((self.current_step-1)/self.total_steps)
fine_progress = float((micro_step_pct*step_range)/100)
self.progressInt = coarse_progress + fine_progress
self.reporter(self.progressInt, self.progressString)
def run(self, path_to_output, opts, db, notification=DummyReporter()): def run(self, path_to_output, opts, db, notification=DummyReporter()):
opts.log = log = Log() opts.log = log = Log()
opts.fmt = self.fmt = path_to_output.rpartition('.')[2] opts.fmt = self.fmt = path_to_output.rpartition('.')[2]
@ -2808,14 +2861,15 @@ class EPUB_MOBI(CatalogPlugin):
log(" opts:") log(" opts:")
for key in keys: for key in keys:
if key in ['catalog_title','exclude_genre','exclude_tags','note_tag', if key in ['catalog_title','exclude_genre','exclude_tags','generate_titles',
'numbers_as_text','read_tag','search_text','sort_by','sync']: 'generate_recently_added','note_tag','numbers_as_text','read_tag',
'search_text','sort_by','sync']:
log(" %s: %s" % (key, opts_dict[key])) log(" %s: %s" % (key, opts_dict[key]))
# Launch the Catalog builder # Launch the Catalog builder
catalog = self.CatalogBuilder(db, opts, self, report_progress=notification)
if opts.verbose: if opts.verbose:
log.info("Begin catalog source generation") log.info("Begin catalog source generation")
catalog = self.CatalogBuilder(db, opts, self, report_progress=notification)
catalog.createDirectoryStructure() catalog.createDirectoryStructure()
catalog.copyResources() catalog.copyResources()
catalog_source_built = catalog.buildSources() catalog_source_built = catalog.buildSources()

View File

@ -761,14 +761,19 @@ class BasicNewsRecipe(Recipe):
self.download_cover() self.download_cover()
self.report_progress(0, _('Generating masthead...')) self.report_progress(0, _('Generating masthead...'))
self.masthead_path = None self.masthead_path = None
try: try:
murl = self.get_masthead_url() murl = self.get_masthead_url()
except: except:
self.log.exception('Failed to get masthead url') self.log.exception('Failed to get masthead url')
murl = None murl = None
if murl is not None: if murl is not None:
# Try downloading the user-supplied masthead_url
# Failure sets self.masthead_path to None
self.download_masthead(murl) self.download_masthead(murl)
if self.masthead_path is None: if self.masthead_path is None:
self.log.info("Synthesizing mastheadImage")
self.masthead_path = os.path.join(self.output_dir, 'mastheadImage.jpg') self.masthead_path = os.path.join(self.output_dir, 'mastheadImage.jpg')
try: try:
self.default_masthead_image(self.masthead_path) self.default_masthead_image(self.masthead_path)
@ -916,7 +921,7 @@ class BasicNewsRecipe(Recipe):
try: try:
self._download_masthead(url) self._download_masthead(url)
except: except:
self.log.exception("Failed to download supplied masthead_url, synthesizing") self.log.exception("Failed to download supplied masthead_url")
def default_cover(self, cover_file): def default_cover(self, cover_file):
''' '''