mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Added --debug-pipeline switch to EPUB/MOBI
This commit is contained in:
commit
958579d3dc
BIN
resources/images/news/greensboro_news_and_record.png
Normal file
BIN
resources/images/news/greensboro_news_and_record.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 480 B |
BIN
resources/images/news/hotair.png
Normal file
BIN
resources/images/news/hotair.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 363 B |
54
resources/recipes/greensboro_news_and_record.recipe
Normal file
54
resources/recipes/greensboro_news_and_record.recipe
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Walt Anthony <workshop.northpole at gmail.com>'
|
||||||
|
'''
|
||||||
|
www.news-record.com
|
||||||
|
'''
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class NewsandRecord(BasicNewsRecipe):
    """Recipe for the Greensboro News & Record (www.news-record.com).

    Purely declarative: the class only sets attributes and defines no
    methods of its own.
    """

    # Identification and descriptive metadata.
    __author__ = 'Walt Anthony'
    title = u'Greensboro News & Record'
    description = "News from Greensboro, North Carolina"
    publisher = 'News & Record and Landmark Media Enterprises, LLC'
    category = 'news, USA'
    language = 'en'

    # Article selection limits.
    oldest_article = 3  # days
    max_articles_per_feed = 25
    summary_length = 150

    # Page handling.
    encoding = 'utf-8'
    no_stylesheets = True
    remove_javascript = True

    # The metadata above is repeated in the conversion options.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # Tag filters: the headline and content-body markers bound the kept
    # part of the page; the listed tag names are removed.
    remove_tags_before = {'name': 'h3', 'attrs': {'class': 'nrcTxt_headline'}}
    remove_tags_after = {'name': 'div', 'attrs': {'id': 'nrcBlk_ContentBody'}}
    remove_tags = [
        {'name': 'iframe'},
        {'name': ['notags', 'embed', 'object', 'link', 'img']},
    ]

    # (section title, feed URL) pairs.
    feeds = [
        ('News', 'http://www.news-record.com/news/archive/feed'),
        ('Greensboro News', 'http://www.news-record.com/news/greensboro/feed'),
        ('Education', 'http://www.news-record.com/news/education/feed'),
        ('Government', 'http://www.news-record.com/news/government/feed'),
        ('College Sports', 'http://www.news-record.com/sports/college/feed'),
        ('Sports Extra', 'http://www.news-record.com/blog/sportsextra/feed'),
        ('Life', 'http://www.news-record.com/life/top/feed'),
        ('NASCAR', 'http://www.news-record.com/sports/nascar/top/feed'),
        ('Editorials', 'http://www.news-record.com/opinion/editorials/feed'),
        ('Letters to the Editor', 'http://www.news-record.com/opinion/letters/feed'),
    ]
|
||||||
|
|
41
resources/recipes/hotair.recipe
Normal file
41
resources/recipes/hotair.recipe
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Walt Anthony <workshop.northpole at gmail.com>'
|
||||||
|
'''
|
||||||
|
www.hotair.com
|
||||||
|
'''
|
||||||
|
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class hotair(BasicNewsRecipe):
    """Recipe for Hot Air (www.hotair.com).

    Purely declarative: the class only sets attributes and defines no
    methods of its own.
    """

    # Identification and descriptive metadata.
    __author__ = 'Walt Anthony'
    title = u'Hot Air'
    description = "The world's first, full-service conservative Internet broadcast network"
    publisher = 'Hot Air'
    category = 'news, politics, USA'
    language = 'en'

    # Article selection limits.
    oldest_article = 3
    max_articles_per_feed = 100
    summary_length = 150

    # Page handling.
    encoding = 'utf-8'
    use_embedded_content = False
    remove_javascript = True

    # The metadata above is repeated in the conversion options.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # Tag filters: keep the 'page-post' div, remove the listed tag names.
    keep_only_tags = [{'name': 'div', 'attrs': {'id': 'page-post'}}]
    remove_tags = [
        {'name': ['iframe', 'small', 'embed', 'object', 'link', 'script', 'form']},
    ]

    # (section title, feed URL) pairs.
    feeds = [
        ('Hot Air', 'http://feeds.feedburner.com/hotair/main'),
        ('The Greenroom', 'http://feeds2.feedburner.com/hotair/greenroom'),
    ]
|
@ -263,7 +263,7 @@ class CatalogPlugin(Plugin):
|
|||||||
else:
|
else:
|
||||||
opts.search_text = 'not tag:'+cat
|
opts.search_text = 'not tag:'+cat
|
||||||
'''
|
'''
|
||||||
|
|
||||||
db.search(opts.search_text)
|
db.search(opts.search_text)
|
||||||
|
|
||||||
if opts.sort_by:
|
if opts.sort_by:
|
||||||
|
@ -146,12 +146,14 @@ class Region(object):
|
|||||||
self.columns = []
|
self.columns = []
|
||||||
self.top = self.bottom = self.left = self.right = self.width = self.height = 0
|
self.top = self.bottom = self.left = self.right = self.width = self.height = 0
|
||||||
|
|
||||||
def add(self, columns):
    """Add one row's worth of columns to this region.

    If the region has no columns yet, the incoming columns are adopted,
    ordered by their ``left`` attribute. Otherwise the elements of each
    incoming column are added to the existing column at the same index.

    :param columns: sequence of column objects; each must expose ``left``
        and be iterable over its elements.
    :raises IndexError: if ``columns`` has more entries than the region
        already holds (the merge is positional).
    """
    if not self.columns:
        # key= replaces the Python-2-only cmp= argument; the resulting
        # order (ascending by left edge, stable) is the same.
        self.columns.extend(sorted(columns, key=lambda col: col.left))
        return

    for i, column in enumerate(columns):
        target = self.columns[i]
        for elem in column:
            target.add(elem)
|
||||||
|
|
||||||
def contains(self, columns):
|
def contains(self, columns):
|
||||||
if not self.columns:
|
if not self.columns:
|
||||||
@ -168,6 +170,11 @@ class Region(object):
|
|||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
@property
def is_empty(self):
    """True when no columns have been added to this region."""
    # The visible Region code stores its contents in self.columns only;
    # reading self.elements here would raise AttributeError.
    # NOTE(review): the full Region.__init__ is not visible -- confirm it
    # defines no `elements` attribute.
    return not self.columns
|
||||||
|
|
||||||
|
|
||||||
class Page(object):
|
class Page(object):
|
||||||
|
|
||||||
# Fraction of a character width that two strings have to be apart,
|
# Fraction of a character width that two strings have to be apart,
|
||||||
@ -242,19 +249,25 @@ class Page(object):
|
|||||||
self.texts.remove(match)
|
self.texts.remove(match)
|
||||||
|
|
||||||
def first_pass(self):
    """Sort page into regions and columns.

    Rebuilds ``self.regions`` from ``self.elements``. Each element is
    first tagged with its position (``idx``). Then, for every element not
    already handled, the elements in its row are gathered and sorted into
    columns. Those columns go into the current region while it still
    "contains" them; otherwise the current region is closed and a new one
    is started. Returns nothing; an empty page leaves ``self.regions``
    empty.
    """
    self.regions = []
    if not self.elements:
        return

    for i, x in enumerate(self.elements):
        x.idx = i

    # The region being built is a plain local; it is never stored on self.
    current_region = Region()
    processed = set()
    for x in self.elements:
        if x in processed:
            continue
        elems = set(self.find_elements_in_row_of(x))
        columns = self.sort_into_columns(x, elems)
        processed.update(elems)
        if not current_region.contains(columns):
            # Column layout changed: close this region and start a new one.
            self.regions.append(current_region)
            current_region = Region()
        current_region.add(columns)

    # is_empty is a property, so it is read, not called.
    if not current_region.is_empty:
        self.regions.append(current_region)
|
||||||
|
|
||||||
def sort_into_columns(self, elem, neighbors):
|
def sort_into_columns(self, elem, neighbors):
|
||||||
columns = [Column()]
|
columns = [Column()]
|
||||||
|
@ -17,7 +17,7 @@ class PluginWidget(QWidget,Ui_Form):
|
|||||||
TITLE = _('E-book Options')
|
TITLE = _('E-book Options')
|
||||||
HELP = _('Options specific to')+' EPUB/MOBI '+_('output')
|
HELP = _('Options specific to')+' EPUB/MOBI '+_('output')
|
||||||
OPTION_FIELDS = [('exclude_genre','\[[\w ]*\]'),
|
OPTION_FIELDS = [('exclude_genre','\[[\w ]*\]'),
|
||||||
('exclude_tags','~,Catalog'),
|
('exclude_tags','~,'+_('Catalog')),
|
||||||
('read_tag','+'),
|
('read_tag','+'),
|
||||||
('note_tag','*')]
|
('note_tag','*')]
|
||||||
|
|
||||||
|
@ -932,7 +932,7 @@ class DeviceGUI(object):
|
|||||||
if isinstance(job.exception, FreeSpaceError):
|
if isinstance(job.exception, FreeSpaceError):
|
||||||
where = 'in main memory.' if 'memory' in str(job.exception) \
|
where = 'in main memory.' if 'memory' in str(job.exception) \
|
||||||
else 'on the storage card.'
|
else 'on the storage card.'
|
||||||
titles = '\n'.join(['<li>'+mi['title']+'</li>' \
|
titles = '\n'.join(['<li>'+mi.title+'</li>' \
|
||||||
for mi in metadata])
|
for mi in metadata])
|
||||||
d = error_dialog(self, _('No space on device'),
|
d = error_dialog(self, _('No space on device'),
|
||||||
_('<p>Cannot upload books to device there '
|
_('<p>Cannot upload books to device there '
|
||||||
|
@ -241,13 +241,22 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
help = _('Title of generated catalog used as title in metadata.\n'
|
help = _('Title of generated catalog used as title in metadata.\n'
|
||||||
"Default: '%default'\n"
|
"Default: '%default'\n"
|
||||||
"Applies to: ePub, MOBI output formats")),
|
"Applies to: ePub, MOBI output formats")),
|
||||||
|
Option('--debug-pipeline',
|
||||||
|
default=None,
|
||||||
|
dest='debug_pipeline',
|
||||||
|
help=_('Save the output from different stages of the conversion '
|
||||||
|
'pipeline to the specified '
|
||||||
|
'directory. Useful if you are unsure at which stage '
|
||||||
|
'of the conversion process a bug is occurring.\n'
|
||||||
|
'Default: None\n'
|
||||||
|
'Applies to: ePub, MOBI output formats')),
|
||||||
Option('--exclude-genre',
|
Option('--exclude-genre',
|
||||||
default='\[[\w ]*\]',
|
default='\[[\w ]*\]',
|
||||||
dest='exclude_genre',
|
dest='exclude_genre',
|
||||||
help=_("Regex describing tags to exclude as genres.\n" "Default: '%default' excludes bracketed tags, e.g. '[<tag>]'\n"
|
help=_("Regex describing tags to exclude as genres.\n" "Default: '%default' excludes bracketed tags, e.g. '[<tag>]'\n"
|
||||||
"Applies to: ePub, MOBI output formats")),
|
"Applies to: ePub, MOBI output formats")),
|
||||||
Option('--exclude-tags',
|
Option('--exclude-tags',
|
||||||
default='~,Catalog',
|
default=('~,'+_('Catalog')),
|
||||||
dest='exclude_tags',
|
dest='exclude_tags',
|
||||||
help=_("Comma-separated list of tag words indicating book should be excluded from output. Case-insensitive.\n"
|
help=_("Comma-separated list of tag words indicating book should be excluded from output. Case-insensitive.\n"
|
||||||
"--exclude-tags=skip will match 'skip this book' and 'Skip will like this'.\n"
|
"--exclude-tags=skip will match 'skip this book' and 'Skip will like this'.\n"
|
||||||
@ -2497,6 +2506,7 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
pw.MagickThumbnailImage(thumb, 75, 100)
|
pw.MagickThumbnailImage(thumb, 75, 100)
|
||||||
pw.MagickWriteImage(thumb, os.path.join(image_dir, thumb_file))
|
pw.MagickWriteImage(thumb, os.path.join(image_dir, thumb_file))
|
||||||
pw.DestroyMagickWand(thumb)
|
pw.DestroyMagickWand(thumb)
|
||||||
|
pw.DestroyMagickWand(img)
|
||||||
except IOError:
|
except IOError:
|
||||||
print "generate_thumbnail() IOError with %s" % title['title']
|
print "generate_thumbnail() IOError with %s" % title['title']
|
||||||
except RuntimeError:
|
except RuntimeError:
|
||||||
@ -2541,11 +2551,8 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
return "%.2f%% %s" % (self.progressInt, self.progressString)
|
return "%.2f%% %s" % (self.progressInt, self.progressString)
|
||||||
|
|
||||||
def run(self, path_to_output, opts, db, notification=DummyReporter()):
|
def run(self, path_to_output, opts, db, notification=DummyReporter()):
|
||||||
import gc
|
|
||||||
from calibre.utils.logging import Log
|
from calibre.utils.logging import Log
|
||||||
|
|
||||||
gc.set_debug(gc.DEBUG_LEAK)
|
|
||||||
|
|
||||||
log = Log()
|
log = Log()
|
||||||
opts.fmt = self.fmt = path_to_output.rpartition('.')[2]
|
opts.fmt = self.fmt = path_to_output.rpartition('.')[2]
|
||||||
self.opts = opts
|
self.opts = opts
|
||||||
@ -2562,13 +2569,18 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
log("%s:run" % self.name)
|
log("%s:run" % self.name)
|
||||||
log(" path_to_output: %s" % path_to_output)
|
log(" path_to_output: %s" % path_to_output)
|
||||||
log(" Output format: %s" % self.fmt)
|
log(" Output format: %s" % self.fmt)
|
||||||
log(" Book count: %d" % len(opts_dict['ids']))
|
if opts_dict['ids']:
|
||||||
|
log(" Book count: %d" % len(opts_dict['ids']))
|
||||||
# Display opts
|
# Display opts
|
||||||
keys = opts_dict.keys()
|
keys = opts_dict.keys()
|
||||||
keys.sort()
|
keys.sort()
|
||||||
log(" opts:")
|
log(" opts:")
|
||||||
for key in keys:
|
for key in keys:
|
||||||
if key == 'ids': continue
|
if key == 'ids':
|
||||||
|
if opts_dict[key]:
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
log(" %s: (all)" % key)
|
||||||
log(" %s: %s" % (key, opts_dict[key]))
|
log(" %s: %s" % (key, opts_dict[key]))
|
||||||
|
|
||||||
# Launch the Catalog builder
|
# Launch the Catalog builder
|
||||||
@ -2593,5 +2605,3 @@ class EPUB_MOBI(CatalogPlugin):
|
|||||||
plumber.merge_ui_recommendations(recommendations)
|
plumber.merge_ui_recommendations(recommendations)
|
||||||
|
|
||||||
plumber.run()
|
plumber.run()
|
||||||
|
|
||||||
print gc.garbage
|
|
||||||
|
@ -112,6 +112,7 @@ def get_components(template, mi, id, timefmt='%b %Y', length=250,
|
|||||||
format_args['title'] = mi.title
|
format_args['title'] = mi.title
|
||||||
if mi.authors:
|
if mi.authors:
|
||||||
format_args['authors'] = mi.format_authors()
|
format_args['authors'] = mi.format_authors()
|
||||||
|
format_args['author'] = format_args['authors']
|
||||||
if mi.author_sort:
|
if mi.author_sort:
|
||||||
format_args['author_sort'] = mi.author_sort
|
format_args['author_sort'] = mi.author_sort
|
||||||
if mi.tags:
|
if mi.tags:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user