mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Pull from trunk
This commit is contained in:
commit
9ebb335346
@ -8,24 +8,24 @@ Conversion of HTML/OPF files follows several stages:
|
|||||||
|
|
||||||
* All links in the HTML files or in the OPF manifest are
|
* All links in the HTML files or in the OPF manifest are
|
||||||
followed to build up a list of HTML files to be converted.
|
followed to build up a list of HTML files to be converted.
|
||||||
This stage is implemented by
|
This stage is implemented by
|
||||||
:function:`calibre.ebooks.html.traverse` and
|
:function:`calibre.ebooks.html.traverse` and
|
||||||
:class:`calibre.ebooks.html.HTMLFile`.
|
:class:`calibre.ebooks.html.HTMLFile`.
|
||||||
|
|
||||||
* The HTML is pre-processed to make it more semantic.
|
* The HTML is pre-processed to make it more semantic.
|
||||||
All links in the HTML files to other resources like images,
|
All links in the HTML files to other resources like images,
|
||||||
stylesheets, etc. are relativized. The resources are copied
|
stylesheets, etc. are relativized. The resources are copied
|
||||||
into the `resources` sub directory. This is accomplished by
|
into the `resources` sub directory. This is accomplished by
|
||||||
:class:`calibre.ebooks.html.PreProcessor` and
|
:class:`calibre.ebooks.html.PreProcessor` and
|
||||||
:class:`calibre.ebooks.html.Parser`.
|
:class:`calibre.ebooks.html.Parser`.
|
||||||
|
|
||||||
* The HTML is processed. Various operations are performed.
|
* The HTML is processed. Various operations are performed.
|
||||||
All style declarations are extracted and consolidated into
|
All style declarations are extracted and consolidated into
|
||||||
a single style sheet. Chapters are auto-detected and marked.
|
a single style sheet. Chapters are auto-detected and marked.
|
||||||
Various font related manipulations are performed. See
|
Various font related manipulations are performed. See
|
||||||
:class:`HTMLProcessor`.
|
:class:`HTMLProcessor`.
|
||||||
|
|
||||||
* The processed HTML is saved and the
|
* The processed HTML is saved and the
|
||||||
:module:`calibre.ebooks.epub.split` module is used to split up
|
:module:`calibre.ebooks.epub.split` module is used to split up
|
||||||
large HTML files into smaller chunks.
|
large HTML files into smaller chunks.
|
||||||
|
|
||||||
@ -64,7 +64,7 @@ def remove_bad_link(element, attribute, link, pos):
|
|||||||
|
|
||||||
def check_links(opf_path, pretty_print):
|
def check_links(opf_path, pretty_print):
|
||||||
'''
|
'''
|
||||||
Find and remove all invalid links in the HTML files
|
Find and remove all invalid links in the HTML files
|
||||||
'''
|
'''
|
||||||
logger = logging.getLogger('html2epub')
|
logger = logging.getLogger('html2epub')
|
||||||
logger.info('\tChecking files for bad links...')
|
logger.info('\tChecking files for bad links...')
|
||||||
@ -78,7 +78,7 @@ def check_links(opf_path, pretty_print):
|
|||||||
if isinstance(f, str):
|
if isinstance(f, str):
|
||||||
f = f.decode('utf-8')
|
f = f.decode('utf-8')
|
||||||
html_files.append(os.path.abspath(content(f)))
|
html_files.append(os.path.abspath(content(f)))
|
||||||
|
|
||||||
for path in html_files:
|
for path in html_files:
|
||||||
if not os.access(path, os.R_OK):
|
if not os.access(path, os.R_OK):
|
||||||
continue
|
continue
|
||||||
@ -113,27 +113,27 @@ def find_html_index(files):
|
|||||||
return html_files[-1], os.path.splitext(html_files[-1])[1].lower()[1:]
|
return html_files[-1], os.path.splitext(html_files[-1])[1].lower()[1:]
|
||||||
|
|
||||||
class HTMLProcessor(Processor, Rationalizer):
|
class HTMLProcessor(Processor, Rationalizer):
|
||||||
|
|
||||||
def __init__(self, htmlfile, opts, tdir, resource_map, htmlfiles, stylesheets):
|
def __init__(self, htmlfile, opts, tdir, resource_map, htmlfiles, stylesheets):
|
||||||
Processor.__init__(self, htmlfile, opts, tdir, resource_map, htmlfiles,
|
Processor.__init__(self, htmlfile, opts, tdir, resource_map, htmlfiles,
|
||||||
name='html2epub')
|
name='html2epub')
|
||||||
if opts.verbose > 2:
|
if opts.verbose > 2:
|
||||||
self.debug_tree('parsed')
|
self.debug_tree('parsed')
|
||||||
self.detect_chapters()
|
self.detect_chapters()
|
||||||
|
|
||||||
self.extract_css(stylesheets)
|
self.extract_css(stylesheets)
|
||||||
if self.opts.base_font_size2 > 0:
|
if self.opts.base_font_size2 > 0:
|
||||||
self.font_css = self.rationalize(self.external_stylesheets+[self.stylesheet],
|
self.font_css = self.rationalize(self.external_stylesheets+[self.stylesheet],
|
||||||
self.root, self.opts)
|
self.root, self.opts)
|
||||||
if opts.verbose > 2:
|
if opts.verbose > 2:
|
||||||
self.debug_tree('nocss')
|
self.debug_tree('nocss')
|
||||||
|
|
||||||
if hasattr(self.body, 'xpath'):
|
if hasattr(self.body, 'xpath'):
|
||||||
for script in list(self.body.xpath('descendant::script')):
|
for script in list(self.body.xpath('descendant::script')):
|
||||||
script.getparent().remove(script)
|
script.getparent().remove(script)
|
||||||
|
|
||||||
self.fix_markup()
|
self.fix_markup()
|
||||||
|
|
||||||
def convert_image(self, img):
|
def convert_image(self, img):
|
||||||
rpath = img.get('src', '')
|
rpath = img.get('src', '')
|
||||||
path = os.path.join(os.path.dirname(self.save_path()), *rpath.split('/'))
|
path = os.path.join(os.path.dirname(self.save_path()), *rpath.split('/'))
|
||||||
@ -150,10 +150,10 @@ class HTMLProcessor(Processor, Rationalizer):
|
|||||||
if val == rpath:
|
if val == rpath:
|
||||||
self.resource_map[key] = rpath+'_calibre_converted.jpg'
|
self.resource_map[key] = rpath+'_calibre_converted.jpg'
|
||||||
img.set('src', rpath+'_calibre_converted.jpg')
|
img.set('src', rpath+'_calibre_converted.jpg')
|
||||||
|
|
||||||
def fix_markup(self):
|
def fix_markup(self):
|
||||||
'''
|
'''
|
||||||
Perform various markup transforms to get the output to render correctly
|
Perform various markup transforms to get the output to render correctly
|
||||||
in the quirky ADE.
|
in the quirky ADE.
|
||||||
'''
|
'''
|
||||||
# Replace <br> that are children of <body> as ADE doesn't handle them
|
# Replace <br> that are children of <body> as ADE doesn't handle them
|
||||||
@ -179,8 +179,8 @@ class HTMLProcessor(Processor, Rationalizer):
|
|||||||
if not br.tail:
|
if not br.tail:
|
||||||
br.tail = ''
|
br.tail = ''
|
||||||
br.tail += sibling.tail
|
br.tail += sibling.tail
|
||||||
|
|
||||||
|
|
||||||
if self.opts.profile.remove_object_tags:
|
if self.opts.profile.remove_object_tags:
|
||||||
for tag in self.root.xpath('//embed'):
|
for tag in self.root.xpath('//embed'):
|
||||||
tag.getparent().remove(tag)
|
tag.getparent().remove(tag)
|
||||||
@ -188,42 +188,46 @@ class HTMLProcessor(Processor, Rationalizer):
|
|||||||
if tag.get('type', '').lower().strip() in ('image/svg+xml',):
|
if tag.get('type', '').lower().strip() in ('image/svg+xml',):
|
||||||
continue
|
continue
|
||||||
tag.getparent().remove(tag)
|
tag.getparent().remove(tag)
|
||||||
|
|
||||||
|
|
||||||
for tag in self.root.xpath('//title|//style'):
|
for tag in self.root.xpath('//title|//style'):
|
||||||
if not tag.text:
|
if not tag.text:
|
||||||
tag.getparent().remove(tag)
|
tag.getparent().remove(tag)
|
||||||
for tag in self.root.xpath('//script'):
|
for tag in self.root.xpath('//script'):
|
||||||
if not tag.text and not tag.get('src', False):
|
if not tag.text and not tag.get('src', False):
|
||||||
tag.getparent().remove(tag)
|
tag.getparent().remove(tag)
|
||||||
|
|
||||||
for tag in self.root.xpath('//form'):
|
for tag in self.root.xpath('//form'):
|
||||||
tag.getparent().remove(tag)
|
tag.getparent().remove(tag)
|
||||||
|
|
||||||
for tag in self.root.xpath('//center'):
|
for tag in self.root.xpath('//center'):
|
||||||
tag.tag = 'div'
|
tag.tag = 'div'
|
||||||
tag.set('style', 'text-align:center')
|
tag.set('style', 'text-align:center')
|
||||||
|
|
||||||
if self.opts.linearize_tables:
|
if self.opts.linearize_tables:
|
||||||
for tag in self.root.xpath('//table | //tr | //th | //td'):
|
for tag in self.root.xpath('//table | //tr | //th | //td'):
|
||||||
tag.tag = 'div'
|
tag.tag = 'div'
|
||||||
|
|
||||||
|
# ADE can't handle & in an img url
|
||||||
|
for tag in self.root.xpath('//img[@src]'):
|
||||||
|
tag.set('src', tag.get('src', '').replace('&', ''))
|
||||||
|
|
||||||
|
|
||||||
def save(self):
|
def save(self):
|
||||||
for meta in list(self.root.xpath('//meta')):
|
for meta in list(self.root.xpath('//meta')):
|
||||||
meta.getparent().remove(meta)
|
meta.getparent().remove(meta)
|
||||||
# Strip all comments since Adobe DE is petrified of them
|
# Strip all comments since Adobe DE is petrified of them
|
||||||
Processor.save(self, strip_comments=True)
|
Processor.save(self, strip_comments=True)
|
||||||
|
|
||||||
def remove_first_image(self):
|
def remove_first_image(self):
|
||||||
images = self.root.xpath('//img')
|
images = self.root.xpath('//img')
|
||||||
if images:
|
if images:
|
||||||
images[0].getparent().remove(images[0])
|
images[0].getparent().remove(images[0])
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def config(defaults=None):
|
def config(defaults=None):
|
||||||
return common_config(defaults=defaults)
|
return common_config(defaults=defaults)
|
||||||
@ -235,7 +239,7 @@ def option_parser():
|
|||||||
|
|
||||||
Convert a HTML file to an EPUB ebook. Recursively follows links in the HTML file.
|
Convert a HTML file to an EPUB ebook. Recursively follows links in the HTML file.
|
||||||
If you specify an OPF file instead of an HTML file, the list of links is takes from
|
If you specify an OPF file instead of an HTML file, the list of links is takes from
|
||||||
the <spine> element of the OPF file.
|
the <spine> element of the OPF file.
|
||||||
'''))
|
'''))
|
||||||
|
|
||||||
def parse_content(filelist, opts, tdir):
|
def parse_content(filelist, opts, tdir):
|
||||||
@ -246,7 +250,7 @@ def parse_content(filelist, opts, tdir):
|
|||||||
first_image_removed = False
|
first_image_removed = False
|
||||||
for htmlfile in filelist:
|
for htmlfile in filelist:
|
||||||
logging.getLogger('html2epub').debug('Processing %s...'%htmlfile)
|
logging.getLogger('html2epub').debug('Processing %s...'%htmlfile)
|
||||||
hp = HTMLProcessor(htmlfile, opts, os.path.join(tdir, 'content'),
|
hp = HTMLProcessor(htmlfile, opts, os.path.join(tdir, 'content'),
|
||||||
resource_map, filelist, stylesheets)
|
resource_map, filelist, stylesheets)
|
||||||
if not first_image_removed and opts.remove_first_image:
|
if not first_image_removed and opts.remove_first_image:
|
||||||
first_image_removed = hp.remove_first_image()
|
first_image_removed = hp.remove_first_image()
|
||||||
@ -254,7 +258,7 @@ def parse_content(filelist, opts, tdir):
|
|||||||
hp.save()
|
hp.save()
|
||||||
stylesheet_map[os.path.basename(hp.save_path())] = \
|
stylesheet_map[os.path.basename(hp.save_path())] = \
|
||||||
[s for s in hp.external_stylesheets + [hp.stylesheet, hp.font_css, hp.override_css] if s is not None]
|
[s for s in hp.external_stylesheets + [hp.stylesheet, hp.font_css, hp.override_css] if s is not None]
|
||||||
|
|
||||||
logging.getLogger('html2epub').debug('Saving stylesheets...')
|
logging.getLogger('html2epub').debug('Saving stylesheets...')
|
||||||
if opts.base_font_size2 > 0:
|
if opts.base_font_size2 > 0:
|
||||||
Rationalizer.remove_font_size_information(stylesheets.values())
|
Rationalizer.remove_font_size_information(stylesheets.values())
|
||||||
@ -268,7 +272,7 @@ def parse_content(filelist, opts, tdir):
|
|||||||
if toc.count('chapter') + toc.count('file') > opts.toc_threshold:
|
if toc.count('chapter') + toc.count('file') > opts.toc_threshold:
|
||||||
toc.purge(['link', 'unknown'])
|
toc.purge(['link', 'unknown'])
|
||||||
toc.purge(['link'], max=opts.max_toc_links)
|
toc.purge(['link'], max=opts.max_toc_links)
|
||||||
|
|
||||||
return resource_map, hp.htmlfile_map, toc, stylesheet_map
|
return resource_map, hp.htmlfile_map, toc, stylesheet_map
|
||||||
|
|
||||||
TITLEPAGE = '''\
|
TITLEPAGE = '''\
|
||||||
@ -325,26 +329,26 @@ def process_title_page(mi, filelist, htmlfilemap, opts, tdir):
|
|||||||
metadata_cover = mi.cover
|
metadata_cover = mi.cover
|
||||||
if metadata_cover and not os.path.exists(metadata_cover):
|
if metadata_cover and not os.path.exists(metadata_cover):
|
||||||
metadata_cover = None
|
metadata_cover = None
|
||||||
|
|
||||||
cpath = '/'.join(('resources', '_cover_.jpg'))
|
cpath = '/'.join(('resources', '_cover_.jpg'))
|
||||||
cover_dest = os.path.join(tdir, 'content', *cpath.split('/'))
|
cover_dest = os.path.join(tdir, 'content', *cpath.split('/'))
|
||||||
if metadata_cover is not None:
|
if metadata_cover is not None:
|
||||||
if not create_cover_image(metadata_cover, cover_dest,
|
if not create_cover_image(metadata_cover, cover_dest,
|
||||||
opts.profile.screen_size):
|
opts.profile.screen_size):
|
||||||
metadata_cover = None
|
metadata_cover = None
|
||||||
specified_cover = opts.cover
|
specified_cover = opts.cover
|
||||||
if specified_cover and not os.path.exists(specified_cover):
|
if specified_cover and not os.path.exists(specified_cover):
|
||||||
specified_cover = None
|
specified_cover = None
|
||||||
if specified_cover is not None:
|
if specified_cover is not None:
|
||||||
if not create_cover_image(specified_cover, cover_dest,
|
if not create_cover_image(specified_cover, cover_dest,
|
||||||
opts.profile.screen_size):
|
opts.profile.screen_size):
|
||||||
specified_cover = None
|
specified_cover = None
|
||||||
|
|
||||||
cover = metadata_cover if specified_cover is None or (opts.prefer_metadata_cover and metadata_cover is not None) else specified_cover
|
cover = metadata_cover if specified_cover is None or (opts.prefer_metadata_cover and metadata_cover is not None) else specified_cover
|
||||||
|
|
||||||
if cover is not None:
|
if cover is not None:
|
||||||
titlepage = TITLEPAGE%cpath
|
titlepage = TITLEPAGE%cpath
|
||||||
tp = 'calibre_title_page.html' if old_title_page is None else old_title_page
|
tp = 'calibre_title_page.html' if old_title_page is None else old_title_page
|
||||||
tppath = os.path.join(tdir, 'content', tp)
|
tppath = os.path.join(tdir, 'content', tp)
|
||||||
with open(tppath, 'wb') as f:
|
with open(tppath, 'wb') as f:
|
||||||
f.write(titlepage)
|
f.write(titlepage)
|
||||||
@ -370,7 +374,7 @@ def condense_ncx(ncx_path):
|
|||||||
compressed = etree.tostring(tree.getroot(), encoding='utf-8')
|
compressed = etree.tostring(tree.getroot(), encoding='utf-8')
|
||||||
open(ncx_path, 'wb').write(compressed)
|
open(ncx_path, 'wb').write(compressed)
|
||||||
|
|
||||||
def convert(htmlfile, opts, notification=None, create_epub=True,
|
def convert(htmlfile, opts, notification=None, create_epub=True,
|
||||||
oeb_cover=False, extract_to=None):
|
oeb_cover=False, extract_to=None):
|
||||||
htmlfile = os.path.abspath(htmlfile)
|
htmlfile = os.path.abspath(htmlfile)
|
||||||
if opts.output is None:
|
if opts.output is None:
|
||||||
@ -399,16 +403,16 @@ def convert(htmlfile, opts, notification=None, create_epub=True,
|
|||||||
else:
|
else:
|
||||||
opf, filelist = get_filelist(htmlfile, opts)
|
opf, filelist = get_filelist(htmlfile, opts)
|
||||||
mi = merge_metadata(htmlfile, opf, opts)
|
mi = merge_metadata(htmlfile, opf, opts)
|
||||||
opts.chapter = XPath(opts.chapter,
|
opts.chapter = XPath(opts.chapter,
|
||||||
namespaces={'re':'http://exslt.org/regular-expressions'})
|
namespaces={'re':'http://exslt.org/regular-expressions'})
|
||||||
for x in (1, 2, 3):
|
for x in (1, 2, 3):
|
||||||
attr = 'level%d_toc'%x
|
attr = 'level%d_toc'%x
|
||||||
if getattr(opts, attr):
|
if getattr(opts, attr):
|
||||||
setattr(opts, attr, XPath(getattr(opts, attr),
|
setattr(opts, attr, XPath(getattr(opts, attr),
|
||||||
namespaces={'re':'http://exslt.org/regular-expressions'}))
|
namespaces={'re':'http://exslt.org/regular-expressions'}))
|
||||||
else:
|
else:
|
||||||
setattr(opts, attr, None)
|
setattr(opts, attr, None)
|
||||||
|
|
||||||
with TemporaryDirectory(suffix='_html2epub', keep=opts.keep_intermediate) as tdir:
|
with TemporaryDirectory(suffix='_html2epub', keep=opts.keep_intermediate) as tdir:
|
||||||
if opts.keep_intermediate:
|
if opts.keep_intermediate:
|
||||||
print 'Intermediate files in', tdir
|
print 'Intermediate files in', tdir
|
||||||
@ -416,16 +420,16 @@ def convert(htmlfile, opts, notification=None, create_epub=True,
|
|||||||
parse_content(filelist, opts, tdir)
|
parse_content(filelist, opts, tdir)
|
||||||
logger = logging.getLogger('html2epub')
|
logger = logging.getLogger('html2epub')
|
||||||
resources = [os.path.join(tdir, 'content', f) for f in resource_map.values()]
|
resources = [os.path.join(tdir, 'content', f) for f in resource_map.values()]
|
||||||
|
|
||||||
|
|
||||||
title_page, has_title_page = process_title_page(mi, filelist, htmlfile_map, opts, tdir)
|
title_page, has_title_page = process_title_page(mi, filelist, htmlfile_map, opts, tdir)
|
||||||
spine = [htmlfile_map[f.path] for f in filelist]
|
spine = [htmlfile_map[f.path] for f in filelist]
|
||||||
if not oeb_cover and title_page is not None:
|
if not oeb_cover and title_page is not None:
|
||||||
spine = [title_page] + spine
|
spine = [title_page] + spine
|
||||||
mi.cover = None
|
mi.cover = None
|
||||||
mi.cover_data = (None, None)
|
mi.cover_data = (None, None)
|
||||||
|
|
||||||
|
|
||||||
mi = create_metadata(tdir, mi, spine, resources)
|
mi = create_metadata(tdir, mi, spine, resources)
|
||||||
buf = cStringIO.StringIO()
|
buf = cStringIO.StringIO()
|
||||||
if mi.toc:
|
if mi.toc:
|
||||||
@ -453,7 +457,7 @@ def convert(htmlfile, opts, notification=None, create_epub=True,
|
|||||||
logger.info('\tBuilding page map...')
|
logger.info('\tBuilding page map...')
|
||||||
add_page_map(opf_path, opts)
|
add_page_map(opf_path, opts)
|
||||||
check_links(opf_path, opts.pretty_print)
|
check_links(opf_path, opts.pretty_print)
|
||||||
|
|
||||||
opf = OPF(opf_path, tdir)
|
opf = OPF(opf_path, tdir)
|
||||||
opf.remove_guide()
|
opf.remove_guide()
|
||||||
oeb_cover_file = None
|
oeb_cover_file = None
|
||||||
@ -465,7 +469,7 @@ def convert(htmlfile, opts, notification=None, create_epub=True,
|
|||||||
opf.add_guide_item('cover', 'Cover', 'content/'+spine[0])
|
opf.add_guide_item('cover', 'Cover', 'content/'+spine[0])
|
||||||
if oeb_cover and oeb_cover_file:
|
if oeb_cover and oeb_cover_file:
|
||||||
opf.add_guide_item('cover', 'Cover', 'content/'+oeb_cover_file)
|
opf.add_guide_item('cover', 'Cover', 'content/'+oeb_cover_file)
|
||||||
|
|
||||||
cpath = os.path.join(tdir, 'content', 'resources', '_cover_.jpg')
|
cpath = os.path.join(tdir, 'content', 'resources', '_cover_.jpg')
|
||||||
if os.path.exists(cpath):
|
if os.path.exists(cpath):
|
||||||
opf.add_path_to_manifest(cpath, 'image/jpeg')
|
opf.add_path_to_manifest(cpath, 'image/jpeg')
|
||||||
@ -477,29 +481,29 @@ def convert(htmlfile, opts, notification=None, create_epub=True,
|
|||||||
condense_ncx(ncx_path)
|
condense_ncx(ncx_path)
|
||||||
if os.stat(ncx_path).st_size > opts.profile.flow_size:
|
if os.stat(ncx_path).st_size > opts.profile.flow_size:
|
||||||
logger.warn('NCX still larger than allowed size at %d bytes. Menu based Table of Contents may not work on device.'%os.stat(ncx_path).st_size)
|
logger.warn('NCX still larger than allowed size at %d bytes. Menu based Table of Contents may not work on device.'%os.stat(ncx_path).st_size)
|
||||||
|
|
||||||
if create_epub:
|
if create_epub:
|
||||||
epub = initialize_container(opts.output)
|
epub = initialize_container(opts.output)
|
||||||
epub.add_dir(tdir)
|
epub.add_dir(tdir)
|
||||||
epub.close()
|
epub.close()
|
||||||
run_plugins_on_postprocess(opts.output, 'epub')
|
run_plugins_on_postprocess(opts.output, 'epub')
|
||||||
logger.info(_('Output written to ')+opts.output)
|
logger.info(_('Output written to ')+opts.output)
|
||||||
|
|
||||||
if opts.show_opf:
|
if opts.show_opf:
|
||||||
print open(opf_path, 'rb').read()
|
print open(opf_path, 'rb').read()
|
||||||
|
|
||||||
if opts.extract_to is not None:
|
if opts.extract_to is not None:
|
||||||
if os.path.exists(opts.extract_to):
|
if os.path.exists(opts.extract_to):
|
||||||
shutil.rmtree(opts.extract_to)
|
shutil.rmtree(opts.extract_to)
|
||||||
shutil.copytree(tdir, opts.extract_to)
|
shutil.copytree(tdir, opts.extract_to)
|
||||||
|
|
||||||
if extract_to is not None:
|
if extract_to is not None:
|
||||||
if os.path.exists(extract_to):
|
if os.path.exists(extract_to):
|
||||||
shutil.rmtree(extract_to)
|
shutil.rmtree(extract_to)
|
||||||
shutil.copytree(tdir, extract_to)
|
shutil.copytree(tdir, extract_to)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def main(args=sys.argv):
|
def main(args=sys.argv):
|
||||||
parser = option_parser()
|
parser = option_parser()
|
||||||
opts, args = parser.parse_args(args)
|
opts, args = parser.parse_args(args)
|
||||||
@ -509,6 +513,6 @@ def main(args=sys.argv):
|
|||||||
return 1
|
return 1
|
||||||
convert(args[1], opts)
|
convert(args[1], opts)
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
sys.exit(main())
|
sys.exit(main())
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -13,7 +13,6 @@ class Exiled(BasicNewsRecipe):
|
|||||||
__author__ = 'Darko Miletic'
|
__author__ = 'Darko Miletic'
|
||||||
description = "Mankind's only alternative since 1997 - Formerly known as The eXile"
|
description = "Mankind's only alternative since 1997 - Formerly known as The eXile"
|
||||||
publisher = 'Exiled Online'
|
publisher = 'Exiled Online'
|
||||||
language = _('English')
|
|
||||||
category = 'news, politics, international'
|
category = 'news, politics, international'
|
||||||
oldest_article = 15
|
oldest_article = 15
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
@ -21,16 +20,18 @@ class Exiled(BasicNewsRecipe):
|
|||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
encoding = 'utf8'
|
encoding = 'utf8'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
|
language = _('English')
|
||||||
cover_url = 'http://exiledonline.com/wp-content/themes/exiledonline_theme/images/header-sm.gif'
|
cover_url = 'http://exiledonline.com/wp-content/themes/exiledonline_theme/images/header-sm.gif'
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment' , description
|
'--comment' , description
|
||||||
|
, '--base-font-size', '10'
|
||||||
, '--category' , category
|
, '--category' , category
|
||||||
, '--publisher' , publisher
|
, '--publisher' , publisher
|
||||||
]
|
]
|
||||||
|
|
||||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':'main'})]
|
keep_only_tags = [dict(name='div', attrs={'id':'main'})]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
@ -39,8 +40,8 @@ class Exiled(BasicNewsRecipe):
|
|||||||
,dict(name='div', attrs={'id':['comments','navig']})
|
,dict(name='div', attrs={'id':['comments','navig']})
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
feeds = [(u'Articles', u'http://exiledonline.com/feed/' )]
|
feeds = [(u'Articles', u'http://exiledonline.com/feed/')]
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
for item in soup.findAll(style=True):
|
for item in soup.findAll(style=True):
|
||||||
@ -48,4 +49,9 @@ class Exiled(BasicNewsRecipe):
|
|||||||
mtag = '\n<meta http-equiv="Content-Language" content="en"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n'
|
mtag = '\n<meta http-equiv="Content-Language" content="en"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n'
|
||||||
soup.head.insert(0,mtag)
|
soup.head.insert(0,mtag)
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
def get_article_url(self, article):
|
||||||
|
raw = article.get('link', None)
|
||||||
|
final = raw + 'all/1/'
|
||||||
|
return final
|
||||||
|
|
||||||
|
@ -6,7 +6,7 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
|||||||
laprensa.com.ni
|
laprensa.com.ni
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import locale
|
import datetime
|
||||||
import time
|
import time
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
@ -23,23 +23,9 @@ class LaPrensa_ni(BasicNewsRecipe):
|
|||||||
encoding = 'cp1252'
|
encoding = 'cp1252'
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
language = _('Spanish')
|
language = _('Spanish')
|
||||||
|
months_es = ['enero','febrero','marzo','abril','mayo','junio','julio','agosto','septiembre','octubre','noviembre','diciembre']
|
||||||
#Locale setting to get appropriate date/month values in Spanish
|
current_month = months_es[datetime.date.today().month - 1]
|
||||||
try:
|
current_index = time.strftime("http://www.laprensa.com.ni/archivo/%Y/" + current_month + "/%d/noticias/")
|
||||||
#Windows seting for locale
|
|
||||||
locale.setlocale(locale.LC_TIME,'Spanish_Nicaragua')
|
|
||||||
except locale.Error:
|
|
||||||
#Linux setting for locale -- choose one appropriate for your distribution
|
|
||||||
try:
|
|
||||||
locale.setlocale(locale.LC_TIME,'es_NI')
|
|
||||||
except locale.Error:
|
|
||||||
try:
|
|
||||||
locale.setlocale(locale.LC_TIME,'es_ES')
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
current_index = time.strftime("http://www.laprensa.com.ni/archivo/%Y/%B/%d/noticias/")
|
|
||||||
|
|
||||||
html2lrf_options = [
|
html2lrf_options = [
|
||||||
'--comment', description
|
'--comment', description
|
||||||
@ -91,6 +77,3 @@ class LaPrensa_ni(BasicNewsRecipe):
|
|||||||
totalfeeds.append((feedtitle, articles))
|
totalfeeds.append((feedtitle, articles))
|
||||||
return totalfeeds
|
return totalfeeds
|
||||||
|
|
||||||
def cleanup(self):
|
|
||||||
#Going back to the default locale
|
|
||||||
locale.setlocale(locale.LC_TIME,'')
|
|
||||||
|
@ -2,10 +2,10 @@ from django.db import models
|
|||||||
from django.utils.translation import ugettext_lazy as _
|
from django.utils.translation import ugettext_lazy as _
|
||||||
from django.db.models import permalink
|
from django.db.models import permalink
|
||||||
from django.contrib.auth.models import User
|
from django.contrib.auth.models import User
|
||||||
from calibre.www.apps.tagging.fields import TagField
|
from tagging.fields import TagField
|
||||||
from calibre.www.apps.blog.managers import PublicManager
|
from calibre.www.apps.blog.managers import PublicManager
|
||||||
|
|
||||||
import calibre.www.apps.tagging as tagging
|
import tagging
|
||||||
|
|
||||||
class Category(models.Model):
|
class Category(models.Model):
|
||||||
"""Category model."""
|
"""Category model."""
|
||||||
|
@ -40,10 +40,10 @@ INSTALLED_APPS = (
|
|||||||
'django.contrib.sites',
|
'django.contrib.sites',
|
||||||
'django.contrib.admin',
|
'django.contrib.admin',
|
||||||
'django.contrib.comments',
|
'django.contrib.comments',
|
||||||
|
'django.contrib.markup',
|
||||||
'calibre.www.apps.inlines',
|
'calibre.www.apps.inlines',
|
||||||
'calibre.www.apps.tagging',
|
'tagging',
|
||||||
'calibre.www.apps.blog',
|
'calibre.www.apps.blog',
|
||||||
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -2,14 +2,16 @@ from django.conf.urls.defaults import patterns, include, handler404, handler500
|
|||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
|
|
||||||
# Uncomment the next two lines to enable the admin:
|
# Uncomment the next two lines to enable the admin:
|
||||||
#from django.contrib import admin
|
from django.contrib import admin
|
||||||
#admin.autodiscover()
|
admin.autodiscover()
|
||||||
|
|
||||||
urlpatterns = patterns('',
|
urlpatterns = patterns('',
|
||||||
|
|
||||||
# (r'^admin/(.*)', admin.site.root),
|
(r'^admin/(.*)', admin.site.root),
|
||||||
|
|
||||||
(r'^comments/', include('django.contrib.comments.urls')),
|
(r'^comments/', include('django.contrib.comments.urls')),
|
||||||
|
(r'', include('calibre.www.apps.blog.urls')),
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
)
|
)
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
from django.utils.translation import ugettext as _
|
from django.utils.translation import ugettext as _
|
||||||
|
|
||||||
from calibre.www.apps.tagging.managers import ModelTaggedItemManager, TagDescriptor
|
from tagging.managers import ModelTaggedItemManager, TagDescriptor
|
||||||
|
|
||||||
VERSION = (0, 3, 'pre')
|
VERSION = (0, 3, 'pre')
|
||||||
|
|
@ -1,5 +1,5 @@
|
|||||||
from django.contrib import admin
|
from django.contrib import admin
|
||||||
from calibre.www.apps.tagging.models import Tag, TaggedItem
|
from tagging.models import Tag, TaggedItem
|
||||||
|
|
||||||
admin.site.register(TaggedItem)
|
admin.site.register(TaggedItem)
|
||||||
admin.site.register(Tag)
|
admin.site.register(Tag)
|
@ -5,9 +5,9 @@ from django.db.models import signals
|
|||||||
from django.db.models.fields import CharField
|
from django.db.models.fields import CharField
|
||||||
from django.utils.translation import ugettext_lazy as _
|
from django.utils.translation import ugettext_lazy as _
|
||||||
|
|
||||||
from calibre.www.apps.tagging import settings
|
from tagging import settings
|
||||||
from calibre.www.apps.tagging.models import Tag
|
from tagging.models import Tag
|
||||||
from calibre.www.apps.tagging.utils import edit_string_for_tags
|
from tagging.utils import edit_string_for_tags
|
||||||
|
|
||||||
class TagField(CharField):
|
class TagField(CharField):
|
||||||
"""
|
"""
|
||||||
@ -101,7 +101,7 @@ class TagField(CharField):
|
|||||||
return 'CharField'
|
return 'CharField'
|
||||||
|
|
||||||
def formfield(self, **kwargs):
|
def formfield(self, **kwargs):
|
||||||
from calibre.www.apps.tagging import forms
|
from tagging import forms
|
||||||
defaults = {'form_class': forms.TagField}
|
defaults = {'form_class': forms.TagField}
|
||||||
defaults.update(kwargs)
|
defaults.update(kwargs)
|
||||||
return super(TagField, self).formfield(**defaults)
|
return super(TagField, self).formfield(**defaults)
|
@ -4,9 +4,9 @@ Tagging components for Django's form library.
|
|||||||
from django import forms
|
from django import forms
|
||||||
from django.utils.translation import ugettext as _
|
from django.utils.translation import ugettext as _
|
||||||
|
|
||||||
from calibre.www.apps.tagging import settings
|
from tagging import settings
|
||||||
from calibre.www.apps.tagging.models import Tag
|
from tagging.models import Tag
|
||||||
from calibre.www.apps.tagging.utils import parse_tag_input
|
from tagging.utils import parse_tag_input
|
||||||
|
|
||||||
class AdminTagForm(forms.ModelForm):
|
class AdminTagForm(forms.ModelForm):
|
||||||
class Meta:
|
class Meta:
|
@ -5,7 +5,7 @@ application.
|
|||||||
from django.contrib.contenttypes.models import ContentType
|
from django.contrib.contenttypes.models import ContentType
|
||||||
from django.db import models
|
from django.db import models
|
||||||
|
|
||||||
from calibre.www.apps.tagging.models import Tag, TaggedItem
|
from tagging.models import Tag, TaggedItem
|
||||||
|
|
||||||
class ModelTagManager(models.Manager):
|
class ModelTagManager(models.Manager):
|
||||||
"""
|
"""
|
@ -13,9 +13,9 @@ from django.db import connection, models
|
|||||||
from django.db.models.query import QuerySet
|
from django.db.models.query import QuerySet
|
||||||
from django.utils.translation import ugettext_lazy as _
|
from django.utils.translation import ugettext_lazy as _
|
||||||
|
|
||||||
from calibre.www.apps.tagging import settings
|
from tagging import settings
|
||||||
from calibre.www.apps.tagging.utils import calculate_cloud, get_tag_list, get_queryset_and_model, parse_tag_input
|
from tagging.utils import calculate_cloud, get_tag_list, get_queryset_and_model, parse_tag_input
|
||||||
from calibre.www.apps.tagging.utils import LOGARITHMIC
|
from tagging.utils import LOGARITHMIC
|
||||||
|
|
||||||
qn = connection.ops.quote_name
|
qn = connection.ops.quote_name
|
||||||
|
|
@ -159,7 +159,7 @@ def get_tag_list(tags):
|
|||||||
* A ``Tag`` ``QuerySet``.
|
* A ``Tag`` ``QuerySet``.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
from calibre.www.apps.tagging.models import Tag
|
from tagging.models import Tag
|
||||||
if isinstance(tags, Tag):
|
if isinstance(tags, Tag):
|
||||||
return [tags]
|
return [tags]
|
||||||
elif isinstance(tags, QuerySet) and tags.model is Tag:
|
elif isinstance(tags, QuerySet) and tags.model is Tag:
|
||||||
@ -201,7 +201,7 @@ def get_tag(tag):
|
|||||||
|
|
||||||
If no matching tag can be found, ``None`` will be returned.
|
If no matching tag can be found, ``None`` will be returned.
|
||||||
"""
|
"""
|
||||||
from calibre.www.apps.tagging.models import Tag
|
from tagging.models import Tag
|
||||||
if isinstance(tag, Tag):
|
if isinstance(tag, Tag):
|
||||||
return tag
|
return tag
|
||||||
|
|
@ -5,8 +5,8 @@ from django.http import Http404
|
|||||||
from django.utils.translation import ugettext as _
|
from django.utils.translation import ugettext as _
|
||||||
from django.views.generic.list_detail import object_list
|
from django.views.generic.list_detail import object_list
|
||||||
|
|
||||||
from calibre.www.apps.tagging.models import Tag, TaggedItem
|
from tagging.models import Tag, TaggedItem
|
||||||
from calibre.www.apps.tagging.utils import get_tag, get_queryset_and_model
|
from tagging.utils import get_tag, get_queryset_and_model
|
||||||
|
|
||||||
def tagged_object_list(request, queryset_or_model=None, tag=None,
|
def tagged_object_list(request, queryset_or_model=None, tag=None,
|
||||||
related_tags=False, related_tag_counts=True, **kwargs):
|
related_tags=False, related_tag_counts=True, **kwargs):
|
Loading…
x
Reference in New Issue
Block a user