Pull from trunk

Kovid Goyal 2009-04-02 11:12:00 -07:00
commit 9ebb335346
19 changed files with 313 additions and 314 deletions

View File

@ -8,24 +8,24 @@ Conversion of HTML/OPF files follows several stages:
* All links in the HTML files or in the OPF manifest are
followed to build up a list of HTML files to be converted.
This stage is implemented by
:function:`calibre.ebooks.html.traverse` and
:class:`calibre.ebooks.html.HTMLFile`.
* The HTML is pre-processed to make it more semantic.
All links in the HTML files to other resources like images,
stylesheets, etc. are relativized. The resources are copied
into the `resources` sub directory. This is accomplished by
:class:`calibre.ebooks.html.PreProcessor` and
:class:`calibre.ebooks.html.Parser`.
* The HTML is processed. Various operations are performed.
All style declarations are extracted and consolidated into
a single style sheet. Chapters are auto-detected and marked.
Various font related manipulations are performed. See
:class:`HTMLProcessor`.
* The processed HTML is saved and the
:module:`calibre.ebooks.epub.split` module is used to split up
large HTML files into smaller chunks.
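A minimal sketch of driving the pipeline these stages describe, from outside the module. The import path is an assumption (this page does not show the file name); option_parser() and convert() are the entry points visible in the hunks below, and opts.output is the attribute convert() checks before writing the container.

from calibre.ebooks.epub.from_html import option_parser, convert  # assumed module path

opts, args = option_parser().parse_args(['html2epub', 'book.html'])
opts.output = 'book.epub'   # destination EPUB container
convert('book.html', opts)  # traverse links, process HTML, split, build the EPUB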
@ -64,7 +64,7 @@ def remove_bad_link(element, attribute, link, pos):
def check_links(opf_path, pretty_print):
'''
Find and remove all invalid links in the HTML files
'''
logger = logging.getLogger('html2epub')
logger.info('\tChecking files for bad links...')
@ -78,7 +78,7 @@ def check_links(opf_path, pretty_print):
if isinstance(f, str):
f = f.decode('utf-8')
html_files.append(os.path.abspath(content(f)))
for path in html_files:
if not os.access(path, os.R_OK):
continue
@ -113,27 +113,27 @@ def find_html_index(files):
return html_files[-1], os.path.splitext(html_files[-1])[1].lower()[1:]
class HTMLProcessor(Processor, Rationalizer):
def __init__(self, htmlfile, opts, tdir, resource_map, htmlfiles, stylesheets):
Processor.__init__(self, htmlfile, opts, tdir, resource_map, htmlfiles,
name='html2epub')
if opts.verbose > 2:
self.debug_tree('parsed')
self.detect_chapters()
self.extract_css(stylesheets)
if self.opts.base_font_size2 > 0:
self.font_css = self.rationalize(self.external_stylesheets+[self.stylesheet],
self.root, self.opts)
if opts.verbose > 2:
self.debug_tree('nocss')
if hasattr(self.body, 'xpath'):
for script in list(self.body.xpath('descendant::script')):
script.getparent().remove(script)
self.fix_markup()
def convert_image(self, img):
rpath = img.get('src', '')
path = os.path.join(os.path.dirname(self.save_path()), *rpath.split('/'))
@ -150,10 +150,10 @@ class HTMLProcessor(Processor, Rationalizer):
if val == rpath:
self.resource_map[key] = rpath+'_calibre_converted.jpg'
img.set('src', rpath+'_calibre_converted.jpg')
def fix_markup(self):
'''
Perform various markup transforms to get the output to render correctly
in the quirky ADE.
'''
# Replace <br> that are children of <body> as ADE doesn't handle them
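An illustrative, standalone sketch (not part of the commit) of the kind of ADE-oriented cleanup fix_markup() performs, shown with lxml on a toy document. The tag and attribute rewrites mirror the transforms in the hunks below; everything else here is hypothetical.

from lxml import html

root = html.fromstring('<html><body><center>Hi</center>'
                       '<img src="a.jpg?x=1&amp;y=2"/><form><input/></form></body></html>')
for tag in root.xpath('//form'):       # ADE cannot render forms
    tag.getparent().remove(tag)
for tag in root.xpath('//center'):     # <center> becomes a styled <div>
    tag.tag = 'div'
    tag.set('style', 'text-align:center')
for tag in root.xpath('//img[@src]'):  # strip & from image URLs for ADE
    tag.set('src', tag.get('src', '').replace('&', ''))
print(html.tostring(root))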
@ -179,8 +179,8 @@ class HTMLProcessor(Processor, Rationalizer):
if not br.tail:
br.tail = ''
br.tail += sibling.tail
if self.opts.profile.remove_object_tags:
for tag in self.root.xpath('//embed'):
tag.getparent().remove(tag)
@ -188,42 +188,46 @@ class HTMLProcessor(Processor, Rationalizer):
if tag.get('type', '').lower().strip() in ('image/svg+xml',):
continue
tag.getparent().remove(tag)
for tag in self.root.xpath('//title|//style'):
if not tag.text:
tag.getparent().remove(tag)
for tag in self.root.xpath('//script'):
if not tag.text and not tag.get('src', False):
tag.getparent().remove(tag)
for tag in self.root.xpath('//form'):
tag.getparent().remove(tag)
for tag in self.root.xpath('//center'):
tag.tag = 'div'
tag.set('style', 'text-align:center')
if self.opts.linearize_tables:
for tag in self.root.xpath('//table | //tr | //th | //td'):
tag.tag = 'div'
# ADE can't handle &amp; in an img url
for tag in self.root.xpath('//img[@src]'):
tag.set('src', tag.get('src', '').replace('&', ''))
def save(self):
for meta in list(self.root.xpath('//meta')):
meta.getparent().remove(meta)
# Strip all comments since Adobe DE is petrified of them
Processor.save(self, strip_comments=True)
def remove_first_image(self):
images = self.root.xpath('//img')
if images:
images[0].getparent().remove(images[0])
return True
return False
def config(defaults=None):
return common_config(defaults=defaults)
@ -235,7 +239,7 @@ def option_parser():
Convert an HTML file to an EPUB ebook. Recursively follows links in the HTML file.
If you specify an OPF file instead of an HTML file, the list of links is taken from
the <spine> element of the OPF file.
'''))
def parse_content(filelist, opts, tdir):
@ -246,7 +250,7 @@ def parse_content(filelist, opts, tdir):
first_image_removed = False
for htmlfile in filelist:
logging.getLogger('html2epub').debug('Processing %s...'%htmlfile)
hp = HTMLProcessor(htmlfile, opts, os.path.join(tdir, 'content'),
resource_map, filelist, stylesheets)
if not first_image_removed and opts.remove_first_image:
first_image_removed = hp.remove_first_image()
@ -254,7 +258,7 @@ def parse_content(filelist, opts, tdir):
hp.save()
stylesheet_map[os.path.basename(hp.save_path())] = \
[s for s in hp.external_stylesheets + [hp.stylesheet, hp.font_css, hp.override_css] if s is not None]
logging.getLogger('html2epub').debug('Saving stylesheets...')
if opts.base_font_size2 > 0:
Rationalizer.remove_font_size_information(stylesheets.values())
@ -268,7 +272,7 @@ def parse_content(filelist, opts, tdir):
if toc.count('chapter') + toc.count('file') > opts.toc_threshold:
toc.purge(['link', 'unknown'])
toc.purge(['link'], max=opts.max_toc_links)
return resource_map, hp.htmlfile_map, toc, stylesheet_map
TITLEPAGE = '''\
@ -325,26 +329,26 @@ def process_title_page(mi, filelist, htmlfilemap, opts, tdir):
metadata_cover = mi.cover
if metadata_cover and not os.path.exists(metadata_cover):
metadata_cover = None
cpath = '/'.join(('resources', '_cover_.jpg'))
cover_dest = os.path.join(tdir, 'content', *cpath.split('/'))
if metadata_cover is not None:
if not create_cover_image(metadata_cover, cover_dest,
opts.profile.screen_size):
metadata_cover = None
specified_cover = opts.cover
if specified_cover and not os.path.exists(specified_cover):
specified_cover = None
if specified_cover is not None:
if not create_cover_image(specified_cover, cover_dest,
opts.profile.screen_size):
specified_cover = None
cover = metadata_cover if specified_cover is None or (opts.prefer_metadata_cover and metadata_cover is not None) else specified_cover
if cover is not None:
titlepage = TITLEPAGE%cpath
tp = 'calibre_title_page.html' if old_title_page is None else old_title_page
tppath = os.path.join(tdir, 'content', tp)
with open(tppath, 'wb') as f:
f.write(titlepage)
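The cover-selection expression a few lines above is compact; here is the same precedence written out long-hand, purely as an illustration (the function name is hypothetical, the variable names are those used in the hunk):

def choose_cover(metadata_cover, specified_cover, prefer_metadata_cover):
    # No cover was passed on the command line: fall back to the metadata cover.
    if specified_cover is None:
        return metadata_cover
    # Both exist and the user asked to prefer the metadata cover.
    if prefer_metadata_cover and metadata_cover is not None:
        return metadata_cover
    # Otherwise the explicitly specified cover wins.
    return specified_cover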
@ -370,7 +374,7 @@ def condense_ncx(ncx_path):
compressed = etree.tostring(tree.getroot(), encoding='utf-8')
open(ncx_path, 'wb').write(compressed)
def convert(htmlfile, opts, notification=None, create_epub=True,
oeb_cover=False, extract_to=None):
htmlfile = os.path.abspath(htmlfile)
if opts.output is None:
@ -399,16 +403,16 @@ def convert(htmlfile, opts, notification=None, create_epub=True,
else:
opf, filelist = get_filelist(htmlfile, opts)
mi = merge_metadata(htmlfile, opf, opts)
opts.chapter = XPath(opts.chapter,
namespaces={'re':'http://exslt.org/regular-expressions'})
for x in (1, 2, 3):
attr = 'level%d_toc'%x
if getattr(opts, attr):
setattr(opts, attr, XPath(getattr(opts, attr),
namespaces={'re':'http://exslt.org/regular-expressions'}))
else:
setattr(opts, attr, None)
with TemporaryDirectory(suffix='_html2epub', keep=opts.keep_intermediate) as tdir:
if opts.keep_intermediate:
print 'Intermediate files in', tdir
@ -416,16 +420,16 @@ def convert(htmlfile, opts, notification=None, create_epub=True,
parse_content(filelist, opts, tdir)
logger = logging.getLogger('html2epub')
resources = [os.path.join(tdir, 'content', f) for f in resource_map.values()]
title_page, has_title_page = process_title_page(mi, filelist, htmlfile_map, opts, tdir)
spine = [htmlfile_map[f.path] for f in filelist]
if not oeb_cover and title_page is not None:
spine = [title_page] + spine
mi.cover = None
mi.cover_data = (None, None)
mi = create_metadata(tdir, mi, spine, resources)
buf = cStringIO.StringIO()
if mi.toc:
@ -453,7 +457,7 @@ def convert(htmlfile, opts, notification=None, create_epub=True,
logger.info('\tBuilding page map...')
add_page_map(opf_path, opts)
check_links(opf_path, opts.pretty_print)
opf = OPF(opf_path, tdir)
opf.remove_guide()
oeb_cover_file = None
@ -465,7 +469,7 @@ def convert(htmlfile, opts, notification=None, create_epub=True,
opf.add_guide_item('cover', 'Cover', 'content/'+spine[0])
if oeb_cover and oeb_cover_file:
opf.add_guide_item('cover', 'Cover', 'content/'+oeb_cover_file)
cpath = os.path.join(tdir, 'content', 'resources', '_cover_.jpg')
if os.path.exists(cpath):
opf.add_path_to_manifest(cpath, 'image/jpeg')
@ -477,29 +481,29 @@ def convert(htmlfile, opts, notification=None, create_epub=True,
condense_ncx(ncx_path)
if os.stat(ncx_path).st_size > opts.profile.flow_size:
logger.warn('NCX still larger than allowed size at %d bytes. Menu based Table of Contents may not work on device.'%os.stat(ncx_path).st_size)
if create_epub:
epub = initialize_container(opts.output)
epub.add_dir(tdir)
epub.close()
run_plugins_on_postprocess(opts.output, 'epub')
logger.info(_('Output written to ')+opts.output)
if opts.show_opf:
print open(opf_path, 'rb').read()
if opts.extract_to is not None:
if os.path.exists(opts.extract_to):
shutil.rmtree(opts.extract_to)
shutil.copytree(tdir, opts.extract_to)
if extract_to is not None:
if os.path.exists(extract_to):
shutil.rmtree(extract_to)
shutil.copytree(tdir, extract_to)
def main(args=sys.argv):
parser = option_parser()
opts, args = parser.parse_args(args)
@ -509,6 +513,6 @@ def main(args=sys.argv):
return 1
convert(args[1], opts)
return 0
if __name__ == '__main__':
sys.exit(main())

File diff suppressed because it is too large.

View File

@ -13,7 +13,6 @@ class Exiled(BasicNewsRecipe):
__author__ = 'Darko Miletic'
description = "Mankind's only alternative since 1997 - Formerly known as The eXile"
publisher = 'Exiled Online'
language = _('English')
category = 'news, politics, international'
oldest_article = 15
max_articles_per_feed = 100
@ -21,16 +20,18 @@ class Exiled(BasicNewsRecipe):
use_embedded_content = False
encoding = 'utf8'
remove_javascript = True
language = _('English')
cover_url = 'http://exiledonline.com/wp-content/themes/exiledonline_theme/images/header-sm.gif'
html2lrf_options = [
'--comment' , description
, '--base-font-size', '10'
, '--category' , category
, '--publisher' , publisher
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
keep_only_tags = [dict(name='div', attrs={'id':'main'})]
remove_tags = [
@ -39,8 +40,8 @@ class Exiled(BasicNewsRecipe):
,dict(name='div', attrs={'id':['comments','navig']})
]
feeds = [(u'Articles', u'http://exiledonline.com/feed/')]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
@ -48,4 +49,9 @@ class Exiled(BasicNewsRecipe):
mtag = '\n<meta http-equiv="Content-Language" content="en"/>\n<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n'
soup.head.insert(0,mtag)
return soup
def get_article_url(self, article):
raw = article.get('link', None)
final = raw + 'all/1/'
return final

View File

@ -6,7 +6,7 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
laprensa.com.ni
'''
import locale
import datetime
import time
from calibre.web.feeds.news import BasicNewsRecipe
@ -23,23 +23,9 @@ class LaPrensa_ni(BasicNewsRecipe):
encoding = 'cp1252'
remove_javascript = True
language = _('Spanish')
#Locale setting to get appropriate date/month values in Spanish
try:
#Windows setting for locale
locale.setlocale(locale.LC_TIME,'Spanish_Nicaragua')
except locale.Error:
#Linux setting for locale -- choose one appropriate for your distribution
try:
locale.setlocale(locale.LC_TIME,'es_NI')
except locale.Error:
try:
locale.setlocale(locale.LC_TIME,'es_ES')
except:
pass
current_index = time.strftime("http://www.laprensa.com.ni/archivo/%Y/%B/%d/noticias/")
months_es = ['enero','febrero','marzo','abril','mayo','junio','julio','agosto','septiembre','octubre','noviembre','diciembre']
current_month = months_es[datetime.date.today().month - 1]
current_index = time.strftime("http://www.laprensa.com.ni/archivo/%Y/" + current_month + "/%d/noticias/")
html2lrf_options = [
'--comment', description
@ -91,6 +77,3 @@ class LaPrensa_ni(BasicNewsRecipe):
totalfeeds.append((feedtitle, articles))
return totalfeeds
def cleanup(self):
#Going back to the default locale
locale.setlocale(locale.LC_TIME,'')
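The recipe change above replaces the locale.setlocale() juggling (process-global, and dependent on which Spanish locales a particular Windows or Linux install actually provides) with a hard-coded list of Spanish month names. A standalone sketch of the same approach, with the URL it produces on the commit date:

import datetime
import time

months_es = ['enero', 'febrero', 'marzo', 'abril', 'mayo', 'junio', 'julio',
             'agosto', 'septiembre', 'octubre', 'noviembre', 'diciembre']
current_month = months_es[datetime.date.today().month - 1]
current_index = time.strftime(
    'http://www.laprensa.com.ni/archivo/%Y/' + current_month + '/%d/noticias/')
# e.g. on 2 April 2009: http://www.laprensa.com.ni/archivo/2009/abril/02/noticias/
print(current_index)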

View File

@ -2,10 +2,10 @@ from django.db import models
from django.utils.translation import ugettext_lazy as _
from django.db.models import permalink
from django.contrib.auth.models import User
from calibre.www.apps.tagging.fields import TagField
from tagging.fields import TagField
from calibre.www.apps.blog.managers import PublicManager
import calibre.www.apps.tagging as tagging
import tagging
class Category(models.Model):
"""Category model."""

View File

@ -40,10 +40,10 @@ INSTALLED_APPS = (
'django.contrib.sites',
'django.contrib.admin',
'django.contrib.comments',
'django.contrib.markup',
'calibre.www.apps.inlines',
'calibre.www.apps.tagging',
'tagging',
'calibre.www.apps.blog',
)

View File

@ -2,14 +2,16 @@ from django.conf.urls.defaults import patterns, include, handler404, handler500
from django.conf import settings
# Uncomment the next two lines to enable the admin:
#from django.contrib import admin
#admin.autodiscover()
from django.contrib import admin
admin.autodiscover()
urlpatterns = patterns('',
# (r'^admin/(.*)', admin.site.root),
(r'^admin/(.*)', admin.site.root),
(r'^comments/', include('django.contrib.comments.urls')),
(r'', include('calibre.www.apps.blog.urls')),
)

View File

@ -1,6 +1,6 @@
from django.utils.translation import ugettext as _
from calibre.www.apps.tagging.managers import ModelTaggedItemManager, TagDescriptor
from tagging.managers import ModelTaggedItemManager, TagDescriptor
VERSION = (0, 3, 'pre')

View File

@ -1,5 +1,5 @@
from django.contrib import admin
from calibre.www.apps.tagging.models import Tag, TaggedItem
from tagging.models import Tag, TaggedItem
admin.site.register(TaggedItem)
admin.site.register(Tag)

View File

@ -5,9 +5,9 @@ from django.db.models import signals
from django.db.models.fields import CharField
from django.utils.translation import ugettext_lazy as _
from calibre.www.apps.tagging import settings
from calibre.www.apps.tagging.models import Tag
from calibre.www.apps.tagging.utils import edit_string_for_tags
from tagging import settings
from tagging.models import Tag
from tagging.utils import edit_string_for_tags
class TagField(CharField):
"""
@ -101,7 +101,7 @@ class TagField(CharField):
return 'CharField'
def formfield(self, **kwargs):
from calibre.www.apps.tagging import forms
from tagging import forms
defaults = {'form_class': forms.TagField}
defaults.update(kwargs)
return super(TagField, self).formfield(**defaults)

View File

@ -4,9 +4,9 @@ Tagging components for Django's form library.
from django import forms
from django.utils.translation import ugettext as _
from calibre.www.apps.tagging import settings
from calibre.www.apps.tagging.models import Tag
from calibre.www.apps.tagging.utils import parse_tag_input
from tagging import settings
from tagging.models import Tag
from tagging.utils import parse_tag_input
class AdminTagForm(forms.ModelForm):
class Meta:

View File

@ -5,7 +5,7 @@ application.
from django.contrib.contenttypes.models import ContentType
from django.db import models
from calibre.www.apps.tagging.models import Tag, TaggedItem
from tagging.models import Tag, TaggedItem
class ModelTagManager(models.Manager):
"""

View File

@ -13,9 +13,9 @@ from django.db import connection, models
from django.db.models.query import QuerySet
from django.utils.translation import ugettext_lazy as _
from calibre.www.apps.tagging import settings
from calibre.www.apps.tagging.utils import calculate_cloud, get_tag_list, get_queryset_and_model, parse_tag_input
from calibre.www.apps.tagging.utils import LOGARITHMIC
from tagging import settings
from tagging.utils import calculate_cloud, get_tag_list, get_queryset_and_model, parse_tag_input
from tagging.utils import LOGARITHMIC
qn = connection.ops.quote_name

View File

@ -159,7 +159,7 @@ def get_tag_list(tags):
* A ``Tag`` ``QuerySet``.
"""
from calibre.www.apps.tagging.models import Tag
from tagging.models import Tag
if isinstance(tags, Tag):
return [tags]
elif isinstance(tags, QuerySet) and tags.model is Tag:
@ -201,7 +201,7 @@ def get_tag(tag):
If no matching tag can be found, ``None`` will be returned.
"""
from calibre.www.apps.tagging.models import Tag
from tagging.models import Tag
if isinstance(tag, Tag):
return tag

View File

@ -5,8 +5,8 @@ from django.http import Http404
from django.utils.translation import ugettext as _
from django.views.generic.list_detail import object_list
from calibre.www.apps.tagging.models import Tag, TaggedItem
from calibre.www.apps.tagging.utils import get_tag, get_queryset_and_model
from tagging.models import Tag, TaggedItem
from tagging.utils import get_tag, get_queryset_and_model
def tagged_object_list(request, queryset_or_model=None, tag=None,
related_tags=False, related_tag_counts=True, **kwargs):