Merge from trunk

2025-07-09 03:04:10 -04:00 · 2010-10-10 21:05:56 +02:00 · 2010-10-10 21:05:56 +02:00 · 5e1888210a
commit 5e1888210a
parent f4cdefafaa c650488bce
6 changed files with 141 additions and 29 deletions
--- a/resources/recipes/el_cultural.recipe
+++ b/resources/recipes/el_cultural.recipe
@ -0,0 +1,86 @@
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class RevistaElCultural(BasicNewsRecipe):
+
+    title       = 'Revista El Cultural'
+    __author__  = 'Jefferson Frantz'
+    description = 'Revista de cultura'
+    timefmt = ' [%d %b, %Y]'
+    language = 'es'
+
+    no_stylesheets = True
+    remove_javascript = True
+
+    extra_css              = 'h1{ font-family: sans-serif; font-size: large; font-weight: bolder; text-align: justify } h2{ font-family: sans-serif; font-size: small; font-weight: 500; text-align: justify } h3{ font-family: sans-serif; font-size: small; font-weight: 500; text-align: justify } h4{ font-family: sans-serif; font-weight: lighter; font-size: medium; font-style: italic; text-align: justify } .rtsArticuloFirma{ font-family: sans-serif; font-size: small; text-align: justify } .column span-13 last{ font-family: sans-serif; font-size: medium; text-align: justify } .rtsImgArticulo{font-family: serif; font-size: small; color: #000000; text-align: justify}'
+
+
+    def preprocess_html(self, soup):
+            for item in soup.findAll(style=True):
+               del item['style']
+
+            return soup
+
+    keep_only_tags = [dict(name='div', attrs={'class':['column span-13 last']}),dict(name='div', attrs={'class':['rtsImgArticulo']})]
+
+    remove_tags        = [
+                             dict(name=['object','link','script','ul'])
+                            ,dict(name='div', attrs={'class':['rtsRating']})
+
+                         ]
+
+
+    # Get the articles in a section
+    def ec_parse_section(self, url, titleSection):
+            print 'Section: '+ titleSection
+            soup = self.index_to_soup(url)
+            div = soup.find(attrs={'id':'gallery'})
+            current_articles = []
+
+            for a in div.findAllNext('a', href=True):
+                    if a is None:
+                        continue
+                    title = self.tag_to_string(a)
+
+                    url = a.get('href', False)
+                    if not url or not title:
+                        continue
+
+                    if not url.startswith('/version_papel/'+titleSection+'/'):
+                        if len(current_articles) > 0 and not url.startswith('/secciones/'):
+                            break
+                        continue
+
+                    if url.startswith('/version_papel/'+titleSection+'/'):
+                         url = 'http://www.elcultural.es'+url
+
+                    self.log('\t\tFound article:', title[0:title.find("|")-1])
+                    self.log('\t\t\t', url)
+                    current_articles.append({'title': title[0:title.find("|")-1], 'url':url,
+                        'description':'', 'date':''})
+
+            return current_articles
+
+
+    # Get the sections
+    def parse_index(self):
+            feeds = []
+            for title, url in [
+                ('LETRAS',
+                 'http://www.elcultural.es/pdf_sumario/cultural/Sumario_El_Cultural_en_PDF'),
+                ('ARTE',
+                 'http://www.elcultural.es/pdf_sumario/cultural/Sumario_El_Cultural_en_PDF'),
+                ('CINE',
+                 'http://www.elcultural.es/pdf_sumario/cultural/Sumario_El_Cultural_en_PDF'),
+                ('CIENCIA',
+                 'http://www.elcultural.es/pdf_sumario/cultural/Sumario_El_Cultural_en_PDF'),
+##                ('OPINION',
+##                 'http://www.elcultural.es/pdf_sumario/cultural/Sumario_El_Cultural_en_PDF'),
+                ('ESCENARIOS',
+                 'http://www.elcultural.es/pdf_sumario/cultural/Sumario_El_Cultural_en_PDF'),
+             ]:
+               articles = self.ec_parse_section(url,title)
+               if articles:
+                   feeds.append((title, articles))
+
+
+            return feeds
--- a/src/calibre/ebooks/metadata/isbndb.py
+++ b/src/calibre/ebooks/metadata/isbndb.py
@ -45,37 +45,34 @@ def fetch_metadata(url, max=100, timeout=5.):
 class ISBNDBMetadata(Metadata):

    def __init__(self, book):
-        Metadata.__init__(self, None, [])
-        self.getmetadata(book)
+        Metadata.__init__(self, None)
+
+        def tostring(e):
+            if not hasattr(e, 'string'):
+                return None
+            ans = e.string
+            if ans is not None:
+                ans = unicode(ans).strip()
+            if not ans:
+                ans = None
+            return ans

-    def tostring(self, e):
-        if not hasattr(e, 'string'):
-            return None
-        ans = e.string
-        if ans is not None:
-            ans = unicode(ans).strip()
-        if not ans:
-            ans = None
-        return ans
-        
-    def getmetadata(self, book):
        self.isbn = unicode(book.get('isbn13', book.get('isbn')))
-        temptitle = self.tostring(book.find('titlelong'))
-        if not temptitle:
-            temptitle = self.tostring(book.find('title'))
-        if temptitle:            
-            self.title = unicode(temptitle).strip()
-        else:
-            self.title = _('Unknown')
-        self.authors = []
-        au = self.tostring(book.find('authorstext'))
+        title = tostring(book.find('titlelong'))
+        if not title:
+            title = tostring(book.find('title'))
+        self.title = title
+        self.title = unicode(self.title).strip()
+        authors = []
+        au = tostring(book.find('authorstext'))
        if au:
            au = au.strip()
            temp = au.split(',')
            for au in temp:
                if not au: continue
-                self.authors.extend([a.strip() for a in au.split('&amp;')])
-
+                authors.extend([a.strip() for a in au.split('&amp;')])
+        if authors:
+            self.authors = authors
        try:
            self.author_sort = self.tostring(book.find('authors').find('person'))
            if self.authors and self.author_sort == self.authors[0]:
--- a/src/calibre/gui2/dialogs/metadata_single.py
+++ b/src/calibre/gui2/dialogs/metadata_single.py
@ -25,7 +25,7 @@ from calibre.ebooks.metadata.covers import download_cover
 from calibre.ebooks.metadata.meta import get_metadata
 from calibre.ebooks.metadata import MetaInformation
 from calibre.utils.config import prefs, tweaks
-from calibre.utils.date import qt_to_dt, local_tz, utcfromtimestamp
+from calibre.utils.date import qt_to_dt, local_tz, utcfromtimestamp, utc_tz
 from calibre.customize.ui import run_plugins_on_import, get_isbndb_key
 from calibre.gui2.preferences.social import SocialMetadata
 from calibre.gui2.custom_column_widgets import populate_metadata_page
@ -434,7 +434,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
        self.pubdate.setDate(QDate(pubdate.year, pubdate.month,
            pubdate.day))
        timestamp = db.timestamp(self.id, index_is_id=True)
-        self.orig_timestamp = timestamp
+        self.orig_timestamp = timestamp.astimezone(utc_tz)
        self.date.setDate(QDate(timestamp.year, timestamp.month,
            timestamp.day))

--- a/src/calibre/manual/index.rst
+++ b/src/calibre/manual/index.rst
@ -17,10 +17,10 @@ To get started with more advanced usage, you should read about the :ref:`Graphic

 You will find the list of :ref:`Frequently Asked Questions <faq>` useful as well. 

-.. only:: html and online
+.. only:: online
+    
+    An e-book version of this User Manual is available in `EPUB format <calibre.epub>`_. 
    
-    An e-book version of this User Manual is available in `EPUB format <calibre.epub>`_. Because the User Manual uses advanced formatting, it is only suitable for use with the |app| e-book viewer.
-
 Sections
 ------------

--- a/src/calibre/utils/magick/init.py
+++ b/src/calibre/utils/magick/init.py
@ -109,6 +109,13 @@ class Image(_magick.Image): # {{{
        return _magick.Image.load(self, bytes(data))

    def open(self, path_or_file):
+        if not hasattr(path_or_file, 'read') and \
+            path_or_file.lower().endswith('.wmf'):
+            # Special handling for WMF files as ImageMagick seems
+            # to hang while reading them from a blob on Linux
+            if isinstance(path_or_file, unicode):
+                path_or_file = path_or_file.encode(filesystem_encoding)
+            return _magick.Image.read(self, path_or_file)
        data = path_or_file
        if hasattr(data, 'read'):
            data = data.read()
--- a/src/calibre/utils/magick/magick.c
+++ b/src/calibre/utils/magick/magick.c
@ -414,6 +414,24 @@ magick_Image_load(magick_Image *self, PyObject *args, PyObject *kwargs) {

 // }}}

+// Image.read {{{
+static PyObject *
+magick_Image_read(magick_Image *self, PyObject *args, PyObject *kwargs) {
+    const char *data;
+    MagickBooleanType res;
+    
+    if (!PyArg_ParseTuple(args, "s", &data)) return NULL;
+
+    res = MagickReadImage(self->wand, data);
+
+    if (!res)
+        return magick_set_exception(self->wand);
+
+    Py_RETURN_NONE;
+}
+
+// }}}
+
 // Image.create_canvas {{{
 static PyObject *
 magick_Image_create_canvas(magick_Image *self, PyObject *args, PyObject *kwargs)
@ -873,6 +891,10 @@ static PyMethodDef magick_Image_methods[] = {
     "Load an image from a byte buffer (string)"
    },

+    {"read", (PyCFunction)magick_Image_read, METH_VARARGS,
+     "Read image from path. Path must be a bytestring in the filesystem encoding"
+    },
+
    {"export", (PyCFunction)magick_Image_export, METH_VARARGS,
     "export(format) -> bytestring\n\n Export the image as the specified format"
    },