Merge upstream changes

2025-07-09 03:04:10 -04:00 · 2009-01-19 22:54:27 -05:00 · 2009-01-19 22:54:27 -05:00 · fd389eeca2
commit fd389eeca2
parent 97468e61fe c198458f65
38 changed files with 13786 additions and 6917 deletions
--- a/src/calibre/devices/cybookg3/t2b.py
+++ b/src/calibre/devices/cybookg3/t2b.py
--- a/src/calibre/devices/prs505/driver.py
+++ b/src/calibre/devices/prs505/driver.py
@ -146,36 +146,7 @@ class PRS505(Device):
            self._card_prefix = re.search(card_pat, mount).group(2) + os.sep
            
    
-    def open_windows_nowmi(self):
-        from calibre import plugins
-        winutil = plugins['winutil'][0]
-        volumes = winutil.get_mounted_volumes_for_usb_device(self.VENDOR_ID, self.PRODUCT_ID)
-        main = None
-        for device_id in volumes.keys():
-            if 'PRS-505/UC&' in device_id:
-                main = volumes[device_id]+':\\'
-        if not main:
-            raise DeviceError(_('Unable to detect the %s disk drive. Try rebooting.')%self.__class__.__name__)
-        self._main_prefix = main
-        card = self._card_prefix = None
-        win32api = __import__('win32api')
-        for device_id in volumes.keys():
-            if 'PRS-505/UC:' in device_id:
-                card = volumes[device_id]+':\\'
-                try:
-                    win32api.GetVolumeInformation(card)
-                    self._card_prefix = card
-                    break
-                except:
-                    continue
-            
-    
    def open_windows(self):
-        try:
-            self.open_windows_nowmi()
-            return
-        except:
-            pass
        drives = []
        wmi = __import__('wmi', globals(), locals(), [], -1) 
        c = wmi.WMI()
--- a/src/calibre/ebooks/epub/init.py
+++ b/src/calibre/ebooks/epub/init.py
@ -156,7 +156,7 @@ to auto-generate a Table of Contents.
           help=_('Set the right margin in pts. Default is %default'))
    layout('base_font_size2', ['--base-font-size'], default=12.0,
           help=_('The base font size in pts. Default is %defaultpt. Set to 0 to disable rescaling of fonts.'))
-    layout('remove_paragraph_spacing', ['--remove-paragraph-spacing'], default=True,
+    layout('remove_paragraph_spacing', ['--remove-paragraph-spacing'], default=False,
           help=_('Remove spacing between paragraphs. Will not work if the source file forces inter-paragraph spacing.'))
    layout('preserve_tag_structure', ['--preserve-tag-structure'], default=False,
           help=_('Preserve the HTML tag structure while splitting large HTML files. This is only neccessary if the HTML files contain CSS that uses sibling selectors. Enabling this greatly slows down processing of large HTML files.'))
--- a/src/calibre/ebooks/epub/from_feeds.py
+++ b/src/calibre/ebooks/epub/from_feeds.py
@ -52,6 +52,7 @@ def convert(opts, recipe_arg, notification=None):
        
        print 'Generating epub...'
        opts.encoding = 'utf-8'
+        opts.remove_paragraph_spacing = True
        html2epub(opf, opts, notification=notification)
    

--- a/src/calibre/ebooks/epub/from_html.py
+++ b/src/calibre/ebooks/epub/from_html.py
@ -128,6 +128,8 @@ class HTMLProcessor(Processor, Rationalizer):
        if hasattr(self.body, 'xpath'):
            for script in list(self.body.xpath('descendant::script')):
                script.getparent().remove(script)
+                
+        self.fix_markup()
            
    def convert_image(self, img):
        rpath = img.get('src', '')
@ -145,6 +147,17 @@ class HTMLProcessor(Processor, Rationalizer):
                    if val == rpath:
                        self.resource_map[key] = rpath+'_calibre_converted.jpg'
        img.set('src', rpath+'_calibre_converted.jpg')
+        
+    def fix_markup(self):
+        '''
+        Apply markup tweaks needed for the output to display properly in
+        the quirky ADE renderer.
+
+        Currently a single transform: each <br> that is a direct child of
+        <body> is turned into a <p> holding one non-breaking space.
+        '''
+        body = self.body
+        if not hasattr(body, 'xpath'):
+            # Body is not an element supporting XPath queries; nothing to do.
+            return
+        for node in body.xpath('./br'):
+            node.tag = 'p'
+            node.text = u'\u00a0'
    
    def save(self):
        for meta in list(self.root.xpath('//meta')):
--- a/src/calibre/ebooks/epub/iterator.py
+++ b/src/calibre/ebooks/epub/iterator.py
@ -95,7 +95,7 @@ class EbookIterator(object):
                for match in re.compile(r'@font-face\s*{([^}]+)}').finditer(css):
                    block  = match.group(1)
                    family = re.compile(r'font-family\s*:\s*([^;]+)').search(block)
-                    url    = re.compile(r'url\s*\((.+?)\)', re.DOTALL).search(block)
+                    url    = re.compile(r'url\s*\([\'"]*(.+?)[\'"]*\)', re.DOTALL).search(block)
                    if url:
                        path = url.group(1).split('/')
                        path = os.path.join(os.path.dirname(item.path), *path) 
--- a/src/calibre/ebooks/html.py
+++ b/src/calibre/ebooks/html.py
@ -848,7 +848,7 @@ class Processor(Parser):
        # Workaround for anchor rendering bug in ADE
        css += '\n\na { color: inherit; text-decoration: inherit; cursor: default; }\na[href] { color: blue; text-decoration: underline; cursor:pointer; }'
        if self.opts.remove_paragraph_spacing:
-            css += '\n\np {text-indent: 2em; margin-top:0pt; margin-bottom:0pt; padding:0pt; border:0pt;}'
+            css += '\n\np {text-indent: 1.5em; margin-top:0pt; margin-bottom:0pt; padding:0pt; border:0pt;}'
        if self.opts.override_css:
            css += '\n\n' + self.opts.override_css
        self.override_css = self.css_parser.parseString(self.preprocess_css(css))
--- a/src/calibre/ebooks/mobi/mobiml.py
+++ b/src/calibre/ebooks/mobi/mobiml.py
@ -12,7 +12,7 @@ import copy
 import re
 from lxml import etree
 from calibre.ebooks.oeb.base import namespace, barename
-from calibre.ebooks.oeb.base import XHTML, XHTML_NS
+from calibre.ebooks.oeb.base import XHTML, XHTML_NS, OEB_DOCS
 from calibre.ebooks.oeb.stylizer import Stylizer
 from calibre.ebooks.oeb.transforms.flatcss import KeyMapper

@ -96,8 +96,11 @@ class MobiMLizer(object):
        href = oeb.guide['cover'].href
        del oeb.guide['cover']
        item = oeb.manifest.hrefs[href]
-        oeb.manifest.remove(item)
-
+        if item.spine_position is not None:
+            oeb.spine.remove(item)                
+            if item.media_type in OEB_DOCS:
+                self.oeb.manifest.remove(item)
+    
    def mobimlize_spine(self):
        for item in self.oeb.spine:
            stylizer = Stylizer(item.data, item.href, self.oeb, self.profile)
@ -137,7 +140,7 @@ class MobiMLizer(object):
        para = bstate.para
        if tag in SPECIAL_TAGS and not text:
            para = para if para is not None else bstate.body
-        elif para is None:
+        elif para is None or tag in ('td', 'th'):
            body = bstate.body
            if bstate.pbreak:
                etree.SubElement(body, MBP('pagebreak'))
@ -157,7 +160,8 @@ class MobiMLizer(object):
            elif indent != 0 and abs(indent) < self.profile.fbase:
                indent = (indent / abs(indent)) * self.profile.fbase
            if tag in NESTABLE_TAGS:
-                para = wrapper = etree.SubElement(parent, XHTML(tag))
+                para = wrapper = etree.SubElement(
+                    parent, XHTML(tag), attrib=istate.attrib)
                bstate.nested.append(para)
                if tag == 'li' and len(istates) > 1:
                    istates[-2].list_num += 1
@ -337,6 +341,10 @@ class MobiMLizer(object):
            tag = 'tr'
        elif display == 'table-cell':
            tag = 'td'
+        if tag in TABLE_TAGS:
+            for attr in ('rowspan', 'colspan'):
+                if attr in elem.attrib:
+                    istate.attrib[attr] = elem.attrib[attr]
        text = None
        if elem.text:
            if istate.preserve:
@ -374,6 +382,6 @@ class MobiMLizer(object):
                bstate.vpadding += bstate.vmargin
                bstate.vmargin = 0
                bstate.vpadding += vpadding
-        if tag in NESTABLE_TAGS and bstate.nested:
+        if bstate.nested and bstate.nested[-1].tag == elem.tag:
            bstate.nested.pop()
        istates.pop()
--- a/src/calibre/ebooks/mobi/reader.py
+++ b/src/calibre/ebooks/mobi/reader.py
@ -124,6 +124,7 @@ class BookHeader(object):
            sublangid = (langcode >> 10) & 0xFF
            self.language = main_language.get(langid, 'ENGLISH')
            self.sublanguage = sub_language.get(sublangid, 'NEUTRAL')
+            self.first_image_index = struct.unpack('>L', raw[0x6c:0x6c+4])[0]
            
            self.exth_flag, = struct.unpack('>L', raw[0x80:0x84])
            self.exth = None
@ -441,17 +442,18 @@ class MobiReader(object):
            os.makedirs(output_dir)
        image_index = 0
        self.image_names = []
-        for i in range(self.num_sections):
+        for i in range(self.book_header.first_image_index, self.num_sections):
            if i in processed_records:
                continue
            processed_records.append(i)
            data  = self.sections[i][0]
            buf = cStringIO.StringIO(data)
+            image_index += 1
            try:
                im = PILImage.open(buf)                
-            except IOError:
+            except IOError, e:
                continue
-            image_index += 1 
+             
            path = os.path.join(output_dir, '%05d.jpg'%image_index)
            self.image_names.append(os.path.basename(path))
            im.convert('RGB').save(open(path, 'wb'), format='JPEG')
@ -476,6 +478,7 @@ def get_metadata(stream):
    else:
        tdir = tempfile.mkdtemp('_mobi_meta', __appname__+'_')
        atexit.register(shutil.rmtree, tdir)
+        #print tdir
        mr.extract_images([], tdir)
        mi = mr.create_opf('dummy.html')
        if mi.cover:
@ -491,7 +494,6 @@ def get_metadata(stream):
                        if os.access(candidate, os.R_OK):
                            cover = candidate
                            break
-                    
            if os.access(cover, os.R_OK):
                mi.cover_data = ('JPEG', open(os.path.join(tdir, cover), 'rb').read())
        else:
--- a/src/calibre/ebooks/mobi/writer.py
+++ b/src/calibre/ebooks/mobi/writer.py
@ -95,6 +95,7 @@ class Serializer(object):
    def __init__(self, oeb, images):
        self.oeb = oeb
        self.images = images
+        self.logger = oeb.logger
        self.id_offsets = {}
        self.href_offsets = defaultdict(list)
        self.breaks = []
@ -144,8 +145,8 @@ class Serializer(object):
        item = hrefs[path] if path else None
        if item and item.spine_position is None:
            return False
-        id =  item.id if item else base.id
-        href = '#'.join((id, frag)) if frag else id
+        path =  item.href if item else base.href
+        href = '#'.join((path, frag)) if frag else path
        buffer.write('filepos=')
        self.href_offsets[href].append(buffer.tell())
        buffer.write('0000000000')
@ -170,7 +171,7 @@ class Serializer(object):
        buffer = self.buffer
        if not item.linear:
            self.breaks.append(buffer.tell() - 1)
-        self.id_offsets[item.id] = buffer.tell()
+        self.id_offsets[item.href] = buffer.tell()
        for elem in item.data.find(XHTML('body')):
            self.serialize_elem(elem, item)
        buffer.write('<mbp:pagebreak/>')
@ -180,12 +181,11 @@ class Serializer(object):
        if not isinstance(elem.tag, basestring) \
           or namespace(elem.tag) not in nsrmap:
            return
-        hrefs = self.oeb.manifest.hrefs
        tag = prefixname(elem.tag, nsrmap)
        for attr in ('name', 'id'):
            if attr in elem.attrib:
-                id = '#'.join((item.id, elem.attrib[attr]))
-                self.id_offsets[id] = buffer.tell()
+                href = '#'.join((item.href, elem.attrib[attr]))
+                self.id_offsets[href] = buffer.tell()
                del elem.attrib[attr]
        if tag == 'a' and not elem.attrib \
           and not len(elem) and not elem.text:
@ -203,7 +203,7 @@ class Serializer(object):
                        continue
                elif attr == 'src':
                    href = item.abshref(val)
-                    if href in hrefs:
+                    if href in self.images:
                        index = self.images[href]
                        buffer.write('recindex="%05d"' % index)
                        continue
@ -233,8 +233,12 @@ class Serializer(object):

    def fixup_links(self):
        buffer = self.buffer
-        for id, hoffs in self.href_offsets.items():
-            ioff = self.id_offsets[id]
+        id_offsets = self.id_offsets
+        for href, hoffs in self.href_offsets.items():
+            if href not in id_offsets:
+                self.logger.warn('Hyperlink target %r not found' % href)
+                href, _ = urldefrag(href)
+            ioff = self.id_offsets[href]
            for hoff in hoffs:
                buffer.seek(hoff)
                buffer.write('%010d' % ioff)
@ -360,7 +364,11 @@ class MobiWriter(object):
        if image.format not in ('JPEG', 'GIF'):
            width, height = image.size
            area = width * height
-            format = 'GIF' if area <= 40000 else 'JPEG'
+            if area <= 40000:
+                format = 'GIF'
+            else:
+                image = image.convert('RGBA')
+                format = 'JPEG'
            changed = True
        if dimen is not None:
            image.thumbnail(dimen, Image.ANTIALIAS)
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@ -500,6 +500,7 @@ class Spine(object):
        self.items.pop(index)
        for i in xrange(index, len(self.items)):
            self.items[i].spine_position = i
+        item.spine_position = None
    
    def __iter__(self):
        for item in self.items:
@ -796,12 +797,20 @@ class OEBBook(object):
    def _manifest_from_opf(self, opf):
        self.manifest = manifest = Manifest(self)
        for elem in xpath(opf, '/o2:package/o2:manifest/o2:item'):
+            id = elem.get('id')
            href = elem.get('href')
+            media_type = elem.get('media-type')
+            fallback = elem.get('fallback')
+            if href in manifest.hrefs:
+                self.logger.warn(u'Duplicate manifest entry for %r.' % href)
+                continue
            if not self.container.exists(href):
                self.logger.warn(u'Manifest item %r not found.' % href)
                continue
-            manifest.add(elem.get('id'), href, elem.get('media-type'),
-                         elem.get('fallback'))
+            if id in manifest.ids:
+                self.logger.warn(u'Duplicate manifest id %r.' % id)
+                id, href = manifest.generate(id, href)
+            manifest.add(id, href, media_type, fallback)
    
    def _spine_from_opf(self, opf):
        self.spine = spine = Spine(self)
--- a/src/calibre/ebooks/oeb/transforms/trimmanifest.py
+++ b/src/calibre/ebooks/oeb/transforms/trimmanifest.py
@ -41,8 +41,9 @@ class ManifestTrimmer(object):
        while unchecked:
            new = set()
            for item in unchecked:
-                if item.media_type in OEB_DOCS or \
-                   item.media_type[-4:] in ('/xml', '+xml'):
+                if (item.media_type in OEB_DOCS or 
+                    item.media_type[-4:] in ('/xml', '+xml')) and \
+                   item.data is not None:
                    hrefs = [sel(item.data) for sel in LINK_SELECTORS]
                    for href in chain(*hrefs):
                        href = item.abshref(href)
--- a/src/calibre/gui2/main.py
+++ b/src/calibre/gui2/main.py
@ -309,18 +309,7 @@ class Main(MainWindow, Ui_MainWindow):
                self.library_path = dir
                db = LibraryDatabase2(self.library_path)
        self.library_view.set_database(db)
-        if self.olddb is not None:
-            pd = QProgressDialog('', '', 0, 100, self)
-            pd.setWindowModality(Qt.ApplicationModal)
-            pd.setCancelButton(None)
-            pd.setWindowTitle(_('Migrating database'))
-            pd.show()
-            number_of_books = db.migrate_old(self.olddb, pd)
-            self.olddb.close()
-            if number_of_books == 0:
-                os.remove(self.olddb.dbpath)
-            self.olddb = None
-            prefs['library_path'] = self.library_path
+        prefs['library_path'] = self.library_path
        self.library_view.sortByColumn(*dynamic.get('sort_column', ('timestamp', Qt.DescendingOrder)))
        if not self.library_view.restore_column_widths():
            self.library_view.resizeColumnsToContents()
@ -1392,39 +1381,14 @@ class Main(MainWindow, Ui_MainWindow):

    def initialize_database(self):
        self.library_path = prefs['library_path']
-        self.olddb = None
        if self.library_path is None: # Need to migrate to new database layout
-            QMessageBox.information(self, 'Database format changed',
-                '''\
-<p>calibre's book storage format has changed. Instead of storing book files in a database, the
-files are now stored in a folder on your filesystem. You will now be asked to choose the folder 
-in which you want to store your books files. Any existing books will be automatically migrated.
-                ''')
-            self.database_path = prefs['database_path']
-            if not os.access(os.path.dirname(self.database_path), os.W_OK):
-                error_dialog(self, _('Database does not exist'), 
-                             _('The directory in which the database should be: %s no longer exists. Please choose a new database location.')%self.database_path).exec_()
-                self.database_path = choose_dir(self, 'database path dialog', 
-                                                _('Choose new location for database'))
-                if not self.database_path:
-                    self.database_path = os.path.expanduser('~').decode(sys.getfilesystemencoding())
-                if not os.path.exists(self.database_path):
-                    os.makedirs(self.database_path)
-                self.database_path = os.path.join(self.database_path, 'library1.db')
-                prefs['database_path'] = self.database_path
-            home = os.path.dirname(self.database_path)
-            if not os.path.exists(home):
-                home = os.getcwd()
            dir = unicode(QFileDialog.getExistingDirectory(self, 
-                            _('Choose a location for your ebook library.'), home))
+                            _('Choose a location for your ebook library.'), os.getcwd()))
            if not dir:
-                dir = os.path.dirname(self.database_path)
+                dir = os.path.expanduser('~/Library')
            self.library_path = os.path.abspath(dir)
-            try:
-                self.olddb = LibraryDatabase(self.database_path)
-            except:
-                traceback.print_exc()
-                self.olddb = None
+        if not os.path.exists(self.library_path):
+            os.makedirs(self.library_path)


    def read_settings(self):
--- a/src/calibre/translations/bg.po
+++ b/src/calibre/translations/bg.po
--- a/src/calibre/translations/ca.po
+++ b/src/calibre/translations/ca.po
--- a/src/calibre/translations/cs.po
+++ b/src/calibre/translations/cs.po
--- a/src/calibre/translations/de.po
+++ b/src/calibre/translations/de.po
--- a/src/calibre/translations/el.po
+++ b/src/calibre/translations/el.po
--- a/src/calibre/translations/es.po
+++ b/src/calibre/translations/es.po
--- a/src/calibre/translations/fr.po
+++ b/src/calibre/translations/fr.po
--- a/src/calibre/translations/gl.po
+++ b/src/calibre/translations/gl.po
--- a/src/calibre/translations/hu.po
+++ b/src/calibre/translations/hu.po
--- a/src/calibre/translations/it.po
+++ b/src/calibre/translations/it.po
--- a/src/calibre/translations/nb.po
+++ b/src/calibre/translations/nb.po
--- a/src/calibre/translations/nds.po
+++ b/src/calibre/translations/nds.po
--- a/src/calibre/translations/nl.po
+++ b/src/calibre/translations/nl.po
--- a/src/calibre/translations/pl.po
+++ b/src/calibre/translations/pl.po
--- a/src/calibre/translations/pt.po
+++ b/src/calibre/translations/pt.po
--- a/src/calibre/translations/ro.po
+++ b/src/calibre/translations/ro.po
--- a/src/calibre/translations/ru.po
+++ b/src/calibre/translations/ru.po
--- a/src/calibre/translations/sk.po
+++ b/src/calibre/translations/sk.po
--- a/src/calibre/translations/sl.po
+++ b/src/calibre/translations/sl.po
--- a/src/calibre/translations/sv.po
+++ b/src/calibre/translations/sv.po
--- a/src/calibre/translations/te.po
+++ b/src/calibre/translations/te.po
--- a/src/calibre/web/feeds/recipes/init.py
+++ b/src/calibre/web/feeds/recipes/init.py
@ -22,7 +22,8 @@ recipe_modules = ['recipe_' + r for r in (
           'time_magazine', 'endgadget', 'fudzilla', 'nspm_int', 'nspm', 'pescanik',
           'spiegel_int', 'themarketticker', 'tomshardware', 'xkcd', 'ftd', 'zdnet',
           'joelonsoftware', 'telepolis', 'common_dreams', 'nin', 'tomshardware_de',
-           'pagina12', 'infobae', 'ambito', 'elargentino', 'sueddeutsche', 
+           'pagina12', 'infobae', 'ambito', 'elargentino', 'sueddeutsche', 'the_age',
+           'laprensa',
          )]

 import re, imp, inspect, time, os
--- a/src/calibre/web/feeds/recipes/recipe_laprensa.py
+++ b/src/calibre/web/feeds/recipes/recipe_laprensa.py
@ -0,0 +1,50 @@
+#!/usr/bin/env  python
+
+__license__   = 'GPL v3'
+__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
+'''
+laprensa.com.ar
+'''
+import urllib
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class LaPrensa(BasicNewsRecipe):
+    '''
+    News recipe for La Prensa (laprensa.com.ar).
+
+    Articles come from the section RSS feeds listed in ``feeds``; article
+    URLs are percent-quoted by :meth:`get_article_url` and rewritten to
+    their ``NotePrint`` variant by :meth:`print_version`.
+    '''
+    title                 = 'La Prensa'
+    __author__            = 'Darko Miletic'
+    description           = 'Informacion Libre las 24 horas'    
+    oldest_article        = 7
+    max_articles_per_feed = 100
+    no_stylesheets        = True
+    use_embedded_content  = False
+    encoding              = 'cp1252'
+    cover_url             = 'http://www.laprensa.com.ar/imgs/logo.gif'
+
+    # Comment, category and publisher metadata passed as html2lrf options.
+    html2lrf_options = [
+                          '--comment'       , description
+                        , '--category'      , 'news, Argentina'
+                        , '--publisher'     , title
+                        ]
+                            
+    # (section title, RSS URL) pairs.
+    feeds = [ 
+              (u'Politica'    , u'http://www.laprensa.com.ar/Rss.aspx?Rss=4' )
+             ,(u'Economia'    , u'http://www.laprensa.com.ar/Rss.aspx?Rss=5' )
+             ,(u'Opinion'     , u'http://www.laprensa.com.ar/Rss.aspx?Rss=6' )
+             ,(u'El Mundo'    , u'http://www.laprensa.com.ar/Rss.aspx?Rss=7' )
+             ,(u'Actualidad'  , u'http://www.laprensa.com.ar/Rss.aspx?Rss=8' )
+             ,(u'Deportes'    , u'http://www.laprensa.com.ar/Rss.aspx?Rss=9' )
+             ,(u'Espectaculos', u'http://www.laprensa.com.ar/Rss.aspx?Rss=10')
+            ]
+
+    def print_version(self, url):
+        '''Return ``url`` with ``.note.aspx`` replaced by ``.NotePrint.note.aspx``.'''
+        return url.replace('.note.aspx','.NotePrint.note.aspx')
+
+    def get_article_url(self, article):
+        '''
+        Return the percent-quoted link of a feed entry.
+
+        The link is encoded as UTF-8 and quoted, leaving ``:`` and ``/``
+        untouched. Returns ``None`` when the entry has no ``link`` instead
+        of failing on ``None.encode``.
+        '''
+        link = article.get('link',  None)
+        if link is None:
+            # NOTE(review): presumably the caller tolerates a missing URL,
+            # as it would with a plain article.get('link') -- confirm.
+            return None
+        return urllib.quote(link.encode('utf8'),':/')
+
+    def preprocess_html(self, soup):
+        '''
+        Drop the ``onload`` attribute from ``<body>`` and return ``soup``.
+
+        A document without a ``<body>`` is returned unchanged.
+        '''
+        body = soup.body
+        if body is not None:
+            del body['onload']
+        return soup
+    
--- a/src/calibre/web/feeds/recipes/recipe_the_age.py
+++ b/src/calibre/web/feeds/recipes/recipe_the_age.py
@ -0,0 +1,55 @@
+#!/usr/bin/env  python
+__license__   = 'GPL v3'
+__copyright__ = '2009, Matthew Briggs <hal.sulphur@gmail.com>'
+__docformat__ = 'restructuredtext en'
+
+'''
+theage.com.au
+'''
+from calibre import strftime
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
+
+
+class TheAge(BasicNewsRecipe):
+    '''
+    News recipe for The Age, built by scraping the text-only edition at
+    http://www.theage.com.au/text/ rather than from RSS feeds.
+    '''
+    
+    title = 'The Age'
+    description = 'Business News, World News and Breaking News in Melbourne, Australia'
+    __author__ = 'Matthew Briggs'
+    
+    def get_browser(self):
+        '''Return the base recipe browser with refresh handling turned off.'''
+        # NOTE(review): called on the class without an instance; presumably
+        # BasicNewsRecipe.get_browser is a classmethod -- confirm.
+        br = BasicNewsRecipe.get_browser()
+        br.set_handle_refresh(False)
+        return br
+    
+    def parse_index(self):
+        '''
+        Build the list of feeds from the text edition index page.
+
+        Every <h3> starts a new section named after its text; every <a>
+        with a non-empty ``href`` that follows becomes an article of that
+        section. Links found before the first <h3> are ignored, and
+        sections without articles are omitted.
+
+        Returns a list of ``(section title, [article dict, ...])`` tuples.
+        '''
+        soup = BeautifulSoup(self.browser.open('http://www.theage.com.au/text/').read())
+        
+        feeds, articles = [], []
+        feed = None
+        
+        for tag in soup.findAll(['h3', 'a']):
+            if tag.name == 'h3':
+                # A new heading closes the section collected so far.
+                if articles:
+                    feeds.append((feed, articles))
+                    articles = []
+                feed = self.tag_to_string(tag)
+            elif feed is not None and tag.has_key('href') and tag['href'].strip():
+                url = tag['href'].strip()
+                if url.startswith('/'):
+                    # Site-relative link: make it absolute.
+                    url   = 'http://www.theage.com.au' + url 
+                title = self.tag_to_string(tag)
+                articles.append({
+                                 'title': title,
+                                 'url'  : url,
+                                 'date' : strftime('%a, %d %b'),
+                                 'description' : '',
+                                 'content'     : '',
+                                 }) 
+        
+        # The last section has no following <h3> to close it, so flush it
+        # here; otherwise its articles would be dropped.
+        if articles:
+            feeds.append((feed, articles))
+                
+        return feeds
+                
+
+
--- a/src/calibre/web/fetch/simple.py
+++ b/src/calibre/web/fetch/simple.py
@ -398,7 +398,7 @@ class RecursiveFetcher(object, LoggingInterface):
                    _fname = basename(iurl)
                    if not isinstance(_fname, unicode):
                        _fname.decode('latin1', 'replace')
-                    _fname.encode('ascii', 'replace').replace('%', '')
+                    _fname = _fname.encode('ascii', 'replace').replace('%', '').replace(os.sep, '')
                    res = os.path.join(linkdiskpath, _fname)
                    self.downloaded_paths.append(res)
                    self.filemap[nurl] = res