Merge from trunk

2025-07-09 03:04:10 -04:00 · 2011-03-15 17:17:06 +00:00 · 2011-03-15 17:17:06 +00:00 · 6bdb3232cf
commit 6bdb3232cf
parent 659ff4ec68 ef1f808d5b
3 changed files with 237 additions and 1 deletions
--- a/resources/recipes/modoros.recipe
+++ b/resources/recipes/modoros.recipe
@ -0,0 +1,89 @@
 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from calibre.constants import config_dir, CONFIG_DIR_MODE
 import os, os.path, urllib
 from hashlib import md5
 class ModorosBlogHu(BasicNewsRecipe):
    '''
    Recipe for the Hungarian-language blog served at modoros.blog.hu.

    The articles are taken from the blog's RSS feed.  parse_feeds() is
    overridden so that articles already seen on a previous run are dropped.
    '''

    __author__              = 'Zsolt Botykai'
    title                   = u'Modoros Blog'
    description             = u"Modoros.blog.hu"
    # Very large limits; presumably chosen because already-seen articles are
    # filtered out in parse_feeds() instead.
    oldest_article          = 10000
    max_articles_per_feed   = 10000
    reverse_article_order   = True
    language                = 'hu'
    remove_javascript       = True
    remove_empty_feeds      = True
    no_stylesheets          = True
    use_embedded_content    = False
    feeds                   = [(u'Modoros Blog', u'http://modoros.blog.hu/rss')]

    preprocess_regexps      = [
        # Cut everything from the "megosztas" marker comment to the end of
        # the body, keeping the closing tag.
        (re.compile(r'<!--megosztas -->.*?</body>', re.DOTALL|re.IGNORECASE),
         lambda match: '</body>'),
        # Strip the explicit left alignment from paragraphs.
        (re.compile(r'<p align="left"'), lambda m: '<p'),
        # Remove <noscript> blocks.
        (re.compile(r'<noscript.+?noscript>', re.DOTALL|re.IGNORECASE), lambda m: ''),
        # Remove the absolutely positioned image placed off the top of the page.
        (re.compile(r'<img style="position: absolute;top:-10px.+?>', re.DOTALL|re.IGNORECASE), lambda m: ''),
        # Remove empty paragraphs.
        (re.compile(r'<p>( |&nbsp;)*?</p>', re.DOTALL|re.IGNORECASE), lambda match: ''),
    ]

    extra_css = '''
                    body { background-color: white; color: black }
                '''

    remove_tags = [
        dict(name='div', attrs={'id':['csucs']}),
        dict(name='img', attrs={'style':['position: absolute;top:-10px;left:-10px;']}),
        dict(name='div', attrs={'class':['tovabb-is-van', 'page-break', 'clear']}),
        dict(name='span', attrs={'class':['hozzaszolas-szamlalo']}),
    ]

    masthead_url='http://modoros.blog.hu/media/skins/modoros-neon/img/modorosblog-felirat.png'

    def get_cover_url(self):
        '''Return the cover image URL; the masthead image doubles as cover.'''
        return self.masthead_url

    # As seen here: http://www.mobileread.com/forums/showpost.php?p=1295505&postcount=10
    def parse_feeds(self):
        '''
        Parse the feeds and drop every article seen on a previous run.

        For each feed a text file is kept under
        <config_dir>/recipes/recipe_storage/<recipe title>/<quoted feed title>
        holding one fingerprint per line.  A fingerprint is the MD5 hex
        digest of the article content and summary, prefixed with the article
        URL and a colon when the URL is set.  Articles whose fingerprint is
        found in the file are removed from the feed, then the file is
        rewritten with the fingerprints of the articles currently in the feed.

        Returns the list of feeds; feeds left without articles are removed
        from it when remove_empty_feeds is set.
        '''
        storage_dir = os.path.join(config_dir, 'recipes', 'recipe_storage')
        # '/' must not end up in a single path component.
        feed_dir = os.path.join(storage_dir, self.title.encode('utf-8').replace('/', ':'))
        if not os.path.isdir(feed_dir):
            os.makedirs(feed_dir, mode=CONFIG_DIR_MODE)

        feeds = BasicNewsRecipe.parse_feeds(self)

        for feed in feeds:
            # Quote everything (safe='') so the feed title is a valid file name.
            feed_fn = os.path.join(feed_dir, urllib.quote(feed.title.encode('utf-8'), safe=''))

            past_items = set()
            if os.path.exists(feed_fn):
                with open(feed_fn) as f:
                    past_items.update(line.strip() for line in f)

            cur_items = set()
            unseen = []
            for article in feed.articles:
                digest = md5()
                if article.content:
                    digest.update(article.content.encode('utf-8'))
                if article.summary:
                    digest.update(article.summary.encode('utf-8'))
                item_hash = digest.hexdigest()
                if article.url:
                    item_hash = article.url + ':' + item_hash
                cur_items.add(item_hash)
                if item_hash not in past_items:
                    unseen.append(article)
            # Replace the contents in place so the feed keeps the same list object.
            feed.articles[:] = unseen

            with open(feed_fn, 'w') as f:
                f.writelines(h + '\n' for h in cur_items)

        if self.remove_empty_feeds:
            for empty in [f for f in feeds if len(f) == 0]:
                feeds.remove(empty)
        return feeds
--- a/resources/recipes/office_space.recipe
+++ b/resources/recipes/office_space.recipe
@ -0,0 +1,109 @@
 import re
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from calibre.constants import config_dir, CONFIG_DIR_MODE
 import os, os.path, urllib
 from hashlib import md5
 class OfficeSpaceBlogHu(BasicNewsRecipe):
    '''
    Recipe for the Hungarian-language blog served at officespace.blog.hu.

    The articles are taken from the blog's RSS feed.  parse_feeds() is
    overridden so that articles already seen on a previous run are dropped,
    and preprocess_html() turns links into plain text.
    '''

    __author__              = 'Zsolt Botykai'
    # NOTE(review): the spelling of the title looks suspicious - confirm it
    # against the blog's real name.  It is also used by parse_feeds() as the
    # name of the storage directory, so changing it resets the seen-list.
    title                   = u'Irodai patkényok'
    description             = u"officespace.blog.hu"
    # Very large limits; presumably chosen because already-seen articles are
    # filtered out in parse_feeds() instead.
    oldest_article          = 10000
    max_articles_per_feed   = 10000
    reverse_article_order   = True
    language                = 'hu'
    remove_javascript       = True
    remove_empty_feeds      = True
    no_stylesheets          = True
    use_embedded_content    = False
    feeds                   = [(u'Office Space', u'http://officespace.blog.hu/rss')]
    masthead_url='http://m.blog.hu/of/officespace/ipfejlec7.jpg'

    keep_only_tags = [
        dict(name='div', attrs={'id':['mainWrapper']}),
    ]

    preprocess_regexps = [
        # 1.: I like justified lines more
        (re.compile(r'<p align="left"'), lambda m: '<p'),
        # 2.: remove empty paragraphs
        (re.compile(r'<p>( |&nbsp;)*?</p>', re.DOTALL|re.IGNORECASE), lambda match: ''),
        # 3.: drop header and sidebar
        (re.compile(r'<body[^>]+>.*?<div id="mainIn"', re.DOTALL|re.IGNORECASE), lambda match: '<body><div id="mainIn"'),
        # 4.: drop comments counter
        (re.compile(r'<h3 class="comments">.*?</h3>', re.DOTALL|re.IGNORECASE), lambda match: ''),
        # 5.: drop everything after article-tags; the match swallows the
        #     closing body tag, so that is what has to be put back
        (re.compile(r'<div class="related">.*?</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>'),
        # 6-8.: drop audit images
        (re.compile(r'<img style="position: absolute;" src="[^"]+pixel\?uc.*?>', re.DOTALL|re.IGNORECASE), lambda match: ''),
        (re.compile(r'<noscript.+?noscript>', re.DOTALL|re.IGNORECASE), lambda m: ''),
        (re.compile(r'<img style="position: absolute;top:-10px.+?>', re.DOTALL|re.IGNORECASE), lambda m: ''),
    ]

    extra_css = '''
                    body { background-color: white; color: black }
                '''

    def get_cover_url(self):
        '''Return the cover image URL; the masthead image doubles as cover.'''
        return self.masthead_url

    def preprocess_html(self, soup):
        '''
        Replace links by their text and return the modified soup.

        Links inside <h3 class="tags"> get a trailing comma appended.  Only
        links whose .string is not None are replaced; others are left as is.
        '''
        for tag_header in soup.findAll('h3', attrs={'class':'tags'}):
            for tag_link in tag_header.findAll('a'):
                if tag_link.string is not None:
                    tag_link.replaceWith(tag_link.string + ',')
        for link in soup.findAll('a'):
            if link.string is not None:
                link.replaceWith(link.string)
        return soup

    # As seen here: http://www.mobileread.com/forums/showpost.php?p=1295505&postcount=10
    def parse_feeds(self):
        '''
        Parse the feeds and drop every article seen on a previous run.

        For each feed a text file is kept under
        <config_dir>/recipes/recipe_storage/<recipe title>/<quoted feed title>
        holding one fingerprint per line.  A fingerprint is the MD5 hex
        digest of the article content and summary, prefixed with the article
        URL and a colon when the URL is set.  Articles whose fingerprint is
        found in the file are removed from the feed, then the file is
        rewritten with the fingerprints of the articles currently in the feed.

        Returns the list of feeds; feeds left without articles are removed
        from it when remove_empty_feeds is set.
        '''
        storage_dir = os.path.join(config_dir, 'recipes', 'recipe_storage')
        # '/' must not end up in a single path component.
        feed_dir = os.path.join(storage_dir, self.title.encode('utf-8').replace('/', ':'))
        if not os.path.isdir(feed_dir):
            os.makedirs(feed_dir, mode=CONFIG_DIR_MODE)

        feeds = BasicNewsRecipe.parse_feeds(self)

        for feed in feeds:
            # Quote everything (safe='') so the feed title is a valid file name.
            feed_fn = os.path.join(feed_dir, urllib.quote(feed.title.encode('utf-8'), safe=''))

            past_items = set()
            if os.path.exists(feed_fn):
                with open(feed_fn) as f:
                    past_items.update(line.strip() for line in f)

            cur_items = set()
            unseen = []
            for article in feed.articles:
                digest = md5()
                if article.content:
                    digest.update(article.content.encode('utf-8'))
                if article.summary:
                    digest.update(article.summary.encode('utf-8'))
                item_hash = digest.hexdigest()
                if article.url:
                    item_hash = article.url + ':' + item_hash
                cur_items.add(item_hash)
                if item_hash not in past_items:
                    unseen.append(article)
            # Replace the contents in place so the feed keeps the same list object.
            feed.articles[:] = unseen

            with open(feed_fn, 'w') as f:
                f.writelines(h + '\n' for h in cur_items)

        if self.remove_empty_feeds:
            for empty in [f for f in feeds if len(f) == 0]:
                feeds.remove(empty)
        return feeds
--- a/src/calibre/devices/kobo/driver.py
+++ b/src/calibre/devices/kobo/driver.py
@ -115,6 +115,8 @@ class KOBO(USBMS):
                    playlist_map[lpath]= "Im_Reading"
                elif readstatus == 2:
                    playlist_map[lpath]= "Read"
                elif readstatus == 3:
                    playlist_map[lpath]= "Closed"
                path = self.normalize_path(path)
                # print "Normalized FileName: " + path
@ -599,11 +601,47 @@ class KOBO(USBMS):
                        try:
                            cursor.execute('update content set ReadStatus=2,FirstTimeReading=\'true\' where BookID is Null and ContentID = ?', t)
                        except:
-                            debug_print('Database Exception:  Unable set book as Rinished')
+                            debug_print('Database Exception:  Unable set book as Finished')
                            raise
                        else:
                            connection.commit()
 #                            debug_print('Database: Commit set ReadStatus as Finished')
                if category == 'Closed':
                    # Reset Closed list in the database
                    if oncard == 'carda':
                        query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 3 and ContentID like \'file:///mnt/sd/%\''
                    elif oncard != 'carda' and oncard != 'cardb':
                        query= 'update content set ReadStatus=0, FirstTimeReading = \'true\' where BookID is Null and ReadStatus = 3 and ContentID not like \'file:///mnt/sd/%\''
                    try:
                        cursor.execute (query)
                    except:
                        debug_print('Database Exception:  Unable to reset Closed list')
                        raise
                    else:
 #                       debug_print('Commit: Reset Closed list')
                        connection.commit()
                    for book in books:
 #                       debug_print('Title:', book.title, 'lpath:', book.path)
                        book.device_collections = ['Closed']
                        extension =  os.path.splitext(book.path)[1]
                        ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(book.path)
                        ContentID = self.contentid_from_path(book.path, ContentType)
 #                        datelastread = time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime())
                        t = (ContentID,)
                        try:
                            cursor.execute('update content set ReadStatus=3,FirstTimeReading=\'true\' where BookID is Null and ContentID = ?', t)
                        except:
                            debug_print('Database Exception:  Unable set book as Closed')
                            raise
                        else:
                            connection.commit()
 #                            debug_print('Database: Commit set ReadStatus as Closed')
        else: # No collections
            # Since no collections exist the ReadStatus needs to be reset to 0 (Unread)
            print "Reseting ReadStatus to 0"