KG updates

GRiker 2011-01-22 14:42:57 -07:00
commit d7b1b084a4
7 changed files with 172 additions and 15 deletions

View File

@@ -0,0 +1,120 @@
import re
import urllib2

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, SoupStrainer

class Ebert(BasicNewsRecipe):
    title                 = 'Roger Ebert'
    __author__            = 'Shane Erstad'
    description           = 'Roger Ebert Movie Reviews'
    publisher             = 'Chicago Sun Times'
    category              = 'movies'
    oldest_article        = 8
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    encoding              = 'utf-8'
    masthead_url          = 'http://rogerebert.suntimes.com/graphics/global/roger.jpg'
    language              = 'en'
    remove_empty_feeds    = False
    PREFIX                = 'http://rogerebert.suntimes.com'

    patternReviews    = r'<span class="*?movietitle"*?>(.*?)</span>.*?<div class="*?headline"*?>(.*?)</div>(.*?)</div>'
    patternCommentary = r'<div class="*?headline"*?>.*?(<a href="/apps/pbcs.dll/article\?AID=.*?COMMENTARY.*?" id="ltred">.*?</a>).*?<div class="blurb clear">(.*?)</div>'
    patternPeople     = r'<div class="*?headline"*?>.*?(<a href="/apps/pbcs.dll/article\?AID=.*?PEOPLE.*?" id="ltred">.*?</a>).*?<div class="blurb clear">(.*?)</div>'
    patternGlossary   = r'<div class="*?headline"*?>.*?(<a href="/apps/pbcs.dll/article\?AID=.*?GLOSSARY.*?" id="ltred">.*?</a>).*?<div class="blurb clear">(.*?)</div>'

    conversion_options = {
                          'comment'          : description
                        , 'tags'             : category
                        , 'publisher'        : publisher
                        , 'language'         : language
                        , 'linearize_tables' : True
                         }

    feeds = [
              (u'Reviews'     , u'http://rogerebert.suntimes.com/apps/pbcs.dll/section?category=reviews' )
             ,(u'Commentary'  , u'http://rogerebert.suntimes.com/apps/pbcs.dll/section?category=COMMENTARY')
             ,(u'Great Movies', u'http://rogerebert.suntimes.com/apps/pbcs.dll/section?category=REVIEWS08')
             ,(u'People'      , u'http://rogerebert.suntimes.com/apps/pbcs.dll/section?category=PEOPLE')
             ,(u'Glossary'    , u'http://rogerebert.suntimes.com/apps/pbcs.dll/section?category=GLOSSARY')
            ]

    preprocess_regexps = [
        (re.compile(r'<font.*?>.*?This is a printer friendly.*?</font>.*?<hr>', re.DOTALL|re.IGNORECASE),
         lambda m: '')
    ]

    def print_version(self, url):
        return url + '&template=printart'

    def parse_index(self):
        totalfeeds = []
        lfeeds = self.get_feeds()
        for feedobj in lfeeds:
            feedtitle, feedurl = feedobj
            self.log('\tFeedurl: ', feedurl)
            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
            articles = []
            page = urllib2.urlopen(feedurl).read()

            if feedtitle == 'Reviews' or feedtitle == 'Great Movies':
                pattern = self.patternReviews
            elif feedtitle == 'Commentary':
                pattern = self.patternCommentary
            elif feedtitle == 'People':
                pattern = self.patternPeople
            elif feedtitle == 'Glossary':
                pattern = self.patternGlossary

            regex = re.compile(pattern, re.IGNORECASE|re.DOTALL)

            for match in regex.finditer(page):
                if feedtitle == 'Reviews' or feedtitle == 'Great Movies':
                    movietitle = match.group(1)
                    thislink = match.group(2)
                    description = match.group(3)
                elif feedtitle == 'Commentary' or feedtitle == 'People' or feedtitle == 'Glossary':
                    thislink = match.group(1)
                    description = match.group(2)

                self.log(thislink)

                for link in BeautifulSoup(thislink, parseOnlyThese=SoupStrainer('a')):
                    thisurl = self.PREFIX + link['href']
                    thislinktext = self.tag_to_string(link)

                    if feedtitle == 'Reviews' or feedtitle == 'Great Movies':
                        thistitle = movietitle
                    elif feedtitle == 'Commentary' or feedtitle == 'People' or feedtitle == 'Glossary':
                        thistitle = thislinktext

                    if thistitle == '':
                        thistitle = 'Ebert Journal Post'

                    """
                    pattern2 = r'AID=\/(.*?)\/'
                    reg2 = re.compile(pattern2, re.IGNORECASE|re.DOTALL)
                    match2 = reg2.search(thisurl)
                    date = match2.group(1)
                    c = time.strptime(match2.group(1),"%Y%m%d")
                    date=time.strftime("%a, %b %d, %Y", c)
                    self.log(date)
                    """

                    articles.append({
                                     'title'       : thistitle
                                    ,'date'        : ''
                                    ,'url'         : thisurl
                                    ,'description' : description
                                    })
            totalfeeds.append((feedtitle, articles))

        return totalfeeds
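
A minimal standalone sketch of what the patternReviews regex above is expected to capture (Python 2, like the recipe; the HTML fragment is invented for illustration and is not actual suntimes.com markup):

import re

patternReviews = r'<span class="*?movietitle"*?>(.*?)</span>.*?<div class="*?headline"*?>(.*?)</div>(.*?)</div>'
sample = ('<span class="movietitle">True Grit</span>'
          '<div class="headline"><a href="/apps/pbcs.dll/article?AID=/20110101/REVIEWS/101010101" id="ltred">True Grit</a></div>'
          '<div class="blurb clear">A sample blurb.</div>')

m = re.search(patternReviews, sample, re.IGNORECASE | re.DOTALL)
if m is not None:
    print m.group(1)    # movie title, used as the article title
    print m.group(2)    # headline markup; the <a href> inside supplies the article URL
    print m.group(3)    # trailing markup, used as the article description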

View File

@@ -221,7 +221,10 @@ def rewrite_links(root, link_repl_func, resolve_base_href=False):
                        el.text):
            stylesheet = parseString(el.text)
            replaceUrls(stylesheet, link_repl_func)
            el.text = '\n'+stylesheet.cssText + '\n'
            repl = stylesheet.cssText
            if isbytestring(repl):
                repl = repl.decode('utf-8')
            el.text = '\n'+ repl + '\n'

        if 'style' in el.attrib:
            text = el.attrib['style']
@@ -234,8 +237,11 @@ def rewrite_links(root, link_repl_func, resolve_base_href=False):
                            set_property(item)
                    elif v.CSS_PRIMITIVE_VALUE == v.cssValueType:
                        set_property(v)
                el.attrib['style'] = stext.cssText.replace('\n', ' ').replace('\r',
                        ' ')
                repl = stext.cssText.replace('\n', ' ').replace('\r',
                        ' ')
                if isbytestring(repl):
                    repl = repl.decode('utf-8')
                el.attrib['style'] = repl
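
Both hunks above add the same guard: cssutils may hand cssText back as a UTF-8 byte string, which lxml can reject once it contains non-ASCII data. A standalone sketch of that guard, assuming an isbytestring helper that is simply a Python 2 byte-string type check:

def isbytestring(obj):
    # Python 2: str is the byte-string type, unicode is the text type
    return isinstance(obj, str)

def css_to_text(css_text):
    # Decode only when cssutils handed back bytes; already-unicode text
    # passes through untouched before being written into the lxml tree.
    if isbytestring(css_text):
        css_text = css_text.decode('utf-8')
    return css_text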

View File

@@ -385,13 +385,27 @@ class ChooseLibraryAction(InterfaceAction):
        prefs['library_path'] = loc
        #from calibre.utils.mem import memory
        #import weakref, gc
        #ref = weakref.ref(self.gui.library_view.model().db)
        #before = memory()/1024**2
        #import weakref
        #from PyQt4.Qt import QTimer
        #self.dbref = weakref.ref(self.gui.library_view.model().db)
        #self.before_mem = memory()/1024**2
        self.gui.library_moved(loc)
        #print gc.get_referrers(ref)[0]
        #for i in xrange(3): gc.collect()
        #print 'leaked:', memory()/1024**2 - before
        #QTimer.singleShot(1000, self.debug_leak)

    def debug_leak(self):
        import gc
        from calibre.utils.mem import memory
        ref = self.dbref
        for i in xrange(3): gc.collect()
        if ref() is not None:
            print 11111, ref()
            for r in gc.get_referrers(ref())[:10]:
                print r
                print
        print 'before:', self.before_mem
        print 'after:', memory()/1024**2
        self.dbref = self.before_mem = None

    def qs_requested(self, idx, *args):
        self.switch_requested(self.qs_locations[idx])
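
The commented-out QTimer hook and the new debug_leak method form a weakref-based leak check: keep only a weak reference to the old database, force a few garbage-collection passes, and anything the weakref still resolves is being held alive by a stray strong reference. A condensed standalone sketch of the pattern (hypothetical Db class, Python 2 like the surrounding code):

import gc, weakref

class Db(object):
    pass

db = Db()
ref = weakref.ref(db)        # a weak reference does not keep db alive
db = None                    # drop the only strong reference
for i in xrange(3):
    gc.collect()             # a few passes to clear collectable cycles
if ref() is not None:
    print 'leaked:', ref()   # something still holds a strong reference
    for r in gc.get_referrers(ref())[:10]:
        print r
else:
    print 'collected cleanly'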

View File

@@ -150,13 +150,13 @@ class GuiRunner(QObject):
        if DEBUG:
            prints('Starting up...')

    def start_gui(self):
    def start_gui(self, db):
        from calibre.gui2.ui import Main
        main = Main(self.opts, gui_debug=self.gui_debug)
        if self.splash_screen is not None:
            self.splash_screen.showMessage(_('Initializing user interface...'))
            self.splash_screen.finish(main)
        main.initialize(self.library_path, self.db, self.listener, self.actions)
        main.initialize(self.library_path, db, self.listener, self.actions)
        if DEBUG:
            prints('Started up in', time.time() - self.startup_time)
        add_filesystem_book = partial(main.iactions['Add Books'].add_filesystem_book, allow_device=False)
@@ -200,8 +200,7 @@ class GuiRunner(QObject):
                det_msg=traceback.format_exc(), show=True)
            self.initialization_failed()

        self.db = db
        self.start_gui()
        self.start_gui(db)

    def initialize_db(self):
        db = None

View File

@@ -114,6 +114,9 @@ class TagsView(QTreeView): # {{{
    def set_database(self, db, tag_match, sort_by):
        self.hidden_categories = config['tag_browser_hidden_categories']
        old = getattr(self, '_model', None)
        if old is not None:
            old.break_cycles()
        self._model = TagsModel(db, parent=self,
                                hidden_categories=self.hidden_categories,
                                search_restriction=None,
@@ -371,6 +374,9 @@ class TagsView(QTreeView): # {{{
    # model. Reason: it is much easier than reconstructing the browser tree.
    def set_new_model(self, filter_categories_by=None):
        try:
            old = getattr(self, '_model', None)
            if old is not None:
                old.break_cycles()
            self._model = TagsModel(self.db, parent=self,
                                    hidden_categories=self.hidden_categories,
                                    search_restriction=self.search_restriction,
@@ -544,6 +550,9 @@ class TagsModel(QAbstractItemModel): # {{{
                        tooltip=tt, category_key=r)
        self.refresh(data=data)

    def break_cycles(self):
        self.db = self.root_item = None

    def mimeTypes(self):
        return ["application/calibre+from_library"]
@@ -1109,8 +1118,7 @@ class TagBrowserMixin(object): # {{{
    def __init__(self, db):
        self.library_view.model().count_changed_signal.connect(self.tags_view.recount)
        self.tags_view.set_database(self.library_view.model().db,
                                    self.tag_match, self.sort_by)
        self.tags_view.set_database(db, self.tag_match, self.sort_by)
        self.tags_view.tags_marked.connect(self.search.set_search_string)
        self.tags_view.tag_list_edit.connect(self.do_tags_list_edit)
        self.tags_view.user_category_edit.connect(self.do_user_categories_edit)

View File

@@ -42,6 +42,9 @@ class MetadataBackup(Thread): # {{{
    def stop(self):
        self.keep_running = False
        # Break cycles so that this object doesn't hold references to db
        self.do_write = self.get_metadata_for_dump = self.clear_dirtied = \
            self.set_dirtied = self.db = None

    def run(self):
        while self.keep_running:
@@ -185,6 +188,11 @@ class ResultCache(SearchQueryParser): # {{{
        self.build_date_relop_dict()
        self.build_numeric_relop_dict()

    def break_cycles(self):
        self._data = self.field_metadata = self.FIELD_MAP = \
            self.numeric_search_relops = self.date_search_relops = \
            self.all_search_locations = None

    def __getitem__(self, row):
        return self._data[self._map_filtered[row]]

View File

@@ -362,7 +362,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
        self.last_update_check = self.last_modified()

    def break_cycles(self):
        self.data = self.field_metadata = self.prefs = self.listeners = None
        self.data.break_cycles()
        self.data = self.field_metadata = self.prefs = self.listeners = \
            self.refresh_ondevice = None

    def initialize_database(self):
        metadata_sqlite = open(P('metadata_sqlite.sql'), 'rb').read()
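
Taken together, the break_cycles additions in the tag browser, the caches and LibraryDatabase2 follow one pattern: on a library switch, each object drops the attributes that point back at the database, delegating to its children first, so the old LibraryDatabase2 can actually be garbage collected. A schematic sketch with hypothetical class names (not the real calibre classes):

class Cache(object):
    def __init__(self, field_metadata):
        self.field_metadata = field_metadata   # shared with the database
        self._data = []

    def break_cycles(self):
        # Drop everything that refers back at the database's structures.
        self._data = self.field_metadata = None

class Database(object):
    def __init__(self):
        self.field_metadata = {}
        self.data = Cache(self.field_metadata)
        self.listeners = []

    def break_cycles(self):
        self.data.break_cycles()               # children first
        self.data = self.field_metadata = self.listeners = None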