# resources/recipes/roger_ebert.recipe
import re
import urllib2
from contextlib import closing

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, SoupStrainer


class Ebert(BasicNewsRecipe):
    """Recipe that builds its feeds by scraping rogerebert.suntimes.com.

    Each entry in ``feeds`` is a section index page.  ``parse_index`` downloads
    the page, runs the regular expression registered for that section over it
    and turns every anchor found in a match into one article.
    """

    title = 'Roger Ebert'
    __author__ = 'Shane Erstad'
    description = 'Roger Ebert Movie Reviews'
    publisher = 'Chicago Sun Times'
    category = 'movies'
    oldest_article = 8
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    masthead_url = 'http://rogerebert.suntimes.com/graphics/global/roger.jpg'
    language = 'en'
    remove_empty_feeds = False

    # Article links on the index pages are site-relative; this is prepended.
    PREFIX = 'http://rogerebert.suntimes.com'

    # NOTE(review): the index patterns below (and the one in
    # preprocess_regexps) consist only of wildcards and line breaks, with no
    # literal markup to anchor on.  They look as if HTML delimiters may have
    # been lost somewhere upstream -- confirm against the live pages before
    # relying on them.  Their values are kept exactly as found.

    # Three groups: movie title, markup containing the link, description.
    patternReviews = (
        r'(.*?).*?' '\n'
        r'(.*?)' '\n'
        r'(.*?)'
    )
    # Two groups each: markup containing the link, description.
    patternCommentary = (
        '\n'
        r'.*?(.*?).*?' '\n'
        r'(.*?)' '\n'
    )
    patternPeople = (
        '\n'
        r'.*?(.*?).*?' '\n'
        r'(.*?)' '\n'
    )
    patternGlossary = (
        '\n'
        r'.*?(.*?).*?' '\n'
        r'(.*?)' '\n'
    )

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
        'linearize_tables': True,
    }

    feeds = [
        (u'Reviews', u'http://rogerebert.suntimes.com/apps/pbcs.dll/section?category=reviews'),
        (u'Commentary', u'http://rogerebert.suntimes.com/apps/pbcs.dll/section?category=COMMENTARY'),
        (u'Great Movies', u'http://rogerebert.suntimes.com/apps/pbcs.dll/section?category=REVIEWS08'),
        (u'People', u'http://rogerebert.suntimes.com/apps/pbcs.dll/section?category=PEOPLE'),
        (u'Glossary', u'http://rogerebert.suntimes.com/apps/pbcs.dll/section?category=GLOSSARY'),
    ]

    # Blanks out whatever the pattern matches around the
    # "This is a printer friendly" text of the print template.
    preprocess_regexps = [
        (re.compile(r'.*?This is a printer friendly.*?.*?' '\n',
                    re.DOTALL | re.IGNORECASE),
         lambda m: ''),
    ]

    def print_version(self, url):
        """Return ``url`` with the print-template query parameter appended."""
        return url + '&template=printart'

    def _layout_for(self, feedtitle):
        """Return ``(pattern, has_movie_title)`` for a feed, or ``None``.

        ``has_movie_title`` is True when the pattern captures three groups
        (title, link markup, description) rather than two (link markup,
        description).  ``None`` means no pattern is registered for the title.
        The patterns are looked up on ``self`` so that overriding one of the
        ``pattern*`` attributes keeps working.
        """
        layouts = {
            'Reviews': (self.patternReviews, True),
            'Great Movies': (self.patternReviews, True),
            'Commentary': (self.patternCommentary, False),
            'People': (self.patternPeople, False),
            'Glossary': (self.patternGlossary, False),
        }
        return layouts.get(feedtitle)

    def parse_index(self):
        """Scrape every configured section page into a list of feeds.

        Returns a list of ``(feed title, articles)`` tuples, one per entry
        returned by ``self.get_feeds()``.  Each article is a dict with the
        keys ``title``, ``date`` (always empty), ``url`` and ``description``.
        A feed whose title has no registered pattern is reported through
        ``self.log`` and returned with an empty article list instead of
        failing or reusing another feed's pattern.
        """
        totalfeeds = []
        for feedtitle, feedurl in self.get_feeds():
            self.log('\tFeedurl: ', feedurl)
            self.report_progress(
                0,
                _('Fetching feed') + ' %s...' % (feedtitle if feedtitle else feedurl))
            articles = []

            layout = self._layout_for(feedtitle)
            if layout is None:
                # Nothing sensible can be extracted without a pattern.
                self.log('\tNo index pattern for feed %r, skipping' % (feedtitle,))
                totalfeeds.append((feedtitle, articles))
                continue
            pattern, has_movie_title = layout

            # closing() releases the connection even if read() raises.
            with closing(urllib2.urlopen(feedurl)) as response:
                page = response.read()

            regex = re.compile(pattern, re.IGNORECASE | re.DOTALL)
            for match in regex.finditer(page):
                if has_movie_title:
                    movietitle, thislink, blurb = match.group(1, 2, 3)
                else:
                    movietitle = None
                    thislink, blurb = match.group(1, 2)

                self.log(thislink)

                anchors = BeautifulSoup(thislink, parseOnlyThese=SoupStrainer('a'))
                for link in anchors:
                    href = link.get('href')
                    if not href:
                        # Anchor without a target: nothing to download.
                        continue

                    if has_movie_title:
                        thistitle = movietitle
                    else:
                        thistitle = self.tag_to_string(link)
                    if not thistitle:
                        thistitle = 'Ebert Journal Post'

                    articles.append({
                        'title': thistitle,
                        'date': '',
                        'url': self.PREFIX + href,
                        'description': blurb,
                    })
            totalfeeds.append((feedtitle, articles))

        return totalfeeds
--git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index e11f6b45be..9389964962 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -221,7 +221,10 @@ def rewrite_links(root, link_repl_func, resolve_base_href=False): el.text): stylesheet = parseString(el.text) replaceUrls(stylesheet, link_repl_func) - el.text = '\n'+stylesheet.cssText + '\n' + repl = stylesheet.cssText + if isbytestring(repl): + repl = repl.decode('utf-8') + el.text = '\n'+ repl + '\n' if 'style' in el.attrib: text = el.attrib['style'] @@ -234,8 +237,11 @@ def rewrite_links(root, link_repl_func, resolve_base_href=False): set_property(item) elif v.CSS_PRIMITIVE_VALUE == v.cssValueType: set_property(v) - el.attrib['style'] = stext.cssText.replace('\n', ' ').replace('\r', + repl = stext.cssText.replace('\n', ' ').replace('\r', ' ') + if isbytestring(repl): + repl = repl.decode('utf-8') + el.attrib['style'] = repl diff --git a/src/calibre/gui2/actions/choose_library.py b/src/calibre/gui2/actions/choose_library.py index 930e5e29aa..d726241432 100644 --- a/src/calibre/gui2/actions/choose_library.py +++ b/src/calibre/gui2/actions/choose_library.py @@ -385,13 +385,27 @@ class ChooseLibraryAction(InterfaceAction): prefs['library_path'] = loc #from calibre.utils.mem import memory - #import weakref, gc - #ref = weakref.ref(self.gui.library_view.model().db) - #before = memory()/1024**2 + #import weakref + #from PyQt4.Qt import QTimer + #self.dbref = weakref.ref(self.gui.library_view.model().db) + #self.before_mem = memory()/1024**2 self.gui.library_moved(loc) - #print gc.get_referrers(ref)[0] - #for i in xrange(3): gc.collect() - #print 'leaked:', memory()/1024**2 - before + #QTimer.singleShot(1000, self.debug_leak) + + def debug_leak(self): + import gc + from calibre.utils.mem import memory + ref = self.dbref + for i in xrange(3): gc.collect() + if ref() is not None: + print 11111, ref() + for r in gc.get_referrers(ref())[:10]: + print r + print + print 
'before:', self.before_mem + print 'after:', memory()/1024**2 + self.dbref = self.before_mem = None + def qs_requested(self, idx, *args): self.switch_requested(self.qs_locations[idx]) diff --git a/src/calibre/gui2/main.py b/src/calibre/gui2/main.py index aaca398e44..b88b1d680d 100644 --- a/src/calibre/gui2/main.py +++ b/src/calibre/gui2/main.py @@ -150,13 +150,13 @@ class GuiRunner(QObject): if DEBUG: prints('Starting up...') - def start_gui(self): + def start_gui(self, db): from calibre.gui2.ui import Main main = Main(self.opts, gui_debug=self.gui_debug) if self.splash_screen is not None: self.splash_screen.showMessage(_('Initializing user interface...')) self.splash_screen.finish(main) - main.initialize(self.library_path, self.db, self.listener, self.actions) + main.initialize(self.library_path, db, self.listener, self.actions) if DEBUG: prints('Started up in', time.time() - self.startup_time) add_filesystem_book = partial(main.iactions['Add Books'].add_filesystem_book, allow_device=False) @@ -200,8 +200,7 @@ class GuiRunner(QObject): det_msg=traceback.format_exc(), show=True) self.initialization_failed() - self.db = db - self.start_gui() + self.start_gui(db) def initialize_db(self): db = None diff --git a/src/calibre/gui2/tag_view.py b/src/calibre/gui2/tag_view.py index f6eac49426..837776cb9a 100644 --- a/src/calibre/gui2/tag_view.py +++ b/src/calibre/gui2/tag_view.py @@ -114,6 +114,9 @@ class TagsView(QTreeView): # {{{ def set_database(self, db, tag_match, sort_by): self.hidden_categories = config['tag_browser_hidden_categories'] + old = getattr(self, '_model', None) + if old is not None: + old.break_cycles() self._model = TagsModel(db, parent=self, hidden_categories=self.hidden_categories, search_restriction=None, @@ -371,6 +374,9 @@ class TagsView(QTreeView): # {{{ # model. Reason: it is much easier than reconstructing the browser tree. 
def set_new_model(self, filter_categories_by=None): try: + old = getattr(self, '_model', None) + if old is not None: + old.break_cycles() self._model = TagsModel(self.db, parent=self, hidden_categories=self.hidden_categories, search_restriction=self.search_restriction, @@ -544,6 +550,9 @@ class TagsModel(QAbstractItemModel): # {{{ tooltip=tt, category_key=r) self.refresh(data=data) + def break_cycles(self): + self.db = self.root_item = None + def mimeTypes(self): return ["application/calibre+from_library"] @@ -1109,8 +1118,7 @@ class TagBrowserMixin(object): # {{{ def __init__(self, db): self.library_view.model().count_changed_signal.connect(self.tags_view.recount) - self.tags_view.set_database(self.library_view.model().db, - self.tag_match, self.sort_by) + self.tags_view.set_database(db, self.tag_match, self.sort_by) self.tags_view.tags_marked.connect(self.search.set_search_string) self.tags_view.tag_list_edit.connect(self.do_tags_list_edit) self.tags_view.user_category_edit.connect(self.do_user_categories_edit) diff --git a/src/calibre/library/caches.py b/src/calibre/library/caches.py index 4168360d3a..291d71f572 100644 --- a/src/calibre/library/caches.py +++ b/src/calibre/library/caches.py @@ -42,6 +42,9 @@ class MetadataBackup(Thread): # {{{ def stop(self): self.keep_running = False + # Break cycles so that this object doesn't hold references to db + self.do_write = self.get_metadata_for_dump = self.clear_dirtied = \ + self.set_dirtied = self.db = None def run(self): while self.keep_running: @@ -185,6 +188,11 @@ class ResultCache(SearchQueryParser): # {{{ self.build_date_relop_dict() self.build_numeric_relop_dict() + def break_cycles(self): + self._data = self.field_metadata = self.FIELD_MAP = \ + self.numeric_search_relops = self.date_search_relops = \ + self.all_search_locations = None + def __getitem__(self, row): return self._data[self._map_filtered[row]] diff --git a/src/calibre/library/database2.py b/src/calibre/library/database2.py index 
3dc110c1c8..4b66b6620f 100644 --- a/src/calibre/library/database2.py +++ b/src/calibre/library/database2.py @@ -362,7 +362,9 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns): self.last_update_check = self.last_modified() def break_cycles(self): - self.data = self.field_metadata = self.prefs = self.listeners = None + self.data.break_cycles() + self.data = self.field_metadata = self.prefs = self.listeners = \ + self.refresh_ondevice = None def initialize_database(self): metadata_sqlite = open(P('metadata_sqlite.sql'), 'rb').read()