From 7e903eaded5ce7b47065743fe1bc48507ee79fed Mon Sep 17 00:00:00 2001 From: Starson17 Date: Fri, 29 Jan 2010 15:27:33 -0500 Subject: [PATCH 1/6] Adds checkboxes on config and single fetch metadata screens to control author/title overwrites during metadata fetch --- src/calibre/gui2/__init__.py | 2 ++ src/calibre/gui2/dialogs/config/__init__.py | 2 ++ src/calibre/gui2/dialogs/config/config.ui | 7 +++++++ src/calibre/gui2/dialogs/fetch_metadata.py | 1 + src/calibre/gui2/dialogs/fetch_metadata.ui | 7 +++++++ src/calibre/gui2/dialogs/metadata_single.py | 7 ++++--- src/calibre/gui2/metadata.py | 5 +++++ 7 files changed, 28 insertions(+), 3 deletions(-) diff --git a/src/calibre/gui2/__init__.py b/src/calibre/gui2/__init__.py index 7181c16329..fed22f87e2 100644 --- a/src/calibre/gui2/__init__.py +++ b/src/calibre/gui2/__init__.py @@ -89,6 +89,8 @@ def _config(): help=_('Maximum number of waiting worker processes')) c.add_opt('get_social_metadata', default=True, help=_('Download social metadata (tags/rating/etc.)')) + c.add_opt('overwrite_author_title_metadata', default=True, + help=_('Overwrite author and title with new metadata')) c.add_opt('enforce_cpu_limit', default=True, help=_('Limit max simultaneous jobs to number of CPUs')) diff --git a/src/calibre/gui2/dialogs/config/__init__.py b/src/calibre/gui2/dialogs/config/__init__.py index 156faec7ce..88697e55bb 100644 --- a/src/calibre/gui2/dialogs/config/__init__.py +++ b/src/calibre/gui2/dialogs/config/__init__.py @@ -458,6 +458,7 @@ class ConfigDialog(ResizableDialog, Ui_Dialog): self.connect(self.button_open_config_dir, SIGNAL('clicked()'), self.open_config_dir) self.opt_get_social_metadata.setChecked(config['get_social_metadata']) + self.opt_overwrite_author_title_metadata.setChecked(config['overwrite_author_title_metadata']) self.opt_enforce_cpu_limit.setChecked(config['enforce_cpu_limit']) self.device_detection_button.clicked.connect(self.debug_device_detection) @@ -751,6 +752,7 @@ class ConfigDialog(ResizableDialog, Ui_Dialog): config['upload_news_to_device'] = self.sync_news.isChecked() config['search_as_you_type'] = self.search_as_you_type.isChecked() config['get_social_metadata'] = self.opt_get_social_metadata.isChecked() + config['overwrite_author_title_metadata'] = self.opt_overwrite_author_title_metadata.isChecked() config['enforce_cpu_limit'] = bool(self.opt_enforce_cpu_limit.isChecked()) fmts = [] for i in range(self.viewer.count()): diff --git a/src/calibre/gui2/dialogs/config/config.ui b/src/calibre/gui2/dialogs/config/config.ui index b9306b0f10..6da5362248 100644 --- a/src/calibre/gui2/dialogs/config/config.ui +++ b/src/calibre/gui2/dialogs/config/config.ui @@ -171,6 +171,13 @@ + + + + Overwrite & author/title by default when fetching metadata + + + diff --git a/src/calibre/gui2/dialogs/fetch_metadata.py b/src/calibre/gui2/dialogs/fetch_metadata.py index b021a2470d..59f07f0399 100644 --- a/src/calibre/gui2/dialogs/fetch_metadata.py +++ b/src/calibre/gui2/dialogs/fetch_metadata.py @@ -119,6 +119,7 @@ class FetchMetadata(QDialog, Ui_FetchMetadata): self.matches.setMouseTracking(True) self.fetch_metadata() self.opt_get_social_metadata.setChecked(config['get_social_metadata']) + self.opt_overwrite_author_title_metadata.setChecked(config['overwrite_author_title_metadata']) def show_summary(self, current, *args): diff --git a/src/calibre/gui2/dialogs/fetch_metadata.ui b/src/calibre/gui2/dialogs/fetch_metadata.ui index fe97b32f28..f14d402e11 100644 --- a/src/calibre/gui2/dialogs/fetch_metadata.ui +++ b/src/calibre/gui2/dialogs/fetch_metadata.ui @@ -116,6 +116,13 @@ + + + + Overwrite &author/title with author/title of selected book + + + diff --git a/src/calibre/gui2/dialogs/metadata_single.py b/src/calibre/gui2/dialogs/metadata_single.py index 89b7c92125..846851fd21 100644 --- a/src/calibre/gui2/dialogs/metadata_single.py +++ b/src/calibre/gui2/dialogs/metadata_single.py @@ -574,9 +574,10 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog): det_msg=det, show=True) else: book.tags = [] - self.title.setText(book.title) - self.authors.setText(authors_to_string(book.authors)) - if book.author_sort: self.author_sort.setText(book.author_sort) + if d.opt_overwrite_author_title_metadata.isChecked(): + self.title.setText(book.title) + self.authors.setText(authors_to_string(book.authors)) + if book.author_sort: self.author_sort.setText(book.author_sort) if book.publisher: self.publisher.setEditText(book.publisher) if book.isbn: self.isbn.setText(book.isbn) if book.pubdate: diff --git a/src/calibre/gui2/metadata.py b/src/calibre/gui2/metadata.py index ecdca29422..d63e9648cc 100644 --- a/src/calibre/gui2/metadata.py +++ b/src/calibre/gui2/metadata.py @@ -12,6 +12,7 @@ from Queue import Queue, Empty from calibre.ebooks.metadata.fetch import search, get_social_metadata +from calibre.gui2 import config from calibre.ebooks.metadata.library_thing import cover_from_isbn from calibre.customize.ui import get_isbndb_key @@ -98,6 +99,10 @@ class DownloadMetadata(Thread): self.fetched_metadata[id] = fmi if fmi.isbn and self.get_covers: self.worker.jobs.put(fmi.isbn) + if (not config['overwrite_author_title_metadata']): + fmi.authors = mi.authors + fmi.author_sort = mi.author_sort + fmi.title = mi.title mi.smart_update(fmi) if mi.isbn and self.get_social_metadata: self.social_metadata_exceptions = get_social_metadata(mi) From 845e558ae06e2bbf0585bdcb4281d78fbff91191 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 29 Jan 2010 14:12:12 -0700 Subject: [PATCH 2/6] Don't die if generating default masthead fails --- src/calibre/web/feeds/news.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 113d7dd756..d182d856d8 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -770,7 +770,11 @@ class BasicNewsRecipe(Recipe): self.download_masthead(murl) if self.masthead_path is None: self.masthead_path = os.path.join(self.output_dir, 'mastheadImage.jpg') - self.default_masthead_image(self.masthead_path) + try: + self.default_masthead_image(self.masthead_path) + except: + self.log.exception('Failed to generate default masthead image') + self.masthead_path = None if self.test: feeds = feeds[:2] @@ -1061,7 +1065,7 @@ class BasicNewsRecipe(Recipe): opf = OPFCreator(dir, mi) # Add mastheadImage entry to section mp = getattr(self, 'masthead_path', None) - if mp is not None: + if mp is not None and os.access(mp, os.R_OK): from calibre.ebooks.metadata.opf2 import Guide ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu()) ref.type = 'masthead' From bb11c80871ae629016f9338d46641a7584f4dc79 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 29 Jan 2010 18:20:12 -0700 Subject: [PATCH 3/6] Fix #4730 (Updated recipe for Serbian weekly magazine NIN) --- resources/recipes/nin.recipe | 47 +++++++++++------------------------- 1 file changed, 14 insertions(+), 33 deletions(-) diff --git a/resources/recipes/nin.recipe b/resources/recipes/nin.recipe index 535652b6a0..a349f0e11f 100644 --- a/resources/recipes/nin.recipe +++ b/resources/recipes/nin.recipe @@ -1,46 +1,42 @@ -#!/usr/bin/env python __license__ = 'GPL v3' -__copyright__ = '2008-2009, Darko Miletic ' +__copyright__ = '2008-2010, Darko Miletic ' ''' -nin.co.rs +www.nin.co.rs ''' import re, urllib from calibre import strftime from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag class Nin(BasicNewsRecipe): title = 'NIN online' __author__ = 'Darko Miletic' - description = 'Nedeljne informativne novine' - publisher = 'NIN D.O.O.' + description = 'Nedeljne Informativne Novine' + publisher = 'NIN d.o.o.' category = 'news, politics, Serbia' no_stylesheets = True oldest_article = 15 - simultaneous_downloads = 1 - delay = 1 encoding = 'utf-8' needs_subscription = True + remove_empty_feeds = True PREFIX = 'http://www.nin.co.rs' INDEX = PREFIX + '/?change_lang=ls' LOGIN = PREFIX + '/?logout=true' use_embedded_content = False language = 'sr' - lang = 'sr-Latn-RS' - direction = 'ltr' - extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif} .artTitle{font-size: x-large; font-weight: bold} .columnhead{font-size: small; font-weight: bold}' + extra_css = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Verdana, Lucida, sans1, sans-serif} .article_description{font-family: Verdana, Lucida, sans1, sans-serif} .artTitle{font-size: x-large; font-weight: bold; color: #900} .izjava{font-size: x-large; font-weight: bold} .columnhead{font-size: small; font-weight: bold;} img{margin-top:0.5em; margin-bottom: 0.7em} b{margin-top: 1em} ' conversion_options = { 'comment' : description , 'tags' : category , 'publisher' : publisher , 'language' : language - , 'pretty_print' : True + , 'linearize_tables' : True } preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')] + remove_attributes = ['height','width'] def get_browser(self): br = BasicNewsRecipe.get_browser() @@ -65,34 +61,18 @@ class Nin(BasicNewsRecipe): cover_url = self.PREFIX + link_item['src'] return cover_url - def preprocess_html(self, soup): - soup.html['lang'] = self.lang - soup.html['dir' ] = self.direction - mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)]) - mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")]) - soup.head.insert(0,mlang) - soup.head.insert(1,mcharset) - attribs = [ 'style','font','valign' - ,'colspan','width','height' - ,'rowspan','summary','align' - ,'cellspacing','cellpadding' - ,'frames','rules','border' - ] - for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']): - item.name = 'div' - for attrib in attribs: - if item.has_key(attrib): - del item[attrib] - return soup - def parse_index(self): articles = [] + count = 0 soup = self.index_to_soup(self.PREFIX) for item in soup.findAll('a',attrs={'class':'lmeninavFont'}): + count = count +1 + if self.test and count > 2: + return articles section = self.tag_to_string(item) feedlink = self.PREFIX + item['href'] feedpage = self.index_to_soup(feedlink) - self.report_progress(0, _('Fetching feed')+' %s...'%(section)) + self.report_progress(0, _('Fetching feed')+' %s...'%(section)) inarts = [] for art in feedpage.findAll('span',attrs={'class':'artTitle'}): alink = art.parent @@ -110,3 +90,4 @@ class Nin(BasicNewsRecipe): }) articles.append((section,inarts)) return articles + From a5f13fa593080c8e5a63b4cc4bdbffb1dfd968be Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 29 Jan 2010 18:25:41 -0700 Subject: [PATCH 4/6] Workaround for broken XHTML in some Washington Post articles --- resources/recipes/wash_post.recipe | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/resources/recipes/wash_post.recipe b/resources/recipes/wash_post.recipe index 5e62aa753c..a917371cec 100644 --- a/resources/recipes/wash_post.recipe +++ b/resources/recipes/wash_post.recipe @@ -46,3 +46,10 @@ class WashingtonPost(BasicNewsRecipe): div['style'] = '' return soup + def preprocess_html(self, soup): + for tag in soup.findAll('font'): + if tag.has_key('size'): + if tag['size'] == '+2': + if tag.b: + return soup + return None From f284a6c2b60b1ed16d80f62f29334ad77a2e0f8a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 29 Jan 2010 19:26:48 -0700 Subject: [PATCH 5/6] Various Danish newspapers by Darko Miletic --- resources/images/news/information_dk.png | Bin 0 -> 343 bytes resources/images/news/jp_dk.png | Bin 0 -> 609 bytes resources/images/news/politiken_dk.png | Bin 0 -> 482 bytes resources/recipes/information_dk.recipe | 50 +++++++++++++++++++++ resources/recipes/jp_dk.recipe | 50 +++++++++++++++++++++ resources/recipes/metro_montreal.recipe | 2 +- resources/recipes/nin.recipe | 3 +- resources/recipes/politiken_dk.recipe | 55 +++++++++++++++++++++++ 8 files changed, 158 insertions(+), 2 deletions(-) create mode 100644 resources/images/news/information_dk.png create mode 100644 resources/images/news/jp_dk.png create mode 100644 resources/images/news/politiken_dk.png create mode 100644 resources/recipes/information_dk.recipe create mode 100644 resources/recipes/jp_dk.recipe create mode 100644 resources/recipes/politiken_dk.recipe diff --git a/resources/images/news/information_dk.png b/resources/images/news/information_dk.png new file mode 100644 index 0000000000000000000000000000000000000000..301e2992c700338d75149db8b43905a3f9e06ffa GIT binary patch literal 343 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!73?$#)eFPFP2=EDU{owBVrLgRsmeHrs=s#Dk zeNN8&v1;w#$4`D8JMp!le;+@-l~DmI|Ns9#ki3@T{1!-Y7I;J!18EO1b~~AE z2NEnRa7+i%V4E0Xj;%cmK<0IIqoUzs;I> zU=AZgU}- uD+7Z!oF|LIhYn5Ix<=1C@(=%`MShWI&gq1gq;ET}vaD(E$(U^G%QN>#p3U0w zd&?yc{o?TFtQB9M+)P-W@LV`$CDYG#gQSi*o&3vp@814>-Pf(v)vwp}{}VUab#3~) zsNaTHjLXY^$$tC5*QL5ZfmMn71nX8fH(}&~mcpOsK5ji+#ywd0Q-v zW+tCKzVIjK6VVs>9HK{b6;9l?o-m0!<;B_^EUOpZPT1ytO8ZRJ2DV+gEkzksO?x7n zLpq@|-SL97xpuW7629aQpQOk}*)_|E;a(FyM*d)js!|D|gb>$Fkn^S9N`c2aA2 z>yFIIpWyGv|5A&6M$;65ddC$W$u{kI&9eI#f|u;_lzhYZNUv7t*@IPemtxvgqjqM$ zem=ogf7!>b+Z(n0qbk^rO$_l{{jt8wqS5}fms1I!;#o6b6seZDMwFx^mZVxG7o`Fz z1|tJQ16>0{T_ej7LrW_o11l4AT?2C~1A{l7KNwLogTe~DWM4fJwEpK literal 0 HcmV?d00001 diff --git a/resources/images/news/politiken_dk.png b/resources/images/news/politiken_dk.png new file mode 100644 index 0000000000000000000000000000000000000000..66f324a8c79a560af7b081f42682e0486c1dcd07 GIT binary patch literal 482 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87#O)dT^vI!POlC2^*iJs^3QES=dp~$);tMu zwPT^`Z&uz}aPPt0CBphl519&@gm#C`5O%n^QBf?}L~+*ftXXXxVXvm|N%>T_Z_oSF zHV4mWG%pUi%&>~#gO1BW(>~ktI~)!6@Em5JEqo?shJt^I^;|866&Y(86>m%|`k2@= zJwHd_>suo?Z1Bn`-?Kf9Z#;GtP;NaXnFFK+V{#@e<|91|xUVFHZWzieUd`2d= z+FyJJ4!Q%wNVUW@q9i4;B-JXpC>2OC7#SEE=o%R68d-)IT3Q(ySecmX8kk!d7`*ZP m!HA+EH$NpatrAUxsi~Eb2}Hxv#@+;=1_n=8KbLh*2~7Z$7` Date: Fri, 29 Jan 2010 23:17:17 -0700 Subject: [PATCH 6/6] Orange County Register by Lorenzo Vigentini --- resources/recipes/oc_register.recipe | 73 ++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 resources/recipes/oc_register.recipe diff --git a/resources/recipes/oc_register.recipe b/resources/recipes/oc_register.recipe new file mode 100644 index 0000000000..9a04585a3c --- /dev/null +++ b/resources/recipes/oc_register.recipe @@ -0,0 +1,73 @@ +#!/usr/bin/env python +__license__ = 'GPL v3' +__author__ = 'Lorenzo Vigentini' +__copyright__ = '2009, Lorenzo Vigentini ' +description = 'News from the Orange county - v1.01 (29, January 2010)' + +''' +http://www.ocregister.com/ +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class ocRegister(BasicNewsRecipe): + author = 'Lorenzo Vigentini' + description = 'News from the Orange county' + + cover_url = 'http://images.onset.freedom.com/ocregister/logo.gif' + title = u'Orange County Register' + publisher = 'Orange County Register Communication' + category = 'News, finance, economy, politics' + + language = 'en' + timefmt = '[%a, %d %b, %Y]' + + oldest_article = 1 + max_articles_per_feed = 25 + use_embedded_content = False + recursion = 10 + + remove_javascript = True + no_stylesheets = True + + def print_version(self,url): + printUrl = 'http://www.ocregister.com/common/printer/view.php?db=ocregister&id=' + segments = url.split('/') + subSegments = (segments[4]).split('.') + myArticle = (subSegments[0]).replace('-', '') + myURL= printUrl + myArticle + return myURL + + keep_only_tags = [ + dict(name='div', attrs={'id':'ArticleContentWrap'}) + ] + + remove_tags = [ + dict(name='div', attrs={'class':'hideForPrint'}), + dict(name='div', attrs={'id':'ContentFooter'}) + ] + + feeds = [ + (u'News', u'http://www.ocregister.com/common/rss/rss.php?catID=18800'), + (u'Today paper', u'http://www.ocregister.com/common/rss/rss.php?catID=18976'), + (u'Business', u'http://www.ocregister.com/common/rss/rss.php?catID=18909'), + (u'Cars', u'http://www.ocregister.com/common/rss/rss.php?catID=20128'), + (u'Entertainment', u'http://www.ocregister.com/common/rss/rss.php?catID=18926'), + (u'Home', u'http://www.ocregister.com/common/rss/rss.php?catID=19142'), + (u'Life', u'http://www.ocregister.com/common/rss/rss.php?catID=18936'), + (u'Opinion', u'http://www.ocregister.com/common/rss/rss.php?catID=18963'), + (u'Sports', u'http://www.ocregister.com/common/rss/rss.php?catID=18901'), + (u'Travel', u'http://www.ocregister.com/common/rss/rss.php?catID=18959') + ] + + extra_css = ''' + h1 {color:#ff6600;font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:20px;} + h2 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:16px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:16px; } + h3 {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:15px;} + h4 {color:#333333; font-family:Arial,Helvetica,sans-serif;font-size:13px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:13px; } + h5 {color:#333333; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:11px; text-transform:uppercase;} + #articledate {color:#333333;font-family:Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:italic; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;} + #articlebyline {color:#4D4D4D;font-family:Arial,Helvetica,sans-serif;font-size:10px; font-size-adjust:none; font-stretch:normal; font-style:bold; font-variant:normal; font-weight:bold; line-height:10px; text-decoration:none;} + img {align:left;} + #topstoryhead {color:#ff6600;font-family:Arial,Helvetica,sans-serif; font-size:22px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; line-height:20px;} + '''