From 7ea11f7c1ce2554194b428c78a1ef2469e5de309 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 6 Oct 2012 13:02:50 +0530 Subject: [PATCH 1/5] Metadata download: Add an option to turn off the use of the published date for the earliest edition of a book as the published date --- src/calibre/ebooks/metadata/sources/base.py | 1 + .../ebooks/metadata/sources/identify.py | 2 + .../gui2/preferences/metadata_sources.py | 1 + .../gui2/preferences/metadata_sources.ui | 37 +++++++++++-------- 4 files changed, 26 insertions(+), 15 deletions(-) diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py index 95aa5a3563..47b9f00d30 100644 --- a/src/calibre/ebooks/metadata/sources/base.py +++ b/src/calibre/ebooks/metadata/sources/base.py @@ -26,6 +26,7 @@ msprefs.defaults['wait_after_first_identify_result'] = 30 # seconds msprefs.defaults['wait_after_first_cover_result'] = 60 # seconds msprefs.defaults['swap_author_names'] = False msprefs.defaults['fewer_tags'] = True +msprefs.defaults['find_first_edition_date'] = True # Google covers are often poor quality (scans/errors) but they have high # resolution, so they trump covers from better sources. 
So make sure they diff --git a/src/calibre/ebooks/metadata/sources/identify.py b/src/calibre/ebooks/metadata/sources/identify.py index 5113342b83..16aa39d59d 100644 --- a/src/calibre/ebooks/metadata/sources/identify.py +++ b/src/calibre/ebooks/metadata/sources/identify.py @@ -120,6 +120,8 @@ class ISBNMerge(object): self.log.debug(xw.tb) else: isbns, min_year = xw.isbns, xw.min_year + if not msprefs['find_first_edition_date']: + min_year = None if not isbns: isbns = frozenset([isbn]) if isbns in self.pools: diff --git a/src/calibre/gui2/preferences/metadata_sources.py b/src/calibre/gui2/preferences/metadata_sources.py index 541da2e203..db4d4a01e9 100644 --- a/src/calibre/gui2/preferences/metadata_sources.py +++ b/src/calibre/gui2/preferences/metadata_sources.py @@ -296,6 +296,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form): r('wait_after_first_cover_result', msprefs) r('swap_author_names', msprefs) r('fewer_tags', msprefs) + r('find_first_edition_date', msprefs) self.configure_plugin_button.clicked.connect(self.configure_plugin) self.sources_model = SourcesModel(self) diff --git a/src/calibre/gui2/preferences/metadata_sources.ui b/src/calibre/gui2/preferences/metadata_sources.ui index 89f6454df7..cef0e8562b 100644 --- a/src/calibre/gui2/preferences/metadata_sources.ui +++ b/src/calibre/gui2/preferences/metadata_sources.ui @@ -7,7 +7,7 @@ 0 0 781 - 394 + 439 @@ -21,7 +21,7 @@ - + Metadata sources @@ -104,22 +104,22 @@ + + Restore your own subset of checked fields that you define using the 'Set as default' button + &Select default - - Restore your own subset of checked fields that you define using the 'Set as default' button - + + Store the currently checked fields as a default you can restore using the 'Select default' button + &Set as default - - Store the currently checked fields as a default you can restore using the 'Select default' button - @@ -139,7 +139,7 @@ - + Max. number of &tags to download: @@ -149,10 +149,10 @@ - + - + Max. 
&time to wait after first match is found: @@ -162,14 +162,14 @@ - + secs - + Max. time to wait after first &cover is found: @@ -179,14 +179,14 @@ - + secs - + <p>Different metadata sources have different sets of tags for the same book. If this option is checked, then calibre will use the smaller tag sets. These tend to be more like genres, while the larger tag sets tend to describe the books content. @@ -197,6 +197,13 @@ + + + + Use published date of earliest "edition" (from worldcat.org) + + + From 82472c1a9e859bdf45d946e176fb0884b25c4032 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 6 Oct 2012 13:13:40 +0530 Subject: [PATCH 2/5] turn off use of worldcat for earliest edition dates by default --- src/calibre/ebooks/metadata/sources/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py index 47b9f00d30..46c6f7a313 100644 --- a/src/calibre/ebooks/metadata/sources/base.py +++ b/src/calibre/ebooks/metadata/sources/base.py @@ -26,7 +26,7 @@ msprefs.defaults['wait_after_first_identify_result'] = 30 # seconds msprefs.defaults['wait_after_first_cover_result'] = 60 # seconds msprefs.defaults['swap_author_names'] = False msprefs.defaults['fewer_tags'] = True -msprefs.defaults['find_first_edition_date'] = True +msprefs.defaults['find_first_edition_date'] = False # Google covers are often poor quality (scans/errors) but they have high # resolution, so they trump covers from better sources. So make sure they From d6ab9d1b6e875f977dc5aba49dabae1d6d79060e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 6 Oct 2012 13:15:50 +0530 Subject: [PATCH 3/5] ... 
--- src/calibre/gui2/preferences/metadata_sources.ui | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/gui2/preferences/metadata_sources.ui b/src/calibre/gui2/preferences/metadata_sources.ui index cef0e8562b..cfbdb51571 100644 --- a/src/calibre/gui2/preferences/metadata_sources.ui +++ b/src/calibre/gui2/preferences/metadata_sources.ui @@ -200,7 +200,7 @@ - Use published date of earliest "edition" (from worldcat.org) + Use published date of "first edition" (from worldcat.org) From 768cc4c95d86e571dc01908bdf5cd7252cdc69c1 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 6 Oct 2012 21:45:32 +0530 Subject: [PATCH 4/5] noz.de by Krittika Goyal --- recipes/noz.recipe | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 recipes/noz.recipe diff --git a/recipes/noz.recipe b/recipes/noz.recipe new file mode 100644 index 0000000000..521ad2f5c1 --- /dev/null +++ b/recipes/noz.recipe @@ -0,0 +1,29 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1344926684(BasicNewsRecipe): + title = u'Neue Osnabrücker Zeitung' + __author__ = 'Krittika Goyal' + oldest_article = 7 + max_articles_per_feed = 100 + #auto_cleanup = True + no_stylesheets = True + use_embedded_content = False + language = 'de' + remove_javascript = True + + keep_only_tags = [dict(name='h1', attrs={'class':'enlargeable'}), dict(name='h2', attrs={'class':'enlargeable vorspann'}), dict(name='div', attrs={'id':'largePicContainer'}), dict(name='span', attrs={'id':'articletext'})] + remove_tags = [dict(name='div', attrs={'id':'retresco-title'}),dict(name='div', attrs={'class':'retresco-item s1 relative'}),dict(name='a', attrs={'class':'medium2 largeSpaceTop icon'})] + + feeds = [(u'Lokales', u'http://www.noz.de/rss/Lokales'), +(u'Vermischtes', u'http://www.noz.de/rss/Vermischtes'), +(u'Politik', u'http://www.noz.de/rss/Politik'), +(u'Wirtschaft', u'http://www.noz.de/rss/Wirtschaft'), +(u'Kultur', 
u'http://www.noz.de/rss/Kultur'), +(u'Medien', u'http://www.noz.de/rss/Medien'), +(u'Wissenschaft', u'http://www.noz.de/rss/wissenschaft'), +(u'Sport', u'http://www.noz.de/rss/Sport'), +(u'Computer', u'http://www.noz.de/rss/Computer'), +(u'Musik', u'http://www.noz.de/rss/Musik'), +(u'Szene', u'http://www.noz.de/rss/Szene'), +(u'Niedersachsen', u'http://www.noz.de/rss/Niedersachsen'), +(u'Kino', u'http://www.noz.de/rss/Kino')] From a84bdaebeb0611031d7fb01ea305fe51fce9dc21 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 6 Oct 2012 21:55:54 +0530 Subject: [PATCH 5/5] Update The Sun UK --- recipes/the_sun.recipe | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/recipes/the_sun.recipe b/recipes/the_sun.recipe index d93ac2c49b..a5c4dbdd10 100644 --- a/recipes/the_sun.recipe +++ b/recipes/the_sun.recipe @@ -8,13 +8,11 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe): title = u'The Sun UK' description = 'Articles from The Sun tabloid UK' __author__ = 'Dave Asbury' - # last updated 25/7/12 + # last updated 6/10/12 added starsons remove article code language = 'en_GB' oldest_article = 1 - max_articles_per_feed = 12 + max_articles_per_feed = 15 remove_empty_feeds = True - no_stylesheets = True - masthead_url = 'http://www.thesun.co.uk/sol/img/global/Sun-logo.gif' encoding = 'UTF-8' @@ -23,13 +21,9 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe): - #preprocess_regexps = [ - # (re.compile(r'