From 7ea11f7c1ce2554194b428c78a1ef2469e5de309 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 6 Oct 2012 13:02:50 +0530
Subject: [PATCH 1/5] Metadata download: Add an option to turn off the use of
 the published date for the earliest edition of a book as the published date

---
 src/calibre/ebooks/metadata/sources/base.py   |  1 +
 .../ebooks/metadata/sources/identify.py       |  2 +
 .../gui2/preferences/metadata_sources.py      |  1 +
 .../gui2/preferences/metadata_sources.ui      | 37 +++++++++++--------
 4 files changed, 26 insertions(+), 15 deletions(-)
diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py
index 95aa5a3563..47b9f00d30 100644
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@@ -26,6 +26,7 @@ msprefs.defaults['wait_after_first_identify_result'] = 30 # seconds
 msprefs.defaults['wait_after_first_cover_result'] = 60 # seconds
 msprefs.defaults['swap_author_names'] = False
 msprefs.defaults['fewer_tags'] = True
+msprefs.defaults['find_first_edition_date'] = True
 
 # Google covers are often poor quality (scans/errors) but they have high
 # resolution, so they trump covers from better sources. So make sure they
diff --git a/src/calibre/ebooks/metadata/sources/identify.py b/src/calibre/ebooks/metadata/sources/identify.py
index 5113342b83..16aa39d59d 100644
--- a/src/calibre/ebooks/metadata/sources/identify.py
+++ b/src/calibre/ebooks/metadata/sources/identify.py
@@ -120,6 +120,8 @@ class ISBNMerge(object):
                             self.log.debug(xw.tb)
                         else:
                             isbns, min_year = xw.isbns, xw.min_year
+                            if not msprefs['find_first_edition_date']:
+                                min_year = None
                 if not isbns:
                     isbns = frozenset([isbn])
                 if isbns in self.pools:
diff --git a/src/calibre/gui2/preferences/metadata_sources.py b/src/calibre/gui2/preferences/metadata_sources.py
index 541da2e203..db4d4a01e9 100644
--- a/src/calibre/gui2/preferences/metadata_sources.py
+++ b/src/calibre/gui2/preferences/metadata_sources.py
@@ -296,6 +296,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
         r('wait_after_first_cover_result', msprefs)
         r('swap_author_names', msprefs)
         r('fewer_tags', msprefs)
+        r('find_first_edition_date', msprefs)
 
         self.configure_plugin_button.clicked.connect(self.configure_plugin)
         self.sources_model = SourcesModel(self)
diff --git a/src/calibre/gui2/preferences/metadata_sources.ui b/src/calibre/gui2/preferences/metadata_sources.ui
index 89f6454df7..cef0e8562b 100644
--- a/src/calibre/gui2/preferences/metadata_sources.ui
+++ b/src/calibre/gui2/preferences/metadata_sources.ui
@@ -7,7 +7,7 @@
     <x>0</x>
     <y>0</y>
     <width>781</width>
-    <height>394</height>
+    <height>439</height>
    </rect>
   </property>
   <property name="windowTitle">
@@ -21,7 +21,7 @@
     <widget class="QStackedWidget" name="stack">
      <widget class="QWidget" name="page">
       <layout class="QGridLayout" name="gridLayout">
-       <item row="0" column="0" rowspan="7">
+       <item row="0" column="0" rowspan="8">
         <widget class="QGroupBox" name="groupBox">
          <property name="title">
           <string>Metadata sources</string>
@@ -104,22 +104,22 @@
           </item>
           <item row="2" column="0">
            <widget class="QPushButton" name="select_default_button">
+            <property name="toolTip">
+             <string>Restore your own subset of checked fields that you define using the 'Set as default' button</string>
+            </property>
             <property name="text">
              <string>&amp;Select default</string>
             </property>
-             <property name="toolTip">
-             <string>Restore your own subset of checked fields that you define using the 'Set as default' button</string>
-            </property>
            </widget>
           </item>
           <item row="2" column="1">
            <widget class="QPushButton" name="set_as_default_button">
+            <property name="toolTip">
+             <string>Store the currently checked fields as a default you can restore using the 'Select default' button</string>
+            </property>
             <property name="text">
              <string>&amp;Set as default</string>
             </property>
-             <property name="toolTip">
-             <string>Store the currently checked fields as a default you can restore using the 'Select default' button</string>
-            </property>
            </widget>
           </item>
          </layout>
@@ -139,7 +139,7 @@
          </property>
         </widget>
        </item>
-       <item row="4" column="1">
+       <item row="5" column="1">
         <widget class="QLabel" name="label_2">
          <property name="text">
           <string>Max. number of &amp;tags to download:</string>
@@ -149,10 +149,10 @@
          </property>
         </widget>
        </item>
-       <item row="4" column="2">
+       <item row="5" column="2">
         <widget class="QSpinBox" name="opt_max_tags"/>
        </item>
-       <item row="5" column="1">
+       <item row="6" column="1">
         <widget class="QLabel" name="label_3">
          <property name="text">
           <string>Max. &amp;time to wait after first match is found:</string>
@@ -162,14 +162,14 @@
          </property>
         </widget>
        </item>
-       <item row="5" column="2">
+       <item row="6" column="2">
         <widget class="QSpinBox" name="opt_wait_after_first_identify_result">
          <property name="suffix">
           <string> secs</string>
          </property>
         </widget>
        </item>
-       <item row="6" column="1">
+       <item row="7" column="1">
         <widget class="QLabel" name="label_4">
          <property name="text">
           <string>Max. time to wait after first &amp;cover is found:</string>
@@ -179,14 +179,14 @@
          </property>
         </widget>
        </item>
-       <item row="6" column="2">
+       <item row="7" column="2">
         <widget class="QSpinBox" name="opt_wait_after_first_cover_result">
          <property name="suffix">
           <string> secs</string>
          </property>
         </widget>
        </item>
-       <item row="3" column="1" colspan="2">
+       <item row="4" column="1" colspan="2">
         <widget class="QCheckBox" name="opt_fewer_tags">
          <property name="toolTip">
           <string>&lt;p&gt;Different metadata sources have different sets of tags for the same book. If this option is checked, then calibre will use the smaller tag sets. These tend to be more like genres, while the larger tag sets tend to describe the books content.
@@ -197,6 +197,13 @@
          </property>
         </widget>
        </item>
+       <item row="3" column="1" colspan="2">
+        <widget class="QCheckBox" name="opt_find_first_edition_date">
+         <property name="text">
+          <string>Use published date of earliest &quot;edition&quot; (from worldcat.org)</string>
+         </property>
+        </widget>
+       </item>
       </layout>
      </widget>
      <widget class="QWidget" name="page_2"/>

From 82472c1a9e859bdf45d946e176fb0884b25c4032 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 6 Oct 2012 13:13:40 +0530
Subject: [PATCH 2/5] turn off use of worldcat for earliest edition dates by
 default

---
 src/calibre/ebooks/metadata/sources/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py
index 47b9f00d30..46c6f7a313 100644
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@@ -26,7 +26,7 @@ msprefs.defaults['wait_after_first_identify_result'] = 30 # seconds
 msprefs.defaults['wait_after_first_cover_result'] = 60 # seconds
 msprefs.defaults['swap_author_names'] = False
 msprefs.defaults['fewer_tags'] = True
-msprefs.defaults['find_first_edition_date'] = True
+msprefs.defaults['find_first_edition_date'] = False
 
 # Google covers are often poor quality (scans/errors) but they have high
 # resolution, so they trump covers from better sources. So make sure they

From d6ab9d1b6e875f977dc5aba49dabae1d6d79060e Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 6 Oct 2012 13:15:50 +0530
Subject: [PATCH 3/5] Metadata sources preferences: say "first edition" in the worldcat.org option label

---
 src/calibre/gui2/preferences/metadata_sources.ui | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/gui2/preferences/metadata_sources.ui b/src/calibre/gui2/preferences/metadata_sources.ui
index cef0e8562b..cfbdb51571 100644
--- a/src/calibre/gui2/preferences/metadata_sources.ui
+++ b/src/calibre/gui2/preferences/metadata_sources.ui
@@ -200,7 +200,7 @@
        <item row="3" column="1" colspan="2">
         <widget class="QCheckBox" name="opt_find_first_edition_date">
          <property name="text">
-          <string>Use published date of earliest &quot;edition&quot; (from worldcat.org)</string>
+          <string>Use published date of &quot;first edition&quot; (from worldcat.org)</string>
          </property>
         </widget>
        </item>

From 768cc4c95d86e571dc01908bdf5cd7252cdc69c1 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 6 Oct 2012 21:45:32 +0530
Subject: [PATCH 4/5] noz.de by Krittika Goyal

---
 recipes/noz.recipe | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 recipes/noz.recipe

diff --git a/recipes/noz.recipe b/recipes/noz.recipe
new file mode 100644
index 0000000000..521ad2f5c1
--- /dev/null
+++ b/recipes/noz.recipe
@@ -0,0 +1,29 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1344926684(BasicNewsRecipe):
+    # Recipe for noz.de; auto_cleanup is not set here, the tag lists below pick the content.
+    title = u'Neue Osnabrücker Zeitung'
+    __author__ = 'Krittika Goyal'
+    language = 'de'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    remove_javascript = True
+    use_embedded_content = False
+    keep_only_tags = [{'name': 'h1', 'attrs': {'class': 'enlargeable'}}, {'name': 'h2', 'attrs': {'class': 'enlargeable vorspann'}}, {'name': 'div', 'attrs': {'id': 'largePicContainer'}}, {'name': 'span', 'attrs': {'id': 'articletext'}}]
+    remove_tags = [{'name': 'div', 'attrs': {'id': 'retresco-title'}}, {'name': 'div', 'attrs': {'class': 'retresco-item s1 relative'}}, {'name': 'a', 'attrs': {'class': 'medium2 largeSpaceTop icon'}}]
+    feeds = [
+        (u'Lokales', u'http://www.noz.de/rss/Lokales'),
+        (u'Vermischtes', u'http://www.noz.de/rss/Vermischtes'),
+        (u'Politik', u'http://www.noz.de/rss/Politik'),
+        (u'Wirtschaft', u'http://www.noz.de/rss/Wirtschaft'),
+        (u'Kultur', u'http://www.noz.de/rss/Kultur'),
+        (u'Medien', u'http://www.noz.de/rss/Medien'),
+        (u'Wissenschaft', u'http://www.noz.de/rss/wissenschaft'),
+        (u'Sport', u'http://www.noz.de/rss/Sport'),
+        (u'Computer', u'http://www.noz.de/rss/Computer'),
+        (u'Musik', u'http://www.noz.de/rss/Musik'),
+        (u'Szene', u'http://www.noz.de/rss/Szene'),
+        (u'Niedersachsen', u'http://www.noz.de/rss/Niedersachsen'),
+        (u'Kino', u'http://www.noz.de/rss/Kino'),
+    ]

From a84bdaebeb0611031d7fb01ea305fe51fce9dc21 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 6 Oct 2012 21:55:54 +0530
Subject: [PATCH 5/5] Update The Sun UK

---
 recipes/the_sun.recipe | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/recipes/the_sun.recipe b/recipes/the_sun.recipe
index d93ac2c49b..a5c4dbdd10 100644
--- a/recipes/the_sun.recipe
+++ b/recipes/the_sun.recipe
@@ -8,13 +8,11 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
     title          = u'The Sun UK'
     description = 'Articles from The Sun tabloid UK'
     __author__ = 'Dave Asbury'
-    # last updated 25/7/12
+    # last updated 6/10/12 added starsons remove article code
     language = 'en_GB'
     oldest_article = 1
-    max_articles_per_feed = 12
+    max_articles_per_feed = 15
     remove_empty_feeds = True
-    no_stylesheets = True
-
 
     masthead_url = 'http://www.thesun.co.uk/sol/img/global/Sun-logo.gif'
     encoding = 'UTF-8'
@@ -23,13 +21,9 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
 
 
 
-    #preprocess_regexps = [
-     #   (re.compile(r'<div class="foot-copyright".*?</div>', re.IGNORECASE | re.DOTALL), lambda match: '')]
-
-
     extra_css  = '''
     body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
-                '''
+           '''
     keep_only_tags = [
         dict(name='div',attrs={'class' : 'intro'}),
                                 dict(name='h3'),
@@ -52,6 +46,15 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
     (u'Showbiz', u'http://www.thesun.co.uk/sol/homepage/showbiz/rss'),
     (u'Woman', u'http://www.thesun.co.uk/sol/homepage/woman/rss'),
     ]
+# starsons code
+    def parse_feeds(self):
+        """Parse the feeds, then drop 'The-Sun-says' URLs and one title, matched case-insensitively."""
+        feeds = BasicNewsRecipe.parse_feeds(self)
+        for feed in feeds:
+            for article in feed.articles[:]:  # iterate over a copy: the list is mutated below
+                if 'WEB PORN HARMS KIDS' in article.title.upper() or 'The-Sun-says' in article.url:
+                    feed.articles.remove(article)
+        return feeds
 
     def get_cover_url(self):
         soup = self.index_to_soup('http://www.politicshome.com/uk/latest_frontpage.html')
@@ -69,6 +72,7 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
         cov2 = str(cov)
         cov2=cov2[27:-18]
         #cov2 now is pic url, now  go back to original function
+        # print "**** cov2 =",cov2,"****"
         br = browser()
         br.set_handle_redirect(False)
         try: