Merge from trunk

This commit is contained in:
Charles Haley 2012-10-07 08:52:26 +02:00
commit 7f471abd99
6 changed files with 68 additions and 24 deletions

29
recipes/noz.recipe Normal file
View File

@ -0,0 +1,29 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1344926684(BasicNewsRecipe):
    """Calibre news recipe for the German daily Neue Osnabrücker Zeitung.

    Articles are collected from the section RSS feeds on noz.de and reduced
    to headline, teaser, lead picture and article text via the tag rules
    declared below.
    """

    # --- Recipe identity -------------------------------------------------
    title = u'Neue Osnabrücker Zeitung'
    __author__ = 'Krittika Goyal'
    language = 'de'

    # --- Download limits -------------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100

    # --- Content clean-up ------------------------------------------------
    # auto_cleanup = True  (left disabled; the explicit tag rules are used)
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False

    # Only these elements of an article page are kept.
    keep_only_tags = [
        dict(name='h1', attrs={'class':'enlargeable'}),
        dict(name='h2', attrs={'class':'enlargeable vorspann'}),
        dict(name='div', attrs={'id':'largePicContainer'}),
        dict(name='span', attrs={'id':'articletext'}),
    ]

    # Elements stripped from whatever was kept above.
    remove_tags = [
        dict(name='div', attrs={'id':'retresco-title'}),
        dict(name='div', attrs={'class':'retresco-item s1 relative'}),
        dict(name='a', attrs={'class':'medium2 largeSpaceTop icon'}),
    ]

    # (section title, RSS url) pairs, in the order they appear in the output.
    feeds = [
        (u'Lokales', u'http://www.noz.de/rss/Lokales'),
        (u'Vermischtes', u'http://www.noz.de/rss/Vermischtes'),
        (u'Politik', u'http://www.noz.de/rss/Politik'),
        (u'Wirtschaft', u'http://www.noz.de/rss/Wirtschaft'),
        (u'Kultur', u'http://www.noz.de/rss/Kultur'),
        (u'Medien', u'http://www.noz.de/rss/Medien'),
        (u'Wissenschaft', u'http://www.noz.de/rss/wissenschaft'),
        (u'Sport', u'http://www.noz.de/rss/Sport'),
        (u'Computer', u'http://www.noz.de/rss/Computer'),
        (u'Musik', u'http://www.noz.de/rss/Musik'),
        (u'Szene', u'http://www.noz.de/rss/Szene'),
        (u'Niedersachsen', u'http://www.noz.de/rss/Niedersachsen'),
        (u'Kino', u'http://www.noz.de/rss/Kino'),
    ]

View File

@ -8,13 +8,11 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
title = u'The Sun UK'
description = 'Articles from The Sun tabloid UK'
__author__ = 'Dave Asbury'
# last updated 25/7/12
# last updated 6/10/12 added starsons remove article code
language = 'en_GB'
oldest_article = 1
max_articles_per_feed = 12
max_articles_per_feed = 15
remove_empty_feeds = True
no_stylesheets = True
masthead_url = 'http://www.thesun.co.uk/sol/img/global/Sun-logo.gif'
encoding = 'UTF-8'
@ -23,13 +21,9 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
#preprocess_regexps = [
# (re.compile(r'<div class="foot-copyright".*?</div>', re.IGNORECASE | re.DOTALL), lambda match: '')]
extra_css = '''
body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
'''
'''
keep_only_tags = [
dict(name='div',attrs={'class' : 'intro'}),
dict(name='h3'),
@ -52,6 +46,15 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
(u'Showbiz', u'http://www.thesun.co.uk/sol/homepage/showbiz/rss'),
(u'Woman', u'http://www.thesun.co.uk/sol/homepage/woman/rss'),
]
# starsons code
def parse_feeds(self):
    """Return the parsed feeds with unwanted articles filtered out.

    Delegates to ``BasicNewsRecipe.parse_feeds`` and then removes, in
    place, every article whose title contains "Web porn harms kids"
    (compared case-insensitively) or whose URL contains "The-Sun-says".

    Returns:
        The list of feeds returned by the base implementation, with the
        matching articles removed from each feed's ``articles`` list.
    """
    feeds = BasicNewsRecipe.parse_feeds(self)
    for feed in feeds:
        # Walk a copy so removing from feed.articles does not skip entries.
        for article in feed.articles[:]:
            # The title is upper-cased for a case-insensitive match, so the
            # needle has to be upper-case as well; a mixed-case needle could
            # never match and the title filter would silently do nothing.
            if 'WEB PORN HARMS KIDS' in article.title.upper() or 'The-Sun-says' in article.url:
                feed.articles.remove(article)
    return feeds
def get_cover_url(self):
soup = self.index_to_soup('http://www.politicshome.com/uk/latest_frontpage.html')
@ -69,6 +72,7 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
cov2 = str(cov)
cov2=cov2[27:-18]
#cov2 now is pic url, now go back to original function
# print "**** cov2 =",cov2,"****"
br = browser()
br.set_handle_redirect(False)
try:

View File

@ -26,6 +26,7 @@ msprefs.defaults['wait_after_first_identify_result'] = 30 # seconds
msprefs.defaults['wait_after_first_cover_result'] = 60 # seconds
msprefs.defaults['swap_author_names'] = False
msprefs.defaults['fewer_tags'] = True
# Off by default. While this is False the ISBN merge step resets the
# min_year it obtained to None; the preferences UI labels the option
# 'Use published date of "first edition" (from worldcat.org)'.
msprefs.defaults['find_first_edition_date'] = False
# Google covers are often poor quality (scans/errors) but they have high
# resolution, so they trump covers from better sources. So make sure they

View File

@ -120,6 +120,8 @@ class ISBNMerge(object):
self.log.debug(xw.tb)
else:
isbns, min_year = xw.isbns, xw.min_year
if not msprefs['find_first_edition_date']:
min_year = None
if not isbns:
isbns = frozenset([isbn])
if isbns in self.pools:

View File

@ -296,6 +296,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
r('wait_after_first_cover_result', msprefs)
r('swap_author_names', msprefs)
r('fewer_tags', msprefs)
r('find_first_edition_date', msprefs)
self.configure_plugin_button.clicked.connect(self.configure_plugin)
self.sources_model = SourcesModel(self)

View File

@ -7,7 +7,7 @@
<x>0</x>
<y>0</y>
<width>781</width>
<height>394</height>
<height>439</height>
</rect>
</property>
<property name="windowTitle">
@ -21,7 +21,7 @@
<widget class="QStackedWidget" name="stack">
<widget class="QWidget" name="page">
<layout class="QGridLayout" name="gridLayout">
<item row="0" column="0" rowspan="7">
<item row="0" column="0" rowspan="8">
<widget class="QGroupBox" name="groupBox">
<property name="title">
<string>Metadata sources</string>
@ -104,22 +104,22 @@
</item>
<item row="2" column="0">
<widget class="QPushButton" name="select_default_button">
<property name="toolTip">
<string>Restore your own subset of checked fields that you define using the 'Set as default' button</string>
</property>
<property name="text">
<string>&amp;Select default</string>
</property>
<property name="toolTip">
<string>Restore your own subset of checked fields that you define using the 'Set as default' button</string>
</property>
</widget>
</item>
<item row="2" column="1">
<widget class="QPushButton" name="set_as_default_button">
<property name="toolTip">
<string>Store the currently checked fields as a default you can restore using the 'Select default' button</string>
</property>
<property name="text">
<string>&amp;Set as default</string>
</property>
<property name="toolTip">
<string>Store the currently checked fields as a default you can restore using the 'Select default' button</string>
</property>
</widget>
</item>
</layout>
@ -139,7 +139,7 @@
</property>
</widget>
</item>
<item row="4" column="1">
<item row="5" column="1">
<widget class="QLabel" name="label_2">
<property name="text">
<string>Max. number of &amp;tags to download:</string>
@ -149,10 +149,10 @@
</property>
</widget>
</item>
<item row="4" column="2">
<item row="5" column="2">
<widget class="QSpinBox" name="opt_max_tags"/>
</item>
<item row="5" column="1">
<item row="6" column="1">
<widget class="QLabel" name="label_3">
<property name="text">
<string>Max. &amp;time to wait after first match is found:</string>
@ -162,14 +162,14 @@
</property>
</widget>
</item>
<item row="5" column="2">
<item row="6" column="2">
<widget class="QSpinBox" name="opt_wait_after_first_identify_result">
<property name="suffix">
<string> secs</string>
</property>
</widget>
</item>
<item row="6" column="1">
<item row="7" column="1">
<widget class="QLabel" name="label_4">
<property name="text">
<string>Max. time to wait after first &amp;cover is found:</string>
@ -179,14 +179,14 @@
</property>
</widget>
</item>
<item row="6" column="2">
<item row="7" column="2">
<widget class="QSpinBox" name="opt_wait_after_first_cover_result">
<property name="suffix">
<string> secs</string>
</property>
</widget>
</item>
<item row="3" column="1" colspan="2">
<item row="4" column="1" colspan="2">
<widget class="QCheckBox" name="opt_fewer_tags">
<property name="toolTip">
<string>&lt;p&gt;Different metadata sources have different sets of tags for the same book. If this option is checked, then calibre will use the smaller tag sets. These tend to be more like genres, while the larger tag sets tend to describe the books content.
@ -197,6 +197,13 @@
</property>
</widget>
</item>
<item row="3" column="1" colspan="2">
<widget class="QCheckBox" name="opt_find_first_edition_date">
<property name="text">
<string>Use published date of &quot;first edition&quot; (from worldcat.org)</string>
</property>
</widget>
</item>
</layout>
</widget>
<widget class="QWidget" name="page_2"/>