diff --git a/resources/images/news/anandtech.png b/resources/images/news/anandtech.png new file mode 100644 index 0000000000..19270d99a9 Binary files /dev/null and b/resources/images/news/anandtech.png differ diff --git a/resources/images/news/rusiahoy.png b/resources/images/news/rusiahoy.png new file mode 100644 index 0000000000..6fbdefa6a5 Binary files /dev/null and b/resources/images/news/rusiahoy.png differ diff --git a/resources/recipes/anandtech.recipe b/resources/recipes/anandtech.recipe new file mode 100644 index 0000000000..aa10084070 --- /dev/null +++ b/resources/recipes/anandtech.recipe @@ -0,0 +1,32 @@ +__license__ = 'GPL v3' +__copyright__ = '2008, Kovid Goyal ' + +''' +Fetch Anandtech. +''' + +from calibre.web.feeds.news import BasicNewsRecipe + + +class anan(BasicNewsRecipe): + + title = 'Anandtech' + description = 'comprehensive Hardware Tests' + __author__ = 'Oliver Niesner' + use_embedded_content = False + language = 'en' + timefmt = ' [%d %b %Y]' + max_articles_per_feed = 40 + no_stylesheets = True + remove_javascript = True + encoding = 'utf-8' + + remove_tags=[dict(name='a', attrs={'style':'width:110px; margin-top:0px;text-align:center;'}), + dict(name='a', attrs={'style':'width:110px; margin-top:0px; margin-right:20px;text-align:center;'})] + + feeds = [ ('Anandtech', 'http://www.anandtech.com/rss/')] + + def print_version(self,url): + return url.replace('/show/', '/print/') + + diff --git a/resources/recipes/cacm.recipe b/resources/recipes/cacm.recipe new file mode 100644 index 0000000000..1618bae742 --- /dev/null +++ b/resources/recipes/cacm.recipe @@ -0,0 +1,37 @@ +import datetime +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1286242553(BasicNewsRecipe): + title = u'CACM' + oldest_article = 7 + max_articles_per_feed = 100 + needs_subscription = True + feeds = [(u'CACM', u'http://cacm.acm.org/magazine.rss')] + language = 'en' + __author__ = 'jonmisurda' + no_stylesheets = True + remove_tags = [ + dict(name='div', attrs={'class':['FeatureBox', 'ArticleComments', 'SideColumn', \ + 'LeftColumn', 'RightColumn', 'SiteSearch', 'MainNavBar','more', 'SubMenu', 'inner']}) + ] + cover_url_pattern = 'http://cacm.acm.org/magazines/%d/%d' + + def get_browser(self): + br = BasicNewsRecipe.get_browser() + if self.username is not None and self.password is not None: + br.open('https://cacm.acm.org/login') + br.select_form(nr=1) + br['current_member[user]'] = self.username + br['current_member[passwd]'] = self.password + br.submit() + return br + + def get_cover_url(self): + now = datetime.datetime.now() + + cover_url = None + soup = self.index_to_soup(self.cover_url_pattern % (now.year, now.month)) + cover_item = soup.find('img',attrs={'alt':'magazine cover image'}) + if cover_item: + cover_url = cover_item['src'] + return cover_url diff --git a/resources/recipes/gsp.recipe b/resources/recipes/gsp.recipe new file mode 100644 index 0000000000..90a8eecfe6 --- /dev/null +++ b/resources/recipes/gsp.recipe @@ -0,0 +1,20 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1286351181(BasicNewsRecipe): + title = u'gsp.ro' + __author__ = 'bucsie' + oldest_article = 2 + max_articles_per_feed = 100 + language='ro' + cover_url ='http://www.gsp.ro/images/sigla_rosu.jpg' + + remove_tags = [ + dict(name='div', attrs={'class':['related_articles', 'articol_noteaza straight_line dotted_line_top', 'comentarii','mai_multe_articole']}), + dict(name='div', attrs={'id':'icons'}) + ] + remove_tags_after = dict(name='div', attrs={'id':'adoceanintactrovccmgpmnyt'}) + + feeds = [(u'toate stirile', u'http://www.gsp.ro/index.php?section=section&screen=rss')] + + def print_version(self, url): + return 'http://www1.gsp.ro/print/' + url[(url.rindex('/')+1):] diff --git a/resources/recipes/hna.recipe b/resources/recipes/hna.recipe index e04837bd17..6e843800ee 100644 --- a/resources/recipes/hna.recipe +++ b/resources/recipes/hna.recipe @@ -30,21 +30,33 @@ class hnaDe(BasicNewsRecipe): dict(id='superbanner'), dict(id='navigation'), dict(id='skyscraper'), + dict(id='idNavigationWrap'), + dict(id='idHeaderSearchForm'), + dict(id='idLoginBarWrap'), + dict(id='idAccountButtons'), + dict(id='idHeadButtons'), + dict(id='idBoxesWrap'), dict(id=''), dict(name='span'), dict(name='ul', attrs={'class':'linklist'}), dict(name='a', attrs={'href':'#'}), dict(name='div', attrs={'class':'hlist'}), + dict(name='li', attrs={'class':'idButton idIsLoginGroup idHeaderRegister '}), + dict(name='li', attrs={'class':'idVideoBar idFirst'}), + dict(name='li', attrs={'class':'idSetStartPageLink idLast'}), + dict(name='li', attrs={'class':'idKinderNetzBar idLast'}), + dict(name='li', attrs={'class':'idFotoBar '}), dict(name='div', attrs={'class':'subc noprint'}), + dict(name='div', attrs={'class':'idBreadcrumb'}), + dict(name='div', attrs={'class':'idLay idAdvertising idClStandard '}), + dict(name='span', attrs={'class':'idHeadLineIntro'}), dict(name='p', attrs={'class':'breadcrumb'}), dict(name='a', attrs={'style':'cursor:hand'}), - dict(name='p', attrs={'class':'h5'})] + dict(name='p', attrs={'class':'h5'}), + dict(name='p', attrs={'class':'idMoreEnd'})] #remove_tags_after = [dict(name='div', attrs={'class':'rahmenbreaking'})] - remove_tags_after = [dict(name='a', attrs={'href':'#'})] + remove_tags_after = [dict(name='p', attrs={'class':'idMoreEnd'})] feeds = [ ('hna_soehre', 'http://feeds2.feedburner.com/hna/soehre'), ('hna_kassel', 'http://feeds2.feedburner.com/hna/kassel') ] - - - diff --git a/resources/recipes/rusiahoy.recipe b/resources/recipes/rusiahoy.recipe new file mode 100644 index 0000000000..326c1695b0 --- /dev/null +++ b/resources/recipes/rusiahoy.recipe @@ -0,0 +1,47 @@ +__license__ = 'GPL v3' +__copyright__ = '2010, Darko Miletic ' +''' +rusiahoy.com +''' + +from calibre.web.feeds.news import BasicNewsRecipe + +class RusiaHoy(BasicNewsRecipe): + title = 'Rusia Hoy' + __author__ = 'Darko Miletic' + description = 'Noticias de Russia en castellano' + publisher = 'rusiahoy.com' + category = 'news, politics, Russia' + oldest_article = 7 + max_articles_per_feed = 200 + no_stylesheets = True + encoding = 'utf8' + use_embedded_content = False + language = 'es' + remove_empty_feeds = True + extra_css = """ + body{font-family: Arial,sans-serif } + .article_article_title{font-size: xx-large; font-weight: bold} + .article_date{color: black; font-size: small} + """ + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } + + remove_tags = [dict(name=['meta','link','iframe','base','object','embed'])] + keep_only_tags=[ dict(attrs={'class':['article_rubric_title','article_date','article_article_title','article_article_lead']}) + ,dict(attrs={'class':'article_article_text'}) + ] + remove_attributes=['align','width','height'] + + feeds = [(u'Articulos', u'http://rusiahoy.com/xml/index.xml')] + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + return soup + \ No newline at end of file diff --git a/resources/recipes/sueddeutsche.recipe b/resources/recipes/sueddeutsche.recipe index 9a5b00fc6c..d898376003 100644 --- a/resources/recipes/sueddeutsche.recipe +++ b/resources/recipes/sueddeutsche.recipe @@ -28,7 +28,7 @@ class Sueddeutsche(BasicNewsRecipe): "SKY_AD","NT1_AD","navbar1","sdesiteheader"]}), dict(name='div', attrs={'class':["similar-article-box","artikelliste","nteaser301bg", - "pages closed","basebox right narrow"]}), + "pages closed","basebox right narrow","headslot galleried"]}), dict(name='div', attrs={'class':["articleDistractor","listHeader","listHeader2","hr2", "item","videoBigButton","articlefooter full-column", @@ -38,10 +38,11 @@ class Sueddeutsche(BasicNewsRecipe): dict(name='div', attrs={'style':["position:relative;"]}), dict(name='span', attrs={'class':["nlinkheaderteaserschwarz","artikelLink","r10000000"]}), dict(name='table', attrs={'class':["stoerBS","kommentare","footer","pageBoxBot","pageAktiv","bgcontent"]}), - dict(name='ul', attrs={'class':["breadcrumb","articles","activities","sitenav"]}), + dict(name='ul', attrs={'class':["breadcrumb","articles","activities","sitenav","actions"]}), dict(name='td', attrs={'class':["artikelDruckenRight"]}), dict(name='p', text = "ANZEIGE") ] + remove_tags_after = [dict(name='div', attrs={'class':["themenbox full-column"]})] extra_css = ''' h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small; color: #003399;} @@ -70,9 +71,8 @@ class Sueddeutsche(BasicNewsRecipe): (u'Reise', u'http://suche.sueddeutsche.de/query/reise/nav/%C2%A7ressort%3AReise/sort/-docdatetime?output=rss') ] + + def print_version(self, url): - return url.replace('/text/', '/text/print.html') - - - - + main, sep, id = url.rpartition('/') + return main + '/2.220/' + id diff --git a/src/calibre/ebooks/metadata/book/base.py b/src/calibre/ebooks/metadata/book/base.py index 551d8ebe44..fa5a20283e 100644 --- a/src/calibre/ebooks/metadata/book/base.py +++ b/src/calibre/ebooks/metadata/book/base.py @@ -386,7 +386,8 @@ class Metadata(object): self.set_all_user_metadata(other.get_all_user_metadata(make_copy=True)) for x in SC_FIELDS_COPY_NOT_NULL: copy_not_none(self, other, x) - self.set_classifiers(other.get_classifiers()) + if callable(getattr(other, 'get_classifiers', None)): + self.set_classifiers(other.get_classifiers()) # language is handled below else: for attr in SC_COPYABLE_FIELDS: @@ -461,7 +462,7 @@ class Metadata(object): v = self.series_index if val is None else val try: x = float(v) - except ValueError, TypeError: + except (ValueError, TypeError): x = 1 return fmt_sidx(x) diff --git a/src/calibre/gui2/tag_view.py b/src/calibre/gui2/tag_view.py index 0824dedae6..a054bb0645 100644 --- a/src/calibre/gui2/tag_view.py +++ b/src/calibre/gui2/tag_view.py @@ -16,7 +16,6 @@ from PyQt4.Qt import Qt, QTreeView, QApplication, pyqtSignal, \ QPushButton, QWidget, QItemDelegate from calibre.ebooks.metadata import title_sort -from calibre.ebooks.metadata.book import ALL_METADATA_FIELDS from calibre.gui2 import config, NONE from calibre.library.field_metadata import TagsIcons from calibre.utils.search_query_parser import saved_searches @@ -154,6 +153,8 @@ class TagsView(QTreeView): # {{{ ids = list(map(int, str(md.data(mime)).split())) self.handle_drop(item, child, ids) event.accept() + return + event.ignore() def handle_drop(self, parent, child, ids): # print 'Dropped ids:', ids, parent.category_key, child.tag.name