Merge from trunk

2026-02-07 11:33:30 -05:00 · 2010-10-06 18:28:09 +01:00 · 2010-10-06 18:28:09 +01:00 · 3beb30d152
commit 3beb30d152
parent a24834d295 abe8bd9a25
10 changed files with 165 additions and 15 deletions
--- a/resources/images/news/anandtech.png
+++ b/resources/images/news/anandtech.png
--- a/resources/images/news/rusiahoy.png
+++ b/resources/images/news/rusiahoy.png
--- a/resources/recipes/anandtech.recipe
+++ b/resources/recipes/anandtech.recipe
@ -0,0 +1,32 @@
+__license__   = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
+
+'''
+Fetch Anandtech.
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class anan(BasicNewsRecipe):
+    # Anandtech RSS recipe; article URLs are rewritten from /show/ to /print/.
+    __author__            = 'Oliver Niesner'
+    title                 = 'Anandtech'
+    description           = 'comprehensive Hardware Tests'
+    language              = 'en'
+    encoding              = 'utf-8'
+    timefmt               = ' [%d %b %Y]'
+    max_articles_per_feed = 40
+    use_embedded_content  = False
+    no_stylesheets        = True
+    remove_javascript     = True
+    feeds                 = [('Anandtech', 'http://www.anandtech.com/rss/')]
+    remove_tags = [
+        dict(name='a', attrs={'style': 'width:110px; margin-top:0px;text-align:center;'}),
+        dict(name='a', attrs={'style': 'width:110px; margin-top:0px; margin-right:20px;text-align:center;'}),
+    ]
+
+    def print_version(self, url):
+        return '/print/'.join(url.split('/show/'))
+
+
--- a/resources/recipes/cacm.recipe
+++ b/resources/recipes/cacm.recipe
@ -0,0 +1,37 @@
+import datetime
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1286242553(BasicNewsRecipe):
+    # CACM magazine feed; logs in first when a username and password are set.
+    title          = u'CACM'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    needs_subscription = True
+    feeds          = [(u'CACM', u'http://cacm.acm.org/magazine.rss')]
+    language = 'en'
+    __author__ = 'jonmisurda'
+    no_stylesheets        = True
+    remove_tags = [
+        dict(name='div', attrs={'class':['FeatureBox', 'ArticleComments', 'SideColumn',
+              'LeftColumn', 'RightColumn', 'SiteSearch', 'MainNavBar','more', 'SubMenu', 'inner']})
+    ]
+    cover_url_pattern = 'http://cacm.acm.org/magazines/%d/%d'
+
+    def get_browser(self):
+        '''Return the base recipe's browser, logged in to cacm.acm.org if credentials are set.'''
+        # Called through the class, so the instance must be passed explicitly.
+        br = BasicNewsRecipe.get_browser(self)
+        if self.username is not None and self.password is not None:
+            br.open('https://cacm.acm.org/login')
+            br.select_form(nr=1)
+            br['current_member[user]']   = self.username
+            br['current_member[passwd]'] = self.password
+            br.submit()
+        return br
+
+    def get_cover_url(self):
+        '''Return the src of the 'magazine cover image' <img> for the current year/month, or None.'''
+        now = datetime.datetime.now()
+        soup = self.index_to_soup(self.cover_url_pattern % (now.year, now.month))
+        cover_item = soup.find('img',attrs={'alt':'magazine cover image'})
+        return cover_item['src'] if cover_item else None
--- a/resources/recipes/gsp.recipe
+++ b/resources/recipes/gsp.recipe
@ -0,0 +1,20 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1286351181(BasicNewsRecipe):
+    # gsp.ro feed recipe; print_version maps each article to www1.gsp.ro/print/.
+    __author__            = 'bucsie'
+    title                 = u'gsp.ro'
+    language              = 'ro'
+    oldest_article        = 2
+    max_articles_per_feed = 100
+    cover_url             = 'http://www.gsp.ro/images/sigla_rosu.jpg'
+    feeds = [(u'toate stirile', u'http://www.gsp.ro/index.php?section=section&screen=rss')]
+    remove_tags_after = dict(name='div', attrs={'id': 'adoceanintactrovccmgpmnyt'})
+    remove_tags = [
+        dict(name='div', attrs={'class': ['related_articles', 'articol_noteaza straight_line dotted_line_top', 'comentarii', 'mai_multe_articole']}),
+        dict(name='div', attrs={'id': 'icons'}),
+    ]
+
+    def print_version(self, url):
+        last_slash = url.rindex('/')
+        return 'http://www1.gsp.ro/print/' + url[last_slash + 1:]
--- a/resources/recipes/hna.recipe
+++ b/resources/recipes/hna.recipe
@ -30,21 +30,33 @@ class hnaDe(BasicNewsRecipe):
 		   dict(id='superbanner'),
 		   dict(id='navigation'),
 		   dict(id='skyscraper'),
+		   dict(id='idNavigationWrap'),
+		   dict(id='idHeaderSearchForm'),
+		   dict(id='idLoginBarWrap'),
+		   dict(id='idAccountButtons'),
+		   dict(id='idHeadButtons'),
+		   dict(id='idBoxesWrap'),
 		   dict(id=''),
                   dict(name='span'),
 		   dict(name='ul', attrs={'class':'linklist'}),
 		   dict(name='a', attrs={'href':'#'}),
 		   dict(name='div', attrs={'class':'hlist'}),
+		   dict(name='li', attrs={'class':'idButton idIsLoginGroup idHeaderRegister '}),
+		   dict(name='li', attrs={'class':'idVideoBar idFirst'}),
+		   dict(name='li', attrs={'class':'idSetStartPageLink idLast'}),
+		   dict(name='li', attrs={'class':'idKinderNetzBar idLast'}),
+		   dict(name='li', attrs={'class':'idFotoBar '}),
 		   dict(name='div', attrs={'class':'subc noprint'}),
+		   dict(name='div', attrs={'class':'idBreadcrumb'}),
+		   dict(name='div', attrs={'class':'idLay idAdvertising idClStandard '}),
+		   dict(name='span', attrs={'class':'idHeadLineIntro'}),
 		   dict(name='p', attrs={'class':'breadcrumb'}),
 		   dict(name='a', attrs={'style':'cursor:hand'}),
-		   dict(name='p', attrs={'class':'h5'})]
+		   dict(name='p', attrs={'class':'h5'}),
+		   dict(name='p', attrs={'class':'idMoreEnd'})]
    #remove_tags_after = [dict(name='div', attrs={'class':'rahmenbreaking'})]
-    remove_tags_after = [dict(name='a', attrs={'href':'#'})]
+    remove_tags_after = [dict(name='p', attrs={'class':'idMoreEnd'})]

    feeds =  [ ('hna_soehre', 'http://feeds2.feedburner.com/hna/soehre'),
 	       ('hna_kassel', 'http://feeds2.feedburner.com/hna/kassel') ]

-
-
-
--- a/resources/recipes/rusiahoy.recipe
+++ b/resources/recipes/rusiahoy.recipe
@ -0,0 +1,47 @@
+__license__   = 'GPL v3'
+__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+'''
+rusiahoy.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class RusiaHoy(BasicNewsRecipe):
+    # rusiahoy.com feed recipe: keeps the article_* blocks and drops inline styles.
+    __author__            = 'Darko Miletic'
+    title                 = 'Rusia Hoy'
+    description           = 'Noticias de Russia en castellano'
+    publisher             = 'rusiahoy.com'
+    category              = 'news, politics, Russia'
+    language              = 'es'
+    encoding              = 'utf8'
+    oldest_article        = 7
+    max_articles_per_feed = 200
+    no_stylesheets        = True
+    use_embedded_content  = False
+    remove_empty_feeds    = True
+    feeds = [(u'Articulos', u'http://rusiahoy.com/xml/index.xml')]
+
+    # Built from the class attributes above, so it must come after them.
+    conversion_options = dict(comment=description, tags=category,
+                              publisher=publisher, language=language)
+
+    remove_attributes = ['align', 'width', 'height']
+    remove_tags = [dict(name=['meta', 'link', 'iframe', 'base', 'object', 'embed'])]
+    keep_only_tags = [
+        dict(attrs={'class': ['article_rubric_title', 'article_date', 'article_article_title', 'article_article_lead']}),
+        dict(attrs={'class': 'article_article_text'}),
+    ]
+
+    extra_css             = """ 
+                                body{font-family: Arial,sans-serif }
+                                .article_article_title{font-size: xx-large; font-weight: bold}
+                                .article_date{color: black; font-size: small}
+                            """
+
+    def preprocess_html(self, soup):
+        # Delete the style attribute from every element that has one.
+        for tag in soup.findAll(style=True):
+            del tag['style']
+        return soup
+        
--- a/resources/recipes/sueddeutsche.recipe
+++ b/resources/recipes/sueddeutsche.recipe
@ -28,7 +28,7 @@ class Sueddeutsche(BasicNewsRecipe):
 						  "SKY_AD","NT1_AD","navbar1","sdesiteheader"]}),

                    dict(name='div', attrs={'class':["similar-article-box","artikelliste","nteaser301bg",
-					             "pages closed","basebox right narrow"]}),
+					             "pages closed","basebox right narrow","headslot galleried"]}),

                    dict(name='div', attrs={'class':["articleDistractor","listHeader","listHeader2","hr2",
 						     "item","videoBigButton","articlefooter full-column",
@ -38,10 +38,11 @@ class Sueddeutsche(BasicNewsRecipe):
                    dict(name='div', attrs={'style':["position:relative;"]}),
                    dict(name='span', attrs={'class':["nlinkheaderteaserschwarz","artikelLink","r10000000"]}),
                    dict(name='table', attrs={'class':["stoerBS","kommentare","footer","pageBoxBot","pageAktiv","bgcontent"]}),
-                    dict(name='ul', attrs={'class':["breadcrumb","articles","activities","sitenav"]}),
+                    dict(name='ul', attrs={'class':["breadcrumb","articles","activities","sitenav","actions"]}),
                    dict(name='td', attrs={'class':["artikelDruckenRight"]}),
                    dict(name='p', text = "ANZEIGE")
                     ]
+    remove_tags_after = [dict(name='div', attrs={'class':["themenbox full-column"]})]

    extra_css = '''
                    h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small; color: #003399;}
@ -70,9 +71,8 @@ class Sueddeutsche(BasicNewsRecipe):
             (u'Reise', u'http://suche.sueddeutsche.de/query/reise/nav/%C2%A7ressort%3AReise/sort/-docdatetime?output=rss')
             ]

+
+
    def print_version(self, url):
-        return url.replace('/text/', '/text/print.html')
-
-
-
-
+        main, sep, id = url.rpartition('/')
+        return main + '/2.220/' + id
--- a/src/calibre/ebooks/metadata/book/base.py
+++ b/src/calibre/ebooks/metadata/book/base.py
@ -386,7 +386,8 @@ class Metadata(object):
            self.set_all_user_metadata(other.get_all_user_metadata(make_copy=True))
            for x in SC_FIELDS_COPY_NOT_NULL:
                copy_not_none(self, other, x)
-            self.set_classifiers(other.get_classifiers())
+            if callable(getattr(other, 'get_classifiers', None)):
+                self.set_classifiers(other.get_classifiers())
            # language is handled below
        else:
            for attr in SC_COPYABLE_FIELDS:
@ -461,7 +462,7 @@ class Metadata(object):
        v = self.series_index if val is None else val
        try:
            x = float(v)
-        except ValueError, TypeError:
+        except (ValueError, TypeError):
            x = 1
        return fmt_sidx(x)

--- a/src/calibre/gui2/tag_view.py
+++ b/src/calibre/gui2/tag_view.py
@ -16,7 +16,6 @@ from PyQt4.Qt import Qt, QTreeView, QApplication, pyqtSignal, \
                     QPushButton, QWidget, QItemDelegate

 from calibre.ebooks.metadata import title_sort
-from calibre.ebooks.metadata.book import ALL_METADATA_FIELDS
 from calibre.gui2 import config, NONE
 from calibre.library.field_metadata import TagsIcons
 from calibre.utils.search_query_parser import saved_searches
@ -154,6 +153,8 @@ class TagsView(QTreeView): # {{{
                    ids = list(map(int, str(md.data(mime)).split()))
                    self.handle_drop(item, child, ids)
                    event.accept()
+                    return
+        event.ignore()

    def handle_drop(self, parent, child, ids):
        # print 'Dropped ids:', ids, parent.category_key, child.tag.name