Sync to trunk.

2025-07-09 03:04:10 -04:00 · 2009-10-19 06:41:51 -04:00 · 2009-10-19 06:41:51 -04:00 · 1e3832a204
commit 1e3832a204
parent c26fd05fce 3c68df5475
11 changed files with 125 additions and 52 deletions
--- a/resources/recipes/corren2.recipe
+++ b/resources/recipes/corren2.recipe
@ -0,0 +1,38 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1255797795(BasicNewsRecipe):
+    '''Recipe for corren.se: ten RSS feeds, article URLs rewritten to Print.aspx.'''
+    title          = u'Corren'
+    __author__ = 'Jonas Svensson'
+    simultaneous_downloads = 1
+    no_stylesheets = True
+    oldest_article = 7
+    max_articles_per_feed = 100
+    remove_attributes = ['onload']
+    timefmt = ''
+
+    feeds          = [
+                   (u'Toppnyheter (alla kategorier)', u'http://www.corren.se/inc/RssHandler.ashx?id=4122151&ripurl=http://www.corren.se/nyheter/'),
+                   (u'Bostad', u'http://www.corren.se/inc/RssHandler.ashx?id=4122174&ripurl=http://www.corren.se/bostad/'),
+                   (u'Ekonomi & Jobb', u'http://www.corren.se/inc/RssHandler.ashx?id=4122176&ripurl=http://www.corren.se/ekonomi/'),
+                   (u'Kultur & Nöje', u'http://www.corren.se/inc/RssHandler.ashx?id=4122192&ripurl=http://www.corren.se/kultur/'),
+                   (u'Mat & dryck', u'http://www.corren.se/inc/RssHandler.ashx?id=4122201&ripurl=http://www.corren.se/mat-dryck/'),
+                   (u'Motor', u'http://www.corren.se/inc/RssHandler.ashx?id=4122203&ripurl=http://www.corren.se/motor/'),
+                   (u'Sport', u'http://www.corren.se/inc/RssHandler.ashx?id=4122206&ripurl=http://www.corren.se/sport/'),
+                   (u'Åsikter', u'http://www.corren.se/inc/RssHandler.ashx?id=4122223&ripurl=http://www.corren.se/asikter/'),
+                   (u'Mjölby', u'http://www.corren.se/inc/RssHandler.ashx?id=4122235&ripurl=http://www.corren.se/ostergotland/mjolby/'),
+                   (u'Motala', u'http://www.corren.se/inc/RssHandler.ashx?id=4122236&ripurl=http://www.corren.se/ostergotland/motala/')
+                     ]
+
+    def print_version(self, url):
+        '''Return url with "<section>/artikel.aspx" rewritten to "Print.aspx".'''
+        # One entry per feed section, each listed exactly once; str.replace
+        # leaves url untouched when a section's article path is not in it.
+        sections = (
+            'ekonomi', 'bostad', 'kultur', 'motor', 'mat-dryck', 'sport',
+            'asikter', 'ostergotland/mjolby', 'ostergotland/motala', 'nyheter',
+        )
+        for section in sections:
+            url = url.replace(section + "/artikel.aspx", "Print.aspx")
+        return url
+ 
--- a/resources/recipes/economist.recipe
+++ b/resources/recipes/economist.recipe
@ -18,7 +18,6 @@ class Economist(BasicNewsRecipe):
    __author__ = "Kovid Goyal"
    description = 'Global news and current affairs from a European perspective'
    oldest_article = 7.0
-    needs_subscription = False # Strange but true
    INDEX = 'http://www.economist.com/printedition'
    cover_url = 'http://www.economist.com/images/covers/currentcovereu_large.jpg'
    remove_tags = [dict(name=['script', 'noscript', 'title'])]
--- a/resources/recipes/politico.recipe
+++ b/resources/recipes/politico.recipe
@ -6,9 +6,12 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 politico.com
 '''

+import re, traceback
+
 from calibre.web.feeds.news import BasicNewsRecipe

 class Politico(BasicNewsRecipe):
+  
    title                 = 'Politico'
    __author__            = 'Darko Miletic'
    description           = 'Political news from USA'
@ -55,13 +58,13 @@ class Politico(BasicNewsRecipe):
            del item['style']
        return soup

-    def print_url(self, soup, default):
-        printtags = soup.findAll('a',href=True)
-        for printtag in printtags:
-            if printtag.string == "Print":
-               return printtag['href']
-        return default
+    url_pat = re.compile(r'<a href="([^"]+printstory\.cfm[^"]+)"')

    def print_version(self, url):
-        soup = self.index_to_soup(url)
-        return self.print_url(soup, None)
+        raw = self.index_to_soup(url, raw=True)
+        try:
+            url = self.url_pat.search(raw).group(1)
+        except:
+            traceback.print_exc()
+            url = None
+        return url
--- a/resources/recipes/time_magazine.recipe
+++ b/resources/recipes/time_magazine.recipe
@ -17,18 +17,36 @@ class Time(BasicNewsRecipe):
    no_stylesheets        = True
    language = 'en'

-    extra_css      = '''.headline {font-size: large;}
-    .fact { padding-top: 10pt  }
-    h1 {font-family:Arial,Sans-serif}
-    .byline{font-family:Arial,Sans-serif; font-size:xx-small ;color:blue}
-    .timestamp{font-family:Arial,Sans-serif; font-size:x-small ;color:gray}'''
-    remove_tags_before = dict(id="artHd")
-    remove_tags_after = {'class':"ltCol"}
-    remove_tags    = [
-            {'class':['articleTools', 'enlarge', 'search','socialtools','blogtools','moretools','page','nextUp','next','subnav','RSS','line2','first','ybuzz','articlePagination','chiclets','imgcont','createListLink','rlinks','tabsWrap','pagination']},
-            {'id':['quigoArticle', 'contentTools', 'articleSideBar', 'header', 'navTop','articleTools','feedmodule','feedmodule3','promos','footer','linksFooter','timeArchive','belt','relatedStories','packages','Features']},
-            {'target':'_blank'},
-                      ]
+    extra_css      = ''' h1 {font-family:Arial,Sans-serif;}
+                         h2 {font-family:Arial,Sans-serif;}
+                        .name{font-family:Arial,Sans-serif; font-size:x-small; }
+                        .date{font-family:Arial,Sans-serif; font-size:x-small ;color:#999999;}
+                        .byline{font-family:Arial,Sans-serif; font-size:x-small ;}
+                        .photoBkt{ font-size:x-small ;}
+                        .vertPhoto{font-size:x-small ;}
+                        .credits{font-family:Arial,Sans-serif; font-size:x-small ;color:gray;}
+                        .artTxt{font-family:georgia,serif;}
+                        #article{font-family:georgia,serif;}
+                        .caption{font-family:georgia,serif; font-size:x-small;color:#333333;}
+                        .credit{font-family:georgia,serif; font-size:x-small;color:#999999;}
+                        a:link{color:#CC0000;}
+                        '''
+    
+   # remove_tags_before = dict(id="artHd")
+   # remove_tags_after = {'class':"ltCol"}
+   # remove_tags    = [
+   #         {'class':['articleTools', 'enlarge', 'search','socialtools','blogtools','moretools','page','nextUp','next','subnav','RSS','line2','first','ybuzz','articlePagination','chiclets','imgcont','createListLink','rlinks','tabsWrap','pagination']},
+   #         {'id':['quigoArticle', 'contentTools', 'articleSideBar', 'header', 'navTop','articleTools','feedmodule','feedmodule3','promos','footer','linksFooter','timeArchive','belt','relatedStories','packages','Features']},
+   #         {'target':'_blank'},
+   #                   ]
+
+    keep_only_tags = [ dict(name ="div",attrs = {"id" :["article",]}) ,
+                        dict(name ="div",attrs = {"class" :["artHd","artTxt","photoBkt","vertPhoto","image","copy"]}) ,]
+    remove_tags    = [           dict(name ="div",attrs =  {'class':['articlePagination','nextUp',"rtCol","pagination","enlarge",]}),
+                                  dict(name ="span",attrs =  {'class':['see']}),
+                                 dict(name ="div",attrs =  {'id':['articleSideBar',"articleTools","articleFooter","cmBotLt","quigoPackage"]}),
+                                  dict(name ="a",attrs =  {'class':['listLink']}),
+                     ]
    recursions = 1
    match_regexps = [r'/[0-9,]+-(2|3|4|5|6|7|8|9)(,\d+){0,1}.html']

@ -81,20 +99,3 @@ class Time(BasicNewsRecipe):
            else:
                ans.append(unicode(t))
        return u' '.join(ans).replace(u'\xa0', u'').strip()
-
-    def postprocess_html(self, soup, first_page):
-        div = soup.find(attrs={'class':'artPag'})
-        if div is not None:
-            div.extract()
-        if not first_page:
-            for cls in ('photoBkt', 'artHd'):
-                div = soup.find(attrs={'class':cls})
-                if div is not None:
-                    div.extract()
-            div = soup.find(attrs={'class':'artTxt'})
-            if div is not None:
-                p = div.find('p')
-                if p is not None:
-                    p.extract()
-
-        return soup
--- a/resources/recipes/zeitde.recipe
+++ b/resources/recipes/zeitde.recipe
@ -15,7 +15,7 @@ class ZeitDe(BasicNewsRecipe):
    language = 'de'
    lang = 'de_DE'

-    __author__ = 'Martin Pitt and Suajta Raman'
+    __author__ = 'Martin Pitt and Sujata Raman'
    use_embedded_content   = False
    max_articles_per_feed = 40
    remove_empty_feeds = True
@ -41,7 +41,8 @@ class ZeitDe(BasicNewsRecipe):
                .article{font-family:Georgia,Palatino,Palatino Linotype,FreeSerif,serif;font-size:x-small}
                .headline iconportrait_inline{font-family:Arial,Helvetica,sans-serif;font-size:x-small}
                '''
-    filter_regexps = [r'ad.de.doubleclick.net/']
+    #filter_regexps = [r'ad.de.doubleclick.net/']
+    
    keep_only_tags = [
                        dict(name='div', attrs={'class':["article"]}) ,
                         ]
@ -51,15 +52,32 @@ class ZeitDe(BasicNewsRecipe):
                     dict(name='div', attrs={'id':["place_5","place_4"]})
                  ]

+    
+
    def get_article_url(self, article):
+         
+        ans = article.get('guid',None)
+        
+        try:
+            self.log('Looking for full story link in', ans)
+            soup = self.index_to_soup(ans)
+            x = soup.find(text="Auf einer Seite lesen")
+            
+            if x is not None:
+                
+                a = x.parent
+                if a and a.has_key('href'):
+                    ans = a['href']
+                    self.log('Found full story link', ans)
+        except:
+            pass
+        
+        if 'video' in ans or 'quiz' in ans :

-          url = article.get('guid', None)
-
-          if 'video' in url or 'quiz' in url :
-
-              url = None
-
-          return url
+              ans = None
+        return ans
+         
+    

    def preprocess_html(self, soup):
        soup.html['xml:lang'] = self.lang
@ -69,6 +87,7 @@ class ZeitDe(BasicNewsRecipe):

        return soup

+   
    #def print_version(self,url):
    #    return url.replace('http://www.zeit.de/', 'http://images.zeit.de/text/').replace('?from=rss', '')

--- a/setup/installer/linux/freeze.py
+++ b/setup/installer/linux/freeze.py
@ -46,6 +46,7 @@ class LinuxFreeze(Command):
                        '/usr/lib/libmng.so.1',
                        '/usr/lib/libpodofo.so.0.6.99',
                        '/lib/libz.so.1',
+                        '/lib/libuuid.so.1',
                        '/usr/lib/libtiff.so.3',
                        '/lib/libbz2.so.1',
                        '/usr/lib/libpoppler.so.5',
--- a/setup/installer/windows/wix-template.xml
+++ b/setup/installer/windows/wix-template.xml
@ -120,7 +120,7 @@
                </Condition>
                <InstallExecuteSequence>
                    <Custom Action="PreventDowngrading" After="FindRelatedProducts">NEWPRODUCTFOUND</Custom>
-                    <RemoveExistingProducts Before="InstallInitialize" />
+                    <RemoveExistingProducts After="InstallFinalize" />
                </InstallExecuteSequence>
                <InstallUISequence>
                    <Custom Action="PreventDowngrading" After="FindRelatedProducts">NEWPRODUCTFOUND</Custom>
--- a/src/calibre/debug.py
+++ b/src/calibre/debug.py
@ -165,7 +165,7 @@ def main(args=sys.argv):
        sys.argv = args[:1]
        exec opts.command
    elif opts.exec_file:
-        sys.argv = args[:1]
+        sys.argv = args
        base = os.path.dirname(os.path.abspath(opts.exec_file))
        sys.path.insert(0, base)
        g = globals()
--- a/src/calibre/manual/faq.rst
+++ b/src/calibre/manual/faq.rst
@ -141,6 +141,11 @@ Now you should be able to access your books on your iPhone by opening Stanza and

 Replace ``192.168.1.2`` with the local IP address of the computer running |app|. If you have changed the port the |app| content server is running on, you will have to change ``8080`` as well to the new port. The local IP address is the IP address you computer is assigned on your home network. A quick Google search will tell you how to find out your local IP address.  

+How do I use |app| with my Android phone?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+First install the WordPlayer e-book reading app from the Android Marketplace onto your phone. Then simply plug your phone into the computer with a USB cable. |app| should automatically detect the phone and then you can transfer books to it by clicking the Send to Device button. |app| does not have support for every single Android device out there, so if you would like to have support for your device added, follow the instructions above for getting your device supported in |app|.
+
 I get the error message "Failed to start content server: Port 8080 not free on '0.0.0.0'"?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

--- a/src/calibre/web/feeds/news.py
+++ b/src/calibre/web/feeds/news.py
@ -501,8 +501,10 @@ class BasicNewsRecipe(Recipe):
            if isinstance(self.feeds, basestring):
                self.feeds = [self.feeds]

-        if self.needs_subscription and (self.username is None or self.password is None):
-            raise ValueError('The %s recipe needs a username and password.'%self.title)
+        if self.needs_subscription and (\
+                self.username is None or self.password is None or \
+                (not self.username and not self.password)):
+            raise ValueError(_('The "%s" recipe needs a username and password.')%self.title)

        self.browser = self.get_browser()
        self.image_map, self.image_counter = {}, 1
--- a/src/calibre/web/feeds/recipes/collection.py
+++ b/src/calibre/web/feeds/recipes/collection.py
@ -193,11 +193,16 @@ class SchedulerConfig(object):

    def write_scheduler_file(self):
        from calibre.utils.lock import ExclusiveFile
+        self.root.text = '\n\n\t'
+        for x in self.root:
+            x.tail = '\n\n\t'
+        if len(self.root) > 0:
+            self.root[-1].tail = '\n\n'
        with ExclusiveFile(self.conf_path) as f:
            f.seek(0)
            f.truncate()
            f.write(etree.tostring(self.root, encoding='utf-8',
-                xml_declaration=True, pretty_print=True))
+                xml_declaration=True, pretty_print=False))

    def serialize_schedule(self, typ, schedule):
        s = E.schedule({'type':typ})