mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-11-03 19:17:02 -05:00 
			
		
		
		
	Sync to trunk.
This commit is contained in:
		
						commit
						a0d1670e6f
					
				
							
								
								
									
										
											BIN
										
									
								
								resources/images/news/ad.png
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								resources/images/news/ad.png
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| 
		 After Width: | Height: | Size: 569 B  | 
							
								
								
									
										
											BIN
										
									
								
								resources/images/news/digitaljournal.png
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								resources/images/news/digitaljournal.png
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| 
		 After Width: | Height: | Size: 253 B  | 
							
								
								
									
										
											BIN
										
									
								
								resources/images/news/kitsapun.png
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								resources/images/news/kitsapun.png
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| 
		 After Width: | Height: | Size: 2.3 KiB  | 
							
								
								
									
										
											BIN
										
									
								
								resources/images/news/ledevoir.png
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								resources/images/news/ledevoir.png
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| 
		 After Width: | Height: | Size: 531 B  | 
							
								
								
									
										86
									
								
								resources/recipes/ad.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										86
									
								
								resources/recipes/ad.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,86 @@
 | 
				
			|||||||
 | 
					import re
 | 
				
			||||||
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class ADRecipe(BasicNewsRecipe):
 | 
				
			||||||
 | 
					    __license__  = 'GPL v3'
 | 
				
			||||||
 | 
					    __author__ = 'kwetal'
 | 
				
			||||||
 | 
					    language = 'nl'
 | 
				
			||||||
 | 
					    country = 'NL'
 | 
				
			||||||
 | 
					    version = 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    title = u'AD'
 | 
				
			||||||
 | 
					    publisher = u'de Persgroep Publishing Nederland NV'
 | 
				
			||||||
 | 
					    category = u'News, Sports, the Netherlands'
 | 
				
			||||||
 | 
					    description = u'News and Sports from the Netherlands'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    oldest_article = 1.2
 | 
				
			||||||
 | 
					    max_articles_per_feed = 100
 | 
				
			||||||
 | 
					    use_embedded_content = False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    remove_empty_feeds = True
 | 
				
			||||||
 | 
					    no_stylesheets = True
 | 
				
			||||||
 | 
					    remove_javascript = True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    keep_only_tags = []
 | 
				
			||||||
 | 
					    keep_only_tags.append(dict(name = 'div', attrs = {'id': 'art_box2'}))
 | 
				
			||||||
 | 
					    keep_only_tags.append(dict(name = 'p', attrs = {'class': 'gen_footnote3'}))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    remove_tags = []
 | 
				
			||||||
 | 
					    remove_tags.append(dict(name = 'div', attrs = {'class': 'gen_clear'}))
 | 
				
			||||||
 | 
					    remove_tags.append(dict(name = 'div', attrs = {'class': re.compile(r'gen_spacer.*')}))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    remove_attributes = ['style']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # feeds from http://ad.nl/ad/nl/1401/home/integration/nmc/frameset/ad_footer/rssFeeds.dhtml
 | 
				
			||||||
 | 
					    feeds = []
 | 
				
			||||||
 | 
					    feeds.append((u'Binnenland', u'http://www.ad.nl/nieuws/binnenland/rss.xml'))
 | 
				
			||||||
 | 
					    feeds.append((u'Buitenland', u'http://www.ad.nl/nieuws/buitenland/rss.xml'))
 | 
				
			||||||
 | 
					    feeds.append((u'Bizar', u'http://www.ad.nl/nieuws/bizar/rss.xml'))
 | 
				
			||||||
 | 
					    feeds.append((u'Gezondheid & Wetenschap', u'http://www.ad.nl/nieuws/gezondheidwetenschap/rss.xml'))
 | 
				
			||||||
 | 
					    feeds.append((u'Economie', u'http://www.ad.nl/nieuws/economie/rss.xml'))
 | 
				
			||||||
 | 
					    feeds.append((u'Nederlands Voetbal', u'http://www.ad.nl/sportwereld/nederlandsvoetbal/rss.xml'))
 | 
				
			||||||
 | 
					    feeds.append((u'Buitenlands Voetbal', u'http://www.ad.nl/sportwereld/buitenlandsvoetbal/rss.xml'))
 | 
				
			||||||
 | 
					    feeds.append((u'Champions League/Europa League', u'http://www.ad.nl/sportwereld/championsleagueeuropaleague/rss.xml'))
 | 
				
			||||||
 | 
					    feeds.append((u'Wielrennen', u'http://www.ad.nl/sportwereld/wielrennen/rss.xml'))
 | 
				
			||||||
 | 
					    feeds.append((u'Tennis', u'http://www.ad.nl/sportwereld/tennis/rss.xml'))
 | 
				
			||||||
 | 
					    feeds.append((u'Formule 1', u'http://www.ad.nl/sportwereld/formule1/rss.xml'))
 | 
				
			||||||
 | 
					    feeds.append((u'Meer Sport', u'http://www.ad.nl/sportwereld/meersport/rss.xml'))
 | 
				
			||||||
 | 
					    feeds.append((u'Celebs', u'http://www.ad.nl/showbizz/celebs/rss.xml'))
 | 
				
			||||||
 | 
					    feeds.append((u'Film', u'http://www.ad.nl/showbizz/film/rss.xml'))
 | 
				
			||||||
 | 
					    feeds.append((u'Muziek', u'http://www.ad.nl/showbizz/muziek/rss.xml'))
 | 
				
			||||||
 | 
					    feeds.append((u'TV', u'http://www.ad.nl/showbizz/tv/rss.xml'))
 | 
				
			||||||
 | 
					    feeds.append((u'Kunst & Literatuur', u'http://www.ad.nl/showbizz/kunstenliteratuur/rss.xml'))
 | 
				
			||||||
 | 
					    feeds.append((u'Jouw Wereld', u'http://www.ad.nl/you/rss.xml'))
 | 
				
			||||||
 | 
					    feeds.append((u'Consument', u'http://www.ad.nl/consument/rss.xml'))
 | 
				
			||||||
 | 
					    feeds.append((u'Autowereld', u'http://www.ad.nl/autowereld/rss.xml'))
 | 
				
			||||||
 | 
					    feeds.append((u'Reiswereld', u'http://www.ad.nl/reiswereld/rss.xml'))
 | 
				
			||||||
 | 
					    feeds.append((u'Internet', u'http://www.ad.nl/digitaal/internet/rss.xml'))
 | 
				
			||||||
 | 
					    feeds.append((u'Games', u'http://www.ad.nl/digitaal/games/rss.xml'))
 | 
				
			||||||
 | 
					    feeds.append((u'Multimedia', u'http://www.ad.nl/digitaal/multimedia/rss.xml'))
 | 
				
			||||||
 | 
					    feeds.append((u'Planet Watch', u'http://www.ad.nl/planetwatch/rss.xml'))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    extra_css = '''
 | 
				
			||||||
 | 
					                body {font-family: verdana, arial, helvetica, geneva, sans-serif;}
 | 
				
			||||||
 | 
					                div.captionEmbeddedMasterObject {font-size: x-small; font-style: italic; color: #696969;}
 | 
				
			||||||
 | 
					                .gen_footnote3 {font-size: small; color: #666666; margin-top: 0.6em;}
 | 
				
			||||||
 | 
					                '''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Metadata handed to the e-book conversion step. The language entry
					    # reuses the recipe's own `language` attribute ('nl') instead of a
					    # hard-coded 'en', so the generated book is not labelled as English.
					    conversion_options = {'comments': description, 'tags': category, 'language': language,
					                          'publisher': publisher}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def print_version(self, url):
					        """Map an article URL onto the address of its printer-friendly page.

					        The URL is split on '/' and selected pieces are re-assembled around
					        a literal 'print' path component. A URL with fewer than 14
					        '/'-separated pieces raises IndexError.
					        """
					        pieces = url.split('/')
					        # positions of the pieces placed before and after the 'print' component
					        leading = '/'.join([pieces[i] for i in (2, 3, 4, 5, 10, 7)])
					        trailing = '/'.join([pieces[i] for i in (8, 9, 13)])
					        return 'http://' + leading + '/print/' + trailing
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def preprocess_html(self, soup):
					        """Thin out runs of consecutive <br> tags and return the same soup.

					        A <br> is removed when both its previous and its next sibling tag
					        are also <br> tags, as seen at the moment it is visited.
					        """
					        for line_break in soup.findAll('br'):
					            before = line_break.findPreviousSibling(True)
					            if getattr(before, 'name', None) != 'br':
					                continue
					            # only look forward once the previous sibling is known to be a <br>
					            after = line_break.findNextSibling(True)
					            if getattr(after, 'name', None) == 'br':
					                line_break.extract()

					        return soup
 | 
				
			||||||
@ -1,7 +1,5 @@
 | 
				
			|||||||
#!/usr/bin/env  python
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
__license__   = 'GPL v3'
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
 | 
					__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
 | 
				
			||||||
'''
 | 
					'''
 | 
				
			||||||
spectator.org
 | 
					spectator.org
 | 
				
			||||||
'''
 | 
					'''
 | 
				
			||||||
@ -11,20 +9,22 @@ from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			|||||||
class TheAmericanSpectator(BasicNewsRecipe):
 | 
					class TheAmericanSpectator(BasicNewsRecipe):
 | 
				
			||||||
    title                 = 'The American Spectator'
 | 
					    title                 = 'The American Spectator'
 | 
				
			||||||
    __author__            = 'Darko Miletic'
 | 
					    __author__            = 'Darko Miletic'
 | 
				
			||||||
    language = 'en'
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    description           = 'News from USA'
 | 
					    description           = 'News from USA'
 | 
				
			||||||
 | 
					    category              = 'news, politics, USA, world'
 | 
				
			||||||
 | 
					    publisher             = 'The American Spectator'
 | 
				
			||||||
    oldest_article        = 7
 | 
					    oldest_article        = 7
 | 
				
			||||||
    max_articles_per_feed = 100
 | 
					    max_articles_per_feed = 100
 | 
				
			||||||
    no_stylesheets        = True
 | 
					    no_stylesheets        = True
 | 
				
			||||||
    use_embedded_content  = False
 | 
					    use_embedded_content  = False
 | 
				
			||||||
 | 
					    language              = 'en'
 | 
				
			||||||
    INDEX                 = 'http://spectator.org'
 | 
					    INDEX                 = 'http://spectator.org'
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
    html2lrf_options = [
 | 
					    conversion_options = {  
 | 
				
			||||||
                             '--comment'       , description
 | 
					                             'comments'        : description
 | 
				
			||||||
                           , '--category'      , 'news, politics, USA'
 | 
					                            ,'tags'            : category
 | 
				
			||||||
                           , '--publisher'     , title
 | 
					                            ,'language'        : language
 | 
				
			||||||
                         ]
 | 
					                            ,'publisher'       : publisher
 | 
				
			||||||
 | 
					                         }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    keep_only_tags   = [
 | 
					    keep_only_tags   = [
 | 
				
			||||||
                             dict(name='div', attrs={'class':'post inner'})
 | 
					                             dict(name='div', attrs={'class':'post inner'})
 | 
				
			||||||
@ -33,13 +33,11 @@ class TheAmericanSpectator(BasicNewsRecipe):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    remove_tags     = [
 | 
					    remove_tags     = [
 | 
				
			||||||
                             dict(name='object')
 | 
					                             dict(name='object')
 | 
				
			||||||
                            ,dict(name='div', attrs={'class':'col3'         })
 | 
					                            ,dict(name='div', attrs={'class':['col3','post-options','social']})
 | 
				
			||||||
                            ,dict(name='div', attrs={'class':'post-options' })
 | 
					                            ,dict(name='p'  , attrs={'class':['letter-editor','meta']})
 | 
				
			||||||
                            ,dict(name='p'  , attrs={'class':'letter-editor'})
 | 
					 | 
				
			||||||
                            ,dict(name='div', attrs={'class':'social'       })
 | 
					 | 
				
			||||||
                        ]
 | 
					                        ]
 | 
				
			||||||
                         
 | 
					                         
 | 
				
			||||||
    feeds = [ (u'Articles', u'http://feedproxy.google.com/amspecarticles')]
 | 
					    feeds = [ (u'Articles', u'http://feeds.feedburner.com/amspecarticles')]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def get_cover_url(self):
 | 
					    def get_cover_url(self):
 | 
				
			||||||
        cover_url = None
 | 
					        cover_url = None
 | 
				
			||||||
@ -53,3 +51,7 @@ class TheAmericanSpectator(BasicNewsRecipe):
 | 
				
			|||||||
          
 | 
					          
 | 
				
			||||||
    def print_version(self, url):
 | 
					    def print_version(self, url):
 | 
				
			||||||
        return url + '/print'
 | 
					        return url + '/print'
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					    def get_article_url(self, article):
 | 
				
			||||||
 | 
					        return article.get('guid', None)
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										60
									
								
								resources/recipes/bbc_fast.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										60
									
								
								resources/recipes/bbc_fast.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,60 @@
 | 
				
			|||||||
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
 | 
					__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					news.bbc.co.uk
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from calibre.web.feeds.recipes import BasicNewsRecipe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class BBC(BasicNewsRecipe):
 | 
				
			||||||
 | 
					    title                  = 'BBC News (fast)'
 | 
				
			||||||
 | 
					    __author__             = 'Darko Miletic'
 | 
				
			||||||
 | 
					    description            = 'News from UK. A much faster version that does not download pictures'
 | 
				
			||||||
 | 
					    oldest_article         = 2
 | 
				
			||||||
 | 
					    max_articles_per_feed  = 100
 | 
				
			||||||
 | 
					    no_stylesheets         = True
 | 
				
			||||||
 | 
					    #delay                  = 1
 | 
				
			||||||
 | 
					    use_embedded_content   = False
 | 
				
			||||||
 | 
					    encoding               = 'utf8'
 | 
				
			||||||
 | 
					    publisher              = 'BBC'
 | 
				
			||||||
 | 
					    category               = 'news, UK, world'
 | 
				
			||||||
 | 
					    language               = 'en'
 | 
				
			||||||
 | 
					    extra_css              = ' body{ font-family: sans-serif; } .headline{font-size: xx-large; font-weight: bold} .ibox{display: block; margin: 20px 50px; padding: 10px; border: 1px solid } '
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    conversion_options = {
 | 
				
			||||||
 | 
					                             'comments'        : description
 | 
				
			||||||
 | 
					                            ,'tags'            : category
 | 
				
			||||||
 | 
					                            ,'language'        : language
 | 
				
			||||||
 | 
					                            ,'publisher'       : publisher
 | 
				
			||||||
 | 
					                         }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    remove_tags_before = dict(name='div',attrs={'class':'headline'})
 | 
				
			||||||
 | 
					    remove_tags_after  = dict(name='div', attrs={'class':'footer'})
 | 
				
			||||||
 | 
					    remove_tags       = [
 | 
				
			||||||
 | 
					                           dict(name=['object','link','script','iframe'])
 | 
				
			||||||
 | 
					                          ,dict(name='div', attrs={'class':'footer'})
 | 
				
			||||||
 | 
					                        ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    feeds          = [
 | 
				
			||||||
 | 
					                      ('News Front Page', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/front_page/rss.xml'),
 | 
				
			||||||
 | 
					                      ('Science/Nature', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/science/nature/rss.xml'),
 | 
				
			||||||
 | 
					                      ('Technology', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/technology/rss.xml'),
 | 
				
			||||||
 | 
					                      ('Entertainment', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/entertainment/rss.xml'),
 | 
				
			||||||
 | 
					                      ('Magazine', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/uk_news/magazine/rss.xml'),
 | 
				
			||||||
 | 
					                      ('Business', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/business/rss.xml'),
 | 
				
			||||||
 | 
					                      ('Health', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/health/rss.xml'),
 | 
				
			||||||
 | 
					                      ('Americas', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/americas/rss.xml'),
 | 
				
			||||||
 | 
					                      ('Europe', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/europe/rss.xml'),
 | 
				
			||||||
 | 
					                      ('South Asia', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/south_asia/rss.xml'),
 | 
				
			||||||
 | 
					                      ('UK', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/uk_news/rss.xml'),
 | 
				
			||||||
 | 
					                      ('Asia-Pacific', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/asia-pacific/rss.xml'),
 | 
				
			||||||
 | 
					                      ('Africa', 'http://newsrss.bbc.co.uk/rss/newsonline_world_edition/africa/rss.xml'),
 | 
				
			||||||
 | 
					                    ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def print_version(self, url):
					        """Return the newsvote print-page address for an article URL.

					        Everything after the first 'http://' in `url` is appended to the
					        print endpoint; when `url` contains no 'http://' nothing is appended.
					        """
					        remainder = url.partition('http://')[2]
					        return 'http://newsvote.bbc.co.uk/mpapps/pagetools/print/' + remainder
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def get_article_url(self, article):
					        """Return the feed entry's 'guid' value, or None when it has none."""
					        guid = article.get('guid', None)
					        return guid
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										121
									
								
								resources/recipes/calgary_herald.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										121
									
								
								resources/recipes/calgary_herald.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,121 @@
 | 
				
			|||||||
 | 
					#!/usr/bin/env  python
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					www.canada.com
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from calibre.web.feeds.recipes import BasicNewsRecipe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class CanWestPaper(BasicNewsRecipe):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # un-comment the following three lines for the Calgary Herald
 | 
				
			||||||
 | 
					    title = u'Calgary Herald'
 | 
				
			||||||
 | 
					    url_prefix = 'http://www.calgaryherald.com'
 | 
				
			||||||
 | 
					    description = u'News from Calgary, AB'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # un-comment the following three lines for the Regina Leader-Post
 | 
				
			||||||
 | 
					    #title = u'Regina Leader-Post'
 | 
				
			||||||
 | 
					    #url_prefix = 'http://www.leaderpost.com'
 | 
				
			||||||
 | 
					    #description = u'News from Regina, SK'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # un-comment the following three lines for the Saskatoon Star-Phoenix
 | 
				
			||||||
 | 
					    #title = u'Saskatoon Star-Phoenix'
 | 
				
			||||||
 | 
					    #url_prefix = 'http://www.thestarphoenix.com'
 | 
				
			||||||
 | 
					    #description = u'News from Saskatoon, SK'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # un-comment the following three lines for the Windsor Star
 | 
				
			||||||
 | 
					    #title = u'Windsor Star'
 | 
				
			||||||
 | 
					    #url_prefix = 'http://www.windsorstar.com'
 | 
				
			||||||
 | 
					    #description = u'News from Windsor, ON'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # un-comment the following three lines for the Ottawa Citizen
 | 
				
			||||||
 | 
					    #title = u'Ottawa Citizen'
 | 
				
			||||||
 | 
					    #url_prefix = 'http://www.ottawacitizen.com'
 | 
				
			||||||
 | 
					    #description = u'News from Ottawa, ON'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # un-comment the following three lines for the Montreal Gazette
 | 
				
			||||||
 | 
					    #title = u'Montreal Gazette'
 | 
				
			||||||
 | 
					    #url_prefix = 'http://www.montrealgazette.com'
 | 
				
			||||||
 | 
					    #description = u'News from Montreal, QC'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    language = 'en_CA'
 | 
				
			||||||
 | 
					    __author__ = 'Nick Redding'
 | 
				
			||||||
 | 
					    no_stylesheets = True
 | 
				
			||||||
 | 
					    timefmt = ' [%b %d]'
 | 
				
			||||||
 | 
					    extra_css = '''
 | 
				
			||||||
 | 
					                .timestamp {  font-size:xx-small; display: block; }
 | 
				
			||||||
 | 
					                #storyheader { font-size: medium; }
 | 
				
			||||||
 | 
					                #storyheader h1 { font-size: x-large; }
 | 
				
			||||||
 | 
					                #storyheader h2 { font-size: large;  font-style: italic; }
 | 
				
			||||||
 | 
					                .byline { font-size:xx-small; }
 | 
				
			||||||
 | 
					                #photocaption { font-size: small; font-style: italic }
 | 
				
			||||||
 | 
					                #photocredit { font-size: xx-small; }'''
 | 
				
			||||||
 | 
					    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
 | 
				
			||||||
 | 
					    remove_tags = [{'class':'comments'},
 | 
				
			||||||
 | 
					                   dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
 | 
				
			||||||
 | 
					                   dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
 | 
				
			||||||
 | 
					                   dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
 | 
				
			||||||
 | 
					                   dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
 | 
				
			||||||
 | 
					                   dict(name='div', attrs={'class':'rule_grey_solid'}),
 | 
				
			||||||
 | 
					                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def preprocess_html(self,soup):
					        """Drop empty id attributes from div tags and return the same soup."""
					        # Empty id attributes screw up the TOC for unknown reasons, so delete them.
					        for empty_id_div in soup.findAll('div',attrs={'id':''}) or ():
					            del empty_id_div['id']
					        return soup
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def parse_index(self):
					        """Build the feed list from the paper's "today's paper" index page.

					        Walks every div whose class is ``section_title02`` or
					        ``featurecontent`` in the order returned by the soup. A section
					        title div that has an ``h3`` switches the current section; a
					        feature div with a linked ``h1`` headline adds one article to the
					        current section (``'News'`` until the first title is seen).

					        Returns a list of ``(section_title, articles)`` tuples in the order
					        the sections were encountered; sections without articles are left
					        out. Each article is a dict with the keys ``title``, ``url``,
					        ``date``, ``description``, ``author`` and ``content``.
					        """
					        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')

					        articles = {}
					        key = 'News'
					        ans = ['News']

					        # Find each instance of class="section_title02", class="featurecontent"
					        for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
					            if divtag['class'].startswith('section_title'):
					                # div contains a section title; one without an h3 is ignored
					                if not divtag.h3:
					                    continue
					                key = self.tag_to_string(divtag.h3,False)
					                ans.append(key)
					                self.log("Section name %s" % key)
					                continue
					            # div contains article data; skip it unless it has a linked headline
					            h1tag = divtag.find('h1')
					            if not h1tag:
					                continue
					            atag = h1tag.find('a',href=True)
					            if not atag:
					                continue
					            url = self.url_prefix+'/news/todays-paper/'+atag['href']
					            title = self.tag_to_string(atag,False)
					            # the date is always left empty here
					            pubdate = ''
					            description = ''
					            ptag = divtag.find('p')
					            if ptag:
					                description = self.tag_to_string(ptag,False)
					            author = ''
					            autag = divtag.find('h4')
					            if autag:
					                author = self.tag_to_string(autag,False)
					            # dict.has_key() is gone in Python 3; setdefault works on 2 and 3
					            articles.setdefault(key, []).append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))

					        # keep only the sections that actually received articles
					        return [(section, articles[section]) for section in ans if section in articles]
 | 
				
			||||||
							
								
								
									
										15
									
								
								resources/recipes/cjr.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										15
									
								
								resources/recipes/cjr.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,15 @@
 | 
				
			|||||||
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class CJR(BasicNewsRecipe):
 | 
				
			||||||
 | 
					    title              = u'Columbia Journalism Review'
 | 
				
			||||||
 | 
					    __author__         = u'Xanthan Gum'
 | 
				
			||||||
 | 
					    description        = 'News about journalism.'
 | 
				
			||||||
 | 
					    language = 'en'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    oldest_article = 7
 | 
				
			||||||
 | 
					    max_articles_per_feed = 100
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    feeds = [(u'News Stories', u'http://www.cjr.org/index.xml')]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def print_version(self, url):
					        """Return `url` with the query string that requests the full, printable page."""
					        print_query = '?page=all&print=true'
					        return url + print_query
 | 
				
			||||||
							
								
								
									
										52
									
								
								resources/recipes/digitaljournal.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										52
									
								
								resources/recipes/digitaljournal.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,52 @@
 | 
				
			|||||||
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
 | 
					__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					digitaljournal.com
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class DigitalJournal(BasicNewsRecipe):
					    """Recipe for Digital Journal (digitaljournal.com), driven by its RSS feeds."""

					    # Publication metadata; also reused below in conversion_options.
					    title = 'Digital Journal'
					    __author__ = 'Darko Miletic'
					    description = 'A Global Citizen Journalism News Network'
					    publisher = 'Digital Journal'
					    category = 'news, politics, USA, world'
					    language = 'en'

					    # Fetch settings.
					    encoding = 'utf8'
					    oldest_article = 2
					    max_articles_per_feed = 100
					    no_stylesheets = True
					    use_embedded_content = False

					    conversion_options = {
					        'comments': description,
					        'tags': category,
					        'language': language,
					        'publisher': publisher,
					    }

					    # Only <div class="article"> / <div class="body"> are kept from a page,
					    # and embedded objects and tables are dropped from what remains.
					    keep_only_tags = [dict(name='div', attrs={'class': ['article', 'body']})]
					    remove_tags = [dict(name=['object', 'table'])]

					    # (feed title, feed address) pairs.
					    feeds = [
					        (u'Latest News', u'http://digitaljournal.com/rss/?feed=latest_news'),
					        (u'Business', u'http://digitaljournal.com/rss/?feed=top_news&depname=Business'),
					        (u'Entertainment', u'http://digitaljournal.com/rss/?feed=top_news&depname=Entertainment'),
					        (u'Environment', u'http://digitaljournal.com/rss/?feed=top_news&depname=Environment'),
					        (u'Food', u'http://digitaljournal.com/rss/?feed=top_news&depname=Food'),
					        (u'Health', u'http://digitaljournal.com/rss/?feed=top_news&depname=Health'),
					        (u'Internet', u'http://digitaljournal.com/rss/?feed=top_news&depname=Internet'),
					        (u'Politics', u'http://digitaljournal.com/rss/?feed=top_news&depname=Politics'),
					        (u'Religion', u'http://digitaljournal.com/rss/?feed=top_news&depname=Religion'),
					        (u'Science', u'http://digitaljournal.com/rss/?feed=top_news&depname=Science'),
					        (u'Sports', u'http://digitaljournal.com/rss/?feed=top_news&depname=Sports'),
					        (u'Technology', u'http://digitaljournal.com/rss/?feed=top_news&depname=Technology'),
					        (u'World', u'http://digitaljournal.com/rss/?feed=top_news&depname=World'),
					        (u'Arts', u'http://digitaljournal.com/rss/?feed=top_news&depname=Arts'),
					    ]

					    def print_version(self, url):
					        """Return the print-layout address for ``url``.

					        The address is formed by inserting ``print/`` directly after the
					        ``digitaljournal.com/`` site root.
					        """
					        site_root = 'digitaljournal.com/'
					        return url.replace(site_root, site_root + 'print/')
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
							
								
								
									
										126
									
								
								resources/recipes/edmonton_journal.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										126
									
								
								resources/recipes/edmonton_journal.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,126 @@
 | 
				
			|||||||
 | 
					#!/usr/bin/env  python
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					www.canada.com
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from calibre.web.feeds.recipes import BasicNewsRecipe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class CanWestPaper(BasicNewsRecipe):
					    """Recipe for a CanWest newspaper, configured here for the Edmonton Journal.

					    The article list is scraped from the paper's "today's paper" index
					    page (see ``parse_index``) instead of being read from RSS feeds.
					    """

					    # Active configuration: Edmonton Journal.
					    title = u'Edmonton Journal'
					    url_prefix = 'http://www.edmontonjournal.com'
					    description = u'News from Edmonton, AB'

					    # To build one of the sister papers instead, replace the three
					    # attributes above with the matching triple below.

					    # Calgary Herald
					    #title = u'Calgary Herald'
					    #url_prefix = 'http://www.calgaryherald.com'
					    #description = u'News from Calgary, AB'

					    # Regina Leader-Post
					    #title = u'Regina Leader-Post'
					    #url_prefix = 'http://www.leaderpost.com'
					    #description = u'News from Regina, SK'

					    # Saskatoon Star-Phoenix
					    #title = u'Saskatoon Star-Phoenix'
					    #url_prefix = 'http://www.thestarphoenix.com'
					    #description = u'News from Saskatoon, SK'

					    # Windsor Star
					    #title = u'Windsor Star'
					    #url_prefix = 'http://www.windsorstar.com'
					    #description = u'News from Windsor, ON'

					    # Ottawa Citizen
					    #title = u'Ottawa Citizen'
					    #url_prefix = 'http://www.ottawacitizen.com'
					    #description = u'News from Ottawa, ON'

					    # Montreal Gazette
					    #title = u'Montreal Gazette'
					    #url_prefix = 'http://www.montrealgazette.com'
					    #description = u'News from Montreal, QC'

					    language = 'en_CA'
					    __author__ = 'Nick Redding'
					    no_stylesheets = True
					    timefmt = ' [%b %d]'

					    # Assembled from explicit pieces so the stylesheet text does not depend
					    # on how this source file happens to be indented.
					    extra_css = (
					        '\n'
					        '                .timestamp {  font-size:xx-small; display: block; }\n'
					        '                #storyheader { font-size: medium; }\n'
					        '                #storyheader h1 { font-size: x-large; }\n'
					        '                #storyheader h2 { font-size: large;  font-style: italic; }\n'
					        '                .byline { font-size:xx-small; }\n'
					        '                #photocaption { font-size: small; font-style: italic }\n'
					        '                #photocredit { font-size: xx-small; }'
					    )

					    # Only the story header and story body are kept from an article page.
					    keep_only_tags = [
					        dict(name='div', attrs={'id': 'storyheader'}),
					        dict(name='div', attrs={'id': 'storycontent'}),
					    ]

					    # Elements dropped from the kept markup.
					    remove_tags = [
					        {'class': 'comments'},
					        dict(name='div', attrs={'class': 'navbar'}),
					        dict(name='div', attrs={'class': 'morelinks'}),
					        dict(name='div', attrs={'class': 'viewmore'}),
					        dict(name='li', attrs={'class': 'email'}),
					        dict(name='div', attrs={'class': 'story_tool_hr'}),
					        dict(name='div', attrs={'class': 'clear'}),
					        dict(name='div', attrs={'class': 'story_tool'}),
					        dict(name='div', attrs={'class': 'copyright'}),
					        dict(name='div', attrs={'class': 'rule_grey_solid'}),
					        dict(name='li', attrs={'class': 'print'}),
					        dict(name='li', attrs={'class': 'share'}),
					        dict(name='ul', attrs={'class': 'bullet'}),
					    ]

					    def preprocess_html(self, soup):
					        """Remove empty ``id`` attributes from every ``div`` and return ``soup``.

					        Empty ids break the generated table of contents; the reason is
					        not known.
					        """
					        for div in soup.findAll('div', attrs={'id': ''}):
					            del div['id']
					        return soup

					    def parse_index(self):
					        """Build the section/article index from the "today's paper" page.

					        Returns a list of ``(section title, [article dict, ...])`` tuples
					        in the order the sections first appear on the page. Sections for
					        which no article was found are left out. Articles that appear
					        before the first section heading are filed under ``'News'``.
					        """
					        soup = self.index_to_soup(self.url_prefix + '/news/todays-paper/index.html')

					        articles = {}       # section title -> list of article dicts
					        key = 'News'        # section the next article is filed under
					        ans = ['News']      # section titles in first-seen order

					        # Section headings (class "section_title02") and article teasers
					        # (class "featurecontent") are visited in document order.
					        for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}):
					            if divtag['class'].startswith('section_title'):
					                # Section heading: it becomes the current section.
					                if not divtag.h3:
					                    continue
					                key = self.tag_to_string(divtag.h3, False)
					                # A heading that repeats on the page (or is itself called
					                # 'News') must not yield a second feed with the same articles.
					                if key not in ans:
					                    ans.append(key)
					                self.log("Section name %s" % key)
					                continue

					            # Article teaser: the headline link lives in <h1><a href=...>.
					            h1tag = divtag.find('h1')
					            if not h1tag:
					                continue
					            atag = h1tag.find('a', href=True)
					            if not atag:
					                continue
					            url = self.url_prefix + '/news/todays-paper/' + atag['href']
					            title = self.tag_to_string(atag, False)

					            # No publication date is extracted from the index page.
					            pubdate = ''

					            ptag = divtag.find('p')
					            description = self.tag_to_string(ptag, False) if ptag else ''

					            autag = divtag.find('h4')
					            author = self.tag_to_string(autag, False) if autag else ''

					            # "in"/setdefault instead of dict.has_key(), which exists
					            # only on Python 2.
					            articles.setdefault(key, []).append(
					                dict(title=title, url=url, date=pubdate,
					                     description=description, author=author, content=''))

					        return [(section, articles[section]) for section in ans if section in articles]
 | 
				
			||||||
@ -15,7 +15,7 @@ class FTDe(BasicNewsRecipe):
 | 
				
			|||||||
    __author__ = 'Oliver Niesner'
 | 
					    __author__ = 'Oliver Niesner'
 | 
				
			||||||
    use_embedded_content   = False
 | 
					    use_embedded_content   = False
 | 
				
			||||||
    timefmt = ' [%d %b %Y]'
 | 
					    timefmt = ' [%d %b %Y]'
 | 
				
			||||||
    language = 'de'
 | 
					    language = _('German')
 | 
				
			||||||
    max_articles_per_feed = 40
 | 
					    max_articles_per_feed = 40
 | 
				
			||||||
    no_stylesheets = True
 | 
					    no_stylesheets = True
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
@ -23,13 +23,19 @@ class FTDe(BasicNewsRecipe):
 | 
				
			|||||||
		   dict(id='topbanner'),
 | 
							   dict(id='topbanner'),
 | 
				
			||||||
		   dict(id='seitenkopf'),
 | 
							   dict(id='seitenkopf'),
 | 
				
			||||||
		   dict(id='BoxA-0-0-0'),
 | 
							   dict(id='BoxA-0-0-0'),
 | 
				
			||||||
 | 
							   #dict(id='BoxA-2-0-0'),
 | 
				
			||||||
		   dict(id='footer'),
 | 
							   dict(id='footer'),
 | 
				
			||||||
		   dict(id='rating_open'),
 | 
							   dict(id='rating_open'),
 | 
				
			||||||
		   dict(id='ADS_Top'),
 | 
							   dict(id='ADS_Top'),
 | 
				
			||||||
		   dict(id='spinner'),
 | 
							   dict(id='spinner'),
 | 
				
			||||||
		   dict(id='ftd-contentad'),
 | 
							   dict(id='ftd-contentad'),
 | 
				
			||||||
 | 
							   dict(id='ftd-promo'),
 | 
				
			||||||
		   dict(id='nava-50009007-1-0'),
 | 
							   dict(id='nava-50009007-1-0'),
 | 
				
			||||||
		   dict(id='navli-50009007-1-0'),
 | 
							   dict(id='navli-50009007-1-0'),
 | 
				
			||||||
 | 
							   dict(id='Box5000534-0-0-0'),
 | 
				
			||||||
 | 
							   dict(id='ExpV-1-0-0-1'),
 | 
				
			||||||
 | 
							   dict(id='ExpV-1-0-0-0'),
 | 
				
			||||||
 | 
							   dict(id='PollExpV-2-0-0-0'),
 | 
				
			||||||
		   dict(id='starRating'),
 | 
							   dict(id='starRating'),
 | 
				
			||||||
		   dict(id='saveRating'),
 | 
							   dict(id='saveRating'),
 | 
				
			||||||
		   dict(id='yLayer'),
 | 
							   dict(id='yLayer'),
 | 
				
			||||||
@ -44,14 +50,20 @@ class FTDe(BasicNewsRecipe):
 | 
				
			|||||||
		   dict(name='ul', attrs={'class':'nav'}),
 | 
							   dict(name='ul', attrs={'class':'nav'}),
 | 
				
			||||||
		   dict(name='p', attrs={'class':'articleOptionHead'}),
 | 
							   dict(name='p', attrs={'class':'articleOptionHead'}),
 | 
				
			||||||
		   dict(name='p', attrs={'class':'articleOptionFoot'}),
 | 
							   dict(name='p', attrs={'class':'articleOptionFoot'}),
 | 
				
			||||||
 | 
							   dict(name='p', attrs={'class':'moreInfo'}),
 | 
				
			||||||
		   dict(name='div', attrs={'class':'chartBox'}),
 | 
							   dict(name='div', attrs={'class':'chartBox'}),
 | 
				
			||||||
		   dict(name='div', attrs={'class':'ratingOpt starRatingContainer articleOptionFootFrame'}),
 | 
							   dict(name='div', attrs={'class':'ratingOpt starRatingContainer articleOptionFootFrame'}),
 | 
				
			||||||
		   dict(name='div', attrs={'class':'box boxArticleBasic boxComments boxTransparent'}),
 | 
							   dict(name='div', attrs={'class':'box boxArticleBasic boxComments boxTransparent'}),
 | 
				
			||||||
		   dict(name='div', attrs={'class':'box boxNavTabs '}),
 | 
							   dict(name='div', attrs={'class':'box boxNavTabs'}),
 | 
				
			||||||
 | 
							   dict(name='div', attrs={'class':'boxMMRgtLow'}),
 | 
				
			||||||
		   dict(name='span', attrs={'class':'vote_455857'}),
 | 
							   dict(name='span', attrs={'class':'vote_455857'}),
 | 
				
			||||||
		   dict(name='div', attrs={'class':'relatedhalb'}),
 | 
							   dict(name='div', attrs={'class':'relatedhalb'}),
 | 
				
			||||||
		   dict(name='div', attrs={'class':'box boxListScrollOutline'}),
 | 
							   dict(name='div', attrs={'class':'box boxListScrollOutline'}),
 | 
				
			||||||
 | 
							   dict(name='div', attrs={'class':'box boxPhotoshow boxImgWide'}),
 | 
				
			||||||
 | 
							   dict(name='div', attrs={'class':'box boxTeaser boxPhotoshow boxImgWide'}),
 | 
				
			||||||
 | 
							   dict(name='div', attrs={'class':'box boxTeaser'}),
 | 
				
			||||||
		   dict(name='div', attrs={'class':'tagCloud'}),
 | 
							   dict(name='div', attrs={'class':'tagCloud'}),
 | 
				
			||||||
 | 
							   dict(name='div', attrs={'class':'pollView'}),
 | 
				
			||||||
		   dict(name='div', attrs={'class':'box boxArticleBasic boxNavTabsOutline'}),
 | 
							   dict(name='div', attrs={'class':'box boxArticleBasic boxNavTabsOutline'}),
 | 
				
			||||||
		   dict(name='div', attrs={'class':'ftdHpNav'}),
 | 
							   dict(name='div', attrs={'class':'ftdHpNav'}),
 | 
				
			||||||
		   dict(name='div', attrs={'class':'ftdHead'}),
 | 
							   dict(name='div', attrs={'class':'ftdHead'}),
 | 
				
			||||||
@ -67,9 +79,10 @@ class FTDe(BasicNewsRecipe):
 | 
				
			|||||||
		   dict(name='div', attrs={'class':'wertungoben'}),
 | 
							   dict(name='div', attrs={'class':'wertungoben'}),
 | 
				
			||||||
		   dict(name='div', attrs={'class':'artikelfuss'}),
 | 
							   dict(name='div', attrs={'class':'artikelfuss'}),
 | 
				
			||||||
		   dict(name='a', attrs={'class':'rating'}),
 | 
							   dict(name='a', attrs={'class':'rating'}),
 | 
				
			||||||
 | 
							   dict(name='a', attrs={'href':'#rt'}),
 | 
				
			||||||
		   dict(name='div', attrs={'class':'articleOptionFootFrame'}),
 | 
							   dict(name='div', attrs={'class':'articleOptionFootFrame'}),
 | 
				
			||||||
		   dict(name='div', attrs={'class':'artikelsplitfaq'})]
 | 
							   dict(name='div', attrs={'class':'artikelsplitfaq'})]
 | 
				
			||||||
    remove_tags_after = [dict(name='a', attrs={'class':'more'})]
 | 
					    #remove_tags_after = [dict(name='a', attrs={'class':'more'})]
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    feeds =  [ ('Finanzen', 'http://www.ftd.de/rss2/finanzen/maerkte'), 
 | 
					    feeds =  [ ('Finanzen', 'http://www.ftd.de/rss2/finanzen/maerkte'), 
 | 
				
			||||||
	       ('Meinungshungrige', 'http://www.ftd.de/rss2/meinungshungrige'),
 | 
						       ('Meinungshungrige', 'http://www.ftd.de/rss2/meinungshungrige'),
 | 
				
			||||||
@ -86,4 +99,4 @@ class FTDe(BasicNewsRecipe):
 | 
				
			|||||||
    
 | 
					    
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def print_version(self, url):
 | 
					    def print_version(self, url):
 | 
				
			||||||
        return url + '?mode=print'
 | 
					        return url.replace('.html', '.html?mode=print')
 | 
				
			||||||
 | 
				
			|||||||
@ -32,7 +32,7 @@ class GlobeAndMail(BasicNewsRecipe):
 | 
				
			|||||||
		'gallery-controls', 'video', 'galleryLoading','deck','header',
 | 
							'gallery-controls', 'video', 'galleryLoading','deck','header',
 | 
				
			||||||
        'toolsBottom'] },
 | 
					        'toolsBottom'] },
 | 
				
			||||||
		{'class':['credit','inline-img-caption','tab-pointer'] },
 | 
							{'class':['credit','inline-img-caption','tab-pointer'] },
 | 
				
			||||||
		dict(name='div', attrs={'id':'lead-photo'}),
 | 
							dict(name='div', attrs={'id':['lead-photo', 'most-popular-story']}),
 | 
				
			||||||
		dict(name='div', attrs={'class':'right'}),
 | 
							dict(name='div', attrs={'class':'right'}),
 | 
				
			||||||
		dict(name='div', attrs={'id':'footer'}),
 | 
							dict(name='div', attrs={'id':'footer'}),
 | 
				
			||||||
		dict(name='div', attrs={'id':'beta-msg'}),
 | 
							dict(name='div', attrs={'id':'beta-msg'}),
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										44
									
								
								resources/recipes/kitsapun.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										44
									
								
								resources/recipes/kitsapun.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,44 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
 | 
					__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					www.kitsapsun.com
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class Kitsapsun(BasicNewsRecipe):
					    """Recipe for the Kitsap Sun (kitsapsun.com), driven by its headline RSS feeds."""

					    # Publication metadata; also reused below in conversion_options.
					    title = 'Kitsap Sun'
					    __author__ = 'Darko Miletic'
					    description = 'News from Kitsap County'
					    publisher = 'Scripps Interactive Newspapers Group'
					    category = 'news, Kitsap county, USA'
					    language = 'en'

					    # Fetch settings.
					    encoding = 'cp1252'
					    oldest_article = 2
					    max_articles_per_feed = 100
					    no_stylesheets = True
					    use_embedded_content = False

					    conversion_options = {
					        'comments': description,
					        'tags': category,
					        'language': language,
					        'publisher': publisher,
					    }

					    # Only the story meta and story content divs are kept from a page,
					    # and embedded/interactive elements are dropped from what remains.
					    keep_only_tags = [dict(name='div', attrs={'id': ['story_meta', 'story_content']})]
					    remove_tags = [dict(name=['object', 'link', 'embed', 'form', 'iframe'])]

					    # (feed title, feed address) pairs.
					    feeds = [
					        (u'News', u'http://www.kitsapsun.com/rss/headlines/news/'),
					        (u'Business', u'http://www.kitsapsun.com/rss/headlines/business/'),
					        (u'Communities', u'http://www.kitsapsun.com/rss/headlines/communities/'),
					        (u'Entertainment', u'http://www.kitsapsun.com/rss/headlines/entertainment/'),
					        (u'Lifestyles', u'http://www.kitsapsun.com/rss/headlines/lifestyles/'),
					    ]

					    def print_version(self, url):
					        """Return the print-view address for ``url``.

					        Everything after the last ``/`` of ``url`` is discarded and
					        ``/?print=1`` is appended to what is left.
					        """
					        head, _slash, _tail = url.rpartition('/')
					        return head + '/?print=1'
 | 
				
			||||||
							
								
								
									
										96
									
								
								resources/recipes/montreal_gazette.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										96
									
								
								resources/recipes/montreal_gazette.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,96 @@
 | 
				
			|||||||
 | 
					#!/usr/bin/env  python
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					www.canada.com
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from calibre.web.feeds.recipes import BasicNewsRecipe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class CanWestPaper(BasicNewsRecipe):
					    """Recipe for a CanWest newspaper, configured here for the Montreal Gazette.

					    The article list is scraped from the paper's "today's paper" index
					    page (see ``parse_index``) instead of being read from RSS feeds.
					    """

					    # Active configuration: Montreal Gazette.
					    title = u'Montreal Gazette'
					    url_prefix = 'http://www.montrealgazette.com'
					    description = u'News from Montreal, QC'

					    language = 'en_CA'
					    __author__ = 'Nick Redding'
					    no_stylesheets = True
					    timefmt = ' [%b %d]'

					    # Assembled from explicit pieces so the stylesheet text does not depend
					    # on how this source file happens to be indented.
					    extra_css = (
					        '\n'
					        '                .timestamp {  font-size:xx-small; display: block; }\n'
					        '                #storyheader { font-size: medium; }\n'
					        '                #storyheader h1 { font-size: x-large; }\n'
					        '                #storyheader h2 { font-size: large;  font-style: italic; }\n'
					        '                .byline { font-size:xx-small; }\n'
					        '                #photocaption { font-size: small; font-style: italic }\n'
					        '                #photocredit { font-size: xx-small; }'
					    )

					    # Only the story header and story body are kept from an article page.
					    keep_only_tags = [
					        dict(name='div', attrs={'id': 'storyheader'}),
					        dict(name='div', attrs={'id': 'storycontent'}),
					    ]

					    # Elements dropped from the kept markup.
					    remove_tags = [
					        {'class': 'comments'},
					        dict(name='div', attrs={'class': 'navbar'}),
					        dict(name='div', attrs={'class': 'morelinks'}),
					        dict(name='div', attrs={'class': 'viewmore'}),
					        dict(name='li', attrs={'class': 'email'}),
					        dict(name='div', attrs={'class': 'story_tool_hr'}),
					        dict(name='div', attrs={'class': 'clear'}),
					        dict(name='div', attrs={'class': 'story_tool'}),
					        dict(name='div', attrs={'class': 'copyright'}),
					        dict(name='div', attrs={'class': 'rule_grey_solid'}),
					        dict(name='li', attrs={'class': 'print'}),
					        dict(name='li', attrs={'class': 'share'}),
					        dict(name='ul', attrs={'class': 'bullet'}),
					    ]

					    def preprocess_html(self, soup):
					        """Remove empty ``id`` attributes from every ``div`` and return ``soup``.

					        Empty ids break the generated table of contents; the reason is
					        not known.
					        """
					        for div in soup.findAll('div', attrs={'id': ''}):
					            del div['id']
					        return soup

					    def parse_index(self):
					        """Build the section/article index from the "today's paper" page.

					        Returns a list of ``(section title, [article dict, ...])`` tuples
					        in the order the sections first appear on the page. Sections for
					        which no article was found are left out. Articles that appear
					        before the first section heading are filed under ``'News'``.
					        """
					        soup = self.index_to_soup(self.url_prefix + '/news/todays-paper/index.html')

					        articles = {}       # section title -> list of article dicts
					        key = 'News'        # section the next article is filed under
					        ans = ['News']      # section titles in first-seen order

					        # Section headings (class "section_title02") and article teasers
					        # (class "featurecontent") are visited in document order.
					        for divtag in soup.findAll('div', attrs={'class': ["section_title02", "featurecontent"]}):
					            if divtag['class'].startswith('section_title'):
					                # Section heading: it becomes the current section.
					                if not divtag.h3:
					                    continue
					                key = self.tag_to_string(divtag.h3, False)
					                # A heading that repeats on the page (or is itself called
					                # 'News') must not yield a second feed with the same articles.
					                if key not in ans:
					                    ans.append(key)
					                self.log("Section name %s" % key)
					                continue

					            # Article teaser: the headline link lives in <h1><a href=...>.
					            h1tag = divtag.find('h1')
					            if not h1tag:
					                continue
					            atag = h1tag.find('a', href=True)
					            if not atag:
					                continue
					            url = self.url_prefix + '/news/todays-paper/' + atag['href']
					            title = self.tag_to_string(atag, False)

					            # No publication date is extracted from the index page.
					            pubdate = ''

					            ptag = divtag.find('p')
					            description = self.tag_to_string(ptag, False) if ptag else ''

					            autag = divtag.find('h4')
					            author = self.tag_to_string(autag, False) if autag else ''

					            # "in"/setdefault instead of dict.has_key(), which exists
					            # only on Python 2.
					            articles.setdefault(key, []).append(
					                dict(title=title, url=url, date=pubdate,
					                     description=description, author=author, content=''))

					        return [(section, articles[section]) for section in ans if section in articles]
 | 
				
			||||||
							
								
								
									
										101
									
								
								resources/recipes/ottawa_citizen.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										101
									
								
								resources/recipes/ottawa_citizen.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,101 @@
 | 
				
			|||||||
 | 
					#!/usr/bin/env  python
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					www.canada.com
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from calibre.web.feeds.recipes import BasicNewsRecipe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class CanWestPaper(BasicNewsRecipe):
					    '''
					    Recipe that builds an issue from the "today's paper" index page of a
					    CanWest newspaper site (url_prefix + '/news/todays-paper/index.html').
					    '''

					    # Active configuration: the Ottawa Citizen
					    title = u'Ottawa Citizen'
					    url_prefix = 'http://www.ottawacitizen.com'
					    description = u'News from Ottawa, ON'

					    # To fetch the Montreal Gazette instead, comment out the three lines
					    # above and un-comment the following three
					    #title = u'Montreal Gazette'
					    #url_prefix = 'http://www.montrealgazette.com'
					    #description = u'News from Montreal, QC'

					    language = 'en_CA'
					    __author__ = 'Nick Redding'
					    no_stylesheets = True
					    timefmt = ' [%b %d]'
					    extra_css = '''
					                .timestamp {  font-size:xx-small; display: block; }
					                #storyheader { font-size: medium; }
					                #storyheader h1 { font-size: x-large; }
					                #storyheader h2 { font-size: large;  font-style: italic; }
					                .byline { font-size:xx-small; }
					                #photocaption { font-size: small; font-style: italic }
					                #photocredit { font-size: xx-small; }'''
					    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
					    remove_tags = [{'class':'comments'},
					                   dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
					                   dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
					                   dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
					                   dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
					                   dict(name='div', attrs={'class':'rule_grey_solid'}),
					                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]

					    def preprocess_html(self, soup):
					        '''
					        Remove empty ``id`` attributes from every <div> and return the soup.
					        '''
					        # Empty ids were observed to break the generated TOC (cause unknown)
					        for div in soup.findAll('div', attrs={'id':''}):
					            del div['id']
					        return soup

					    def parse_index(self):
					        '''
					        Build the section/article index from the today's-paper page.

					        Returns a list of ``(section_title, articles)`` tuples in the order
					        the sections were first encountered; sections that collected no
					        articles are omitted. Each article is a dict with the keys
					        ``title``, ``url``, ``date``, ``description``, ``author`` and
					        ``content``.
					        '''
					        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')

					        articles = {}
					        # Articles that precede the first section heading are filed under 'News'
					        key = 'News'
					        ans = ['News']

					        # Visit section headings (class="section_title02") and article
					        # teasers (class="featurecontent") in document order
					        for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
					            if divtag['class'].startswith('section_title'):
					                # div contains section title
					                if not divtag.h3:
					                    continue
					                key = self.tag_to_string(divtag.h3,False)
					                # A heading that repeats must not produce a duplicate section
					                if key not in ans:
					                    ans.append(key)
					                self.log("Section name %s" % key)
					                continue

					            # div contains article data
					            h1tag = divtag.find('h1')
					            if not h1tag:
					                continue
					            atag = h1tag.find('a',href=True)
					            if not atag:
					                continue
					            url = self.url_prefix+'/news/todays-paper/'+atag['href']
					            title = self.tag_to_string(atag,False)
					            pubdate = ''

					            description = ''
					            ptag = divtag.find('p')
					            if ptag:
					                description = self.tag_to_string(ptag,False)

					            author = ''
					            autag = divtag.find('h4')
					            if autag:
					                author = self.tag_to_string(autag,False)

					            # setdefault replaces the dict.has_key() check, which is gone in Python 3
					            articles.setdefault(key, []).append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))

					        return [(section, articles[section]) for section in ans if section in articles]
 | 
				
			||||||
							
								
								
									
										48
									
								
								resources/recipes/pajama.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										48
									
								
								resources/recipes/pajama.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,48 @@
 | 
				
			|||||||
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
 | 
					from calibre.ebooks.BeautifulSoup import BeautifulSoup
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class PajamasMedia(BasicNewsRecipe):
					    '''
					    Recipe for the Pajamas Media feed; follows one level of links to pick
					    up the second page of multi-page posts.
					    '''

					    title          = u'Pajamas Media'
					    description = u'Provides exclusive news and opinion for forty countries.'
					    language       = 'en'
					    __author__     = 'Krittika Goyal'
					    oldest_article = 1 #days
					    max_articles_per_feed = 25
					    recursions = 1
					    # Only links ending in '/2/' under /blog/ are followed
					    match_regexps = [r'http://pajamasmedia.com/blog/.*/2/$']

					    # BasicNewsRecipe's option is named no_stylesheets; the previous
					    # 'remove_stylesheets' attribute was not a recognised option
					    no_stylesheets = True
					    remove_tags_after  = dict(name='div', attrs={'class':'paged-nav'})
					    remove_tags = [
					       dict(name='iframe'),
					       dict(name='div', attrs={'class':['pages']}),
					    ]

					    feeds          = [
					        ('pajamas Media',
					         'http://feeds.feedburner.com/PajamasMedia'),
					    ]

					    def preprocess_html(self, soup):
					        '''
					        Reduce the page to its 'innerpage-content' <div>.

					        Returns a fresh minimal document whose body holds only that <div>.
					        If the page has no such <div>, the original soup is returned
					        unchanged.
					        '''
					        story = soup.find(name='div', attrs={'id':'innerpage-content'})
					        if story is None:
					            # Nothing to transplant; inserting None into the body would fail
					            return soup

					        skeleton = BeautifulSoup('<html><head><title>t</title></head><body></body></html>')
					        body = skeleton.find(name='body')
					        body.insert(0, story)
					        return skeleton

					    def postprocess_html(self, soup, first):
					        '''
					        On every page except the first, drop the header and author blocks.
					        '''
					        if not first:
					            for css_class in ('innerpage-header', 'author'):
					                tag = soup.find(attrs={'class':css_class})
					                if tag is not None:
					                    tag.extract()
					        return soup
 | 
				
			||||||
@ -8,8 +8,7 @@ class Physicstoday(BasicNewsRecipe):
 | 
				
			|||||||
    description           = u'Physics Today magazine'
 | 
					    description           = u'Physics Today magazine'
 | 
				
			||||||
    publisher             = 'American Institute of Physics'
 | 
					    publisher             = 'American Institute of Physics'
 | 
				
			||||||
    category              = 'Physics'
 | 
					    category              = 'Physics'
 | 
				
			||||||
    language = 'en'
 | 
					    language              = 'en'
 | 
				
			||||||
 | 
					 | 
				
			||||||
    cover_url = strftime('http://ptonline.aip.org/journals/doc/PHTOAD-home/jrnls/images/medcover%m_%Y.jpg')
 | 
					    cover_url = strftime('http://ptonline.aip.org/journals/doc/PHTOAD-home/jrnls/images/medcover%m_%Y.jpg')
 | 
				
			||||||
    oldest_article = 30
 | 
					    oldest_article = 30
 | 
				
			||||||
    max_articles_per_feed = 100
 | 
					    max_articles_per_feed = 100
 | 
				
			||||||
@ -30,8 +29,8 @@ class Physicstoday(BasicNewsRecipe):
 | 
				
			|||||||
    def get_browser(self):
 | 
					    def get_browser(self):
 | 
				
			||||||
        br = BasicNewsRecipe.get_browser()
 | 
					        br = BasicNewsRecipe.get_browser()
 | 
				
			||||||
        if self.username is not None and self.password is not None:
 | 
					        if self.username is not None and self.password is not None:
 | 
				
			||||||
            br.open('http://www.physicstoday.org/pt/sso_login.jsp')
 | 
					            br.open('http://ptonline.aip.org/journals/doc/PHTOAD-home/pt_login.jsp?fl=f')
 | 
				
			||||||
            br.select_form(name='login')
 | 
					            br.select_form(name='login_form')
 | 
				
			||||||
            br['username'] = self.username
 | 
					            br['username'] = self.username
 | 
				
			||||||
            br['password'] = self.password
 | 
					            br['password'] = self.password
 | 
				
			||||||
            br.submit()
 | 
					            br.submit()
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										188
									
								
								resources/recipes/readers_digest.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										188
									
								
								resources/recipes/readers_digest.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,188 @@
 | 
				
			|||||||
 | 
					#!/usr/bin/env  python
 | 
				
			||||||
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					from calibre.web.feeds.recipes import BasicNewsRecipe
 | 
				
			||||||
 | 
					from calibre.web.feeds import Feed
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class ReadersDigest(BasicNewsRecipe):
					    '''
					    Recipe for Readers Digest: combines two scraped channel pages with
					    the site's RSS feeds, and gathers recipe articles into a virtual
					    'Recipes' feed.
					    '''

					    title       = 'Readers Digest'
					    __author__  = 'BrianG'
					    language = 'en'
					    description = 'Readers Digest Feeds'
					    no_stylesheets        = True
					    use_embedded_content  = False
					    oldest_article = 60
					    max_articles_per_feed = 200
					    remove_javascript     = True

					    extra_css      = ''' h1 {font-family:georgia,serif;color:#000000;}
					                        .mainHd{font-family:georgia,serif;color:#000000;}
					                         h2 {font-family:Arial,Sans-serif;}
					                        .name{font-family:Arial,Sans-serif; font-size:x-small;font-weight:bold; }
					                        .date{font-family:Arial,Sans-serif; font-size:x-small ;color:#999999;}
					                        .byline{font-family:Arial,Sans-serif; font-size:x-small ;}
					                        .photoBkt{ font-size:x-small ;}
					                        .vertPhoto{font-size:x-small ;}
					                        .credits{font-family:Arial,Sans-serif; font-size:x-small ;color:gray;}
					                        .credit{font-family:Arial,Sans-serif; font-size:x-small ;color:gray;}
					                        .artTxt{font-family:georgia,serif;}
					                        .caption{font-family:georgia,serif; font-size:x-small;color:#333333;}
					                        .credit{font-family:georgia,serif; font-size:x-small;color:#999999;}
					                        a:link{color:#CC0000;}
					                        .breadcrumb{font-family:Arial,Sans-serif;font-size:x-small;}
					                        '''

					    remove_tags = [
					        dict(name='h4', attrs={'class':'close'}),
					        dict(name='div', attrs={'class':'fromLine'}),
					        dict(name='img', attrs={'class':'colorTag'}),
					        dict(name='div', attrs={'id':'sponsorArticleHeader'}),
					        dict(name='div', attrs={'class':'horizontalAd'}),
					        dict(name='div', attrs={'id':'imageCounterLeft'}),
					        dict(name='div', attrs={'id':'commentsPrint'})
					        ]

					    feeds = [
					            ('New in RD', 'http://feeds.rd.com/ReadersDigest'),
					            ('Jokes', 'http://feeds.rd.com/ReadersDigestJokes'),
					            ('Cartoons', 'http://feeds.rd.com/ReadersDigestCartoons'),
					            ('Blogs','http://feeds.rd.com/ReadersDigestBlogs')
					        ]

					    cover_url = 'http://www.rd.com/images/logo-main-rd.gif'

					    def print_version(self, url):
					        '''
					        Map an article or blog-post URL to its printer-friendly URL.

					        A URL that matches neither pattern, or from which the print address
					        cannot be derived, is returned unchanged.
					        '''
					        article_pos = url.find('/article')
					        if article_pos > 0:
					            # The id is the text after '/article', stopping four characters
					            # short of '.html?'
					            html_pos = url.find('.html?')
					            if html_pos < 0:
					                # find() gave -1; slicing with it would produce a bogus id
					                return url
					            ident = url[article_pos+8:html_pos-4]
					            return 'http://www.rd.com/content/printContent.do?contentId=' + ident

					        if url.find('/post') > 0:
					            # in this case, have to get the page itself to derive the Print page.
					            soup = self.index_to_soup(url)
					            print_block = soup.find('ul',attrs={'class':'printBlock'})
					            links = print_block('a') if print_block is not None else []
					            if not links:
					                return url
					            print_url = 'http://www.rd.com' + links[0]['href']
					            # Cut everything from '&Keep' onwards, but only when it is present
					            keep_pos = print_url.find('&Keep')
					            if keep_pos >= 0:
					                print_url = print_url[:keep_pos]
					            return print_url

					        return url

					    def parse_index(self):
					        '''
					        Return the full index as a list of ``(section_title, articles)``
					        tuples, where each article is a dict with ``title``, ``url``,
					        ``date`` and ``description`` keys.

					        Scraped channel pages come first, then the RSS feeds from
					        parse_rss(); 'New in RD' and 'Blogs' are moved to the front.
					        '''
					        pages = [
					                ('Your America','http://www.rd.com/your-america-inspiring-people-and-stories', 'channelLeftContainer',{'class':'moreLeft'}),
					                # useless recipes ('Living Healthy','http://www.rd.com/living-healthy', 'channelLeftContainer',{'class':'moreLeft'}),
					                ('Advice and Know-How','http://www.rd.com/advice-and-know-how', 'channelLeftContainer',{'class':'moreLeft'})
					            ]

					        feeds = []
					        for section, url, divider, attrList in pages:
					            feeds.append((section, self.page_parse(url, divider, attrList)))

					        # parse_rss returns a list of Feed objects. Convert each feed and
					        # its articles into the tuple/dict form parse_index must return.
					        for feed in self.parse_rss():
					            newArticles = [
					                {
					                    'title' : article.title,
					                    'url'   : article.url,
					                    'date'  : article.date,
					                    'description' : article.text_summary
					                }
					                for article in feed.articles
					            ]

					            # New and Blogs should be the first two feeds.
					            if feed.title == 'New in RD':
					                feeds.insert(0,(feed.title,newArticles))
					            elif feed.title == 'Blogs':
					                feeds.insert(1,(feed.title,newArticles))
					            else:
					                feeds.append((feed.title,newArticles))

					        return feeds

					    def page_parse(self, mainurl, divider, attrList):
					        '''
					        Scrape one channel page and return its articles as a list of dicts.

					        Every element matching ``attrList`` contributes one article, titled
					        by the alt text of its first <img> and linked through its first
					        <a>. ``divider`` is accepted for compatibility but is not used.
					        '''
					        articles = []
					        mainsoup = self.index_to_soup(mainurl)
					        for item in mainsoup.findAll(attrs=attrList):
					            images = item('img')
					            links = item('a')
					            if not images or not links:
					                # Without an image or a link there is no title or URL to record
					                continue
					            articles.append({
					                'title' : images[0]['alt'],
					                'url'   : 'http://www.rd.com'+links[0]['href'],
					                'date'  : '',
					                'description' : ''
					            })

					        return articles

					    def parse_rss (self):
					        '''
					        Parse the configured RSS feeds and return the list of Feed objects.

					        Articles whose title contains "recipe" (case-insensitive) are
					        removed from their original feed and, if any were found, gathered
					        into an extra virtual 'Recipes' feed appended to the list.
					        '''
					        # Do the "official" parse_feeds first
					        feeds = BasicNewsRecipe.parse_feeds(self)

					        # Split every feed's articles into recipes and everything else
					        recipeArticles = []
					        for curfeed in feeds:
					            kept = []
					            for curarticle in curfeed.articles:
					                if 'RECIPE' in curarticle.title.upper():
					                    recipeArticles.append(curarticle)
					                else:
					                    kept.append(curarticle)
					            # Slice-assign so the feed's existing list object is updated in place
					            curfeed.articles[:] = kept

					        # If there are any recipes found, create a new Feed object and append.
					        if recipeArticles:
					            pfeed = Feed()
					            pfeed.title = 'Recipes'
					            pfeed.description = 'Recipe Feed (Virtual)'
					            pfeed.image_url  = None
					            pfeed.oldest_article = 30
					            pfeed.id_counter = len(recipeArticles)
					            pfeed.articles = recipeArticles[:]
					            feeds.append(pfeed)

					        return feeds
 | 
				
			||||||
 | 
					
 | 
				
			||||||
							
								
								
									
										116
									
								
								resources/recipes/regina_leader_post.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										116
									
								
								resources/recipes/regina_leader_post.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,116 @@
 | 
				
			|||||||
 | 
					#!/usr/bin/env  python
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					www.canada.com
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from calibre.web.feeds.recipes import BasicNewsRecipe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class CanWestPaper(BasicNewsRecipe):
					    '''
					    Recipe that builds an issue from the "today's paper" index page of a
					    CanWest newspaper site (url_prefix + '/news/todays-paper/index.html').
					    '''

					    # Active configuration: the Regina Leader-Post. To fetch one of the
					    # other papers below, comment out these three lines and un-comment
					    # that paper's three lines instead.
					    title = u'Regina Leader-Post'
					    url_prefix = 'http://www.leaderpost.com'
					    description = u'News from Regina, SK'

					    # un-comment the following three lines for the Saskatoon Star-Phoenix
					    #title = u'Saskatoon Star-Phoenix'
					    #url_prefix = 'http://www.thestarphoenix.com'
					    #description = u'News from Saskatoon, SK'

					    # un-comment the following three lines for the Windsor Star
					    #title = u'Windsor Star'
					    #url_prefix = 'http://www.windsorstar.com'
					    #description = u'News from Windsor, ON'

					    # un-comment the following three lines for the Ottawa Citizen
					    #title = u'Ottawa Citizen'
					    #url_prefix = 'http://www.ottawacitizen.com'
					    #description = u'News from Ottawa, ON'

					    # un-comment the following three lines for the Montreal Gazette
					    #title = u'Montreal Gazette'
					    #url_prefix = 'http://www.montrealgazette.com'
					    #description = u'News from Montreal, QC'

					    language = 'en_CA'
					    __author__ = 'Nick Redding'
					    no_stylesheets = True
					    timefmt = ' [%b %d]'
					    extra_css = '''
					                .timestamp {  font-size:xx-small; display: block; }
					                #storyheader { font-size: medium; }
					                #storyheader h1 { font-size: x-large; }
					                #storyheader h2 { font-size: large;  font-style: italic; }
					                .byline { font-size:xx-small; }
					                #photocaption { font-size: small; font-style: italic }
					                #photocredit { font-size: xx-small; }'''
					    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
					    remove_tags = [{'class':'comments'},
					                   dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
					                   dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
					                   dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
					                   dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
					                   dict(name='div', attrs={'class':'rule_grey_solid'}),
					                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]

					    def preprocess_html(self, soup):
					        '''
					        Remove empty ``id`` attributes from every <div> and return the soup.
					        '''
					        # Empty ids were observed to break the generated TOC (cause unknown)
					        for div in soup.findAll('div', attrs={'id':''}):
					            del div['id']
					        return soup

					    def parse_index(self):
					        '''
					        Build the section/article index from the today's-paper page.

					        Returns a list of ``(section_title, articles)`` tuples in the order
					        the sections were first encountered; sections that collected no
					        articles are omitted. Each article is a dict with the keys
					        ``title``, ``url``, ``date``, ``description``, ``author`` and
					        ``content``.
					        '''
					        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')

					        articles = {}
					        # Articles that precede the first section heading are filed under 'News'
					        key = 'News'
					        ans = ['News']

					        # Visit section headings (class="section_title02") and article
					        # teasers (class="featurecontent") in document order
					        for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
					            if divtag['class'].startswith('section_title'):
					                # div contains section title
					                if not divtag.h3:
					                    continue
					                key = self.tag_to_string(divtag.h3,False)
					                # A heading that repeats must not produce a duplicate section
					                if key not in ans:
					                    ans.append(key)
					                self.log("Section name %s" % key)
					                continue

					            # div contains article data
					            h1tag = divtag.find('h1')
					            if not h1tag:
					                continue
					            atag = h1tag.find('a',href=True)
					            if not atag:
					                continue
					            url = self.url_prefix+'/news/todays-paper/'+atag['href']
					            title = self.tag_to_string(atag,False)
					            pubdate = ''

					            description = ''
					            ptag = divtag.find('p')
					            if ptag:
					                description = self.tag_to_string(ptag,False)

					            author = ''
					            autag = divtag.find('h4')
					            if autag:
					                author = self.tag_to_string(autag,False)

					            # setdefault replaces the dict.has_key() check, which is gone in Python 3
					            articles.setdefault(key, []).append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))

					        return [(section, articles[section]) for section in ans if section in articles]
 | 
				
			||||||
							
								
								
									
										111
									
								
								resources/recipes/saskatoon_star_phoenix.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										111
									
								
								resources/recipes/saskatoon_star_phoenix.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,111 @@
 | 
				
			|||||||
 | 
					#!/usr/bin/env  python
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					www.canada.com
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from calibre.web.feeds.recipes import BasicNewsRecipe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class CanWestPaper(BasicNewsRecipe):
					    # Recipe that builds its feeds from the "today's paper" index page
					    # found under url_prefix. title/url_prefix/description select the
					    # paper; to switch papers, comment out the active three lines and
					    # un-comment the three lines of the desired paper below.

					    # active configuration: Saskatoon Star-Phoenix
					    title = u'Saskatoon Star-Phoenix'
					    url_prefix = 'http://www.thestarphoenix.com'
					    description = u'News from Saskatoon, SK'

					    # un-comment the following three lines for the Windsor Star
					    #title = u'Windsor Star'
					    #url_prefix = 'http://www.windsorstar.com'
					    #description = u'News from Windsor, ON'

					    # un-comment the following three lines for the Ottawa Citizen
					    #title = u'Ottawa Citizen'
					    #url_prefix = 'http://www.ottawacitizen.com'
					    #description = u'News from Ottawa, ON'

					    # un-comment the following three lines for the Montreal Gazette
					    #title = u'Montreal Gazette'
					    #url_prefix = 'http://www.montrealgazette.com'
					    #description = u'News from Montreal, QC'


					    language = 'en_CA'
					    __author__ = 'Nick Redding'
					    no_stylesheets = True
					    timefmt = ' [%b %d]'
					    extra_css = '''
					                .timestamp {  font-size:xx-small; display: block; }
					                #storyheader { font-size: medium; }
					                #storyheader h1 { font-size: x-large; }
					                #storyheader h2 { font-size: large;  font-style: italic; }
					                .byline { font-size:xx-small; }
					                #photocaption { font-size: small; font-style: italic }
					                #photocredit { font-size: xx-small; }'''
					    # only the story header and story body are kept from an article page
					    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
					    # page furniture that is removed from what is kept
					    remove_tags = [{'class':'comments'},
					                   dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
					                   dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
					                   dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
					                   dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
					                   dict(name='div', attrs={'class':'rule_grey_solid'}),
					                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]

					    def preprocess_html(self, soup):
					        '''
					        Delete the ``id`` attribute of every <div> whose id is empty.

					        Empty ids break the generated table of contents (for reasons
					        the original author could not determine).

					        :param soup: parsed article page
					        :return: the same soup object, modified in place
					        '''
					        # an empty result simply yields no iterations, so no separate
					        # emptiness check is needed
					        for div in soup.findAll('div', attrs={'id': ''}):
					            del div['id']
					        return soup


					    def parse_index(self):
					        '''
					        Build the feed list from the "today's paper" index page.

					        The section-title and article divs are visited in the order
					        findAll returns them; each article is filed under the most
					        recently seen section title, or under 'News' when no title
					        has been seen yet.

					        :return: list of (section title, list of article dicts),
					                 omitting sections that received no articles
					        '''
					        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')

					        articles = {}
					        key = 'News'
					        ans = ['News']

					        # Visit each div with class "section_title02" or "featurecontent"
					        for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
					            css_class = divtag['class']
					            if isinstance(css_class, (list, tuple)):
					                # NOTE(review): newer BeautifulSoup releases return the
					                # multi-valued 'class' attribute as a list, not a string
					                css_class = ' '.join(css_class)
					            if css_class.startswith('section_title'):
					                # div contains section title
					                if not divtag.h3:
					                    continue
					                key = self.tag_to_string(divtag.h3,False)
					                if key not in ans:
					                    # a repeated title must not emit the same section twice
					                    ans.append(key)
					                self.log("Section name %s" % key)
					                continue
					            # div contains article data
					            h1tag = divtag.find('h1')
					            if not h1tag:
					                continue
					            atag = h1tag.find('a',href=True)
					            if not atag:
					                continue
					            url = self.url_prefix+'/news/todays-paper/'+atag['href']
					            title = self.tag_to_string(atag,False)
					            pubdate = ''
					            description = ''
					            ptag = divtag.find('p')
					            if ptag:
					                description = self.tag_to_string(ptag,False)
					            author = ''
					            autag = divtag.find('h4')
					            if autag:
					                author = self.tag_to_string(autag,False)
					            # 'in'/setdefault instead of dict.has_key(), which is gone in Python 3
					            articles.setdefault(key, []).append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))

					        return [(section, articles[section]) for section in ans if section in articles]
 | 
				
			||||||
							
								
								
									
										136
									
								
								resources/recipes/vancouver_provice.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										136
									
								
								resources/recipes/vancouver_provice.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,136 @@
 | 
				
			|||||||
 | 
					#!/usr/bin/env  python
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					www.canada.com
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from calibre.web.feeds.recipes import BasicNewsRecipe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class CanWestPaper(BasicNewsRecipe):
					    # Recipe that builds its feeds from the "today's paper" index page
					    # found under url_prefix. title/url_prefix/description select the
					    # paper; to switch papers, comment out the active three lines and
					    # un-comment the three lines of the desired paper below.

					    # active configuration: Vancouver Province
					    title = u'Vancouver Province'
					    url_prefix = 'http://www.theprovince.com'
					    description = u'News from Vancouver, BC'

					    # un-comment the following three lines for the Vancouver Sun
					    #title = u'Vancouver Sun'
					    #url_prefix = 'http://www.vancouversun.com'
					    #description = u'News from Vancouver, BC'

					    # un-comment the following three lines for the Edmonton Journal
					    #title = u'Edmonton Journal'
					    #url_prefix = 'http://www.edmontonjournal.com'
					    #description = u'News from Edmonton, AB'

					    # un-comment the following three lines for the Calgary Herald
					    #title = u'Calgary Herald'
					    #url_prefix = 'http://www.calgaryherald.com'
					    #description = u'News from Calgary, AB'

					    # un-comment the following three lines for the Regina Leader-Post
					    #title = u'Regina Leader-Post'
					    #url_prefix = 'http://www.leaderpost.com'
					    #description = u'News from Regina, SK'

					    # un-comment the following three lines for the Saskatoon Star-Phoenix
					    #title = u'Saskatoon Star-Phoenix'
					    #url_prefix = 'http://www.thestarphoenix.com'
					    #description = u'News from Saskatoon, SK'

					    # un-comment the following three lines for the Windsor Star
					    #title = u'Windsor Star'
					    #url_prefix = 'http://www.windsorstar.com'
					    #description = u'News from Windsor, ON'

					    # un-comment the following three lines for the Ottawa Citizen
					    #title = u'Ottawa Citizen'
					    #url_prefix = 'http://www.ottawacitizen.com'
					    #description = u'News from Ottawa, ON'

					    # un-comment the following three lines for the Montreal Gazette
					    #title = u'Montreal Gazette'
					    #url_prefix = 'http://www.montrealgazette.com'
					    #description = u'News from Montreal, QC'


					    language = 'en_CA'
					    __author__ = 'Nick Redding'
					    no_stylesheets = True
					    timefmt = ' [%b %d]'
					    extra_css = '''
					                .timestamp {  font-size:xx-small; display: block; }
					                #storyheader { font-size: medium; }
					                #storyheader h1 { font-size: x-large; }
					                #storyheader h2 { font-size: large;  font-style: italic; }
					                .byline { font-size:xx-small; }
					                #photocaption { font-size: small; font-style: italic }
					                #photocredit { font-size: xx-small; }'''
					    # only the story header and story body are kept from an article page
					    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
					    # page furniture that is removed from what is kept
					    remove_tags = [{'class':'comments'},
					                   dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
					                   dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
					                   dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
					                   dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
					                   dict(name='div', attrs={'class':'rule_grey_solid'}),
					                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]

					    def preprocess_html(self, soup):
					        '''
					        Delete the ``id`` attribute of every <div> whose id is empty.

					        Empty ids break the generated table of contents (for reasons
					        the original author could not determine).

					        :param soup: parsed article page
					        :return: the same soup object, modified in place
					        '''
					        # an empty result simply yields no iterations, so no separate
					        # emptiness check is needed
					        for div in soup.findAll('div', attrs={'id': ''}):
					            del div['id']
					        return soup


					    def parse_index(self):
					        '''
					        Build the feed list from the "today's paper" index page.

					        The section-title and article divs are visited in the order
					        findAll returns them; each article is filed under the most
					        recently seen section title, or under 'News' when no title
					        has been seen yet.

					        :return: list of (section title, list of article dicts),
					                 omitting sections that received no articles
					        '''
					        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')

					        articles = {}
					        key = 'News'
					        ans = ['News']

					        # Visit each div with class "section_title02" or "featurecontent"
					        for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
					            css_class = divtag['class']
					            if isinstance(css_class, (list, tuple)):
					                # NOTE(review): newer BeautifulSoup releases return the
					                # multi-valued 'class' attribute as a list, not a string
					                css_class = ' '.join(css_class)
					            if css_class.startswith('section_title'):
					                # div contains section title
					                if not divtag.h3:
					                    continue
					                key = self.tag_to_string(divtag.h3,False)
					                if key not in ans:
					                    # a repeated title must not emit the same section twice
					                    ans.append(key)
					                self.log("Section name %s" % key)
					                continue
					            # div contains article data
					            h1tag = divtag.find('h1')
					            if not h1tag:
					                continue
					            atag = h1tag.find('a',href=True)
					            if not atag:
					                continue
					            url = self.url_prefix+'/news/todays-paper/'+atag['href']
					            title = self.tag_to_string(atag,False)
					            pubdate = ''
					            description = ''
					            ptag = divtag.find('p')
					            if ptag:
					                description = self.tag_to_string(ptag,False)
					            author = ''
					            autag = divtag.find('h4')
					            if autag:
					                author = self.tag_to_string(autag,False)
					            # 'in'/setdefault instead of dict.has_key(), which is gone in Python 3
					            articles.setdefault(key, []).append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))

					        return [(section, articles[section]) for section in ans if section in articles]
 | 
				
			||||||
							
								
								
									
										131
									
								
								resources/recipes/vancouver_sun.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										131
									
								
								resources/recipes/vancouver_sun.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,131 @@
 | 
				
			|||||||
 | 
					#!/usr/bin/env  python
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					www.canada.com
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from calibre.web.feeds.recipes import BasicNewsRecipe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class CanWestPaper(BasicNewsRecipe):
					    # Recipe that builds its feeds from the "today's paper" index page
					    # found under url_prefix. title/url_prefix/description select the
					    # paper; to switch papers, comment out the active three lines and
					    # un-comment the three lines of the desired paper below.

					    # active configuration: Vancouver Sun
					    title = u'Vancouver Sun'
					    url_prefix = 'http://www.vancouversun.com'
					    description = u'News from Vancouver, BC'

					    # un-comment the following three lines for the Edmonton Journal
					    #title = u'Edmonton Journal'
					    #url_prefix = 'http://www.edmontonjournal.com'
					    #description = u'News from Edmonton, AB'

					    # un-comment the following three lines for the Calgary Herald
					    #title = u'Calgary Herald'
					    #url_prefix = 'http://www.calgaryherald.com'
					    #description = u'News from Calgary, AB'

					    # un-comment the following three lines for the Regina Leader-Post
					    #title = u'Regina Leader-Post'
					    #url_prefix = 'http://www.leaderpost.com'
					    #description = u'News from Regina, SK'

					    # un-comment the following three lines for the Saskatoon Star-Phoenix
					    #title = u'Saskatoon Star-Phoenix'
					    #url_prefix = 'http://www.thestarphoenix.com'
					    #description = u'News from Saskatoon, SK'

					    # un-comment the following three lines for the Windsor Star
					    #title = u'Windsor Star'
					    #url_prefix = 'http://www.windsorstar.com'
					    #description = u'News from Windsor, ON'

					    # un-comment the following three lines for the Ottawa Citizen
					    #title = u'Ottawa Citizen'
					    #url_prefix = 'http://www.ottawacitizen.com'
					    #description = u'News from Ottawa, ON'

					    # un-comment the following three lines for the Montreal Gazette
					    #title = u'Montreal Gazette'
					    #url_prefix = 'http://www.montrealgazette.com'
					    #description = u'News from Montreal, QC'


					    language = 'en_CA'
					    __author__ = 'Nick Redding'
					    no_stylesheets = True
					    timefmt = ' [%b %d]'
					    extra_css = '''
					                .timestamp {  font-size:xx-small; display: block; }
					                #storyheader { font-size: medium; }
					                #storyheader h1 { font-size: x-large; }
					                #storyheader h2 { font-size: large;  font-style: italic; }
					                .byline { font-size:xx-small; }
					                #photocaption { font-size: small; font-style: italic }
					                #photocredit { font-size: xx-small; }'''
					    # only the story header and story body are kept from an article page
					    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
					    # page furniture that is removed from what is kept
					    remove_tags = [{'class':'comments'},
					                   dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
					                   dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
					                   dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
					                   dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
					                   dict(name='div', attrs={'class':'rule_grey_solid'}),
					                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]

					    def preprocess_html(self, soup):
					        '''
					        Delete the ``id`` attribute of every <div> whose id is empty.

					        Empty ids break the generated table of contents (for reasons
					        the original author could not determine).

					        :param soup: parsed article page
					        :return: the same soup object, modified in place
					        '''
					        # an empty result simply yields no iterations, so no separate
					        # emptiness check is needed
					        for div in soup.findAll('div', attrs={'id': ''}):
					            del div['id']
					        return soup


					    def parse_index(self):
					        '''
					        Build the feed list from the "today's paper" index page.

					        The section-title and article divs are visited in the order
					        findAll returns them; each article is filed under the most
					        recently seen section title, or under 'News' when no title
					        has been seen yet.

					        :return: list of (section title, list of article dicts),
					                 omitting sections that received no articles
					        '''
					        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')

					        articles = {}
					        key = 'News'
					        ans = ['News']

					        # Visit each div with class "section_title02" or "featurecontent"
					        for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
					            css_class = divtag['class']
					            if isinstance(css_class, (list, tuple)):
					                # NOTE(review): newer BeautifulSoup releases return the
					                # multi-valued 'class' attribute as a list, not a string
					                css_class = ' '.join(css_class)
					            if css_class.startswith('section_title'):
					                # div contains section title
					                if not divtag.h3:
					                    continue
					                key = self.tag_to_string(divtag.h3,False)
					                if key not in ans:
					                    # a repeated title must not emit the same section twice
					                    ans.append(key)
					                self.log("Section name %s" % key)
					                continue
					            # div contains article data
					            h1tag = divtag.find('h1')
					            if not h1tag:
					                continue
					            atag = h1tag.find('a',href=True)
					            if not atag:
					                continue
					            url = self.url_prefix+'/news/todays-paper/'+atag['href']
					            title = self.tag_to_string(atag,False)
					            pubdate = ''
					            description = ''
					            ptag = divtag.find('p')
					            if ptag:
					                description = self.tag_to_string(ptag,False)
					            author = ''
					            autag = divtag.find('h4')
					            if autag:
					                author = self.tag_to_string(autag,False)
					            # 'in'/setdefault instead of dict.has_key(), which is gone in Python 3
					            articles.setdefault(key, []).append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))

					        return [(section, articles[section]) for section in ans if section in articles]
 | 
				
			||||||
							
								
								
									
										141
									
								
								resources/recipes/vic_times.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										141
									
								
								resources/recipes/vic_times.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,141 @@
 | 
				
			|||||||
 | 
					#!/usr/bin/env  python
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					www.canada.com
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from calibre.web.feeds.recipes import BasicNewsRecipe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class CanWestPaper(BasicNewsRecipe):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # un-comment the following three lines for the Victoria Times Colonist
 | 
				
			||||||
 | 
					    title = u'Victoria Times Colonist'
 | 
				
			||||||
 | 
					    url_prefix = 'http://www.timescolonist.com'
 | 
				
			||||||
 | 
					    description = u'News from Victoria, BC'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # un-comment the following three lines for the Vancouver Province
 | 
				
			||||||
 | 
					    #title = u'Vancouver Province'
 | 
				
			||||||
 | 
					    #url_prefix = 'http://www.theprovince.com'
 | 
				
			||||||
 | 
					    #description = u'News from Vancouver, BC'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # un-comment the following three lines for the Vancouver Sun
 | 
				
			||||||
 | 
					    #title = u'Vancouver Sun'
 | 
				
			||||||
 | 
					    #url_prefix = 'http://www.vancouversun.com'
 | 
				
			||||||
 | 
					    #description = u'News from Vancouver, BC'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # un-comment the following three lines for the Edmonton Journal
 | 
				
			||||||
 | 
					    #title = u'Edmonton Journal'
 | 
				
			||||||
 | 
					    #url_prefix = 'http://www.edmontonjournal.com'
 | 
				
			||||||
 | 
					    #description = u'News from Edmonton, AB'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # un-comment the following three lines for the Calgary Herald
 | 
				
			||||||
 | 
					    #title = u'Calgary Herald'
 | 
				
			||||||
 | 
					    #url_prefix = 'http://www.calgaryherald.com'
 | 
				
			||||||
 | 
					    #description = u'News from Calgary, AB'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # un-comment the following three lines for the Regina Leader-Post
 | 
				
			||||||
 | 
					    #title = u'Regina Leader-Post'
 | 
				
			||||||
 | 
					    #url_prefix = 'http://www.leaderpost.com'
 | 
				
			||||||
 | 
					    #description = u'News from Regina, SK'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # un-comment the following three lines for the Saskatoon Star-Phoenix
 | 
				
			||||||
 | 
					    #title = u'Saskatoon Star-Phoenix'
 | 
				
			||||||
 | 
					    #url_prefix = 'http://www.thestarphoenix.com'
 | 
				
			||||||
 | 
					    #description = u'News from Saskatoon, SK'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # un-comment the following three lines for the Windsor Star
 | 
				
			||||||
 | 
					    #title = u'Windsor Star'
 | 
				
			||||||
 | 
					    #url_prefix = 'http://www.windsorstar.com'
 | 
				
			||||||
 | 
					    #description = u'News from Windsor, ON'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # un-comment the following three lines for the Ottawa Citizen
 | 
				
			||||||
 | 
					    #title = u'Ottawa Citizen'
 | 
				
			||||||
 | 
					    #url_prefix = 'http://www.ottawacitizen.com'
 | 
				
			||||||
 | 
					    #description = u'News from Ottawa, ON'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # un-comment the following three lines for the Montreal Gazette
 | 
				
			||||||
 | 
					    #title = u'Montreal Gazette'
 | 
				
			||||||
 | 
					    #url_prefix = 'http://www.montrealgazette.com'
 | 
				
			||||||
 | 
					    #description = u'News from Montreal, QC'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    language = 'en_CA'
 | 
				
			||||||
 | 
					    __author__ = 'Nick Redding'
 | 
				
			||||||
 | 
					    no_stylesheets = True
 | 
				
			||||||
 | 
					    timefmt = ' [%b %d]'
 | 
				
			||||||
 | 
					    extra_css = '''
 | 
				
			||||||
 | 
					                .timestamp {  font-size:xx-small; display: block; }
 | 
				
			||||||
 | 
					                #storyheader { font-size: medium; }
 | 
				
			||||||
 | 
					                #storyheader h1 { font-size: x-large; }
 | 
				
			||||||
 | 
					                #storyheader h2 { font-size: large;  font-style: italic; }
 | 
				
			||||||
 | 
					                .byline { font-size:xx-small; }
 | 
				
			||||||
 | 
					                #photocaption { font-size: small; font-style: italic }
 | 
				
			||||||
 | 
					                #photocredit { font-size: xx-small; }'''
 | 
				
			||||||
 | 
					    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
 | 
				
			||||||
 | 
					    remove_tags = [{'class':'comments'},
 | 
				
			||||||
 | 
					                   dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
 | 
				
			||||||
 | 
					                   dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
 | 
				
			||||||
 | 
					                   dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
 | 
				
			||||||
 | 
					                   dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
 | 
				
			||||||
 | 
					                   dict(name='div', attrs={'class':'rule_grey_solid'}),
 | 
				
			||||||
 | 
					                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def preprocess_html(self,soup):
					        """Remove empty ``id`` attributes from <div> tags and return the soup.

					        Divs carrying ``id=""`` were observed to break the generated table of
					        contents (the cause is unknown), so the attribute is deleted in place.
					        The same soup object that was passed in is returned.
					        """
					        # ``or ()`` keeps the loop a no-op when nothing matched.
					        for blank_id_div in soup.findAll('div',attrs={'id':''}) or ():
					            del blank_id_div['id']
					        return soup
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def parse_index(self):
					        """Build the section/article index from the "today's paper" page.

					        Fetches ``url_prefix + '/news/todays-paper/index.html'`` and walks its
					        ``section_title02`` and ``featurecontent`` divs in document order.  A
					        section-title div switches the current section; every other div is
					        treated as one article and filed under the current section (articles
					        seen before any section title go under 'News').

					        Returns a list of ``(section_title, articles)`` tuples in order of
					        first appearance.  Each article is a dict with the keys ``title``,
					        ``url``, ``date``, ``description``, ``author`` and ``content``.
					        Sections without articles are omitted, and a section title that
					        occurs more than once on the page yields a single entry.
					        """
					        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')

					        articles = {}
					        key = 'News'      # current section; default until a title div is seen
					        ans = ['News']    # section titles in order of first appearance

					        # Find each div with class "section_title02" or "featurecontent"
					        for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
					            if divtag['class'].startswith('section_title'):
					                # div contains section title
					                if not divtag.h3:
					                    continue
					                key = self.tag_to_string(divtag.h3,False)
					                # A repeated title must not be listed twice, otherwise the
					                # same section would be emitted twice in the result.
					                if key not in ans:
					                    ans.append(key)
					                self.log("Section name %s" % key)
					                continue

					            # div contains article data; skip it unless it has a linked headline
					            h1tag = divtag.find('h1')
					            if not h1tag:
					                continue
					            atag = h1tag.find('a',href=True)
					            if not atag:
					                continue
					            url = self.url_prefix+'/news/todays-paper/'+atag['href']
					            title = self.tag_to_string(atag,False)
					            pubdate = ''

					            description = ''
					            ptag = divtag.find('p')
					            if ptag:
					                description = self.tag_to_string(ptag,False)

					            author = ''
					            autag = divtag.find('h4')
					            if autag:
					                author = self.tag_to_string(autag,False)

					            articles.setdefault(key, []).append(
					                dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))

					        return [(section, articles[section]) for section in ans if section in articles]
 | 
				
			||||||
							
								
								
									
										106
									
								
								resources/recipes/windows_star.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										106
									
								
								resources/recipes/windows_star.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,106 @@
 | 
				
			|||||||
 | 
					#!/usr/bin/env  python
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					www.canada.com
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from calibre.web.feeds.recipes import BasicNewsRecipe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class CanWestPaper(BasicNewsRecipe):
					    # Recipe that builds an e-book from one newspaper's "today's paper"
					    # index page.  Exactly one title/url_prefix/description triple below
					    # should be active; the alternatives stay commented out.

					    # Windsor Star (active configuration)
					    title = u'Windsor Star'
					    url_prefix = 'http://www.windsorstar.com'
					    description = u'News from Windsor, ON'

					    # un-comment the following three lines (and comment out the active
					    # triple above) for the Ottawa Citizen
					    #title = u'Ottawa Citizen'
					    #url_prefix = 'http://www.ottawacitizen.com'
					    #description = u'News from Ottawa, ON'

					    # un-comment the following three lines (and comment out the active
					    # triple above) for the Montreal Gazette
					    #title = u'Montreal Gazette'
					    #url_prefix = 'http://www.montrealgazette.com'
					    #description = u'News from Montreal, QC'


					    language = 'en_CA'
					    __author__ = 'Nick Redding'
					    no_stylesheets = True
					    timefmt = ' [%b %d]'
					    extra_css = '''
					                .timestamp {  font-size:xx-small; display: block; }
					                #storyheader { font-size: medium; }
					                #storyheader h1 { font-size: x-large; }
					                #storyheader h2 { font-size: large;  font-style: italic; }
					                .byline { font-size:xx-small; }
					                #photocaption { font-size: small; font-style: italic }
					                #photocredit { font-size: xx-small; }'''
					    # Only the story header and body are kept from each article page.
					    keep_only_tags = [dict(name='div', attrs={'id':'storyheader'}),dict(name='div', attrs={'id':'storycontent'})]
					    # Page furniture (comments, navigation, share/print tools, ...) to drop.
					    remove_tags = [{'class':'comments'},
					                   dict(name='div', attrs={'class':'navbar'}),dict(name='div', attrs={'class':'morelinks'}),
					                   dict(name='div', attrs={'class':'viewmore'}),dict(name='li', attrs={'class':'email'}),
					                   dict(name='div', attrs={'class':'story_tool_hr'}),dict(name='div', attrs={'class':'clear'}),
					                   dict(name='div', attrs={'class':'story_tool'}),dict(name='div', attrs={'class':'copyright'}),
					                   dict(name='div', attrs={'class':'rule_grey_solid'}),
					                   dict(name='li', attrs={'class':'print'}),dict(name='li', attrs={'class':'share'}),dict(name='ul', attrs={'class':'bullet'})]

					    def preprocess_html(self,soup):
					        """Remove empty ``id`` attributes from <div> tags and return the soup.

					        Divs with ``id=""`` break the generated TOC for unknown reasons.
					        """
					        for div in soup.findAll('div',attrs={'id':''}) or ():
					            del div['id']
					        return soup


					    def parse_index(self):
					        """Build the section/article index from the "today's paper" page.

					        Fetches ``url_prefix + '/news/todays-paper/index.html'`` and walks its
					        ``section_title02`` and ``featurecontent`` divs in document order.  A
					        section-title div switches the current section; every other div is
					        treated as one article and filed under the current section (articles
					        seen before any section title go under 'News').

					        Returns a list of ``(section_title, articles)`` tuples in order of
					        first appearance.  Each article is a dict with the keys ``title``,
					        ``url``, ``date``, ``description``, ``author`` and ``content``.
					        Sections without articles are omitted, and a section title that
					        occurs more than once on the page yields a single entry.
					        """
					        soup = self.index_to_soup(self.url_prefix+'/news/todays-paper/index.html')

					        articles = {}
					        key = 'News'      # current section; default until a title div is seen
					        ans = ['News']    # section titles in order of first appearance

					        # Find each div with class "section_title02" or "featurecontent"
					        for divtag in soup.findAll('div',attrs={'class' : ["section_title02","featurecontent"]}):
					            if divtag['class'].startswith('section_title'):
					                # div contains section title
					                if not divtag.h3:
					                    continue
					                key = self.tag_to_string(divtag.h3,False)
					                # A repeated title must not be listed twice, otherwise the
					                # same section would be emitted twice in the result.
					                if key not in ans:
					                    ans.append(key)
					                self.log("Section name %s" % key)
					                continue

					            # div contains article data; skip it unless it has a linked headline
					            h1tag = divtag.find('h1')
					            if not h1tag:
					                continue
					            atag = h1tag.find('a',href=True)
					            if not atag:
					                continue
					            url = self.url_prefix+'/news/todays-paper/'+atag['href']
					            title = self.tag_to_string(atag,False)
					            pubdate = ''

					            description = ''
					            ptag = divtag.find('p')
					            if ptag:
					                description = self.tag_to_string(ptag,False)

					            author = ''
					            autag = divtag.find('h4')
					            if autag:
					                author = self.tag_to_string(autag,False)

					            articles.setdefault(key, []).append(
					                dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))

					        return [(section, articles[section]) for section in ans if section in articles]
 | 
				
			||||||
@ -5,6 +5,7 @@ __docformat__ = 'restructuredtext en'
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from calibre.web.feeds.news import BasicNewsRecipe
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
 | 
					from calibre import strftime
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# http://online.wsj.com/page/us_in_todays_paper.html
 | 
					# http://online.wsj.com/page/us_in_todays_paper.html
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -67,6 +68,13 @@ class WallStreetJournal(BasicNewsRecipe):
 | 
				
			|||||||
        def parse_index(self):
 | 
					        def parse_index(self):
 | 
				
			||||||
            soup = self.wsj_get_index()
 | 
					            soup = self.wsj_get_index()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            year = strftime('%Y')
 | 
				
			||||||
 | 
					            for x in soup.findAll('td', attrs={'class':'b14'}):
 | 
				
			||||||
 | 
					                txt = self.tag_to_string(x).strip()
 | 
				
			||||||
 | 
					                if year in txt:
 | 
				
			||||||
 | 
					                    self.timefmt = ' [%s]'%txt
 | 
				
			||||||
 | 
					                    break
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            left_column = soup.find(
 | 
					            left_column = soup.find(
 | 
				
			||||||
                    text=lambda t: 'begin ITP Left Column' in str(t))
 | 
					                    text=lambda t: 'begin ITP Left Column' in str(t))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -91,7 +99,7 @@ class WallStreetJournal(BasicNewsRecipe):
 | 
				
			|||||||
                    url = url.partition('#')[0]
 | 
					                    url = url.partition('#')[0]
 | 
				
			||||||
                    desc = ''
 | 
					                    desc = ''
 | 
				
			||||||
                    d = x.findNextSibling(True)
 | 
					                    d = x.findNextSibling(True)
 | 
				
			||||||
                    if d.get('class', None) == 'arialResize':
 | 
					                    if d is not None and d.get('class', None) == 'arialResize':
 | 
				
			||||||
                        desc = self.tag_to_string(d)
 | 
					                        desc = self.tag_to_string(d)
 | 
				
			||||||
                        desc = desc.partition(u'\u2022')[0]
 | 
					                        desc = desc.partition(u'\u2022')[0]
 | 
				
			||||||
                    self.log('\t\tFound article:', title)
 | 
					                    self.log('\t\tFound article:', title)
 | 
				
			||||||
 | 
				
			|||||||
@ -3,47 +3,139 @@
 | 
				
			|||||||
__license__   = 'GPL v3'
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
'''
 | 
					'''
 | 
				
			||||||
online.wsj.com.com
 | 
					online.wsj.com
 | 
				
			||||||
'''
 | 
					'''
 | 
				
			||||||
import re
 | 
					import re
 | 
				
			||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
 | 
					from calibre.web.feeds.recipes import BasicNewsRecipe
 | 
				
			||||||
 | 
					from calibre.ebooks.BeautifulSoup import Tag, NavigableString
 | 
				
			||||||
 | 
					from datetime import timedelta, date
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class WSJ(BasicNewsRecipe):
 | 
					class WSJ(BasicNewsRecipe):
 | 
				
			||||||
    # formatting adapted from original recipe by Kovid Goyal and Sujata Raman
 | 
					    # formatting adapted from original recipe by Kovid Goyal and Sujata Raman
 | 
				
			||||||
    title          = u'Wall Street Journal (free)'
 | 
					    title          = u'Wall Street Journal (free)'
 | 
				
			||||||
    __author__     = 'Nick Redding'
 | 
					    __author__     = 'Nick Redding'
 | 
				
			||||||
    language = 'en'
 | 
					    language = 'en'
 | 
				
			||||||
    description = ('All the free content from the Wall Street Journal (business'
 | 
					    description = ('All the free content from the Wall Street Journal (business, financial and political news)')
 | 
				
			||||||
            ', financial and political news)')
 | 
					
 | 
				
			||||||
    no_stylesheets = True
 | 
					    no_stylesheets = True
 | 
				
			||||||
    timefmt = ' [%b %d]'
 | 
					    timefmt = ' [%b %d]'
 | 
				
			||||||
    extra_css   = '''h1{font-size:large; font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif;}
 | 
					
 | 
				
			||||||
                    h2{font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; font-size:small; font-style:italic;}
 | 
					    # customization notes: delete sections you are not interested in
 | 
				
			||||||
                    .subhead{font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; font-size:small; font-style:italic;}
 | 
					    # set omit_paid_content to False if you want the paid content article snippets
 | 
				
			||||||
                    .insettipUnit {font-family:Arial,Sans-serif;font-size:xx-small;}
 | 
					    # set oldest_article to the maximum number of days back from today to include articles
 | 
				
			||||||
                    .targetCaption{font-size:x-small; font-family:Arial,Helvetica,sans-serif;}
 | 
					    sectionlist = [
 | 
				
			||||||
                    .article{font-family :Arial,Helvetica,sans-serif; font-size:x-small;}
 | 
					                        ['/home-page','Front Page'],
 | 
				
			||||||
                    .tagline { ont-size:xx-small;}
 | 
					                        ['/public/page/news-opinion-commentary.html','Commentary'],
 | 
				
			||||||
                    .dateStamp {font-family:Arial,Helvetica,sans-serif;}
 | 
					                        ['/public/page/news-global-world.html','World News'],
 | 
				
			||||||
                    h3{font-family:Arial,Helvetica,sans-serif; font-size:xx-small;}
 | 
					                        ['/public/page/news-world-business.html','US News'],
 | 
				
			||||||
                    .byline {font-family:Arial,Helvetica,sans-serif; font-size:xx-small; list-style-type: none;}
 | 
					                        ['/public/page/news-business-us.html','Business'],
 | 
				
			||||||
 | 
					                        ['/public/page/news-financial-markets-stock.html','Markets'],
 | 
				
			||||||
 | 
					                        ['/public/page/news-tech-technology.html','Technology'],
 | 
				
			||||||
 | 
					                        ['/public/page/news-personal-finance.html','Personal Finnce'],
 | 
				
			||||||
 | 
					                        ['/public/page/news-lifestyle-arts-entertainment.html','Life & Style'],
 | 
				
			||||||
 | 
					                        ['/public/page/news-real-estate-homes.html','Real Estate'],
 | 
				
			||||||
 | 
					                        ['/public/page/news-career-jobs.html','Careers'],
 | 
				
			||||||
 | 
					                        ['/public/page/news-small-business-marketing.html','Small Business']
 | 
				
			||||||
 | 
					                    ]
 | 
				
			||||||
 | 
					    oldest_article = 2
 | 
				
			||||||
 | 
					    omit_paid_content = True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    extra_css   = '''h1{font-size:large; font-family:Times,serif;}
 | 
				
			||||||
 | 
					                    h2{font-family:Times,serif; font-size:small; font-style:italic;}
 | 
				
			||||||
 | 
					                    .subhead{font-family:Times,serif; font-size:small; font-style:italic;}
 | 
				
			||||||
 | 
					                    .insettipUnit {font-family:Times,serif;font-size:xx-small;}
 | 
				
			||||||
 | 
					                    .targetCaption{font-size:x-small; font-family:Times,serif; font-style:italic; margin-top: 0.25em;}
 | 
				
			||||||
 | 
					                    .article{font-family:Times,serif; font-size:x-small;}
 | 
				
			||||||
 | 
					                    .tagline { font-size:xx-small;}
 | 
				
			||||||
 | 
					                    .dateStamp {font-family:Times,serif;}
 | 
				
			||||||
 | 
					                    h3{font-family:Times,serif; font-size:xx-small;}
 | 
				
			||||||
 | 
					                    .byline {font-family:Times,serif; font-size:xx-small; list-style-type: none;}
 | 
				
			||||||
                    .metadataType-articleCredits {list-style-type: none;}
 | 
					                    .metadataType-articleCredits {list-style-type: none;}
 | 
				
			||||||
                    h6{ font-family:Georgia,"Century Schoolbook","Times New Roman",Times,serif; font-size:small;font-style:italic;}
 | 
					                    h6{font-family:Times,serif; font-size:small; font-style:italic;}
 | 
				
			||||||
                    .paperLocation{font-size:xx-small;}'''
 | 
					                    .paperLocation{font-size:xx-small;}'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    remove_tags_before = dict(name='h1')
 | 
					
 | 
				
			||||||
    remove_tags =   [   dict(id=["articleTabs_tab_article", "articleTabs_tab_comments",
 | 
					    remove_tags_before = dict({'class':re.compile('^articleHeadlineBox')})
 | 
				
			||||||
                                 "articleTabs_tab_interactive","articleTabs_tab_video",
 | 
					    remove_tags =   [   dict({'id':re.compile('^articleTabs_tab_')}),
 | 
				
			||||||
                                 "articleTabs_tab_map","articleTabs_tab_slideshow"]),
 | 
					                        #dict(id=["articleTabs_tab_article", "articleTabs_tab_comments",
 | 
				
			||||||
			{'class':['footer_columns','network','insetCol3wide','interactive','video','slideshow','map',
 | 
					                        #         "articleTabs_tab_interactive","articleTabs_tab_video",
 | 
				
			||||||
			'insettip','insetClose','more_in', "insetContent", 'articleTools_bottom', 'aTools', 'tooltip',
 | 
					                        #         "articleTabs_tab_map","articleTabs_tab_slideshow"]),
 | 
				
			||||||
			'adSummary', 'nav-inline','insetFullBracket']},
 | 
								{'class':  ['footer_columns','network','insetCol3wide','interactive','video','slideshow','map',
 | 
				
			||||||
                        dict(rel='shortcut icon'),
 | 
					                                    'insettip','insetClose','more_in', "insetContent",
 | 
				
			||||||
 | 
					                        #            'articleTools_bottom','articleTools_bottom mjArticleTools',
 | 
				
			||||||
 | 
					                                    'aTools', 'tooltip',
 | 
				
			||||||
 | 
					                                    'adSummary', 'nav-inline','insetFullBracket']},
 | 
				
			||||||
 | 
					                        dict({'class':re.compile('^articleTools_bottom')}),
 | 
				
			||||||
 | 
					                        dict(rel='shortcut icon')
 | 
				
			||||||
                    ]
 | 
					                    ]
 | 
				
			||||||
    remove_tags_after = [dict(id="article_story_body"), {'class':"article story"}]
 | 
					    remove_tags_after = [dict(id="article_story_body"), {'class':"article story"}]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def get_browser(self):
					        # Return the browser produced by BasicNewsRecipe.get_browser()
					        # without any further configuration.
					        return BasicNewsRecipe.get_browser()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def preprocess_html(self,soup):
 | 
					    def preprocess_html(self,soup):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        def decode_us_date(datestr):
					            """Convert a US-style date string to a ``datetime.date``.

					            Accepts '<Month> <day> <year>' with a full English month name
					            (case-insensitive), e.g. 'March 5 2010'.  A trailing comma on
					            the day or year ('March 5, 2010') is tolerated; any tokens after
					            the year are ignored.

					            Raises ValueError for an unknown month name or a non-numeric
					            day/year, and IndexError when fewer than three
					            whitespace-separated tokens are present.
					            """
					            month_names = ['january','february','march','april','may','june',
					                           'july','august','september','october','november','december']
					            tokens = datestr.strip().lower().split()
					            # list.index raises ValueError when the first token is not a month name
					            m = month_names.index(tokens[0])+1
					            d = int(tokens[1].rstrip(','))
					            y = int(tokens[2].rstrip(','))
					            return date(y,m,d)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # check if article is paid content
 | 
				
			||||||
 | 
					        if self.omit_paid_content:
 | 
				
			||||||
 | 
					            divtags = soup.findAll('div','tooltip')
 | 
				
			||||||
 | 
					            if divtags:
 | 
				
			||||||
 | 
					                for divtag in divtags:
 | 
				
			||||||
 | 
					                    if divtag.find(text="Subscriber Content"):
 | 
				
			||||||
 | 
					                        return None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # check if article is too old
 | 
				
			||||||
 | 
					        datetag = soup.find('li',attrs={'class' : re.compile("^dateStamp")})
 | 
				
			||||||
 | 
					        if datetag:
 | 
				
			||||||
 | 
					            dateline_string = self.tag_to_string(datetag,False)
 | 
				
			||||||
 | 
					            date_items = dateline_string.split(',')
 | 
				
			||||||
 | 
					            datestring = date_items[0]+date_items[1]
 | 
				
			||||||
 | 
					            article_date = decode_us_date(datestring)
 | 
				
			||||||
 | 
					            earliest_date = date.today() - timedelta(days=self.oldest_article)
 | 
				
			||||||
 | 
					            if article_date < earliest_date:
 | 
				
			||||||
 | 
					                self.log("Skipping article dated %s" % datestring)
 | 
				
			||||||
 | 
					                return None
 | 
				
			||||||
 | 
					            datetag.parent.extract()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            # place dateline in article heading
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            bylinetag = soup.find('h3','byline')
 | 
				
			||||||
 | 
					            if bylinetag:
 | 
				
			||||||
 | 
					                h3bylinetag = bylinetag
 | 
				
			||||||
 | 
					            else:
 | 
				
			||||||
 | 
					                bylinetag = soup.find('li','byline')
 | 
				
			||||||
 | 
					                if bylinetag:
 | 
				
			||||||
 | 
					                    h3bylinetag = bylinetag.h3
 | 
				
			||||||
 | 
					                    if not h3bylinetag:
 | 
				
			||||||
 | 
					                        h3bylinetag = bylinetag
 | 
				
			||||||
 | 
					                    bylinetag = bylinetag.parent
 | 
				
			||||||
 | 
					            if bylinetag:
 | 
				
			||||||
 | 
					                if h3bylinetag.a:
 | 
				
			||||||
 | 
					                    bylinetext = 'By '+self.tag_to_string(h3bylinetag.a,False)
 | 
				
			||||||
 | 
					                else:
 | 
				
			||||||
 | 
					                    bylinetext = self.tag_to_string(h3bylinetag,False)
 | 
				
			||||||
 | 
					                h3byline = Tag(soup,'h3',[('class','byline')])
 | 
				
			||||||
 | 
					                if bylinetext.isspace() or (bylinetext == ''):
 | 
				
			||||||
 | 
					                    h3byline.insert(0,NavigableString(date_items[0]+','+date_items[1]))
 | 
				
			||||||
 | 
					                else:
 | 
				
			||||||
 | 
					                    h3byline.insert(0,NavigableString(bylinetext+u'\u2014'+date_items[0]+','+date_items[1]))
 | 
				
			||||||
 | 
					                bylinetag.replaceWith(h3byline)
 | 
				
			||||||
 | 
					            else:
 | 
				
			||||||
 | 
					                headlinetag = soup.find('div',attrs={'class' : re.compile("^articleHeadlineBox")})
 | 
				
			||||||
 | 
					                if headlinetag:
 | 
				
			||||||
 | 
					                    dateline = Tag(soup,'h3', [('class','byline')])
 | 
				
			||||||
 | 
					                    dateline.insert(0,NavigableString(date_items[0]+','+date_items[1]))
 | 
				
			||||||
 | 
					                    headlinetag.insert(len(headlinetag),dateline)
 | 
				
			||||||
 | 
					        else: # if no date tag, don't process this page--it's not a news item
 | 
				
			||||||
 | 
					            return None
 | 
				
			||||||
        # This gets rid of the annoying superfluous bullet symbol preceding columnist bylines
 | 
					        # This gets rid of the annoying superfluous bullet symbol preceding columnist bylines
 | 
				
			||||||
        ultag = soup.find('ul',attrs={'class' : 'cMetadata metadataType-articleCredits'})
 | 
					        ultag = soup.find('ul',attrs={'class' : 'cMetadata metadataType-articleCredits'})
 | 
				
			||||||
        if ultag:
 | 
					        if ultag:
 | 
				
			||||||
@ -58,7 +150,7 @@ class WSJ(BasicNewsRecipe):
 | 
				
			|||||||
        key = None
 | 
					        key = None
 | 
				
			||||||
        ans = []
 | 
					        ans = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        def parse_index_page(page_name,page_title,omit_paid_content):
 | 
					        def parse_index_page(page_name,page_title):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            def article_title(tag):
 | 
					            def article_title(tag):
 | 
				
			||||||
                atag = tag.find('h2') # title is usually in an h2 tag
 | 
					                atag = tag.find('h2') # title is usually in an h2 tag
 | 
				
			||||||
@ -119,7 +211,6 @@ class WSJ(BasicNewsRecipe):
 | 
				
			|||||||
            soup = self.index_to_soup(pageurl)
 | 
					            soup = self.index_to_soup(pageurl)
 | 
				
			||||||
            # Find each instance of div with class including "headlineSummary"
 | 
					            # Find each instance of div with class including "headlineSummary"
 | 
				
			||||||
            for divtag in soup.findAll('div',attrs={'class' : re.compile("^headlineSummary")}):
 | 
					            for divtag in soup.findAll('div',attrs={'class' : re.compile("^headlineSummary")}):
 | 
				
			||||||
 | 
					 | 
				
			||||||
                # divtag contains all article data as ul's and li's
 | 
					                # divtag contains all article data as ul's and li's
 | 
				
			||||||
                # first, check if there is an h3 tag which provides a section name
 | 
					                # first, check if there is an h3 tag which provides a section name
 | 
				
			||||||
                stag = divtag.find('h3')
 | 
					                stag = divtag.find('h3')
 | 
				
			||||||
@ -162,7 +253,7 @@ class WSJ(BasicNewsRecipe):
 | 
				
			|||||||
                        # now skip paid subscriber articles if desired
 | 
					                        # now skip paid subscriber articles if desired
 | 
				
			||||||
                        subscriber_tag = litag.find(text="Subscriber Content")
 | 
					                        subscriber_tag = litag.find(text="Subscriber Content")
 | 
				
			||||||
                        if subscriber_tag:
 | 
					                        if subscriber_tag:
 | 
				
			||||||
                                if omit_paid_content:
 | 
					                                if self.omit_paid_content:
 | 
				
			||||||
                                    continue
 | 
					                                    continue
 | 
				
			||||||
                                # delete the tip div so it doesn't get in the way
 | 
					                                # delete the tip div so it doesn't get in the way
 | 
				
			||||||
                                tiptag = litag.find("div", { "class" : "tipTargetBox" })
 | 
					                                tiptag = litag.find("div", { "class" : "tipTargetBox" })
 | 
				
			||||||
@ -185,7 +276,7 @@ class WSJ(BasicNewsRecipe):
 | 
				
			|||||||
                            continue
 | 
					                            continue
 | 
				
			||||||
                        if url.startswith("/article"):
 | 
					                        if url.startswith("/article"):
 | 
				
			||||||
                            url = mainurl+url
 | 
					                            url = mainurl+url
 | 
				
			||||||
                        if not url.startswith("http"):
 | 
					                        if not url.startswith("http://online.wsj.com"):
 | 
				
			||||||
                            continue
 | 
					                            continue
 | 
				
			||||||
                        if not url.endswith(".html"):
 | 
					                        if not url.endswith(".html"):
 | 
				
			||||||
                            continue
 | 
					                            continue
 | 
				
			||||||
@ -214,48 +305,10 @@ class WSJ(BasicNewsRecipe):
 | 
				
			|||||||
                            articles[page_title] = []
 | 
					                            articles[page_title] = []
 | 
				
			||||||
                        articles[page_title].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
 | 
					                        articles[page_title].append(dict(title=title,url=url,date=pubdate,description=description,author=author,content=''))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # customization notes: delete sections you are not interested in
 | 
					 | 
				
			||||||
        # set omit_paid_content to False if you want the paid content article previews
 | 
					 | 
				
			||||||
        sectionlist = ['Front Page','Commentary','World News','US News','Business','Markets',
 | 
					 | 
				
			||||||
                       'Technology','Personal Finance','Life & Style','Real Estate','Careers','Small Business']
 | 
					 | 
				
			||||||
        omit_paid_content = True
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if 'Front Page' in sectionlist:
 | 
					        for page_name,page_title in self.sectionlist:
 | 
				
			||||||
            parse_index_page('/home-page','Front Page',omit_paid_content)
 | 
					            parse_index_page(page_name,page_title)
 | 
				
			||||||
            ans.append('Front Page')
 | 
					            ans.append(page_title)
 | 
				
			||||||
        if 'Commentary' in sectionlist:
 | 
					 | 
				
			||||||
            parse_index_page('/public/page/news-opinion-commentary.html','Commentary',omit_paid_content)
 | 
					 | 
				
			||||||
            ans.append('Commentary')
 | 
					 | 
				
			||||||
        if 'World News' in sectionlist:
 | 
					 | 
				
			||||||
            parse_index_page('/public/page/news-global-world.html','World News',omit_paid_content)
 | 
					 | 
				
			||||||
            ans.append('World News')
 | 
					 | 
				
			||||||
        if 'US News' in sectionlist:
 | 
					 | 
				
			||||||
            parse_index_page('/public/page/news-world-business.html','US News',omit_paid_content)
 | 
					 | 
				
			||||||
            ans.append('US News')
 | 
					 | 
				
			||||||
        if 'Business' in sectionlist:
 | 
					 | 
				
			||||||
            parse_index_page('/public/page/news-business-us.html','Business',omit_paid_content)
 | 
					 | 
				
			||||||
            ans.append('Business')
 | 
					 | 
				
			||||||
        if 'Markets' in sectionlist:
 | 
					 | 
				
			||||||
            parse_index_page('/public/page/news-financial-markets-stock.html','Markets',omit_paid_content)
 | 
					 | 
				
			||||||
            ans.append('Markets')
 | 
					 | 
				
			||||||
        if 'Technology' in sectionlist:
 | 
					 | 
				
			||||||
            parse_index_page('/public/page/news-tech-technology.html','Technology',omit_paid_content)
 | 
					 | 
				
			||||||
            ans.append('Technology')
 | 
					 | 
				
			||||||
        if 'Personal Finance' in sectionlist:
 | 
					 | 
				
			||||||
            parse_index_page('/public/page/news-personal-finance.html','Personal Finance',omit_paid_content)
 | 
					 | 
				
			||||||
            ans.append('Personal Finance')
 | 
					 | 
				
			||||||
        if 'Life & Style' in sectionlist:
 | 
					 | 
				
			||||||
            parse_index_page('/public/page/news-lifestyle-arts-entertainment.html','Life & Style',omit_paid_content)
 | 
					 | 
				
			||||||
            ans.append('Life & Style')
 | 
					 | 
				
			||||||
        if 'Real Estate' in sectionlist:
 | 
					 | 
				
			||||||
            parse_index_page('/public/page/news-real-estate-homes.html','Real Estate',omit_paid_content)
 | 
					 | 
				
			||||||
            ans.append('Real Estate')
 | 
					 | 
				
			||||||
        if 'Careers' in sectionlist:
 | 
					 | 
				
			||||||
            parse_index_page('/public/page/news-career-jobs.html','Careers',omit_paid_content)
 | 
					 | 
				
			||||||
            ans.append('Careers')
 | 
					 | 
				
			||||||
        if 'Small Business' in sectionlist:
 | 
					 | 
				
			||||||
            parse_index_page('/public/page/news-small-business-marketing.html','Small Business',omit_paid_content)
 | 
					 | 
				
			||||||
            ans.append('Small Business')
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
 | 
					        ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
 | 
				
			||||||
        return ans
 | 
					        return ans
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										125
									
								
								resources/recipes/yementimes.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										125
									
								
								resources/recipes/yementimes.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,125 @@
 | 
				
			|||||||
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
 | 
					from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class YemenTimesRecipe(BasicNewsRecipe):
					    '''
					    Recipe for the Yemen Times (yementimes.com).

					    The article list is scraped from the section pages listed in ``feeds``
					    (see ``parse_index``); every downloaded article is then rebuilt into a
					    minimal document holding only headline, lead image, date, byline and
					    story (see ``preprocess_html``).
					    '''
					    __license__  = 'GPL v3'
					    __author__ = 'kwetal'
					    language = 'en_YE'
					    country = 'YE'
					    version = 1

					    title = u'Yemen Times'
					    publisher = u'yementimes.com'
					    category = u'News, Opinion, Yemen'
					    description = u'Award winning weekly from Yemen, promoting press freedom, professional journalism and the defense of human rights.'

					    oldest_article = 7
					    max_articles_per_feed = 100
					    use_embedded_content = False
					    encoding = 'utf-8'

					    remove_empty_feeds = True
					    no_stylesheets = True
					    remove_javascript = True

					    keep_only_tags = []
					    keep_only_tags.append(dict(name = 'div', attrs = {'id': 'ctl00_ContentPlaceHolder1_MAINNEWS0_Panel1',
					                                                      'class': 'DMAIN2'}))
					    remove_attributes = ['style']

					    # Base URL; the hrefs found on the section pages are appended to it.
					    INDEX = 'http://www.yementimes.com/'
					    # (section title, section page URL) pairs consumed by parse_index().
					    feeds = []
					    feeds.append((u'Our Viewpoint', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=6&pnm=OUR%20VIEWPOINT'))
					    feeds.append((u'Local News', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=3&pnm=Local%20news'))
					    feeds.append((u'Their News', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=80&pnm=Their%20News'))
					    feeds.append((u'Report', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=8&pnm=report'))
					    feeds.append((u'Health', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=51&pnm=health'))
					    feeds.append((u'Interview', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=77&pnm=interview'))
					    feeds.append((u'Opinion', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=7&pnm=opinion'))
					    feeds.append((u'Business', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=5&pnm=business'))
					    feeds.append((u'Op-Ed', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=81&pnm=Op-Ed'))
					    feeds.append((u'Culture', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=75&pnm=Culture'))
					    feeds.append((u'Readers View', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=4&pnm=Readers%20View'))
					    feeds.append((u'Variety', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=9&pnm=Variety'))
					    feeds.append((u'Education', u'http://www.yementimes.com/DEFAULTSUB.ASPX?pnc=57&pnm=Education'))

					    extra_css = '''
					                body {font-family:verdana, arial, helvetica, geneva, sans-serif;}
					                div.yemen_byline {font-size: medium; font-weight: bold;}
					                div.yemen_date {font-size: small; color: #666666; margin-bottom: 0.6em;}
					                .yemen_caption {font-size: x-small; font-style: italic; color: #696969;}
					                '''

					    conversion_options = {'comments': description, 'tags': category, 'language': 'en',
					                          'publisher': publisher, 'linearize_tables': True}

					    def get_browser(self):
					        '''Return the default recipe browser with gzip handling switched on.'''
					        br = BasicNewsRecipe.get_browser()
					        br.set_handle_gzip(True)

					        return br

					    def parse_index(self):
					        '''
					        Build the list of ``(section title, articles)`` tuples.

					        For each entry in ``feeds`` the section page is fetched and every
					        ``<li>`` link inside the table following ``div.newsbox`` becomes one
					        article dict. Sections whose page lacks that structure are skipped
					        instead of aborting the whole download.
					        '''
					        answer = []
					        for feed_title, feed in self.feeds:
					            soup = self.index_to_soup(feed)

					            newsbox = soup.find('div', 'newsbox')
					            main = newsbox.findNextSibling('table') if newsbox is not None else None
					            if main is None:
					                # Expected layout not present on this page: nothing to list.
					                continue

					            articles = []
					            for li in main.findAll('li'):
					                link = li.a
					                if link is None or not link.get('href'):
					                    # List item without a usable link cannot become an article.
					                    continue
					                title = self.tag_to_string(link)
					                url = self.INDEX + link['href']
					                articles.append({'title': title, 'date': None, 'url': url, 'description': '<br/> '})

					            answer.append((feed_title, articles))

					        return answer

					    def preprocess_html(self, soup):
					        '''
					        Rebuild the article as a fresh document.

					        Moves, in this order, the headline (``div#DVMTIT``, renamed to
					        ``h1``) with the first image of ``div#DVTOP``, the publication date
					        and byline taken from ``div#DVTIT``, and the story (``div#DVDET``)
					        into a new soup, which is returned. Parts that are not found are
					        simply left out.
					        '''
					        freshSoup = self.getFreshSoup(soup)

					        headline = soup.find('div', attrs = {'id': 'DVMTIT'})
					        if headline:
					            # Locate the image before the headline is moved out of the old tree.
					            div = headline.findNext('div', attrs = {'id': 'DVTOP'})
					            img = None
					            if div:
					                img = div.find('img')

					            headline.name = 'h1'
					            freshSoup.body.append(headline)
					            if img is not None:
					                freshSoup.body.append(img)

					        byline = soup.find('div', attrs = {'id': 'DVTIT'})
					        if byline:
					            # Must be bound even when the byline carries no <span>; it is
					            # tested below.
					            date = None
					            date_el = byline.find('span')
					            if date_el:
					                pub_date = self.tag_to_string(date_el)
					                date = Tag(soup, 'div', attrs = [('class', 'yemen_date')])
					                date.append(pub_date)
					                # Take the date out so it is not repeated in the byline text.
					                date_el.extract()

					            raw = '<br/>'.join(['%s' % part for part in byline.findAll(text = True)])
					            author = BeautifulSoup('<div class="yemen_byline">' + raw + '</div>')

					            if date is not None:
					                freshSoup.body.append(date)
					            freshSoup.body.append(author)

					        story = soup.find('div', attrs = {'id': 'DVDET'})
					        if story:
					            # Tables that wrap an image get the caption class from extra_css.
					            for table in story.findAll('table'):
					                if table.find('img'):
					                    table['class'] = 'yemen_caption'

					            freshSoup.body.append(story)

					        return freshSoup

					    def getFreshSoup(self, oldSoup):
					        '''
					        Return an empty html/head/body soup whose <title> text is copied
					        from ``oldSoup`` when that document has a head with a title.
					        '''
					        freshSoup = BeautifulSoup('<html><head><title></title></head><body></body></html>')
					        if oldSoup.head is not None and oldSoup.head.title:
					            freshSoup.head.title.append(self.tag_to_string(oldSoup.head.title))
					        return freshSoup
 | 
				
			||||||
							
								
								
									
										23
									
								
								resources/viewer/images.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								resources/viewer/images.js
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,23 @@
 | 
				
			|||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * images management
 | 
				
			||||||
 | 
					 * Copyright 2008 Kovid Goyal
 | 
				
			||||||
 | 
					 * License: GNU GPL v3
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Cap every visible image so that it fits inside the window: the width
					// limit is measured from the image's left offset, the height limit from
					// the window height, each with a 5px margin.
					function scale_images() {
					    $("img:visible").each(function() {
					        var img = $(this);
					        var left = img.offset().left;
					        var width_limit = (window.innerWidth - left - 5) + "px";
					        img.css("max-width", width_limit);
					        var height_limit = (window.innerHeight - 5) + "px";
					        img.css("max-height", height_limit);
					    });
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Scale the images once right away, then again on every window resize.
					function setup_image_scaling_handlers() {
					    var rescale = function() {
					        scale_images();
					    };
					    rescale();
					    $(window).bind("resize", rescale);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -2,10 +2,11 @@ from __future__ import with_statement
 | 
				
			|||||||
__license__   = 'GPL v3'
 | 
					__license__   = 'GPL v3'
 | 
				
			||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 | 
					__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import sys
 | 
					import atexit, os, shutil, sys, tempfile, zipfile
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from calibre.ptempfile import PersistentTemporaryFile
 | 
					 | 
				
			||||||
from calibre.constants import numeric_version
 | 
					from calibre.constants import numeric_version
 | 
				
			||||||
 | 
					from calibre.ptempfile import PersistentTemporaryFile
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class Plugin(object):
 | 
					class Plugin(object):
 | 
				
			||||||
    '''
 | 
					    '''
 | 
				
			||||||
@ -231,6 +232,8 @@ class CatalogPlugin(Plugin):
 | 
				
			|||||||
    A plugin that implements a catalog generator.
 | 
					    A plugin that implements a catalog generator.
 | 
				
			||||||
    '''
 | 
					    '''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    resources_path = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    #: Output file type for which this plugin should be run
 | 
					    #: Output file type for which this plugin should be run
 | 
				
			||||||
    #: For example: 'epub' or 'xml'
 | 
					    #: For example: 'epub' or 'xml'
 | 
				
			||||||
    file_types = set([])
 | 
					    file_types = set([])
 | 
				
			||||||
@ -249,14 +252,18 @@ class CatalogPlugin(Plugin):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    cli_options = []
 | 
					    cli_options = []
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def search_sort_db(self, db, opts):
 | 
					    def search_sort_db(self, db, opts):
					        '''
					        Apply the search and sort options in ``opts`` to ``db`` and return
					        the result of ``db.get_data_as_dict()`` restricted to ``opts.ids``.

					        ``opts.search_text`` is only used when no ids were given;
					        sorting by ``opts.sort_by`` is always ascending.
					        '''
					        wanted_ids = opts.ids

					        # An explicit id list takes precedence over any search text
					        search_needed = not wanted_ids and opts.search_text
					        if search_needed:
					            db.search(opts.search_text)

					        sort_field = opts.sort_by
					        if sort_field:
					            ascending = True
					            db.sort(sort_field, ascending)

					        return db.get_data_as_dict(ids=wanted_ids)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def get_output_fields(self, opts):
 | 
					    def get_output_fields(self, opts):
 | 
				
			||||||
        # Return a list of requested fields, with opts.sort_by first
 | 
					        # Return a list of requested fields, with opts.sort_by first
 | 
				
			||||||
@ -272,11 +279,40 @@ class CatalogPlugin(Plugin):
 | 
				
			|||||||
            fields = list(all_fields & requested_fields)
 | 
					            fields = list(all_fields & requested_fields)
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            fields = list(all_fields)
 | 
					            fields = list(all_fields)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        fields.sort()
 | 
					        fields.sort()
 | 
				
			||||||
        fields.insert(0,fields.pop(int(fields.index(opts.sort_by))))
 | 
					        if opts.sort_by:
 | 
				
			||||||
 | 
					            fields.insert(0,fields.pop(int(fields.index(opts.sort_by))))
 | 
				
			||||||
        return fields
 | 
					        return fields
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def run(self, path_to_output, opts, db):
 | 
					    def initialize(self):
					        '''
					        If plugin is not a built-in, copy the plugin's .ui and .py files from
					        the zip file to $TMPDIR.
					        Tab will be dynamically generated and added to the Catalog Options dialog in
					        calibre.gui2.dialogs.catalog.py:Catalog

					        Nothing is done for built-in or disabled plugins. Extraction is
					        best-effort: a file that cannot be extracted is reported on stdout
					        and skipped.
					        '''
					        from calibre.customize.builtins import plugins as builtin_plugins
					        from calibre.customize.ui import config
					        from calibre.ptempfile import PersistentTemporaryDirectory

					        if type(self) in builtin_plugins or \
					           self.name in config['disabled_plugins']:
					            return

					        files_to_copy = ["%s.%s" % (self.name.lower(), ext) for ext in ["ui", "py"]]
					        resources = zipfile.ZipFile(self.plugin_path, 'r')
					        try:
					            # The temporary directory is created once and then reused
					            if self.resources_path is None:
					                self.resources_path = PersistentTemporaryDirectory('_plugin_resources', prefix='')

					            for fname in files_to_copy:
					                try:
					                    resources.extract(fname, self.resources_path)
					                except Exception:
					                    # Deliberately best-effort, but no longer swallows
					                    # KeyboardInterrupt/SystemExit like a bare except would.
					                    print(" customize:__init__.initialize(): %s not found in %s" % (fname, os.path.basename(self.plugin_path)))
					        finally:
					            # Always release the archive handle, even if something above raised
					            resources.close()
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
 | 
					    def run(self, path_to_output, opts, db, ids):
 | 
				
			||||||
        '''
 | 
					        '''
 | 
				
			||||||
        Run the plugin. Must be implemented in subclasses.
 | 
					        Run the plugin. Must be implemented in subclasses.
 | 
				
			||||||
        It should generate the catalog in the format specified
 | 
					        It should generate the catalog in the format specified
 | 
				
			||||||
 | 
				
			|||||||
@ -18,7 +18,7 @@ class BLACKBERRY(USBMS):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    VENDOR_ID   = [0x0fca]
 | 
					    VENDOR_ID   = [0x0fca]
 | 
				
			||||||
    PRODUCT_ID  = [0x8004, 0x0004]
 | 
					    PRODUCT_ID  = [0x8004, 0x0004]
 | 
				
			||||||
    BCD         = [0x0200, 0x0107]
 | 
					    BCD         = [0x0200, 0x0107, 0x0201]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    VENDOR_NAME = 'RIM'
 | 
					    VENDOR_NAME = 'RIM'
 | 
				
			||||||
    WINDOWS_MAIN_MEM = 'BLACKBERRY_SD'
 | 
					    WINDOWS_MAIN_MEM = 'BLACKBERRY_SD'
 | 
				
			||||||
 | 
				
			|||||||
@ -86,4 +86,5 @@ class NOOK(USBMS):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        return drives
 | 
					        return drives
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def sanitize_path_components(self, components):
					        # Return a new list in which every '#' in each path component
					        # has been replaced by '_'
					        cleaned = []
					        for part in components:
					            cleaned.append(part.replace('#', '_'))
					        return cleaned
 | 
				
			||||||
 | 
				
			|||||||
@ -782,6 +782,13 @@ class Device(DeviceConfig, DevicePlugin):
 | 
				
			|||||||
        '''
 | 
					        '''
 | 
				
			||||||
        return default
 | 
					        return default
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def sanitize_path_components(self, components):
					        '''
					        Hook for device specific clean-up of the path components of a
					        file that is about to be uploaded to the device.

					        This default implementation hands ``components`` back unchanged.
					        '''
					        return components
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def create_upload_path(self, path, mdata, fname):
 | 
					    def create_upload_path(self, path, mdata, fname):
 | 
				
			||||||
        path = os.path.abspath(path)
 | 
					        path = os.path.abspath(path)
 | 
				
			||||||
        extra_components = []
 | 
					        extra_components = []
 | 
				
			||||||
@ -834,6 +841,7 @@ class Device(DeviceConfig, DevicePlugin):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        extra_components = list(map(remove_trailing_periods, extra_components))
 | 
					        extra_components = list(map(remove_trailing_periods, extra_components))
 | 
				
			||||||
        components = shorten_components_to(250 - len(path), extra_components)
 | 
					        components = shorten_components_to(250 - len(path), extra_components)
 | 
				
			||||||
 | 
					        components = self.sanitize_path_components(components)
 | 
				
			||||||
        filepath = os.path.join(path, *components)
 | 
					        filepath = os.path.join(path, *components)
 | 
				
			||||||
        filedir = os.path.dirname(filepath)
 | 
					        filedir = os.path.dirname(filepath)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -132,7 +132,8 @@ class FB2MLizer(object):
 | 
				
			|||||||
            href = self.oeb_book.guide['titlepage'].href
 | 
					            href = self.oeb_book.guide['titlepage'].href
 | 
				
			||||||
            item = self.oeb_book.manifest.hrefs[href]
 | 
					            item = self.oeb_book.manifest.hrefs[href]
 | 
				
			||||||
            if item.spine_position is None:
 | 
					            if item.spine_position is None:
 | 
				
			||||||
                stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
 | 
					                stylizer = Stylizer(item.data, item.href, self.oeb_book,
 | 
				
			||||||
 | 
					                        self.opts, self.opts.output_profile)
 | 
				
			||||||
                output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item))
 | 
					                output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item))
 | 
				
			||||||
        return output
 | 
					        return output
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -152,7 +153,7 @@ class FB2MLizer(object):
 | 
				
			|||||||
        text = []
 | 
					        text = []
 | 
				
			||||||
        for item in self.oeb_book.spine:
 | 
					        for item in self.oeb_book.spine:
 | 
				
			||||||
            self.log.debug('Converting %s to FictionBook2 XML' % item.href)
 | 
					            self.log.debug('Converting %s to FictionBook2 XML' % item.href)
 | 
				
			||||||
            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
 | 
					            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
 | 
				
			||||||
            text.append(self.add_page_anchor(item))
 | 
					            text.append(self.add_page_anchor(item))
 | 
				
			||||||
            text += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
 | 
					            text += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
 | 
				
			||||||
        return ''.join(text)
 | 
					        return ''.join(text)
 | 
				
			||||||
 | 
				
			|||||||
@ -32,7 +32,7 @@ class LITOutput(OutputFormatPlugin):
 | 
				
			|||||||
        mangler(oeb, opts)
 | 
					        mangler(oeb, opts)
 | 
				
			||||||
        rasterizer = SVGRasterizer()
 | 
					        rasterizer = SVGRasterizer()
 | 
				
			||||||
        rasterizer(oeb, opts)
 | 
					        rasterizer(oeb, opts)
 | 
				
			||||||
        lit = LitWriter()
 | 
					        lit = LitWriter(self.opts)
 | 
				
			||||||
        lit(oeb, output_path)
 | 
					        lit(oeb, output_path)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -134,7 +134,7 @@ def warn(x):
 | 
				
			|||||||
class ReBinary(object):
 | 
					class ReBinary(object):
 | 
				
			||||||
    NSRMAP = {'': None, XML_NS: 'xml'}
 | 
					    NSRMAP = {'': None, XML_NS: 'xml'}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __init__(self, root, item, oeb, map=HTML_MAP):
 | 
					    def __init__(self, root, item, oeb, opts, map=HTML_MAP):
 | 
				
			||||||
        self.item = item
 | 
					        self.item = item
 | 
				
			||||||
        self.logger = oeb.logger
 | 
					        self.logger = oeb.logger
 | 
				
			||||||
        self.manifest = oeb.manifest
 | 
					        self.manifest = oeb.manifest
 | 
				
			||||||
@ -143,7 +143,7 @@ class ReBinary(object):
 | 
				
			|||||||
        self.anchors = []
 | 
					        self.anchors = []
 | 
				
			||||||
        self.page_breaks = []
 | 
					        self.page_breaks = []
 | 
				
			||||||
        self.is_html  = is_html = map is HTML_MAP
 | 
					        self.is_html  = is_html = map is HTML_MAP
 | 
				
			||||||
        self.stylizer = Stylizer(root, item.href, oeb) if is_html else None
 | 
					        self.stylizer = Stylizer(root, item.href, oeb, opts) if is_html else None
 | 
				
			||||||
        self.tree_to_binary(root)
 | 
					        self.tree_to_binary(root)
 | 
				
			||||||
        self.content = self.buf.getvalue()
 | 
					        self.content = self.buf.getvalue()
 | 
				
			||||||
        self.ahc = self.build_ahc() if is_html else None
 | 
					        self.ahc = self.build_ahc() if is_html else None
 | 
				
			||||||
@ -295,9 +295,8 @@ def preserve(function):
 | 
				
			|||||||
    return wrapper
 | 
					    return wrapper
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class LitWriter(object):
 | 
					class LitWriter(object):
 | 
				
			||||||
    def __init__(self):
 | 
					    def __init__(self, opts):
 | 
				
			||||||
        # Wow, no options
 | 
					        self.opts = opts
 | 
				
			||||||
        pass
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _litize_oeb(self):
 | 
					    def _litize_oeb(self):
 | 
				
			||||||
        oeb = self._oeb
 | 
					        oeb = self._oeb
 | 
				
			||||||
@ -469,7 +468,7 @@ class LitWriter(object):
 | 
				
			|||||||
            secnum = 0
 | 
					            secnum = 0
 | 
				
			||||||
            if isinstance(data, etree._Element):
 | 
					            if isinstance(data, etree._Element):
 | 
				
			||||||
                self._add_folder(name)
 | 
					                self._add_folder(name)
 | 
				
			||||||
                rebin = ReBinary(data, item, self._oeb, map=HTML_MAP)
 | 
					                rebin = ReBinary(data, item, self._oeb, self.opts, map=HTML_MAP)
 | 
				
			||||||
                self._add_file(name + '/ahc', rebin.ahc, 0)
 | 
					                self._add_file(name + '/ahc', rebin.ahc, 0)
 | 
				
			||||||
                self._add_file(name + '/aht', rebin.aht, 0)
 | 
					                self._add_file(name + '/aht', rebin.aht, 0)
 | 
				
			||||||
                item.page_breaks = rebin.page_breaks
 | 
					                item.page_breaks = rebin.page_breaks
 | 
				
			||||||
@ -562,7 +561,7 @@ class LitWriter(object):
 | 
				
			|||||||
        meta.attrib['ms--minimum_level'] = '0'
 | 
					        meta.attrib['ms--minimum_level'] = '0'
 | 
				
			||||||
        meta.attrib['ms--attr5'] = '1'
 | 
					        meta.attrib['ms--attr5'] = '1'
 | 
				
			||||||
        meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper()
 | 
					        meta.attrib['ms--guid'] = '{%s}' % str(uuid.uuid4()).upper()
 | 
				
			||||||
        rebin = ReBinary(meta, None, self._oeb, map=OPF_MAP)
 | 
					        rebin = ReBinary(meta, None, self._oeb, self.opts, map=OPF_MAP)
 | 
				
			||||||
        meta = rebin.content
 | 
					        meta = rebin.content
 | 
				
			||||||
        self._meta = meta
 | 
					        self._meta = meta
 | 
				
			||||||
        self._add_file('/meta', meta)
 | 
					        self._add_file('/meta', meta)
 | 
				
			||||||
 | 
				
			|||||||
@ -128,6 +128,10 @@ def do_set_metadata(opts, mi, stream, stream_type):
 | 
				
			|||||||
        mi.title_sort = title_sort(opts.title)
 | 
					        mi.title_sort = title_sort(opts.title)
 | 
				
			||||||
    if getattr(opts, 'tags', None) is not None:
 | 
					    if getattr(opts, 'tags', None) is not None:
 | 
				
			||||||
        mi.tags = [t.strip() for t in opts.tags.split(',')]
 | 
					        mi.tags = [t.strip() for t in opts.tags.split(',')]
 | 
				
			||||||
 | 
					    if getattr(opts, 'series', None) is not None:
 | 
				
			||||||
 | 
					        mi.series = opts.series.strip()
 | 
				
			||||||
 | 
					    if getattr(opts, 'series_index', None) is not None:
 | 
				
			||||||
 | 
					        mi.series_index = float(opts.series_index.strip())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if getattr(opts, 'cover', None) is not None:
 | 
					    if getattr(opts, 'cover', None) is not None:
 | 
				
			||||||
        ext = os.path.splitext(opts.cover)[1].replace('.', '').upper()
 | 
					        ext = os.path.splitext(opts.cover)[1].replace('.', '').upper()
 | 
				
			||||||
 | 
				
			|||||||
@ -134,7 +134,10 @@ def metadata_from_filename(name, pat=None):
 | 
				
			|||||||
            mi.authors = aus
 | 
					            mi.authors = aus
 | 
				
			||||||
            if prefs['swap_author_names'] and mi.authors:
 | 
					            if prefs['swap_author_names'] and mi.authors:
 | 
				
			||||||
                def swap(a):
 | 
					                def swap(a):
 | 
				
			||||||
                    parts = a.split()
 | 
					                    if ',' in a:
 | 
				
			||||||
 | 
					                        parts = a.split(',', 1)
 | 
				
			||||||
 | 
					                    else:
 | 
				
			||||||
 | 
					                        parts = a.split(None, 1)
 | 
				
			||||||
                    if len(parts) > 1:
 | 
					                    if len(parts) > 1:
 | 
				
			||||||
                        t = parts[-1]
 | 
					                        t = parts[-1]
 | 
				
			||||||
                        parts = parts[:-1]
 | 
					                        parts = parts[:-1]
 | 
				
			||||||
 | 
				
			|||||||
@ -92,6 +92,7 @@ class MobiMLizer(object):
 | 
				
			|||||||
    def __call__(self, oeb, context):
 | 
					    def __call__(self, oeb, context):
 | 
				
			||||||
        oeb.logger.info('Converting XHTML to Mobipocket markup...')
 | 
					        oeb.logger.info('Converting XHTML to Mobipocket markup...')
 | 
				
			||||||
        self.oeb = oeb
 | 
					        self.oeb = oeb
 | 
				
			||||||
 | 
					        self.opts = context
 | 
				
			||||||
        self.profile = profile = context.dest
 | 
					        self.profile = profile = context.dest
 | 
				
			||||||
        self.fnums = fnums = dict((v, k) for k, v in profile.fnums.items())
 | 
					        self.fnums = fnums = dict((v, k) for k, v in profile.fnums.items())
 | 
				
			||||||
        self.fmap = KeyMapper(profile.fbase, profile.fbase, fnums.keys())
 | 
					        self.fmap = KeyMapper(profile.fbase, profile.fbase, fnums.keys())
 | 
				
			||||||
@ -114,7 +115,7 @@ class MobiMLizer(object):
 | 
				
			|||||||
    def mobimlize_spine(self):
 | 
					    def mobimlize_spine(self):
 | 
				
			||||||
        'Iterate over the spine and convert it to MOBIML'
 | 
					        'Iterate over the spine and convert it to MOBIML'
 | 
				
			||||||
        for item in self.oeb.spine:
 | 
					        for item in self.oeb.spine:
 | 
				
			||||||
            stylizer = Stylizer(item.data, item.href, self.oeb, self.profile)
 | 
					            stylizer = Stylizer(item.data, item.href, self.oeb, self.opts, self.profile)
 | 
				
			||||||
            body = item.data.find(XHTML('body'))
 | 
					            body = item.data.find(XHTML('body'))
 | 
				
			||||||
            nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP)
 | 
					            nroot = etree.Element(XHTML('html'), nsmap=MOBI_NSMAP)
 | 
				
			||||||
            nbody = etree.SubElement(nroot, XHTML('body'))
 | 
					            nbody = etree.SubElement(nroot, XHTML('body'))
 | 
				
			||||||
 | 
				
			|||||||
@ -563,6 +563,16 @@ class MobiReader(object):
 | 
				
			|||||||
                    recindex = attrib.pop(attr, None) or recindex
 | 
					                    recindex = attrib.pop(attr, None) or recindex
 | 
				
			||||||
                if recindex is not None:
 | 
					                if recindex is not None:
 | 
				
			||||||
                    attrib['src'] = 'images/%s.jpg' % recindex
 | 
					                    attrib['src'] = 'images/%s.jpg' % recindex
 | 
				
			||||||
 | 
					                for attr in ('width', 'height'):
 | 
				
			||||||
 | 
					                    if attr in attrib:
 | 
				
			||||||
 | 
					                        val = attrib[attr]
 | 
				
			||||||
 | 
					                        if val.lower().endswith('em'):
 | 
				
			||||||
 | 
					                            try:
 | 
				
			||||||
 | 
					                                nval = float(val[:-2])
 | 
				
			||||||
 | 
					                                nval *= 16 * (168.451/72) # Assume this was set using the Kindle profile
 | 
				
			||||||
 | 
					                                attrib[attr] = "%dpx"%int(nval)
 | 
				
			||||||
 | 
					                            except:
 | 
				
			||||||
 | 
					                                del attrib[attr]
 | 
				
			||||||
            elif tag.tag == 'pre':
 | 
					            elif tag.tag == 'pre':
 | 
				
			||||||
                if not tag.text:
 | 
					                if not tag.text:
 | 
				
			||||||
                    tag.tag = 'div'
 | 
					                    tag.tag = 'div'
 | 
				
			||||||
 | 
				
			|||||||
@ -1,99 +0,0 @@
 | 
				
			|||||||
'''
 | 
					 | 
				
			||||||
Registry associating file extensions with Reader classes.
 | 
					 | 
				
			||||||
'''
 | 
					 | 
				
			||||||
from __future__ import with_statement
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
__license__   = 'GPL v3'
 | 
					 | 
				
			||||||
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
import sys, os, logging
 | 
					 | 
				
			||||||
from itertools import chain
 | 
					 | 
				
			||||||
import calibre
 | 
					 | 
				
			||||||
from calibre.ebooks.oeb.base import OEBError
 | 
					 | 
				
			||||||
from calibre.ebooks.oeb.reader import OEBReader
 | 
					 | 
				
			||||||
from calibre.ebooks.oeb.writer import OEBWriter
 | 
					 | 
				
			||||||
from calibre.ebooks.lit.reader import LitReader
 | 
					 | 
				
			||||||
from calibre.ebooks.lit.writer import LitWriter
 | 
					 | 
				
			||||||
from calibre.ebooks.mobi.reader import MobiReader
 | 
					 | 
				
			||||||
from calibre.ebooks.mobi.writer import MobiWriter
 | 
					 | 
				
			||||||
from calibre.ebooks.oeb.base import OEBBook
 | 
					 | 
				
			||||||
from calibre.ebooks.oeb.profile import Context
 | 
					 | 
				
			||||||
from calibre.utils.config import Config
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
__all__ = ['get_reader']
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
REGISTRY = {
 | 
					 | 
				
			||||||
    '.opf': (OEBReader, None),
 | 
					 | 
				
			||||||
    '.lit': (LitReader, LitWriter),
 | 
					 | 
				
			||||||
    '.mobi': (MobiReader, MobiWriter),
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def ReaderFactory(path):
 | 
					 | 
				
			||||||
    if os.path.isdir(path):
 | 
					 | 
				
			||||||
        return OEBReader
 | 
					 | 
				
			||||||
    ext = os.path.splitext(path)[1].lower()
 | 
					 | 
				
			||||||
    Reader = REGISTRY.get(ext, (None, None))[0]
 | 
					 | 
				
			||||||
    if Reader is None:
 | 
					 | 
				
			||||||
        raise OEBError('Unknown e-book file extension %r' % ext)
 | 
					 | 
				
			||||||
    return Reader
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def WriterFactory(path):
 | 
					 | 
				
			||||||
    if os.path.isdir(path):
 | 
					 | 
				
			||||||
        return OEBWriter
 | 
					 | 
				
			||||||
    ext = os.path.splitext(path)[1].lower()
 | 
					 | 
				
			||||||
    if not os.path.exists(path) and not ext:
 | 
					 | 
				
			||||||
        return OEBWriter
 | 
					 | 
				
			||||||
    Writer = REGISTRY.get(ext, (None, None))[1]
 | 
					 | 
				
			||||||
    if Writer is None:
 | 
					 | 
				
			||||||
        raise OEBError('Unknown e-book file extension %r' % ext)
 | 
					 | 
				
			||||||
    return Writer
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def option_parser(Reader, Writer):
 | 
					 | 
				
			||||||
    cfg = Config('ebook-convert', _('Options to control e-book conversion.'))
 | 
					 | 
				
			||||||
    Reader.config(cfg)
 | 
					 | 
				
			||||||
    for Transform in chain(Reader.TRANSFORMS, Writer.TRANSFORMS):
 | 
					 | 
				
			||||||
        Transform.config(cfg)
 | 
					 | 
				
			||||||
    Writer.config(cfg)
 | 
					 | 
				
			||||||
    parser = cfg.option_parser()
 | 
					 | 
				
			||||||
    parser.add_option('--encoding', default=None,
 | 
					 | 
				
			||||||
        help=_('Character encoding for input. Default is to auto detect.'))
 | 
					 | 
				
			||||||
    parser.add_option('-o', '--output', default=None, 
 | 
					 | 
				
			||||||
        help=_('Output file. Default is derived from input filename.'))
 | 
					 | 
				
			||||||
    parser.add_option('-p', '--pretty-print', action='store_true',
 | 
					 | 
				
			||||||
        default=False, help=_('Produce more human-readable XML output.'))
 | 
					 | 
				
			||||||
    parser.add_option('-v', '--verbose', default=0, action='count',
 | 
					 | 
				
			||||||
        help=_('Useful for debugging.'))
 | 
					 | 
				
			||||||
    return parser
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def main(argv=sys.argv):
 | 
					 | 
				
			||||||
    if len(argv) < 3:
 | 
					 | 
				
			||||||
        print _("Usage: ebook-convert INFILE OUTFILE [OPTIONS..]")
 | 
					 | 
				
			||||||
        return 1
 | 
					 | 
				
			||||||
    inpath, outpath = argv[1], argv[2]
 | 
					 | 
				
			||||||
    Reader = ReaderFactory(inpath)
 | 
					 | 
				
			||||||
    Writer = WriterFactory(outpath)
 | 
					 | 
				
			||||||
    parser = option_parser(Reader, Writer)
 | 
					 | 
				
			||||||
    opts, args = parser.parse_args(argv[3:])
 | 
					 | 
				
			||||||
    if len(args) != 0:
 | 
					 | 
				
			||||||
        parser.print_help()
 | 
					 | 
				
			||||||
        return 1
 | 
					 | 
				
			||||||
    logger = logging.getLogger('ebook-convert')
 | 
					 | 
				
			||||||
    calibre.setup_cli_handlers(logger, logging.DEBUG)
 | 
					 | 
				
			||||||
    encoding = opts.encoding
 | 
					 | 
				
			||||||
    pretty_print = opts.pretty_print
 | 
					 | 
				
			||||||
    oeb = OEBBook(encoding=encoding, pretty_print=pretty_print, logger=logger)
 | 
					 | 
				
			||||||
    context = Context(Reader.DEFAULT_PROFILE, Writer.DEFAULT_PROFILE)
 | 
					 | 
				
			||||||
    reader = Reader.generate(opts)
 | 
					 | 
				
			||||||
    writer = Writer.generate(opts)
 | 
					 | 
				
			||||||
    transforms = []
 | 
					 | 
				
			||||||
    for Transform in chain(Reader.TRANSFORMS, Writer.TRANSFORMS):
 | 
					 | 
				
			||||||
        transforms.append(Transform.generate(opts))
 | 
					 | 
				
			||||||
    reader(oeb, inpath)
 | 
					 | 
				
			||||||
    for transform in transforms:
 | 
					 | 
				
			||||||
        transform(oeb, context)
 | 
					 | 
				
			||||||
    writer(oeb, outpath)
 | 
					 | 
				
			||||||
    return 0
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
if __name__ == '__main__':
 | 
					 | 
				
			||||||
    sys.exit(main())
 | 
					 | 
				
			||||||
@ -110,9 +110,9 @@ class CSSSelector(etree.XPath):
 | 
				
			|||||||
class Stylizer(object):
 | 
					class Stylizer(object):
 | 
				
			||||||
    STYLESHEETS = WeakKeyDictionary()
 | 
					    STYLESHEETS = WeakKeyDictionary()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __init__(self, tree, path, oeb, profile=PROFILES['PRS505'],
 | 
					    def __init__(self, tree, path, oeb, opts, profile=PROFILES['PRS505'],
 | 
				
			||||||
            extra_css='', user_css=''):
 | 
					            extra_css='', user_css=''):
 | 
				
			||||||
        self.oeb = oeb
 | 
					        self.oeb, self.opts = oeb, opts
 | 
				
			||||||
        self.profile = profile
 | 
					        self.profile = profile
 | 
				
			||||||
        self.logger = oeb.logger
 | 
					        self.logger = oeb.logger
 | 
				
			||||||
        item = oeb.manifest.hrefs[path]
 | 
					        item = oeb.manifest.hrefs[path]
 | 
				
			||||||
@ -249,6 +249,8 @@ class Stylizer(object):
 | 
				
			|||||||
                style.update(self._normalize_font(prop.cssValue))
 | 
					                style.update(self._normalize_font(prop.cssValue))
 | 
				
			||||||
            elif name == 'list-style':
 | 
					            elif name == 'list-style':
 | 
				
			||||||
                style.update(self._normalize_list_style(prop.cssValue))
 | 
					                style.update(self._normalize_list_style(prop.cssValue))
 | 
				
			||||||
 | 
					            elif name == 'text-align':
 | 
				
			||||||
 | 
					                style.update(self._normalize_text_align(prop.cssValue))
 | 
				
			||||||
            else:
 | 
					            else:
 | 
				
			||||||
                style[name] = prop.value
 | 
					                style[name] = prop.value
 | 
				
			||||||
        if 'font-size' in style:
 | 
					        if 'font-size' in style:
 | 
				
			||||||
@ -306,6 +308,19 @@ class Stylizer(object):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        return style
 | 
					        return style
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def _normalize_text_align(self, cssvalue):
 | 
				
			||||||
 | 
					        style = {}
 | 
				
			||||||
 | 
					        text = cssvalue.cssText
 | 
				
			||||||
 | 
					        if text == 'inherit':
 | 
				
			||||||
 | 
					            style['text-align'] = 'inherit'
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            if text in ('left', 'justify'):
 | 
				
			||||||
 | 
					                val = 'left' if self.opts.dont_justify else 'justify'
 | 
				
			||||||
 | 
					                style['text-align'] = val
 | 
				
			||||||
 | 
					            else:
 | 
				
			||||||
 | 
					                style['text-align'] = text
 | 
				
			||||||
 | 
					        return style
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _normalize_font(self, cssvalue):
 | 
					    def _normalize_font(self, cssvalue):
 | 
				
			||||||
        composition = ('font-style', 'font-variant', 'font-weight',
 | 
					        composition = ('font-style', 'font-variant', 'font-weight',
 | 
				
			||||||
                       'font-size', 'line-height', 'font-family')
 | 
					                       'font-size', 'line-height', 'font-family')
 | 
				
			||||||
@ -411,6 +426,7 @@ class Style(object):
 | 
				
			|||||||
        return result
 | 
					        return result
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _unit_convert(self, value, base=None, font=None):
 | 
					    def _unit_convert(self, value, base=None, font=None):
 | 
				
			||||||
 | 
					        ' Return value in pts'
 | 
				
			||||||
        if isinstance(value, (int, long, float)):
 | 
					        if isinstance(value, (int, long, float)):
 | 
				
			||||||
            return value
 | 
					            return value
 | 
				
			||||||
        try:
 | 
					        try:
 | 
				
			||||||
@ -447,6 +463,9 @@ class Style(object):
 | 
				
			|||||||
                result = value * 0.40
 | 
					                result = value * 0.40
 | 
				
			||||||
        return result
 | 
					        return result
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def pt_to_px(self, value):
 | 
				
			||||||
 | 
					        return (self._profile.dpi / 72.0) * value
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @property
 | 
					    @property
 | 
				
			||||||
    def fontSize(self):
 | 
					    def fontSize(self):
 | 
				
			||||||
        def normalize_fontsize(value, base):
 | 
					        def normalize_fontsize(value, base):
 | 
				
			||||||
 | 
				
			|||||||
@ -141,7 +141,7 @@ class CSSFlattener(object):
 | 
				
			|||||||
            bs.append('text-align: '+ \
 | 
					            bs.append('text-align: '+ \
 | 
				
			||||||
                    ('left' if self.context.dont_justify else 'justify'))
 | 
					                    ('left' if self.context.dont_justify else 'justify'))
 | 
				
			||||||
            body.set('style', '; '.join(bs))
 | 
					            body.set('style', '; '.join(bs))
 | 
				
			||||||
            stylizer = Stylizer(html, item.href, self.oeb, profile,
 | 
					            stylizer = Stylizer(html, item.href, self.oeb, self.context, profile,
 | 
				
			||||||
                    user_css=self.context.extra_css,
 | 
					                    user_css=self.context.extra_css,
 | 
				
			||||||
                    extra_css=css)
 | 
					                    extra_css=css)
 | 
				
			||||||
            self.stylizers[item] = stylizer
 | 
					            self.stylizers[item] = stylizer
 | 
				
			||||||
 | 
				
			|||||||
@ -33,6 +33,7 @@ class CaseMangler(object):
 | 
				
			|||||||
    def __call__(self, oeb, context):
 | 
					    def __call__(self, oeb, context):
 | 
				
			||||||
        oeb.logger.info('Applying case-transforming CSS...')
 | 
					        oeb.logger.info('Applying case-transforming CSS...')
 | 
				
			||||||
        self.oeb = oeb
 | 
					        self.oeb = oeb
 | 
				
			||||||
 | 
					        self.opts = context
 | 
				
			||||||
        self.profile = context.source
 | 
					        self.profile = context.source
 | 
				
			||||||
        self.mangle_spine()
 | 
					        self.mangle_spine()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -44,7 +45,7 @@ class CaseMangler(object):
 | 
				
			|||||||
            relhref = item.relhref(href)
 | 
					            relhref = item.relhref(href)
 | 
				
			||||||
            etree.SubElement(html.find(XHTML('head')), XHTML('link'),
 | 
					            etree.SubElement(html.find(XHTML('head')), XHTML('link'),
 | 
				
			||||||
                             rel='stylesheet', href=relhref, type=CSS_MIME)
 | 
					                             rel='stylesheet', href=relhref, type=CSS_MIME)
 | 
				
			||||||
            stylizer = Stylizer(html, item.href, self.oeb, self.profile)
 | 
					            stylizer = Stylizer(html, item.href, self.oeb, self.opts, self.profile)
 | 
				
			||||||
            self.mangle_elem(html.find(XHTML('body')), stylizer)
 | 
					            self.mangle_elem(html.find(XHTML('body')), stylizer)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def text_transform(self, transform, text):
 | 
					    def text_transform(self, transform, text):
 | 
				
			||||||
 | 
				
			|||||||
@ -44,6 +44,7 @@ class SVGRasterizer(object):
 | 
				
			|||||||
    def __call__(self, oeb, context):
 | 
					    def __call__(self, oeb, context):
 | 
				
			||||||
        oeb.logger.info('Rasterizing SVG images...')
 | 
					        oeb.logger.info('Rasterizing SVG images...')
 | 
				
			||||||
        self.oeb = oeb
 | 
					        self.oeb = oeb
 | 
				
			||||||
 | 
					        self.opts = context
 | 
				
			||||||
        self.profile = context.dest
 | 
					        self.profile = context.dest
 | 
				
			||||||
        self.images = {}
 | 
					        self.images = {}
 | 
				
			||||||
        self.dataize_manifest()
 | 
					        self.dataize_manifest()
 | 
				
			||||||
@ -102,7 +103,7 @@ class SVGRasterizer(object):
 | 
				
			|||||||
    def rasterize_spine(self):
 | 
					    def rasterize_spine(self):
 | 
				
			||||||
        for item in self.oeb.spine:
 | 
					        for item in self.oeb.spine:
 | 
				
			||||||
            html = item.data
 | 
					            html = item.data
 | 
				
			||||||
            stylizer = Stylizer(html, item.href, self.oeb, self.profile)
 | 
					            stylizer = Stylizer(html, item.href, self.oeb, self.opts, self.profile)
 | 
				
			||||||
            self.rasterize_item(item, stylizer)
 | 
					            self.rasterize_item(item, stylizer)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def rasterize_item(self, item, stylizer):
 | 
					    def rasterize_item(self, item, stylizer):
 | 
				
			||||||
 | 
				
			|||||||
@ -20,6 +20,10 @@ class Font(object):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
class Column(object):
 | 
					class Column(object):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # A column contains an element is the element bulges out to
 | 
				
			||||||
 | 
					    # the left or the right by at most HFUZZ*col width.
 | 
				
			||||||
 | 
					    HFUZZ = 0.2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __init__(self):
 | 
					    def __init__(self):
 | 
				
			||||||
        self.left = self.right = self.top = self.bottom = 0
 | 
					        self.left = self.right = self.top = self.bottom = 0
 | 
				
			||||||
        self.width = self.height = 0
 | 
					        self.width = self.height = 0
 | 
				
			||||||
@ -41,6 +45,10 @@ class Column(object):
 | 
				
			|||||||
        for x in self.elements:
 | 
					        for x in self.elements:
 | 
				
			||||||
            yield x
 | 
					            yield x
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def contains(self, elem):
 | 
				
			||||||
 | 
					        return elem.left > self.left - self.HFUZZ*self.width and \
 | 
				
			||||||
 | 
					               elem.right < self.right + self.HFUZZ*self.width
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class Element(object):
 | 
					class Element(object):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __eq__(self, other):
 | 
					    def __eq__(self, other):
 | 
				
			||||||
@ -132,6 +140,18 @@ class Interval(object):
 | 
				
			|||||||
    def __hash__(self):
 | 
					    def __hash__(self):
 | 
				
			||||||
        return hash('(%f,%f)'%self.left, self.right)
 | 
					        return hash('(%f,%f)'%self.left, self.right)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class Region(object):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def __init__(self):
 | 
				
			||||||
 | 
					        self.columns = []
 | 
				
			||||||
 | 
					        self.top = self.bottom = self.left = self.right = self.width = self.height = 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def add_columns(self, columns):
 | 
				
			||||||
 | 
					        if not self.columns:
 | 
				
			||||||
 | 
					            for x in sorted(columns, cmp=lambda x,y: cmp(x.left, y.left)):
 | 
				
			||||||
 | 
					                self.columns.append(x)
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					           pass
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class Page(object):
 | 
					class Page(object):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -238,11 +258,10 @@ class Page(object):
 | 
				
			|||||||
        return columns
 | 
					        return columns
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def find_elements_in_row_of(self, x):
 | 
					    def find_elements_in_row_of(self, x):
 | 
				
			||||||
        interval = Interval(x.top - self.YFUZZ * self.average_text_height,
 | 
					        interval = Interval(x.top,
 | 
				
			||||||
                x.top + self.YFUZZ*(1+self.average_text_height))
 | 
					                x.top + self.YFUZZ*(1+self.average_text_height))
 | 
				
			||||||
        h_interval = Interval(x.left, x.right)
 | 
					        h_interval = Interval(x.left, x.right)
 | 
				
			||||||
        m = max(0, x.idx-15)
 | 
					        for y in self.elements[x.idx:x.idx+15]:
 | 
				
			||||||
        for y in self.elements[m:x.idx+15]:
 | 
					 | 
				
			||||||
            if y is not x:
 | 
					            if y is not x:
 | 
				
			||||||
                y_interval = Interval(y.top, y.bottom)
 | 
					                y_interval = Interval(y.top, y.bottom)
 | 
				
			||||||
                x_interval = Interval(y.left, y.right)
 | 
					                x_interval = Interval(y.left, y.right)
 | 
				
			||||||
 | 
				
			|||||||
@ -113,7 +113,8 @@ class PMLMLizer(object):
 | 
				
			|||||||
            href = self.oeb_book.guide['titlepage'].href
 | 
					            href = self.oeb_book.guide['titlepage'].href
 | 
				
			||||||
            item = self.oeb_book.manifest.hrefs[href]
 | 
					            item = self.oeb_book.manifest.hrefs[href]
 | 
				
			||||||
            if item.spine_position is None:
 | 
					            if item.spine_position is None:
 | 
				
			||||||
                stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
 | 
					                stylizer = Stylizer(item.data, item.href, self.oeb_book,
 | 
				
			||||||
 | 
					                        self.opts, self.opts.output_profile)
 | 
				
			||||||
                output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item))
 | 
					                output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item))
 | 
				
			||||||
        return output
 | 
					        return output
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -90,7 +90,8 @@ class RBMLizer(object):
 | 
				
			|||||||
            href = self.oeb_book.guide['titlepage'].href
 | 
					            href = self.oeb_book.guide['titlepage'].href
 | 
				
			||||||
            item = self.oeb_book.manifest.hrefs[href]
 | 
					            item = self.oeb_book.manifest.hrefs[href]
 | 
				
			||||||
            if item.spine_position is None:
 | 
					            if item.spine_position is None:
 | 
				
			||||||
                stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
 | 
					                stylizer = Stylizer(item.data, item.href, self.oeb_book,
 | 
				
			||||||
 | 
					                        self.opts, self.opts.output_profile)
 | 
				
			||||||
                output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item))
 | 
					                output += ''.join(self.dump_text(item.data.find(XHTML('body')), stylizer, item))
 | 
				
			||||||
        return output
 | 
					        return output
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -111,7 +112,7 @@ class RBMLizer(object):
 | 
				
			|||||||
        output = [u'']
 | 
					        output = [u'']
 | 
				
			||||||
        for item in self.oeb_book.spine:
 | 
					        for item in self.oeb_book.spine:
 | 
				
			||||||
            self.log.debug('Converting %s to RocketBook HTML...' % item.href)
 | 
					            self.log.debug('Converting %s to RocketBook HTML...' % item.href)
 | 
				
			||||||
            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
 | 
					            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
 | 
				
			||||||
            output.append(self.add_page_anchor(item))
 | 
					            output.append(self.add_page_anchor(item))
 | 
				
			||||||
            output += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
 | 
					            output += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
 | 
				
			||||||
        return ''.join(output)
 | 
					        return ''.join(output)
 | 
				
			||||||
 | 
				
			|||||||
@ -111,12 +111,13 @@ class RTFMLizer(object):
 | 
				
			|||||||
            href = self.oeb_book.guide['titlepage'].href
 | 
					            href = self.oeb_book.guide['titlepage'].href
 | 
				
			||||||
            item = self.oeb_book.manifest.hrefs[href]
 | 
					            item = self.oeb_book.manifest.hrefs[href]
 | 
				
			||||||
            if item.spine_position is None:
 | 
					            if item.spine_position is None:
 | 
				
			||||||
                stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
 | 
					                stylizer = Stylizer(item.data, item.href, self.oeb_book,
 | 
				
			||||||
 | 
					                        self.opts, self.opts.output_profile)
 | 
				
			||||||
                output += self.dump_text(item.data.find(XHTML('body')), stylizer)
 | 
					                output += self.dump_text(item.data.find(XHTML('body')), stylizer)
 | 
				
			||||||
                output += '{\\page } '
 | 
					                output += '{\\page } '
 | 
				
			||||||
        for item in self.oeb_book.spine:
 | 
					        for item in self.oeb_book.spine:
 | 
				
			||||||
            self.log.debug('Converting %s to RTF markup...' % item.href)
 | 
					            self.log.debug('Converting %s to RTF markup...' % item.href)
 | 
				
			||||||
            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
 | 
					            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
 | 
				
			||||||
            output += self.dump_text(item.data.find(XHTML('body')), stylizer)
 | 
					            output += self.dump_text(item.data.find(XHTML('body')), stylizer)
 | 
				
			||||||
        output += self.footer()
 | 
					        output += self.footer()
 | 
				
			||||||
        output = self.insert_images(output)
 | 
					        output = self.insert_images(output)
 | 
				
			||||||
 | 
				
			|||||||
@ -54,7 +54,7 @@ class TXTMLizer(object):
 | 
				
			|||||||
        output.append(self.get_toc())
 | 
					        output.append(self.get_toc())
 | 
				
			||||||
        for item in self.oeb_book.spine:
 | 
					        for item in self.oeb_book.spine:
 | 
				
			||||||
            self.log.debug('Converting %s to TXT...' % item.href)
 | 
					            self.log.debug('Converting %s to TXT...' % item.href)
 | 
				
			||||||
            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
 | 
					            stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile)
 | 
				
			||||||
            content = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode))
 | 
					            content = unicode(etree.tostring(item.data.find(XHTML('body')), encoding=unicode))
 | 
				
			||||||
            content = self.remove_newlines(content)
 | 
					            content = self.remove_newlines(content)
 | 
				
			||||||
            output += self.dump_text(etree.fromstring(content), stylizer)
 | 
					            output += self.dump_text(etree.fromstring(content), stylizer)
 | 
				
			||||||
 | 
				
			|||||||
@ -4,9 +4,14 @@ __license__ = 'GPL 3'
 | 
				
			|||||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
 | 
					__copyright__ = '2009, John Schember <john@nachtimwald.com>'
 | 
				
			||||||
__docformat__ = 'restructuredtext en'
 | 
					__docformat__ = 'restructuredtext en'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from calibre.ebooks.conversion.plumber import Plumber
 | 
					import os
 | 
				
			||||||
from calibre.utils.logging import Log
 | 
					from optparse import OptionParser
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from calibre.customize.conversion import OptionRecommendation, DummyReporter
 | 
					from calibre.customize.conversion import OptionRecommendation, DummyReporter
 | 
				
			||||||
 | 
					from calibre.ebooks.conversion.plumber import Plumber
 | 
				
			||||||
 | 
					from calibre.customize.ui import plugin_for_catalog_format
 | 
				
			||||||
 | 
					from calibre.utils.logging import Log
 | 
				
			||||||
 | 
					from calibre.gui2 import choose_dir, Application
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def gui_convert(input, output, recommendations, notification=DummyReporter(),
 | 
					def gui_convert(input, output, recommendations, notification=DummyReporter(),
 | 
				
			||||||
        abort_after_input_dump=False, log=None):
 | 
					        abort_after_input_dump=False, log=None):
 | 
				
			||||||
@ -20,7 +25,7 @@ def gui_convert(input, output, recommendations, notification=DummyReporter(),
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    plumber.run()
 | 
					    plumber.run()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def gui_catalog(fmt, title, dbspec, ids, out_file_name,
 | 
					def gui_catalog(fmt, title, dbspec, ids, out_file_name, fmt_options,
 | 
				
			||||||
        notification=DummyReporter(), log=None):
 | 
					        notification=DummyReporter(), log=None):
 | 
				
			||||||
    if log is None:
 | 
					    if log is None:
 | 
				
			||||||
        log = Log()
 | 
					        log = Log()
 | 
				
			||||||
@ -31,8 +36,28 @@ def gui_catalog(fmt, title, dbspec, ids, out_file_name,
 | 
				
			|||||||
        db = LibraryDatabase2(dbpath)
 | 
					        db = LibraryDatabase2(dbpath)
 | 
				
			||||||
    else: # To be implemented in the future
 | 
					    else: # To be implemented in the future
 | 
				
			||||||
        pass
 | 
					        pass
 | 
				
			||||||
    # Implement the interface to the catalog generating code here
 | 
					    
 | 
				
			||||||
    db
 | 
					    # Create a minimal OptionParser that we can append to
 | 
				
			||||||
 | 
					    parser = OptionParser()
 | 
				
			||||||
 | 
					    args = []
 | 
				
			||||||
 | 
					    parser.add_option("--verbose", action="store_true", dest="verbose", default=True)
 | 
				
			||||||
 | 
					    opts, args = parser.parse_args()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Populate opts
 | 
				
			||||||
 | 
					    opts.ids = ids
 | 
				
			||||||
 | 
					    opts.search_text = None
 | 
				
			||||||
 | 
					    opts.sort_by = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Extract the option dictionary to comma-separated lists
 | 
				
			||||||
 | 
					    for option in fmt_options:
 | 
				
			||||||
 | 
					        setattr(opts,option, ','.join(fmt_options[option]))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Fetch and run the plugin for fmt
 | 
				
			||||||
 | 
					    plugin = plugin_for_catalog_format(fmt)
 | 
				
			||||||
 | 
					    plugin.run(out_file_name, opts, db)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -6,29 +6,121 @@ __license__   = 'GPL v3'
 | 
				
			|||||||
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 | 
					__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 | 
				
			||||||
__docformat__ = 'restructuredtext en'
 | 
					__docformat__ = 'restructuredtext en'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from PyQt4.Qt import QDialog
 | 
					import os, shutil, sys, tempfile
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from PyQt4.Qt import QDialog, QWidget
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from calibre.customize.ui import config
 | 
				
			||||||
from calibre.gui2.dialogs.catalog_ui import Ui_Dialog
 | 
					from calibre.gui2.dialogs.catalog_ui import Ui_Dialog
 | 
				
			||||||
from calibre.gui2 import dynamic
 | 
					from calibre.gui2 import gprefs, dynamic
 | 
				
			||||||
from calibre.customize.ui import available_catalog_formats
 | 
					from calibre.customize.ui import available_catalog_formats, catalog_plugins
 | 
				
			||||||
 | 
					from calibre.gui2.catalog.catalog_csv_xml import PluginWidget
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class Catalog(QDialog, Ui_Dialog):
 | 
					class Catalog(QDialog, Ui_Dialog):
 | 
				
			||||||
 | 
					    ''' Catalog Dialog builder'''
 | 
				
			||||||
 | 
					    widgets = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __init__(self, parent, dbspec, ids):
 | 
					    def __init__(self, parent, dbspec, ids):
 | 
				
			||||||
 | 
					        import re, cStringIO
 | 
				
			||||||
 | 
					        from calibre import prints as info
 | 
				
			||||||
 | 
					        from calibre.gui2 import dynamic
 | 
				
			||||||
 | 
					        from PyQt4.uic import compileUi
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
        QDialog.__init__(self, parent)
 | 
					        QDialog.__init__(self, parent)
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        # Run the dialog setup generated from catalog.ui
 | 
				
			||||||
        self.setupUi(self)
 | 
					        self.setupUi(self)
 | 
				
			||||||
        self.dbspec, self.ids = dbspec, ids
 | 
					        self.dbspec, self.ids = dbspec, ids
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Display the number of books we've been passed
 | 
				
			||||||
        self.count.setText(unicode(self.count.text()).format(len(ids)))
 | 
					        self.count.setText(unicode(self.count.text()).format(len(ids)))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Display the last-used title
 | 
				
			||||||
        self.title.setText(dynamic.get('catalog_last_used_title',
 | 
					        self.title.setText(dynamic.get('catalog_last_used_title',
 | 
				
			||||||
            _('My Books')))
 | 
					            _('My Books')))
 | 
				
			||||||
        fmts = sorted([x.upper() for x in available_catalog_formats()])
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # GwR *** Add option tabs for built-in formats
 | 
				
			||||||
 | 
					        # This code models #69 in calibre/gui2/dialogs/config/__init__.py
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        self.fmts = []
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        from calibre.customize.builtins import plugins as builtin_plugins
 | 
				
			||||||
 | 
					        from calibre.customize import CatalogPlugin
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for plugin in catalog_plugins():
 | 
				
			||||||
 | 
					            if plugin.name in config['disabled_plugins']:
 | 
				
			||||||
 | 
					                continue
 | 
				
			||||||
 | 
					                
 | 
				
			||||||
 | 
					            name = plugin.name.lower().replace(' ', '_')
 | 
				
			||||||
 | 
					            if type(plugin) in builtin_plugins:
 | 
				
			||||||
 | 
					                #info("Adding widget for builtin Catalog plugin %s" % plugin.name)                
 | 
				
			||||||
 | 
					                try:
 | 
				
			||||||
 | 
					                    catalog_widget = __import__('calibre.gui2.catalog.'+name,
 | 
				
			||||||
 | 
					                            fromlist=[1])
 | 
				
			||||||
 | 
					                    pw = catalog_widget.PluginWidget()
 | 
				
			||||||
 | 
					                    pw.initialize(name)
 | 
				
			||||||
 | 
					                    pw.ICON = I('forward.svg')    
 | 
				
			||||||
 | 
					                    self.widgets.append(pw)
 | 
				
			||||||
 | 
					                    [self.fmts.append([file_type.upper(), pw.sync_enabled,pw]) for file_type in plugin.file_types]                    
 | 
				
			||||||
 | 
					                except ImportError:
 | 
				
			||||||
 | 
					                    info("ImportError with %s" % name)
 | 
				
			||||||
 | 
					                    continue
 | 
				
			||||||
 | 
					            else:
 | 
				
			||||||
 | 
					                # Load dynamic tab
 | 
				
			||||||
 | 
					                form = os.path.join(plugin.resources_path,'%s.ui' % name)
 | 
				
			||||||
 | 
					                klass = os.path.join(plugin.resources_path,'%s.py' % name)
 | 
				
			||||||
 | 
					                compiled_form = os.path.join(plugin.resources_path,'%s_ui.py' % name)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                if os.path.exists(form) and os.path.exists(klass):
 | 
				
			||||||
 | 
					                    #info("Adding widget for user-installed Catalog plugin %s" % plugin.name)
 | 
				
			||||||
 | 
					                    
 | 
				
			||||||
 | 
					                    # Compile the .ui form provided in plugin.zip
 | 
				
			||||||
 | 
					                    if not os.path.exists(compiled_form):
 | 
				
			||||||
 | 
					                        # info('\tCompiling form', form)
 | 
				
			||||||
 | 
					                        buf = cStringIO.StringIO()
 | 
				
			||||||
 | 
					                        compileUi(form, buf)
 | 
				
			||||||
 | 
					                        dat = buf.getvalue()
 | 
				
			||||||
 | 
					                        dat = re.compile(r'QtGui.QApplication.translate\(.+?,\s+"(.+?)(?<!\\)",.+?\)', 
 | 
				
			||||||
 | 
					                                         re.DOTALL).sub(r'_("\1")', dat)
 | 
				
			||||||
 | 
					                        open(compiled_form, 'wb').write(dat)
 | 
				
			||||||
 | 
					                    
 | 
				
			||||||
 | 
					                    # Import the dynamic PluginWidget() from .py file provided in plugin.zip
 | 
				
			||||||
 | 
					                    try:
 | 
				
			||||||
 | 
					                        sys.path.insert(0, plugin.resources_path)
 | 
				
			||||||
 | 
					                        catalog_widget = __import__(name, fromlist=[1])
 | 
				
			||||||
 | 
					                        pw = catalog_widget.PluginWidget()
 | 
				
			||||||
 | 
					                        pw.initialize(name)
 | 
				
			||||||
 | 
					                        pw.ICON = I('forward.svg')    
 | 
				
			||||||
 | 
					                        self.widgets.append(pw)                        
 | 
				
			||||||
 | 
					                        [self.fmts.append([file_type.upper(), pw.sync_enabled,pw]) for file_type in plugin.file_types]
 | 
				
			||||||
 | 
					                    except ImportError:
 | 
				
			||||||
 | 
					                        info("ImportError with %s" % name)
 | 
				
			||||||
 | 
					                        continue
 | 
				
			||||||
 | 
					                    finally:
 | 
				
			||||||
 | 
					                        sys.path.remove(plugin.resources_path)
 | 
				
			||||||
 | 
					                        
 | 
				
			||||||
 | 
					                else:
 | 
				
			||||||
 | 
					                    info("No dynamic tab resources found for %s" % name)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        self.widgets = sorted(self.widgets, key=lambda x:(x.TITLE, x.TITLE))
 | 
				
			||||||
 | 
					        for pw in self.widgets:
 | 
				
			||||||
 | 
					            page = self.tabs.addTab(pw,pw.TITLE)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Generate a sorted list of installed catalog formats/sync_enabled pairs
 | 
				
			||||||
 | 
					        fmts = sorted([x[0] for x in self.fmts])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        self.sync_enabled_formats = []
 | 
				
			||||||
 | 
					        for fmt in self.fmts:
 | 
				
			||||||
 | 
					            if fmt[1]:
 | 
				
			||||||
 | 
					                self.sync_enabled_formats.append(fmt[0])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Callback when format changes
 | 
				
			||||||
        self.format.currentIndexChanged.connect(self.format_changed)
 | 
					        self.format.currentIndexChanged.connect(self.format_changed)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Add the installed catalog format list to the format QComboBox
 | 
				
			||||||
        self.format.addItems(fmts)
 | 
					        self.format.addItems(fmts)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        pref = dynamic.get('catalog_preferred_format', 'EPUB')
 | 
					        pref = dynamic.get('catalog_preferred_format', 'CSV')
 | 
				
			||||||
        idx = self.format.findText(pref)
 | 
					        idx = self.format.findText(pref)
 | 
				
			||||||
        if idx > -1:
 | 
					        if idx > -1:
 | 
				
			||||||
            self.format.setCurrentIndex(idx)
 | 
					            self.format.setCurrentIndex(idx)
 | 
				
			||||||
@ -38,7 +130,7 @@ class Catalog(QDialog, Ui_Dialog):
 | 
				
			|||||||
                            
 | 
					                            
 | 
				
			||||||
    def format_changed(self, idx):
 | 
					    def format_changed(self, idx):
 | 
				
			||||||
        cf = unicode(self.format.currentText())
 | 
					        cf = unicode(self.format.currentText())
 | 
				
			||||||
        if cf in ('EPUB', 'MOBI'):
 | 
					        if cf in self.sync_enabled_formats:
 | 
				
			||||||
            self.sync.setEnabled(True)
 | 
					            self.sync.setEnabled(True)
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            self.sync.setDisabled(True)
 | 
					            self.sync.setDisabled(True)
 | 
				
			||||||
 | 
				
			|||||||
@ -6,105 +6,121 @@
 | 
				
			|||||||
   <rect>
 | 
					   <rect>
 | 
				
			||||||
    <x>0</x>
 | 
					    <x>0</x>
 | 
				
			||||||
    <y>0</y>
 | 
					    <y>0</y>
 | 
				
			||||||
    <width>628</width>
 | 
					    <width>611</width>
 | 
				
			||||||
    <height>503</height>
 | 
					    <height>514</height>
 | 
				
			||||||
   </rect>
 | 
					   </rect>
 | 
				
			||||||
  </property>
 | 
					  </property>
 | 
				
			||||||
  <property name="windowTitle">
 | 
					  <property name="windowTitle">
 | 
				
			||||||
   <string>Generate catalog</string>
 | 
					   <string>Generate catalog</string>
 | 
				
			||||||
  </property>
 | 
					  </property>
 | 
				
			||||||
  <property name="windowIcon">
 | 
					  <property name="windowIcon">
 | 
				
			||||||
   <iconset resource="../../../work/calibre/resources/images.qrc">
 | 
					   <iconset>
 | 
				
			||||||
    <normaloff>:/images/library.png</normaloff>:/images/library.png</iconset>
 | 
					    <normaloff>:/images/library.png</normaloff>:/images/library.png</iconset>
 | 
				
			||||||
  </property>
 | 
					  </property>
 | 
				
			||||||
  <layout class="QGridLayout" name="gridLayout">
 | 
					  <widget class="QDialogButtonBox" name="buttonBox">
 | 
				
			||||||
   <item row="2" column="0">
 | 
					   <property name="geometry">
 | 
				
			||||||
    <widget class="QDialogButtonBox" name="buttonBox">
 | 
					    <rect>
 | 
				
			||||||
     <property name="orientation">
 | 
					     <x>430</x>
 | 
				
			||||||
      <enum>Qt::Horizontal</enum>
 | 
					     <y>470</y>
 | 
				
			||||||
     </property>
 | 
					     <width>164</width>
 | 
				
			||||||
     <property name="standardButtons">
 | 
					     <height>32</height>
 | 
				
			||||||
      <set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set>
 | 
					    </rect>
 | 
				
			||||||
     </property>
 | 
					   </property>
 | 
				
			||||||
    </widget>
 | 
					   <property name="orientation">
 | 
				
			||||||
   </item>
 | 
					    <enum>Qt::Horizontal</enum>
 | 
				
			||||||
   <item row="1" column="0">
 | 
					   </property>
 | 
				
			||||||
    <widget class="QTabWidget" name="tabs">
 | 
					   <property name="standardButtons">
 | 
				
			||||||
     <property name="currentIndex">
 | 
					    <set>QDialogButtonBox::Cancel|QDialogButtonBox::Ok</set>
 | 
				
			||||||
      <number>0</number>
 | 
					   </property>
 | 
				
			||||||
     </property>
 | 
					  </widget>
 | 
				
			||||||
     <widget class="QWidget" name="tab">
 | 
					  <widget class="QTabWidget" name="tabs">
 | 
				
			||||||
      <attribute name="title">
 | 
					   <property name="geometry">
 | 
				
			||||||
       <string>Catalog options</string>
 | 
					    <rect>
 | 
				
			||||||
      </attribute>
 | 
					     <x>12</x>
 | 
				
			||||||
      <layout class="QGridLayout" name="gridLayout_2">
 | 
					     <y>39</y>
 | 
				
			||||||
       <item row="0" column="0">
 | 
					     <width>579</width>
 | 
				
			||||||
        <widget class="QLabel" name="label">
 | 
					     <height>411</height>
 | 
				
			||||||
         <property name="text">
 | 
					    </rect>
 | 
				
			||||||
          <string>Catalog &format:</string>
 | 
					   </property>
 | 
				
			||||||
         </property>
 | 
					   <property name="currentIndex">
 | 
				
			||||||
         <property name="buddy">
 | 
					    <number>0</number>
 | 
				
			||||||
          <cstring>format</cstring>
 | 
					   </property>
 | 
				
			||||||
         </property>
 | 
					   <widget class="QWidget" name="tab">
 | 
				
			||||||
        </widget>
 | 
					    <attribute name="title">
 | 
				
			||||||
       </item>
 | 
					     <string>Catalog options</string>
 | 
				
			||||||
       <item row="0" column="2">
 | 
					    </attribute>
 | 
				
			||||||
        <widget class="QComboBox" name="format"/>
 | 
					    <layout class="QGridLayout" name="gridLayout_2">
 | 
				
			||||||
       </item>
 | 
					     <item row="0" column="0">
 | 
				
			||||||
       <item row="1" column="0">
 | 
					      <widget class="QLabel" name="label">
 | 
				
			||||||
        <widget class="QLabel" name="label_2">
 | 
					       <property name="text">
 | 
				
			||||||
         <property name="text">
 | 
					        <string>Catalog &format:</string>
 | 
				
			||||||
          <string>Catalog &title (existing catalog with the same title will be replaced):</string>
 | 
					       </property>
 | 
				
			||||||
         </property>
 | 
					       <property name="buddy">
 | 
				
			||||||
         <property name="wordWrap">
 | 
					        <cstring>format</cstring>
 | 
				
			||||||
          <bool>true</bool>
 | 
					       </property>
 | 
				
			||||||
         </property>
 | 
					      </widget>
 | 
				
			||||||
         <property name="buddy">
 | 
					     </item>
 | 
				
			||||||
          <cstring>title</cstring>
 | 
					     <item row="0" column="2">
 | 
				
			||||||
         </property>
 | 
					      <widget class="QComboBox" name="format"/>
 | 
				
			||||||
        </widget>
 | 
					     </item>
 | 
				
			||||||
       </item>
 | 
					     <item row="1" column="0">
 | 
				
			||||||
       <item row="2" column="1">
 | 
					      <widget class="QLabel" name="label_2">
 | 
				
			||||||
        <spacer name="verticalSpacer">
 | 
					       <property name="text">
 | 
				
			||||||
         <property name="orientation">
 | 
					        <string>Catalog &title (existing catalog with the same title will be replaced):</string>
 | 
				
			||||||
          <enum>Qt::Vertical</enum>
 | 
					       </property>
 | 
				
			||||||
         </property>
 | 
					       <property name="wordWrap">
 | 
				
			||||||
         <property name="sizeHint" stdset="0">
 | 
					        <bool>true</bool>
 | 
				
			||||||
          <size>
 | 
					       </property>
 | 
				
			||||||
           <width>20</width>
 | 
					       <property name="buddy">
 | 
				
			||||||
           <height>299</height>
 | 
					        <cstring>title</cstring>
 | 
				
			||||||
          </size>
 | 
					       </property>
 | 
				
			||||||
         </property>
 | 
					      </widget>
 | 
				
			||||||
        </spacer>
 | 
					     </item>
 | 
				
			||||||
       </item>
 | 
					     <item row="1" column="2">
 | 
				
			||||||
       <item row="3" column="0">
 | 
					      <widget class="QLineEdit" name="title"/>
 | 
				
			||||||
        <widget class="QCheckBox" name="sync">
 | 
					     </item>
 | 
				
			||||||
         <property name="text">
 | 
					     <item row="3" column="0">
 | 
				
			||||||
          <string>&Send catalog to device automatically</string>
 | 
					      <widget class="QCheckBox" name="sync">
 | 
				
			||||||
         </property>
 | 
					       <property name="text">
 | 
				
			||||||
        </widget>
 | 
					        <string>&Send catalog to device automatically</string>
 | 
				
			||||||
       </item>
 | 
					       </property>
 | 
				
			||||||
       <item row="1" column="2">
 | 
					      </widget>
 | 
				
			||||||
        <widget class="QLineEdit" name="title"/>
 | 
					     </item>
 | 
				
			||||||
       </item>
 | 
					     <item row="2" column="1">
 | 
				
			||||||
      </layout>
 | 
					      <spacer name="verticalSpacer">
 | 
				
			||||||
     </widget>
 | 
					       <property name="orientation">
 | 
				
			||||||
    </widget>
 | 
					        <enum>Qt::Vertical</enum>
 | 
				
			||||||
   </item>
 | 
					       </property>
 | 
				
			||||||
   <item row="0" column="0">
 | 
					       <property name="sizeHint" stdset="0">
 | 
				
			||||||
    <widget class="QLabel" name="count">
 | 
					        <size>
 | 
				
			||||||
     <property name="font">
 | 
					         <width>20</width>
 | 
				
			||||||
      <font>
 | 
					         <height>299</height>
 | 
				
			||||||
       <weight>75</weight>
 | 
					        </size>
 | 
				
			||||||
       <bold>true</bold>
 | 
					       </property>
 | 
				
			||||||
      </font>
 | 
					      </spacer>
 | 
				
			||||||
     </property>
 | 
					     </item>
 | 
				
			||||||
     <property name="text">
 | 
					    </layout>
 | 
				
			||||||
      <string>Generate catalog for {0} books</string>
 | 
					   </widget>
 | 
				
			||||||
     </property>
 | 
					  </widget>
 | 
				
			||||||
    </widget>
 | 
					  <widget class="QLabel" name="count">
 | 
				
			||||||
   </item>
 | 
					   <property name="geometry">
 | 
				
			||||||
  </layout>
 | 
					    <rect>
 | 
				
			||||||
 | 
					     <x>12</x>
 | 
				
			||||||
 | 
					     <y>12</y>
 | 
				
			||||||
 | 
					     <width>205</width>
 | 
				
			||||||
 | 
					     <height>17</height>
 | 
				
			||||||
 | 
					    </rect>
 | 
				
			||||||
 | 
					   </property>
 | 
				
			||||||
 | 
					   <property name="font">
 | 
				
			||||||
 | 
					    <font>
 | 
				
			||||||
 | 
					     <weight>75</weight>
 | 
				
			||||||
 | 
					     <bold>true</bold>
 | 
				
			||||||
 | 
					    </font>
 | 
				
			||||||
 | 
					   </property>
 | 
				
			||||||
 | 
					   <property name="text">
 | 
				
			||||||
 | 
					    <string>Generate catalog for {0} books</string>
 | 
				
			||||||
 | 
					   </property>
 | 
				
			||||||
 | 
					  </widget>
 | 
				
			||||||
 </widget>
 | 
					 </widget>
 | 
				
			||||||
 <resources>
 | 
					 <resources>
 | 
				
			||||||
  <include location="../../../work/calibre/resources/images.qrc"/>
 | 
					  <include location="../../../work/calibre/resources/images.qrc"/>
 | 
				
			||||||
 | 
				
			|||||||
@ -532,7 +532,7 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
 | 
				
			|||||||
            if self.cover_fetcher.exception is not None:
 | 
					            if self.cover_fetcher.exception is not None:
 | 
				
			||||||
                err = self.cover_fetcher.exception
 | 
					                err = self.cover_fetcher.exception
 | 
				
			||||||
                error_dialog(self, _('Cannot fetch cover'),
 | 
					                error_dialog(self, _('Cannot fetch cover'),
 | 
				
			||||||
                    _('<b>Could not fetch cover.</b><br/>')+repr(err)).exec_()
 | 
					                    _('<b>Could not fetch cover.</b><br/>')+unicode(err)).exec_()
 | 
				
			||||||
                return
 | 
					                return
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            pix = QPixmap()
 | 
					            pix = QPixmap()
 | 
				
			||||||
 | 
				
			|||||||
@ -215,7 +215,7 @@ class TagsModel(QAbstractItemModel):
 | 
				
			|||||||
            return QModelIndex()
 | 
					            return QModelIndex()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        child_item = index.internalPointer()
 | 
					        child_item = index.internalPointer()
 | 
				
			||||||
        parent_item = child_item.parent
 | 
					        parent_item = getattr(child_item, 'parent', None)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if parent_item is self.root_item or parent_item is None:
 | 
					        if parent_item is self.root_item or parent_item is None:
 | 
				
			||||||
            return QModelIndex()
 | 
					            return QModelIndex()
 | 
				
			||||||
 | 
				
			|||||||
@ -238,19 +238,36 @@ def fetch_scheduled_recipe(arg):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
def generate_catalog(parent, dbspec, ids):
 | 
					def generate_catalog(parent, dbspec, ids):
 | 
				
			||||||
    from calibre.gui2.dialogs.catalog import Catalog
 | 
					    from calibre.gui2.dialogs.catalog import Catalog
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    # Build the Catalog dialog in gui2.dialogs.catalog
 | 
				
			||||||
    d = Catalog(parent, dbspec, ids)
 | 
					    d = Catalog(parent, dbspec, ids)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if d.exec_() != d.Accepted:
 | 
					    if d.exec_() != d.Accepted:
 | 
				
			||||||
        return None
 | 
					        return None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Create the output file
 | 
				
			||||||
    out = PersistentTemporaryFile(suffix='_catalog_out.'+d.catalog_format.lower())
 | 
					    out = PersistentTemporaryFile(suffix='_catalog_out.'+d.catalog_format.lower())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Retrieve plugin options
 | 
				
			||||||
 | 
					    fmt_options = {}
 | 
				
			||||||
 | 
					    for x in range(d.tabs.count()):
 | 
				
			||||||
 | 
					        if str(d.tabs.tabText(x)).find(str(d.catalog_format)) > -1:
 | 
				
			||||||
 | 
					            for fmt in d.fmts:
 | 
				
			||||||
 | 
					                if fmt[0] == d.catalog_format:
 | 
				
			||||||
 | 
					                    fmt_options = fmt[2].options()
 | 
				
			||||||
 | 
					                    # print "gui2.tools:generate_catalog(): options for %s: %s" % (fmt[0], fmt_options)
 | 
				
			||||||
 | 
					                                        
 | 
				
			||||||
    args = [
 | 
					    args = [
 | 
				
			||||||
        d.catalog_format,
 | 
					        d.catalog_format,
 | 
				
			||||||
        d.catalog_title,
 | 
					        d.catalog_title,
 | 
				
			||||||
        dbspec,
 | 
					        dbspec,
 | 
				
			||||||
        ids,
 | 
					        ids,
 | 
				
			||||||
        out.name,
 | 
					        out.name,
 | 
				
			||||||
 | 
					        fmt_options
 | 
				
			||||||
        ]
 | 
					        ]
 | 
				
			||||||
    out.close()
 | 
					    out.close()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # This calls gui2.convert.gui_conversion:gui_catalog()
 | 
				
			||||||
    return 'gui_catalog', args, _('Generate catalog'), out.name, d.catalog_sync, \
 | 
					    return 'gui_catalog', args, _('Generate catalog'), out.name, d.catalog_sync, \
 | 
				
			||||||
            d.catalog_title
 | 
					            d.catalog_title
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -9,7 +9,7 @@ __docformat__ = 'restructuredtext en'
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
'''The main GUI'''
 | 
					'''The main GUI'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import os, sys, textwrap, collections, time
 | 
					import atexit, os, shutil, sys, tempfile, textwrap, collections, time
 | 
				
			||||||
from xml.parsers.expat import ExpatError
 | 
					from xml.parsers.expat import ExpatError
 | 
				
			||||||
from Queue import Queue, Empty
 | 
					from Queue import Queue, Empty
 | 
				
			||||||
from threading import Thread
 | 
					from threading import Thread
 | 
				
			||||||
@ -357,7 +357,7 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
 | 
				
			|||||||
        cm.addAction(_('Bulk convert'))
 | 
					        cm.addAction(_('Bulk convert'))
 | 
				
			||||||
        cm.addSeparator()
 | 
					        cm.addSeparator()
 | 
				
			||||||
        ac = cm.addAction(
 | 
					        ac = cm.addAction(
 | 
				
			||||||
                _('Create catalog of the books in your calibre library'))
 | 
					                _('Create catalog of books in your calibre library'))
 | 
				
			||||||
        ac.triggered.connect(self.generate_catalog)
 | 
					        ac.triggered.connect(self.generate_catalog)
 | 
				
			||||||
        self.action_convert.setMenu(cm)
 | 
					        self.action_convert.setMenu(cm)
 | 
				
			||||||
        self._convert_single_hook = partial(self.convert_ebook, bulk=False)
 | 
					        self._convert_single_hook = partial(self.convert_ebook, bulk=False)
 | 
				
			||||||
@ -1361,23 +1361,29 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    def generate_catalog(self):    
 | 
					    def generate_catalog(self):    
 | 
				
			||||||
        rows = self.library_view.selectionModel().selectedRows()
 | 
					        rows = self.library_view.selectionModel().selectedRows()
 | 
				
			||||||
        if not rows:
 | 
					        if not rows or len(rows) < 2:
 | 
				
			||||||
            rows = xrange(self.library_view.model().rowCount(QModelIndex()))
 | 
					            rows = xrange(self.library_view.model().rowCount(QModelIndex()))
 | 
				
			||||||
        ids = map(self.library_view.model().id, rows)
 | 
					        ids = map(self.library_view.model().id, rows)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        dbspec = None
 | 
					        dbspec = None
 | 
				
			||||||
        if not ids:
 | 
					        if not ids:
 | 
				
			||||||
            return error_dialog(self, _('No books selected'),
 | 
					            return error_dialog(self, _('No books selected'),
 | 
				
			||||||
                    _('No books selected to generate catalog for'),
 | 
					                    _('No books selected to generate catalog for'),
 | 
				
			||||||
                    show=True)
 | 
					                    show=True)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Calling gui2.tools:generate_catalog()
 | 
				
			||||||
        ret = generate_catalog(self, dbspec, ids)
 | 
					        ret = generate_catalog(self, dbspec, ids)
 | 
				
			||||||
        if ret is None:
 | 
					        if ret is None:
 | 
				
			||||||
            return
 | 
					            return
 | 
				
			||||||
 | 
					            
 | 
				
			||||||
        func, args, desc, out, sync, title = ret
 | 
					        func, args, desc, out, sync, title = ret
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        fmt = os.path.splitext(out)[1][1:].upper()
 | 
					        fmt = os.path.splitext(out)[1][1:].upper()
 | 
				
			||||||
        job = self.job_manager.run_job(
 | 
					        job = self.job_manager.run_job(
 | 
				
			||||||
                Dispatcher(self.catalog_generated), func, args=args,
 | 
					                Dispatcher(self.catalog_generated), func, args=args,
 | 
				
			||||||
                    description=desc)
 | 
					                    description=desc)
 | 
				
			||||||
        job.catalog_file_path = out
 | 
					        job.catalog_file_path = out
 | 
				
			||||||
 | 
					        job.fmt = fmt
 | 
				
			||||||
        job.catalog_sync, job.catalog_title = sync, title        
 | 
					        job.catalog_sync, job.catalog_title = sync, title        
 | 
				
			||||||
        self.status_bar.showMessage(_('Generating %s catalog...')%fmt)
 | 
					        self.status_bar.showMessage(_('Generating %s catalog...')%fmt)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -1392,7 +1398,12 @@ class Main(MainWindow, Ui_MainWindow, DeviceGUI):
 | 
				
			|||||||
            dynamic.set('catalogs_to_be_synced', sync)
 | 
					            dynamic.set('catalogs_to_be_synced', sync)
 | 
				
			||||||
        self.status_bar.showMessage(_('Catalog generated.'), 3000)
 | 
					        self.status_bar.showMessage(_('Catalog generated.'), 3000)
 | 
				
			||||||
        self.sync_catalogs()
 | 
					        self.sync_catalogs()
 | 
				
			||||||
 | 
							if job.fmt in ['CSV','XML']:
 | 
				
			||||||
 | 
								export_dir = choose_dir(self, 'Export Catalog Directory', 
 | 
				
			||||||
 | 
															          'Select destination for %s.%s' % (job.catalog_title, job.fmt.lower()))
 | 
				
			||||||
 | 
								if export_dir:
 | 
				
			||||||
 | 
									destination = os.path.join(export_dir, '%s.%s' % (job.catalog_title, job.fmt.lower()))
 | 
				
			||||||
 | 
									shutil.copyfile(job.catalog_file_path, destination)
 | 
				
			||||||
				
 | 
									
 | 
				
			||||||
    ############################### Fetch news #################################
 | 
					    ############################### Fetch news #################################
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -7,14 +7,14 @@
 | 
				
			|||||||
    <x>0</x>
 | 
					    <x>0</x>
 | 
				
			||||||
    <y>0</y>
 | 
					    <y>0</y>
 | 
				
			||||||
    <width>479</width>
 | 
					    <width>479</width>
 | 
				
			||||||
    <height>574</height>
 | 
					    <height>606</height>
 | 
				
			||||||
   </rect>
 | 
					   </rect>
 | 
				
			||||||
  </property>
 | 
					  </property>
 | 
				
			||||||
  <property name="windowTitle">
 | 
					  <property name="windowTitle">
 | 
				
			||||||
   <string>Configure Ebook viewer</string>
 | 
					   <string>Configure Ebook viewer</string>
 | 
				
			||||||
  </property>
 | 
					  </property>
 | 
				
			||||||
  <property name="windowIcon">
 | 
					  <property name="windowIcon">
 | 
				
			||||||
   <iconset resource="../../../../resources/images.qrc">
 | 
					   <iconset>
 | 
				
			||||||
    <normaloff>:/images/config.svg</normaloff>:/images/config.svg</iconset>
 | 
					    <normaloff>:/images/config.svg</normaloff>:/images/config.svg</iconset>
 | 
				
			||||||
  </property>
 | 
					  </property>
 | 
				
			||||||
  <layout class="QGridLayout" name="gridLayout_4">
 | 
					  <layout class="QGridLayout" name="gridLayout_4">
 | 
				
			||||||
@ -164,7 +164,7 @@
 | 
				
			|||||||
              </item>
 | 
					              </item>
 | 
				
			||||||
             </widget>
 | 
					             </widget>
 | 
				
			||||||
            </item>
 | 
					            </item>
 | 
				
			||||||
            <item row="6" column="0" colspan="2">
 | 
					            <item row="7" column="0" colspan="2">
 | 
				
			||||||
             <widget class="QCheckBox" name="opt_remember_window_size">
 | 
					             <widget class="QCheckBox" name="opt_remember_window_size">
 | 
				
			||||||
              <property name="text">
 | 
					              <property name="text">
 | 
				
			||||||
               <string>Remember last used &window size</string>
 | 
					               <string>Remember last used &window size</string>
 | 
				
			||||||
@ -218,6 +218,13 @@
 | 
				
			|||||||
              </property>
 | 
					              </property>
 | 
				
			||||||
             </widget>
 | 
					             </widget>
 | 
				
			||||||
            </item>
 | 
					            </item>
 | 
				
			||||||
 | 
					            <item row="6" column="0" colspan="2">
 | 
				
			||||||
 | 
					             <widget class="QCheckBox" name="opt_fit_images">
 | 
				
			||||||
 | 
					              <property name="text">
 | 
				
			||||||
 | 
					               <string>&Resize images larger than the viewer window (needs restart)</string>
 | 
				
			||||||
 | 
					              </property>
 | 
				
			||||||
 | 
					             </widget>
 | 
				
			||||||
 | 
					            </item>
 | 
				
			||||||
           </layout>
 | 
					           </layout>
 | 
				
			||||||
          </item>
 | 
					          </item>
 | 
				
			||||||
          <item row="3" column="0">
 | 
					          <item row="3" column="0">
 | 
				
			||||||
 | 
				
			|||||||
@ -10,7 +10,7 @@ from base64 import b64encode
 | 
				
			|||||||
from PyQt4.Qt import QSize, QSizePolicy, QUrl, SIGNAL, Qt, QTimer, \
 | 
					from PyQt4.Qt import QSize, QSizePolicy, QUrl, SIGNAL, Qt, QTimer, \
 | 
				
			||||||
                     QPainter, QPalette, QBrush, QFontDatabase, QDialog, \
 | 
					                     QPainter, QPalette, QBrush, QFontDatabase, QDialog, \
 | 
				
			||||||
                     QColor, QPoint, QImage, QRegion, QVariant, QIcon, \
 | 
					                     QColor, QPoint, QImage, QRegion, QVariant, QIcon, \
 | 
				
			||||||
                     QFont, QObject, QApplication, pyqtSignature, QAction
 | 
					                     QFont, pyqtSignature, QAction
 | 
				
			||||||
from PyQt4.QtWebKit import QWebPage, QWebView, QWebSettings
 | 
					from PyQt4.QtWebKit import QWebPage, QWebView, QWebSettings
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from calibre.utils.config import Config, StringConfig
 | 
					from calibre.utils.config import Config, StringConfig
 | 
				
			||||||
@ -21,7 +21,7 @@ from calibre.constants import iswindows
 | 
				
			|||||||
from calibre import prints, guess_type
 | 
					from calibre import prints, guess_type
 | 
				
			||||||
from calibre.gui2.viewer.keys import SHORTCUTS
 | 
					from calibre.gui2.viewer.keys import SHORTCUTS
 | 
				
			||||||
 | 
					
 | 
				
			||||||
bookmarks = referencing = hyphenation = jquery = jquery_scrollTo = hyphenator = None
 | 
					bookmarks = referencing = hyphenation = jquery = jquery_scrollTo = hyphenator = images =None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def load_builtin_fonts():
 | 
					def load_builtin_fonts():
 | 
				
			||||||
    base = P('fonts/liberation/*.ttf')
 | 
					    base = P('fonts/liberation/*.ttf')
 | 
				
			||||||
@ -42,6 +42,8 @@ def config(defaults=None):
 | 
				
			|||||||
              help=_('Set the user CSS stylesheet. This can be used to customize the look of all books.'))
 | 
					              help=_('Set the user CSS stylesheet. This can be used to customize the look of all books.'))
 | 
				
			||||||
    c.add_opt('max_view_width', default=6000,
 | 
					    c.add_opt('max_view_width', default=6000,
 | 
				
			||||||
            help=_('Maximum width of the viewer window, in pixels.'))
 | 
					            help=_('Maximum width of the viewer window, in pixels.'))
 | 
				
			||||||
 | 
					    c.add_opt('fit_images', default=True,
 | 
				
			||||||
 | 
					            help=_('Resize images larger than the viewer window to fit inside it'))
 | 
				
			||||||
    c.add_opt('hyphenate', default=False, help=_('Hyphenate text'))
 | 
					    c.add_opt('hyphenate', default=False, help=_('Hyphenate text'))
 | 
				
			||||||
    c.add_opt('hyphenate_default_lang', default='en',
 | 
					    c.add_opt('hyphenate_default_lang', default='en',
 | 
				
			||||||
            help=_('Default language for hyphenation rules'))
 | 
					            help=_('Default language for hyphenation rules'))
 | 
				
			||||||
@ -59,20 +61,6 @@ def config(defaults=None):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    return c
 | 
					    return c
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class PythonJS(QObject):
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def __init__(self, callback):
 | 
					 | 
				
			||||||
        QObject.__init__(self, QApplication.instance())
 | 
					 | 
				
			||||||
        self.setObjectName("py_bridge")
 | 
					 | 
				
			||||||
        self._callback = callback
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    @pyqtSignature("QString")
 | 
					 | 
				
			||||||
    def callback(self, msg):
 | 
					 | 
				
			||||||
        print "callback called"
 | 
					 | 
				
			||||||
        self._callback(msg)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class ConfigDialog(QDialog, Ui_Dialog):
 | 
					class ConfigDialog(QDialog, Ui_Dialog):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __init__(self, shortcuts, parent=None):
 | 
					    def __init__(self, shortcuts, parent=None):
 | 
				
			||||||
@ -110,6 +98,7 @@ class ConfigDialog(QDialog, Ui_Dialog):
 | 
				
			|||||||
        self.shortcut_config = ShortcutConfig(shortcuts, parent=self)
 | 
					        self.shortcut_config = ShortcutConfig(shortcuts, parent=self)
 | 
				
			||||||
        p = self.tabs.widget(1)
 | 
					        p = self.tabs.widget(1)
 | 
				
			||||||
        p.layout().addWidget(self.shortcut_config)
 | 
					        p.layout().addWidget(self.shortcut_config)
 | 
				
			||||||
 | 
					        self.opt_fit_images.setChecked(opts.fit_images)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def accept(self, *args):
 | 
					    def accept(self, *args):
 | 
				
			||||||
@ -122,6 +111,7 @@ class ConfigDialog(QDialog, Ui_Dialog):
 | 
				
			|||||||
        c.set('standard_font', {0:'serif', 1:'sans', 2:'mono'}[self.standard_font.currentIndex()])
 | 
					        c.set('standard_font', {0:'serif', 1:'sans', 2:'mono'}[self.standard_font.currentIndex()])
 | 
				
			||||||
        c.set('user_css', unicode(self.css.toPlainText()))
 | 
					        c.set('user_css', unicode(self.css.toPlainText()))
 | 
				
			||||||
        c.set('remember_window_size', self.opt_remember_window_size.isChecked())
 | 
					        c.set('remember_window_size', self.opt_remember_window_size.isChecked())
 | 
				
			||||||
 | 
					        c.set('fit_images', self.opt_fit_images.isChecked())
 | 
				
			||||||
        c.set('max_view_width', int(self.max_view_width.value()))
 | 
					        c.set('max_view_width', int(self.max_view_width.value()))
 | 
				
			||||||
        c.set('hyphenate', self.hyphenate.isChecked())
 | 
					        c.set('hyphenate', self.hyphenate.isChecked())
 | 
				
			||||||
        idx = self.hyphenate_default_lang.currentIndex()
 | 
					        idx = self.hyphenate_default_lang.currentIndex()
 | 
				
			||||||
@ -157,7 +147,6 @@ class Document(QWebPage):
 | 
				
			|||||||
        self.setObjectName("py_bridge")
 | 
					        self.setObjectName("py_bridge")
 | 
				
			||||||
        self.debug_javascript = False
 | 
					        self.debug_javascript = False
 | 
				
			||||||
        self.current_language = None
 | 
					        self.current_language = None
 | 
				
			||||||
        #self.js_bridge = PythonJS(self.js_callback)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
        self.setLinkDelegationPolicy(self.DelegateAllLinks)
 | 
					        self.setLinkDelegationPolicy(self.DelegateAllLinks)
 | 
				
			||||||
        self.scroll_marks = []
 | 
					        self.scroll_marks = []
 | 
				
			||||||
@ -197,9 +186,14 @@ class Document(QWebPage):
 | 
				
			|||||||
        opts = config().parse()
 | 
					        opts = config().parse()
 | 
				
			||||||
        self.hyphenate = opts.hyphenate
 | 
					        self.hyphenate = opts.hyphenate
 | 
				
			||||||
        self.hyphenate_default_lang = opts.hyphenate_default_lang
 | 
					        self.hyphenate_default_lang = opts.hyphenate_default_lang
 | 
				
			||||||
 | 
					        self.do_fit_images = opts.fit_images
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def fit_images(self):
 | 
				
			||||||
 | 
					        if self.do_fit_images:
 | 
				
			||||||
 | 
					            self.javascript('setup_image_scaling_handlers()')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def load_javascript_libraries(self):
 | 
					    def load_javascript_libraries(self):
 | 
				
			||||||
        global bookmarks, referencing, hyphenation, jquery, jquery_scrollTo, hyphenator
 | 
					        global bookmarks, referencing, hyphenation, jquery, jquery_scrollTo, hyphenator, images
 | 
				
			||||||
        self.mainFrame().addToJavaScriptWindowObject("py_bridge", self)
 | 
					        self.mainFrame().addToJavaScriptWindowObject("py_bridge", self)
 | 
				
			||||||
        if jquery is None:
 | 
					        if jquery is None:
 | 
				
			||||||
            jquery = P('content_server/jquery.js', data=True)
 | 
					            jquery = P('content_server/jquery.js', data=True)
 | 
				
			||||||
@ -215,6 +209,9 @@ class Document(QWebPage):
 | 
				
			|||||||
        if referencing is None:
 | 
					        if referencing is None:
 | 
				
			||||||
            referencing = P('viewer/referencing.js', data=True)
 | 
					            referencing = P('viewer/referencing.js', data=True)
 | 
				
			||||||
        self.javascript(referencing)
 | 
					        self.javascript(referencing)
 | 
				
			||||||
 | 
					        if images is None:
 | 
				
			||||||
 | 
					            images = P('viewer/images.js', data=True)
 | 
				
			||||||
 | 
					        self.javascript(images)
 | 
				
			||||||
        if hyphenation is None:
 | 
					        if hyphenation is None:
 | 
				
			||||||
            hyphenation = P('viewer/hyphenation.js', data=True)
 | 
					            hyphenation = P('viewer/hyphenation.js', data=True)
 | 
				
			||||||
        self.javascript(hyphenation)
 | 
					        self.javascript(hyphenation)
 | 
				
			||||||
@ -353,7 +350,13 @@ class Document(QWebPage):
 | 
				
			|||||||
        return self.mainFrame().contentsSize().width() # offsetWidth gives inaccurate results
 | 
					        return self.mainFrame().contentsSize().width() # offsetWidth gives inaccurate results
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def set_bottom_padding(self, amount):
 | 
					    def set_bottom_padding(self, amount):
 | 
				
			||||||
        self.javascript('$("body").css("padding-bottom", "%dpx")' % amount)
 | 
					        padding = '%dpx'%amount
 | 
				
			||||||
 | 
					        try:
 | 
				
			||||||
 | 
					            old_padding = unicode(self.javascript('$("body").css("padding-bottom")').toString())
 | 
				
			||||||
 | 
					        except:
 | 
				
			||||||
 | 
					            old_padding = ''
 | 
				
			||||||
 | 
					        if old_padding != padding:
 | 
				
			||||||
 | 
					            self.javascript('$("body").css("padding-bottom", "%s")' % padding)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class EntityDeclarationProcessor(object):
 | 
					class EntityDeclarationProcessor(object):
 | 
				
			||||||
@ -541,6 +544,7 @@ class DocumentView(QWebView):
 | 
				
			|||||||
            return
 | 
					            return
 | 
				
			||||||
        self.loading_url = None
 | 
					        self.loading_url = None
 | 
				
			||||||
        self.document.set_bottom_padding(0)
 | 
					        self.document.set_bottom_padding(0)
 | 
				
			||||||
 | 
					        self.document.fit_images()
 | 
				
			||||||
        self._size_hint = self.document.mainFrame().contentsSize()
 | 
					        self._size_hint = self.document.mainFrame().contentsSize()
 | 
				
			||||||
        scrolled = False
 | 
					        scrolled = False
 | 
				
			||||||
        if self.to_bottom:
 | 
					        if self.to_bottom:
 | 
				
			||||||
 | 
				
			|||||||
@ -40,8 +40,9 @@ class CSV_XML(CatalogPlugin):
 | 
				
			|||||||
        from calibre.utils.logging import Log
 | 
					        from calibre.utils.logging import Log
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        log = Log()
 | 
					        log = Log()
 | 
				
			||||||
        self.fmt = path_to_output[path_to_output.rfind('.') + 1:]
 | 
					        self.fmt = path_to_output.rpartition('.')[2]
 | 
				
			||||||
        if opts.verbose:
 | 
					        
 | 
				
			||||||
 | 
					        if False and opts.verbose:
 | 
				
			||||||
            log("%s:run" % self.name)
 | 
					            log("%s:run" % self.name)
 | 
				
			||||||
            log(" path_to_output: %s" % path_to_output)
 | 
					            log(" path_to_output: %s" % path_to_output)
 | 
				
			||||||
            log(" Output format: %s" % self.fmt)
 | 
					            log(" Output format: %s" % self.fmt)
 | 
				
			||||||
 | 
				
			|||||||
@ -644,6 +644,10 @@ def catalog_option_parser(args):
 | 
				
			|||||||
    output, fmt = validate_command_line(parser, args, log)
 | 
					    output, fmt = validate_command_line(parser, args, log)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Add options common to all catalog plugins
 | 
					    # Add options common to all catalog plugins
 | 
				
			||||||
 | 
					    parser.add_option('-i', '--ids', default=None, dest='ids',
 | 
				
			||||||
 | 
					                      help=_("Comma-separated list of database IDs to catalog.\n"
 | 
				
			||||||
 | 
					                      "If declared, --search is ignored.\n"
 | 
				
			||||||
 | 
					                             "Default: all"))
 | 
				
			||||||
    parser.add_option('-s', '--search', default=None, dest='search_text',
 | 
					    parser.add_option('-s', '--search', default=None, dest='search_text',
 | 
				
			||||||
                      help=_("Filter the results by the search query. "
 | 
					                      help=_("Filter the results by the search query. "
 | 
				
			||||||
                          "For the format of the search query, please see "
 | 
					                          "For the format of the search query, please see "
 | 
				
			||||||
@ -656,31 +660,6 @@ def catalog_option_parser(args):
 | 
				
			|||||||
    # Add options specific to fmt plugin
 | 
					    # Add options specific to fmt plugin
 | 
				
			||||||
    plugin = add_plugin_parser_options(fmt, parser, log)
 | 
					    plugin = add_plugin_parser_options(fmt, parser, log)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Merge options from GUI Preferences
 | 
					 | 
				
			||||||
    '''
 | 
					 | 
				
			||||||
    # Placeholder sample code until we implement GUI preferences
 | 
					 | 
				
			||||||
    from calibre.library.save_to_disk import config
 | 
					 | 
				
			||||||
    c = config()
 | 
					 | 
				
			||||||
    for pref in ['asciiize', 'update_metadata', 'write_opf', 'save_cover']:
 | 
					 | 
				
			||||||
        opt = c.get_option(pref)
 | 
					 | 
				
			||||||
        switch = '--dont-'+pref.replace('_', '-')
 | 
					 | 
				
			||||||
        parser.add_option(switch, default=True, action='store_false',
 | 
					 | 
				
			||||||
                help=opt.help+' '+_('Specifying this switch will turn '
 | 
					 | 
				
			||||||
                    'this behavior off.'), dest=pref)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for pref in ['timefmt', 'template', 'formats']:
 | 
					 | 
				
			||||||
        opt = c.get_option(pref)
 | 
					 | 
				
			||||||
        switch = '--'+pref
 | 
					 | 
				
			||||||
        parser.add_option(switch, default=opt.default,
 | 
					 | 
				
			||||||
                help=opt.help, dest=pref)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for pref in ('replace_whitespace', 'to_lowercase'):
 | 
					 | 
				
			||||||
        opt = c.get_option(pref)
 | 
					 | 
				
			||||||
        switch = '--'+pref.replace('_', '-')
 | 
					 | 
				
			||||||
        parser.add_option(switch, default=False, action='store_true',
 | 
					 | 
				
			||||||
                help=opt.help)
 | 
					 | 
				
			||||||
    '''
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    return parser, plugin, log
 | 
					    return parser, plugin, log
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def command_catalog(args, dbpath):
 | 
					def command_catalog(args, dbpath):
 | 
				
			||||||
@ -693,6 +672,9 @@ def command_catalog(args, dbpath):
 | 
				
			|||||||
        return 1
 | 
					        return 1
 | 
				
			||||||
    if opts.verbose:
 | 
					    if opts.verbose:
 | 
				
			||||||
        log("library.cli:command_catalog dispatching to plugin %s" % plugin.name)
 | 
					        log("library.cli:command_catalog dispatching to plugin %s" % plugin.name)
 | 
				
			||||||
 | 
					    if opts.ids:
 | 
				
			||||||
 | 
					        opts.ids = [int(id) for id in opts.ids.split(',')]    
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    with plugin:
 | 
					    with plugin:
 | 
				
			||||||
        plugin.run(args[1], opts, get_db(dbpath, opts))
 | 
					        plugin.run(args[1], opts, get_db(dbpath, opts))
 | 
				
			||||||
    return 0
 | 
					    return 0
 | 
				
			||||||
 | 
				
			|||||||
@ -1634,13 +1634,15 @@ class LibraryDatabase2(LibraryDatabase):
 | 
				
			|||||||
        for i in iter(self):
 | 
					        for i in iter(self):
 | 
				
			||||||
            yield i[x]
 | 
					            yield i[x]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def get_data_as_dict(self, prefix=None, authors_as_string=False):
 | 
					    def get_data_as_dict(self, prefix=None, authors_as_string=False, ids=None):
 | 
				
			||||||
        '''
 | 
					        '''
 | 
				
			||||||
        Return all metadata stored in the database as a dict. Includes paths to
 | 
					        Return all metadata stored in the database as a dict. Includes paths to
 | 
				
			||||||
        the cover and each format.
 | 
					        the cover and each format.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        :param prefix: The prefix for all paths. By default, the prefix is the absolute path
 | 
					        :param prefix: The prefix for all paths. By default, the prefix is the absolute path
 | 
				
			||||||
        to the library folder.
 | 
					        to the library folder.
 | 
				
			||||||
 | 
					        :param ids: Set of ids to return the data for. If None return data for
 | 
				
			||||||
 | 
					        all entries in database.
 | 
				
			||||||
        '''
 | 
					        '''
 | 
				
			||||||
        if prefix is None:
 | 
					        if prefix is None:
 | 
				
			||||||
            prefix = self.library_path
 | 
					            prefix = self.library_path
 | 
				
			||||||
@ -1650,11 +1652,14 @@ class LibraryDatabase2(LibraryDatabase):
 | 
				
			|||||||
        data = []
 | 
					        data = []
 | 
				
			||||||
        for record in self.data:
 | 
					        for record in self.data:
 | 
				
			||||||
            if record is None: continue
 | 
					            if record is None: continue
 | 
				
			||||||
 | 
					            db_id = record[FIELD_MAP['id']]
 | 
				
			||||||
 | 
					            if ids is not None and db_id not in ids:
 | 
				
			||||||
 | 
					                continue
 | 
				
			||||||
            x = {}
 | 
					            x = {}
 | 
				
			||||||
            for field in FIELDS:
 | 
					            for field in FIELDS:
 | 
				
			||||||
                x[field] = record[FIELD_MAP[field]]
 | 
					                x[field] = record[FIELD_MAP[field]]
 | 
				
			||||||
            data.append(x)
 | 
					            data.append(x)
 | 
				
			||||||
            x['id'] = record[FIELD_MAP['id']]
 | 
					            x['id'] = db_id
 | 
				
			||||||
            x['formats'] = []
 | 
					            x['formats'] = []
 | 
				
			||||||
            if not x['authors']:
 | 
					            if not x['authors']:
 | 
				
			||||||
                x['authors'] = _('Unknown')
 | 
					                x['authors'] = _('Unknown')
 | 
				
			||||||
 | 
				
			|||||||
@ -524,6 +524,7 @@ class DynamicConfig(dict):
 | 
				
			|||||||
                    pass
 | 
					                    pass
 | 
				
			||||||
                except:
 | 
					                except:
 | 
				
			||||||
                    import traceback
 | 
					                    import traceback
 | 
				
			||||||
 | 
					                    print 'Failed to unpickle stored object:'
 | 
				
			||||||
                    traceback.print_exc()
 | 
					                    traceback.print_exc()
 | 
				
			||||||
                    d = {}
 | 
					                    d = {}
 | 
				
			||||||
        self.clear()
 | 
					        self.clear()
 | 
				
			||||||
 | 
				
			|||||||
@ -104,6 +104,7 @@ _extra_lang_codes = {
 | 
				
			|||||||
        'en_CY' : _('English (Cyprus)'),
 | 
					        'en_CY' : _('English (Cyprus)'),
 | 
				
			||||||
        'en_PK' : _('English (Pakistan)'),
 | 
					        'en_PK' : _('English (Pakistan)'),
 | 
				
			||||||
        'en_SG' : _('English (Singapore)'),
 | 
					        'en_SG' : _('English (Singapore)'),
 | 
				
			||||||
 | 
					        'en_YE' : _('English (Yemen)'),
 | 
				
			||||||
        'de_AT' : _('German (AT)'),
 | 
					        'de_AT' : _('German (AT)'),
 | 
				
			||||||
        'nl'    : _('Dutch (NL)'),
 | 
					        'nl'    : _('Dutch (NL)'),
 | 
				
			||||||
        'nl_BE' : _('Dutch (BE)'),
 | 
					        'nl_BE' : _('Dutch (BE)'),
 | 
				
			||||||
 | 
				
			|||||||
@ -9,9 +9,22 @@ __docformat__ = 'restructuredtext en'
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
import __builtin__, sys, os
 | 
					import __builtin__, sys, os
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					_dev_path = os.environ.get('CALIBRE_DEVELOP_FROM', None)
 | 
				
			||||||
 | 
					if _dev_path is not None:
 | 
				
			||||||
 | 
					    _dev_path = os.path.join(os.path.abspath(os.path.dirname(_dev_path)), 'resources')
 | 
				
			||||||
 | 
					    if not os.path.exists(_dev_path):
 | 
				
			||||||
 | 
					        _dev_path = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_path(path, data=False):
 | 
					def get_path(path, data=False):
 | 
				
			||||||
 | 
					    global _dev_path
 | 
				
			||||||
    path = path.replace(os.sep, '/')
 | 
					    path = path.replace(os.sep, '/')
 | 
				
			||||||
    path = os.path.join(sys.resources_location, *path.split('/'))
 | 
					    base = None
 | 
				
			||||||
 | 
					    if _dev_path is not None:
 | 
				
			||||||
 | 
					        if os.path.exists(os.path.join(_dev_path, *path.split('/'))):
 | 
				
			||||||
 | 
					            base = _dev_path
 | 
				
			||||||
 | 
					    if base is None:
 | 
				
			||||||
 | 
					        base = sys.resources_location
 | 
				
			||||||
 | 
					    path = os.path.join(base, *path.split('/'))
 | 
				
			||||||
    if data:
 | 
					    if data:
 | 
				
			||||||
        return open(path, 'rb').read()
 | 
					        return open(path, 'rb').read()
 | 
				
			||||||
    return path
 | 
					    return path
 | 
				
			||||||
 | 
				
			|||||||
@ -357,9 +357,17 @@ class BasicNewsRecipe(Recipe):
 | 
				
			|||||||
        Override in a subclass to customize extraction of the :term:`URL` that points
 | 
					        Override in a subclass to customize extraction of the :term:`URL` that points
 | 
				
			||||||
        to the content for each article. Return the
 | 
					        to the content for each article. Return the
 | 
				
			||||||
        article URL. It is called with `article`, an object representing a parsed article
 | 
					        article URL. It is called with `article`, an object representing a parsed article
 | 
				
			||||||
        from a feed. See `feedsparser <http://www.feedparser.org/docs/>`_.
 | 
					        from a feed. See `feedparser <http://www.feedparser.org/docs/>`_.
 | 
				
			||||||
        By default it returns `article.link <http://www.feedparser.org/docs/reference-entry-link.html>`_.
 | 
					        By default it looks for the original link (for feeds syndicated via a
 | 
				
			||||||
 | 
					        service like feedburner or pheedo) and if found,
 | 
				
			||||||
 | 
					        returns that or else returns
 | 
				
			||||||
 | 
					        `article.link <http://www.feedparser.org/docs/reference-entry-link.html>`_.
 | 
				
			||||||
        '''
 | 
					        '''
 | 
				
			||||||
 | 
					        for key in article.keys():
 | 
				
			||||||
 | 
					            if key.endswith('_origlink'):
 | 
				
			||||||
 | 
					                url = article[key]
 | 
				
			||||||
 | 
					                if url and url.startswith('http://'):
 | 
				
			||||||
 | 
					                    return url
 | 
				
			||||||
        return article.get('link',  None)
 | 
					        return article.get('link',  None)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def preprocess_html(self, soup):
 | 
					    def preprocess_html(self, soup):
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user